예제 #1
0
def memory_profiling_job():
    """Run a fixed sequence of data-engine calls for memory profiling.

    The statements are deliberately repetitive: each step selects or
    processes datasources for the 'PETR4' symbol and then either discards
    or keeps the result, so the order of the statements is part of the job
    and should not be rearranged.
    """
    # Bare selection; the returned object is dropped straight away.
    data_engine.select()

    # Build the complete selection, then release the only reference to it.
    a = data_engine.select().all()
    del a

    # Process a single named datasource and release the result.
    a = data_engine.get('NORMALIZED:RAW_BOVESPA').process('PETR4')
    del a

    selection = data_engine.select('{RAW_BOVESPA}')

    # First pass: process every selected reference without keeping results.
    for reference in selection:
        data_engine.get(reference).process('PETR4')

    # Second pass: keep every result alive at the same time.
    results = []
    for reference in selection:
        results.append(data_engine.get(reference).process('PETR4'))
    # Function-call form is valid on both Python 2 and Python 3; the
    # statement form `print results` is a SyntaxError on Python 3.
    print(results)
    del results

    # Third pass: same as the first, run after the results were released.
    for reference in selection:
        data_engine.get(reference).process('PETR4')
예제 #2
0
def memory_profiling_job():
    """Drive the data engine through a scripted workload for profiling.

    Every step works on the 'PETR4' symbol. Results are alternately thrown
    away and retained; the statement order is intentional and is kept
    exactly as written.
    """
    # Selection created and immediately discarded.
    data_engine.select()

    # Full selection, held briefly and then deleted.
    a = data_engine.select().all()
    del a

    # One explicit datasource, processed and then deleted.
    a = data_engine.get('NORMALIZED:RAW_BOVESPA').process('PETR4')
    del a

    selection = data_engine.select('{RAW_BOVESPA}')

    # Process each reference, discarding each result as it is produced.
    for reference in selection:
        data_engine.get(reference).process('PETR4')

    # Process each reference again, this time accumulating the results.
    results = []
    for reference in selection:
        results.append(data_engine.get(reference).process('PETR4'))
    # `print(results)` behaves the same on Python 2 for a single argument
    # and, unlike `print results`, also parses on Python 3.
    print(results)
    del results

    # Final discard-only pass after the accumulated results are gone.
    for reference in selection:
        data_engine.get(reference).process('PETR4')
예제 #3
0
# -*- coding: utf-8 -*-


from instances import data_engine


@data_engine.for_synched(data_engine.select('RAW'),
                         data_engine.select('RAW_NORMALIZATION_FACTOR'))
def _factory(raw_reference, normalization_reference):
    """Register the 'NORMALIZED:<raw_reference>' datasource.

    Called through ``data_engine.for_synched`` with one reference from the
    'RAW' selection and one from the 'RAW_NORMALIZATION_FACTOR' selection
    (how the two selections are paired is decided by ``for_synched``).
    """
    datasource_name = 'NORMALIZED:%s' % raw_reference
    required = [raw_reference, normalization_reference]

    @data_engine.datasource(datasource_name,
                            dependencies=required,
                            frequency='B',
                            tags=['NORMALIZED', 'STOCK_TICKS'])
    class Normalized(object):
        """Raw data multiplied by its normalization factor."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return raw * factor with rows containing NaN removed."""
            raw = context.dependencies(raw_reference)
            factor = context.dependencies(normalization_reference)
            scaled = raw * factor
            return scaled.dropna()
예제 #4
0
# Audit generation for 'PETR4' over every selected reference except those
# removed by the two difference() calls ('RAW' and the BOVESPA normalization
# factor). The two None arguments are presumably the start/end bounds
# (unbounded here) -- TODO confirm against auditor.generate_multiple.
auditor.generate_multiple(data_engine,
                          data_engine
                          .select()
                          .all()
                          .difference('RAW')
                          .difference('NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'),
                          'PETR4',
                          None,
                          None,
                          _get_test_data_filepath,
                          sys.modules[__name__])


# Same symbol, everything except 'RAW', restricted to d(2008, 1, 1) through
# d(2008, 12, 31). `d` is defined outside this snippet; presumably a date
# constructor -- verify.
auditor.generate_multiple(data_engine,
                          data_engine.select().all().difference('RAW'),
                          'PETR4',
                          d(2008, 1, 1),
                          d(2008, 12, 31),
                          _get_test_data_filepath,
                          sys.modules[__name__])


# The normalization-factor datasource excluded from the first call is handled
# on its own here, with None in the first bound and d(2013, 8, 7) in the
# second.
auditor.generate_multiple(data_engine,
                          data_engine.select('NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'),
                          'PETR4',
                          None,
                          d(2013, 8, 7),
                          _get_test_data_filepath,
                          sys.modules[__name__])
# -*- coding: utf-8 -*-

import pandas

import util.dateutils as dateutils
from instances import data_engine


@data_engine.for_synched(data_engine.select('NORMALIZED'))
def _factory(source_reference):
    """Register the 'MONTHLY:<source_reference>' datasource."""
    # Column -> resample aggregation, applied in this order.
    aggregations = (
        ('open', 'first'),
        ('high', 'max'),
        ('low', 'min'),
        ('close', 'last'),
        ('volume', 'sum'),
    )

    @data_engine.datasource('MONTHLY:%s' % source_reference,
                            dependencies=[source_reference],
                            lookback=31,
                            frequency='M',
                            tags=['MONTHLY', 'STOCK_TICKS'])
    class Monthly(object):
        """Month-frequency aggregation of the source datasource."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return the monthly frame with rows containing NaN removed."""
            normalized = context.dependencies(source_reference)
            month_index = dateutils.range(start, end, frequency='M')

            monthly = pandas.DataFrame(columns=normalized.columns,
                                       index=month_index)

            for column, how in aggregations:
                monthly[column] = normalized[column].resample('M', how=how)

            return monthly.dropna()
예제 #6
0
# -*- coding: utf-8 -*-

import talib
import pandas

from instances import data_engine


@data_engine.for_synched(data_engine.select('CLOSE'))
def _factory(source_reference):
    """Generate one MACD datasource per period profile for a CLOSE reference.

    Each profile is a (fast_period, slow_period, signal_period) triple that
    is forwarded to ``_generate_for_profile``.
    """
    for fast, slow, signal in ((12, 26, 9), (5, 35, 5), (12, 26, 3)):
        _generate_for_profile(source_reference, fast, slow, signal)


def _generate_for_profile(source_reference, fast_period, slow_period,
                          signal_period):
    @data_engine.datasource(
        'MACD(%i,%i,%i):%s' %
        (fast_period, slow_period, signal_period, source_reference),
        dependencies=[source_reference],
        lookback=slow_period * 2,
        tags=['MACD'])
    class MACD(object):
        def evaluate(self, context, symbol, start=None, end=None):
            df_source = context.dependencies(source_reference)
            macd, macdsignal, macdhist = talib.MACD(df_source['value'],
                                                    fastperiod=fast_period,
                                                    slowperiod=slow_period,
예제 #7
0
# -*- coding: utf-8 -*-


from instances import data_engine


@data_engine.for_each(data_engine.select('RAW'))
def _factory(source_reference, pct_period=1):
    """Register the 'PCT_CHANGE(<pct_period>):<source_reference>' datasource.

    Args:
        source_reference: reference of the datasource to derive from, taken
            from the 'RAW' selection.
        pct_period: number of periods the percentage change spans. It is
            used for the datasource name, the lookback and the computation.
    """

    @data_engine.datasource('PCT_CHANGE(%i):%s' % (pct_period, source_reference),
                            dependencies=[source_reference],
                            lookback=pct_period,
                            tags=['RAW_PCT_CHANGE'])
    class RawPctChange(object):
        """Percentage change of the source datasource over `pct_period`."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return the percentage change of the dependency's data."""
            # Pass the period explicitly so the computed change matches the
            # period advertised in the datasource name and lookback; with the
            # default pct_period=1 this equals a bare pct_change() call.
            source = context.dependencies(source_reference)
            return source.pct_change(periods=pct_period)
예제 #8
0
# -*- coding: utf-8 -*-


import pandas

from instances import data_engine


@data_engine.for_synched(data_engine.select('STOCK_TICKS'))
def _factory(source_reference):
    """Register the 'CLOSE:<source_reference>' datasource."""
    datasource_name = 'CLOSE:%s' % source_reference

    @data_engine.datasource(datasource_name,
                            dependencies=[source_reference],
                            tags=['CLOSE'])
    class Close(object):
        """Exposes the source's 'close' column under the name 'value'."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return a frame on the source index with a 'value' column."""
            ticks = context.dependencies(source_reference)

            # NOTE(review): the frame is created with an (empty) 'close'
            # column while the data is stored under 'value', so the output
            # carries both columns -- confirm whether 'close' is intended.
            closing = pandas.DataFrame(index=ticks.index, columns=['close'])
            closing['value'] = ticks['close']

            return closing
예제 #9
0
# -*- coding: utf-8 -*-


import pandas

import configurations as config

from instances import data_engine


@data_engine.for_each(data_engine.select('RAW_PCT_CHANGE'))
def _factory(source_reference):

    @data_engine.datasource('SPLITS:%s' % (source_reference),
                            dependencies=[source_reference],
                            tags=['RAW_SPLITS'])
    class RawSplits(object):

        def evaluate(self, context, symbol, start=None, end=None):
            df_pct_change = context.dependencies(source_reference)
            limit = config.NORMALIZATION_PCT_CHANGE_LIMIT

            def _calculateAfter(x):
                return 1 if x >= 0 else (1 / (1 - abs(x))).round(decimals=0)

            def _calculateBefore(x):
                return 1 if x <= 0 else (abs(x) + 1).round(decimals=0)

            df_split_join = (df_pct_change[(abs(df_pct_change['open']) > limit) &
                             (abs(df_pct_change['high']) > limit) &
                             (abs(df_pct_change['low']) > limit) &
예제 #10
0
# -*- coding: utf-8 -*-


import pandas

import util.dateutils as dateutils
from instances import data_engine


@data_engine.for_synched(data_engine.select('NORMALIZED'))
def _factory(source_reference):
    """Register the 'MONTHLY:<source_reference>' datasource."""

    @data_engine.datasource('MONTHLY:%s' % (source_reference),
                            dependencies=[source_reference],
                            lookback=31,
                            frequency='M',
                            tags=['MONTHLY', 'STOCK_TICKS'])
    class Monthly(object):
        """Month-frequency aggregation of the source datasource."""

        def evaluate(self, context, symbol, start=None, end=None):
            # NOTE(review): this listing stops after the 'volume' assignment
            # and shows no return statement; it looks cut off -- confirm
            # against the full file.
            df_norm = context.dependencies(source_reference)
            # Month-frequency index between start and end, built by the
            # project's dateutils helper.
            df_index = dateutils.range(start, end, frequency='M')

            # Empty frame with the source's columns on the monthly index.
            result = pandas.DataFrame(columns=df_norm.columns, index=df_index)

            # One aggregation per column: first open, max high, min low,
            # last close, summed volume for each 'M' resample bucket.
            result['open'] = df_norm['open'].resample('M', how='first')
            result['high'] = df_norm['high'].resample('M', how='max')
            result['low'] = df_norm['low'].resample('M', how='min')
            result['close'] = df_norm['close'].resample('M', how='last')
            result['volume'] = df_norm['volume'].resample('M', how='sum')
# -*- coding: utf-8 -*-


import pandas

import util.dateutils as dateutils

from instances import data_engine


@data_engine.for_each(data_engine.select('RAW_SPLITS'))
def _factory(source_reference):
    """Register the 'NORMALIZATION_FACTOR:<source_reference>' datasource."""

    @data_engine.datasource('NORMALIZATION_FACTOR:%s' % (source_reference),
                            dependencies=[source_reference],
                            tags=['RAW_NORMALIZATION_FACTOR'])
    class RawNormalizationFactor(object):
        """Per-column multiplicative factors derived from the splits data."""

        def evaluate(self, context, symbol, start=None, end=None):
            # NOTE(review): this listing ends after the 'volume' assignment
            # with no return statement; it looks cut off -- confirm against
            # the full file.
            df_split = context.dependencies(source_reference)
            df_normalization = pandas.DataFrame(index=dateutils.range(start, end))

            # Every price column starts at a neutral factor of 1.
            for column in ['open', 'high', 'low', 'close']:
                df_normalization[column] = 1

            # Walk the split rows by position; each one rescales the factors
            # for the rows selected by df_normalization[:day].
            for i in range(len(df_split)):
                row = df_split.ix[i]
                # presumably the working day before the split row's label
                # (row.name) -- verify against dateutils.relative_working_day
                day = dateutils.relative_working_day(-1, row.name)
                # update() writes the scaled slice back in place, so factors
                # from successive split rows compound.
                df_normalization.update(df_normalization[:day] * row['factor'])
            # The volume factor is the reciprocal of the close factor.
            df_normalization['volume'] = 1 / df_normalization['close']
예제 #12
0
# -*- coding: utf-8 -*-


import talib
import pandas

from instances import data_engine


@data_engine.for_synched(data_engine.select('CLOSE'))
def _factory(source_reference):
    """Create the MACD datasources for one CLOSE reference.

    Three (fast, slow, signal) period profiles are registered, in the order
    listed below, by delegating to ``_generate_for_profile``.
    """
    profiles = (
        (12, 26, 9),
        (5, 35, 5),
        (12, 26, 3),
    )

    for fast_period, slow_period, signal_period in profiles:
        _generate_for_profile(source_reference,
                              fast_period, slow_period, signal_period)


def _generate_for_profile(source_reference, fast_period, slow_period, signal_period):

    @data_engine.datasource('MACD(%i,%i,%i):%s' % (fast_period, slow_period, signal_period, source_reference),
                            dependencies=[source_reference],
                            lookback=slow_period*2,
                            tags=['MACD'])
    class MACD(object):

        def evaluate(self, context, symbol, start=None, end=None):
            df_source = context.dependencies(source_reference)
            macd, macdsignal, macdhist = talib.MACD(df_source['value'],
                                                    fastperiod=fast_period,
                                                    slowperiod=slow_period,
예제 #13
0
import bigtempo.auditor as auditor
from instances import data_engine


def _get_test_data_filename(reference, symbol=None):
    symbol_part = '' if not symbol else '{%s}' % symbol
    return '%s%s.csv' % (reference, symbol_part)


def _get_test_data_filepath(reference, symbol=None):
    """Return the test-data CSV path for a reference and optional symbol.

    The file name from ``_get_test_data_filename`` is joined onto
    ``config.DATASOURCE_TEST_DATA_DIR``.
    """
    base_dir = config.DATASOURCE_TEST_DATA_DIR
    filename = _get_test_data_filename(reference, symbol)
    return os.path.join(base_dir, filename)


# Audit generation for 'PETR4' over all references except 'RAW' and the
# BOVESPA normalization factor. The two None arguments are presumably the
# start/end bounds (unbounded) -- TODO confirm against
# auditor.generate_multiple.
auditor.generate_multiple(
    data_engine,
    data_engine.select().all().difference('RAW').difference(
        'NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'), 'PETR4',
    None, None, _get_test_data_filepath, sys.modules[__name__])

# Everything except 'RAW', with d(2008, 1, 1) and d(2008, 12, 31) as the two
# bounds. `d` is not defined in this snippet; presumably a date constructor
# -- verify.
auditor.generate_multiple(data_engine,
                          data_engine.select().all().difference('RAW'),
                          'PETR4', d(2008, 1, 1), d(2008, 12, 31),
                          _get_test_data_filepath, sys.modules[__name__])

# The normalization-factor datasource excluded from the first call, run on
# its own with None as the first bound and d(2013, 8, 7) as the second.
auditor.generate_multiple(
    data_engine,
    data_engine.select(
        'NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'), 'PETR4',
    None, d(2013, 8, 7), _get_test_data_filepath, sys.modules[__name__])