def memory_profiling_job():
    """Run a fixed sequence of data engine calls (a profiling job, judging by its name).

    The statements are deliberately kept separate and in their original
    order: selections and processed results are created, held and released
    at distinct points. Nothing is returned; the accumulated results of the
    second pass are printed to stdout.
    """
    # Bare selection whose result is discarded.
    data_engine.select()

    # Materialise the complete selection, then release it.
    a = data_engine.select().all()
    del a

    # Process a single named datasource for 'PETR4', then release the result.
    a = data_engine.get('NORMALIZED:RAW_BOVESPA').process('PETR4')
    del a

    selection = data_engine.select('{RAW_BOVESPA}')

    # First pass: each result is dropped as soon as it is produced.
    for reference in selection:
        data_engine.get(reference).process('PETR4')

    # Second pass: all results are kept alive together in a list.
    results = []
    for reference in selection:
        results.append(data_engine.get(reference).process('PETR4'))
    # Parenthesised so the line parses on both Python 2 and Python 3; with a
    # single argument the printed output is the same on either.
    print(results)
    del results

    # Third pass, run after the accumulated results have been released.
    for reference in selection:
        data_engine.get(reference).process('PETR4')
# -*- coding: utf-8 -*-
from instances import data_engine


@data_engine.for_synched(data_engine.select('RAW'),
                         data_engine.select('RAW_NORMALIZATION_FACTOR'))
def _factory(raw_reference, normalization_reference):
    """Register a 'NORMALIZED:<raw_reference>' datasource.

    Applied through data_engine.for_synched with the 'RAW' and
    'RAW_NORMALIZATION_FACTOR' selections; the registered datasource depends
    on both references received here.
    """
    normalized_name = 'NORMALIZED:%s' % (raw_reference)
    required = [raw_reference, normalization_reference]

    @data_engine.datasource(normalized_name,
                            dependencies=required,
                            frequency='B',
                            tags=['NORMALIZED', 'STOCK_TICKS'])
    class Normalized(object):
        """Raw data multiplied by its normalization factor."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return raw * factor with missing entries removed via dropna().

            Both operands are read from context.dependencies; symbol, start
            and end are accepted but not used directly here.
            """
            raw_frame = context.dependencies(raw_reference)
            factor_frame = context.dependencies(normalization_reference)
            adjusted = raw_frame * factor_frame
            return adjusted.dropna()
# Three auditor.generate_multiple runs for the symbol 'PETR4'. Each call
# passes the engine, a selection of references, the symbol, two positional
# values (None or d(...) -- presumably the start and end of the evaluated
# range; confirm against bigtempo.auditor), the test-data path resolver and
# this module object.

# Run 1: the full selection with 'RAW' and the RAW_BOVESPA normalization
# factor removed through difference(); both range arguments are None.
auditor.generate_multiple(data_engine,
                          data_engine
                          .select()
                          .all()
                          .difference('RAW')
                          .difference('NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'),
                          'PETR4',
                          None,
                          None,
                          _get_test_data_filepath,
                          sys.modules[__name__])

# Run 2: the full selection with only 'RAW' removed, bounded by
# d(2008, 1, 1) and d(2008, 12, 31).
auditor.generate_multiple(data_engine,
                          data_engine.select().all().difference('RAW'),
                          'PETR4',
                          d(2008, 1, 1),
                          d(2008, 12, 31),
                          _get_test_data_filepath,
                          sys.modules[__name__])

# Run 3: only the RAW_BOVESPA normalization factor left out of run 1, with
# None as the first range argument and d(2013, 8, 7) as the second.
auditor.generate_multiple(data_engine,
                          data_engine.select('NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'),
                          'PETR4',
                          None,
                          d(2013, 8, 7),
                          _get_test_data_filepath,
                          sys.modules[__name__])
# -*- coding: utf-8 -*-
import pandas

import util.dateutils as dateutils
from instances import data_engine


@data_engine.for_synched(data_engine.select('NORMALIZED'))
def _factory(source_reference):
    """Register a 'MONTHLY:<source_reference>' datasource for the given reference."""
    monthly_name = 'MONTHLY:%s' % (source_reference)

    @data_engine.datasource(monthly_name,
                            dependencies=[source_reference],
                            lookback=31,
                            frequency='M',
                            tags=['MONTHLY', 'STOCK_TICKS'])
    class Monthly(object):
        """Source data resampled with the 'M' rule, one aggregation per column."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return the resampled frame with missing entries removed via dropna().

            The result is indexed by dateutils.range(start, end,
            frequency='M') and carries the same columns as the source.
            """
            source_frame = context.dependencies(source_reference)
            target_index = dateutils.range(start, end, frequency='M')
            resampled = pandas.DataFrame(columns=source_frame.columns,
                                         index=target_index)

            # (column, aggregation) pairs, applied in this order.
            aggregations = (
                ('open', 'first'),
                ('high', 'max'),
                ('low', 'min'),
                ('close', 'last'),
                ('volume', 'sum'),
            )
            for column, how in aggregations:
                resampled[column] = source_frame[column].resample('M', how=how)

            return resampled.dropna()
# -*- coding: utf-8 -*- import talib import pandas from instances import data_engine @data_engine.for_synched(data_engine.select('CLOSE')) def _factory(source_reference): period_profiles = [(12, 26, 9), (5, 35, 5), (12, 26, 3)] for profile in period_profiles: _generate_for_profile(source_reference, *profile) def _generate_for_profile(source_reference, fast_period, slow_period, signal_period): @data_engine.datasource( 'MACD(%i,%i,%i):%s' % (fast_period, slow_period, signal_period, source_reference), dependencies=[source_reference], lookback=slow_period * 2, tags=['MACD']) class MACD(object): def evaluate(self, context, symbol, start=None, end=None): df_source = context.dependencies(source_reference) macd, macdsignal, macdhist = talib.MACD(df_source['value'], fastperiod=fast_period, slowperiod=slow_period,
# -*- coding: utf-8 -*-
from instances import data_engine


@data_engine.for_each(data_engine.select('RAW'))
def _factory(source_reference, pct_period=1):
    """Register a 'PCT_CHANGE(<pct_period>):<source_reference>' datasource.

    pct_period is used for the datasource name, its lookback and the number
    of periods of the percentage change itself, so the three always agree.
    """
    @data_engine.datasource('PCT_CHANGE(%i):%s' % (pct_period, source_reference),
                            dependencies=[source_reference],
                            lookback=pct_period,
                            tags=['RAW_PCT_CHANGE'])
    class RawPctChange(object):
        """Percentage change of the source data over pct_period periods."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return pct_change of the source dependency.

            The period is passed explicitly: a bare pct_change() always uses
            one period, which would contradict the advertised name and
            lookback whenever pct_period is not 1.
            """
            source = context.dependencies(source_reference)
            return source.pct_change(periods=pct_period)
# -*- coding: utf-8 -*-
import pandas

from instances import data_engine


@data_engine.for_synched(data_engine.select('STOCK_TICKS'))
def _factory(source_reference):
    """Register a 'CLOSE:<source_reference>' datasource for the given reference."""
    @data_engine.datasource('CLOSE:%s' % (source_reference),
                            dependencies=[source_reference],
                            tags=['CLOSE'])
    class Close(object):
        """The source's 'close' column exposed under the column name 'value'."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return a frame with a single 'value' column, indexed like the source."""
            df_source = context.dependencies(source_reference)
            # The frame is declared with the column that is actually filled.
            # Declaring 'close' here would leave a second, never-populated
            # column in the result next to 'value'.
            result = pandas.DataFrame(columns=['value'], index=df_source.index)
            result['value'] = df_source['close']
            return result
# -*- coding: utf-8 -*- import pandas import configurations as config from instances import data_engine @data_engine.for_each(data_engine.select('RAW_PCT_CHANGE')) def _factory(source_reference): @data_engine.datasource('SPLITS:%s' % (source_reference), dependencies=[source_reference], tags=['RAW_SPLITS']) class RawSplits(object): def evaluate(self, context, symbol, start=None, end=None): df_pct_change = context.dependencies(source_reference) limit = config.NORMALIZATION_PCT_CHANGE_LIMIT def _calculateAfter(x): return 1 if x >= 0 else (1 / (1 - abs(x))).round(decimals=0) def _calculateBefore(x): return 1 if x <= 0 else (abs(x) + 1).round(decimals=0) df_split_join = (df_pct_change[(abs(df_pct_change['open']) > limit) & (abs(df_pct_change['high']) > limit) & (abs(df_pct_change['low']) > limit) &
# -*- coding: utf-8 -*-
import pandas

import util.dateutils as dateutils
from instances import data_engine


@data_engine.for_synched(data_engine.select('NORMALIZED'))
def _factory(source_reference):
    """Register a 'MONTHLY:<source_reference>' datasource for the given reference."""
    @data_engine.datasource('MONTHLY:%s' % (source_reference),
                            dependencies=[source_reference],
                            lookback=31,
                            frequency='M',
                            tags=['MONTHLY', 'STOCK_TICKS'])
    class Monthly(object):
        """Source data resampled with the 'M' rule, one aggregation per column."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return the resampled frame with missing entries removed via dropna().

            The result is indexed by dateutils.range(start, end,
            frequency='M') and carries the same columns as the source.
            """
            df_norm = context.dependencies(source_reference)
            df_index = dateutils.range(start, end, frequency='M')
            result = pandas.DataFrame(columns=df_norm.columns, index=df_index)

            result['open'] = df_norm['open'].resample('M', how='first')
            result['high'] = df_norm['high'].resample('M', how='max')
            result['low'] = df_norm['low'].resample('M', how='min')
            result['close'] = df_norm['close'].resample('M', how='last')
            result['volume'] = df_norm['volume'].resample('M', how='sum')

            # Hand the frame back to the caller; without an explicit return
            # the method would yield None.
            return result.dropna()
# -*- coding: utf-8 -*-
import pandas

import util.dateutils as dateutils
from instances import data_engine


@data_engine.for_each(data_engine.select('RAW_SPLITS'))
def _factory(source_reference):
    """Register a 'NORMALIZATION_FACTOR:<source_reference>' datasource."""
    @data_engine.datasource('NORMALIZATION_FACTOR:%s' % (source_reference),
                            dependencies=[source_reference],
                            tags=['RAW_NORMALIZATION_FACTOR'])
    class RawNormalizationFactor(object):
        """Per-column multiplicative factors derived from the split dependency."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return a frame of factors indexed by dateutils.range(start, end).

            Columns are 'open', 'high', 'low', 'close' and 'volume'.
            """
            df_split = context.dependencies(source_reference)
            df_normalization = pandas.DataFrame(index=dateutils.range(start, end))

            # Every price column starts out as the neutral factor 1.
            for column in ['open', 'high', 'low', 'close']:
                df_normalization[column] = 1

            for i in range(len(df_split)):
                row = df_split.ix[i]
                # Rows up to the working day before the split row's label
                # are scaled by that row's factor.
                day = dateutils.relative_working_day(-1, row.name)
                df_normalization.update(df_normalization[:day] * row['factor'])

            # The volume factor is the reciprocal of the close factor.
            df_normalization['volume'] = 1 / df_normalization['close']

            # Hand the frame back to the caller; without an explicit return
            # the method would yield None.
            return df_normalization
# -*- coding: utf-8 -*- import talib import pandas from instances import data_engine @data_engine.for_synched(data_engine.select('CLOSE')) def _factory(source_reference): period_profiles = [(12, 26, 9), (5, 35, 5), (12, 26, 3)] for profile in period_profiles: _generate_for_profile(source_reference, *profile) def _generate_for_profile(source_reference, fast_period, slow_period, signal_period): @data_engine.datasource('MACD(%i,%i,%i):%s' % (fast_period, slow_period, signal_period, source_reference), dependencies=[source_reference], lookback=slow_period*2, tags=['MACD']) class MACD(object): def evaluate(self, context, symbol, start=None, end=None): df_source = context.dependencies(source_reference) macd, macdsignal, macdhist = talib.MACD(df_source['value'], fastperiod=fast_period, slowperiod=slow_period,
import bigtempo.auditor as auditor
from instances import data_engine


def _get_test_data_filename(reference, symbol=None):
    """Return '<reference>.csv', or '<reference>{<symbol>}.csv' when symbol is truthy."""
    if symbol:
        suffix = '{%s}' % symbol
    else:
        suffix = ''
    return '%s%s.csv' % (reference, suffix)


def _get_test_data_filepath(reference, symbol=None):
    """Return the test-data file name joined onto config.DATASOURCE_TEST_DATA_DIR."""
    filename = _get_test_data_filename(reference, symbol)
    base_dir = config.DATASOURCE_TEST_DATA_DIR
    return os.path.join(base_dir, filename)


# Three generate_multiple runs for 'PETR4', kept in their original order and
# with their original arguments.
auditor.generate_multiple(
    data_engine,
    data_engine.select().all().difference('RAW').difference(
        'NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'),
    'PETR4',
    None,
    None,
    _get_test_data_filepath,
    sys.modules[__name__])

auditor.generate_multiple(
    data_engine,
    data_engine.select().all().difference('RAW'),
    'PETR4',
    d(2008, 1, 1),
    d(2008, 12, 31),
    _get_test_data_filepath,
    sys.modules[__name__])

auditor.generate_multiple(
    data_engine,
    data_engine.select(
        'NORMALIZATION_FACTOR:SPLITS:PCT_CHANGE(1):RAW_BOVESPA'),
    'PETR4',
    None,
    d(2013, 8, 7),
    _get_test_data_filepath,
    sys.modules[__name__])