def save_data(self, data_contexts):
    '''
    Persist downloaded rows for each (rows, context) pair and yield the
    context of every successfully saved symbol.

    For non-empty row sets, the newest row is converted to a bar so its
    datetime can be recorded as ``to_date_time`` in the context, then the
    rows are joined (optionally lz4 compressed) and written to the
    already-open file. Empty row sets close the file and delete it when
    nothing was ever written to it.
    '''
    for rows, context in data_contexts:
        # The open file handle was stashed in the context by the opener.
        f = context.pop('_open_file')
        if not rows:
            # Nothing downloaded: close the handle and remove the file if
            # it is empty so no zero-byte artifacts are left behind.
            file_path = f.name
            f.close()
            if os.stat(file_path).st_size == 0:
                os.remove(file_path)
            continue
        try:
            # Record the datetime of the newest row, if it parses cleanly.
            bar_ = self.row_to_bar(rows[-1], context['frequency'])
            if isinstance(bar_, bar.Bar):
                context['to_date_time'] = bar_.get_date_time()
            else:
                printf('latest datetime for %s was invalid: %s' % (
                    context['symbol'], bar_))
            data = '%s\n' % '\n'.join(rows)
            if settings.DATA_COMPRESSION == 'lz4':
                data = lz4.dumps(data)
            f.write(data)
        finally:
            # Always release the handle, even if conversion or the write
            # itself raises — the original leaked the descriptor here.
            f.close()
        yield context
def initialize_symbols(self, symbols, frequency=None):
    '''
    Download and store historical data for symbols that have no data yet.

    Symbols already initialized for ``frequency`` (or previously failed)
    are removed from ``symbols`` in place and skipped. Emits the updated
    event for every context produced by the pipeline.
    '''
    frequency = frequency or self._default_frequency
    initialized = [x for x in symbols
                   if self._data_writer.symbol_initialized(x, frequency)
                   or x in FailedSymbols]
    if initialized:
        printf('%i symbols %s already initialized!' % (
            len(initialized), initialized))
        for symbol in initialized:
            # remove() replaces the redundant pop(index()) double scan.
            symbols.remove(symbol)
    if not symbols:
        printf('no symbols to initialize.')
        return None
    for context in self.__update_symbols(symbols, frequency, sleep=1):
        self._updated_event.emit(context)
def initialize_symbols(self, symbols, frequency=None):
    '''
    Fetch and persist historical data for symbols not yet initialized.

    Already-initialized or previously-failed symbols are dropped from the
    caller's ``symbols`` list in place. Each pipeline context triggers an
    ``_updated_event`` emission.
    '''
    frequency = frequency or self._default_frequency
    initialized = [x for x in symbols
                   if self._data_writer.symbol_initialized(x, frequency)
                   or x in FailedSymbols]
    if initialized:
        printf('%i symbols %s already initialized!' % (
            len(initialized), initialized))
        for symbol in initialized:
            # list.remove() instead of the wasteful pop(index()) idiom.
            symbols.remove(symbol)
    if not symbols:
        printf('no symbols to initialize.')
        return None
    for context in self.__update_symbols(symbols, frequency, sleep=1):
        self._updated_event.emit(context)
def update_data(self, data_contexts):
    '''
    Merge freshly downloaded rows into each symbol's existing data file.

    Relies on string ordering of the leading CSV datetime field to drop
    rows that are not newer than what is already stored, then positions
    the file pointer so the subsequent write appends (seekable formats)
    or rewrites the whole file (lz4). Yields (new_rows, context) pairs.
    '''
    for update_rows, context in data_contexts:
        f = context['_open_file']
        # Defaults guard against an unrecognized DATA_COMPRESSION value,
        # which previously raised NameError in the comparison loop below.
        new_rows = []
        newest_existing_datetime = ''
        # read existing data, relying on string sorting for date comparisons
        if utils.supports_seeking(settings.DATA_COMPRESSION):
            # read the tail of the file to get the newest stored datetime
            try:
                f.seek(-512, 2)
            except IOError:
                # file shorter than 512 bytes: read it all
                f.seek(0)
            newest_existing_datetime = f.read().split('\n')[-1].split(',')[0]
        elif settings.DATA_COMPRESSION == 'lz4':
            # read entire file to rows and get newest stored datetime
            new_rows = lz4.loads(f.read()).strip().split('\n')
            newest_existing_datetime = new_rows[-1].split(',')[0]
        # only add new rows if row datetime is greater than stored datetime
        for row in update_rows:
            row_datetime = row.split(',')[0]
            if row_datetime > newest_existing_datetime:
                new_rows.append(row)
        # seek to the proper place in the file in preparation for write_data
        if utils.supports_seeking(settings.DATA_COMPRESSION):
            # jump to the end of the file so we only update existing data
            try:
                f.seek(-1, 2)
            except IOError:
                printf('unexpected file seeking bug :(', f.name)
            # make sure there's a trailing new-line character at the end
            last_char = f.read()
            if last_char != '\n':
                f.write('\n')
        elif settings.DATA_COMPRESSION == 'lz4':
            # jump to the beginning of the file so we rewrite everything
            f.seek(0)
        yield (new_rows, context)
def update_symbols(self, symbols, frequency=None):
    '''
    Download and append new historical data for already-initialized symbols.

    Symbols that were never initialized (or previously failed) are removed
    from ``symbols`` in place and skipped. Emits the updated event for
    every context produced by the update pipeline.
    '''
    frequency = frequency or self._default_frequency
    uninitialized = [x for x in symbols
                     if x not in FailedSymbols
                     and not self._data_writer.symbol_initialized(x, frequency)]
    if uninitialized:
        printf('%i symbols %s not initialized yet!' % (
            len(uninitialized), uninitialized))
        for symbol in uninitialized:
            # remove() replaces the redundant pop(index()) double scan.
            symbols.remove(symbol)
    if not symbols:
        return None
    for context in self.__update_symbols(
            symbols, frequency,
            operation_name='update',
            open_files_function=self.open_files_updatable,
            process_data_update_function=self._data_writer.update_data,
            init=False, sleep=1):
        self._updated_event.emit(context)
def update_symbols(self, symbols, frequency=None):
    '''
    Append fresh historical data for symbols that are already initialized.

    Uninitialized or previously-failed symbols are dropped from the
    caller's ``symbols`` list in place. Each pipeline context triggers an
    ``_updated_event`` emission.
    '''
    frequency = frequency or self._default_frequency
    uninitialized = [x for x in symbols
                     if x not in FailedSymbols
                     and not self._data_writer.symbol_initialized(x, frequency)]
    if uninitialized:
        printf('%i symbols %s not initialized yet!' % (
            len(uninitialized), uninitialized))
        for symbol in uninitialized:
            # list.remove() instead of the wasteful pop(index()) idiom.
            symbols.remove(symbol)
    if not symbols:
        return None
    for context in self.__update_symbols(
            symbols, frequency,
            operation_name='update',
            open_files_function=self.open_files_updatable,
            process_data_update_function=self._data_writer.update_data,
            init=False, sleep=1):
        self._updated_event.emit(context)
def update_data(self, data_contexts):
    '''
    Fold newly downloaded rows into each symbol's stored data file.

    String comparison on the leading CSV datetime field filters out rows
    that are not strictly newer than the newest stored row; the file
    pointer is then positioned for the follow-up write (append for
    seekable formats, full rewrite for lz4). Yields (new_rows, context).
    '''
    for update_rows, context in data_contexts:
        f = context['_open_file']
        # Safe defaults so an unknown DATA_COMPRESSION setting no longer
        # raises NameError when the filter loop runs.
        new_rows = []
        newest_existing_datetime = ''
        # read existing data, relying on string sorting for date comparisons
        if utils.supports_seeking(settings.DATA_COMPRESSION):
            # read the tail of the file to get the newest stored datetime
            try:
                f.seek(-512, 2)
            except IOError:
                # file shorter than 512 bytes: read from the start
                f.seek(0)
            newest_existing_datetime = f.read().split('\n')[-1].split(',')[0]
        elif settings.DATA_COMPRESSION == 'lz4':
            # read entire file to rows and get newest stored datetime
            new_rows = lz4.loads(f.read()).strip().split('\n')
            newest_existing_datetime = new_rows[-1].split(',')[0]
        # only add new rows if row datetime is greater than stored datetime
        for row in update_rows:
            row_datetime = row.split(',')[0]
            if row_datetime > newest_existing_datetime:
                new_rows.append(row)
        # seek to the proper place in the file in preparation for write_data
        if utils.supports_seeking(settings.DATA_COMPRESSION):
            # jump to the end of the file so we only update existing data
            try:
                f.seek(-1, 2)
            except IOError:
                printf('unexpected file seeking bug :(', f.name)
            # make sure there's a trailing new-line character at the end
            last_char = f.read()
            if last_char != '\n':
                f.write('\n')
        elif settings.DATA_COMPRESSION == 'lz4':
            # jump to the beginning of the file so we rewrite everything
            f.seek(0)
        yield (new_rows, context)
def save_data(self, data_contexts):
    '''
    Write each symbol's downloaded rows to its open file and yield the
    context for every symbol that was actually saved.

    The newest row is converted to a bar to stamp ``to_date_time`` into
    the context; rows are newline-joined (and lz4 compressed when
    configured) before writing. When no rows arrived, the file is closed
    and deleted if it is empty.
    '''
    for rows, context in data_contexts:
        # Opener placed the live file handle under this context key.
        f = context.pop('_open_file')
        if not rows:
            # No data: drop the handle and clean up zero-byte files.
            file_path = f.name
            f.close()
            if os.stat(file_path).st_size == 0:
                os.remove(file_path)
            continue
        try:
            # Stamp the newest row's datetime into the context when valid.
            bar_ = self.row_to_bar(rows[-1], context['frequency'])
            if isinstance(bar_, bar.Bar):
                context['to_date_time'] = bar_.get_date_time()
            else:
                printf('latest datetime for %s was invalid: %s' % (
                    context['symbol'], bar_))
            data = '%s\n' % '\n'.join(rows)
            if settings.DATA_COMPRESSION == 'lz4':
                data = lz4.dumps(data)
            f.write(data)
        finally:
            # Close even on exception; the original leaked the handle if
            # row_to_bar or the write raised.
            f.close()
        yield context
def __update_symbols(self, symbols, frequency,
                     operation_name='download',
                     open_files_function=None,
                     process_data_update_function=None,
                     init=True, sleep=None):
    '''
    This function contains the actual pipeline logic for downloading,
    initializing and updating symbols' data. It can display the rough
    progress of bulk operation to stdout using display_progress.
    '''
    open_files_function = (open_files_function
                           or self.open_files_writeable)
    process_data_update_function = (process_data_update_function
                                    or self.__process_data_to_initialize)
    frequency = frequency or self._default_frequency
    # Compare with != rather than `is not`: Frequency values may be plain
    # (non-interned) ints, so identity checks can misfire; the rest of
    # this function already compares with != for the same value.
    batch_size = 200 if frequency != bar.Frequency.MINUTE else 500
    sleep = sleep if frequency != bar.Frequency.MINUTE else None
    display_progress = len(symbols) > 1
    # Load the latest stored datetime for the requested combination of
    # symbols and frequency. This doubles as a flag for init vs update.
    symbol_contexts = [(x, {'symbol': x,
                            'frequency': frequency,
                            'from_date_time': None}) for x in symbols]
    if frequency != bar.Frequency.MINUTE and not init:
        for symbol, context in symbol_contexts:
            context['from_date_time'] = self._db.get_updated(
                bar.FrequencyToStr[frequency], symbol)
    elif not init:
        for symbol, context in symbol_contexts:
            context['from_date_time'] = True  # set update over init
    url_contexts = self._data_downloader.get_urls(symbol_contexts)
    if not url_contexts:
        op = ' ' if not display_progress else ' bulk '
        raise Exception('no urls returned for%s%sing historical data!' % (
            op, operation_name))
    elif display_progress:
        total_len = len(url_contexts)
        current_idx = 0
        last_pct = 0
        printf('starting bulk %s of historical data for %i symbols.'
               % (operation_name, total_len))
        sys.stdout.flush()
    for context in self.__bulk_dl_and_save(url_contexts,
                                           process_data_update_function,
                                           open_files_function,
                                           batch_size, sleep):
        if display_progress:
            current_idx += 1
            pct = int(current_idx / (total_len + 1.0) * 100.0)
            if pct != last_pct:
                last_pct = pct
                printf('%i%%' % pct)
        yield context
    if display_progress:
        # The +1.0 denominator above never reaches 100 exactly; top it off.
        if last_pct != 100:
            printf('100%')
def __update_symbols(self, symbols, frequency,
                     operation_name='download',
                     open_files_function=None,
                     process_data_update_function=None,
                     init=True, sleep=None):
    '''
    This function contains the actual pipeline logic for downloading,
    initializing and updating symbols' data. It can display the rough
    progress of bulk operation to stdout using display_progress.
    '''
    open_files_function = (open_files_function
                           or self.open_files_writeable)
    process_data_update_function = (process_data_update_function
                                    or self.__process_data_to_initialize)
    frequency = frequency or self._default_frequency
    # != instead of `is not`: identity comparison of frequency constants
    # is fragile for non-interned ints and inconsistent with the equality
    # checks used later in this function.
    batch_size = 200 if frequency != bar.Frequency.MINUTE else 500
    sleep = sleep if frequency != bar.Frequency.MINUTE else None
    display_progress = len(symbols) > 1
    # Load the latest stored datetime for the requested combination of
    # symbols and frequency. This doubles as a flag for init vs update.
    symbol_contexts = [(x, {'symbol': x,
                            'frequency': frequency,
                            'from_date_time': None}) for x in symbols]
    if frequency != bar.Frequency.MINUTE and not init:
        for symbol, context in symbol_contexts:
            context['from_date_time'] = self._db.get_updated(
                bar.FrequencyToStr[frequency], symbol)
    elif not init:
        for symbol, context in symbol_contexts:
            context['from_date_time'] = True  # set update over init
    url_contexts = self._data_downloader.get_urls(symbol_contexts)
    if not url_contexts:
        op = ' ' if not display_progress else ' bulk '
        raise Exception('no urls returned for%s%sing historical data!' % (
            op, operation_name))
    elif display_progress:
        total_len = len(url_contexts)
        current_idx = 0
        last_pct = 0
        printf('starting bulk %s of historical data for %i symbols.'
               % (operation_name, total_len))
        sys.stdout.flush()
    for context in self.__bulk_dl_and_save(url_contexts,
                                           process_data_update_function,
                                           open_files_function,
                                           batch_size, sleep):
        if display_progress:
            current_idx += 1
            pct = int(current_idx / (total_len + 1.0) * 100.0)
            if pct != last_pct:
                last_pct = pct
                printf('%i%%' % pct)
        yield context
    if display_progress:
        # Percentage math above caps below 100; print the final tick.
        if last_pct != 100:
            printf('100%')
def remove_blacklisted(self, symbol):
    '''
    Remove ``symbol`` from the blacklist, persist the change, and return
    the reason message it was originally blacklisted with.
    '''
    printf('removing blacklisted symbol: %s' % symbol)
    # remove() avoids the redundant index() scan of pop(index()).
    self.__blacklisted.remove(symbol)
    reason_added = self.__dict.pop(symbol)
    self.save()
    return reason_added
def remove_failed(self, symbol):
    '''
    Drop ``symbol`` from the failed-symbol map, persist the change, and
    return the failure reason that had been recorded for it.
    '''
    printf('removing failed symbol: %s' % symbol)
    reason = self.__dict.pop(symbol)
    self.save()
    return reason
def add_blacklisted(self, symbol, reason_blacklisted_msg=None):
    '''
    Record ``symbol`` as blacklisted with an optional reason message and
    persist the updated state.
    '''
    printf('adding blacklisted symbol: %s: %s'
           % (symbol, reason_blacklisted_msg))
    # Track membership and the reason text; order of the two updates is
    # independent.
    self.__blacklisted.append(symbol)
    self.__dict[symbol] = reason_blacklisted_msg
    self.save()
def add_failed(self, symbol, reason_failed_msg):
    '''
    Record ``symbol`` as failed together with the failure reason and
    persist the updated state.
    '''
    printf('adding failed symbol: %s: %s' % (symbol, reason_failed_msg))
    self.__dict[symbol] = reason_failed_msg
    self.save()