Ejemplo n.º 1
0
    def save_data(self, data_contexts):
        """Write each symbol's downloaded rows to its open file.

        For every ``(rows, context)`` pair, record the latest bar's datetime
        in ``context['to_date_time']``, write the (optionally lz4-compressed)
        rows to the file stored under ``context['_open_file']``, and yield
        the context. When no rows arrived, close the handle and delete the
        file if it is empty.
        """
        for rows, context in data_contexts:
            f = context.pop('_open_file')
            if not rows:
                # Nothing was downloaded: close the handle and clean up a
                # zero-byte file so it is not mistaken for initialized data.
                file_path = f.name
                f.close()
                if os.stat(file_path).st_size == 0:
                    os.remove(file_path)
                continue

            try:
                bar_ = self.row_to_bar(
                    rows[-1], context['frequency'])
                if isinstance(bar_, bar.Bar):
                    context['to_date_time'] = bar_.get_date_time()
                else:
                    printf('latest datetime for %s was invalid: %s' % (
                                           context['symbol'], bar_))

                data = '%s\n' % '\n'.join(rows)
                if settings.DATA_COMPRESSION == 'lz4':
                    data = lz4.dumps(data)

                f.write(data)
            finally:
                # Release the handle even if conversion/compression/write
                # raises; the original leaked the file object on error.
                f.close()
            yield context
Ejemplo n.º 2
0
    def initialize_symbols(self, symbols, frequency=None):
        """Download and store initial historical data for *symbols*.

        Symbols whose data is already initialized, or that are listed in
        FailedSymbols, are reported and removed from *symbols* in place.
        Each per-symbol context produced by the update pipeline is emitted
        through ``self._updated_event``.
        """
        frequency = frequency or self._default_frequency
        initialized = [x for x in symbols
                       if self._data_writer.symbol_initialized(x, frequency)
                       or x in FailedSymbols]
        if initialized:
            printf('%i symbols %s already initialized!' % (
                len(initialized), initialized))
            for symbol in initialized:
                # list.remove is the idiomatic equivalent of
                # pop(index(...)) and avoids the double scan.
                symbols.remove(symbol)

        if not symbols:
            printf('no symbols to initialize.')
            return None
        for context in self.__update_symbols(symbols, frequency, sleep=1):
            self._updated_event.emit(context)
Ejemplo n.º 3
0
    def initialize_symbols(self, symbols, frequency=None):
        """Download and store initial historical data for *symbols*.

        Symbols whose data is already initialized, or that appear in
        FailedSymbols, are reported and removed from *symbols* in place.
        Each context produced by the update pipeline is emitted via
        ``self._updated_event``.
        """
        frequency = frequency or self._default_frequency
        # Skip symbols that already have data on disk or previously failed.
        initialized = [x for x in symbols
                       if self._data_writer.symbol_initialized(x, frequency)\
                       or x in FailedSymbols]
        if initialized:
            printf('%i symbols %s already initialized!' %
                   (len(initialized), initialized))
            # Prune the skipped symbols from the caller's list in place.
            for symbol in initialized:
                symbols.pop(symbols.index(symbol))

        if not symbols:
            printf('no symbols to initialize.')
            return None
        for context in self.__update_symbols(symbols, frequency, sleep=1):
            self._updated_event.emit(context)
Ejemplo n.º 4
0
    def update_data(self, data_contexts):
        """Merge freshly downloaded rows into each symbol's stored data.

        For every ``(update_rows, context)`` pair: read the newest datetime
        already present in ``context['_open_file']``, keep only the update
        rows strictly newer than it, position the file pointer for the
        write performed later by write_data, and yield
        ``(new_rows, context)``.

        NOTE(review): if DATA_COMPRESSION is neither seekable nor 'lz4',
        ``new_rows`` and ``newest_existing_datetime`` are never assigned and
        the filter loop below raises NameError — presumably those are the
        only two supported settings; confirm.
        """
        for update_rows, context in data_contexts:
            f = context['_open_file']
            # read existing data, relying on string sorting for date comparisons
            if utils.supports_seeking(settings.DATA_COMPRESSION):
                # read the tail of the file to rows and get newest stored datetime
                new_rows = []
                try:
                    # assumes the final 512 bytes cover the last row — TODO confirm
                    f.seek(-512, 2)
                except IOError:
                    # file shorter than 512 bytes: read it from the start
                    f.seek(0)
                newest_existing_datetime = f.read().split('\n')[-1].split(
                    ',')[0]

            elif settings.DATA_COMPRESSION == 'lz4':
                # read entire file to rows and get newest stored datetime
                new_rows = lz4.loads(f.read()).strip().split('\n')
                newest_existing_datetime = new_rows[-1].split(',')[0]

            # only add new rows if row datetime is greater than stored datetime
            for row in update_rows:
                row_datetime = row.split(',')[0]
                if row_datetime > newest_existing_datetime:
                    new_rows.append(row)

            # seek to the proper place in the file in preparation for write_data
            if utils.supports_seeking(settings.DATA_COMPRESSION):
                # jump to the end of the file so we only update existing data
                try:
                    f.seek(-1, 2)
                except IOError:
                    printf('unexpected file seeking bug :(', f.name)
                # make sure there's a trailing new-line character at the end
                last_char = f.read()
                if last_char != '\n':
                    f.write('\n')
            elif settings.DATA_COMPRESSION == 'lz4':
                # jump to the beginning of the file so we rewrite everything
                f.seek(0)

            yield (new_rows, context)
Ejemplo n.º 5
0
    def update_symbols(self, symbols, frequency=None):
        """Update stored historical data for *symbols*.

        Symbols with no initialized data (and not already in FailedSymbols)
        are reported and removed from *symbols* in place. The remaining
        symbols are run through the __update_symbols pipeline in update
        mode and each finished context is emitted via
        ``self._updated_event``.
        """
        frequency = frequency or self._default_frequency
        uninitialized = \
            [x for x in symbols
             if x not in FailedSymbols
             and not self._data_writer.symbol_initialized(x, frequency)]
        if uninitialized:
            printf('%i symbols %s not initialized yet!' % (
                len(uninitialized), uninitialized))
            for symbol in uninitialized:
                # list.remove is the idiomatic equivalent of
                # pop(index(...)) and avoids the double scan.
                symbols.remove(symbol)
            if not symbols:
                return None

        for context in self.__update_symbols(symbols, frequency,
            operation_name='update',
            open_files_function=self.open_files_updatable,
            process_data_update_function=self._data_writer.update_data,
            init=False,
            sleep=1
        ):
            self._updated_event.emit(context)
Ejemplo n.º 6
0
    def update_symbols(self, symbols, frequency=None):
        """Update stored historical data for *symbols*.

        Symbols with no initialized data (and not already in FailedSymbols)
        are reported and removed from *symbols* in place; the remainder are
        run through the __update_symbols pipeline in update mode, and each
        finished context is emitted via ``self._updated_event``.
        """
        frequency = frequency or self._default_frequency
        # Updating requires existing data on disk; known-failed symbols are
        # left untouched here.
        uninitialized = \
            [x for x in symbols
             if x not in FailedSymbols \
             and not self._data_writer.symbol_initialized(x, frequency)]
        if uninitialized:
            printf('%i symbols %s not initialized yet!' %
                   (len(uninitialized), uninitialized))
            # Prune the skipped symbols from the caller's list in place.
            for symbol in uninitialized:
                symbols.pop(symbols.index(symbol))
            if not symbols:
                return None

        for context in self.__update_symbols(
                symbols,
                frequency,
                operation_name='update',
                open_files_function=self.open_files_updatable,
                process_data_update_function=self._data_writer.update_data,
                init=False,
                sleep=1):
            self._updated_event.emit(context)
Ejemplo n.º 7
0
    def update_data(self, data_contexts):
        """Merge freshly downloaded rows with rows already stored on disk.

        For each ``(update_rows, context)`` pair, reads the newest datetime
        already stored in ``context['_open_file']``, keeps only the update
        rows strictly newer than it, positions the file pointer for the
        write that follows, and yields ``(new_rows, context)``.
        """
        for update_rows, context in data_contexts:
            handle = context['_open_file']
            # Load what is already stored; lexical string ordering stands in
            # for chronological ordering of the datetime prefixes.
            if utils.supports_seeking(settings.DATA_COMPRESSION):
                # Peek at the tail of the file for the newest stored datetime.
                new_rows = []
                try:
                    handle.seek(-512, 2)
                except IOError:
                    handle.seek(0)
                tail = handle.read()
                newest_existing_datetime = tail.split('\n')[-1].split(',')[0]

            elif settings.DATA_COMPRESSION == 'lz4':
                # Decompress the whole file to get the newest stored datetime.
                decompressed = lz4.loads(handle.read()).strip()
                new_rows = decompressed.split('\n')
                newest_existing_datetime = new_rows[-1].split(',')[0]

            # Keep only rows strictly newer than anything already stored.
            new_rows.extend(
                row for row in update_rows
                if row.split(',')[0] > newest_existing_datetime)

            # Position the file pointer for the subsequent write_data call.
            if utils.supports_seeking(settings.DATA_COMPRESSION):
                # Append mode: jump to the end and guarantee a trailing newline.
                try:
                    handle.seek(-1, 2)
                except IOError:
                    printf('unexpected file seeking bug :(', handle.name)
                if handle.read() != '\n':
                    handle.write('\n')
            elif settings.DATA_COMPRESSION == 'lz4':
                # Rewrite mode: the whole file gets written from the start.
                handle.seek(0)

            yield (new_rows, context)
Ejemplo n.º 8
0
    def save_data(self, data_contexts):
        """Write each symbol's rows to its open file and yield the context.

        Records the latest bar's datetime in ``context['to_date_time']``,
        writes the (optionally lz4-compressed) rows, closes the file, and
        yields the context. With no rows, closes the handle and deletes the
        file if it ended up empty.
        """
        for rows, context in data_contexts:
            handle = context.pop('_open_file')
            if not rows:
                # No data arrived; drop the handle and delete an empty file.
                path = handle.name
                handle.close()
                if os.stat(path).st_size == 0:
                    os.remove(path)
                continue

            bar_ = self.row_to_bar(rows[-1], context['frequency'])
            if isinstance(bar_, bar.Bar):
                context['to_date_time'] = bar_.get_date_time()
            else:
                printf('latest datetime for %s was invalid: %s' %
                       (context['symbol'], bar_))

            payload = '%s\n' % '\n'.join(rows)
            if settings.DATA_COMPRESSION == 'lz4':
                payload = lz4.dumps(payload)

            handle.write(payload)
            handle.close()
            yield context
Ejemplo n.º 9
0
    def __update_symbols(self, symbols, frequency,
        operation_name='download',
        open_files_function=None,
        process_data_update_function=None,
        init=True,
        sleep=None
    ):
        '''
        This function contains the actual pipeline logic for downloading,
        initializing and updating symbols' data. It can display the rough
        progress of bulk operation to stdout using display_progress.

        Yields the per-symbol context for each symbol processed by
        __bulk_dl_and_save. Raises Exception when the downloader returns
        no URLs for the requested symbols.
        '''
        # Default hooks implement the "initialize" flavor of the pipeline.
        open_files_function = \
            open_files_function or self.open_files_writeable
        process_data_update_function = \
            process_data_update_function or self.__process_data_to_initialize
        frequency = frequency or self._default_frequency
        # Minute-frequency runs use larger batches and never sleep.
        batch_size = 200 if frequency is not bar.Frequency.MINUTE else 500
        sleep = sleep if frequency is not bar.Frequency.MINUTE else None

        display_progress = True if len(symbols) > 1 else False
        # Load the latest stored datetime for the requested combination of
        # symbols and frequency. This doubles as a flag for init vs update.
        symbol_contexts = [
            (x, {'symbol': x, 'frequency': frequency, 'from_date_time': None})
            for x in symbols]
        if frequency != bar.Frequency.MINUTE and not init:
            for symbol, context in symbol_contexts:
                context['from_date_time'] = self._db.get_updated(bar.FrequencyToStr[frequency], symbol)
        elif not init:
            for symbol, context in symbol_contexts:
                context['from_date_time'] = True # set update over init

        url_contexts = self._data_downloader.get_urls(symbol_contexts)
        if not url_contexts:
            op = ' ' if not display_progress else ' bulk '
            raise Exception('no urls returned for%s%sing historical data!' % (
                                                 op, operation_name))
        elif display_progress:
            total_len = len(url_contexts)
            current_idx = 0
            last_pct = 0
            printf('starting bulk %s of historical data for %i symbols.' % (
                                 operation_name, total_len))
            sys.stdout.flush()

        for context in self.__bulk_dl_and_save(url_contexts,
            process_data_update_function, open_files_function,
            batch_size, sleep
        ):
            if display_progress:
                current_idx += 1
                # total_len + 1.0 keeps the in-loop percentage below 100
                # until the final "100%" line after the loop.
                pct = int(current_idx / (total_len + 1.0) * 100.0)
                if pct != last_pct:
                    last_pct = pct
                    printf('%i%%' % pct)

            yield context

        if display_progress:
            if last_pct != 100:
                printf('100%')
Ejemplo n.º 10
0
    def __update_symbols(self,
                         symbols,
                         frequency,
                         operation_name='download',
                         open_files_function=None,
                         process_data_update_function=None,
                         init=True,
                         sleep=None):
        '''
        This function contains the actual pipeline logic for downloading,
        initializing and updating symbols' data. It can display the rough
        progress of bulk operation to stdout using display_progress.

        Yields the per-symbol context for each symbol processed by
        __bulk_dl_and_save. Raises Exception when the downloader returns
        no URLs for the requested symbols.
        '''
        # Default hooks implement the "initialize" flavor of the pipeline.
        open_files_function = \
            open_files_function or self.open_files_writeable
        process_data_update_function = \
            process_data_update_function or self.__process_data_to_initialize
        frequency = frequency or self._default_frequency
        # Minute-frequency runs use larger batches and never sleep.
        batch_size = 200 if frequency is not bar.Frequency.MINUTE else 500
        sleep = sleep if frequency is not bar.Frequency.MINUTE else None

        # '`True if x else False`' over a boolean is just the condition.
        display_progress = len(symbols) > 1
        # Load the latest stored datetime for the requested combination of
        # symbols and frequency. This doubles as a flag for init vs update.
        symbol_contexts = [(x, {
            'symbol': x,
            'frequency': frequency,
            'from_date_time': None
        }) for x in symbols]
        if frequency != bar.Frequency.MINUTE and not init:
            for symbol, context in symbol_contexts:
                context['from_date_time'] = self._db.get_updated(
                    bar.FrequencyToStr[frequency], symbol)
        elif not init:
            for symbol, context in symbol_contexts:
                context['from_date_time'] = True  # set update over init

        url_contexts = self._data_downloader.get_urls(symbol_contexts)
        if not url_contexts:
            op = ' ' if not display_progress else ' bulk '
            raise Exception('no urls returned for%s%sing historical data!' %
                            (op, operation_name))
        elif display_progress:
            total_len = len(url_contexts)
            current_idx = 0
            last_pct = 0
            printf('starting bulk %s of historical data for %i symbols.' %
                   (operation_name, total_len))
            sys.stdout.flush()

        for context in self.__bulk_dl_and_save(url_contexts,
                                               process_data_update_function,
                                               open_files_function, batch_size,
                                               sleep):
            if display_progress:
                current_idx += 1
                # total_len + 1.0 keeps the in-loop percentage below 100
                # until the final "100%" line after the loop.
                pct = int(current_idx / (total_len + 1.0) * 100.0)
                if pct != last_pct:
                    last_pct = pct
                    printf('%i%%' % pct)

            yield context

        if display_progress:
            if last_pct != 100:
                printf('100%')
Ejemplo n.º 11
0
 def remove_blacklisted(self, symbol):
     """Remove *symbol* from the blacklist and persist the change.

     Returns the reason string stored when the symbol was blacklisted.
     Raises ValueError if the symbol is not in the blacklist and KeyError
     if it has no stored reason.
     """
     printf('removing blacklisted symbol: %s' % symbol)
     # list.remove is the idiomatic equivalent of pop(index(...)).
     self.__blacklisted.remove(symbol)
     reason_added = self.__dict.pop(symbol)
     self.save()
     return reason_added
Ejemplo n.º 12
0
 def remove_failed(self, symbol):
     """Forget a previously failed symbol, persist, and return the reason
     it was recorded as failed.
     """
     printf('removing failed symbol: %s' % symbol)
     # Pop the stored reason before persisting the updated state.
     reason = self.__dict.pop(symbol)
     self.save()
     return reason
Ejemplo n.º 13
0
 def add_blacklisted(self, symbol, reason_blacklisted_msg=None):
     """Blacklist *symbol*, optionally recording why, and persist."""
     printf('adding blacklisted symbol: %s: %s' %
            (symbol, reason_blacklisted_msg))
     # Track membership and the associated reason, then save().
     self.__blacklisted.append(symbol)
     self.__dict[symbol] = reason_blacklisted_msg
     self.save()
Ejemplo n.º 14
0
 def add_failed(self, symbol, reason_failed_msg):
     """Record *symbol* as failed together with the reason, and persist."""
     printf('adding failed symbol: %s: %s' % (symbol, reason_failed_msg))
     # Keep the reason keyed by symbol, then persist via save().
     self.__dict[symbol] = reason_failed_msg
     self.save()