Example #1
0
def parse_user_args(command_line=None):
    """
        Parses command-line arguments for the 'tws_equities' CLI.
        Prints help & exits if no command is given; when the chosen command
        accepts tickers but the user passed none, falls back to the default
        ticker input file.
        :param command_line: optional list of argument strings, defaults to sys.argv
        :return: parsed arguments as a dictionary
    """
    # root parser
    parser = ArgumentParser(
        prog='tws_equities',
        description=
        'A Python CLI built to download bar-data for Japanese Equities from '
        'TWS API.',
        epilog=
        'All optional arguments work like a toggle switch, user need not pass an '
        'explicit value to them.',
    )

    parser.add_argument(
        '--verbose',
        '-v',
        default=False,
        action='store_true',
        help=
        'Use this option to enable console logging, default behavior is to display '
        'error messages only. Pair this option with "--debug / -d" option to view more '
        'detailed messages.')
    parser.add_argument(
        '--debug',
        '-d',
        default=False,
        action='store_true',
        help=
        'This option will not only enable console logging but would also start raising '
        'hidden errors, specifically built for developers trying to debug a problem.'
    )

    # add & build sub-parser for supported commands
    # refer to _COMMAND_CONFIG for available commands
    sub_parser = parser.add_subparsers(dest='command')  # fix: 'sub_praser' typo
    for name, config in CLI_CONFIG.items():
        _build_command(sub_parser, name=name, **config)

    args = parser.parse_args(command_line)

    # user did not choose a command to run
    if args.command is None:
        write_to_console(
            'User should specify which command to run, please choose from the given options.\n',
            verbose=True)
        parser.print_help()
        exit(0)

    # user did not specify tickers
    if hasattr(args, 'tickers') and args.tickers is None:
        write_to_console(
            'User did not specify target tickers, loading from default input file.\n',
            verbose=True)
        args.tickers = get_default_tickers()

    return vars(args)
Example #2
0
def _cleanup(success_files,
             success_directory,
             failure_files,
             failure_directory,
             verbose=False):
    """
        Removes failure-side files for tickers that eventually succeeded, so a
        ticker never appears in both the success & failure caches.
        :param success_files: file names cached as successful
        :param success_directory: location of the success cache
        :param failure_files: file names cached as failed
        :param failure_directory: location of the failure cache
        :param verbose: set to True to see info messages on console
    """
    write_to_console('Post-extraction cleanup initiated...', verbose=verbose)

    # a file present in both caches is stale on the failure side
    # TODO: this operation should not be required
    duplicates = list(set(success_files) & set(failure_files))
    for duplicate in duplicates:
        delete_file(failure_directory, duplicate)
    write_to_console(f'Cleaned {len(duplicates)} duplicate files...',
                     pointer='->',
                     indent=1,
                     verbose=verbose)
Example #3
0
def create_csv_dump(target_date,
                    end_time='15:01:00',
                    bar_size='1 min',
                    verbose=False):
    """
        Creates a CSV file from JSON files for a given date.
        Raises an error if the directory for the given date is not present.
        CSV files to be saved at the historical data storage location:
            'success.csv' & 'failure.csv'
        :param target_date: date in 'YYYYMMDD' form
        :param end_time: extraction end time in 'HH:MM:SS' form
        :param bar_size: bar size used during extraction (ex: '1 min')
        :param verbose: set to True to see info messages on console
    """
    logger.info('Generating final CSV dump')
    storage_dir = _setup_storage_directories(target_date, bar_size=bar_size)
    _date = f'{target_date[:4]}/{target_date[4:6]}/{target_date[6:]}'
    write_to_console(f'{"-"*30} CSV Conversion: {_date} {"-"*31}',
                     verbose=True)
    target_directory = join(CACHE_DIR, bar_size.replace(' ', ''), target_date,
                            end_time.replace(':', '_'))

    if not isdir(target_directory):
        # fix: message said 'date:' but interpolated the directory path
        raise NotADirectoryError(
            f'Could not find a data storage directory for date: {target_date}')

    success_directory = join(target_directory, 'success')
    failure_directory = join(target_directory, 'failure')

    if isdir(success_directory):
        path = join(storage_dir, 'success.csv')
        success = generate_success_dataframe(success_directory,
                                             bar_title='Success',
                                             verbose=verbose)
        success.to_csv(path, index=False)
        logger.debug(f'Success file saved at: {path}')

    if isdir(failure_directory):
        path = join(storage_dir, 'failure.csv')
        failure = generate_failure_dataframe(failure_directory,
                                             bar_title='Failure',
                                             verbose=verbose)
        failure.to_csv(path, index=False)
        logger.debug(f'Failure file saved at: {path}')
Example #4
0
def create_csv_dump(target_date, end_time='15:01:00', verbose=False):
    """
        Builds 'success.csv' & 'failure.csv' from the JSON files cached for the
        given date, saving them inside the same dated directory.
        Raises NotADirectoryError if no data directory exists for the date.
        :param target_date: date in 'YYYYMMDD' form
        :param end_time: extraction end time in 'HH:MM:SS' form
        :param verbose: set to True to see info messages on console
    """
    logger.info('Generating final CSV dump')
    formatted_date = f'{target_date[:4]}/{target_date[4:6]}/{target_date[6:]}'
    write_to_console(f'{"-"*30} CSV Conversion: {formatted_date} {"-"*31}',
                     verbose=True)
    target_directory = join(_HISTORICAL_DATA_STORAGE, target_date,
                            end_time.replace(':', '_'))

    if not isdir(target_directory):
        raise NotADirectoryError(
            f'Could not find a data storage directory for date: {target_date}')

    success_directory = join(target_directory, '.success')
    failure_directory = join(target_directory, '.failure')

    if isdir(success_directory):
        success_frame = generate_success_dataframe(success_directory,
                                                   bar_title='Success',
                                                   verbose=verbose)
        success_path = join(target_directory, 'success.csv')
        success_frame.to_csv(success_path, index=False)
        logger.debug(f'Success file saved at: {success_path}')

    if isdir(failure_directory):
        failure_frame = generate_failure_dataframe(failure_directory,
                                                   bar_title='Failure',
                                                   verbose=verbose)
        failure_path = join(target_directory, 'failure.csv')
        failure_frame.to_csv(failure_path, index=False)
        logger.debug(f'Failure file saved at: {failure_path}')
Example #5
0
def generate_success_dataframe(target_directory,
                               bar_title=None,
                               verbose=False):
    """
        Creates a pandas data frame from JSON files present at the given success location.
        Assumes that all these JSON files have valid bar data.
        :param target_directory: location to read JSON files from
        :param bar_title: message to show in front of progress bar
        :param verbose: set to True to see info messages on console
        :return: dataframe with one row per bar, sorted by ecode & time_stamp
    """
    if bar_title is not None:
        _BAR_CONFIG['title'] = bar_title

    def _get_ticker_id(file_name):
        # file names look like '<ticker_id>.json'
        return int(file_name.split(sep)[-1].split('.')[0])

    expected_columns = [
        'time_stamp', 'ecode', 'session', 'high', 'low', 'close', 'volume',
        'average', 'count'
    ]

    # temporary directory holds intermediate CSV chunks to cap memory usage
    temp_directory = '.temp'
    make_dirs(temp_directory)

    # extract all json files from target directory
    success_files = get_files_by_type(target_directory)
    success_tickers = list(map(_get_ticker_id, success_files))
    total = len(success_tickers)
    data = pd.DataFrame(columns=expected_columns)

    if total:
        write_to_console('=> Generating dataframe for success tickers...',
                         verbose=verbose)
        json_generator = map(read_json_file, success_files)
        counter = 0  # counts temp CSV files
        # placeholder guarantees all expected columns exist after concat;
        # fix: DataFrame.append was removed in pandas 2.0 -- accumulate
        # frames in a list and concat instead
        placeholder = pd.DataFrame(columns=expected_columns)
        frames = [placeholder]
        with alive_bar(total=total, **_BAR_CONFIG) as bar:
            for i in range(total):
                ticker_data = next(json_generator)  # one ticker's JSON payload
                temp_data = pd.DataFrame(ticker_data['bar_data'])
                temp_data['ecode'] = success_tickers[i]
                frames.append(temp_data)
                # flush to disk every 100 tickers & on the last one
                _time_to_cache = ((i > 0) and
                                  (i % 100 == 0)) or (i + 1 == total)
                if _time_to_cache:
                    chunk = pd.concat(frames)
                    if chunk.shape[0] > 0:
                        temp_file = join(temp_directory,
                                         f'success_{counter}.csv')
                        chunk.to_csv(temp_file)
                        counter += 1
                    frames = [placeholder]
                bar()

        # merge all CSV chunks into a single dataframe
        temp_files = get_files_by_type(temp_directory, file_type='csv')
        if temp_files:
            data = pd.concat(map(read_csv, temp_files))
            data.sort_values(by=['ecode', 'time_stamp'],
                             inplace=True,
                             ignore_index=True)
            data = data[expected_columns]
    # always remove the temp chunk directory, even when nothing was processed
    delete_directory(temp_directory)

    return data
Example #6
0
def generate_extraction_metrics(target_date,
                                end_time='15:01:00',
                                input_tickers=None,
                                verbose=False):
    """
        Generates metrics about success & failure tickers.
        Metrics are saved into a new file called 'metrics.json'.
        :param target_date: date for which metrics are needed
        :param end_time: end time for which metrics are to be generated
        :param input_tickers: tickers for which metrics are to be generated;
                              loaded from 'input_tickers.json' when None
        :param verbose: set to True to see info messages on console
    """
    logger.info('Generating final extraction metrics')
    _date = f'{target_date[:4]}/{target_date[4:6]}/{target_date[6:]}'
    write_to_console(f'{"-"*30} Metrics Generation: {_date} {"-"*31}',
                     verbose=True)
    # NOTE: these names double as *local variable names* below -- the vars()
    # sweep at the end collects any local whose name matches this list, so
    # renaming one of the *_ratio locals silently drops that metric.
    expected_metrics = [
        'total_tickers', 'total_extracted', 'total_extraction_ratio',
        'extraction_successful', 'extraction_failure', 'success_ratio',
        'failure_ratio', 'n_225_input_ratio', 'n_225_success_ratio',
        'n_225_failure_ratio', 'topix_input_ratio', 'topix_success_ratio',
        'topix_failure_ratio', 'jasdaq_20_input_ratio',
        'jasdaq_20_success_ratio', 'jasdaq_20_failure_ratio',
        'missing_tickers_ratio', 'missing_tickers'
    ]
    # pre-fill with 0.0 so metrics not computed below still appear in output
    metrics = dict(zip(expected_metrics, [0.0] * len(expected_metrics)))
    target_directory = join(_HISTORICAL_DATA_STORAGE, target_date,
                            end_time.replace(':', '_'))
    if not isdir(target_directory):
        # fix: message was missing a space between 'at' and the path
        raise NotADirectoryError(
            f'Data storage directory for {target_date} not found at '
            f'{_HISTORICAL_DATA_STORAGE}')

    success_file = join(target_directory, 'success.csv')
    failure_file = join(target_directory, 'failure.csv')

    if not isfile(success_file):
        raise FileNotFoundError(f'Can not find success file: {success_file}')

    if not isfile(failure_file):
        raise FileNotFoundError(f'Can not find failure file: {failure_file}')

    input_tickers_file = join(target_directory, 'input_tickers.json')
    if input_tickers is None:
        if not isfile(input_tickers_file):
            raise FileNotFoundError(
                f'Can not find input tickers file: {input_tickers_file}')
        input_tickers = read_json_file(input_tickers_file)

    japan_indices = get_japan_indices()

    # index membership: entries look like '<code>.T'; keep the numeric part
    _n_225_tickers = japan_indices[japan_indices.n_225.str.contains(
        'T')].n_225.unique().tolist()
    n_225_tickers = list(map(lambda x: int(x.split('.')[0]), _n_225_tickers))

    _topix_tickers = japan_indices[japan_indices.topix.str.contains(
        'T')].topix.unique().tolist()
    topix_tickers = list(map(lambda x: int(x.split('.')[0]), _topix_tickers))

    _jasdaq_20_tickers = japan_indices[japan_indices.jasdaq_20.str.contains(
        'T')].jasdaq_20.unique().tolist()
    jasdaq_20_tickers = list(
        map(lambda x: int(x.split('.')[0]), _jasdaq_20_tickers))

    success = read_csv(success_file)
    failure = read_csv(failure_file)

    success_tickers = success.ecode.unique().tolist()
    failure_tickers = failure.ecode.unique().tolist()

    total_tickers = len(input_tickers)
    if total_tickers == 0:
        raise ValueError(
            f'Can not find any input tickers in file {input_tickers_file}')

    # over-all extraction ratios
    extraction_successful = len(success_tickers)
    extraction_failure = len(failure_tickers)
    total_extracted = extraction_successful + extraction_failure
    total_extraction_ratio = round(total_extracted / total_tickers, 3)

    success_ratio = round(extraction_successful / total_tickers, 3)
    failure_ratio = round(extraction_failure / total_tickers, 3)
    logger.debug(f'Updated over-all extraction ratio: {success_ratio}')
    write_to_console(f'Over-all Extraction: {_get_marker(success_ratio)}',
                     pointer='->',
                     indent=2,
                     verbose=True)
    write_to_console(f'Over-all Success Ratio: {success_ratio}',
                     pointer='-',
                     indent=4,
                     verbose=verbose)

    # N 225 ratios, only when the input contains N 225 tickers
    n_225_input = list(set(input_tickers).intersection(n_225_tickers))
    if bool(n_225_input):
        n_225_input_ratio = round(len(n_225_input) / len(n_225_tickers), 3)
        n_225_success = list(set(success_tickers).intersection(n_225_input))
        n_225_failure = list(set(failure_tickers).intersection(n_225_input))
        n_225_success_ratio = round(len(n_225_success) / len(n_225_input), 3)
        n_225_failure_ratio = round(len(n_225_failure) / len(n_225_input), 3)
        logger.debug(f'Updated N225 extraction ratio: {n_225_success_ratio}')
        write_to_console(
            f'N225 Extraction: {_get_marker(n_225_success_ratio)}',
            pointer='->',
            indent=2,
            verbose=True)
        write_to_console(f'Over-all Success Ratio: {n_225_success_ratio}',
                         pointer='-',
                         indent=4,
                         verbose=verbose)
    else:
        logger.debug('Could not find any N 225 tickers in the given input')

    # Topix ratios, only when the input contains Topix tickers
    topix_input = list(set(input_tickers).intersection(topix_tickers))
    if bool(topix_input):
        topix_input_ratio = round(len(topix_input) / len(topix_tickers), 3)
        topix_success = list(set(success_tickers).intersection(topix_input))
        topix_failure = list(set(failure_tickers).intersection(topix_input))
        topix_success_ratio = round(len(topix_success) / len(topix_input), 3)
        topix_failure_ratio = round(len(topix_failure) / len(topix_input), 3)
        logger.debug(f'Updated Topix extraction ratio: {topix_success_ratio}')
        write_to_console(
            f'Topix Extraction: {_get_marker(topix_success_ratio)}',
            pointer='->',
            indent=2,
            verbose=True)
        write_to_console(f'Topix Success Ratio: {topix_success_ratio}',
                         pointer='-',
                         indent=4,
                         verbose=verbose)
    else:
        logger.debug('Could not find any Topix tickers in the given input')

    # JASDAQ 20 ratios, only when the input contains JASDAQ 20 tickers
    jasdaq_20_input = list(set(input_tickers).intersection(jasdaq_20_tickers))
    if bool(jasdaq_20_input):
        jasdaq_20_input_ratio = round(
            len(jasdaq_20_input) / len(jasdaq_20_tickers), 3)
        jasdaq_20_success = list(
            set(success_tickers).intersection(jasdaq_20_input))
        jasdaq_20_failure = list(
            set(failure_tickers).intersection(jasdaq_20_input))
        jasdaq_20_success_ratio = round(
            len(jasdaq_20_success) / len(jasdaq_20_input), 3)
        jasdaq_20_failure_ratio = round(
            len(jasdaq_20_failure) / len(jasdaq_20_input), 3)
        logger.debug(
            f'Updated JASDAQ 20 extraction ratio: {jasdaq_20_success_ratio}')
        write_to_console(
            f'JASDAQ 20 Extraction: {_get_marker(jasdaq_20_success_ratio)}',
            pointer='->',
            indent=2,
            verbose=True)
        write_to_console(f'JASDAQ 20 Success Ratio: {jasdaq_20_success_ratio}',
                         pointer='-',
                         indent=4,
                         verbose=verbose)
    else:
        logger.debug('Could not find any JASDAQ 20 tickers in the given input')

    # tickers that appear in the input but in neither output file
    missing_tickers = list(
        set(input_tickers).difference(success_tickers + failure_tickers))
    missing_tickers_ratio = round(len(missing_tickers) / total_tickers, 3)
    logger.debug(f'Updated missing tickers ratio: {missing_tickers_ratio}')

    # sweep local variables into the metrics dict by name (see note above)
    all_vars = vars()
    for key in all_vars:
        if key in expected_metrics:
            metrics[key] = all_vars[key]

    metrics_file = join(target_directory, 'metrics.json')
    save_data_as_json(metrics, metrics_file)
    logger.debug(f'Metrics saved at: {metrics_file}')
Example #7
0
def generate_failure_dataframe(target_directory,
                               bar_title=None,
                               verbose=False):
    """
        Creates a pandas data frame from JSON files present at the given failure location.
        Assumes that all these JSON files have valid error stacks.
        :param target_directory: location to read JSON files from
        :param bar_title: message to show in front of progress bar
        :param verbose: set to True to see info messages on console
        :return: dataframe with one row per error entry, sorted by ecode
    """
    if bar_title is not None:
        _BAR_CONFIG['title'] = bar_title

    def _get_ticker_id(file_name):
        # file names look like '<ticker_id>.json'
        return int(file_name.split(sep)[-1].split('.')[0])

    expected_columns = ['ecode', 'code', 'message']
    data = pd.DataFrame(columns=expected_columns)

    # temporary directory holds intermediate CSV chunks to cap memory usage
    temp_directory = '.temp'
    make_dirs(temp_directory)

    # extract all json files from target directory
    file_pattern = join(
        target_directory,
        '*.json')  # TODO: can be modified to match digital values
    failure_files = glob(file_pattern)
    total = len(failure_files)

    if total:
        # fix: 'Generting' typo in console message
        write_to_console('=> Generating dataframe for failure tickers...',
                         verbose=verbose)
        json_generator = map(read_json_file, failure_files)
        counter = 0  # counts temp CSV files
        # placeholder guarantees all expected columns exist after concat;
        # fix: DataFrame.append was removed in pandas 2.0 -- accumulate
        # frames in a list and concat instead
        placeholder = pd.DataFrame(columns=expected_columns)
        frames = [placeholder]
        with alive_bar(total=total, **_BAR_CONFIG) as bar:
            for i in range(total):
                ticker_data = next(json_generator)
                meta = ticker_data['meta_data']
                error_stack = meta['_error_stack']
                # fall back to the file name when ecode is absent in meta data
                ecode = meta.get('ecode', _get_ticker_id(failure_files[i]))
                temp_data = pd.DataFrame(error_stack, columns=expected_columns)
                temp_data['ecode'] = ecode
                # if error stack is empty, then create a dummy row
                if temp_data.shape[
                        0] == 0:  # fixme: find a way to control this in the TWS Client
                    temp_data = pd.DataFrame([{
                        'ecode': ecode,
                        'code': 'unknown',
                        'message': 'not available'
                    }])

                frames.append(temp_data)
                # flush to disk every 100 tickers & on the last one
                _time_to_cache = (i + 1 == total) or ((i > 0) and
                                                      (i % 100 == 0))
                if _time_to_cache:
                    chunk = pd.concat(frames)
                    if chunk.shape[0] > 0:
                        temp_file = join(temp_directory,
                                         f'failure_{counter}.csv')
                        chunk.to_csv(temp_file)
                        counter += 1
                    frames = [placeholder]
                bar()

        # merge all CSV chunks into a single dataframe
        # fix: guard empty chunk list, consistent with the success path
        temp_files = get_files_by_type(temp_directory, file_type='csv')
        if temp_files:
            data = pd.concat(map(read_csv, temp_files))
            data.sort_values(by=['ecode'], ignore_index=True, inplace=True)
            data = data[expected_columns]
    # always remove the temp chunk directory, even when nothing was processed
    delete_directory(temp_directory)

    return data
Example #8
0
def metrics_generator(date, bar_size, tickers):
    """
        Generate extraction metrics for daily downloaded data.
        Updates two sheets:
        - metrics sheet: day-wise metrics (success, failed, missed v/s total stocks)
        - daily extraction status sheet: extraction status per input ticker for the day
        :param date: target date in 'YYYYMMDD' form
        :param bar_size: bar size used during extraction (ex: '1 min')
        :param tickers: list of tickers, or path to an input CSV file
    """
    logger.info('Generating final extraction metrics')
    display_date = f'{date[:4]}/{date[4:6]}/{date[6:]}'
    write_to_console(f'{"-"*30} Metrics Generation: {display_date} {"-"*31}',
                     verbose=True)
    try:
        data_location = join(HISTORICAL_DATA_STORAGE,
                             bar_size.replace(' ', ''), date[:4],
                             MONTH_MAP[int(date[4:6])], date)
        # read success, failure & input files
        success = pd.read_csv(join(data_location, 'success.csv'))
        failure = pd.read_csv(join(data_location, 'failure.csv'))

        if type(tickers) is list:
            pass  # TODO: simple metrics generation
        else:  # assuming that input is a file path
            input_ = pd.read_csv(tickers)
            # filter out relevant input --> active tickers
            relevant_input = input_[input_.status == 'A']

            # get extraction metrics
            metrics = compute_extraction_metrics(success, failure,
                                                 relevant_input)
            # one console line per tracked index / market segment
            console_rows = (
                ('Over-all Extraction', 'extraction_ratio'),
                ('Topix Extraction', 'extraction_ratio_topix'),
                ('Nikkei 225 Extraction', 'extraction_ratio_nikkei225'),
                ('JASDAQ 20 Extraction', 'extraction_ratio_jasdaq20'),
                ('First Section Extraction', 'extraction_ratio_first_section'),
                ('Second Section Extraction',
                 'extraction_ratio_second_section'),
                ('Mothers Extraction', 'extraction_ratio_mothers'),
                ('JASDAQ Growth Extraction', 'extraction_ratio_jasdaq_growth'),
                ('JASDAQ Standard Extraction',
                 'extraction_ratio_jasdaq_standard'),
                ('Market Capital Above ¥10B Extraction',
                 'extraction_ratio_mcap_above_10b'),
                ('Price x 3 Month\'s Trading Volume ¥85MM Extraction',
                 'extraction_ratio_pv_above_85m'),
            )
            for label, key in console_rows:
                write_to_console(f'{label}: {_get_marker(metrics[key])}',
                                 pointer='->',
                                 indent=2,
                                 verbose=True)

            # generate / update metrics sheet
            iso_date = f'{date[:4]}-{date[4:6]}-{date[6:]}'
            update_metrics_sheet(iso_date, metrics)

            # generate daily extraction status sheet
            generate_daily_extraction_status_sheet(success, input_,
                                                   data_location, date)
    except Exception as e:
        # best-effort by design: metrics failure must not abort the pipeline
        logger.critical(f'Metrics generation failed: {e}')
Example #9
0
def extract_historical_data(tickers=None,
                            end_date=None,
                            end_time=None,
                            duration='1 D',
                            bar_size='1 min',
                            what_to_show='TRADES',
                            use_rth=0,
                            date_format=1,
                            keep_upto_date=False,
                            chart_options=(),
                            batch_size=_BATCH_SIZE,
                            max_attempts=3,
                            run_counter=1,
                            verbose=False):
    """
        A wrapper function around HistoricalDataExtractor, that pulls data from TWS for the given tickers.
        Retries failed / missing tickers recursively until max_attempts is exhausted.
        :param tickers: ticker IDs (ex: [1301])
        :param end_date: end date (ex: '20210101')
        :param end_time: end time (ex: '15:00:01')
        :param duration: the amount of time to go back from end_date_time (ex: '1 D')
        :param bar_size: valid bar size or granularity of data (ex: '1 min')
        :param what_to_show: the type of data to retrieve (ex: 'TRADES')
        :param use_rth: 0 means retrieve data within regular trading hours, 1 otherwise
        :param date_format: format for bar data, 1 means yyyyMMdd, 0 means epoch time
        :param keep_upto_date: setting to True will continue to return unfinished bar data
        :param chart_options: to be documented
        :param batch_size: size of each batch as integer, default=30
        :param max_attempts: maximum number of times to try for failure tickers
        :param run_counter: counts the number of attempts performed, not to be used from outside
        :param verbose: set to True to display messages on console
    """
    logger.info(
        f'Running extractor, attempt: {run_counter} | max attempts: {max_attempts}'
    )
    # let the user know that data extraction has been initiated
    if run_counter == 1:
        _date_formatted = f'{end_date[:4]}/{end_date[4:6]}/{end_date[6:]}'
        message = f'{"-" * 30} Data Extraction: {_date_formatted} {"-" * 30}'
        write_to_console(message, verbose=True)

    # additional info, if user asks for it
    message = 'Setting things up for data-extraction...'
    write_to_console(message, indent=2, verbose=verbose)
    tickers, cache_success, cache_failure = _prep_for_extraction(
        tickers, end_date, end_time, bar_size)
    write_to_console('Refreshed cache directories...',
                     indent=4,
                     pointer='->',
                     verbose=verbose)
    write_to_console('Removed already cached tickers...',
                     indent=4,
                     pointer='->',
                     verbose=verbose)
    write_to_console('Reset failed tickers...',
                     indent=4,
                     pointer='->',
                     verbose=verbose)

    write_to_console('Generating ticker batches...', indent=2, verbose=verbose)
    batches = create_batches(tickers, batch_size)
    write_to_console(f'Total Tickers: {len(tickers)}',
                     indent=4,
                     verbose=verbose,
                     pointer='->')
    write_to_console(f'Total Batches: {len(batches)}',
                     indent=4,
                     verbose=verbose,
                     pointer='->')
    write_to_console(f'Batch Size: {batch_size}',
                     indent=4,
                     verbose=verbose,
                     pointer='->')

    # core processing section
    bar_title = f'=> Attempt: {run_counter}'
    message = 'Batch-wise extraction in progress, this can take some time. Please be patient...'
    write_to_console(message, indent=2, verbose=verbose)
    success_files, failure_files = _run_extractor(batches,
                                                  end_date,
                                                  end_time,
                                                  duration,
                                                  bar_size,
                                                  what_to_show,
                                                  use_rth,
                                                  date_format,
                                                  keep_upto_date,
                                                  chart_options,
                                                  cache_success,
                                                  cache_failure,
                                                  bar_title=bar_title)

    run_counter += 1
    # feedback loop: retry failed or missing tickers until we hit the max attempt threshold
    # fix: compare as sets -- the original list comparison was order-sensitive
    # and, on recursive calls (tickers arrives as a set), was always unequal,
    # which caused spurious retries even after every ticker succeeded
    if set(tickers) != set(map(_get_ticker_id, success_files)):
        if run_counter <= max_attempts:
            # TODO: optimize
            unprocessed_tickers = set(tickers).difference(
                map(_get_ticker_id, success_files))
            batch_size = 10  # retries run with smaller batches
            extract_historical_data(tickers=unprocessed_tickers,
                                    end_date=end_date,
                                    end_time=end_time,
                                    duration=duration,
                                    bar_size=bar_size,
                                    what_to_show=what_to_show,
                                    use_rth=use_rth,
                                    date_format=date_format,
                                    keep_upto_date=keep_upto_date,
                                    chart_options=chart_options,
                                    batch_size=batch_size,
                                    max_attempts=max_attempts,  # fix: was not forwarded
                                    run_counter=run_counter,
                                    verbose=verbose)  # fix: was not forwarded
        _cleanup(success_files,
                 cache_success,
                 failure_files,
                 cache_failure,
                 verbose=verbose)