def build_user_request(config, options=None):
    """Build the immutable ``UserRequest`` describing what to download.

    Args:
        config: Mapping that must contain ``token``, ``counter_id`` and a
            ``<source>_fields`` entry, where ``<source>`` is
            ``options.source``.
        options: Parsed CLI options. When omitted (``None``) they are
            obtained from ``utils.get_cli_options()``.

    Returns:
        A ``UserRequest`` namedtuple with the fields ``token``,
        ``counter_id``, ``start_date_str``, ``end_date_str``, ``source``
        and ``fields`` (converted to a tuple). The request is passed to
        ``utils.validate_user_request`` before it is returned.

    Raises:
        AssertionError: If ``config`` has no ``<source>_fields`` entry.
    """
    if options is None:
        options = utils.get_cli_options()
    logger.info('CLI Options: %s', options)

    start_date_str, end_date_str = get_date_period(options)
    source = options.source

    # Validate that fields are present in config.
    fields_key = '{source}_fields'.format(source=source)
    if fields_key not in config:
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; the exception type is kept for existing callers.
        raise AssertionError('Fields must be specified in config')
    fields = config[fields_key]

    # Creating data structure (immutable tuple) with initial user request
    UserRequest = namedtuple(
        "UserRequest",
        "token counter_id start_date_str end_date_str source fields")

    user_request = UserRequest(
        token=config['token'],
        counter_id=config['counter_id'],
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        source=source,
        fields=tuple(fields),
    )

    # NOTE(review): this writes the whole request, including ``token``, to
    # the log -- confirm that exposing the token there is acceptable.
    logger.info(user_request)
    utils.validate_user_request(user_request)
    return user_request
Example #2
0
def save_data(api_request, part):
    '''Downloads one part of a Logs API request and saves it as a CSV file.

    The response body is written to ``part_<part>.csv`` inside the directory
    given by the ``output`` CLI option (created if it does not exist), and
    ``api_request.status`` is then set to ``'saved'``.

    Args:
        api_request: Object providing ``request_id``,
            ``user_request.counter_id`` and ``user_request.token``; its
            ``status`` attribute is overwritten on success.
        part: Part identifier, inserted into both the download URL and the
            output file name.

    Raises:
        ValueError: If the server answers with a status code other than 200;
            the message is the response text.
    '''
    url = '{host}/management/v1/counter/{counter_id}/logrequest/{request_id}/part/{part}/download' \
        .format(
            host=HOST,
            counter_id=api_request.user_request.counter_id,
            request_id=api_request.request_id,
            part=part
        )

    headers = {'Authorization': 'OAuth ' + api_request.user_request.token}

    # NOTE(review): no timeout is passed, so this call can block indefinitely
    # if the server stops responding -- consider adding one.
    r = requests.get(url, headers=headers)
    if r.status_code != 200:
        logger.debug(r.text)
        raise ValueError(r.text)

    output_dir = utils.get_cli_options().output
    os.makedirs(output_dir, exist_ok=True)
    file_name = 'part_{part}.csv'.format(part=part)
    # Explicit encoding: the locale default may be unable to represent the
    # decoded response text and would make the output platform-dependent.
    with open(os.path.join(output_dir, file_name), 'w',
              encoding='utf-8') as f:
        f.write(r.text)

    api_request.status = 'saved'
Example #3
0
                logger.info('### CLEANING DATA')
                logs_api.clean_data(api_request)
        except Exception as e:
            logger.critical('Iteration #{i} failed'.format(i=i + 1))
            if i == user_req.retries - 1:
                raise e


if __name__ == '__main__':

    start_time = time.time()

    config = utils.get_config()
    setup_logging(config)
    options = utils.get_cli_options()

    # choose from available destinations
    if (options.dest is None) or (options.dest == 'clickhouse'):
        destination = clickhouse
    elif options.dest == 'vertica':
        destination = vertica
    else:
        raise ValueError('Wrong argument: dest = ' + options.dest)

    user_request = build_user_request(config, options)

    # choose counter [from config | from cli options | all available counters]
    if options.counter is None:
        counters = (config['counter_id'], )
    elif options.counter == 'all':