def build_user_request(config, options=None):
    """Build the immutable ``UserRequest`` describing what to download.

    Args:
        config: Mapping that must contain ``token``, ``counter_id`` and a
            ``<source>_fields`` entry, where ``<source>`` is
            ``options.source``.
        options: Already-parsed CLI options. When omitted (``None``) they
            are obtained from ``utils.get_cli_options()``, which keeps the
            single-argument call form working.

    Returns:
        A ``UserRequest`` namedtuple with the fields ``token``,
        ``counter_id``, ``start_date_str``, ``end_date_str``, ``source``
        and ``fields`` (the configured fields converted to a tuple).

    Raises:
        AssertionError: If ``config`` has no ``<source>_fields`` entry.
        Anything raised by ``utils.validate_user_request`` is propagated
        unchanged.
    """
    if options is None:
        options = utils.get_cli_options()
    logger.info('CLI Options: %s', options)

    start_date_str, end_date_str = get_date_period(options)
    source = options.source

    # Validate that fields are present in config. Raised explicitly rather
    # than via the `assert` statement so the check is not stripped when
    # Python runs with -O; the exception type and message are unchanged.
    fields_key = '{source}_fields'.format(source=source)
    if fields_key not in config:
        raise AssertionError('Fields must be specified in config')
    fields = config[fields_key]

    # Creating data structure (immutable tuple) with initial user request
    UserRequest = namedtuple(
        "UserRequest",
        "token counter_id start_date_str end_date_str source fields"
    )
    user_request = UserRequest(
        token=config['token'],
        counter_id=config['counter_id'],
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        source=source,
        fields=tuple(fields),
    )

    logger.info(user_request)
    utils.validate_user_request(user_request)
    return user_request
def save_data(api_request, part):
    """Download one part of a Logs API request and save it as a CSV file.

    The part is fetched from the Logs API download endpoint using the OAuth
    token of ``api_request.user_request`` and written to
    ``<output>/part_<part>.csv``, where ``<output>`` is the ``output`` CLI
    option. The output directory is created if it does not exist.

    Args:
        api_request: Request object exposing ``user_request`` (with
            ``counter_id`` and ``token``) and ``request_id``. Its
            ``status`` attribute is set to ``'saved'`` on success.
        part: Identifier of the part to download; it is substituted into
            both the download URL and the output file name.

    Raises:
        ValueError: If the API responds with a status code other than 200;
            the response text is used as the message.
    """
    user_request = api_request.user_request
    url = (
        '{host}/management/v1/counter/{counter_id}'
        '/logrequest/{request_id}/part/{part}/download'
    ).format(
        host=HOST,
        counter_id=user_request.counter_id,
        request_id=api_request.request_id,
        part=part
    )
    headers = {'Authorization': 'OAuth ' + user_request.token}

    r = requests.get(url, headers=headers)
    if r.status_code != 200:
        logger.debug(r.text)
        raise ValueError(r.text)

    output_dir = utils.get_cli_options().output
    os.makedirs(output_dir, exist_ok=True)
    part_path = '{output}/part_{part}.csv'.format(part=part, output=output_dir)
    # Explicit UTF-8: without it the locale's preferred encoding is used,
    # which can fail on (or differently encode) non-ASCII log values
    # depending on the machine the script runs on.
    with open(part_path, 'w', encoding='utf-8') as f:
        f.write(r.text)

    api_request.status = 'saved'
logger.info('### CLEANING DATA') logs_api.clean_data(api_request) except Exception as e: logger.critical('Iteration #{i} failed'.format(i=i + 1)) if i == user_req.retries - 1: raise e if __name__ == '__main__': start_time = time.time() config = utils.get_config() setup_logging(config) options = utils.get_cli_options() # choose from available destinations if (options.dest is None) or (options.dest == 'clickhouse'): destination = clickhouse elif options.dest == 'vertica': destination = vertica else: raise ValueError('Wrong argument: dest = ' + options.dest) user_request = build_user_request(config, options) # choose counter [from config | from cli options | all avaibalbe counters] if options.counter is None: counters = (config['counter_id'], ) elif options.counter == 'all':