def step2(step2_input, step2_output):
    """
    Run Step 2 (comScore click-through) - check README for details.

    Each input file is parsed with csv.reader. The first row is consumed as
    the header and only its width is used: from every following row the
    first column and the column at the header's last index are kept as a
    pair. Pairs accumulate across all input files, are de-duplicated and
    handed to process_info(). The module-level ``graf`` is reset at the
    start of the step and written to ``step2_output`` at the end.

    :input:
        step2_input - list containing input files
        step2_output - output file
    :return:
        0 on success
        1 on error (a csv.Error was raised while reading a file)
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm that de-duplication runs once per chunk
    # and process_info() once per file.
    global graf
    logger.info("###Step 2:")
    graf = defaultdict(dict)
    step_started = clock()
    pairs = []

    for path in step2_input:
        logger.info("Reading input file {}".format(path))
        with open(path) as handle:
            file_started = clock()
            reader = csv.reader(handle)
            # The header row is skipped; its length fixes which column is
            # treated as the "last" one for every data row.
            last_col = len(next(reader)) - 1
            try:
                for rows in gen_chunks(reader, 10000):
                    pairs.extend((row[0], row[last_col]) for row in rows)
                    # Drop duplicates so the list does not grow unbounded.
                    pairs = list(set(pairs))
                process_info(pairs)
            except csv.Error as err:
                logger.error("Exception {}: {}".format(type(err), err))
                logger.error("Check Problems section in readme for known issues.")
                return 1
        check_time(file_started, "Done reading.")

    check_time(step_started, "Done generating full link dict...")

    write_started = clock()
    write_to_file(step2_output, graf)
    check_time(write_started, "Done writing output file.")

    check_time(step_started, "Done with Step 2.")
    return 0
def step3(step3_input, step3_output):
    """
    Handle the Step3 requests (comScore purchase) - check README for details.

    Each input file is parsed with csv.DictReader. Only rows whose
    ``tran_flg`` column equals the string '1' are kept, as
    (``machine_id``, ``domain_name``) pairs. Pairs accumulate across all
    input files, are de-duplicated and handed to process_info(). The
    module-level ``graf`` is reset at the start of the step and written to
    ``step3_output`` at the end.

    :input:
        step3_input - list containing input files
        step3_output - output file
    :return:
        0 on success
        1 on error (a csv.Error was raised while reading a file)
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm that de-duplication runs once per chunk
    # and process_info() once per file.
    global graf
    logger.info("###Step 3:")
    start3 = clock()
    graf = defaultdict(dict)
    info = []
    for input_file in step3_input:
        logger.info("Reading input file {}".format(input_file))
        with open(input_file) as csv_file:
            rdr = csv.DictReader(csv_file)
            start_file = clock()
            try:
                for chunk in gen_chunks(rdr, 10000):
                    info.extend((row['machine_id'], row['domain_name'])
                                for row in chunk if row['tran_flg'] == '1')
                    # Drop duplicates so the list does not grow unbounded.
                    info = list(set(info))
                process_info(info)
            except csv.Error as e:
                logger.error("Exception {}: {}".format(type(e), e))
                logger.error("Check Problems section in readme for known issues.")
                return 1
        check_time(start_file, "Done reading.")

    # Measured from the start of the whole step, not from the last file:
    # the per-file timer would under-report and is undefined when
    # step3_input is empty.
    check_time(start3, "Done generating full link dict...")

    start_write = clock()
    write_to_file(step3_output, graf)
    check_time(start_write, "Done writing output file.")

    check_time(start3, "Done with Step 3.")
    return 0
def main():
    """
    Run the steps selected on the command line.

    The ``step_to_run`` option is a comma-separated list; a step runs when
    its number ('1', '2' or '3') or the word 'all' is present. The return
    value of every executed step is appended to the module-level ``retl``.
    """
    global retl
    options = cndi_lib.parse_cli_opts()
    selected = options.step_to_run.split(',')
    run_everything = 'all' in selected

    if run_everything or '1' in selected:
        retl.append(cndi_lib.step1(step1_input, step1_output, map_file))
    if run_everything or '2' in selected:
        retl.append(cndi_lib.step2(step2_input, step2_output))
    if run_everything or '3' in selected:
        # NOTE(review): step 3 is handed step2_input rather than a dedicated
        # step 3 input - confirm this is intentional.
        retl.append(cndi_lib.step3(step2_input, step3_output))


if __name__ == "__main__":
    # Write a timestamped banner so separate runs are easy to tell apart in
    # the debug log.
    timestamp = strftime("%a, %d %b %Y %X +0000", gmtime())
    log_delimiter = "#" * 20 + timestamp + "#" * 10
    logger.debug("\n" * 2 + log_delimiter + "\n")

    main()

    # Steps return 1 on error, so any 1 collected in retl means a failure.
    if 1 in retl:
        logger.error(""" !!! !!!Errors detected. Check above log or logfile for details. !!!""")

    # NOTE(review): placement reconstructed from a source whose indentation
    # was lost - assumed to print on every run, not only on errors.
    debug_log = os.path.join(LOG_FILE_PATH, LOG_FILE)
    print("\nDebug log: '{}'\n".format(debug_log))