"source": str(data_source), "testsize": str(data_size), "positive": str(data_positive), "negative": str(data_negative), "time_in_seconds_loading": str(seconds_loading), }, "embedding": {"model": str(embedder_model), "subset": str(embedder.model_subset)}, "data_args": data_args, "metrics": { "TP": str(TP), "FP": str(FP), "TN": str(TN), "FN": str(FN), "accuracy": str(accuracy), "precision": str(precision), "recall": str(recall), "f1": str(f1), "time_in_seconds_training": str(seconds_training), "time_in_seconds_testing": str(seconds_testing), }, } # ensure output directory exists if not os.path.isdir(dir_results): data_utils.mkdir_p(dir_results) # save json file filename_results = "{}_{}_{}.json".format(data_source, embedder_model, classifier.__class__.__name__) logger.info("Saving results to {}...".format(filename_results)) with open(os.path.join(dir_results, filename_results), "a") as outfile: json.dump(results, outfile, sort_keys=True, indent=4, separators=(",", ": ")) outfile.write("\n")
# NOTE(review): fragment starts mid-script — `randomprob` is defined before the
# visible source (the sibling variant in this file sets it to 2000).
randchoice = int(random.random() * randomprob)

# track numbers
num_positive = 0
num_negative = 0

# get list of weekNN.csv files at file_path
# NOTE(review): `{,2}` also matches zero digits ("week.csv"), and the pattern is
# only start-anchored by re.match, so e.g. "week01.csv.bak" matches too — confirm
# whether a `$` anchor / `{1,2}` was intended.
ow_files = [
    os.path.join(file_path_in, f)
    for f in os.listdir(file_path_in)
    if re.match(r"week[0-9]{,2}\.csv", f) is not None
]
ow_files.sort()

# ensure directory exists
# FIX(review): original called `os.isdir`, which does not exist (AttributeError);
# the predicate lives in os.path.
if not os.path.isdir(file_path_out):
    mkdir_p(file_path_out)

# create csv file
# NOTE(review): binary modes 'wb'/'rbU' for csv files are Python 2 conventions;
# under Python 3 these must be text mode with newline='' — confirm target runtime.
with open(os.path.join(file_path_out, 'censored.csv'), 'wb') as outfile:
    # object to write csv file
    csv_writer = csv.writer(outfile, delimiter=',')
    # search all files
    for table_path in ow_files:
        with open(table_path, 'rbU') as f:
            print("checking in file {}".format(table_path))
            # save line if post was censored
            for line in csv.reader(f, dialect=csv.excel):
                if len(line) > 10:
                    # NOTE(review): source truncated here — the original body of
                    # this branch is not visible in this chunk.
                    pass
'negative': str(data_negative), 'time_in_seconds_loading': str(seconds_loading) }, 'embedding': { 'model': str(embedder_model), 'subset': str(embedder.model_subset) }, 'data_args': data_args, 'metrics': { 'TP': str(TP), 'FP': str(FP), 'TN': str(TN), 'FN': str(FN), 'accuracy': str(accuracy), 'precision': str(precision), 'recall': str(recall), 'f1': str(f1), 'time_in_seconds_training': str(seconds_training), 'time_in_seconds_testing': str(seconds_testing) } } # ensure output directory exists if not os.path.isdir(dir_results): data_utils.mkdir_p(dir_results) # save json file filename_results = "{}_{}_{}.json".format(data_source, embedder_model, classifier.__class__.__name__) logger.info("Saving results to {}...".format(filename_results)) with open(os.path.join(dir_results,filename_results), 'a') as outfile: json.dump(results, outfile, sort_keys=True, indent=4, separators=(',', ': ')) outfile.write('\n')
# randomly keep some negative samples
randomprob = 2000
randchoice = int(random.random()*randomprob)

# track numbers
num_positive = 0
num_negative = 0

# get list of weekNN.csv files at file_path
# NOTE(review): `{,2}` also matches zero digits ("week.csv"), and the pattern is
# only start-anchored by re.match, so e.g. "week01.csv.bak" matches too — confirm
# whether a `$` anchor / `{1,2}` was intended.
ow_files = [
    os.path.join(file_path_in, f)
    for f in os.listdir(file_path_in)
    if re.match(r"week[0-9]{,2}\.csv", f) is not None
]
ow_files.sort()

# ensure directory exists
# FIX(review): original called `os.isdir`, which does not exist (AttributeError);
# the predicate lives in os.path.
if not os.path.isdir(file_path_out):
    mkdir_p(file_path_out)

# create csv file
# NOTE(review): binary modes 'wb'/'rbU' for csv files are Python 2 conventions;
# under Python 3 these must be text mode with newline='' — confirm target runtime.
with open(os.path.join(file_path_out, 'censored.csv'), 'wb') as outfile:
    # object to write csv file
    csv_writer = csv.writer(outfile, delimiter=',')
    # search all files
    for table_path in ow_files:
        with open(table_path, 'rbU') as f:
            print("checking in file {}".format(table_path))
            # save line if post was censored
            for line in csv.reader(f, dialect=csv.excel):
                if len(line) > 10:
                    # NOTE(review): source truncated here — the original body of
                    # this branch is not visible in this chunk.
                    pass