"source": str(data_source),
                        "testsize": str(data_size),
                        "positive": str(data_positive),
                        "negative": str(data_negative),
                        "time_in_seconds_loading": str(seconds_loading),
                    },
                    "embedding": {"model": str(embedder_model), "subset": str(embedder.model_subset)},
                    "data_args": data_args,
                    "metrics": {
                        "TP": str(TP),
                        "FP": str(FP),
                        "TN": str(TN),
                        "FN": str(FN),
                        "accuracy": str(accuracy),
                        "precision": str(precision),
                        "recall": str(recall),
                        "f1": str(f1),
                        "time_in_seconds_training": str(seconds_training),
                        "time_in_seconds_testing": str(seconds_testing),
                    },
                }
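                # the metric values above presumably follow the standard confusion-matrix
                # definitions, e.g.:
                #   accuracy  = (TP + TN) / (TP + TN + FP + FN)
                #   precision = TP / (TP + FP)
                #   recall    = TP / (TP + FN)
                #   f1        = 2 * precision * recall / (precision + recall)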
                # ensure output directory exists
                if not os.path.isdir(dir_results):
                    data_utils.mkdir_p(dir_results)
                # save json file
                filename_results = "{}_{}_{}.json".format(data_source, embedder_model, classifier.__class__.__name__)
                logger.info("Saving results to {}...".format(filename_results))
                with open(os.path.join(dir_results, filename_results), "a") as outfile:
                    json.dump(results, outfile, sort_keys=True, indent=4, separators=(",", ": "))
                    outfile.write("\n")
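
# The results file is opened in append mode, so repeated runs leave a stream of
# concatenated pretty-printed JSON objects rather than a single JSON document.
# A minimal sketch (not part of the original code) for reading such a file back,
# assuming `json` is imported as above; `load_appended_json` is a hypothetical helper:
def load_appended_json(path):
    # yield each JSON object that json.dump appended to `path`
    decoder = json.JSONDecoder()
    with open(path) as fh:
        text = fh.read()
    idx = 0
    while idx < len(text):
        # raw_decode does not skip leading whitespace, so step over it first
        if text[idx].isspace():
            idx += 1
            continue
        obj, idx = decoder.raw_decode(text, idx)
        yield obj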
import csv
import os
import random
import re

# file_path_in, file_path_out and mkdir_p are assumed to be defined earlier in the script

# randomly keep some negative samples
randomprob = 2000
randchoice = int(random.random() * randomprob)

# track numbers
num_positive = 0
num_negative = 0

# get list of weekNN.csv files at file_path_in
ow_files = [
    os.path.join(file_path_in, f) for f in os.listdir(file_path_in)
    if re.match(r"week[0-9]{1,2}\.csv$", f) is not None
]
ow_files.sort()
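# the sort above is lexicographic; it matches chronological order only if the week
# numbers are zero-padded (week01.csv, week02.csv, ...)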

# ensure directory exists
if not os.path.isdir(file_path_out):
    mkdir_p(file_path_out)

# create csv file
with open(os.path.join(file_path_out, 'censored.csv'), 'w', newline='') as outfile:

    # object to write csv file
    csv_writer = csv.writer(outfile, delimiter=',')

    # search all files
    for table_path in ow_files:
        with open(table_path, 'r', newline='') as f:
            print("checking in file {}".format(table_path))

            # save line if post was censored
            for line in csv.reader(f, dialect=csv.excel):
                if len(line) > 10:
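                    # --- hypothetical continuation: the original snippet is cut off here ---
                    # assumption (not in the source): line[3] holds a censorship flag; the
                    # actual column index and value are unknown
                    if line[3] == 'censored':
                        csv_writer.writerow(line)
                        num_positive += 1
                    else:
                        num_negative += 1
                        # keep roughly one in `randomprob` uncensored rows as negatives
                        if num_negative % randomprob == randchoice:
                            csv_writer.writerow(line)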