예제 #1
0
def collect_gcnn(name, most_similar_name, phi, ind):
    """Train and evaluate a GCNN over repeated one-vs-one data splits.

    For each of ``NO_OF_SPLITS`` iterations the data set is re-split,
    reduced with ``ind``, a network is trained on it, and the evaluation
    of that network is stored together with the training time.

    Args:
        name: First name passed to ``dataTools.AuthorshipOneVsOne``; also
            used to load hyperparameters through
            ``ClusterUtils.load_best_hyperparams``.
        most_similar_name: Second name passed to the data set
            (presumably the opposing author -- confirm against dataTools).
        phi: Forwarded unchanged to ``train_helper.train_net`` as ``phi``.
        ind: Forwarded to ``data.reduce_dim`` after every re-split.

    Returns:
        A list with one entry per split: the object returned by
        ``evaluate_gcnn`` with an added ``'time'`` key holding the elapsed
        training time in seconds.
    """
    gcnn_results = []

    data = dataTools.AuthorshipOneVsOne(name, most_similar_name, ratioTrain,
                                        ratioValid, dataPath)
    logging.info('Training GCNN on {0} v.s. {1}'.format(
        name, most_similar_name))
    h_params = ClusterUtils.load_best_hyperparams(name)

    for split_n in range(NO_OF_SPLITS):
        data.get_split(name, most_similar_name, ratioTrain, ratioValid)

        # The reduction is re-applied after each new split is drawn.
        data.reduce_dim(ind)

        # Only the training call is timed; evaluation is excluded.
        start = time.perf_counter()
        gcnn = train_helper.train_net(data, h_params, phi=phi)
        end = time.perf_counter()

        gcnn_eval = evaluate_gcnn(gcnn, data)
        # Elapsed time is end - start; the reverse order yields a negative
        # duration.
        gcnn_eval['time'] = end - start
        gcnn_results.append(gcnn_eval)

        logging.info(
            'SPLIT {0}: GCNN results successfully collected: {1}'.format(
                split_n, gcnn_eval))

    return gcnn_results
예제 #2
0
def collect_gcnn(name, most_similar_name, perc):
    """Train and evaluate a GCNN over repeated splits of one data set.

    ``phi`` and the index set used for the reduction are loaded with
    ``load_phi``. If fewer than two indices are returned, no training is
    performed and a single all-zero result is returned instead.

    Args:
        name: Name passed to ``dataTools.Authorship``, ``load_phi`` and
            ``ClusterUtils.load_best_hyperparams``.
        most_similar_name: Unused by this function; kept so the signature
            stays compatible with existing callers.
        perc: Forwarded to ``load_phi`` as ``percentage``.

    Returns:
        A list with one entry per split: the object returned by
        ``evaluate_gcnn`` with an added ``'time'`` key holding the elapsed
        training time in seconds. When fewer than two indices are
        available, a one-element list holding a dict with ``'acc'``,
        ``'f1'``, ``'auc'``, ``'prec'`` and ``'time'`` all set to 0.
    """
    gcnn_results = []

    data = dataTools.Authorship(name, ratioTrain, ratioValid, dataPath)
    logging.info('Training GCNN on {0}'.format(name))
    h_params = ClusterUtils.load_best_hyperparams(name)

    phi, indices = load_phi(name, data, percentage=perc)

    # Guard: with fewer than two indices, training is skipped entirely.
    if indices.shape[0] < 2:
        return [{'acc': 0, 'f1': 0, 'auc': 0, 'prec': 0, 'time': 0}]

    for split_n in range(NO_OF_SPLITS):
        data.get_split(name, ratioTrain, ratioValid)

        # The reduction is re-applied after each new split is drawn.
        data.reduce_dim(indices)

        # Only the training call is timed; evaluation is excluded.
        start = time.perf_counter()
        gcnn = train_helper.train_net(data, h_params, phi=phi)
        end = time.perf_counter()

        gcnn_eval = evaluate_gcnn(gcnn, data)
        # Elapsed time is end - start; the reverse order yields a negative
        # duration.
        gcnn_eval['time'] = end - start
        gcnn_results.append(gcnn_eval)

        logging.info(
            'SPLIT {0}: GCNN results successfully collected: {1}'.format(
                split_n, gcnn_eval))

    return gcnn_results
예제 #3
0
def collect_gcnn(name, most_similar_name):
    """Train and evaluate a GCNN over repeated one-vs-one data splits.

    Args:
        name: First name passed to ``dataTools.AuthorshipOneVsOne``; also
            used to load hyperparameters through
            ``ClusterUtils.load_best_hyperparams``.
        most_similar_name: Second name passed to the data set.

    Returns:
        A list holding the ``evaluate_gcnn`` result of each of the
        ``NO_OF_SPLITS`` splits, in split order.
    """
    dataset = dataTools.AuthorshipOneVsOne(
        name, most_similar_name, ratioTrain, ratioValid, dataPath)
    logging.info(
        'Training GCNN on {0} v.s. {1}'.format(name, most_similar_name))
    best_params = ClusterUtils.load_best_hyperparams(name)

    split_message = 'SPLIT {0}: GCNN results successfully collected: {1}'
    collected = []
    for split_idx in range(NO_OF_SPLITS):
        # Draw a fresh split, then train and score a network on it.
        dataset.get_split(name, most_similar_name, ratioTrain, ratioValid)
        model = train_helper.train_net(dataset, best_params)
        split_eval = evaluate_gcnn(model, dataset)
        collected.append(split_eval)

        logging.info(split_message.format(split_idx, split_eval))

    return collected