def collect_svc(name, most_similar_name, perc):
    svc_results = []
    data = dataTools.Authorship(name, ratioTrain, ratioValid, dataPath)
    logging.info('Training SVM on {0}'.format(name))
    phi, indices = load_phi(name, data, percentage=perc)
    if indices.shape[0] < 2:
        return [{'acc': 0, 'f1': 0, 'auc': 0, 'prec': 0, 'time': 0}]
    for split_n in range(NO_OF_SPLITS):
        data.get_split(name, ratioTrain, ratioValid)
        data.reduce_dim(indices)
        start = time.perf_counter()
        svc = linear_class.train_svc(data)
        end = time.perf_counter()
        svc_result = evaluate_svc(svc, data)
        svc_result['time'] = end - start  # elapsed training time (was negative as start - end)
        svc_results.append(svc_result)
        logging.info(
            'SPLIT {0}: SVM results successfully collected: {1}'.format(
                split_n, svc_result))
    return svc_results
def main(): Path("results/one_vs_one").mkdir(parents=True, exist_ok=True) logging.info('Loading data for One-vs-One.') data_general = dataTools.Authorship('poe', 1, 0, dataPath) logging.info('Computing Dissimilarty matrix') dis_m = signal_similarity.get_dissimilarity_matrix(data=data_general) logging.info('Starting training.') svc_results, GCNN_results = check_prev_results() for idx, name in enumerate(all_author_names): most_similar_name = get_most_similar_name(dis_m, idx) logging.info('Training GCNN and SVM on {0} v.s. {1}'.format(name, most_similar_name)) try: if name not in svc_results.keys(): svc_results[name] = collect_svc(name, most_similar_name) logging.info('SVM results successfully collected: {0}'.format(svc_results[name])) if name not in GCNN_results.keys(): GCNN_results[name] = collect_gcnn(name, most_similar_name) logging.info('GCNN results successfully collected: {0}'.format(GCNN_results[name])) except BaseException as e: dump_crash(GCNN_results, svc_results) logging.error("Exception during collecting one-vs-one", exc_info=True) raise e dump_results(GCNN_results, svc_results)
def collect_gcnn(name, most_similar_name, perc):
    gcnn_results = []
    data = dataTools.Authorship(name, ratioTrain, ratioValid, dataPath)
    logging.info('Training GCNN on {0}'.format(name))
    h_params = ClusterUtils.load_best_hyperparams(name)
    phi, indices = load_phi(name, data, percentage=perc)
    if indices.shape[0] < 2:
        return [{'acc': 0, 'f1': 0, 'auc': 0, 'prec': 0, 'time': 0}]
    for split_n in range(NO_OF_SPLITS):
        data.get_split(name, ratioTrain, ratioValid)
        data.reduce_dim(indices)
        start = time.perf_counter()
        gcnn = train_helper.train_net(data, h_params, phi=phi)
        end = time.perf_counter()
        gcnn_eval = evaluate_gcnn(gcnn, data)
        gcnn_eval['time'] = end - start  # elapsed training time (was negative as start - end)
        gcnn_results.append(gcnn_eval)
        logging.info(
            'SPLIT {0}: GCNN results successfully collected: {1}'.format(
                split_n, gcnn_results[split_n]))
    return gcnn_results
def main(): Path("results/one_vs_one").mkdir(parents=True, exist_ok=True) logging.info('Loading data for One-vs-One.') data_general = dataTools.Authorship('poe', 1, 0, dataPath) logging.info('Computing Dissimilarty matrix') dis_m = signal_similarity.get_dissimilarity_matrix(data=data_general) logging.info('Starting training.') GCNN_results = {} svc_results = {} for idx, name in enumerate(all_author_names): most_similar_name = get_most_similar_name(dis_m, idx) logging.info('Training GCNN and SVM on {0} v.s. {1}'.format( name, most_similar_name)) GCNN_results[name] = {} svc_results[name] = {} try: feature_search = [10, 15, 20, 25, 30, 35, 40, 45] for feture_count in feature_search: logging.info( 'Starting training with feature count: {0}'.format( feture_count)) phi, indices = load_phi(name, no_of_features=feture_count) svc_results[name][feture_count] = collect_svc( name, most_similar_name, indices) logging.info('SVM results successfully collected: {0}'.format( svc_results[name][feture_count])) GCNN_results[name][feture_count] = collect_gcnn( name, most_similar_name, phi, indices) logging.info('GCNN results successfully collected: {0}'.format( GCNN_results[name][feture_count])) dump_results(GCNN_results, svc_results) except BaseException as e: dump_crash(GCNN_results, svc_results) logging.error( "Exception during collecting one-vs-one with feature search", exc_info=True) raise e
def get_dissimilarity_matrix(data=None):
    if data is None:
        dataDir = 'authorData'               # data directory
        dataFilename = 'authorshipData.mat'  # data filename
        dataPath = os.path.join(dataDir, dataFilename)  # data path
        data = dataTools.Authorship('poe', 1, 0, dataPath=dataPath)
    all_author_names = data.authorData.keys()
    authors_mean_signals = []
    for name in all_author_names:
        curr_signals = data.authorData[name]["wordFreq"]
        avg = get_author_average(curr_signals)
        authors_mean_signals.append(avg)
    # pairwise distances between the authors' mean signals, as a square matrix
    dis_matrix = squareform(pdist(numpy.array(authors_mean_signals)))
    return dis_matrix
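# Hedged sketch, not part of the original source: get_author_average is called above but
# not shown here. Assuming curr_signals is a (num_texts x num_words) array of
# word-frequency signals for one author, a minimal version averages the texts into a
# single fingerprint vector:
def get_author_average(signals):
    signals = numpy.asarray(signals)
    return signals.mean(axis=0)  # one mean word-frequency signal for the author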
def collect_svc(name, indices):
    svc_results = []
    data = dataTools.Authorship(name, ratioTrain, ratioValid, dataPath)
    logging.info('Training SVM on {0}'.format(name))
    for split_n in range(NO_OF_SPLITS):
        data.get_split(name, ratioTrain, ratioValid)
        data.reduce_dim(indices)
        svc = linear_class.train_svc(data)
        svc_result = evaluate_svc(svc, data)
        svc_results.append(svc_result)
        logging.info(
            'SPLIT {0}: SVM results successfully collected: {1}'.format(
                split_n, svc_result))
    return svc_results
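# Hedged sketch, not part of the original source: evaluate_svc is used above but not
# defined in this section. The hypothetical X_test / y_test arguments stand in for
# whatever test split the project's dataTools.Authorship object exposes; the returned
# keys mirror the metric keys ('acc', 'f1', 'auc', 'prec') used elsewhere in this section.
from sklearn.metrics import accuracy_score, f1_score, precision_score, roc_auc_score

def evaluate_svc_on_arrays(svc, X_test, y_test):
    y_pred = svc.predict(X_test)
    scores = svc.decision_function(X_test)  # margin scores for the ROC-AUC
    return {
        'acc': accuracy_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, scores),
        'prec': precision_score(y_test, y_pred),
    }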
def collect_gcnn(name, phi, ind):
    gcnn_results = []
    data = dataTools.Authorship(name, ratioTrain, ratioValid, dataPath)
    logging.info('Training GCNN on {0}'.format(name))
    h_params = ClusterUtils.load_best_hyperparams(name)
    for split_n in range(NO_OF_SPLITS):
        data.get_split(name, ratioTrain, ratioValid)
        data.reduce_dim(ind)
        gcnn = train_helper.train_net(data, h_params, phi=phi)
        gcnn_results.append(evaluate_gcnn(gcnn, data))
        logging.info(
            'SPLIT {0}: GCNN results successfully collected: {1}'.format(
                split_n, gcnn_results[split_n]))
    return gcnn_results
def main(): Path("results/one_vs_all").mkdir(parents=True, exist_ok=True) logging.info('Loading data for One-vs-All with fingerprint matrix.') data_general = dataTools.Authorship('poe', 1, 0, dataPath) logging.info('Computing Dissimilarty matrix') dis_m = signal_similarity.get_dissimilarity_matrix(data=data_general) logging.info('Starting training.') GCNN_results = {} svc_results = {} for idx, name in enumerate(all_author_names): logging.info('Training GCNN and SVM on {0}'.format(name)) try: phi, indices = load_phi(name) svc_results[name] = collect_svc(name, indices) logging.info('SVM results successfully collected: {0}'.format( svc_results[name])) GCNN_results[name] = collect_gcnn(name, phi, indices) logging.info('GCNN results successfully collected: {0}'.format( GCNN_results[name])) except BaseException as e: dump_crash(GCNN_results, svc_results) logging.error("Exception during collecting one-vs-All", exc_info=True) raise e dump_results(GCNN_results, svc_results)
def main(): Path("results/one_vs_one").mkdir(parents=True, exist_ok=True) logging.info('Loading data for One-vs-One.') data_general = dataTools.Authorship('poe', 1, 0, dataPath) logging.info('Computing Dissimilarty matrix') dis_m = signal_similarity.get_dissimilarity_matrix(data=data_general) logging.info('Starting training.') GCNN_results, svc_results = load_prev_results() for idx, name in enumerate(all_author_names): most_similar_name = get_most_similar_name(dis_m, idx) logging.info('Training GCNN and SVM on {0} v.s. {1}'.format( name, most_similar_name)) if name not in svc_results.keys(): svc_results[name] = {} if name not in GCNN_results.keys(): GCNN_results[name] = {} try: percentages = [ 0.01, 0.02, 0.05, .1, .15, .20, .25, .30, 0.35, 0.4, 0.5 ] # percentages = [0.05, .1, .15, .20, .25, .30] for perc in percentages: logging.info( 'Starting training with feature count: {0}'.format(perc)) if str(perc) in svc_results[name].keys() and len( svc_results[name][str(perc)]) == 10: logging.info('Skipping SVC for {1} with perc: {0}'.format( perc, name)) else: svc_results[name][str(perc)] = collect_svc( name, most_similar_name, perc) logging.info( 'SVM results successfully collected: {0}'.format( svc_results[name][str(perc)])) if str(perc) in GCNN_results[name].keys() and len( GCNN_results[name][str(perc)]) == 10: logging.info('Skipping GCNN for {1} with perc: {0}'.format( perc, name)) else: GCNN_results[name][str(perc)] = collect_gcnn( name, most_similar_name, perc) logging.info( 'GCNN results successfully collected: {0}'.format( GCNN_results[name][str(perc)])) dump_results(GCNN_results, svc_results) except BaseException as e: dump_crash(GCNN_results, svc_results) logging.error( "Exception during collecting one-vs-one with feature search", exc_info=True) raise e