return conver_to_probs(votes) if __name__=="__main__": #net_file = 'res/9503-jaccard-votingnets.pickle' #test_file ='res/10003-test.csv' net_file = 'res/9003-tc-jaccard-votingnets.pickle' test_file = 'res/test-tc-1000.npy' #test = pd.read_csv(test_file, index_col=0) test = np.load(test_file) with open(net_file, 'rb') as f: nets = pickle.load(f) m = get_data() distance = lambda x,y: jaccard(x,y, m['nf']) classifier = lambda x: lcl(x, m['ml']) refs = dict() mers = dict() #test_a = list(test.index) test_a = list(test) for gamma, [mer, ref] in tqdm(nets.items()): mers[gamma] = get_predictions_score(net=mer, distance=distance, test=test_a) refs[gamma] = get_predictions_score(net=ref, distance=distance, test=test_a) with open(f"/tmp/preds-{gamma}-mers.pickle", 'wb+') as f: pickle.dump(mers[gamma], f) with open(f"/tmp/preds-{gamma}-refs.pickle", 'wb+') as f: pickle.dump(refs[gamma], f)
def distance(x, y, arg):
    """Return ``jaccard(x, y, arg)``.

    Thin wrapper: all three arguments are forwarded unchanged, in the same
    order, and the result of ``jaccard`` is returned as-is.
    """
    result = jaccard(x, y, arg)
    return result
def mjaccard(x, y, nafs):
    """Jaccard variant with a fixed result when both entries are empty.

    Returns 1 if ``nafs[x]`` and ``nafs[y]`` both have length zero;
    otherwise returns ``jaccard(x, y, nafs)``.
    """
    # `and` short-circuits, so nafs[y] is only looked up when nafs[x] is empty.
    both_empty = len(nafs[x]) == 0 and len(nafs[y]) == 0
    if not both_empty:
        return jaccard(x, y, nafs)
    return 1
# NOTE(review): the statements down to the to_csv call are the tail of a
# function whose header is not visible here; they are kept unchanged --
# confirm their indentation against the enclosing def.
sizes = {g:len(n.keys()) for g, n in nets.items()}
perfs_di = pd.DataFrame.from_dict(res, orient='index', columns=['fp', 'fn'])
szs_di = pd.DataFrame.from_dict(sizes, orient='index', columns=['sizes'])
gj_euc = perfs_di.join(szs_di)
part_size = len(train)
gj_euc['compression'] = gj_euc.sizes/part_size
gj_euc.to_csv(f"res/euc{part_size}.csv")


if __name__ == "__main__":
    # Run the Jaccard and Euclid experiments on one train/test split.
    print("Setting up data...")
    afs, lbs = setup_data()

    # Experiment configuration.
    num_parts = 2
    part_size = 500
    test_size = 100

    parts = get_part_indexes(afs, num_parts, part_size)
    train_set = parts[0]
    # Only the first `test_size` items of the second part are used for testing.
    test_set = parts[1][:test_size]
    print(f"Part size: {part_size} and {test_size=}")

    # The same classifier is passed to both experiments.
    classifier = lambda x: lcl(x, lbs)

    print(f"Jaccard experiments")
    distance = lambda x, y: jaccard(x, y, afs)
    run_jaccard(
        train=train_set,
        test=test_set,
        distance=distance,
        classifier=classifier,
    )

    print("Euclid experiments")
    distance = lambda x, y: adf(x, y, afs)
    run_euclid(
        train=train_set,
        test=test_set,
        distance=distance,
        classifier=classifier,
    )