def cluster_experiments(nets, y, dataset, n_runs, test_run):
    """Evaluate one-hot METIS cluster-membership features, per partition size and all sizes combined."""
    max_size = int(np.floor(np.log2(len(nets.nodes()))))
    sizes = [2**i for i in range(1, max_size + 1)]
    parts = create_metis_partitions([nets], sizes=sizes, seed=123)
    parts = parts[0]
    results = []
    ohe = OneHotEncoder(n_values='auto', categorical_features='all')
    y = y.loc[parts.index]  # remove singletons
    for n_cluster in sizes:
        X = parts[str(n_cluster) + '_' + dataset].values + 1
        X = ohe.fit_transform(np.expand_dims(X, axis=1))
        df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
        df['n_cluster'] = n_cluster
        results.append(df)
    # use all partitionings
    X = parts.values + 1
    X = ohe.fit_transform(X)
    df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
    df['n_cluster'] = 0
    results.append(df)
    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/cluster', index=False)
    return results
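# A minimal standalone sketch of the partition-id encoding step used above, assuming
# the legacy scikit-learn OneHotEncoder API (n_values / categorical_features) that
# this module already relies on. The cluster ids below are made up for illustration.
def _partition_onehot_example():
    import numpy as np
    from sklearn.preprocessing import OneHotEncoder
    part_ids = np.array([0, 2, 1, 2, 0])  # METIS cluster id per node
    ohe = OneHotEncoder(n_values='auto', categorical_features='all')
    # shift ids as in cluster_experiments and encode one indicator column per cluster id
    X = ohe.fit_transform(np.expand_dims(part_ids + 1, axis=1))
    return X  # sparse 5x3 indicator matrix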
def bow_class_count_experiments(G, X, y, dataset, max_k, n_runs, test_run):
    """Compare bag-of-words features, neighbor class counts, and their combination."""
    from feature_extract import get_neighbor_ids
    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)
    # bow and counts
    print 'bow + counts'
    results = evaluate_model(X, y, adjs=adjs, n_runs=n_runs, test_run=test_run)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/bow_counts', index=False)
    # bow only
    print 'bow'
    results = evaluate_model(X, y, adjs=None, n_runs=n_runs, test_run=test_run)
    results.to_csv('../results/' + dataset + '/bow', index=False)
    # counts only
    print 'counts'
    results = evaluate_model(None, y, adjs=adjs, n_runs=n_runs, test_run=test_run)
    results.to_csv('../results/' + dataset + '/counts', index=False)
    return results
def id_cluster_experiments(nets, y, dataset, n_runs, test_run):
    """Evaluate neighbor-id features combined with one-hot METIS cluster memberships."""
    from feature_extract import get_neighbor_ids
    adjs = get_neighbor_ids(nets, y.index.values, max_k=1)
    max_size = int(np.floor(np.log2(len(nets.nodes()))))
    sizes = [2**i for i in range(1, max_size + 1)]
    parts = create_metis_partitions([nets], sizes=sizes, seed=123)
    parts = parts[0]
    ohe = OneHotEncoder(n_values='auto', categorical_features='all')
    y = y.loc[parts.index]  # remove singletons
    # use all partitionings
    X = parts.values + 1
    X = ohe.fit_transform(X)
    X = sp.hstack([X, adjs[0]])
    results = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/id_cluster', index=False)
    return results
def class_count_experiments(G, y, dataset, max_k, n_runs, test_run,
                            ignore_unlabeled=False):
    """Evaluate neighbor class-count features at increasing neighborhood depths."""
    from feature_extract import get_neighbor_ids
    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)
    results = []
    for depth in range(max_k):
        print "depth", depth
        df = evaluate_model(None, y, adjs=adjs[:depth + 1], n_runs=n_runs,
                            test_run=test_run, ignore_unlabeled=ignore_unlabeled)
        df['depth'] = depth
        results.append(df)
    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    if ignore_unlabeled:
        results.to_csv('../results/' + dataset + '/ids_labeled', index=False)
    else:
        results.to_csv('../results/' + dataset + '/counts', index=False)
    return results
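# Illustrative sketch (not part of the pipeline): the kind of feature this experiment
# evaluates. For a binary adjacency matrix A and a label vector, per-class neighbor
# counts are A.dot(one_hot(labels)); how evaluate_model builds them internally, and
# how it treats unlabeled nodes, is defined elsewhere and only assumed here.
def _neighbor_class_count_sketch(adj, labels):
    import numpy as np
    labels = np.asarray(labels)
    classes = np.unique(labels)
    onehot = (labels[:, None] == classes[None, :]).astype(float)  # (n_nodes, n_classes)
    return np.asarray(adj).dot(onehot)  # row i = class counts among node i's neighbors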
def id_class_count_experiments(G, y, dataset, max_k, n_runs, test_run):
    """Evaluate neighbor-id features combined with neighbor class counts."""
    from feature_extract import get_neighbor_ids
    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)
    X = adjs[0].todense()
    results = evaluate_model(X, y, adjs=adjs, n_runs=n_runs, test_run=test_run)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/id_counts', index=False)
    return results
def id_rwr_experiments(G, y, dataset, n_runs, test_run):
    """Evaluate random-walk-with-restart scores combined with neighbor-id features."""
    from feature_extract import get_neighbor_ids, calc_rwr_matrix
    adjs = get_neighbor_ids(G, y.index.values, max_k=1)
    X = calc_rwr_matrix(np.array(adjs[0].todense()), 0.9)
    X = np.hstack([X, adjs[0].todense()])
    results = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/id_rwr', index=False)
    return results
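# calc_rwr_matrix is imported from feature_extract and not shown in this file. The
# sketch below is one plausible dense implementation, assuming it returns the
# closed-form random-walk-with-restart steady state R = c * (I - (1 - c) * W)^-1
# over a row-normalised transition matrix W with restart probability c.
def _rwr_matrix_sketch(adj, c):
    import numpy as np
    adj = np.asarray(adj, dtype=float)
    deg = adj.sum(axis=1)
    deg[deg == 0] = 1.0          # avoid division by zero for isolated nodes
    W = adj / deg[:, None]       # row-normalised transition matrix
    n = adj.shape[0]
    return c * np.linalg.inv(np.eye(n) - (1.0 - c) * W)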
def ids_experiment(G, y, dataset, max_k, n_runs, test_run):
    """Evaluate raw neighbor-id (adjacency) features at increasing depths."""
    from feature_extract import get_neighbor_ids
    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)
    results = []
    for depth in range(max_k):
        X = sp.hstack(adjs[:depth + 1])
        df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
        df['depth'] = depth
        results.append(df)
    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/ids', index=False)
    return results
def class_prob_experiments(G, y, dataset, max_k, n_runs, test_run):
    """Evaluate normalized neighbor class distributions at increasing depths."""
    from feature_extract import get_neighbor_ids
    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)
    results = []
    for depth in range(max_k):
        df = evaluate_model(None, y, adjs=adjs[:depth + 1], normelize=True,
                            n_runs=n_runs, test_run=test_run)
        df['depth'] = depth
        results.append(df)
    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/proba', index=False)
    return results
def rwr_experiments(G, y, dataset, n_runs, test_run):
    """Sweep the restart probability for random-walk-with-restart features."""
    from feature_extract import get_neighbor_ids, calc_rwr_matrix
    adjs = get_neighbor_ids(G, y.index.values, max_k=1)
    results = []
    c_range = np.arange(0.1, 1, 0.2)
    if test_run:
        c_range = [0.9]
    for c in c_range:
        X = calc_rwr_matrix(np.array(adjs[0].todense()), c)
        df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
        df['restart'] = c
        results.append(df)
    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/rwr', index=False)
    return results
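# A hedged usage sketch: one way the drivers above might be invoked. The tiny graph
# and labels are synthetic, and y is assumed to be a pandas Series indexed by node id
# (the exact shape evaluate_model and get_neighbor_ids expect is defined elsewhere).
def _example_run(test_run=True):
    import networkx as nx
    import pandas as pd
    G = nx.barbell_graph(8, 2)   # two dense cliques joined by a short path
    y = pd.Series(['a' if n < 9 else 'b' for n in G.nodes()], index=list(G.nodes()))
    class_count_experiments(G, y, 'toy_demo', max_k=2, n_runs=2, test_run=test_run)
    rwr_experiments(G, y, 'toy_demo', n_runs=2, test_run=test_run)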