def cluster_experiments(nets, y, dataset, n_runs, test_run):

    max_size = int(np.floor(np.log2(len(nets.nodes()))))
    sizes = [2**i for i in range(1, max_size + 1)]

    parts = create_metis_partitions([nets], sizes=sizes, seed=123)
    parts = parts[0]

    results = []

    ohe = OneHotEncoder(n_values='auto', categorical_features='all')

    y = y.loc[parts.index]  # remove singletones

    for n_cluster in sizes:
        X = parts[str(n_cluster) + '_' + dataset].values + 1
        X = ohe.fit_transform(np.expand_dims(X, axis=1))
        df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
        df['n_cluster'] = n_cluster
        results.append(df)

    # use all partiionings
    X = parts.values + 1
    X = ohe.fit_transform(X)
    df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
    df['n_cluster'] = 0
    results.append(df)

    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/cluster', index=False)
    return results
def bow_class_count_experiments(G, X, y, dataset, max_k, n_runs, test_run):
    from feature_extract import get_neighbor_ids

    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)

    results = []

    # bow and counts
    print 'bow + counts'
    results = evaluate_model(X, y, adjs=adjs, n_runs=n_runs,
            test_run=test_run)

    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/bow_counts', index=False)

    # bow
    print 'bow'
    results = evaluate_model(X, y, adjs=None, n_runs=n_runs,
            test_run=test_run)
    results.to_csv('../results/' + dataset + '/bow', index=False)

    # counts
    print 'counts'
    results = evaluate_model(None, y, adjs=adjs, n_runs=n_runs,
            test_run=test_run)
    results.to_csv('../results/' + dataset + '/counts', index=False)
    return results
Esempio n. 3
0
def id_cluster_experiments(nets, y, dataset, n_runs, test_run):
    from feature_extract import get_neighbor_ids

    adjs = get_neighbor_ids(nets, y.index.values, max_k=1)

    max_size = int(np.floor(np.log2(len(nets.nodes()))))
    sizes = [2**i for i in range(1, max_size + 1)]

    parts = create_metis_partitions([nets], sizes=sizes, seed=123)
    parts = parts[0]

    results = []

    ohe = OneHotEncoder(n_values='auto', categorical_features='all')

    y = y.loc[parts.index]  # remove singletones

    # use all partiionings
    X = parts.values + 1
    X = ohe.fit_transform(X)
    X = sp.hstack([X, adjs[0]])
    results = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)

    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/id_cluster', index=False)
    return results
def class_count_experiments(G,
                            y,
                            dataset,
                            max_k,
                            n_runs,
                            test_run,
                            ignore_unlabeled=False):
    from feature_extract import get_neighbor_ids

    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)

    results = []

    for depth in range(max_k):
        print "depth", depth
        df = evaluate_model(None,
                            y,
                            adjs=adjs[:depth + 1],
                            n_runs=n_runs,
                            test_run=test_run,
                            ignore_unlabeled=ignore_unlabeled)
        df['depth'] = depth
        results.append(df)

    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    if ignore_unlabeled:
        results.to_csv('../results/' + dataset + '/ids_labeled', index=False)
    else:
        results.to_csv('../results/' + dataset + '/counts', index=False)
    return results
Esempio n. 5
0
def id_class_count_experiments(G, y, dataset, max_k, n_runs, test_run):
    from feature_extract import get_neighbor_ids

    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)

    results = []
    X = adjs[0].todense()

    results = evaluate_model(X, y, adjs=adjs, n_runs=n_runs, test_run=test_run)

    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/id_counts', index=False)
    return results
Esempio n. 6
0
def id_rwr_experiments(G, y, dataset, n_runs, test_run):
    from feature_extract import get_neighbor_ids, calc_rwr_matrix

    adjs = get_neighbor_ids(G, y.index.values, max_k=1)

    X = []

    X = calc_rwr_matrix(np.array(adjs[0].todense()), 0.9)
    X = np.hstack([X, adjs[0].todense()])
    results = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)

    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/id_rwr', index=False)
    return results
def ids_experiment(G, y, dataset, max_k, n_runs, test_run):
    from feature_extract import get_neighbor_ids

    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)

    results = []
    X = []

    for depth in range(max_k):
        X = sp.hstack(adjs[:depth + 1])
        df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
        df['depth'] = depth
        results.append(df)

    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/ids', index=False)
    return results
def class_prob_experiments(G, y, dataset, max_k, n_runs, test_run):
    from feature_extract import get_neighbor_ids

    adjs = get_neighbor_ids(G, y.index.values, max_k=max_k)

    results = []

    for depth in range(max_k):
        df = evaluate_model(None,
                            y,
                            adjs=adjs[:depth + 1],
                            normelize=True,
                            n_runs=n_runs,
                            test_run=test_run)
        df['depth'] = depth
        results.append(df)

    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/proba', index=False)
    return results
def rwr_experiments(G, y, dataset, n_runs, test_run):
    from feature_extract import get_neighbor_ids, calc_rwr_matrix

    adjs = get_neighbor_ids(G, y.index.values, max_k=1)

    results = []
    X = []
    c_range = np.arange(0.1, 1, 0.2)
    if test_run:
        c_range = [0.9]

    for c in c_range:
        X = calc_rwr_matrix(np.array(adjs[0].todense()), c)
        df = evaluate_model(X, y, n_runs=n_runs, test_run=test_run)
        df['restart'] = c
        results.append(df)

    results = pd.concat(results)
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    results.to_csv('../results/' + dataset + '/rwr', index=False)
    return results