Esempio n. 1
0
def run_ICA(classifier_name, classifier_args, num_iter):
    """Fit an ICA model on the loaded graph, score it and print a report.

    Args:
        classifier_name: First argument given to both ``LocalClassifier``
            and ``RelationalClassifier``.
        classifier_args: Extra argument forwarded to both classifier
            wrappers.
        num_iter: Forwarded to ``ICA`` as ``max_iteration``.

    Returns:
        None. Progress messages, the classification report, the accuracy
        and the start/elapsed/end times are printed to stdout.
    """
    _features, _labels, train_, val_, test_, graph, domain_labels = load_data()
    network = graph.Graph

    # The clock starts after the data has been loaded, so loading is not
    # part of the reported elapsed time.
    started = time.time()

    # Shuffle val_ in place before it is handed to ica.predict below.
    np.random.shuffle(val_)

    # Reference labels for the test nodes, read from the node attributes.
    expected = [network.nodes[node]['label'] for node in test_]

    local_model = LocalClassifier(classifier_name, classifier_args)
    relational_model = RelationalClassifier(
        classifier_name,
        pick_aggregator('count', domain_labels),
        classifier_args,
    )

    # The third positional argument of ICA is hard-coded to True here.
    model = ICA(local_model, relational_model, True, max_iteration=num_iter)
    model.fit(network, train_)
    print('Model fitting done...')

    predicted = model.predict(network, val_, test_,
                              create_map(network, train_))
    print('Model prediction done...')

    accuracy = accuracy_score(expected, predicted)
    # Stop the clock before the report is built and printed.
    finished = time.time()

    print(classification_report(expected, predicted))
    print(accuracy)

    def as_clock(seconds):
        # Render a number of seconds as HH:MM:SS (UTC-based via gmtime).
        return time.strftime("%H:%M:%S", time.gmtime(seconds))

    for caption, seconds in (
        ('Start time: \t\t', started),
        ('Elapsed time: \t\t', finished - started),
        ('End time: \t\t', finished),
    ):
        print(caption + as_clock(seconds))
Esempio n. 2
0
    args = parser.parse_args()

    graph, domain_labels = load_linqs_data(args.content_file, args.cites_file)

    kf = KFold(n=len(graph.node_list),
               n_folds=args.num_folds,
               shuffle=True,
               random_state=42)

    accuracies = []

    cm = None

    for train, test in kf:
        clf = LocalClassifier(args.classifier)
        clf.fit(graph, train)
        y_pred = clf.predict(graph, test)
        y_true = [graph.node_list[t].label for t in test]
        accuracies.append(accuracy_score(y_true, y_pred))
        if cm is None:
            cm = confusion_matrix(y_true, y_pred, labels=domain_labels)
        else:
            cm += confusion_matrix(y_true, y_pred, labels=domain_labels)

    print accuracies
    print "Mean accuracy: %0.4f +- %0.4f" % (np.mean(accuracies),
                                             np.std(accuracies))
    print cm

conditional_map = {}
Esempio n. 3
0
    n = range(len(graph.node_list))
    if not args.dont_evaluate_local:
        local_accuracies = defaultdict(list)
    relational_accuracies = defaultdict(list)

    for t in range(args.num_trials):
        for b in budget:
            train, test = train_test_split(n, train_size=b, random_state=t)

            # True labels
            y_true = [graph.node_list[t].label for t in test]

            if not args.dont_evaluate_local:
                # local classifier fit and test
                local_clf = LocalClassifier(args.classifier)
                local_clf.fit(graph, train)
                local_y_pred = local_clf.predict(graph, test)
                local_accuracy = accuracy_score(y_true, local_y_pred)
                local_accuracies[b].append(local_accuracy)

            # relational classifier fit and test
            agg = pick_aggregator(args.aggregate, domain_labels, args.directed)
            relational_clf = RelationalClassifier(
                args.classifier, agg, not args.dont_use_node_attributes)
            relational_clf.fit(graph, train)
            conditional_node_to_label_map = create_map(graph, train)
            relational_y_pred = relational_clf.predict(
                graph, test, conditional_node_to_label_map)
            relational_accuracy = accuracy_score(y_true, relational_y_pred)
            relational_accuracies[b].append(relational_accuracy)
    parser.add_argument('-classifier', default='sklearn.linear_model.LogisticRegression', help='The underlying classifier.')
    parser.add_argument('-num_folds', type=int, default=10, help='The number of folds.')
    
    args = parser.parse_args()
    
    graph, domain_labels = load_linqs_data(args.content_file, args.cites_file)
    
    kf = KFold(n=len(graph.node_list), n_folds=args.num_folds, shuffle=True, random_state=42)    
    
    
    accuracies = []
    
    cm = None
    
    for train, test in kf:
        clf = LocalClassifier(args.classifier)
        clf.fit(graph, train)
        y_pred = clf.predict(graph, test)
        y_true = [graph.node_list[t].label for t in test]
        accuracies.append(accuracy_score(y_true, y_pred))
        if cm is None:
            cm = confusion_matrix(y_true, y_pred, labels = list(domain_labels))
        else:
            cm += confusion_matrix(y_true, y_pred, labels = list(domain_labels))

    
    print accuracies
    print "Mean accuracy: %0.4f +- %0.4f" % (np.mean(accuracies), np.std(accuracies))
    print cm
        
    
Esempio n. 5
0
    test = idx_val
else:
    test = idx_test
# Every row index of adj that is not in idx_train; passed to ica.predict
# as its second argument on each run.
eval_idx = np.setdiff1d(range(adj.shape[0]), idx_train)

# Run args.num_trials independent fit/predict rounds and collect one
# accuracy value per round.
# NOTE(review): `train` is not bound in the visible lines and `test` is set
# by the if/else just above; confirm both against the full script.
ica_accuracies = list()
for run in range(args.num_trials):

    # Wall-clock start of this run, used for the elapsed time printed below.
    t_begin = time.time()

    # Shuffle eval_idx in place so each run passes it to ica.predict in a
    # different random order.
    np.random.shuffle(eval_idx)

    # Reference labels of the nodes indexed by `test`.
    y_true = [graph.node_list[t].label for t in test]
    # Fresh classifiers and a fresh ICA wrapper are built on every run.
    local_clf = LocalClassifier(args.classifier)
    agg = pick_aggregator(args.aggregate, domain_labels)
    relational_clf = RelationalClassifier(args.classifier, agg)
    ica = ICA(local_clf,
              relational_clf,
              args.bootstrap,
              max_iteration=args.max_iteration)
    ica.fit(graph, train)
    # Map built from the training nodes; handed to ica.predict as its last
    # argument.
    conditional_node_to_label_map = create_map(graph, train)
    ica_predict = ica.predict(graph, eval_idx, test,
                              conditional_node_to_label_map)
    ica_accuracy = accuracy_score(y_true, ica_predict)
    ica_accuracies.append(ica_accuracy)
    # Python 2 print statement: run number, accuracy and elapsed seconds.
    print 'Run ' + str(run) + ': \t\t' + str(
        ica_accuracy) + ', Elapsed time: \t\t' + str(time.time() - t_begin)
    n=range(len(graph.node_list))
    if not args.dont_evaluate_local:
        local_accuracies = defaultdict(list)
    relational_accuracies = defaultdict(list)


    for t in range(args.num_trials):
        for b in budget:
            train, test = train_test_split(n, train_size=b, random_state=t)
            
            # True labels
            y_true=[graph.node_list[t].label for t in test]

            if not args.dont_evaluate_local:
                # local classifier fit and test
                local_clf=LocalClassifier(args.classifier)
                local_clf.fit(graph,train)
                local_y_pred=local_clf.predict(graph,test)
                local_accuracy=accuracy_score(y_true, local_y_pred)
                local_accuracies[b].append(local_accuracy)
            
            
            # relational classifier fit and test
            agg=pick_aggregator(args.aggregate,domain_labels,args.directed)
            relational_clf=RelationalClassifier(args.classifier, agg, not args.dont_use_node_attributes)
            relational_clf.fit(graph,train)
            conditional_node_to_label_map=create_map(graph,train)
            relational_y_pred=relational_clf.predict(graph,test,conditional_node_to_label_map)
            relational_accuracy=accuracy_score(y_true,relational_y_pred)
            relational_accuracies[b].append(relational_accuracy)