def run_ICA(classifier_name, classifier_args, num_iter):
    """Fit an ICA model on the loaded data and print its test-set metrics.

    The data comes from ``load_data()``. A ``LocalClassifier`` and a
    ``RelationalClassifier`` (using the ``'count'`` aggregator) are wrapped
    in an ``ICA`` object, fitted on the training nodes and used to predict
    the test nodes. True labels are read from the ``'label'`` attribute of
    each test node in ``graph.Graph``.

    Args:
        classifier_name: Forwarded to both classifiers.
        classifier_args: Forwarded to both classifiers.
        num_iter: Passed to ``ICA`` as ``max_iteration``.

    Returns:
        None. The classification report, accuracy and timing are printed.
    """

    def _as_clock(seconds):
        # Render a number of seconds as HH:MM:SS through time.gmtime.
        return time.strftime("%H:%M:%S", time.gmtime(seconds))

    (_features, _labels, train_, val_, test_,
     graph, domain_labels) = load_data()

    # run training
    started = time.time()

    # random ordering: val_ is shuffled in place and later handed to
    # ica.predict together with the test nodes.
    np.random.shuffle(val_)

    expected = [graph.Graph.nodes[member]['label'] for member in test_]

    aggregator = pick_aggregator('count', domain_labels)
    model = ICA(
        LocalClassifier(classifier_name, classifier_args),
        RelationalClassifier(classifier_name, aggregator, classifier_args),
        True,
        max_iteration=num_iter,
    )
    model.fit(graph.Graph, train_)
    print('Model fitting done...')

    known_labels = create_map(graph.Graph, train_)
    predicted = model.predict(graph.Graph, val_, test_, known_labels)
    print('Model prediction done...')

    accuracy = accuracy_score(expected, predicted)
    # The clock stops before the report is printed, so report formatting is
    # not counted in the elapsed time.
    finished = time.time()

    print(classification_report(expected, predicted))
    print(accuracy)

    print('Start time: \t\t' + _as_clock(started))
    print('Elapsed time: \t\t' + _as_clock(finished - started))
    print('End time: \t\t' + _as_clock(finished))
# Script fragment: k-fold evaluation of LocalClassifier on the LINQS data.
# Prints the per-fold accuracies, their mean and standard deviation, and the
# confusion matrix summed over all folds.
args = parser.parse_args()
graph, domain_labels = load_linqs_data(args.content_file, args.cites_file)

kf = KFold(n=len(graph.node_list), n_folds=args.num_folds,
           shuffle=True, random_state=42)

# Fix the label order once, as a list, so every fold's confusion matrix has
# the same row/column layout and the matrices can be added together.
label_order = list(domain_labels)

accuracies = []
cm = None
for train, test in kf:
    clf = LocalClassifier(args.classifier)
    clf.fit(graph, train)
    y_pred = clf.predict(graph, test)
    y_true = [graph.node_list[t].label for t in test]
    accuracies.append(accuracy_score(y_true, y_pred))

    fold_cm = confusion_matrix(y_true, y_pred, labels=label_order)
    cm = fold_cm if cm is None else cm + fold_cm

# Parenthesised single-argument prints produce the same output under the
# Python 2 print statement and the Python 3 print function.
print(accuracies)
print("Mean accuracy: %0.4f +- %0.4f" % (np.mean(accuracies), np.std(accuracies)))
print(cm)

conditional_map = {}
# Script fragment: for each trial and each training budget, split the node
# indices, then fit and score a local classifier (unless disabled by
# args.dont_evaluate_local) and a relational classifier. Accuracies are
# collected per budget in local_accuracies / relational_accuracies.
n = range(len(graph.node_list))

# local_accuracies only exists when local evaluation is enabled; its use
# inside the loop below is guarded by the same flag.
if not args.dont_evaluate_local:
    local_accuracies = defaultdict(list)
relational_accuracies = defaultdict(list)

for t in range(args.num_trials):
    for b in budget:
        # The trial number seeds the split.
        train, test = train_test_split(n, train_size=b, random_state=t)

        # True labels. The comprehension variable must not be `t`: under
        # Python 2 a list-comprehension variable leaks into the enclosing
        # scope and would overwrite the trial number, changing random_state
        # for every budget after the first one in a trial.
        y_true = [graph.node_list[idx].label for idx in test]

        if not args.dont_evaluate_local:
            # local classifier fit and test
            local_clf = LocalClassifier(args.classifier)
            local_clf.fit(graph, train)
            local_y_pred = local_clf.predict(graph, test)
            local_accuracy = accuracy_score(y_true, local_y_pred)
            local_accuracies[b].append(local_accuracy)

        # relational classifier fit and test
        agg = pick_aggregator(args.aggregate, domain_labels, args.directed)
        relational_clf = RelationalClassifier(
            args.classifier, agg, not args.dont_use_node_attributes)
        relational_clf.fit(graph, train)
        # Map built from the training nodes and passed to predict.
        conditional_node_to_label_map = create_map(graph, train)
        relational_y_pred = relational_clf.predict(
            graph, test, conditional_node_to_label_map)
        relational_accuracy = accuracy_score(y_true, relational_y_pred)
        relational_accuracies[b].append(relational_accuracy)
# Script fragment: remaining command-line options, followed by a k-fold
# evaluation of LocalClassifier. Prints the per-fold accuracies, their mean
# and standard deviation, and the confusion matrix summed over all folds.
parser.add_argument('-classifier',
                    default='sklearn.linear_model.LogisticRegression',
                    help='The underlying classifier.')
parser.add_argument('-num_folds', type=int, default=10,
                    help='The number of folds.')
args = parser.parse_args()

graph, domain_labels = load_linqs_data(args.content_file, args.cites_file)

kf = KFold(n=len(graph.node_list), n_folds=args.num_folds,
           shuffle=True, random_state=42)

# Build the label list once instead of once per fold, so every fold's
# confusion matrix uses the same row/column order and they can be summed.
label_order = list(domain_labels)

accuracies = []
cm = None
for train, test in kf:
    clf = LocalClassifier(args.classifier)
    clf.fit(graph, train)
    y_pred = clf.predict(graph, test)
    y_true = [graph.node_list[t].label for t in test]
    accuracies.append(accuracy_score(y_true, y_pred))

    fold_cm = confusion_matrix(y_true, y_pred, labels=label_order)
    cm = fold_cm if cm is None else cm + fold_cm

# Parenthesised single-argument prints produce the same output under the
# Python 2 print statement and the Python 3 print function.
print(accuracies)
print("Mean accuracy: %0.4f +- %0.4f" % (np.mean(accuracies), np.std(accuracies)))
print(cm)
# Script fragment. The `if` that opens the first branch lies outside this
# view, so the line starts in the middle of an if/else that sets `test` to
# idx_val or idx_test.
# After that it builds eval_idx as the indices in range(adj.shape[0]) that are
# not in idx_train (np.setdiff1d) and runs args.num_trials ICA trials.
# Per trial (presumably everything from `t_begin = ...` through the final
# print is the loop body -- TODO confirm against the unmangled file):
#   * eval_idx is shuffled in place;
#   * y_true is read from the labels of the nodes indexed by `test`;
#   * an ICA model wrapping a LocalClassifier and a RelationalClassifier is
#     fitted on `train` (defined outside this view);
#   * predictions are made with eval_idx, test and the map from create_map;
#   * the accuracy is appended to ica_accuracies and printed together with
#     the elapsed seconds for the trial.
# NOTE(review): the line is whitespace-collapsed, so everything after the
# first `#` is lexically a comment; the original line breaks must be restored
# before this can run.
# NOTE(review): the final statement is a Python 2 print statement.
test = idx_val else: test = idx_test eval_idx = np.setdiff1d(range(adj.shape[0]), idx_train) # run training ica_accuracies = list() for run in range(args.num_trials): t_begin = time.time() # random ordering np.random.shuffle(eval_idx) y_true = [graph.node_list[t].label for t in test] local_clf = LocalClassifier(args.classifier) agg = pick_aggregator(args.aggregate, domain_labels) relational_clf = RelationalClassifier(args.classifier, agg) ica = ICA(local_clf, relational_clf, args.bootstrap, max_iteration=args.max_iteration) ica.fit(graph, train) conditional_node_to_label_map = create_map(graph, train) ica_predict = ica.predict(graph, eval_idx, test, conditional_node_to_label_map) ica_accuracy = accuracy_score(y_true, ica_predict) ica_accuracies.append(ica_accuracy) print 'Run ' + str(run) + ': \t\t' + str( ica_accuracy) + ', Elapsed time: \t\t' + str(time.time() - t_begin)
# Script fragment: for each trial and each training budget, split the node
# indices, then fit and score a local classifier (unless disabled by
# args.dont_evaluate_local) and a relational classifier. Accuracies are
# collected per budget in local_accuracies / relational_accuracies.
n = range(len(graph.node_list))

# local_accuracies only exists when local evaluation is enabled; its use
# inside the loop below is guarded by the same flag.
if not args.dont_evaluate_local:
    local_accuracies = defaultdict(list)
relational_accuracies = defaultdict(list)

for t in range(args.num_trials):
    for b in budget:
        # The trial number seeds the split.
        train, test = train_test_split(n, train_size=b, random_state=t)

        # True labels. The comprehension variable must not be `t`: under
        # Python 2 a list-comprehension variable leaks into the enclosing
        # scope and would overwrite the trial number, changing random_state
        # for every budget after the first one in a trial.
        y_true = [graph.node_list[idx].label for idx in test]

        if not args.dont_evaluate_local:
            # local classifier fit and test
            local_clf = LocalClassifier(args.classifier)
            local_clf.fit(graph, train)
            local_y_pred = local_clf.predict(graph, test)
            local_accuracy = accuracy_score(y_true, local_y_pred)
            local_accuracies[b].append(local_accuracy)

        # relational classifier fit and test
        agg = pick_aggregator(args.aggregate, domain_labels, args.directed)
        relational_clf = RelationalClassifier(
            args.classifier, agg, not args.dont_use_node_attributes)
        relational_clf.fit(graph, train)
        # Map built from the training nodes and passed to predict.
        conditional_node_to_label_map = create_map(graph, train)
        relational_y_pred = relational_clf.predict(
            graph, test, conditional_node_to_label_map)
        relational_accuracy = accuracy_score(y_true, relational_y_pred)
        relational_accuracies[b].append(relational_accuracy)