# Command-line options for the k-fold evaluation run.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-content_file',
    help='The path to the content file.')
parser.add_argument(
    '-cites_file',
    help='The path to the cites file.')
parser.add_argument(
    '-classifier',
    default='sklearn.naive_bayes.MultinomialNB',
    help='The underlying classifier.')
parser.add_argument(
    '-num_folds',
    type=int,
    default=10,
    help='The number of folds.')
args = parser.parse_args()

# Load the graph and its label set from the two files given on the
# command line.
graph, domain_labels = load_linqs_data(args.content_file, args.cites_file)

# Shuffled K-fold split over the positions of graph.node_list; the fixed
# random_state makes the fold assignment repeatable between runs.
kf = KFold(
    n=len(graph.node_list),
    n_folds=args.num_folds,
    shuffle=True,
    random_state=42)

# Per-run result holders (presumably filled in further down the loop body,
# which lies outside this excerpt).
accuracies = []
cm = None

for train, test in kf:
    # A fresh classifier per fold, fitted on the training positions only.
    clf = LocalClassifier(args.classifier)
    clf.fit(graph, train)
    y_pred = clf.predict(graph, test)
    # Ground-truth labels of the held-out nodes, in the same order as test.
    y_true = [graph.node_list[t].label for t in test]
# Echo the run configuration so printed results can be matched to settings.
# Single-argument parenthesised print produces the same output under the
# Python 2 print statement and the Python 3 print function.
print('Cites File: %s' % args.cites_file)
print('Aggregator: %s' % args.aggregate)
Dir = 'Directed' if args.directed else 'Undirected'
print('Directed : %s' % Dir)
Att = ('Without Node attributes' if args.dont_use_node_attributes
       else 'With Node Attributes')
print('Attributes: %s' % Att)
print('classifier:%s' % args.classifier)

# Load the graph and its label set from the files named on the command line.
graph, domain_labels = load_linqs_data(args.content_file, args.cites_file)

# Values passed as train_size to train_test_split. Only 0.8 is active; the
# sweep previously listed here was 0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9.
budget = [0.8]

# Positions into graph.node_list; these are what gets split into train/test.
n = range(len(graph.node_list))

# Accumulator for results (keyed and filled outside this excerpt).
ica_accuracies = defaultdict(list)

for t in range(args.num_trials):
    for b in budget:
        # The trial number doubles as the seed, so each trial gets a
        # different but repeatable split.
        train, test = train_test_split(n, train_size=b, random_state=t)

        # True labels of the held-out nodes. The comprehension uses its own
        # variable on purpose: in Python 2 a list-comprehension variable
        # leaks into the enclosing scope, so reusing `t` here would
        # overwrite the trial index (and thus the seed for later budgets).
        y_true = [graph.node_list[idx].label for idx in test]

        local_clf = LocalClassifier(args.classifier)