def __init__(self, adj, features, labels, idx_train, idx_test, args):
    """Prepare ICA-based predictions, reusing an on-disk cache when present.

    The ICA probabilities and hard predictions are cached under ``preds/``
    in two ``.npy`` files keyed by ``args.train_size``, ``args.dataset`` and
    ``args.seed``. When both files exist they are loaded; otherwise ICA is
    fitted on ``idx_train``, evaluated on ``idx_test`` and the results are
    written to the cache.

    Args:
        adj: sparse adjacency matrix (must support ``tocsr`` and ``dot``).
        features: node feature matrix. NOTE: binarised *in place* when the
            cache is missing and ``args.dataset != 'cora'``.
        labels: per-node label matrix; ``labels.argmax(1)`` is used as the
            class index, and ``labels.shape[1]`` as the number of classes.
        idx_train: indices of labeled nodes.
        idx_test: indices of unlabeled nodes.
        args: namespace providing ``dataset``, ``train_size`` and ``seed``.

    Sets ``self.args``, ``self.adj``, ``self.adj_two_hop`` (not for the
    'reddit' dataset), ``self.pseudo_labels``, ``self.probs`` and
    ``self.concated``.
    """
    self.args = args
    self.adj = adj.tocsr()
    if args.dataset != 'reddit':
        # Two-hop connectivity with the self-paths on the diagonal removed.
        self.adj_two_hop = adj.dot(adj)
        self.adj_two_hop.setdiag(0)
        self.adj_two_hop.eliminate_zeros()
    self.pseudo_labels = np.zeros((adj.shape[0], labels.shape[1]))

    cache_dir = 'preds'
    cache_tag = f'{args.train_size}_{args.dataset}_{args.seed}'
    probs_path = f'{cache_dir}/ICA_probs_{cache_tag}.npy'
    preds_path = f'{cache_dir}/ICA_preds_{cache_tag}.npy'
    # Both files are loaded below, so both must exist for the cache to be
    # usable; a partial cache falls back to recomputing.
    load_data = os.path.exists(probs_path) and os.path.exists(preds_path)
    print('if loading: ', load_data)

    # Computed once; used for the training entries and the accuracy report.
    true_classes = labels.argmax(1)

    if not load_data:
        st = time.time()
        if args.dataset != 'cora':
            # Binarise the features. This mutates the caller's matrix.
            features[features != 0] = 1
        classifier = 'sklearn.linear_model.LogisticRegression'
        aggregate = 'count'  # aggregation operator; choices: 'count', 'prop'

        graph, domain_labels = build_graph(adj, features, labels)
        y_true = [graph.node_list[t].label for t in idx_test]

        local_clf = LocalClassifier(classifier)
        agg = pick_aggregator(aggregate, domain_labels)
        relational_clf = RelationalClassifier(classifier, agg)
        ica = ICA(local_clf, relational_clf, bootstrap=True, max_iteration=10)
        ica.fit(graph, idx_train)

        conditional_node_to_label_map = create_map(graph, idx_train)
        eval_idx = np.setdiff1d(np.arange(adj.shape[0]), idx_train)
        ica_predict, probs = ica.predict(
            graph, eval_idx, idx_test, conditional_node_to_label_map)

        ica_accuracy = accuracy_score(y_true, ica_predict)
        print('Acc: ' + str(ica_accuracy))
        print('optimization consumes %s s' % (time.time() - st))

        # Predicted labels are strings whose first character is a prefix
        # and whose remainder is the integer class index.
        dict_pred = {x: int(y[1:]) for x, y in zip(idx_test, ica_predict)}
        # Training nodes keep their ground-truth class.
        dict_pred.update({x: true_classes[x] for x in idx_train})

        self.probs = np.vstack((labels[idx_train], probs))
        # Hard predictions ordered by node index.
        self.concated = np.array([dict_pred[k] for k in sorted(dict_pred)])

        # Make sure the cache directory exists so the expensive result
        # above is not lost to a FileNotFoundError on save.
        os.makedirs(cache_dir, exist_ok=True)
        np.save(probs_path, self.probs)
        np.save(preds_path, self.concated)
    else:
        print('loading probs/preds...')
        self.probs = np.load(probs_path)
        self.concated = np.load(preds_path)

    print('Acc: ',
          (self.concated == true_classes)[idx_test].sum() / len(idx_test))
else: test = idx_test eval_idx = np.setdiff1d(range(adj.shape[0]), idx_train) # run training ica_accuracies = list() for run in range(args.num_trials): t_begin = time.time() # random ordering np.random.shuffle(eval_idx) y_true = [graph.node_list[t].label for t in test] local_clf = LocalClassifier(args.classifier) agg = pick_aggregator(args.aggregate, domain_labels) relational_clf = RelationalClassifier(args.classifier, agg) ica = ICA(local_clf, relational_clf, args.bootstrap, max_iteration=args.max_iteration) ica.fit(graph, train) conditional_node_to_label_map = create_map(graph, train) ica_predict = ica.predict(graph, eval_idx, test, conditional_node_to_label_map) ica_accuracy = accuracy_score(y_true, ica_predict) ica_accuracies.append(ica_accuracy) print('Run ' + str(run) + ': \t\t' + str(ica_accuracy) + ', Elapsed time: \t\t' + str(time.time() - t_begin))