def test_feature_extraction(arguments): print 'Extracting training examples.' train_graph_obj = Train_Graph(graph_file_root='smallest_train') train_pgraph = train_graph_obj.pgraph train_examples, train_labels = extract_examples(train_pgraph, 10000, 10000) print 'Extracting testing examples' test_graph_obj = Train_Graph(graph_file_root='smallest_test') test_pgraph = test_graph_obj.pgraph test_examples, test_labels = extract_test_examples(train_pgraph, test_pgraph, \ train_examples, 1000, 1000) print 'Done!' print 'Graph Distance:', get_graph_distance(train_pgraph, train_examples[0][0], train_examples[0][1]) print 'Common Neighbors:', get_common_neighbors(train_pgraph, train_examples[0][0], train_examples[0][1]) print 'Jaccard Coefficient:', jaccard_coefficient(train_pgraph, train_examples[0][0], train_examples[0][1]) print 'Adamic Adar:', adamic_adar(train_pgraph, train_examples[0][0], train_examples[0][1]) print 'Preferential Attachment:', preferential_attachment( train_pgraph, train_examples[0][0], train_examples[0][1])
def test_graph_creation(arguments): # Arguments are data_path, output_root print arguments print 'Testing train_graph.py...' time_lbound = datetime.datetime(2013, 1, 1) time_ubound = datetime.datetime(2013, 4, 30) train_graph_obj = Train_Graph(time_lbound=time_lbound, \ time_ubound=time_ubound, \ src_path=arguments[0]) train_graph_obj.write_to_file(arguments[1]) print train_graph_obj.attributes.items()[:10] print train_graph_obj.pgraph.GetNodes() print train_graph_obj.pgraph.GetEdges() print 'Done!'
def test_graph_loading(arguments): print 'Testing graph loading...' train_graph_obj = Train_Graph(graph_file_root='mid_train') print 'Done!' print train_graph_obj.attributes.items()[:10] print train_graph_obj.pgraph.GetNodes() print train_graph_obj.pgraph.GetEdges()
def main(input_train, input_test, output_root):
    # Pin-prediction pipeline: load train/test graphs, sample labelled node
    # pairs, extract proximity features, persist them, then benchmark the
    # classifiers.
    print 'Extracting training examples.'
    train_graph_obj = Train_Graph(graph_file_root=input_train)
    train_pgraph = train_graph_obj.pgraph
    # NOTE(review): despite the name, this is the whole training graph —
    # no SCC extraction happens here. Confirm whether that is intended.
    max_scc = train_pgraph
    board_ids = train_graph_obj.board_node_ids
    # 5000 positive / 5000 negative training pairs.
    train_examples, train_labels = get_pin_tr_ex(max_scc, 5000, 5000, board_ids)
    validate_train(train_examples, train_labels, max_scc, board_ids)
    '''
    We need to make sure that every pair of nodes actually appears in the
    original training component, but every pair itself is not in training
    examples.
    '''
    print 'Extracting testing examples'
    test_graph_obj = Test_Graph(graph_file_root=input_test)
    test_pgraph = test_graph_obj.pgraph
    # 2500 positive / 2500 negative test pairs, drawn so they do not
    # collide with the training pairs.
    test_examples, test_labels = get_pin_tst_ex(max_scc, test_pgraph, \
        train_examples, 2500, 2500, test_graph_obj.board_node_ids)
    # Make sure test set satisfies criteria
    validate_test(test_examples, test_labels, train_examples, test_pgraph,
                  max_scc, test_graph_obj.board_node_ids)
    # Define all feature functions we will be using
    feature_funcs = [get_graph_distance, get_ev_centr_sum, get_page_rank_sum, \
                     preferential_attachment, get_2_hops, get_degree_sum, \
                     std_nbr_degree_sum, mean_nbr_deg_sum, adamic_adar_2, \
                     common_neighbors_2]
    # feature_funcs = [jaccard_2, preferential_attachment, get_degree_sum]
    # # Test each feature function on its own
    # test_proximity(feature_funcs, test_examples, test_labels, max_scc, 5000)
    # Convert our training examples and testing examples to feature
    # vectors
    print 'Extracting features for classifier'
    all_train_features, all_test_features = get_all_features(
        feature_funcs, max_scc, train_examples, test_examples)
    print 'Saving features to file...'
    # Best-effort persistence: a failed save is reported but does not
    # abort classification.
    try:
        np.save('train_' + output_root + '_pin_features', all_train_features)
        np.save('test_' + output_root + '_pin_features', all_test_features)
        np.save('train_' + output_root + '_pin_examples',
                zip(train_examples, train_labels))
        np.save('test_' + output_root + '_pin_examples',
                zip(test_examples, test_labels))
    except Exception as e:
        print str(e)
    # Standardize features (zero mean, unit variance) before training.
    all_train_features = sklearn.preprocessing.scale(all_train_features)
    all_test_features = sklearn.preprocessing.scale(all_test_features)
    # # Test our classifiers over these features
    test_classifiers(all_train_features, train_labels,
                     all_test_features, test_labels)
def main(input_train, input_test, num_intervals):
    # Temporal variant of the pipeline: edges are bucketed into time
    # intervals and features are computed per interval.
    # Read in the graph
    train_graph_obj = Train_Graph(graph_file_root=input_train)
    train_pgraph = train_graph_obj.pgraph
    # (Get max SCC?)
    # Get limits on the time range
    print 'Getting time limits'
    min_time, max_time = get_time_limits(train_graph_obj)
    # Divide into intervals based on time range
    print 'Dividing into intervals'
    interval_edges = get_intervals(min_time, max_time, train_pgraph, \
        train_graph_obj.attributes, num_intervals,
        train_graph_obj.board_node_ids)
    # Every edge must land in exactly one interval.
    assert sum([len(interval) for interval in interval_edges]) == \
        train_pgraph.GetEdges()
    # Extract positive and negative training examples in the last frame
    print 'Getting training examples/labels'
    train_examples, train_labels = get_train_set(train_pgraph, interval_edges, \
        train_graph_obj.board_node_ids, train_graph_obj.attributes, \
        num_pos=5000, num_neg=5000)
    # Contruct our testing set
    test_graph_obj = Test_Graph(graph_file_root=input_test)
    test_pgraph = test_graph_obj.pgraph
    print 'Getting testing examples/labels'
    test_examples, test_labels = get_pin_tst_ex(train_pgraph, test_pgraph, \
        train_examples, 2500, 2500, test_graph_obj.board_node_ids)
    feature_funcs = [get_graph_distance, get_ev_centr_sum, get_page_rank_sum, \
                     preferential_attachment, get_2_hops, get_degree_sum, \
                     std_nbr_degree_sum, mean_nbr_deg_sum, adamic_adar_2, \
                     common_neighbors_2]
    print 'Extracting Training features...'
    train_features = get_train_features(train_examples, train_pgraph,
                                        interval_edges, feature_funcs)
    # Best-effort persistence: a failed save is reported but not fatal.
    try:
        np.save('train_temp_fol_features', train_features)
        np.save('train_temp_fol_examples', zip(train_examples, train_labels))
    except Exception as e:
        print str(e)
    # Standardize features (zero mean, unit variance).
    train_features = sklearn.preprocessing.scale(train_features)
    print 'Extracting Testing features...'
    test_features = get_test_features(test_examples, train_pgraph,
                                      interval_edges, feature_funcs)
    try:
        np.save('test_temp_fol_features', test_features)
        np.save('test_temp_fol_examples', zip(test_examples, test_labels))
    except Exception as e:
        print str(e)
    test_features = sklearn.preprocessing.scale(test_features)
    test_classifiers(train_features, train_labels, test_features, test_labels)
def main():
    # Fixed-input variant of the pipeline over the 'smallest' graphs,
    # additionally breaking down evaluation by edge type.
    print 'Extracting training examples.'
    train_graph_obj = Train_Graph(graph_file_root='smallest_train')
    train_pgraph = train_graph_obj.pgraph
    # NOTE(review): despite the name, this is the whole training graph —
    # no SCC extraction happens here. Confirm whether that is intended.
    max_scc = train_pgraph
    # 10000 positive / 10000 negative training pairs.
    train_examples, train_labels = extract_examples(max_scc, 10000, 10000)
    validate_train(train_examples, train_labels, max_scc)
    '''
    We need to make sure that every pair of nodes actually appears in the
    original training component, but every pair itself is not in training
    examples.
    '''
    print 'Extracting testing examples'
    test_graph_obj = Test_Graph(graph_file_root='smallest_test')
    test_pgraph = test_graph_obj.pgraph
    test_examples, test_labels = extract_test_examples(max_scc, test_pgraph, \
        train_examples, 5000, 5000, test_graph_obj.board_node_ids)
    print 'Getting Edge types'
    # Used later to report per-edge-type classifier performance.
    test_edge_types = get_edge_types(test_examples,
                                     test_graph_obj.board_node_ids)
    # Make sure test set satisfies criteria
    validate_test(test_examples, test_labels, train_examples, test_pgraph,
                  max_scc, test_graph_obj.board_node_ids)
    # Define all feature functions we will be using
    feature_funcs = [get_ev_centr_sum, get_page_rank_sum, preferential_attachment, \
                     get_2_hops, get_degree_sum, std_nbr_degree_sum, \
                     mean_nbr_deg_sum, adamic_adar_2, common_neighbors_2, \
                     jaccard_2]
    # feature_funcs = [preferential_attachment]
    # Test each feature function on its own
    test_proximity(feature_funcs, test_examples, test_labels, max_scc, 5000,
                   test_edge_types)
    # Convert our training examples and testing examples to feature
    # vectors
    all_train_features, all_test_features = get_all_features(feature_funcs,
        max_scc, train_examples, test_examples)
    # Test our classifiers over these features
    test_classifiers(all_train_features, train_labels, all_test_features,
                     test_labels, test_edge_types)
def main(temp_train_feats, temp_train_ex, temp_test_feats, temp_test_ex, graph_file): train_graph_obj = Train_Graph(graph_file_root=graph_file) graph = train_graph_obj.pgraph train_examples = temp_train_ex[:, 0].tolist() train_labels = temp_train_ex[:, 1].tolist() test_examples = temp_test_ex[:, 0].tolist() test_labels = temp_test_ex[:, 1].tolist() feature_funcs = [preferential_attachment] # feature_funcs = [get_graph_distance, get_ev_centr_sum, get_page_rank_sum, \ # preferential_attachment, get_2_hops, get_degree_sum, \ # std_nbr_degree_sum, mean_nbr_deg_sum, adamic_adar_2, \ # common_neighbors_2] print 'Extracting features' norm_train_features, norm_test_features = get_all_features( feature_funcs, graph, train_examples, test_examples) all_train_feats = np.hstack([norm_train_features, temp_train_feats]) all_test_feats = np.hstack([norm_test_features, temp_test_feats]) all_train_feats = sklearn.preprocessing.scale(all_train_feats) all_test_feats = sklearn.preprocessing.scale(all_test_feats) print 'Testing Classifiers' test_classifiers(all_train_features, train_labels, all_test_features, test_labels)