Example #1
0
def test_feature_extraction(arguments):
    print 'Extracting training examples.'
    train_graph_obj = Train_Graph(graph_file_root='smallest_train')
    train_pgraph = train_graph_obj.pgraph
    train_examples, train_labels = extract_examples(train_pgraph, 10000, 10000)

    print 'Extracting testing examples'
    test_graph_obj = Train_Graph(graph_file_root='smallest_test')
    test_pgraph = test_graph_obj.pgraph
    test_examples, test_labels = extract_test_examples(train_pgraph, test_pgraph, \
                 train_examples, 1000, 1000)
    print 'Done!'

    print 'Graph Distance:', get_graph_distance(train_pgraph,
                                                train_examples[0][0],
                                                train_examples[0][1])
    print 'Common Neighbors:', get_common_neighbors(train_pgraph,
                                                    train_examples[0][0],
                                                    train_examples[0][1])
    print 'Jaccard Coefficient:', jaccard_coefficient(train_pgraph,
                                                      train_examples[0][0],
                                                      train_examples[0][1])
    print 'Adamic Adar:', adamic_adar(train_pgraph, train_examples[0][0],
                                      train_examples[0][1])
    print 'Preferential Attachment:', preferential_attachment(
        train_pgraph, train_examples[0][0], train_examples[0][1])
Example #2
0
def test_graph_creation(arguments):
    # Arguments are data_path, output_root
    print arguments
    print 'Testing train_graph.py...'
    time_lbound = datetime.datetime(2013, 1, 1)
    time_ubound = datetime.datetime(2013, 4, 30)
    train_graph_obj = Train_Graph(time_lbound=time_lbound, \
             time_ubound=time_ubound, \
             src_path=arguments[0])

    train_graph_obj.write_to_file(arguments[1])
    print train_graph_obj.attributes.items()[:10]
    print train_graph_obj.pgraph.GetNodes()
    print train_graph_obj.pgraph.GetEdges()
    print 'Done!'
Example #3
0
def test_graph_loading(arguments):
    print 'Testing graph loading...'
    train_graph_obj = Train_Graph(graph_file_root='mid_train')
    print 'Done!'
    print train_graph_obj.attributes.items()[:10]
    print train_graph_obj.pgraph.GetNodes()
    print train_graph_obj.pgraph.GetEdges()
Example #4
0
def main(input_train, input_test, output_root):
    print 'Extracting training examples.'
    train_graph_obj = Train_Graph(graph_file_root=input_train)
    train_pgraph = train_graph_obj.pgraph
    max_scc = train_pgraph
    board_ids = train_graph_obj.board_node_ids
    train_examples, train_labels = get_pin_tr_ex(max_scc, 5000, 5000,
                                                 board_ids)
    validate_train(train_examples, train_labels, max_scc, board_ids)
    '''
	We need to make sure that every pair of nodes actually appears in the
	original training component, but every pair itself is not in training
	examples.
	'''
    print 'Extracting testing examples'
    test_graph_obj = Test_Graph(graph_file_root=input_test)
    test_pgraph = test_graph_obj.pgraph
    test_examples, test_labels = get_pin_tst_ex(max_scc, test_pgraph, \
           train_examples, 2500, 2500, test_graph_obj.board_node_ids)

    # Make sure test set satisfies criteria
    validate_test(test_examples, test_labels, train_examples, test_pgraph,
                  max_scc, test_graph_obj.board_node_ids)

    # Define all feature functions we will be using
    feature_funcs = [get_graph_distance, get_ev_centr_sum, get_page_rank_sum, \
        preferential_attachment, get_2_hops, get_degree_sum, \
        std_nbr_degree_sum, mean_nbr_deg_sum, adamic_adar_2, \
        common_neighbors_2]
    # feature_funcs = [jaccard_2, preferential_attachment, get_degree_sum]

    # # Test each feature function on its own
    # test_proximity(feature_funcs, test_examples, test_labels, max_scc, 5000)

    # Convert our training examples and testing examples to feature
    # vectors
    print 'Extracting features for classifier'
    all_train_features, all_test_features = get_all_features(
        feature_funcs, max_scc, train_examples, test_examples)
    print 'Saving features to file...'
    try:
        np.save('train_' + output_root + '_pin_features', all_train_features)
        np.save('test_' + output_root + '_pin_features', all_test_features)
        np.save('train_' + output_root + '_pin_examples',
                zip(train_examples, train_labels))
        np.save('test_' + output_root + '_pin_examples',
                zip(test_examples, test_labels))
    except Exception as e:
        print str(e)
    all_train_features = sklearn.preprocessing.scale(all_train_features)
    all_test_features = sklearn.preprocessing.scale(all_test_features)
    # # Test our classifiers over these features
    test_classifiers(all_train_features, train_labels, all_test_features,
                     test_labels)
Example #5
0
def main(input_train, input_test, num_intervals):
    # Read in the graph
    train_graph_obj = Train_Graph(graph_file_root=input_train)
    train_pgraph = train_graph_obj.pgraph
    # (Get max SCC?)

    # Get limits on the time range
    print 'Getting time limits'
    min_time, max_time = get_time_limits(train_graph_obj)

    # Divide into intervals based on time range
    print 'Dividing into intervals'
    interval_edges = get_intervals(min_time, max_time, train_pgraph, \
      train_graph_obj.attributes, num_intervals, train_graph_obj.board_node_ids)
    assert sum([len(interval)
                for interval in interval_edges]) == train_pgraph.GetEdges()

    # Extract positive and negative training examples in the last frame
    print 'Getting training examples/labels'
    train_examples, train_labels = get_train_set(train_pgraph, interval_edges, \
           train_graph_obj.board_node_ids, train_graph_obj.attributes, \
           num_pos=5000, num_neg=5000)

    # Contruct our testing set
    test_graph_obj = Test_Graph(graph_file_root=input_test)
    test_pgraph = test_graph_obj.pgraph
    print 'Getting testing examples/labels'
    test_examples, test_labels = get_pin_tst_ex(train_pgraph, test_pgraph, \
           train_examples, 2500, 2500, test_graph_obj.board_node_ids)

    feature_funcs = [get_graph_distance, get_ev_centr_sum, get_page_rank_sum, \
        preferential_attachment, get_2_hops, get_degree_sum, \
        std_nbr_degree_sum, mean_nbr_deg_sum, adamic_adar_2, \
        common_neighbors_2]
    print 'Extracting Training features...'
    train_features = get_train_features(train_examples, train_pgraph,
                                        interval_edges, feature_funcs)
    try:
        np.save('train_temp_fol_features', train_features)
        np.save('train_temp_fol_examples', zip(train_examples, train_labels))
    except Exception as e:
        print str(e)
    train_features = sklearn.preprocessing.scale(train_features)
    print 'Extracting Testing features...'
    test_features = get_test_features(test_examples, train_pgraph,
                                      interval_edges, feature_funcs)
    try:
        np.save('test_temp_fol_features', test_features)
        np.save('test_temp_fol_examples', zip(test_examples, test_labels))
    except Exception as e:
        print str(e)
    test_features = sklearn.preprocessing.scale(test_features)

    test_classifiers(train_features, train_labels, test_features, test_labels)
def main():
	print 'Extracting training examples.'
	train_graph_obj = Train_Graph(graph_file_root='smallest_train')
	train_pgraph = train_graph_obj.pgraph
	max_scc = train_pgraph
	train_examples, train_labels = extract_examples(max_scc, 10000, 10000)
	validate_train(train_examples, train_labels, max_scc)


	'''
	We need to make sure that every pair of nodes actually appears in the
	original training component, but every pair itself is not in training
	examples.
	'''
	print 'Extracting testing examples'
	test_graph_obj = Test_Graph(graph_file_root='smallest_test')
	test_pgraph = test_graph_obj.pgraph
	test_examples, test_labels = extract_test_examples(max_scc, test_pgraph, \
								train_examples, 5000, 5000, test_graph_obj.board_node_ids)
	print 'Getting Edge types'
	test_edge_types = get_edge_types(test_examples, test_graph_obj.board_node_ids)

	# Make sure test set satisfies criteria
	validate_test(test_examples, test_labels, train_examples, test_pgraph, max_scc, test_graph_obj.board_node_ids)

	# Define all feature functions we will be using
	feature_funcs = [get_ev_centr_sum, get_page_rank_sum, preferential_attachment, \
					get_2_hops, get_degree_sum, std_nbr_degree_sum, \
					mean_nbr_deg_sum, adamic_adar_2, common_neighbors_2, \
					jaccard_2]
	# feature_funcs = [preferential_attachment]

	# Test each feature function on its own
	test_proximity(feature_funcs, test_examples, test_labels, max_scc, 5000, test_edge_types)

	# Convert our training examples and testing examples to feature
	# vectors
	all_train_features, all_test_features = get_all_features(feature_funcs, max_scc, train_examples, test_examples)
	
	# Test our classifiers over these features
	test_classifiers(all_train_features, train_labels, all_test_features, test_labels, test_edge_types)
Example #7
0
def main(temp_train_feats, temp_train_ex, temp_test_feats, temp_test_ex,
         graph_file):
    train_graph_obj = Train_Graph(graph_file_root=graph_file)
    graph = train_graph_obj.pgraph
    train_examples = temp_train_ex[:, 0].tolist()
    train_labels = temp_train_ex[:, 1].tolist()
    test_examples = temp_test_ex[:, 0].tolist()
    test_labels = temp_test_ex[:, 1].tolist()
    feature_funcs = [preferential_attachment]
    # feature_funcs = [get_graph_distance, get_ev_centr_sum, get_page_rank_sum, \
    # 				preferential_attachment, get_2_hops, get_degree_sum, \
    # 				std_nbr_degree_sum, mean_nbr_deg_sum, adamic_adar_2, \
    # 				common_neighbors_2]
    print 'Extracting features'
    norm_train_features, norm_test_features = get_all_features(
        feature_funcs, graph, train_examples, test_examples)
    all_train_feats = np.hstack([norm_train_features, temp_train_feats])
    all_test_feats = np.hstack([norm_test_features, temp_test_feats])
    all_train_feats = sklearn.preprocessing.scale(all_train_feats)
    all_test_feats = sklearn.preprocessing.scale(all_test_feats)
    print 'Testing Classifiers'
    test_classifiers(all_train_features, train_labels, all_test_features,
                     test_labels)