コード例 #1
0
    # Positional command-line arguments: the train/test/answer inputs, the
    # user profile with its config, then the two output paths.
    train_file = sys.argv[1]
    test_file = sys.argv[2]
    test_ans_file = sys.argv[3]
    user_profile_file = sys.argv[4]
    config_file = sys.argv[5]
    out_train_file = sys.argv[6]
    out_test_file = sys.argv[7]

    # Read in data.
    train_graph = file_io.read_graph(train_file)
    config = file_io.read_config(config_file)

    # NOTE: feature_name is not used by the code visible here.
    (user_feature,
     feature_name) = file_io.read_feature_column_major(user_profile_file,
                                                       config)

    # Normalize features. The helpers' return values are discarded, so they
    # presumably modify each column in place -- TODO confirm. Columns whose
    # type is neither 'numerical' nor 'categorical' are left untouched.
    for column in user_feature:
        if column.type == 'numerical':
            cf.normalize_column(column)
        elif column.type == 'categorical':
            cf.convert_to_dummy_variable(column)

    # The graph is updated from the test pairs before either data set is
    # generated, so both generators receive the same updated graph.
    test_pair = file_io.read_data(test_file)
    train_graph = update_nodes_from_test_data(train_graph, test_pair)
    test_ans = gen_label_mapping(test_pair, file_io.read_ans(test_ans_file))
    gen_training_data(train_graph, user_feature, out_train_file)
    gen_testing_data(train_graph, test_pair, test_ans, user_feature,
                     out_test_file)
コード例 #2
0
    # Positional command-line arguments: the train/test/answer inputs, the
    # user profile with its config, then the two output paths.
    train_file = sys.argv[1]
    test_file = sys.argv[2]
    test_ans_file = sys.argv[3]
    user_profile_file = sys.argv[4]
    config_file = sys.argv[5]
    out_train_file = sys.argv[6]
    out_test_file = sys.argv[7]

    # Read in data.
    train_graph = file_io.read_graph(train_file)
    config = file_io.read_config(config_file)

    # NOTE: feature_name is not used by the code visible here.
    (user_feature, feature_name) = file_io.read_feature_column_major(
        user_profile_file, config)

    # Normalize features. The helpers' return values are discarded, so they
    # presumably modify each column in place -- TODO confirm. Columns whose
    # type is neither 'numerical' nor 'categorical' are left untouched.
    for column in user_feature:
        if column.type == 'numerical':
            cf.normalize_column(column)
        elif column.type == 'categorical':
            cf.convert_to_dummy_variable(column)

    # The graph is updated from the test pairs before either data set is
    # generated, so both generators receive the same updated graph.
    test_pair = file_io.read_data(test_file)
    train_graph = update_nodes_from_test_data(train_graph, test_pair)
    test_ans = gen_label_mapping(test_pair, file_io.read_ans(test_ans_file))
    gen_training_data(train_graph, user_feature, out_train_file)
    gen_testing_data(train_graph, test_pair, test_ans,
                     user_feature, out_test_file)