# Command-line driver: reads a training graph, test pairs with their answers,
# and a user profile table, then writes one training file and one testing file.
#
# Positional arguments, in order:
#   train_file test_file test_ans_file user_profile_file config_file
#   out_train_file out_test_file

# arguments
# Fail early with a usage message rather than a bare IndexError when
# positional arguments are missing. Surplus arguments are ignored, as before.
if len(sys.argv) < 8:
    sys.exit(
        'usage: %s train_file test_file test_ans_file user_profile_file '
        'config_file out_train_file out_test_file'
        % (sys.argv[0] if sys.argv else 'script')
    )

train_file = sys.argv[1]
test_file = sys.argv[2]
test_ans_file = sys.argv[3]
user_profile_file = sys.argv[4]
config_file = sys.argv[5]
out_train_file = sys.argv[6]
out_test_file = sys.argv[7]

# read in data
train_graph = file_io.read_graph(train_file)
config = file_io.read_config(config_file)
# feature_name is not used below; it is kept because the reader returns a pair.
(user_feature, feature_name) = file_io.read_feature_column_major(
    user_profile_file, config)

#normalize features
# Only 'numerical' and 'categorical' columns are processed; a column of any
# other type is left untouched.
for column in user_feature:
    if column.type == 'numerical':
        cf.normalize_column(column)
    elif column.type == 'categorical':
        cf.convert_to_dummy_variable(column)

test_pair = file_io.read_data(test_file)
# NOTE(review): presumably adds nodes that occur only in the test pairs to the
# training graph -- confirm against update_nodes_from_test_data.
train_graph = update_nodes_from_test_data(train_graph, test_pair)
# NOTE(review): presumably associates each test pair with its answer label --
# confirm against gen_label_mapping.
test_ans = gen_label_mapping(test_pair, file_io.read_ans(test_ans_file))

# Emit the two output files. Both calls receive the updated graph and the
# processed user features.
gen_training_data(train_graph, user_feature, out_train_file)
gen_testing_data(train_graph, test_pair, test_ans, user_feature, out_test_file)