# NOTE(review): this fragment opens mid-statement (the leading ")" closes a
# call that starts above) and sits inside a branch whose `if` header is not
# shown here - presumably `if do_preprocess:`; confirm against the full file.
#
# First branch: prints the aux/data output paths, the rating, item-document
# and user-profile input paths, and the numeric options (min_rating,
# max_length, max_df, vocab_size, split_ratio). It then calls
# data_factory.preprocess_ext(...) to obtain R and D_all, hands both to
# data_factory.save(aux_path, ...), and passes R with split_ratio to
# data_factory.generate_train_valid_test_file_from_R(data_path, ...).
#
# `else` branch: copies the result directory and the model/training options
# (embedding size, pretrained w2v setting, latent dimension, lambda_u/lambda_v,
# max_iter, kernels per window size, weighting flag, document/sentence length
# thresholds, binary_rating) from `args` into local names.
) print("\tsaving preprocessed aux path - %s" % aux_path) print("\tsaving preprocessed data path - %s" % data_path) print("\trating data path - %s" % path_rating) print("\tdocument data path - %s" % path_itemtext) print("\tprofile data path - %s" % path_usertext) print ("\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f" \ % (min_rating, max_length, max_df, vocab_size, split_ratio)) print( "===========================================================================================" ) R, D_all = data_factory.preprocess_ext(path_rating, path_itemtext, path_usertext, min_rating, max_length, max_df, vocab_size) data_factory.save(aux_path, R, D_all) data_factory.generate_train_valid_test_file_from_R(data_path, R, split_ratio) else: res_dir = args.res_dir emb_dim = args.emb_dim pretrain_w2v = args.pretrain_w2v dimension = args.dimension lambda_u = args.lambda_u lambda_v = args.lambda_v max_iter = args.max_iter num_kernel_per_ws = args.num_kernel_per_ws give_weight = args.give_weight threshold_doclen = args.threshold_length_document threshold_sentlen = args.threshold_length_sentence binary_rating = args.binary_rating
# NOTE(review): this fragment starts inside a branch whose `if` header is not
# shown here - presumably `if do_preprocess:`; confirm against the full file.
#
# First branch: reads max_df, vocab_size and split_ratio from `args`, prints
# the aux/data output paths, the rating and item-document input paths and the
# numeric options, then calls data_factory.preprocess(...) to obtain R and
# D_all, hands both to data_factory.save(aux_path, ...), and passes R with
# split_ratio to data_factory.generate_train_valid_test_file_from_R(data_path, ...).
#
# `else` branch: copies the result directory and the model/training options
# from `args` into local names, and exits via sys.exit with an
# "Argument missing" message when res_dir is None.
max_df = args.max_df vocab_size = args.vocab_size split_ratio = args.split_ratio print "=================================Preprocess Option Setting=================================" print "\tsaving preprocessed aux path - %s" % aux_path print "\tsaving preprocessed data path - %s" % data_path print "\trating data path - %s" % path_rating print "\tdocument data path - %s" % path_itemtext print "\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f" \ % (min_rating, max_length, max_df, vocab_size, split_ratio) print "===========================================================================================" R, D_all = data_factory.preprocess( path_rating, path_itemtext, min_rating, max_length, max_df, vocab_size) data_factory.save(aux_path, R, D_all) data_factory.generate_train_valid_test_file_from_R( data_path, R, split_ratio) else: res_dir = args.res_dir emb_dim = args.emb_dim pretrain_w2v = args.pretrain_w2v dimension = args.dimension lambda_u = args.lambda_u lambda_v = args.lambda_v max_iter = args.max_iter num_kernel_per_ws = args.num_kernel_per_ws give_item_weight = args.give_item_weight if res_dir is None: sys.exit("Argument missing - res_dir is required")
# NOTE(review): this fragment starts inside a branch whose `if` header is not
# shown here - presumably `if do_preprocess:`; confirm against the full file.
#
# First branch: prints the aux/data output paths, the rating and item-document
# input paths, the numeric options and the value of new_item.
#   * When new_item is truthy it prints "Under construction", then still calls
#     data_factory.preprocess_newitem(...) to obtain R_old, R_new, D_old and
#     D_new, and forwards them to data_factory.save(...) and
#     data_factory.generate_train_valid_test_file_from_R(...).
#   * Otherwise it calls data_factory.preprocess(...) and passes R / D_all to
#     the same save and split calls.
#
# Outer `else` branch: copies res_dir, emb_dim, pretrain_w2v and dimension
# from `args` into local names.
#
# NOTE(review): "itmes" in the "number of new itmes" message looks like a typo
# for "items"; it is left untouched here because it is runtime output.
print "=================================Preprocess Option Setting=================================" print "\tsaving preprocessed aux path - %s" % aux_path print "\tsaving preprocessed data path - %s" % data_path print "\trating data path - %s" % path_rating print "\tdocument data path - %s" % path_itemtext print "\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f" \ % (min_rating, max_length, max_df, vocab_size, split_ratio) print "\tnumber of new itmes: %d" % new_item print "===========================================================================================" if new_item: print "Under construction" R_old, R_new, D_old, D_new = data_factory.preprocess_newitem( path_rating, path_itemtext, min_rating, max_length, max_df, vocab_size, new_item) data_factory.save(aux_path, R_old, D_old, True, R_new, D_new) data_factory.generate_train_valid_test_file_from_R( data_path, R_old, split_ratio, R_new) else: R, D_all = data_factory.preprocess(path_rating, path_itemtext, min_rating, max_length, max_df, vocab_size) data_factory.save(aux_path, R, D_all) data_factory.generate_train_valid_test_file_from_R( data_path, R, split_ratio) else: res_dir = args.res_dir emb_dim = args.emb_dim pretrain_w2v = args.pretrain_w2v dimension = args.dimension
# One Data_Factory instance is created up front; the preprocessing branch
# below is the only user of it within this block.
data_factory = Data_Factory()

if do_preprocess:
    # Preprocessing mode: read the raw-rating options, echo them, then run the
    # preprocess -> save -> split sequence on the rating data.
    path_rating = args.raw_rating_data_path
    min_rating = args.min_rating
    split_ratio = args.split_ratio

    # Single-argument parenthesised print emits the same text on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on Python 3.
    print("=================================Preprocess Option Setting=================================")
    print("\tsaving preprocessed aux path - %s" % aux_path)
    print("\tsaving preprocessed data path - %s" % data_path)
    print("\trating data path - %s" % path_rating)
    print("\tmin_rating: %d\n\t split_ratio: %.1f" % (min_rating, split_ratio))
    print("===========================================================================================")

    R = data_factory.preprocess(path_rating, min_rating)
    data_factory.save(aux_path, R)
    data_factory.generate_train_valid_test_file_from_R(data_path, R, split_ratio)
else:
    # Non-preprocessing mode: copy the method flag and the hyper-parameters
    # from the parsed arguments into local names.
    methods = args.flag
    dimension = args.dimension
    lambda_u = args.lambda_u
    lambda_v = args.lambda_v
    lambda_p = args.lambda_p
    lambda_q = args.lambda_q
    max_iter = args.max_iter
    momentum_flag = args.momentum_flag

    # NOTE(review): nesting was rebuilt from a whitespace-collapsed line.
    # lambda_u is only bound in this branch, so the check is placed here -
    # confirm against the original layout.
    if lambda_u is None:
        sys.exit("Argument missing - lambda_u is required")