# Prepare output directory for models and summaries
# =======================================================
# Each run writes its artifacts (trained word2vec model, summaries,
# checkpoints) into a fresh runs/<unix-timestamp>/ directory.
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Data preprocess
# =======================================================
# Load data
print("Loading data...")
# cut=False / stop_words_list_file=None: load the raw text without word
# segmentation and without stop-word filtering.
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.positive_data_file,
    FLAGS.negative_data_file,
    cut=False,
    stop_words_list_file=None,
)

# Get embedding vector
# Fixed padding length used by this variant; other variants of this script
# let padding_sentences() derive the length from the longest document.
PADDING_SENTENCE_LENGTH = 20
# NOTE(review): with an explicit padding_sentence_length this call is
# unpacked as a single value here, while sibling scripts unpack
# (sentences, max_document_length) — confirm padding_sentences() really
# returns only the padded sentences in this mode.
sentences = data_helpers.padding_sentences(
    x_text, '<PADDING>', padding_sentence_length=PADDING_SENTENCE_LENGTH)
# Embed every padded sentence and persist the word2vec model next to the
# other run artifacts so evaluation can reload it later.
x = np.array(
    word2vec_helpers.embedding_sentences(
        sentences,
        embedding_size=FLAGS.embedding_dim,
        file_to_save=os.path.join(out_dir, 'trained_word2vec.model')))
# ---------------------------------------------------------------
# Output directory: one runs/<unix-timestamp>/ folder per training
# run, holding the word2vec model and summaries.
# ---------------------------------------------------------------
timestamp = str(int(time.time()))
run_root = os.path.join(os.path.curdir, "runs", timestamp)
out_dir = os.path.abspath(run_root)
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# ---------------------------------------------------------------
# Data preprocessing: load labelled sentences, pad them to a common
# length, then embed each padded sentence via word2vec.
# ---------------------------------------------------------------
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.positive_data_file, FLAGS.negative_data_file)

sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')
print('max_document_length:' + str(max_document_length))

w2v_model_path = os.path.join(out_dir, 'trained_word2vec.model')
embedded = word2vec_helpers.embedding_sentences(
    sentences,
    embedding_size=FLAGS.embedding_dim,
    file_to_save=w2v_model_path)
x = np.array(embedded)

print("x.shape = {}".format(x.shape))
print("y.shape = {}".format(y.shape))

# Save params
# ---------------------------------------------------------------
# Output directory: artifacts for this run live under
# runs/<unix-timestamp>/.
# ---------------------------------------------------------------
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# ---------------------------------------------------------------
# Data preprocessing: load, pad, and embed the labelled sentences.
# ---------------------------------------------------------------
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.positive_data_file, FLAGS.negative_data_file)

sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')

w2v_model_path = os.path.join(out_dir, 'trained_word2vec.model')
x = np.array(
    word2vec_helpers.embedding_sentences(
        sentences,
        embedding_size=FLAGS.embedding_dim,
        file_to_save=w2v_model_path))
print("x.shape = {}".format(x.shape))
print("y.shape = {}".format(y.shape))

# Persist the parameters evaluation needs to reproduce this
# preprocessing (label count and padded document length).
training_params_file = os.path.join(out_dir, 'training_params.pickle')
params = {
    'num_labels': FLAGS.num_labels,
    'max_document_length': max_document_length,
}
data_helpers.saveDict(params, training_params_file)

# Shuffle the examples with a fixed seed so runs are reproducible.
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
print("Using word2vec model file : {}".format(trained_word2vec_model_file))

# Validate the training params file saved by the training script; it lives
# one level above the checkpoint directory.
training_params_file = os.path.join(
    FLAGS.checkpoint_dir, "..", "training_params.pickle")
if not os.path.exists(training_params_file):
    print("Training params file '{}' is missing!".format(training_params_file))
    # Fail fast: loadDict below would otherwise crash on the missing path
    # with a far less helpful error.  IOError works on both Py2 and Py3.
    raise IOError(
        "Training params file '{}' is missing!".format(training_params_file))
print("Using training params file : {}".format(training_params_file))

# Load params written at training time so evaluation pads the input to
# exactly the same document length the model was trained with.
params = data_helpers.loadDict(training_params_file)
num_labels = int(params['num_labels'])
max_document_length = int(params['max_document_length'])

# Load data
if FLAGS.eval_train:
    # NOTE(review): every other call site passes the individual data-file
    # flags (e.g. FLAGS.positive_data_file, FLAGS.negative_data_file), not
    # the whole FLAGS object — confirm the helper accepts FLAGS here.
    x_raw, y_test = data_helpers.load_positive_negative_data_files(FLAGS)
else:
    x_raw = ["a masterpiece four years in the making", "everything is off."]
    y_test = [1, 0]

# Get embedding vector for x_test
# (was a bare Python-2 `print max_document_length` statement, which is a
# syntax error under Python 3 while the rest of this file uses print())
print(max_document_length)
x_test, max_document_length = data_helpers.padding_sentences(
    x_raw, '<PADDING>', padding_sentence_length=max_document_length)
# Reload the word2vec model trained alongside the checkpoint; presumably
# embedding_sentences returns (vectors, model) when file_to_load is given
# — only the model object is kept here.
_, w2vModel = word2vec_helpers.embedding_sentences(
    file_to_load=trained_word2vec_model_file)
x_test = np.array(x_test)
print("x_test.shape = {}".format(x_test.shape))

# Evaluation
# ==================================================
# ---------------------------------------------------------------
# Output directory: runs/<unix-timestamp>/ holds this run's model
# and summaries.
# ---------------------------------------------------------------
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# ---------------------------------------------------------------
# Data preprocessing: three-class variant (cooking / music / video).
# ---------------------------------------------------------------
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.cooking_data_file, FLAGS.music_data_file, FLAGS.video_data_file)

# Held-out test set, loaded through the same helper.
x_test, y_test = data_helpers.load_positive_negative_data_files(
    FLAGS.cooking_test, FLAGS.music_test, FLAGS.video_test)
print('=============', len(x_test), len(x_test[0]))

# Pad the training sentences to a common length, then embed them with
# word2vec, saving the model next to the other run artifacts.
sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')
w2v_model_path = os.path.join(out_dir, 'trained_word2vec.model')
x = np.array(
    word2vec_helpers.embedding_sentences(
        sentences,
        embedding_size=FLAGS.embedding_dim,
        file_to_save=w2v_model_path))
# ---------------------------------------------------------------
# Output directory: one runs/<unix-timestamp>/ folder per run.
# ---------------------------------------------------------------
timestamp = str(int(time.time()))
run_dir = os.path.join(os.path.curdir, "runs", timestamp)
out_dir = os.path.abspath(run_dir)
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# ---------------------------------------------------------------
# Data preprocessing: four-class variant
# (bingyin / zhenduan / zhiliao / zhengzhuang).
# ---------------------------------------------------------------
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.bingyin_data_file,
    FLAGS.zhenduan_data_file,
    FLAGS.zhiliao_data_file,
    FLAGS.zhengzhuang_data_file)

# Pad to a common document length, then embed each padded sentence and
# persist the word2vec model for later evaluation.
sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')
w2v_model_path = os.path.join(out_dir, 'trained_word2vec.model')
embedded = word2vec_helpers.embedding_sentences(
    sentences,
    embedding_size=FLAGS.embedding_dim,
    file_to_save=w2v_model_path)
x = np.array(embedded)

print("x.shape = {}".format(x.shape))
print("y.shape = {}".format(y.shape))

# Save params