def __init__(self, config):
    vocab_path = os.path.join(config.OVERALL_PROCESSED_PATH, 'vocab.p')
    self.vocab_to_int, self.int_to_vocab = helper.load_vocab(vocab_path)
    self.graph, self.sess = self.load_graph()
    self.custom_dict = custom_dict
    (self.input_data, self.targets, self.inference_logits, self.training_logits,
     self.source_sequence_length, self.target_sequence_length,
     self.keep_prob) = self.get_tensors()
    print('Chatbot model created')
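# Below is a minimal sketch of what load_graph() and get_tensors() used above could
# look like, assuming the chatbot graph was exported as a TensorFlow 1.x SavedModel.
# The tag ('serve'), the export path attribute (config.MODEL_EXPORT_PATH) and the
# tensor names are illustrative assumptions, not taken from the actual project code.
import tensorflow as tf

def load_graph(self):
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with graph.as_default():
        # Restore the exported graph and its variables into this session.
        tf.saved_model.loader.load(sess, ['serve'], config.MODEL_EXPORT_PATH)
    return graph, sess

def get_tensors(self):
    g = self.graph
    # Look up the input placeholders and output ops by the names used at export time.
    return (g.get_tensor_by_name('input:0'),
            g.get_tensor_by_name('targets:0'),
            g.get_tensor_by_name('predictions:0'),
            g.get_tensor_by_name('logits:0'),
            g.get_tensor_by_name('source_sequence_length:0'),
            g.get_tensor_by_name('target_sequence_length:0'),
            g.get_tensor_by_name('keep_prob:0'))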
def __init__(self, args, need_shuffle=True):
    super(SD, self).__init__()
    self.data_path = args.data_path
    self.batch_size = args.batch_size
    self.epochs = args.epochs
    self.l1table = load_l1table()
    self.l2table = load_l2table()
    # self.global_tokens = load_global_tokens()
    self.class_map = list(set(self.l2table.values()))
    self.total_class = len(self.class_map)
    self.max_doc = args.max_doc
    # Default vocab path is tagged with the day of the year, so reruns on the same
    # day reuse the same vocabulary file.
    self.vocab_path = args.vocab_path if args.vocab_path else os.path.join(
        args.model_dir,
        "vocab_{}th".format(datetime.today().timetuple().tm_yday))
    # self.hv = get_hashing_vec(self.max_doc, "english")
    # self.find_bondary()
    # Reuse a previously saved vocabulary if one exists, otherwise fit a new one.
    if os.path.isfile(self.vocab_path):
        self.vocab_processor = load_vocab(self.vocab_path)
    else:
        self.vocab_processor = train_vocab(self.data_path, self.vocab_path, self.max_doc)
    self.x, self.y = self.load_data(need_shuffle)
    print("Max document length: {}".format(self.max_doc))
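# A minimal sketch of the load_vocab() / train_vocab() helpers used above, assuming
# they wrap tf.contrib.learn's VocabularyProcessor (TensorFlow 1.x). The read_texts()
# helper that yields raw documents from data_path is hypothetical.
from tensorflow.contrib import learn

def load_vocab(vocab_path):
    # Restore a vocabulary processor that was previously fitted and saved to disk.
    return learn.preprocessing.VocabularyProcessor.restore(vocab_path)

def train_vocab(data_path, vocab_path, max_doc):
    texts = read_texts(data_path)  # hypothetical: returns a list of raw documents
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length=max_doc)
    vocab_processor.fit(texts)
    vocab_processor.save(vocab_path)
    return vocab_processor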
builder = tf.saved_model.builder.SavedModelBuilder(
    os.path.join(cf.model_path, 'model_fold_{}'.format(idx)))
builder.add_meta_graph_and_variables(sess, ['training_model'])
builder.save()


if __name__ == '__main__':
    # train_data_file = sys.argv[1]
    # load data
    print("Loading data...")
    # TODO: read the data file location from outside (e.g. a command-line argument)
    filename = os.path.join(cf.data_path, train_data_file)
    X, Y, sentiments, patterns = data_helper.map_file_to_ids(filename=filename)
    vocab, ivocab, label_vocab, label_ivocab = data_helper.load_vocab(filename)
    embeddings = data_helper.load_embedding(cf.word2vec_path, vocab)
    num_classes = len(label_vocab)
    vocab_size = len(vocab)
    x_fold = cf.num_fold
    # x_fold == 0 means no cross-validation: hold out the first tenth of the data
    # as the dev set and train on the rest.
    if x_fold == 0:
        data_size = len(Y)
        size_per_fold = int(data_size / 10)
        dev_start = 0 * size_per_fold
        dev_end = (0 + 1) * size_per_fold
        x_train, y_train = X[dev_end:], Y[dev_end:]
        sents_train = sentiments[dev_end:]
        patt_train = patterns[dev_end:]
        # dev set
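# For reference, a model exported with SavedModelBuilder under the 'training_model'
# tag (as above) could be restored roughly like this; the function name and the
# per-fold directory layout are illustrative, and tf / os / cf are assumed to be
# imported as in the rest of this script.
def restore_fold(idx):
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with graph.as_default():
        # Tags must match those passed to add_meta_graph_and_variables().
        tf.saved_model.loader.load(
            sess, ['training_model'],
            os.path.join(cf.model_path, 'model_fold_{}'.format(idx)))
    return graph, sess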
               framealpha=0.8, fontsize=12)
    plt.title(u'sentiment classification', fontsize=17)
    plt.show()
    # print the classification results
    print_result(predictions, probs, y_dev, vocab, label_vocab)


if __name__ == '__main__':
    # test_data_file = sys.argv[1]
    export_model = sys.argv[2]
    # load data
    print("Loading data...")
    filename = os.path.join(cf.data_path, test_data_file)
    X, Y, sentiment_dict_features, patterns = data_helper.map_file_to_ids(
        filename=filename)
    # the vocabularies are needed for printing the results
    _, vocab, _, label_vocab = data_helper.load_vocab(filename)
    # random shuffle data (the permutation is computed but not applied below)
    np.random.seed(1)
    shuffle_indices = np.random.permutation(len(X))
    X_shuffled = X
    Y_shuffled = Y
    test(os.path.join(cf.model_path, export_model), X, Y,
         sentiment_dict_features, patterns)
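# Note: shuffle_indices above is never used, so X_shuffled and Y_shuffled are plain
# aliases of X and Y and the test data keeps its original order. If shuffling were
# actually wanted, the permutation could be applied with a small helper like this
# (assuming X and Y are numpy arrays or convertible to them):
def apply_permutation(X, Y, indices):
    X = np.asarray(X)[indices]
    Y = np.asarray(Y)[indices]
    return X, Y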
import os
from random import shuffle
#import numpy as np
import config
import tensorflow as tf
import seq2seq
import pickle
import helper
#from tensorflow.python.layers.core import Dense

#%%
## first, load and pad data
## load all data and vocabulary
vocab_path = os.path.join(config.PROCESSED_PATH, 'vocab.p')
train_token_path = os.path.join(config.PROCESSED_PATH, 'processed_tokens.p')
vocab_to_int, int_to_vocab = helper.load_vocab(vocab_path)
# Encoder and decoder share a single vocabulary.
config.source_vocab_size = len(vocab_to_int)
config.target_vocab_size = len(vocab_to_int)
train_enc_tokens, train_dec_tokens, test_enc_tokens, test_dec_tokens = helper.load_training_data(
    train_token_path)
bucket_ids = helper.bucket_training_data(train_enc_tokens, config.max_conv_length)
batches = helper.make_batches_of_bucket_ids(bucket_ids, config.batch_size)
## get a batch of data and pad it

#%%
## build the network
# create input placeholders
input_data, targets, lr, keep_prob, target_sequence_length, max_target_sequence_length, source_sequence_length, hrnn_sequence_length = seq2seq.model_inputs(
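# A minimal sketch of what seq2seq.model_inputs() might create, based on the names
# unpacked above. The dtypes, shapes and the extra hrnn_sequence_length placeholder
# are assumptions about this project's seq2seq module, not taken from it.
def model_inputs():
    input_data = tf.placeholder(tf.int32, [None, None], name='input')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    lr = tf.placeholder(tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    target_sequence_length = tf.placeholder(tf.int32, [None], name='target_sequence_length')
    max_target_sequence_length = tf.reduce_max(target_sequence_length, name='max_target_len')
    source_sequence_length = tf.placeholder(tf.int32, [None], name='source_sequence_length')
    hrnn_sequence_length = tf.placeholder(tf.int32, [None], name='hrnn_sequence_length')
    return (input_data, targets, lr, keep_prob, target_sequence_length,
            max_target_sequence_length, source_sequence_length, hrnn_sequence_length)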