Example #1
    def __init__(self, config, custom_dict=None):
        vocab_path = os.path.join(config.OVERALL_PROCESSED_PATH, 'vocab.p')
        self.vocab_to_int, self.int_to_vocab = helper.load_vocab(vocab_path)
        self.graph, self.sess = self.load_graph()
        self.custom_dict = custom_dict

        (self.input_data, self.targets, self.inference_logits,
         self.training_logits, self.source_sequence_length,
         self.target_sequence_length, self.keep_prob) = self.get_tensors()

        print('Chatbot model created')
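The helper.load_vocab call above is defined outside this snippet. A minimal sketch of what it might look like, assuming the vocabulary was pickled as a (vocab_to_int, int_to_vocab) tuple (hypothetical, not the project's actual helper module):

import pickle


def load_vocab(vocab_path):
    # Assumption: preprocessing pickled the pair (vocab_to_int, int_to_vocab).
    with open(vocab_path, 'rb') as f:
        vocab_to_int, int_to_vocab = pickle.load(f)
    return vocab_to_int, int_to_vocab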
Example #2
    def __init__(self, args, need_shuffle=True):
        super(SD, self).__init__()
        self.data_path = args.data_path
        self.batch_size = args.batch_size
        self.epochs = args.epochs
        self.l1table = load_l1table()
        self.l2table = load_l2table()
        # self.global_tokens = load_global_tokens()
        self.class_map = list(set(self.l2table.values()))
        self.total_class = len(self.class_map)
        self.max_doc = args.max_doc
        self.vocab_path = args.vocab_path if args.vocab_path else os.path.join(
            args.model_dir, "vocab_{}th".format(
                datetime.today().timetuple().tm_yday))
        # self.hv = get_hashing_vec(self.max_doc, "english")
        # self.find_bondary()
        if os.path.isfile(self.vocab_path):
            self.vocab_processor = load_vocab(self.vocab_path)
        else:
            self.vocab_processor = train_vocab(self.data_path, self.vocab_path,
                                               self.max_doc)
        self.x, self.y = self.load_data(need_shuffle)
        print("Max document length: {}".format(self.max_doc))
Example #3
            os.path.join(cf.model_path, 'model_fold_{}'.format(idx)))
        builder.add_meta_graph_and_variables(sess, ['training_model'])
        builder.save()


if __name__ == '__main__':
    #
    train_data_file = sys.argv[1]

    # load data
    print("Loading data...")
    # TODO: read the file location from an external source
    filename = os.path.join(cf.data_path, train_data_file)

    X, Y, sentiments, patterns = data_helper.map_file_to_ids(filename=filename)
    vocab, ivocab, label_vocab, label_ivocab = data_helper.load_vocab(filename)
    embeddings = data_helper.load_embedding(cf.word2vec_path, vocab)
    num_classes = len(label_vocab)
    vocab_size = len(vocab)
    x_fold = cf.num_fold

    # no cross-validation
    if x_fold == 0:
        data_size = len(Y)
        size_per_fold = int(data_size / 10)
        dev_start = 0 * size_per_fold
        dev_end = (0 + 1) * size_per_fold
        x_train, y_train = X[dev_end:], Y[dev_end:]
        sents_train = sentiments[dev_end:]
        patt_train = patterns[dev_end:]
        # dev set
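The slicing above holds out fold 0 as the dev set and keeps the rest for training. A self-contained sketch of that split, assuming X and Y are NumPy arrays (illustrative only, not the project's code):

import numpy as np


def split_fold(X, Y, fold_idx, num_folds=10):
    # Hold out one contiguous fold for dev; concatenate the rest for training.
    size_per_fold = len(Y) // num_folds
    dev_start = fold_idx * size_per_fold
    dev_end = (fold_idx + 1) * size_per_fold
    x_dev, y_dev = X[dev_start:dev_end], Y[dev_start:dev_end]
    x_train = np.concatenate([X[:dev_start], X[dev_end:]])
    y_train = np.concatenate([Y[:dev_start], Y[dev_end:]])
    return x_train, y_train, x_dev, y_dev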
Example #4
                       framealpha=0.8,
                       fontsize=12)
            plt.title(u'sentiment classification', fontsize=17)
            plt.show()

            # print the results
            print_result(predictions, probs, y_dev, vocab, label_vocab)


if __name__ == '__main__':
    #
    test_data_file = sys.argv[1]
    export_model = sys.argv[2]

    # load data
    print("Loading data...")
    filename = os.path.join(cf.data_path, test_data_file)
    X, Y, sentiment_dict_features, patterns = data_helper.map_file_to_ids(
        filename=filename)
    # used when printing the output
    _, vocab, _, label_vocab = data_helper.load_vocab(filename)

    # note: the random permutation is computed but never applied,
    # so X_shuffled and Y_shuffled keep the original order
    np.random.seed(1)
    shuffle_indices = np.random.permutation(len(X))
    X_shuffled = X
    Y_shuffled = Y

    test(os.path.join(cf.model_path, export_model), X, Y,
         sentiment_dict_features, patterns)
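Example 3 exports each fold with the SavedModel tag 'training_model', and the test() call above presumably restores such an export. A minimal loading sketch using the TensorFlow 1.x SavedModel loader (an assumption about how test() might work, not the project's actual code):

import tensorflow as tf


def load_exported_model(export_dir):
    # The tag list must match the one passed to add_meta_graph_and_variables().
    sess = tf.Session(graph=tf.Graph())
    with sess.graph.as_default():
        tf.saved_model.loader.load(sess, ['training_model'], export_dir)
    return sess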
Example #5
from random import shuffle
import os
#import numpy as np
import config
import helper
import tensorflow as tf
import seq2seq
import pickle
#from tensorflow.python.layers.core import Dense

#%%

## first, load and pad data
## load all data and vocabulary
vocab_path = os.path.join(config.PROCESSED_PATH, 'vocab.p')
train_token_path = os.path.join(config.PROCESSED_PATH, 'processed_tokens.p')
vocab_to_int, int_to_vocab = helper.load_vocab(vocab_path)
config.source_vocab_size = len(vocab_to_int)
config.target_vocab_size = len(vocab_to_int)
train_enc_tokens, train_dec_tokens, test_enc_tokens, test_dec_tokens = helper.load_training_data(
    train_token_path)
bucket_ids = helper.bucket_training_data(train_enc_tokens,
                                         config.max_conv_length)
batches = helper.make_batches_of_bucket_ids(bucket_ids, config.batch_size)

## get a batch of data and pad them
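## minimal padding sketch (assumption: each batch holds lists of token ids and
## vocab_to_int contains a '<PAD>' entry; not the helper module's actual code)
def pad_batch(batch, pad_id):
    max_len = max(len(seq) for seq in batch)
    return [seq + [pad_id] * (max_len - len(seq)) for seq in batch]
# e.g. pad_batch([[5, 8, 2], [7, 3]], vocab_to_int['<PAD>'])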

#%%
## build the network

# create input placeholders
input_data, targets, lr, keep_prob, target_sequence_length, max_target_sequence_length, source_sequence_length, hrnn_sequence_length = seq2seq.model_inputs(