예제 #1
0
def run():
    """Train a two-layer LSTM character generator, sampling after each epoch.

    Reads ``maxlen``, ``char_idx``, ``path``, ``X`` and ``Y`` from the
    enclosing scope. Runs 40 single-epoch training rounds; after each round
    three 30-character samples are printed at temperatures 1.2, 1.0 and 0.5,
    all grown from the same random seed sequence.
    """
    # Input is [batch, maxlen, len(char_idx)]; think of the last axis as the
    # "channel" dimension of a CNN.
    layers = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
    layers = tflearn.lstm(layers, 512, return_seq=True)
    layers = tflearn.dropout(layers, 0.5)
    layers = tflearn.lstm(layers, 512)
    layers = tflearn.dropout(layers, 0.5)
    layers = tflearn.fully_connected(layers, len(char_idx),
                                     activation='softmax')
    layers = tflearn.regression(layers,
                                optimizer='adam',
                                loss='categorical_crossentropy',
                                learning_rate=0.001)

    generator = tflearn.SequenceGenerator(
        layers,
        dictionary=char_idx,
        seq_maxlen=maxlen,
        clip_gradients=5.0,
        checkpoint_path='models/model_us_cities')

    # (banner, temperature) pairs, printed in this order after every epoch.
    sampling_plan = (
        ("-- Test with temperature of 1.2 --", 1.2),
        ("-- Test with temperature of 1.0 --", 1.0),
        ("-- Test with temperature of 0.5 --", 0.5),
    )

    for _ in range(40):
        # The seed is drawn before training so all three samples share it.
        seed = random_sequence_from_textfile(path, maxlen)
        generator.fit(X, Y, validation_set=0.1, batch_size=128,
                      n_epoch=1, run_id='us_cities')
        print("-- TESTING...")
        for banner, temperature in sampling_plan:
            print(banner)
            print(generator.generate(30, temperature=temperature, seq_seed=seed))
예제 #2
0
파일: 16-7.py 프로젝트: DemonZeros/1book
def do_rnn(x_train,x_test,y_train,y_test):
    """Train a two-layer LSTM binary classifier on pre-shaped sequences.

    Args:
        x_train, x_test: inputs fed to a ``[None, 100, n_words]`` input layer
            (no padding is applied here; the padding calls are disabled).
        y_train, y_test: integer class labels, one-hot encoded to 2 classes.

    Reads the module-level ``n_words``. Returns nothing; the trained model
    is local to this function.
    """
    global n_words
    # Single-argument call form prints identically on Python 2 and 3
    # (the bare print statement is a SyntaxError on Python 3).
    print("GET n_words embedding %d" % n_words)

    # Sequence padding is intentionally disabled: inputs must already be
    # 100 steps long.
    #x_train = pad_sequences(x_train, maxlen=100, value=0.)
    #x_test = pad_sequences(x_test, maxlen=100, value=0.)

    # Converting labels to binary vectors
    y_train = to_categorical(y_train, nb_classes=2)
    y_test = to_categorical(y_test, nb_classes=2)

    # Network building
    net = tflearn.input_data(shape=[None, 100,n_words])
    net = tflearn.lstm(net, 10,  return_seq=True)
    net = tflearn.lstm(net, 10, )
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.1,name="output",
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=3)
    model.fit(x_train, y_train, validation_set=(x_test, y_test), show_metric=True,
             batch_size=32,run_id="maidou")
예제 #3
0
def get_model_action():
    """Build the ``net2`` stacked-LSTM classifier and return it as a DNN.

    The graph takes ``[None, 10, 128]`` inputs, runs two 256-unit LSTM layers
    (each followed by dropout with keep probability 0.6) and ends in a
    5-way softmax trained with SGD on categorical cross-entropy.

    Returns:
        A ``tflearn.DNN`` wrapping the network, with gradient clipping at 5.0.
    """
    inputs = tflearn.input_data(shape=[None, 10, 128], name='net2_layer1')

    # First recurrent layer keeps the whole sequence for the next LSTM.
    hidden = tflearn.lstm(inputs, n_units=256, return_seq=True,
                          name='net2_layer2')
    hidden = tflearn.dropout(hidden, 0.6, name='net2_layer3')

    # Second recurrent layer collapses the sequence to its final output.
    hidden = tflearn.lstm(hidden, n_units=256, return_seq=False,
                          name='net2_layer4')
    hidden = tflearn.dropout(hidden, 0.6, name='net2_layer5')

    scores = tflearn.fully_connected(hidden, 5, activation='softmax',
                                     name='net2_layer6')
    trainer = tflearn.regression(scores,
                                 optimizer='sgd',
                                 loss='categorical_crossentropy',
                                 learning_rate=0.001,
                                 name='net2_layer7')
    return tflearn.DNN(trainer, clip_gradients=5.0, tensorboard_verbose=0)
예제 #4
0
파일: models.py 프로젝트: gcm0621/pygta5
def sentnet_LSTM_gray(width, height, frame_count, lr, output=9):
    """Build a two-layer LSTM classifier over grayscale frames.

    Args:
        width: first non-batch dimension of the input.
        height: second non-batch dimension of the input.
        frame_count: accepted for signature parity with callers; not used by
            this network.
        lr: learning rate handed to the Adam optimizer.
        output: number of softmax classes (defaults to 9).

    Returns:
        A ``tflearn.DNN`` that checkpoints to ``model_lstm`` and logs to ``log``.
    """
    network = input_data(shape=[None, width, height], name='input')
    network = tflearn.lstm(network, 128, return_seq=True)
    network = tflearn.lstm(network, 128)
    # Honor the caller's class count instead of a hard-coded 9.
    network = tflearn.fully_connected(network, output, activation='softmax')
    # Honor the caller's learning rate; previously `lr` was silently ignored.
    network = tflearn.regression(network, optimizer='adam',
                                 loss='categorical_crossentropy',
                                 learning_rate=lr, name="output1")

    model = tflearn.DNN(network, checkpoint_path='model_lstm',
                        max_checkpoints=1, tensorboard_verbose=0, tensorboard_dir='log')

    return model
예제 #5
0
파일: 16-1.py 프로젝트: DemonZeros/1book
def do_rnn(X, Y, testX, testY):
    """Train a two-layer LSTM 10-class classifier for one epoch.

    Args:
        X, testX: inputs, reshaped here to ``(-1, 28, 28)`` before use.
        Y, testY: targets passed straight through to ``fit``.
    """
    # Each sample is presented as a 28-step sequence of 28 features.
    sequence_shape = (-1, 28, 28)
    train_seq = np.reshape(X, sequence_shape)
    test_seq = np.reshape(testX, sequence_shape)

    layers = tflearn.input_data(shape=[None, 28, 28])
    layers = tflearn.lstm(layers, 128, return_seq=True)
    layers = tflearn.lstm(layers, 128)
    layers = tflearn.fully_connected(layers, 10, activation='softmax')
    layers = tflearn.regression(layers,
                                optimizer='adam',
                                loss='categorical_crossentropy',
                                name="output1")

    classifier = tflearn.DNN(layers, tensorboard_verbose=2)
    classifier.fit(train_seq, Y,
                   n_epoch=1,
                   validation_set=(test_seq, testY),
                   show_metric=True,
                   snapshot_step=100)
예제 #6
0
def shakespeare():
    """Train a three-layer LSTM character generator on the Shakespeare corpus.

    Downloads ``shakespeare_input.txt`` if it is missing, reuses a pickled
    character dictionary from ``char_idx.pickle`` when present (and rewrites
    it afterwards), then runs 50 single-epoch training rounds. After each
    round, 600-character samples are printed at temperatures 1.0 and 0.5.
    """
    path = "shakespeare_input.txt"
    #path = "shakespeare_input-100.txt"
    char_idx_file = 'char_idx.pickle'

    if not os.path.isfile(path):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt", path)

    maxlen = 25

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        # NOTE: pickle must only be used on files this script wrote itself.
        # The context manager closes the handle deterministically.
        with open(char_idx_file, 'rb') as handle:
            char_idx = pickle.load(handle)

    X, Y, char_idx = \
        textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3,
                                             pre_defined_char_idx=char_idx)

    # Closing the file guarantees the dictionary is flushed to disk before
    # the long training loop starts.
    with open(char_idx_file, 'wb') as handle:
        pickle.dump(char_idx, handle)

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_shakespeare')

    for _ in range(50):
        # Same seed for both samples of this round.
        seed = random_sequence_from_textfile(path, maxlen)
        m.fit(X, Y, validation_set=0.1, batch_size=128,
              n_epoch=1, run_id='shakespeare')
        print("-- TESTING...")
        print("-- Test with temperature of 1.0 --")
        print(m.generate(600, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(600, temperature=0.5, seq_seed=seed))
예제 #7
0
파일: 16-3.py 프로젝트: DemonZeros/1book
def do_rnn(trainX, testX, trainY, testY):
    """Train an embedding + LSTM binary classifier on token-id sequences.

    Args:
        trainX, testX: sequences, padded here to ``MAX_DOCUMENT_LENGTH``.
        trainY, testY: integer class labels, one-hot encoded to 2 classes.

    Reads the module-level ``n_words`` (embedding vocabulary size) and
    ``MAX_DOCUMENT_LENGTH``. Returns nothing.
    """
    global n_words
    # Single-argument call form prints identically on Python 2 and 3
    # (the bare print statement is a SyntaxError on Python 3).
    print("GET n_words embedding %d" % n_words)

    # Data preprocessing
    # Sequence padding
    trainX = pad_sequences(trainX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
    testX = pad_sequences(testX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)

    # Network building
    net = tflearn.input_data([None, MAX_DOCUMENT_LENGTH])
    net = tflearn.embedding(net, input_dim=n_words, output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.8)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=3)
    model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
             batch_size=32,run_id="maidou")
예제 #8
0
def do_rnn(trainX, testX, trainY, testY):
    """Train an embedding + LSTM binary classifier and report test metrics.

    Args:
        trainX, testX: sequences, padded here to 64 elements.
        trainY, testY: integer class labels; ``testY`` is also kept in its
            original form for the final report.

    Prints the class-0 score of every test sample, then a classification
    report and a confusion matrix. Returns nothing.
    """
    max_document_length=64
    # Keep the raw labels: testY is replaced by its one-hot form below.
    y_test=testY
    trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
    testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)

    # Network building
    net = tflearn.input_data([None, max_document_length])
    net = tflearn.embedding(net, input_dim=10240000, output_dim=64)
    net = tflearn.lstm(net, 64, dropout=0.1)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0,tensorboard_dir="dga_log")
    model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
              batch_size=10,run_id="dga",n_epoch=1)

    y_predict = []
    for scores in model.predict(testX):
        # Call-form print works on Python 2 and 3 alike.
        print(scores[0])
        # Column 0 is the class-0 score: above 0.5 predicts class 0.
        y_predict.append(0 if scores[0] > 0.5 else 1)

    print(classification_report(y_test, y_predict))
    print(metrics.confusion_matrix(y_test, y_predict))
예제 #9
0
def do_rnn(x,y):
    """Split the data 60/40, train an embedding + LSTM classifier, score it.

    Args:
        x: sequences; padded here to the module-level ``max_document_length``.
        y: integer class labels.

    The held-out 40% is scored by thresholding the class-0 output at 0.5, and
    the result is handed to ``do_metrics``. Returns nothing.
    """
    global max_document_length
    # Call-form print works on Python 2 and 3 alike (the bare statement is a
    # SyntaxError on Python 3).
    print("RNN")
    trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.4, random_state=0)
    # Keep the raw labels: testY is replaced by its one-hot form below.
    y_test=testY

    trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
    testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)

    # Network building
    net = tflearn.input_data([None, max_document_length])
    net = tflearn.embedding(net, input_dim=10240000, output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.8)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit(trainX, trainY, validation_set=0.1, show_metric=True,
              batch_size=10,run_id="webshell",n_epoch=5)

    # Column 0 is the class-0 score: above 0.5 predicts class 0.
    y_predict = [0 if scores[0] > 0.5 else 1
                 for scores in model.predict(testX)]

    do_metrics(y_test, y_predict)
def main():
    """Fit an LSTM that predicts the next word vector from the previous 20.

    Loads vectors from ``./vectors.bin``, builds 1000 sliding windows over the
    module-level ``seq`` (20 inputs, 1 target each), trains for 1000 epochs,
    saves the model as ``model`` and prints the word closest to the
    prediction for the first window next to the expected word.
    """
    load_vectors("./vectors.bin")
    init_seq()
    xlist = []
    ylist = []
    test_X = None
    #for i in range(len(seq)-100):
    for i in range(1000):
        sequence = seq[i:i+20]
        xlist.append(sequence)
        ylist.append(seq[i+20])
        if test_X is None:
            # Remember the first window so it can be predicted after training.
            test_X = np.array(sequence)
            (match_word, max_cos) = vector2word(seq[i+20])
            # One formatted string gives the same output on Python 2 and 3
            # (the multi-argument print statement is Python 2 only).
            print("right answer= %s %s" % (match_word, max_cos))

    X = np.array(xlist)
    Y = np.array(ylist)
    net = tflearn.input_data([None, 20, 200])
    net = tflearn.lstm(net, 200)
    net = tflearn.fully_connected(net, 200, activation='linear')
    net = tflearn.regression(net, optimizer='sgd', learning_rate=0.1,
                                     loss='mean_square')
    model = tflearn.DNN(net)
    model.fit(X, Y, n_epoch=1000, batch_size=1,snapshot_epoch=False,show_metric=True)
    model.save("model")
    predict = model.predict([test_X])
    (match_word, max_cos) = vector2word(predict[0])
    print("predict= %s %s" % (match_word, max_cos))
예제 #11
0
    def test_sequencegenerator(self):
        """Check character-level SequenceGenerator training, save and load.

        A single-layer LSTM is trained on a repeating digit string; generation
        seeded with "12345" must reproduce the pattern both before and after a
        save/load round trip through ``test_seqgen.tflearn``.
        """
        with tf.Graph().as_default():
            corpus = "123456789101234567891012345678910123456789101234567891012345678910"
            window = 5

            X, Y, char_idx = tflearn.data_utils.string_to_semi_redundant_sequences(
                corpus, seq_maxlen=window, redun_step=3)

            net = tflearn.input_data(shape=[None, window, len(char_idx)])
            net = tflearn.lstm(net, 32)
            net = tflearn.dropout(net, 0.5)
            net = tflearn.fully_connected(net, len(char_idx),
                                          activation='softmax')
            net = tflearn.regression(net,
                                     optimizer='adam',
                                     loss='categorical_crossentropy',
                                     learning_rate=0.1)

            generator = tflearn.SequenceGenerator(net,
                                                  dictionary=char_idx,
                                                  seq_maxlen=window,
                                                  clip_gradients=5.0)
            generator.fit(X, Y, validation_set=0.1, n_epoch=100,
                          snapshot_epoch=False)

            generated = generator.generate(10, temperature=1., seq_seed="12345")
            self.assertEqual(
                generated,
                "123456789101234",
                "SequenceGenerator test failed! Generated sequence: " + generated + " expected '123456789101234'")

            # Save must leave a file on disk.
            generator.save("test_seqgen.tflearn")
            self.assertTrue(os.path.exists("test_seqgen.tflearn"))

            # Load must restore a model that generates the same sequence.
            generator.load("test_seqgen.tflearn")
            generated = generator.generate(10, temperature=1., seq_seed="12345")
            self.assertEqual(
                generated,
                "123456789101234",
                "SequenceGenerator test failed after loading model! Generated sequence: " + generated + " expected '123456789101234'")
예제 #12
0
def generator_xss():
    """Train a three-layer LSTM character generator on the XSS sample file.

    Reads the module-level ``char_idx``, ``char_idx_file``, ``xss_data_file``
    and ``maxlen``; ``char_idx`` is rebound to the dictionary actually used.
    Trains for two epochs and prints 32-character samples at temperatures
    0.1, 0.5 and 1.0, all seeded with a fixed script-tag prefix.
    """
    global char_idx
    global xss_data_file
    global maxlen

    if os.path.isfile(char_idx_file):
        print('Loading previous xxs_char_idx')
        # NOTE: pickle must only be used on files this project wrote itself.
        # The context manager closes the handle deterministically.
        with open(char_idx_file, 'rb') as handle:
            char_idx = pickle.load(handle)

    X, Y, char_idx = \
        textfile_to_semi_redundant_sequences(xss_data_file, seq_maxlen=maxlen, redun_step=3,
                                             pre_defined_char_idx=char_idx)

    #pickle.dump(char_idx, open(char_idx_file, 'wb'))

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 32, return_seq=True)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.lstm(g, 32, return_seq=True)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.lstm(g, 32)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='chkpoint/model_scanner_poc')

    # Call-form print works on Python 2 and 3 alike (the bare statement is a
    # SyntaxError on Python 3).
    print("random_sequence_from_textfile")
    # A fixed seed is used instead of a random one drawn from the data file.
    #seed = random_sequence_from_textfile(xss_data_file, maxlen)
    seed='"/><script>'
    m.fit(X, Y, validation_set=0.1, batch_size=128,
              n_epoch=2, run_id='scanner-poc')
    print("-- TESTING...")

    print("-- Test with temperature of 0.1 --")
    print(m.generate(32, temperature=0.1, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(32, temperature=0.5, seq_seed=seed))
    print("-- Test with temperature of 1.0 --")
    print(m.generate(32, temperature=1.0, seq_seed=seed))
예제 #13
0
def generate_net(embedding):
    """Return an embedding + LSTM + softmax regression graph.

    Args:
        embedding: accepted by the signature but not read by this function.

    Returns:
        The ``tflearn.regression`` layer of a 2-class network over
        200-element id sequences.
    """
    token_ids = tflearn.input_data([None, 200])
    embedded = tflearn.embedding(token_ids, input_dim=300000, output_dim=128)
    encoded = tflearn.lstm(embedded, 128)
    encoded = tflearn.dropout(encoded, 0.5)
    class_probs = tflearn.fully_connected(encoded, 2, activation='softmax')
    return tflearn.regression(class_probs,
                              optimizer='adam',
                              loss='categorical_crossentropy')
def build(embedding_size=(400000, 50), train_embedding=False, hidden_dims=128,
          learning_rate=0.001):
    """Return a four-layer stacked-LSTM binary classification graph.

    Args:
        embedding_size: ``(input_dim, output_dim)`` of the embedding layer.
        train_embedding: whether the embedding weights are trainable.
        hidden_dims: number of units in every LSTM layer.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        The ``tflearn.regression`` layer of the network.
    """
    vocab_size, vector_dim = embedding_size[0], embedding_size[1]

    graph = tflearn.input_data([None, 200])
    graph = tflearn.embedding(graph,
                              input_dim=vocab_size,
                              output_dim=vector_dim,
                              trainable=train_embedding,
                              name='EmbeddingLayer')

    # Three sequence-to-sequence LSTM layers, each followed by dropout.
    for _ in range(3):
        graph = tflearn.lstm(graph, hidden_dims, return_seq=True)
        graph = tflearn.dropout(graph, 0.5)

    # The last LSTM emits only its final output.
    graph = tflearn.lstm(graph, hidden_dims)
    graph = tflearn.dropout(graph, 0.5)

    graph = tflearn.fully_connected(graph, 2, activation='softmax')
    return tflearn.regression(graph,
                              optimizer='adam',
                              learning_rate=learning_rate,
                              loss='categorical_crossentropy')
def create_net(in_sx, in_sy, out_sx):
	"""
	Build the single-LSTM tflearn graph used for keyword detection.

	in_sx, in_sy -- the two non-batch input dimensions
	out_sx       -- number of softmax outputs

	Reads lstm_size, lstm_dropout and learning_rate from the enclosing
	scope and returns the regression layer.
	"""
	inputs = tflearn.input_data([None, in_sx, in_sy])
	recurrent = tflearn.lstm(inputs, lstm_size, dropout=lstm_dropout)
	outputs = tflearn.fully_connected(recurrent, out_sx, activation='softmax')
	return tflearn.regression(
		outputs,
		learning_rate=learning_rate,
		optimizer='adam',
		loss='categorical_crossentropy')
예제 #16
0
def train_model():
    """Train a single-LSTM regressor on the preprocessed dataset for one epoch.

    Loads ``(X_train, X_test, y_train, y_test)`` from ``PREPROCESSED_DATA``;
    only the training split is used here, with 10% held out for validation.
    """
    X_train, X_test, y_train, y_test = np.load(PREPROCESSED_DATA)
    # Unpacking also asserts that X_train is rank 3.
    _, num_timesteps, input_dim = X_train.shape
    net = tflearn.input_data(shape=[None, num_timesteps, input_dim])
    net = tflearn.lstm(net, 128)
    net = tflearn.fully_connected(net, 1, activation='relu')
    net = tflearn.regression(net, optimizer='sgd',
                             loss='mean_square', name="regression_output")
    # The original passed a dangling `run_id=` here, which is a syntax error;
    # DNN takes no run_id (it is an optional argument of fit()).
    model = tflearn.DNN(net, tensorboard_verbose=2)
    model.fit(X_train, y_train, n_epoch=1, validation_set=0.1, show_metric=True,
              snapshot_step=100)
예제 #17
0
    def model(self, feed_previous=False):
        """Build an encoder/decoder LSTM graph and return it as a tflearn.DNN.

        The single input tensor "XY" has ``max_seq_len*2`` time steps of
        ``word_vec_dim`` features: the first ``max_seq_len`` steps feed the
        encoder, the rest (minus the last step) feed the decoder.

        Args:
            feed_previous: when True, every decoder step after the first takes
                the previous decoder output as its input; when False, decoder
                inputs are sliced from ``decoder_inputs``.

        Returns:
            A ``tflearn.DNN`` trained with SGD on mean-square loss over the
            encoder output followed by all decoder outputs.
        """
        # NOTE(review): tf.pack and the axis-first tf.concat(1, [...]) call
        # look like the pre-1.0 TensorFlow API — confirm the TF version.

        # Build encoder_inputs and GO-prefixed decoder_inputs from the input XY
        input_data = tflearn.input_data(shape=[None, self.max_seq_len*2, self.word_vec_dim], dtype=tf.float32, name = "XY")
        encoder_inputs = tf.slice(input_data, [0, 0, 0], [-1, self.max_seq_len, self.word_vec_dim], name="enc_in")
        decoder_inputs_tmp = tf.slice(input_data, [0, self.max_seq_len, 0], [-1, self.max_seq_len-1, self.word_vec_dim], name="dec_in_tmp")
        # The GO marker is a single time step of all ones.
        go_inputs = tf.ones_like(decoder_inputs_tmp)
        go_inputs = tf.slice(go_inputs, [0, 0, 0], [-1, 1, self.word_vec_dim])
        decoder_inputs = tf.concat(1, [go_inputs, decoder_inputs_tmp], name="dec_in")

        # Encoder
        # Feed encoder_inputs to the encoder; it returns an output (the first
        # value of the predicted sequence) and a state (passed to the decoder)
        (encoder_output_tensor, states) = tflearn.lstm(encoder_inputs, self.word_vec_dim, return_state=True, scope='encoder_lstm')
        encoder_output_sequence = tf.pack([encoder_output_tensor], axis=1)

        # Decoder
        # At prediction time, the previous time step's output is used as the
        # next time step's input
        # First use the encoder's last output as the first input
        # NOTE(review): the code below actually feeds go_inputs (or the first
        # decoder_inputs step) here, not the encoder output — confirm intent.
        if feed_previous:
            first_dec_input = go_inputs
        else:
            first_dec_input = tf.slice(decoder_inputs, [0, 0, 0], [-1, 1, self.word_vec_dim])
        decoder_output_tensor = tflearn.lstm(first_dec_input, self.word_vec_dim, initial_state=states, return_seq=False, reuse=False, scope='decoder_lstm')
        decoder_output_sequence_single = tf.pack([decoder_output_tensor], axis=1)
        decoder_output_sequence_list = [decoder_output_tensor]
        # Then use the decoder's output as the next time step's input
        for i in range(self.max_seq_len-1):
            if feed_previous:
                next_dec_input = decoder_output_sequence_single
            else:
                next_dec_input = tf.slice(decoder_inputs, [0, i+1, 0], [-1, 1, self.word_vec_dim])
            # Same 'decoder_lstm' scope with reuse=True; no initial_state is
            # passed on these calls.
            decoder_output_tensor = tflearn.lstm(next_dec_input, self.word_vec_dim, return_seq=False, reuse=True, scope='decoder_lstm')
            decoder_output_sequence_single = tf.pack([decoder_output_tensor], axis=1)
            decoder_output_sequence_list.append(decoder_output_tensor)

        # Stack the per-step decoder outputs along the time axis and prepend
        # the encoder output.
        decoder_output_sequence = tf.pack(decoder_output_sequence_list, axis=1)
        real_output_sequence = tf.concat(1, [encoder_output_sequence, decoder_output_sequence])

        net = tflearn.regression(real_output_sequence, optimizer='sgd', learning_rate=0.1, loss='mean_square')
        model = tflearn.DNN(net)
        return model
예제 #18
0
파일: NN.py 프로젝트: szsam/DSLearn
def build():
    """Build the conv + LSTM binary classifier and return it as a DNN.

    Embedded id sequences of length ``Meta.max_string_len`` pass through three
    parallel 1-D convolutions (kernel sizes 3, 4 and 5), are concatenated on
    axis 1, then go through dropout, a 128-unit LSTM and a 2-way softmax.
    """
    token_ids = input_data([None, Meta.max_string_len])
    embedded = embedding(token_ids, input_dim=Meta.max_one_hot, output_dim=128)

    # One convolution branch per kernel size, created in ascending order.
    conv_branches = [
        conv_1d(embedded, 128, kernel, padding='valid', activation='relu',
                regularizer="L2")
        for kernel in (3, 4, 5)
    ]

    features = merge(conv_branches, mode='concat', axis=1)
    features = dropout(features, 0.5)
    features = lstm(features, 128)
    class_probs = fully_connected(features, 2, activation='softmax')
    trainer = tflearn.regression(class_probs,
                                 optimizer='adam',
                                 learning_rate=0.001,
                                 loss='categorical_crossentropy')
    return tflearn.DNN(trainer, tensorboard_verbose=0)
예제 #19
0
	def train(self):
		"""Build the three-layer LSTM generator graph and its SequenceGenerator.

		Reuses the character dictionary pickled in self.charIDXFile when it
		exists, so character indices stay stable across runs, and rewrites
		that file afterwards. Stores the graph in self.g and the generator
		in self.model.
		"""
		char_idx = None

		if os.path.isfile(self.charIDXFile):
			# Load the previous character dictionary.
			# NOTE: pickle must only be used on files this class wrote itself.
			with open(self.charIDXFile, 'rb') as handle:
				char_idx = pickle.load(handle)

		# Pass the loaded dictionary on; without pre_defined_char_idx the
		# file loaded above was silently discarded and rebuilt from scratch.
		X, Y, char_idx = textfile_to_semi_redundant_sequences(
			self.path, seq_maxlen=self.maxLength, redun_step=3,
			pre_defined_char_idx=char_idx)

		# Closing the file guarantees the dictionary reaches the disk.
		with open(self.charIDXFile, 'wb') as handle:
			pickle.dump(char_idx, handle)

		self.g = tflearn.input_data([None,self.maxLength,len(char_idx)])
		self.g = tflearn.lstm(self.g,512,return_seq=True)
		self.g = tflearn.dropout(self.g,0.5)
		self.g = tflearn.lstm(self.g,512,return_seq=True)
		self.g = tflearn.dropout(self.g,0.5)
		self.g = tflearn.lstm(self.g,512)
		self.g = tflearn.dropout(self.g,0.5)
		self.g = tflearn.fully_connected(self.g,len(char_idx),activation='softmax')
		self.g = tflearn.regression(self.g, optimizer='adam', loss='categorical_crossentropy',
								 learning_rate=0.001)
		self.model = tflearn.SequenceGenerator(self.g, dictionary=char_idx, seq_maxlen=self.maxLength, max_checkpoints=0,checkpoint_path='model_trump')
예제 #20
0
    def test_recurrent_layers(self):
        """Check that an embedding + LSTM network can fit a tiny dataset.

        After 300 epochs on four samples, the class-1 score predicted for
        ``[5, 9, 11, 1]`` must exceed 0.9.
        """
        samples = [[1, 3, 5, 7], [2, 4, 8, 10], [1, 5, 9, 11], [2, 6, 8, 0]]
        targets = [[0., 1.], [1., 0.], [0., 1.], [1., 0.]]

        with tf.Graph().as_default():
            net = tflearn.input_data(shape=[None, 4])
            net = tflearn.embedding(net, input_dim=12, output_dim=4)
            net = tflearn.lstm(net, 6)
            net = tflearn.fully_connected(net, 2, activation='softmax')
            net = tflearn.regression(net, optimizer='sgd', learning_rate=1.)

            classifier = tflearn.DNN(net)
            classifier.fit(samples, targets, n_epoch=300, snapshot_epoch=False)

            scores = classifier.predict([[5, 9, 11, 1]])
            self.assertGreater(scores[0][1], 0.9)
예제 #21
0
파일: 16-5.py 프로젝트: DemonZeros/1book
def do_rnn(trainX, testX, trainY, testY):
    """Train an embedding + LSTM binary classifier and report test metrics.

    Args:
        trainX, testX: sequences, padded here to the module-level
            ``max_sequences_len``.
        trainY, testY: integer class labels; the raw ``testY`` is kept for
            the final report.

    Reads the module-level ``max_sys_call`` to size the embedding. Prints a
    classification report and a confusion matrix. Returns nothing.
    """
    global max_sequences_len
    global max_sys_call
    # Data preprocessing
    # Sequence padding
    trainX = pad_sequences(trainX, maxlen=max_sequences_len, value=0.)
    testX = pad_sequences(testX, maxlen=max_sequences_len, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    # Keep the raw labels: testY is replaced by its one-hot form next.
    testY_old=testY
    testY = to_categorical(testY, nb_classes=2)

    # Network building
    # Single-argument call form prints identically on Python 2 and 3
    # (the bare print statement is a SyntaxError on Python 3).
    print("GET max_sequences_len embedding %d" % max_sequences_len)
    print("GET max_sys_call embedding %d" % max_sys_call)

    net = tflearn.input_data([None, max_sequences_len])
    # +1 so that the id max_sys_call itself is a valid embedding index.
    net = tflearn.embedding(net, input_dim=max_sys_call+1, output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.3)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.1,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=3)
    model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
             batch_size=32,run_id="maidou")

    # Column 0 is the class-0 score: above 0.5 predicts class 0.
    y_predict = [0 if scores[0] > 0.5 else 1
                 for scores in model.predict(testX)]

    print(classification_report(testY_old, y_predict))
    print(metrics.confusion_matrix(testY_old, y_predict))
예제 #22
0
    def test_sequencegenerator_words(self):
        """Check word-level SequenceGenerator training, save and load.

        Trains on an alternating "hello"/"world" corpus with one-hot encoded
        windows of two words; the last two generated words must be
        "hello world" both before and after a save/load round trip.
        """
        with tf.Graph().as_default():
            text = ["hello","world"]*100
            word_idx = {"hello": 0, "world": 1}
            maxlen = 2

            vec = [x for x in map(word_idx.get, text) if x is not None]

            sequences = []
            next_words = []
            for i in range(0, len(vec) - maxlen, 3):
                sequences.append(vec[i: i + maxlen])
                next_words.append(vec[i + maxlen])

            # The builtin bool replaces np.bool, which was only an alias for
            # it and was removed in NumPy 1.24.
            X = np.zeros((len(sequences), maxlen, len(word_idx)), dtype=bool)
            Y = np.zeros((len(sequences), len(word_idx)), dtype=bool)
            for i, seq in enumerate(sequences):
                for t, idx in enumerate(seq):
                    X[i, t, idx] = True
                # One target per sequence; no need to repeat it per time step.
                Y[i, next_words[i]] = True

            g = tflearn.input_data(shape=[None, maxlen, len(word_idx)])
            g = tflearn.lstm(g, 32)
            g = tflearn.dropout(g, 0.5)
            g = tflearn.fully_connected(g, len(word_idx), activation='softmax')
            g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                                   learning_rate=0.1)

            m = tflearn.SequenceGenerator(g, dictionary=word_idx,
                                          seq_maxlen=maxlen,
                                          clip_gradients=5.0)
            m.fit(X, Y, validation_set=0.1, n_epoch=100, snapshot_epoch=False)
            res = m.generate(4, temperature=.5, seq_seed=["hello","world"])
            res_str = " ".join(res[-2:])
            self.assertEqual(res_str, "hello world", "SequenceGenerator (word level) test failed! Generated sequence: " + res_str + " expected 'hello world'")

            # Testing save method
            m.save("test_seqgen_word.tflearn")
            self.assertTrue(os.path.exists("test_seqgen_word.tflearn"))

            # Testing load method
            m.load("test_seqgen_word.tflearn")
            res = m.generate(4, temperature=.5, seq_seed=["hello","world"])
            res_str = " ".join(res[-2:])
            self.assertEqual(res_str, "hello world", "Reloaded SequenceGenerator (word level) test failed! Generated sequence: " + res_str + " expected 'hello world'")
예제 #23
0
파일: lstm.py 프로젝트: kengz/ai-notebook
def run():
    """Train an embedding + dynamic LSTM binary classifier and save it.

    Reads ``trainX``, ``trainY``, ``testX`` and ``testY`` from the enclosing
    scope and writes the trained model to ``models/lstm.tfl``.
    """
    net = tflearn.input_data([None, 100])
    # Embed the integer id vector into a compact real-valued vector.
    net = tflearn.embedding(net, input_dim=10000, output_dim=128)
    # With a dynamic LSTM, backprop through time runs until the sequence
    # ends; padding is still needed to satisfy the input dimension, but the
    # padded tail is not used.
    net = tflearn.lstm(net, 128, dropout=0.8, dynamic=True)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam',
                             learning_rate=0.001,
                             loss='categorical_crossentropy')

    m = tflearn.DNN(net)
    m.fit(trainX, trainY, validation_set=(testX, testY),
          show_metric=True, batch_size=32)
    m.save('models/lstm.tfl')
    # The original body ended by calling run() again, which retrained
    # forever (until RecursionError). The entry point belongs at module
    # level, behind an `if __name__ == '__main__':` guard.
예제 #24
0
    def __init__(self, s_date):
        """Build the LSTM regressor and warm-start it from the previous month.

        Args:
            s_date: period identifier; characters ``[0:6]`` and ``[9:15]`` are
                parsed as integer year-month values (presumably
                ``YYYYMMDD_YYYYMMDD`` — confirm against callers).

        Sets ``self.model_dir`` for this period and ``self.estimators`` to a
        ``tflearn.DNN``. If a model saved for the previous month exists, its
        weights are loaded.
        """
        prev_bd = int(s_date[:6]) - 1
        prev_ed = int(s_date[9:15]) - 1
        # A value ending in 00 means we stepped back from January, so roll
        # over to December of the previous year: YYYY00 - 88 == (YYYY-1)12.
        # (The previous offset of 98 produced February instead.)
        if prev_bd % 100 == 0:
            prev_bd -= 88
        if prev_ed % 100 == 0:
            prev_ed -= 88
        pred_s_date = "%d01_%d01" % (prev_bd, prev_ed)
        prev_model = '../model/tflearn/lstm/%s' % pred_s_date
        self.model_dir = '../model/tflearn/lstm/%s' % s_date

        tf.reset_default_graph()
        tflearn.init_graph(gpu_memory_fraction=0.1)
        input_layer = tflearn.input_data(shape=[None, 30, 23], name='input')
        lstm1 = tflearn.lstm(input_layer, 23, dynamic=True, name='lstm1')
        dense1 = tflearn.fully_connected(lstm1, 1, name='dense1')
        output = tflearn.single_unit(dense1)
        regression = tflearn.regression(output, optimizer='adam', loss='mean_square',
                                metric='R2', learning_rate=0.001)
        self.estimators = tflearn.DNN(regression)
        # Warm start is best-effort: skipped when no previous model exists.
        if os.path.exists('%s/model.tfl' % prev_model):
            self.estimators.load('%s/model.tfl' % prev_model)
예제 #25
0
def lstm(trainX, trainY,testX, testY):
    """Train an embedding + LSTM binary classifier on token-id sequences.

    Args:
        trainX, testX: sequences, padded here to 100 elements.
        trainY, testY: integer class labels, one-hot encoded to 2 classes.
    """
    # Pad every sequence to a fixed length of 100, filling with zeros.
    train_inputs = pad_sequences(trainX, maxlen=100, value=0.)
    test_inputs = pad_sequences(testX, maxlen=100, value=0.)

    # One-hot encode the labels.
    train_targets = to_categorical(trainY, nb_classes=2)
    test_targets = to_categorical(testY, nb_classes=2)

    # Network: embedding -> LSTM -> 2-way softmax.
    graph = tflearn.input_data([None, 100])
    graph = tflearn.embedding(graph, input_dim=10000, output_dim=128)
    graph = tflearn.lstm(graph, 128, dropout=0.8)
    graph = tflearn.fully_connected(graph, 2, activation='softmax')
    graph = tflearn.regression(graph,
                               optimizer='adam',
                               learning_rate=0.001,
                               loss='categorical_crossentropy')

    classifier = tflearn.DNN(graph, tensorboard_verbose=0)
    classifier.fit(train_inputs, train_targets,
                   validation_set=(test_inputs, test_targets),
                   show_metric=True,
                   batch_size=32,
                   run_id="rnn-lstm")
예제 #26
0
def do_rnn_wordbag(trainX, testX, trainY, testY):
    """Train an embedding + LSTM binary classifier for five epochs.

    Args:
        trainX, testX: sequences, padded here to the module-level
            ``max_document_length``.
        trainY, testY: integer class labels, one-hot encoded to 2 classes.
    """
    global max_document_length
    # Call-form print works on Python 2 and 3 alike (the bare statement is a
    # SyntaxError on Python 3).
    print("RNN and wordbag")

    trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
    testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)

    # Network building
    net = tflearn.input_data([None, max_document_length])
    net = tflearn.embedding(net, input_dim=10240000, output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.8)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
              batch_size=10,run_id="review",n_epoch=5)
예제 #27
0
# Script fragment: one-hot encode the labels, build a three-layer LSTM
# classifier over [None, 1, lookback] inputs, and start training.
# NOTE(review): X, Y, lookback and validationPC are defined outside the
# visible source.
# print ('Y example: ', Y[123])
# print ('nDim: ', type(X) not in [list, np.array])
Y = to_categorical(Y, nb_classes=2)

# Data preprocessing
# Sequence padding
# trainX = pad_sequences(trainX, maxlen=100, value=0.)
# testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
# trainY = to_categorical(trainY, nb_classes=2)
# testY = to_categorical(testY, nb_classes=2)

# Network building
# Each sample is a single time step of `lookback` features.
net = tflearn.input_data([None, 1, lookback])
#net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 4096, dropout=0.9, return_seq=True)#dropout=(0.9, 0.9), forget_bias=0.9, return_seq=True)
net = tflearn.lstm(net, 4096, dropout=0.9, return_seq=True)
# The last LSTM is built without dropout and returns only its final output.
net = tflearn.lstm(net, 4096)#, dropout=(0.9, 0.9), forget_bias=0.9)
net = tflearn.fully_connected(net, 512, activation='relu')
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')

# Training
# Wrap the network in a model object
model = tflearn.DNN(net, tensorboard_verbose=0, max_checkpoints=15,
                    checkpoint_path='/home/dev/data-science/next-interval-rnn.checkpoints/next-interval-rnn-50k.tfl.ckpt')

# NOTE(review): this call is cut off in the visible source — the remaining
# arguments and the closing parenthesis are missing.
model.fit(X, Y, n_epoch=15, shuffle=True, validation_set=validationPC,
          show_metric=True, batch_size=500,
          snapshot_epoch=True,
예제 #28
0
def comment_predict():
	"""
	Predict the type of new comments with a previously trained model.

	The function rebuilds the training-time vocabulary from "courses.csv"
	(using the same split seed as training), reconstructs the network,
	loads the saved weights, then converts the comments in "predict.csv"
	to padded id sequences, prints the raw predictions and passes them to
	``get_type``.

	:return: None
	"""
	# Every sequence is padded/truncated to this many word ids.
	seq_len = 20

	def chinese_word_cut(text):
		"""
		Segment a Chinese comment into space-separated words with jieba.
		:param text: the full comment
		:return: the segmented comment
		"""
		return " ".join(jieba.cut(text))

	def get_custom_stopwords(stop_words_file):
		"""
		Read the stop-word list, one entry per line.
		:param stop_words_file: path of the UTF-8 stop-word file
		:return: list of stop words
		"""
		with open(stop_words_file, encoding="utf-8") as f:
			return f.read().split("\n")

	# Comment data that was used when the model was built.
	train_data = pd.read_csv("courses.csv")

	# Segment the comments and keep the result in a new column.
	train_data["cut_name"] = train_data.name.apply(chinese_word_cut)

	# Comment text (X) and labels (y).
	X = train_data["cut_name"]
	y = train_data["type"]

	# Split into train/test. The random seed must match the one used when
	# the model was trained so that the same vocabulary is reproduced.
	# Only the training halves are needed here.
	X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=42)

	# Stop-word list.
	stop_words_file = "./stop_words/哈工大停用词表.txt"
	stopwords = get_custom_stopwords(stop_words_file)

	# Fit the vocabulary on the training comments ONLY. The word -> id
	# mapping must stay the one the saved model was trained with, so the
	# vectorizer is deliberately not refitted on the prediction data.
	vect = CountVectorizer(token_pattern=u'(?u)\\b\\w+\\b', stop_words=frozenset(stopwords))
	vect.fit(X_train)
	vocab = vect.vocabulary_

	def convert_X_to_X_word_ids(X):
		"""
		Convert comments (text) into lists of vocabulary ids.
		Words missing from the training vocabulary are dropped.
		:param X: collection of segmented comments
		:return: collection of id sequences
		"""
		def to_ids(text):
			tokens = (w.lower().strip() for w in text.split())
			return [vocab[t] for t in tokens if t in vocab]

		return X.apply(to_ids)

	# Pad every training sequence to seq_len ids, filling with 0.
	# NOTE(review): vocabulary ids presumably start at 0 as well, so one real
	# word shares its id with the padding value - kept as-is because the saved
	# model was trained with this encoding; confirm before retraining.
	X_train_word_ids = convert_X_to_X_word_ids(X_train)
	X_train_padded_seqs = pad_sequences(X_train_word_ids, maxlen=seq_len, value=0)

	# Distinct labels seen in the training split.
	unique_y_labels = list(y_train.value_counts().index)

	# Rebuild the network with the same dimensions as at training time.
	size_of_each_vector = X_train_padded_seqs.shape[1]
	vocab_size = len(vocab)
	no_of_unique_y_labels = len(unique_y_labels)

	net = tflearn.input_data([None, size_of_each_vector])
	net = tflearn.embedding(net, input_dim=vocab_size, output_dim=128)
	net = tflearn.lstm(net, 128, dropout=0.6)
	net = tflearn.fully_connected(net, no_of_unique_y_labels, activation='softmax')
	net = tflearn.regression(net, optimizer='adam', learning_rate=1e-4, loss='categorical_crossentropy')

	# Initialise the model wrapper.
	model = tflearn.DNN(net, tensorboard_verbose=0, tensorboard_dir="./tflearn_data/tflearn_logs/")

	# Load the trained weights.
	model.load("./tflearn_data/tflearn_models/2019-07-08 11.51.40.170202(200, 42)/model")

	# ------------------------------ Prediction ------------------------------
	# Comments to classify.
	predict_data = pd.read_csv("predict.csv")

	# Segment the comments.
	predict_data["cut_name"] = predict_data.name.apply(chinese_word_cut)
	predict_X = predict_data["cut_name"]

	# Convert to padded id sequences using the training vocabulary.
	predict_X_word_ids = convert_X_to_X_word_ids(predict_X)
	predict_X_padded_seqs = pad_sequences(predict_X_word_ids, maxlen=seq_len, value=0)

	# Run the prediction.
	predict_Y = model.predict(predict_X_padded_seqs)

	# Output the result.
	print(predict_Y)
	get_type(predict_Y)
예제 #29
0
    def model(self, feed_previous=False):
        """Build the encoder/decoder LSTM graph and wrap it in a tflearn.DNN.

        The single "XY" input holds ``2 * max_seq_len`` word vectors per
        sample. The first ``max_seq_len`` steps are sliced off as the encoder
        input; the next ``max_seq_len - 1`` steps, prefixed with an all-ones
        GO step, form the decoder input.

        :param feed_previous: when False (default) every decoder step is fed
            the matching slice of the decoder input. When True the first step
            is fed the GO step and every later step is fed the previous
            decoder output. (Previously this flag was accepted but ignored.)
        :return: a tflearn.DNN over a mean-square regression on the encoder
            output concatenated with all decoder outputs.
        """
        input_data = tflearn.input_data(
            shape=[None, self.max_seq_len * 2, self.word_vec_dim],
            dtype=tf.float32,
            name="XY")

        # Slice the first max_seq_len steps of every sample (the question
        # sentence) to use as the encoder input.
        encoder_inputs = tf.slice(input_data, [0, 0, 0],
                                  [-1, self.max_seq_len, self.word_vec_dim],
                                  name="enc_in")

        # Decoder input: an all-ones GO step followed by the next
        # max_seq_len - 1 steps of the sample.
        decoder_inputs_tmp = tf.slice(
            input_data, [0, self.max_seq_len, 0],
            [-1, self.max_seq_len - 1, self.word_vec_dim],
            name="dec_in_tmp")
        go_inputs = tf.ones_like(decoder_inputs_tmp)
        go_inputs = tf.slice(go_inputs, [0, 0, 0], [-1, 1, self.word_vec_dim])
        decoder_inputs = tf.concat(1, [go_inputs, decoder_inputs_tmp],
                                   name="dec_in")

        # Encode. The output tensor is packed along a new axis 1 so it can be
        # concatenated with the decoder outputs below; the returned states
        # initialise the decoder.
        (encoder_output_tensor, states) = tflearn.lstm(encoder_inputs,
                                                       self.word_vec_dim,
                                                       return_state=True,
                                                       scope='encoder_lstm')
        encoder_output_sequence = tf.pack([encoder_output_tensor], axis=1)

        # First decoder input: the GO step. With feed_previous it is used
        # directly; otherwise it is sliced back out of decoder_inputs.
        if feed_previous:
            first_dec_input = go_inputs
        else:
            first_dec_input = tf.slice(decoder_inputs, [0, 0, 0],
                                       [-1, 1, self.word_vec_dim])

        # First decoder step, initialised with the encoder states. The scope
        # 'decoder_lstm' is named so the same layer can be reused below.
        decoder_output_tensor = tflearn.lstm(first_dec_input,
                                             self.word_vec_dim,
                                             initial_state=states,
                                             return_seq=False,
                                             reuse=False,
                                             scope='decoder_lstm')

        # Keep the step output both as a one-step sequence (possible next
        # input) and in the list of outputs that are emitted together.
        decoder_output_sequence_single = tf.pack([decoder_output_tensor],
                                                 axis=1)
        decoder_output_sequence_list = [decoder_output_tensor]

        # Run the remaining max_seq_len - 1 decoder steps. reuse=True with
        # scope='decoder_lstm' makes every step share the first step's layer.
        for i in range(self.max_seq_len - 1):
            if feed_previous:
                next_dec_input = decoder_output_sequence_single
            else:
                next_dec_input = tf.slice(decoder_inputs, [0, i + 1, 0],
                                          [-1, 1, self.word_vec_dim])
            decoder_output_tensor = tflearn.lstm(next_dec_input,
                                                 self.word_vec_dim,
                                                 return_seq=False,
                                                 reuse=True,
                                                 scope='decoder_lstm')
            decoder_output_sequence_single = tf.pack([decoder_output_tensor],
                                                     axis=1)
            decoder_output_sequence_list.append(decoder_output_tensor)

        # Concatenate the encoder output with all decoder outputs along
        # axis 1 and regress on the result.
        decoder_output_sequence = tf.pack(decoder_output_sequence_list, axis=1)
        real_output_sequence = tf.concat(
            1, [encoder_output_sequence, decoder_output_sequence])
        net = tflearn.regression(real_output_sequence,
                                 optimizer='sgd',
                                 learning_rate=0.1,
                                 loss='mean_square')
        model = tflearn.DNN(net)
        return model
예제 #30
0
# Rebuilds a single-layer LSTM digit classifier over MFCC features, loads
# previously saved weights and initialises evaluation counters.
# speech_data and data_path come from earlier in the script (not visible here).
learning_rate = 0.001
batch_size = 1

width = 20  # mfcc features
height = 29  # (max) length of utterance
classes = 10  # digits
# `batch` / `word_batch` alias the same generator object; per the author's
# note it computes one batch lazily each time it is advanced.
batch = word_batch = speech_data.mfcc_batch_generator(
    batch_size, height, data_path)  # NOTE: the original comment mentioned passing 64, but batch_size is 1 here.
#print('batch =',batch)
#X, Y = next(batch)

# Network building
net = tflearn.input_data([None, width, height])

#net = tflearn.lstm(net,256, dropout=0.8)
net = tflearn.lstm(net, 2048)
# Softmax output with one unit per class.
net = tflearn.fully_connected(net, classes, activation='softmax')

net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=learning_rate,
                         loss='categorical_crossentropy')
# Model setup: no training happens here - the graph is wrapped and weights
# are loaded from 'tflearn.lstm.model'.

model = tflearn.DNN(net, tensorboard_verbose=0)
model.load('tflearn.lstm.model')

# Evaluation counters; presumably consumed by a test loop that follows
# (not visible in this excerpt).
test_times = 476
acc_times = 0
acc_rate = 0
예제 #31
0
# Trains a small LSTM regressor (linear output, mean-square loss) on data
# loaded through dt.ReaderTS and predicts on the test split.
n_classes = 10  # width of the final linear layer
n_inputs = 10  # passed to ReaderTS and used as the input's 2nd dimension (the old "MNIST 28*28" note did not match this value)
n_steps = 20  # time steps
n_hidden_units = 6  # neurons in hidden layer

# Barcelona Dataset loading
reader = dt.ReaderTS(n_inputs)
XdataSet, YdataSet = reader.load_csvdata(n_steps)

# Both containers are indexed by split name: 'train', 'test' and 'val'.
x_train, x_test, y_train, y_test = XdataSet['train'], XdataSet[
    'test'], YdataSet['train'], YdataSet['test']
x_val, y_val = XdataSet['val'], YdataSet['val']

# Network building
# NOTE(review): the input is declared as [None, n_inputs, n_steps]; confirm
# this axis order matches what load_csvdata returns.
net = tflearn.input_data([None, n_inputs, n_steps])
net = tflearn.lstm(net, n_hidden_units)  #, dropout=0.8
net = tflearn.fully_connected(net, n_classes, activation='linear')
net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=0.001,
                         loss='mean_square')

# Training
# Validates on the dedicated 'val' split; the 'test' split is only used for
# the final predict call.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(x_train,
          y_train,
          n_epoch=100,
          validation_set=(x_val, y_val),
          show_metric=True,
          batch_size=32)
predictions = model.predict(x_test)
예제 #32
0
        frames = frames[..., None]

        # Explore alterative cropping/resizing methods
        frames = tf.image.resize_image_with_crop_or_pad(
            frames, 434, 774)  # Determine max heaight/with and change
        frames = frames[:, :, :, 0]

        cap.release()

        frames = frames.eval()

        vid_frames.append(frames)
        annotations.append(anno)

    net = tflearn.input_data(shape=[None, 434, 774])
    net = tflearn.lstm(net, 128, return_seq=True)
    net = tflearn.lstm(net, 128)
    net = tflearn.fully_connected(net, 7, activation='sigmoid')
    net = tflearn.regression(net,
                             optimizer='adam',
                             loss='binary_crossentropy',
                             name="output1")

    model = tflearn.DNN(net, tensorboard_verbose=2)

    model.fit(vid_frames,
              annotations,
              n_epoch=1,
              validation_set=0.2,
              show_metric=True,
              snapshot_step=100)
import tflearn
from tflearn.data_utils import *

# Builds the network for a character-level text generator trained on
# "dataset.txt". The variable `model` is rebound at every step and holds the
# network tensor until it is wrapped further down.
data_path = "dataset.txt"

# Length of each training sequence; also the second dimension of the input
# layer below.
maxlen = 20

# Translate text file to vectors (X, Y) plus the character dictionary.
X, Y, char_idx = textfile_to_semi_redundant_sequences(data_path, seq_maxlen=maxlen, redun_step=3)

# Create LSTM model
# Input: maxlen steps, each of width len(char_idx).
model = tflearn.input_data(shape=[None, maxlen, len(char_idx)])

# First LSTM returns the whole sequence so a second LSTM can be stacked on it.
model = tflearn.lstm(model, 512, return_seq=True)

model = tflearn.dropout(model, 0.5)

model = tflearn.lstm(model, 512)

model = tflearn.dropout(model, 0.5)

# Softmax over the character dictionary.
model = tflearn.fully_connected(model, len(char_idx), activation="softmax")

model = tflearn.regression(model, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

# Generate city names
model = tflearn.SequenceGenerator(model,
                                  dictionary=char_idx,
# Data preprocessing
# trainX/trainY/testX/testY are loaded earlier in the script (not visible in
# this excerpt).
# NOTE: Padding is required for dimension consistency. This will pad sequences
# with 0 at the end, until it reaches the max sequence length. 0 is used as a
# masking value by dynamic RNNs in TFLearn; a sequence length will be
# retrieved by counting non zero elements in a sequence. Then dynamic RNN step
# computation is performed according to that length.
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

# Network building
# The input width (100) matches the maxlen used for padding above.
net = tflearn.input_data([None, 100])
# Masking is not required for embedding, sequence length is computed prior to
# the embedding op and assigned as 'seq_length' attribute to the returned Tensor.
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
# dynamic=True selects the length-aware computation described in the NOTE.
net = tflearn.lstm(net, 128, dropout=0.8, dynamic=True)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=0.001,
                         loss='categorical_crossentropy')

# Training
# n_epoch is not passed, so the library default number of epochs applies.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX,
          trainY,
          validation_set=(testX, testY),
          show_metric=True,
          batch_size=32)
예제 #35
0
# Sentiment classifier on IMDB using a dynamic (length-aware) LSTM.

# IMDB Dataset loading
# The third element returned by load_data is discarded.
train, test, _ = imdb.load_data(path="imdb.pkl", n_words=10000, valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Data preprocessing
# NOTE: Padding is required for dimension consistency. This will pad sequences
# with 0 at the end, until it reaches the max sequence length. 0 is used as a
# masking value by dynamic RNNs in TFLearn; a sequence length will be
# retrieved by counting non zero elements in a sequence. Then dynamic RNN step
# computation is performed according to that length.
trainX = pad_sequences(trainX, maxlen=100, value=0.0)
testX = pad_sequences(testX, maxlen=100, value=0.0)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

# Network building
# Input width (100) matches the padding length; the embedding's input_dim
# (10000) matches n_words passed to load_data above.
net = tflearn.input_data([None, 100])
# Masking is not required for embedding, sequence length is computed prior to
# the embedding op and assigned as 'seq_length' attribute to the returned Tensor.
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8, dynamic=True)
net = tflearn.fully_connected(net, 2, activation="softmax")
net = tflearn.regression(net, optimizer="adam", learning_rate=0.001, loss="categorical_crossentropy")

# Training
# The held-out `test` split doubles as the validation set; n_epoch is left at
# the library default.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True, batch_size=32)
from __future__ import division, print_function, absolute_import

import tflearn
from dataset import *

# Five-class sequence classifier trained on the project's DataSet helper.
# NOTE(review): the meaning of the positional DataSet arguments is defined in
# the project's `dataset` module - presumably batch size and
# train/valid/test fractions; confirm there.
dataset = DataSet(32, 0.8, 0.1, 0.1, False, 8)

# Used both as the embedding output size and as the LSTM unit count.
hidden_parameters = 256

# The third element returned by each getter is unused here.
trainX, trainY, _ = dataset.get_train_set()
validX, validY, _ = dataset.get_valid_set()

# Input width and vocabulary size are taken from the dataset statistics.
net = tflearn.input_data([None, dataset.stats['max_sequence_length']])
# input_dim is vocabulary_size + 1.
# NOTE(review): the extra slot is presumably for the padding id 0 - confirm.
net = tflearn.embedding(net, input_dim=(dataset.stats['vocabulary_size'] + 1),
                        output_dim=hidden_parameters)
net = tflearn.lstm(net, hidden_parameters, dropout=0.8, dynamic=True)
net = tflearn.fully_connected(net, 5, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')

# Training
# Checkpoints are written under "checkpoints/tflearn_0"; n_epoch is left at
# the library default.
model = tflearn.DNN(net, tensorboard_verbose=0, checkpoint_path="checkpoints/tflearn_0")
model.fit(trainX, trainY, validation_set=(validX, validY), show_metric=True,
          batch_size=32)
예제 #37
0
    print(xTr.shape, xTe.shape, yTr.shape, yTe.shape)

    x = tf.placeholder(shape=(None, 9), dtype=tf.float32)
    y_ = tf.placeholder(shape=(None, 1), dtype=tf.float32)
    keep_prob = tf.placeholder(tf.float32)

    batch_size = 75
    epochs = 600
    lr = 0.001

    net = tflearn.input_data(placeholder=x)
    net = tflearn.embedding(net,
                            input_dim=21,
                            output_dim=32,
                            weights_init='xavier')
    net = tflearn.lstm(net, 40, activation='tanh', dropout=0.3)
    net = tflearn.dropout(net, 0.2)
    net = tflearn.layers.normalization.batch_normalization(net)
    net = tflearn.fully_connected(net, 1, activation='sigmoid')

    loss = tf.reduce_mean(tf.square(net - y_))
    train_op = tf.train.RMSPropOptimizer(lr).minimize(loss)
    accuracy = tf.contrib.metrics.streaming_root_mean_squared_error(net, y_)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        tflearn.is_training(True, session=sess)

        for step in range(epochs):
            total_batch = int(xTr.shape[0] / batch_size)
예제 #38
0
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb

# IMDB sentiment classifier; the network tensor is kept in the variable `RNN`.

# Load IMDB, keeping the 10000 most frequent words; the third returned
# element is discarded.
train, test, _ = imdb.load_data(path='imdb.pkl',
                                n_words=10000,
                                valid_portion=0.1)
X_train, Y_train = train
X_test, Y_test = test

# Pad every review to 100 ids (filling with 0) and turn the labels into
# two-class binary vectors.
X_train = pad_sequences(X_train, maxlen=100, value=0.)
X_test = pad_sequences(X_test, maxlen=100, value=0.)
Y_train = to_categorical(Y_train, nb_classes=2)
Y_test = to_categorical(Y_test, nb_classes=2)

# Network: embedding -> LSTM -> 2-way softmax.
RNN = tflearn.input_data([None, 100])
RNN = tflearn.embedding(RNN, input_dim=10000, output_dim=128)
# NOTE(review): dropout=0.0 is passed explicitly; check the tflearn.lstm
# documentation for whether this means "no dropout" or "keep nothing".
RNN = tflearn.lstm(RNN, 128, dropout=0.0)
RNN = tflearn.fully_connected(RNN, 2, activation='softmax')
RNN = tflearn.regression(RNN,
                         optimizer='adam',
                         learning_rate=0.001,
                         loss='categorical_crossentropy')

# Train, validating on the held-out split; n_epoch is left at the library
# default.
model = tflearn.DNN(RNN, tensorboard_verbose=0)
model.fit(X_train,
          Y_train,
          validation_set=(X_test, Y_test),
          show_metric=True,
          batch_size=32)
예제 #39
0
)

# Character-level text generator configured from command-line arguments.
# `parser` is built earlier in the script (not visible in this excerpt).
args = vars(parser.parse_args())

size = 256  # units per LSTM layer
drop = 0.5  # value passed to every tflearn.dropout layer
temp = args['temp']
maxlen = args['length']
fname = args['filename']
# File name without directory, cut at the first '.', e.g. "a/b/c.txt" -> "c".
model_name = os.path.split(fname)[1].split('.')[0]  # create model name from textfile input

# Vectorise the text into training sequences plus the character dictionary.
X, Y, char_idx = \
    textfile_to_semi_redundant_sequences(fname, seq_maxlen=maxlen, redun_step=3)

# Three stacked LSTM layers, each followed by dropout; the first two return
# full sequences so the next LSTM can consume them.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, size, return_seq=True)
g = tflearn.dropout(g, drop)
g = tflearn.lstm(g, size, return_seq=True)
g = tflearn.dropout(g, drop)
g = tflearn.lstm(g, size)
g = tflearn.dropout(g, drop)
# Softmax over the character dictionary.
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

# Generator wrapper; checkpoints are named after the input text file.
m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_'+ model_name)

for i in range(50):
예제 #40
0
# Spoken-digit classifier over MFCC features. batch_size, width, height,
# classes and learning_rate are defined earlier in the script (not visible).

# Per the author's note, this helper downloads a set of wav files (each a
# recording of a spoken digit) and yields labelled batches.
batch = word_batch = speech_data.mfcc_batch_generator(
    batch_size
)
# next() pulls a single batch from the generator; it does not split anything.
X, Y = next(
    batch
)
# The same batch is used for both training and testing, so the model can only
# be expected to recognise the speakers it was trained on.
trainX, trainY = X, Y
testX, testY = X, Y  # overfit for now

# Network building
# Input layer: its argument defines the shape of the data fed to the network.
net = tflearn.input_data(
    [None, width, height]
)
# LSTM layer with 128 units (author's note: too few units gives poor
# predictions, too many overfits).
# Author's note on dropout: it counters overfitting by randomly turning off
# some neurons during training, forcing the network to find other paths and
# generalise better.
net = tflearn.lstm(
    net, 128, dropout=0.8
)
# Fully connected softmax layer: `classes` outputs (10 per the author's note,
# one per digit), converted to probabilities.
net = tflearn.fully_connected(
    net, classes, activation='softmax'
)
# Regression layer defining the optimizer, learning rate and loss.
net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=learning_rate,
                         loss='categorical_crossentropy'
                         )
# Training

###  "fix" for tensorflow version errors
# Collect the trainable variables; the loop that follows (not visible in this
# excerpt) consumes `col`.
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
예제 #41
0
    print("Time Cost: {}".format(time.time() - t))
    rnn.draw_err_logs()

    print("=" * 60, "\n" + "Sparse LSTM" + "\n" + "-" * 60)
    generator = MnistGenerator(one_hot=False)
    t = time.time()
    tf.reset_default_graph()
    rnn = SparseRNN()
    rnn.fit(28, 10, generator, n_history=n_history, epoch=10)
    print("Time Cost: {}".format(time.time() - t))
    rnn.draw_err_logs()

    print("=" * 60, "\n" + "Tflearn", "\n" + "-" * 60)
    generator = MnistGenerator()
    t = time.time()
    tf.reset_default_graph()
    net = tflearn.input_data(shape=[None, 28, 28])
    net = tf.concat(tflearn.lstm(net, 128, return_seq=True)[-n_history:],
                    axis=1)
    net = tflearn.fully_connected(net, 10, activation='softmax')
    net = tflearn.regression(net,
                             optimizer='adam',
                             batch_size=64,
                             loss='categorical_crossentropy')
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit(*generator.gen(0),
              n_epoch=10,
              validation_set=generator.gen(0, True),
              show_metric=True)
    print("Time Cost: {}".format(time.time() - t))
import os
import pickle
from six.moves import urllib

import tflearn
from tflearn.data_utils import *

# Character-level text generator trained on "zu05056.txt".
path = "zu05056.txt"

# Length of each training sequence; also the input layer's second dimension.
maxlen = 25

# Vectorise the text into training sequences plus the character dictionary.
X, Y, char_idx = \
    textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3)

# Three stacked 512-unit LSTM layers, each followed by dropout; the first two
# return full sequences so the next LSTM can consume them.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
# Softmax over the character dictionary.
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

# Generator wrapper with gradient clipping; checkpoints use the prefix
# 'model_didactic'.
m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_didactic')

for i in range(10):
예제 #43
0
        bag.append(1) if w in pattern_words else bag.append(0)
    # output is a '0' for each tag and '1' for current tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

# Shuffle the [bag, output_row] pairs built above and split them into
# parallel input / target lists.
random.shuffle(training)
training = np.array(training)
train_x = list(training[:, 0])  # column 0: bag-of-words vectors
train_y = list(training[:, 1])  # column 1: per-class 0/1 target rows

ops.reset_default_graph()
# Build neural network
# Input width is the bag length; output width is the number of classes.
net = tflearn.input_data(shape=[None, len(train_x[0])])
# NOTE(review): the bags built above contain only 0/1 flags, so this
# embedding only ever looks up ids 0 and 1 - confirm this is intended.
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 512, dropout=0.2)
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=0.001,
                         loss='categorical_crossentropy')

# Define model and setup tensorboard
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
# Start training (apply gradient descent algorithm)
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
# Persist the trained weights next to the script.
model.save('model.tflearn')

# save all of our data structures
import pickle
pickle.dump(
예제 #44
0
def main():
    """Train (or load) a character-level LSTM generator, then sample from it.

    Reads its configuration from the module-level ``FLAGS`` object: dataset
    path, maximum sequence length, number and size of LSTM layers, batch
    size, epochs and model file. If ``FLAGS.model_file`` exists the weights
    are loaded from it; otherwise the model is trained and saved. One sample
    is generated, followed by an interactive loop that asks for a temperature
    and generates again until a non-positive value or end-of-input is given.

    :return: None
    """
    path = FLAGS.dataset

    # We avoid using fixed padding: a configured length below 1 means
    # "compute the maximum length from the input set".
    if FLAGS.max_sequence_lenght < 1:
        maxlen = find_maxlenght(path)
    else:
        maxlen = FLAGS.max_sequence_lenght

    print("MaxLen = ", maxlen)
    X, Y, char_idx = textfile_to_semi_redundant_sequences(path,
                                                          seq_maxlen=maxlen,
                                                          redun_step=3)

    # Network structure, using commonly used values for node sizes and dropout.

    # Input layer
    g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])

    # Hidden LSTM layers; each returns the full sequence for the next layer.
    for _ in range(FLAGS.hidden_layer_size):
        g = tflearn.lstm(g, FLAGS.lstm_node_size, return_seq=True)
        g = tflearn.dropout(g, 0.5)

    # Last LSTM layer and a fully connected softmax over the dictionary.
    g = tflearn.lstm(g, FLAGS.lstm_node_size)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')

    # Regression layer: optimizer, loss and learning rate.
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    # Wrap it up in a sequence generator.
    m = tflearn.SequenceGenerator(g,
                                  dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_' +
                                  os.path.basename(path))

    if os.path.exists(FLAGS.model_file):
        # Load the pre-trained model from file instead of training.
        print("Loading model from file ", FLAGS.model_file)
        load_model(m)
    else:
        print("Training model...")
        m.fit(X,
              Y,
              validation_set=0.1,
              batch_size=FLAGS.batch_size,
              n_epoch=FLAGS.epochs,
              run_id=os.path.basename(path))

        # Save the results.
        print("Saving trained model to file ", FLAGS.model_file)
        save_model(m)

    # Generate a test result.
    generate(m, maxlen)

    # Interactive session.
    try:
        # Imported only for its side effect (line editing at the prompt);
        # it is not available on every platform, so its absence is tolerated.
        import readline  # noqa: F401
    except ImportError:
        pass

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        try:
            answer = read_line('Insert temperature for generation: ')
        except EOFError:
            print("Bye!")
            return

        try:
            temp = float(answer)
        except ValueError:
            # A typo must not kill the session; ask again.
            print("Please enter a number (a value <= 0 ends the session).")
            continue

        FLAGS.temperature = temp
        generate(m, maxlen)

        # As before, a non-positive temperature still produces one last
        # sample and then ends the loop.
        if not temp > 0.0:
            break
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb

# Sentiment classifier on IMDB: embedding -> LSTM -> 2-way softmax.

# IMDB Dataset loading
# Keeps the 10000 most frequent words; the third returned element is discarded.
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Data preprocessing
# Sequence padding: every review becomes exactly 100 ids, filled with 0.
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

# Network building
# Input width (100) matches the padding length; input_dim (10000) matches
# n_words above.
net = tflearn.input_data([None, 100])
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')

# Training
# The held-out `test` split is used as the validation set; n_epoch is left at
# the library default.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
          batch_size=32)
예제 #46
0
import tensorflow as tf

from data_analysis16 import data_pre
import sys
sys.path.insert(0, '/home/lpy/tflearn/')
import tflearn

# Builds a single-layer LSTM regressor over 35-day windows of 16 features.
# data_pre comes from the project's data_analysis16 module.
train_data, train_label = data_pre('train')
test_data, test_label = data_pre('test')
# print(...) with one argument prints the same text on Python 2 and 3; the
# former print statements were a syntax error on Python 3.
print(train_data.shape)
print(test_data.shape)
days = 35  # time steps per sample

# Input: `days` steps of 16 features each.
net = tflearn.input_data(shape=[None, days, 16])
net = tflearn.lstm(net, 40, return_seq=False)  #,dropout=0.8)
net = tflearn.dropout(net, 0.8)

#net = tflearn.lstm(net, 512)#,dropout=0.8)
#net = tflearn.dropout(net, 0.8)

# Single linear output trained with mean-square loss and reported with R2.
net = tflearn.fully_connected(net, 1, activation='linear')
net = tflearn.regression(net,
                         optimizer='rmsprop',
                         loss='mean_square',
                         metric='R2',
                         name="target")

model = tflearn.DNN(net)
#model.fit(train_data, train_label, n_epoch=10000,run_id='lstm',snapshot_epoch=True,validation_set=(test_data,test_label), show_metric=True, batch_size=20)
#model.save("model/bitcoin_lstm.tfl")
예제 #47
0
import tflearn
from tflearn.data_utils import *
import time

# Character-level text generator trained on "lusiadas.txt".
path = "lusiadas.txt"
maxlen = 120  # length of each training sequence / input time dimension
EPOCH = 0  # not used in the visible part of the script

print('Start..')

# Vectorise the text into training sequences plus the character dictionary.
X, Y, char_idx = \
    textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3)

print('Creating Model...')
# Three stacked 512-unit LSTM layers, each followed by dropout; the first two
# return full sequences so the next LSTM can consume them.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
# Softmax over the character dictionary.
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
# Adam with learning_rate=0.01.
g = tflearn.regression(g,
                       optimizer='adam',
                       loss='categorical_crossentropy',
                       learning_rate=0.01)

m = tflearn.SequenceGenerator(g,
                              dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
예제 #48
0
    def model(self, feed_previous=False):
        """Assemble the encoder/decoder LSTM graph and return it as a tflearn.DNN.

        The joint "XY" input carries ``2 * max_seq_len`` word vectors per
        sample: the first ``max_seq_len`` steps feed the encoder, the next
        ``max_seq_len - 1`` steps (prefixed with an all-ones GO step) feed
        the decoder.

        :param feed_previous: False feeds each decoder step the matching
            slice of the decoder input; True feeds the GO step first and then
            each previous decoder output.
        :return: tflearn.DNN over a mean-square regression on the encoder
            output concatenated with every decoder output.
        """
        seq_len = self.max_seq_len
        vec_dim = self.word_vec_dim
        one_step = [-1, 1, vec_dim]

        # Build encoder_inputs and the GO-prefixed decoder_inputs from XY.
        xy = tflearn.input_data(shape=[None, seq_len * 2, vec_dim],
                                dtype=tf.float32,
                                name="XY")
        enc_in = tf.slice(xy, [0, 0, 0], [-1, seq_len, vec_dim],
                          name="enc_in")
        dec_tail = tf.slice(xy, [0, seq_len, 0],
                            [-1, seq_len - 1, vec_dim],
                            name="dec_in_tmp")
        go_step = tf.slice(tf.ones_like(dec_tail), [0, 0, 0], one_step)
        dec_in = tf.concat([go_step, dec_tail], 1, name="dec_in")

        # Encoder: yields one output (first element of the predicted
        # sequence) and the state handed to the decoder.
        enc_out, enc_state = tflearn.lstm(enc_in,
                                          vec_dim,
                                          return_state=True,
                                          scope='encoder_lstm')
        enc_out_seq = tf.stack([enc_out], axis=1)

        # Decoder, first step: initialised with the encoder state.
        if feed_previous:
            step_input = go_step
        else:
            step_input = tf.slice(dec_in, [0, 0, 0], one_step)
        step_out = tflearn.lstm(step_input,
                                vec_dim,
                                initial_state=enc_state,
                                return_seq=False,
                                reuse=False,
                                scope='decoder_lstm')
        prev_as_seq = tf.stack([step_out], axis=1)
        dec_outs = [step_out]

        # Decoder, remaining steps: the same layer is reused; each step gets
        # either the previous output or the next slice of the decoder input.
        for t in range(1, seq_len):
            if feed_previous:
                step_input = prev_as_seq
            else:
                step_input = tf.slice(dec_in, [0, t, 0], one_step)
            step_out = tflearn.lstm(step_input,
                                    vec_dim,
                                    return_seq=False,
                                    reuse=True,
                                    scope='decoder_lstm')
            prev_as_seq = tf.stack([step_out], axis=1)
            dec_outs.append(step_out)

        # Join encoder output and decoder outputs along axis 1.
        dec_out_seq = tf.stack(dec_outs, axis=1)
        full_seq = tf.concat([enc_out_seq, dec_out_seq], 1)

        net = tflearn.regression(full_seq,
                                 optimizer='sgd',
                                 learning_rate=0.1,
                                 loss='mean_square')
        return tflearn.DNN(net)
예제 #49
0
# Speaker classifier over MFCC features. learning_rate, utils and tf are
# defined/imported earlier in the script (not visible in this excerpt).
batch_size = 50

width = 20  # mfcc features
height = 80  # (max) length of utterance
# classes = 10  # digits
# One output class per speaker name.
speakers = ['adrian', 'zhanet']
number_classes = len(speakers)

batch = utils.mfcc_batch_generator(speakers,
                                   batch_size=batch_size,
                                   utterance_len=height)

# Network building
# The input is declared as [height, width]; the disabled line below keeps the
# earlier [width, height] layout for reference.
# net = tflearn.input_data([None, width, height])
net = tflearn.input_data([None, height, width])
net = tflearn.lstm(net, 800, dropout=0.5)
net = tflearn.fully_connected(net, number_classes, activation='softmax')
net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=learning_rate,
                         loss='categorical_crossentropy')
model = tflearn.DNN(net, tensorboard_verbose=3)

# Loading of previously saved weights is disabled:
# model.load('models/lstm.model_80_800.0.1')

## add this "fix" for tensorflow version errors
# Registers every trainable variable in the tf.GraphKeys.VARIABLES collection
# as well.
for x in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x)

# Training
예제 #50
0
def main():
    """Fit a small LSTM to a single-column CSV series and plot the results.

    The training file path is read from ``sys.argv[1]``. When the argument
    is missing, or the path does not exist, a message is printed and the
    process exits with status -1.
    """
    # Guard clauses: reject a bad invocation before doing any work.
    if len(sys.argv) < 2:
        print('Usage: simple_lstm.py <TRAIN_FILE>')
        sys.exit(-1)

    train_file = sys.argv[1]
    if not os.path.exists(train_file):
        print('Not found:', train_file)
        sys.exit(-1)

    # Load column 1 only, skipping 7 leading rows and 1 trailing row.
    csv_options = dict(
        delimiter=',',
        skiprows=7,
        skipfooter=1,
        header=None,
        engine='python',
        usecols=[1],
    )
    frame = pd.read_csv(train_file, **csv_options)
    raw_data = frame.values.astype('float32')
    print('n_raw_data :', frame.size)

    # Normalize through the project helper; the returned min/max values are
    # not used further down.
    _min_data, _max_data, raw_data = min_max_normalize(raw_data)

    # Build supervised samples from the series, then split 80/20.
    X, y = create_dataset(raw_data)
    X_train, X_test, y_train, y_test = train_test_split_data(
        X, y, train_size=0.8)
    print('Train size:', len(X_train))
    print('Test size:', len(X_test))

    # LSTM regressor: 6 recurrent units feeding a single linear output.
    layers = tflearn.input_data(shape=[None, 1, 1])
    layers = tflearn.lstm(layers, n_units=6)
    layers = tflearn.fully_connected(layers, 1, activation='linear')
    layers = tflearn.regression(
        layers, optimizer='adam', learning_rate=0.001, loss='mean_square')

    model = tflearn.DNN(layers, tensorboard_verbose=0)
    model.fit(X_train, y_train, validation_set=0.1, batch_size=1, n_epoch=150)

    # Predictions for both partitions of the data.
    train_predict = model.predict(X_train)
    test_predict = model.predict(X_test)

    def nan_filled_like_series():
        # Array shaped like the series with every cell set to NaN, so that
        # rows without a prediction are left out of the plot.
        canvas = np.empty_like(raw_data)
        canvas[:, :] = np.nan
        return canvas

    # Training predictions occupy rows 1..len(train_predict); test
    # predictions fill everything after that.
    boundary = len(train_predict) + 1

    train_predict_plot = nan_filled_like_series()
    train_predict_plot[1:boundary, :] = train_predict

    test_predict_plot = nan_filled_like_series()
    test_predict_plot[boundary:, :] = test_predict

    plt.figure(figsize=(8, 8))
    for series in (raw_data, train_predict_plot, test_predict_plot):
        plt.plot(series)
    plt.show()
예제 #51
0
        sc.fit(X_train)

        # save the Standardizer
        joblib.dump(
            sc, 'Saved_Models/LSTM_n_epochs_{}/standard.pkl'.format(n_epoch))

        X_train_sd = sc.transform(X_train)
        X_test_sd = sc.transform(X_test)

        # Reshape each sample to a (13, 107) matrix to feed it into lstm
        X_test_sd = np.reshape(X_test_sd, (X_test_sd.shape[0], 13, 107))
        X_train_sd = np.reshape(X_train_sd, (X_train_sd.shape[0], 13, 107))

        # Model
        net = tflearn.input_data([None, 13, 107], name='input')
        net = tflearn.lstm(net, 100, dropout=0.8)
        net = tflearn.fully_connected(net, 128, activation='linear')
        net = tflearn.fully_connected(net, 2, activation='softmax')
        net = tflearn.regression(net,
                                 optimizer='adam',
                                 learning_rate=0.001,
                                 loss='categorical_crossentropy')

        # Define model with checkpoint (autosave)
        model = tflearn.DNN(net, tensorboard_verbose=3)

        # Train model with checkpoint every epoch and every 500 steps
        model.fit(X_train_sd,
                  Y_train,
                  n_epoch=n_epoch,
                  show_metric=True,
예제 #52
0
파일: lstm.py 프로젝트: ClarenceKe/tflearn
# Load the IMDB data and unpack each split into its (inputs, labels) pair.
train, val, test = imdb.load_data(path='imdb.pkl', maxlen=200,
                                  n_words=20000)
(trainX, trainY), (valX, valY), (testX, testY) = train, val, test

# Data preprocessing
# Pad every split's sequences to length 200 with 0., in train/val/test order.
trainX, valX, testX = [
    pad_sequences(sequences, maxlen=200, value=0.)
    for sequences in (trainX, valX, testX)
]
# Turn every split's labels into 2-class binary vectors, same order.
trainY, valY, testY = [
    to_categorical(labels, nb_classes=2)
    for labels in (trainY, valY, testY)
]

# Network: 200 token ids -> 128-d embedding -> LSTM(128) -> dropout -> softmax.
net = tflearn.input_data([None, 200])
net = tflearn.embedding(net, input_dim=20000, output_dim=128)
net = tflearn.lstm(net, 128)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(
    net,
    optimizer='adam',
    loss='categorical_crossentropy',
)

# Training. The test split is what gets passed as the validation set here;
# valX / valY are prepared above but not used in this snippet.
model = tflearn.DNN(net, clip_gradients=0., tensorboard_verbose=0)
model.fit(
    trainX,
    trainY,
    validation_set=(testX, testY),
    show_metric=True,
    batch_size=128,
)
예제 #53
0
from tflearn.models.dnn import DNN
from tflearn.models.generator import SequenceGenerator
from tflearn.data_utils import VocabularyProcessor
from tflearn.data_preprocessing import DataPreprocessing
from tflearn.helpers.trainer import Trainer
from tflearn.helpers.evaluator import Evaluator
from tflearn.helpers.summarizer import summarize
from tflearn.helpers.regularizer import add_weights_regularizer
from tensorflow.contrib.slim import dataset
from tensorflow.contrib.slim import dataset

# Argument-less calls to a range of tflearn entry points (layers, activations,
# objectives, optimizers, initializers, losses, metrics, preprocessing).
# NOTE(review): no call here passes any argument, and a plain `import tflearn`
# is not among the visible imports above. Presumably this is a scratch list
# used for API lookup / IDE navigation rather than code meant to be
# executed -- confirm before running it.
tflearn.input_data()
tflearn.variable()
tflearn.conv_2d()
tflearn.single_unit()
tflearn.lstm()
tflearn.embedding()
tflearn.batch_normalization()
tflearn.merge()
tflearn.regression()
tflearn.tanh()
tflearn.softmax_categorical_crossentropy()
tflearn.SGD()
tflearn.initializations.uniform()
tflearn.losses.L1()
tflearn.add_weights_regularizer()
tflearn.metrics.Accuracy()
tflearn.summaries()
tflearn.ImagePreprocessing()
tflearn.ImageAugmentation()
tflearn.init_graph()
예제 #54
0
import tflearn as tfl
import tensorflow as tf
import encoding
import os

# Script: declares command-line flags, loads data through the project's
# `encoding` module, then builds and fits a tflearn SequenceGenerator.
flags = tf.flags
logging = tf.logging
# Default input directory (<cwd>/input/) and output directory (<cwd>).
FILE_PATH= os.getcwd()+'/input/'
SAVE_PATH= os.getcwd()
# Used below both as the last dimension of the network input and as the
# number of softmax outputs.
INPUT_SIZE = 13
flags.DEFINE_string("model", "small", "A type of model. Possible options are: small, medium, large.")
flags.DEFINE_string("data_path", FILE_PATH, "Where the training/test data is stored.")
flags.DEFINE_string("save_path", SAVE_PATH, "Model output directory.")
flags.DEFINE_bool("use_fp16", False, "Train using 16-bit floats instead of 32bit floats")
FLAGS = flags.FLAGS

# `encoding` is a project module not shown here; its return value is unpacked
# into four parts. Only train_data is used in the visible lines.
raw_data = encoding.input_data(FLAGS.data_path)
train_data, valid_data, test_data, vocabulary = raw_data

# NOTE(review): the meaning of the two None positional arguments depends on
# encoding.input_producer's signature -- confirm against that module.
input_data, targets = encoding.input_producer(train_data, None, None, name=None)


# Network: [None, 10, INPUT_SIZE] input -> LSTM(512) -> dropout -> softmax over
# INPUT_SIZE outputs, trained with adam on categorical cross-entropy.
g = tfl.input_data(shape=[None, 10, INPUT_SIZE])
g = tfl.lstm(g, 512)
g = tfl.dropout(g, 0.5)
g = tfl.fully_connected(g, INPUT_SIZE, activation='softmax')
g = tfl.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)

# NOTE(review): other SequenceGenerator uses in this file pass `dictionary`
# and `seq_maxlen`; here only the network is given -- confirm this is intended.
m = tfl.SequenceGenerator(g)
m.fit(input_data, targets)
예제 #55
0
# Hyper-parameters.
learning_rate = 0.0001
training_iters = 300000  # number of training steps
batch_size = 64

# Input geometry and label count.
width = 20  # mfcc features
height = 80  # (max) length of utterance
classes = 10  # digits

# Draw a single batch from the generator and use it as both the training and
# the test data (deliberate overfitting for now, per the original note).
word_batch = speech_data.mfcc_batch_generator(batch_size)
batch = word_batch
X, Y = next(batch)
trainX = testX = X
trainY = testY = Y

# Network: [None, width, height] input -> LSTM(128) -> softmax over `classes`.
net = tflearn.input_data(shape=[None, width, height])
net = tflearn.lstm(net, n_units=128, dropout=0.8)
net = tflearn.fully_connected(net, n_units=classes, activation='softmax')
net = tflearn.regression(
    net,
    loss='categorical_crossentropy',
    optimizer='adam',
    learning_rate=learning_rate,
)

# Workaround for tensorflow version errors: copy every trainable variable
# into the tf.GraphKeys.VARIABLES collection before the model is created.
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x)

# Training
model = tflearn.DNN(net, tensorboard_verbose=0)
while 1:  #training_iters
    model.fit(trainX,
# Split each loaded dataset into its input (X) and target (Y) parts.
# `train` and `test` are defined earlier in the original script (not shown).
trainX, trainY = train
testX, testY = test



# Data preprocessing


# Sequence padding: bring every input sequence to a fixed length of 100,
# filling with 0., so it matches the [None, 100] input layer declared below.
# Uses tflearn's pad_sequences helper.
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)


# Convert the labels to 2-class binary vectors. Per the original author's
# note the classes are 0 (negative review) and 1 (positive review).
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

# Network building: input -> embedding -> LSTM -> fully connected softmax
# -> regression (adam, categorical cross-entropy).
net = tflearn.input_data([None, 100])
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
# LSTM layer, used for long-term dependencies.
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')

# Training with tflearn's DNN wrapper; the test split is passed as the
# validation set.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
          batch_size=32)
예제 #57
0
import numpy as np
import tflearn as tfl
from tflearn.data_utils import pad_sequences
import data_preprocessing as dp

# Script: rebuilds the sentiment network, loads saved weights and predicts on
# the test inputs.
# Only the second element returned by each `dp` helper is kept; `dp` is the
# project's data_preprocessing module (not shown here).
_, testX = dp.convert_reviews()
_, testY = dp.get_sentiment_arrays()

# Pad every test sequence to length 200 with 0. to match the input layer below.
testX = pad_sequences(testX, maxlen=200, value=0.)

# rebuild network structure.
# NOTE(review): presumably this must mirror the architecture that produced
# './saved_models/model1.tfl' for the load below to succeed -- confirm against
# the training script.
net = tfl.input_data([None, 200])
net = tfl.embedding(net, input_dim=10000, output_dim=128)
net = tfl.lstm(net, 128, dropout=0.8)
net = tfl.fully_connected(net, 2, activation='softmax')
net = tfl.regression(net,
                     optimizer='adam',
                     learning_rate=0.001,
                     loss='categorical_crossentropy')

model = tfl.DNN(net, tensorboard_verbose=0)
# Restore the trained weights from disk.
model.load('./saved_models/model1.tfl')

# On my machine, running this prediction shows memory warnings
# These warnings shouldn't actually cause any issues
predictions = model.predict(testX)

results = []
for i in predictions:
    if i[0] > i[1]:
        results.append(0)
                          img_c])  # [batch_size, height, width, channels]
# Integer class labels, one per example in the batch.
y_ = tf.placeholder(tf.int64, shape=[
    None,
])

# # Building convolutional network
# network = input_data(shape=[None, img_h, img_w, img_c], name='input')
# The trailing shape annotations below are the original author's notes.
network = tflearn.conv_2d(x, 32, 3, activation='relu',
                          regularizer="L2")  # [-1,19,19,32]
network = tflearn.max_pool_2d(network, 2)  # [-1,10,10,32]

# Collapse the last two axes so the LSTM receives 10 steps of 10*32 features.
# network=tf.reshape(network,[-1,10,10*32]) #[-1,10,10*32]
network = tflearn.layers.reshape(network, [-1, 10, 10 * 32])

# network=tflearn.embedding(network, input_dim=img_h*img_w*img_c, output_dim=hiddle_layes)  # [-1,256,128]
network = tflearn.lstm(network, hiddle_layes_2,
                       dropout=dropout_)  # 10 sequences of [-1,256]

network = tflearn.fully_connected(network, 512,
                                  activation='tanh')  # [-1,512] flatten() by default
network = tflearn.dropout(network, dropout_)

# Keep the final layer linear so the raw class scores (logits) are available.
# sparse_softmax_cross_entropy_with_logits applies softmax internally, so
# feeding it already-softmaxed probabilities (as the earlier version did)
# yields an incorrect loss.
logits = tflearn.fully_connected(network, n_classes,
                                 activation='linear')  # [-1,2]

# Class probabilities for prediction; `network` and `y` still refer to the
# softmax output so later code that reads them keeps working.
network = tf.nn.softmax(logits)

y = network
# --------------------------------------------------------------------#
cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_,
                                                   logits=logits,
                                                   name='cost'))
# argmax over the probabilities equals argmax over the logits.
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
예제 #59
0
    train_split, test_split = shuffle[:int(records * test_fraction
                                           )], shuffle[int(records *
                                                           test_fraction):]

    trainX, trainY = word_vectors[train_split, :], to_categorical(
        Y.values[train_split], 2)
    testX, testY = word_vectors[test_split, :], to_categorical(
        Y.values[test_split], 2)
    # # Network construction

    # The vector input is of represented using a vector of size 5000
    net = tflearn.input_data([None, 5000])
    # The Embedding layer represents the words in VSM on semantic similarity contributing to sentiment
    net = tflearn.embedding(net, input_dim=5000, output_dim=128)
    # The lstm learn the relationship between the vectors
    net = tflearn.lstm(net, 128)
    # The Pooling layer
    net = tflearn.fully_connected(net, 2, activation='softmax')
    # performs gradient descent using adam optimizer
    net = tflearn.regression(net,
                             optimizer='adam',
                             learning_rate=0.01,
                             loss='categorical_crossentropy')

    # # Train the network
    model = tflearn.DNN(net, tensorboard_verbose=0)
    if (test == 'Y'):
        model.fit(trainX,
                  trainY,
                  validation_set=0.1,
                  show_metric=True,
# Hyper-parameters.
learning_rate = 0.0001
training_iters = 5000  # number of training steps (3000000 was used before)
batch_size = 64

# Input geometry and label count.
width = 20  # MFCC features
height = 80  # (max) length of utterance
classes = 10  # digits

# Draw a single batch from the generator; the same batch serves as both the
# training data and the test data.
word_batch = speech_data_mod.mfcc_batch_generator(batch_size)
batch = word_batch
X, Y = next(batch)
trainX = testX = X
trainY = testY = Y

# Network: [None, width, height] input -> LSTM(128) -> softmax over `classes`.
# (A wider LSTM with 128*4 units was tried previously.)
net = tflearn.input_data(shape=[None, width, height])
net = tflearn.lstm(net, n_units=128, dropout=0.5)
net = tflearn.fully_connected(net, n_units=classes, activation='softmax')
net = tflearn.regression(
    net,
    loss='categorical_crossentropy',
    optimizer='adam',
    learning_rate=learning_rate,
)

# Workaround for tensorflow version errors: copy every trainable variable
# into the tf.GraphKeys.VARIABLES collection before the model is created.
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x)

# Training
model = tflearn.DNN(net, tensorboard_verbose=0)

for step in range(0, training_iters):