Code example #1
def get_trainable_model():
    return model.GRU(NUM_EMB,
                     EMB_DIM,
                     HIDDEN_DIM,
                     SEQ_LENGTH,
                     START_TOKEN,
                     learning_rate=LEARNING_RATE)
Code example #2
def get_trainable_model():
    """ Creates GRU object, which extends Recurrent Neural Network class in model.py"""
    return model.GRU(NUM_EMB,
                     EMB_DIM,
                     HIDDEN_DIM,
                     SEQ_LENGTH,
                     START_TOKEN,
                     learning_rate=LEARNING_RATE)
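The docstring in example #2 states that GRU extends a recurrent base class defined in model.py. For orientation only, the call above implies a constructor roughly along the lines sketched below; every name and default beyond the arguments visible in the call is a hypothetical assumption, not the actual contents of model.py.

# Hypothetical sketch of the constructor interface implied by the calls above;
# model.py itself is not shown in these examples.
class RNN(object):
    def __init__(self, num_emb, emb_dim, hidden_dim,
                 sequence_length, start_token, learning_rate=0.01):
        self.num_emb = num_emb                  # vocabulary size
        self.emb_dim = emb_dim                  # token embedding dimension
        self.hidden_dim = hidden_dim            # recurrent hidden-state size
        self.sequence_length = sequence_length  # fixed sequence length
        self.start_token = start_token          # id of the start-of-sequence token
        self.learning_rate = learning_rate

class GRU(RNN):
    """GRU specialisation of the hypothetical recurrent base class."""
    pass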
Code example #3
def main():
    batch_size = 16
    num_epochs = 2  # the number of epochs must be <= 2
    save_path = './model/'
    word_emb, train, dev = init_data()

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    gpu_config = "/gpu:0"

    max_acc = 0.0
    with tf.Session(config=config) as sess:
        with tf.device(gpu_config):
            initializer = tf.contrib.layers.xavier_initializer()
            with tf.variable_scope("model",
                                   reuse=None,
                                   initializer=initializer):
                re_model = model.GRU(True, word_emb)

            global_step = tf.Variable(0, name="global_step", trainable=False)

            train_op = tf.train.AdamOptimizer(0.001).minimize(
                re_model.final_loss, global_step=global_step)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()

            max_acc = 0.0
            print("Training model...")
            num_iterations = int(math.ceil(1.0 * len(train[0]) / batch_size))
            for epoch in range(num_epochs):
                print("Epoch: ", epoch)
                #shuffle the examples
                sh_index = np.arange(len(train[0]))
                np.random.shuffle(sh_index)
                for i in range(len(train)):
                    train[i] = train[i][sh_index]

                for iteration in range(num_iterations):
                    #get a batch
                    word_batch, pos1_batch, pos2_batch, ent_batch, y_batch = get_next_batch(
                        train, iteration * batch_size, batch_size)

                    train_shape = []
                    train_word = []
                    train_pos1 = []
                    train_pos2 = []
                    train_ent_type = []
                    #train_partofspeech = []
                    train_word_num = 0

                    #process the batches
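                    # train_shape collects the cumulative word offset of each
                    # sentence in the flattened batch plus the total word count
                    # (e.g. sentence lengths 3, 5, 2 give [0, 3, 8, 10]); it is
                    # fed to re_model.input_shape below.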
                    for i in range(len(word_batch)):
                        train_shape.append(train_word_num)
                        train_word_num += len(word_batch[i])
                        train_word.extend([word for word in word_batch[i]])
                        train_pos1.extend([pos1 for pos1 in pos1_batch[i]])
                        train_pos2.extend([pos2 for pos2 in pos2_batch[i]])
                        train_ent_type.extend([ent for ent in ent_batch[i]])
                        #train_partofspeech.extend([pos for pos in partspeech_batch[i]])

                    train_shape.append(train_word_num)
                    train_shape = np.array(train_shape)
                    train_word = np.array(train_word)
                    train_pos1 = np.array(train_pos1)
                    train_pos2 = np.array(train_pos2)
                    #train_partofspeech = np.array(train_partofspeech)
                    train_ent_type = np.array(train_ent_type)

                    _, step, train_loss, train_acc, _, _ = sess.run(
                        [
                            train_op, global_step, re_model.total_loss,
                            re_model.accuracy, re_model.l2_loss,
                            re_model.final_loss
                        ],
                        feed_dict={
                            re_model.input_shape: train_shape,
                            re_model.input_word: train_word,
                            re_model.input_pos1: train_pos1,
                            re_model.input_pos2: train_pos2,
                            re_model.input_ent_type: train_ent_type,
                            #re_model.input_speech: train_partofspeech,
                            re_model.input_y: y_batch
                        })

                    if step % 50 == 0:
                        train_acc = np.reshape(np.array(train_acc),
                                               (batch_size))
                        train_acc = np.mean(train_acc)
                        print("step {}, loss {:g}, train accuracy {:g}".format(
                            step, train_loss, train_acc))

                    if step % 100 == 0:
                        # perform validation
                        dev_order = list(range(len(dev[0])))
                        random_idx = random.randint(
                            0, int(len(dev_order) / float(batch_size)))

                        word_batch, pos1_batch, pos2_batch, ent_batch, dev_y = get_next_batch(
                            dev, random_idx * batch_size, batch_size)
                        dev_shape = []
                        dev_word = []
                        dev_pos1 = []
                        dev_pos2 = []
                        dev_ent_type = []
                        #dev_partofspeech = []
                        dev_word_num = 0

                        for i in range(len(word_batch)):
                            dev_shape.append(dev_word_num)
                            dev_word_num += len(word_batch[i])
                            dev_word.extend([word for word in word_batch[i]])
                            dev_pos1.extend([pos1 for pos1 in pos1_batch[i]])
                            dev_pos2.extend([pos2 for pos2 in pos2_batch[i]])
                            dev_ent_type.extend([ent for ent in ent_batch[i]])
                            #dev_partofspeech.extend([pos for pos in speech_batch[i]])

                        dev_shape.append(dev_word_num)
                        dev_shape = np.array(dev_shape)
                        dev_word = np.array(dev_word)
                        dev_pos1 = np.array(dev_pos1)
                        dev_pos2 = np.array(dev_pos2)
                        #dev_partofspeech = np.array(dev_partofspeech)
                        dev_ent_type = np.array(dev_ent_type)

                        dev_loss, dev_acc = sess.run(
                            [re_model.total_loss, re_model.accuracy],
                            feed_dict={
                                re_model.input_shape: dev_shape,
                                re_model.input_word: dev_word,
                                re_model.input_pos1: dev_pos1,
                                re_model.input_pos2: dev_pos2,
                                re_model.input_ent_type: dev_ent_type,
                                #re_model.input_speech: dev_partofspeech,
                                re_model.input_y: dev_y
                            })

                        dev_acc = np.reshape(np.array(dev_acc), (batch_size))
                        dev_acc = np.mean(dev_acc)
                        print("dev performance: accuracy {:g}".format(dev_acc))

                        if max_acc < dev_acc:
                            max_acc = dev_acc
                            saver.save(
                                sess,
                                save_path + str(epoch) + '_RE_model.ckpt')
Code example #4
args.cuda = (not args.no_cuda) and torch.cuda.is_available()
del args.no_cuda
args.save_dir = os.path.join(
    args.save_dir,
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model
m_model = None
if args.snapshot is None:
    if args.which_model == 'lstm':
        m_model = model.LSTM(args, m_embedding)
    elif args.which_model == 'gru':
        m_model = model.GRU(args, m_embedding)
    elif args.which_model == 'rnn':
        m_model = model.RNN(args, m_embedding)
else:
    print('\nLoading model from [%s]...' % args.snapshot)
    try:
        m_model = torch.load(args.snapshot)
    except Exception:
        print("Sorry, this snapshot doesn't exist.")
        exit()
if args.cuda:
    m_model = m_model.cuda()

# train or predict
assert m_model is not None
Code example #5
def main():
    with tf.Session() as sess:
        initializer = tf.contrib.layers.xavier_initializer()
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            re_model = model.GRU(False, word_emb)
            
            print("loading model parameter...")
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
    
            print("Testing...")
            #get file name/path lists from a txt file
            with open("./vectorized_data/vectorized_test/file_list.txt", "r") as f:
                files = f.readlines()
                for name in files:
                    name = name.strip()
                    print("Predicting file: " + name)
                    if not name:
                        continue
                    test, test_ann, test_pairs = load_file(name)
                    test_order = list(range(len(test[0])))
                    num_iterations = int(math.ceil(1.0 * len(test_order) / batch_size))
                    y_hat = []  # predictions
                    
                    for i in range(num_iterations): 
                        word_batch, pos1_batch, pos2_batch, ent_type_batch, y_batch = get_next_batch(test, i * batch_size, batch_size)
                        test_shape = []
                        test_word = []
                        test_pos1 = []
                        test_pos2 = []
                        #test_speech = []
                        test_ent_type = []
                        test_word_num = 0
                        
                        for j in range(len(word_batch)):
                            test_shape.append(test_word_num)
                            test_word_num += len(word_batch[j])
                            test_word.extend([word for word in word_batch[j]])
                            test_pos1.extend([pos1 for pos1 in pos1_batch[j]])
                            test_pos2.extend([pos2 for pos2 in pos2_batch[j]])
                            #test_speech.extend([pos for pos in speech_batch[j]])
                            test_ent_type.extend([ent for ent in ent_type_batch[j]])
                         
                        test_shape.append(test_word_num)
                        test_shape = np.array(test_shape)
                        test_word = np.array(test_word)
                        test_pos1 = np.array(test_pos1)
                        test_pos2 = np.array(test_pos2)
                        #test_speech = np.array(test_speech)
                        test_ent_type = np.array(test_ent_type)
                        
                        pred = sess.run(
                            [re_model.predictions],
                            feed_dict={
                                re_model.input_shape: test_shape,
                                re_model.input_word: test_word,
                                re_model.input_pos1: test_pos1,
                                re_model.input_pos2: test_pos2,
                                re_model.input_ent_type: test_ent_type,
                                #re_model.input_speech: test_speech,
                                re_model.input_y: y_batch
                            })
    
                        y_hat += list(pred[0])
                    
                    #output results to files
                    build_output_file(len(test[0]), y_hat, test_pairs, test_ann, name)
Code example #6
def run(batch_size,
        permuted,
        modeltype='surprise_gru',
        n_hidden=64,
        zoneout=0.25,
        layer_norm=True,
        optimizer='adam',
        learnrate=1e-3,
        aux_weight=0.1,
        cuda=True,
        resume=False):
    assert isinstance(batch_size, int)
    assert isinstance(permuted, bool)
    assert modeltype in MODELS_IMPLEMENTED
    assert isinstance(n_hidden, int)
    assert isinstance(zoneout, (int, float))
    assert isinstance(layer_norm, bool)
    assert isinstance(optimizer, str)
    assert isinstance(learnrate, (int, float))
    assert isinstance(cuda, bool)
    assert isinstance(resume, bool)

    # Name the experiment s.t. parameters are easily readable
    exp_name = (
        '%s_perm%r_h%i_z%.2f_norm%r_%s' %
        (modeltype, permuted, n_hidden, zoneout, layer_norm, optimizer))
    exp_path = os.path.join('/home/jason/experiments/recurrent_pytorch/',
                            exp_name)
    if not os.path.isdir(exp_path):
        os.makedirs(exp_path)

    if not resume:
        # Store experiment params in params.json
        params = {
            'batch_size': batch_size,
            'permuted': permuted,
            'modeltype': modeltype,
            'n_hidden': n_hidden,
            'zoneout': zoneout,
            'layer_norm': layer_norm,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'aux_weight': aux_weight,
            'cuda': cuda
        }
        with open(os.path.join(exp_path, 'params.json'), 'w') as f:
            json.dump(params, f)

        # Model
        if modeltype.lower() == 'rnn':
            net = model.RNN(1, n_hidden, 10, layer_norm)
        elif modeltype.lower() == 'gru':
            net = model.GRU(1, n_hidden, 10, layer_norm)
        elif modeltype.lower() == 'surprise_gru':
            net = model.SurpriseGRU(1, n_hidden, 10, layer_norm)
        else:
            raise ValueError('unsupported modeltype: %s' % modeltype)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_path, 'params.json'))
                and os.path.isfile(os.path.join(exp_path, 'stats.json'))
                and os.path.isfile(os.path.join(exp_path, 'checkpoint'))):
            raise Exception(
                'Missing params, stats or checkpoint file (resume)')
        net = torch.load(os.path.join(exp_path, 'checkpoint'))

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size,
                                          sequential=True,
                                          permuted=permuted)

    # Train
    train.fit_recurrent(train_loader,
                        val_loader,
                        net,
                        exp_path,
                        zoneout,
                        optimizer,
                        aux_weight=aux_weight,
                        cuda=cuda,
                        resume=resume)

    # Post-training visualization
    post_training(exp_path, val_loader)
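Since run() exposes its whole configuration as keyword arguments, it can be driven directly from a script or notebook. The call below is illustrative only; the argument values are examples, not the settings used for the original experiments.

# Illustrative invocation only; values are examples, not the original settings.
run(batch_size=64,
    permuted=False,
    modeltype='gru',
    n_hidden=128,
    zoneout=0.25,
    layer_norm=True,
    optimizer='adam',
    learnrate=1e-3,
    aux_weight=0.1,
    cuda=False,
    resume=False)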
Code example #7
			#num = re.split('_|\.', filename)[-2]
			#start_epoch = int(num)+1

			print "Modello recuperato dal file "+filename
		else:
			print "Nessun file trovato per il modello "+args.model+". Ne verrà creato uno nuovo."
			args.restart = False

	# instantiate a new neural network
	if not args.restart:
		if args.model == 'RNN':
			rnn = model.RNN(data.n_letters, args.n_hidden, data.n_categories, cuda=args.cuda)
		elif args.model == 'LSTM':
			rnn = model.LSTM(input_size=data.n_letters, hidden_size=args.n_hidden, output_size=data.n_categories, cuda=args.cuda)
		elif args.model == 'GRU':
			rnn = model.GRU(input_size=data.n_letters, hidden_size=args.n_hidden, output_size=data.n_categories, cuda=args.cuda)

	assert rnn

	#optimizer = torch.optim.SGD(rnn.parameters(), lr=args.lr)
	optimizer = torch.optim.Adam(rnn.parameters(), lr=args.lr)
	criterion = nn.NLLLoss()

	if args.cuda:
		rnn.cuda()
		criterion.cuda()

	start = time.time()
	num_batches = data.n_instances / args.batch_size
	print "num_batches: "+str(num_batches)