Example #1
    def _init_model(self):
        # if not self.model:
        try:
            with tf.variable_scope('classifier'):
                self.model = RNNModel(self.args)
                # self.model = BIDIRNNModel(self.args)
        except ValueError as ve:
            with tf.variable_scope('classifier', reuse=True):
                self.model = RNNModel(self.args)
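The try/except above works around TensorFlow 1.x variable sharing: the first call creates the variables in the 'classifier' scope, and the ValueError raised by a second construction triggers a rebuild with reuse=True. A minimal sketch of the same idea using reuse=tf.AUTO_REUSE (assuming TensorFlow 1.4+; any callable that creates variables inside the scope stands in for the RNNModel constructor):

import tensorflow as tf

def init_model(model_cls, args):
    # AUTO_REUSE creates the scope's variables on the first call and
    # silently reuses them on later calls, so no ValueError handling is needed.
    with tf.variable_scope('classifier', reuse=tf.AUTO_REUSE):
        return model_cls(args)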
Example #2
    def fit(self, X, y):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        y_tensor = self.convert_to_tensor(y, target=True)
        n_neurons_fc = self.hyper_parameters['n_neurons_fc']
        hidden_dim = self.hyper_parameters['hidden_dim']
        self.model = RNNModel(num_features=len(X.columns),
                              num_class=2,
                              hidden_dim=hidden_dim,
                              n_neurons_fc=n_neurons_fc,
                              sequence_length=self.sequence_length)
        self.init_loss_and_optimizer()
        epochs = self.hyper_parameters['epochs']
        n_batches = 20
        for epoch in range(epochs):
            for batch_idx in range(n_batches):
                # Local batches and labels
                local_X1, local_X2, local_y = self.get_batch(
                    X_tweet_text_tensor, X_other_features_tensor, y_tensor,
                    n_batches, batch_idx)
                self.optimizer.zero_grad()

                y_pred = self.model(local_X1, local_X2)
                loss = self.criterion(y_pred, local_y)
                loss.backward()
                self.optimizer.step()
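get_batch is the repository's own helper and its implementation is not shown; a hypothetical sketch consistent with the call above, assuming plain contiguous slicing of the pre-built tensors (the body is an assumption, not the actual code):

    def get_batch(self, X1, X2, y, n_batches, batch_idx):
        # Hypothetical: carve the batch_idx-th contiguous slice out of the full tensors.
        batch_size = y.shape[0] // n_batches
        start = batch_idx * batch_size
        end = start + batch_size
        return X1[start:end], X2[start:end], y[start:end]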
Example #3
def swipe_convert(src):
    src = [i - 64 for i in src]
    length = len(src)
    if len(src) > 10:
        src = src[:10]
        length = 10
    else:
        src = src + [0] * (10 - len(src))

    src = torch.LongTensor([src])
    length = torch.LongTensor([length])
    model = RNNModel('LSTM', 27, 1, 10, 1, 0.2)
    # Load the best saved model.
    with open('model.pt', 'rb') as f:
        model = torch.load(f)
        # after load the rnn params are not a continuous chunk of memory
        # this makes them a continuous chunk, and will speed up forward pass
        model.rnn.flatten_parameters()
    model.eval()
    hidden = model.init_hidden(1)

    with torch.no_grad():
        output, hidden = model(src, length, hidden)
        pred = output.max(1, keepdim=True)[1]
        return pred.item()
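The `i - 64` offset maps uppercase ASCII codes to 1-26 ('A' is 65), with 0 used for padding, which matches the 27-token vocabulary passed to RNNModel. A hypothetical call under that assumption:

# Hypothetical usage: classify the swiped word "HELLO".
codes = [ord(c) for c in "HELLO"]   # [72, 69, 76, 76, 79]
label = swipe_convert(codes)        # pads/truncates to length 10 and runs model.pt
print(label)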
Example #4
def main():
    """Sup Main!"""
    models = [CNNModel(), RNNModel()]
    for model in models:
        model.build_model()
        train = TrainModel(model, n_epochs=200, batch_size=128)
        train.train_model()
        train.reset_model()
Example #5
def sample_text(sess, data_provider, iteration):
    model = RNNModel(data_provider.vocabulary_size, batch_size=1, sequence_length=1, hidden_layer_size=HIDDEN_LAYER_SIZE, cells_size=CELLS_SIZE, training=False)
    text = model.sample(sess, data_provider.chars, data_provider.vocabulary, TEXT_SAMPLE_LENGTH)#.encode("utf-8")
    with open(output_file, "a") as output:
        output.write("Iteration: " + str(iteration) + "\n")
        output.write(str(text) + "\n")
        output.write("\n")
Example #6
    def __init__(self, train_config, model_config, is_training=True):
        self.train_config, self.model_config = train_config, model_config
        self.is_training = is_training
        self.cnn_model = CNNModel(self.train_config.cnn_keep_prob, is_training=is_training)
        self.rnn_model = RNNModel(train_config.learning_rate, model_config.n_fcs, model_config.n_views,
                                  model_config.n_hidden, model_config.n_classes,
                                  train_config.rnn_keep_prob if is_training else 1.0,
                                  is_training=self.is_training)
        self.gpu_config = tf.ConfigProto()
        self.gpu_config.gpu_options.allow_growth = True
        self.data = modelnet.read_data(FLAGS.modelnet_path)
Example #7
def rnn():
    data_provider = DataProvider(data_dir, BATCH_SIZE, SEQUENCE_LENGTH)
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=BATCH_SIZE,
                     sequence_length=SEQUENCE_LENGTH,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE)

    with tf.Session() as sess:

        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_dir)
        writer.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())

        # Forward pass and one backward pass of all the training examples
        epoch = 0
        temp_losses = []
        smooth_losses = []

        while True:
            sess.run(
                tf.assign(model.learning_rate,
                          LEARNING_RATE * (DECAY_RATE**epoch)))
            data_provider.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for batch in range(data_provider.batches_size):
                inputs, targets = data_provider.next_batch()
                feed = {model.input_data: inputs, model.targets: targets}
                for index, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[index].c
                    feed[h] = state[index].h

                # Iteration is the number of times batch data has passed
                # through the neural network - both forward and backwards
                # propagation
                iteration = epoch * data_provider.batches_size + batch
                summary, loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summary, iteration)
                temp_losses.append(loss)

                if iteration % SAMPLING_FREQUENCY == 0:
                    sample_model(sess, data_provider, iteration)

                if iteration % LOGGING_FREQUENCY == 0:
                    smooth_loss = np.mean(temp_losses)
                    smooth_losses.append(smooth_loss)
                    temp_losses = []
                    plot(smooth_losses, "iterations (thousands)", "loss")
                    print('{{"metric": "iteration", "value": {}}}'.format(
                        iteration))
                    print('{{"metric": "epoch", "value": {}}}'.format(epoch))
                    print('{{"metric": "loss", "value": {}}}'.format(
                        smooth_loss))
            epoch += 1
Example #8
def main():
    config = deepcopy(QMixConfig)
    env = StarCraft2Env(map_name=config['scenario'],
                        difficulty=config['difficulty'])
    env = SC2EnvWrapper(env)
    config['episode_limit'] = env.episode_limit
    config['obs_shape'] = env.obs_shape
    config['state_shape'] = env.state_shape
    config['n_agents'] = env.n_agents
    config['n_actions'] = env.n_actions

    rpm = EpisodeReplayBuffer(config['replay_buffer_size'])
    agent_model = RNNModel(config)
    qmixer_model = QMixerModel(config)
    algorithm = QMIX(agent_model, qmixer_model, config)
    qmix_agent = QMixAgent(algorithm, config)

    while rpm.count < config['memory_warmup_size']:
        train_reward, train_step, train_is_win, train_loss, train_td_error\
                = run_train_episode(env, qmix_agent, rpm, config)

    total_steps = 0
    last_test_step = -1e10
    while total_steps < config['training_steps']:
        train_reward, train_step, train_is_win, train_loss, train_td_error\
                = run_train_episode(env, qmix_agent, rpm, config)
        total_steps += train_step

        if total_steps - last_test_step >= config['test_steps']:
            last_test_step = total_steps
            eval_is_win_buffer = []
            eval_reward_buffer = []
            eval_steps_buffer = []
            for _ in range(3):
                eval_reward, eval_step, eval_is_win = run_evaluate_episode(
                    env, qmix_agent)
                eval_reward_buffer.append(eval_reward)
                eval_steps_buffer.append(eval_step)
                eval_is_win_buffer.append(eval_is_win)

            summary.add_scalar('train_loss', train_loss, total_steps)
            summary.add_scalar('eval_reward', np.mean(eval_reward_buffer),
                               total_steps)
            summary.add_scalar('eval_steps', np.mean(eval_steps_buffer),
                               total_steps)
            summary.add_scalar('eval_win_rate', np.mean(eval_is_win_buffer),
                               total_steps)
            summary.add_scalar('exploration', qmix_agent.exploration,
                               total_steps)
            summary.add_scalar('replay_buffer_size', rpm.count, total_steps)
            summary.add_scalar('target_update_count',
                               qmix_agent.target_update_count, total_steps)
            summary.add_scalar('train_td_error', train_td_error, total_steps)
Example #9
def main(args):
    if args.model == 'lstm':
        config = RNNConfig(max_sequence_length, n_classes)
    else:
        config = CNNConfig(max_sequence_length, n_classes)

    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=config.allow_soft_placement,
            log_device_placement=config.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            init_embedding = tf.random_uniform([vocab_size, config.embed_size],
                                               -1.0, 1.0)
            if args.model == 'lstm':
                model = RNNModel(config, init_embedding)
            else:
                model = CNNModel(config, init_embedding)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", args.model, timestamp))
            print("Writing to {}\n".format(out_dir))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            dataset.vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            saver = tf.train.Saver()

            sess.run(tf.global_variables_initializer())

            model.fit(sess, saver, dataset, checkpoint_dir)
Example #10
def main():
    args = parse_args()
    logging.basicConfig(stream=sys.stdout,
                        format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.INFO,
                        datefmt='%I:%M:%S')
    with codecs.open(data_file, 'r') as f:
        text = f.read()
    train_size = len(text)
    train_text = text
    if args.test == 'false':
        vocab_index_dict, index_vocab_dict, vocab_size = create_vocab(text)
        save_vocab(vocab_index_dict, 'vocab.json')
    else:
        vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
            'vocab.json')

    train_batches = BatchGenerator(train_text, batch_size, seq_length,
                                   vocab_size, vocab_index_dict)
    graph = tf.Graph()
    with graph.as_default():
        model = RNNModel(args.test, hidden_size, rnn_layers, batch_size,
                         seq_length, vocab_size, embedding_size, learning_rate,
                         max_grad_norm)

    with tf.Session(graph=graph) as session:
        model_saver = tf.train.Saver()
        if args.test == 'false':
            tf.global_variables_initializer().run()
            for i in range(num_epochs):
                model.train(session, train_size, train_batches)
                if i % 100 == 0:
                    logging.info("saving model")
                    model_saver.save(session,
                                     rnn_model,
                                     global_step=model.global_step)
        else:
            module_file = tf.train.latest_checkpoint(restore_path)
            model_saver.restore(session, module_file)
            start_text = 'your'
            length = 20
            print(
                model.predict(session, start_text, length, vocab_index_dict,
                              index_vocab_dict))
Example #11
def rnn():
    data_provider = DataProvider(data_dir, BATCH_SIZE, SEQUENCE_LENGTH)
    model = RNNModel(data_provider.vocabulary_size, batch_size=BATCH_SIZE, sequence_length=SEQUENCE_LENGTH, hidden_layer_size=HIDDEN_LAYER_SIZE, cells_size=CELLS_SIZE)

    with tf.Session() as sess:

        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_dir)
        writer.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())

        epoch = 0
        temp_losses = []
        smooth_losses = []

        while True:
            sess.run(tf.assign(model.learning_rate, LEARNING_RATE * (DECAY_RATE ** epoch)))
            data_provider.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for batch in range(data_provider.batches_size):
                inputs, targets = data_provider.next_batch()
                feed = {model.input_data: inputs, model.targets: targets}
                for index, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[index].c
                    feed[h] = state[index].h
                iteration = epoch * data_provider.batches_size + batch
                summary, loss, state, _ = sess.run([summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summary, iteration)
                temp_losses.append(loss)

                if iteration % SAMPLING_FREQUENCY == 0:
                    sample_text(sess, data_provider, iteration)

                if iteration % LOGGING_FREQUENCY == 0:
                    smooth_loss = np.mean(temp_losses)
                    smooth_losses.append(smooth_loss)
                    temp_losses = []
                    plot(smooth_losses, 'iterations (thousands)', 'loss')
                    print(f'metric: iteration, value: {iteration}')
                    print(f'metric: epoch, value: {epoch}')
                    print(f'metric: loss, value: {smooth_loss}')
            epoch += 1
Example #12
    def build_model(self):
        """
		Builds graph of FCNN, called only during __init__.
		"""
        self.cnn_in = tf.placeholder(tf.float64, [None, self.cnn_input_size],
                                     name="cnn_input")
        self.q_batch = tf.placeholder(tf.float64,
                                      [None, None, self.embed_size])
        self.labels = tf.placeholder(tf.int32, [None], name="labels")

        self.attention_vec = AttentionModel(self.cnn_input_size,
                                            self.cnn_in,
                                            self.q_batch,
                                            embed_size=self.embed_size).output

        self.rnn_out = RNNModel(self.attention_vec, dense=False).output

        # self.rnn_out = tf.layers.dropout(self.rnn_out, self.dropout)
        self.prev_layer = self.rnn_out
        for layer_name, layer_nodes in self.net_struct.items():
            self.prev_layer = tf.layers.dense(self.prev_layer,
                                              layer_nodes,
                                              activation=self.activation_fn,
                                              name=layer_name)
            # self.prev_layer = tf.layers.dropout(self.prev_layer, self.dropout)

        self.output = tf.layers.dense(self.prev_layer,
                                      self.output_size,
                                      activation=self.activation_fn,
                                      name="output")

        self.labels = tf.stop_gradient(self.labels)
        self.loss = tf.reduce_mean(
            self.loss_fn(labels=self.labels, logits=self.output))
        # self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
        self.global_step = tf.Variable(0, trainable=False)
        self.lr = tf.train.piecewise_constant(self.global_step,
                                              self.boundaries, self.values)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        self.train_op = self.optimizer.apply_gradients(
            self.grads_and_vars, global_step=self.global_step)
Example #13
def sample_text(sess, data_provider, iteration):
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=1,
                     sequence_length=1,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE,
                     training=False)
    text = model.sample(sess, data_provider.chars, data_provider.vocabulary,
                        TEXT_SAMPLE_LENGTH)
    with open(output_file, "a", encoding="utf-8") as output:
        output.write("Iteration: " + str(iteration) + "\n")
        output.write(text + "\n")
        output.write("\n")

    analysis = get_linguistic_analysis(text)
    print(analysis)
    with open(data_dir + "analysis.txt", mode="a",
              encoding='utf-8') as analysis_file:
        analysis_file.write("Iteration: " + str(iteration) + "\n")
        analysis_file.write(analysis)
        analysis_file.write("\n")
Example #14
    def __init__(self, model_dir, model_type, sess, batch_size=64, seq_len=32):
        with open(os.path.join(model_dir, 'config.pkl'), 'rb') as f:
            saved_args = cPickle.load(f)
        with open(os.path.join(model_dir, 'chars_vocab.pkl'), 'rb') as f:
            _, vocab = cPickle.load(f)

        saved_args.batch_size = batch_size
        saved_args.seq_length = seq_len

        infer = False  # for seqlen != 1

        if model_type == 'biLSTM':
            model = BiLSTM(saved_args, infer)
        elif model_type == 'biSRU':
            model = BiLSTM(saved_args, infer)
        elif model_type == 'stackBiLstm':
            model = StackedBiLstm(saved_args, infer)
        elif model_type == 'cnn3layers':
            model = Conv3LayerModel(saved_args, infer)
        elif model_type == 'conv1d':
            model = Conv1d3Layer(saved_args, infer)
        elif model_type == 'cnn6layers':
            model = Conv6LayerModel(saved_args, infer)
        elif model_type == 'cnn_lstm':
            model = ConvLSTMModel(saved_args, infer)
        else:
            model = RNNModel(saved_args, infer)

        self.seq_len = seq_len
        self.model = model
        self.vocab = vocab
        self.sess = sess
        self.saver = tf.train.Saver()
        self.ckpt = tf.train.get_checkpoint_state(model_dir)
        assert self.ckpt and self.ckpt.model_checkpoint_path
        if sess is not None:
            self.saver.restore(sess, self.ckpt.model_checkpoint_path)
Example #15
def sample_multi(save_dir, data, model_type):
    with open(os.path.join(save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(save_dir, 'chars_vocab.pkl'), 'rb') as f:
        _, vocab = cPickle.load(f)

    if model_type == 'biLSTM':
        model = BiLSTM(saved_args, True)
    elif model_type == 'biSRU':
        model = BiLSTM(saved_args, True)
    elif model_type == 'stackBiLstm':
        model = StackedBiLstm(saved_args, True)
    elif model_type == 'cnn3layers':
        model = Conv3LayerModel(saved_args, True)
    elif model_type == 'conv1d':
        model = Conv1d3Layer(saved_args, True)
    elif model_type == 'cnn6layers':
        model = Conv6LayerModel(saved_args, True)
    elif model_type == 'cnn_lstm':
        model = ConvLSTMModel(saved_args, True)
    else:
        model = RNNModel(saved_args, True)

    config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.25))
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            vector = np.mean(model.valid_run(sess, vocab, data[0]), axis=0)
            vectors = np.zeros((len(data), vector.shape[0]))
            vectors[0, :] = vector
            for i in tqdm(range(1, len(data))):
                vectors[i, :] = np.mean(model.valid_run(sess, vocab, data[i]),
                                        axis=0)
    return vectors
Example #16
    # We can't fit all of the notes into memory. Split the patients into chunks.
    # Ensure 1 < (number of patients / total_chunks).
    total_chunks = 20  # TODO move to program args.

    # Load the first chunk to get number of input features.
    X_train, Y_train = tensor_loader.load_X_Y_rnn(logger, args.train_table_name, chunk=0, total_chunks=total_chunks, no_gpu=args.no_gpu)
    X_val, Y_val = tensor_loader.load_X_Y_rnn(logger, args.val_table_name, chunk=0, total_chunks=total_chunks, no_gpu=args.no_gpu, validation_set=True)

    N, seq_length, D_in = X_train.shape  # Number of samples, sequence length, number of features.
    if args.top100_labels:  # Dimension of the hidden units, and dimension of the output vector.
        H, D_out = 1000, 100
    else:
        H, D_out = 100, 10

    model = RNNModel(D_in, H, D_out)

    if not args.no_gpu:
        model.cuda()

    loss_fn = torch.nn.BCEWithLogitsLoss(size_average=True)
    learning_rate, decay, momentum = 0.01, 1e-6, 0.9
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=decay, momentum=momentum, nesterov=True)

    tb_logger_train = tensorboardX.SummaryWriter(log_dir='../tensorboard_logs/rnn_train_' + str(experiment_id))
    tb_logger_val = tensorboardX.SummaryWriter(log_dir='../tensorboard_logs/rnn_val_' + str(experiment_id))
    metrics_train = defaultdict(list)
    metrics_val = defaultdict(list)
    metrics_test = defaultdict(list)

    epochs = 3  # TODO move to program args
Example #17
#############

#import pdb ; pdb.set_trace()

#Configuration
config = Config()

# BEFORE init, set batch size to 1 if restoring..
if args.restore:
    print '==> Setting Batch Size to 1...'
    config.batch_size = 1

#Create Model
with tf.variable_scope("RNN") as scope:
    model = RNNModel(config)

print '==> initializing variables'
init = tf.global_variables_initializer()
saver = tf.train.Saver()

#import pdb ; pdb.set_trace()

#############

with tf.Session() as session:

    sum_dir = 'summaries/train/' + time.strftime("%Y-%m-%d %H %M")
    if not os.path.exists(sum_dir):
        os.makedirs(sum_dir)
    train_writer = tf.summary.FileWriter(sum_dir, session.graph)
Example #18
def train_with_validation(train_set, valid_set, corpus,
                          n_hidden=128, n_emb=128, batch_size=32, conv_size=5,             
                          pooling_type='mean', model_type='lstm', w2v_fn=None, 
                          model_save_fn=None, disp_proc=True):
    '''pooling_type: mean or max
    model_type: lstm, rnn or cnn
    use_w2v: whether to use pre-trained embeddings from word2vec
    '''
    # Only train_set is converted by theano.shared
    train_x, train_mask, train_y = [theano.shared(_) for _ in train_set]
    valid_x, valid_mask, valid_y = valid_set
    n_train, n_valid = len(train_x.get_value()), len(valid_x)

    print("%d training examples" % n_train)
    print("%d validation examples" % n_valid)
    
    rng = np.random.RandomState(1224)
    th_rng = RandomStreams(1224)
    
    if model_save_fn is None:
        model_save_fn = os.path.join('model-res', '%s-%s' % (model_type, pooling_type))
    
    # Load Word2Vec 
    if w2v_fn is None:
        gensim_w2v = None
    else:
        print('Loading word2vec model...')
        if not os.path.exists(w2v_fn):
            raise Exception("Word2Vec model doesn't exist!", w2v_fn)
        gensim_w2v = Word2Vec.load(w2v_fn)
    
    # Define Model
    if model_type == 'lstm':
        model = LSTMModel(corpus, n_emb, n_hidden, pooling_type, 
                          rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    elif model_type == 'rnn':
        model = RNNModel(corpus, n_emb, n_hidden, pooling_type, 
                         rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    elif model_type == 'cnn':
        model = CNNModel(corpus, n_emb, n_hidden, batch_size, conv_size, pooling_type, 
                         rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    else:
        raise Exception("Invalid model type!", model_type)
    
    x, mask, y = model.x, model.mask, model.y
    batch_idx_seq, use_noise = model.batch_idx_seq, model.use_noise
    
    f_update_1_gr     = theano.function(inputs =[batch_idx_seq], 
                                        outputs=model.cost, 
                                        updates=model.gr_updates,
                                        givens ={x:    train_x[batch_idx_seq],
                                                 mask: train_mask[batch_idx_seq],
                                                 y:    train_y[batch_idx_seq]},
                                        on_unused_input='ignore')
    f_update_2_gr_sqr = theano.function(inputs=[], updates=model.gr_sqr_updates)
    f_update_3_dp_sqr = theano.function(inputs=[], updates=model.dp_sqr_updates)
    f_update_4_params = theano.function(inputs=[], updates=model.param_updates)
    
    # keep validation set consistent
    keep_tail = False if model_type == 'cnn' else True
    valid_idx_batches = get_minibatches_idx(n_valid, batch_size, keep_tail=keep_tail)
    valid_y = np.concatenate([valid_y[idx_batch] for idx_batch in valid_idx_batches])
    
    # train the model
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    disp_freq = 20
    validation_freq = 100
    
    max_epoch = 500
    best_iter = 0
    best_validation_err = np.inf
    
    epoch = 0
    uidx = 0
    done_looping = False
    start_time = time.time()
    
    while (epoch < max_epoch) and (not done_looping):
        epoch += 1        
        # Get new shuffled index for the training set. use rng to make result keep same with specific random-seed
        for idx_batch in get_minibatches_idx(n_train, batch_size, shuffle=True, rng=rng, keep_tail=keep_tail):
            uidx += 1
            use_noise.set_value(1.)
            
            cost = f_update_1_gr(idx_batch)
            f_update_2_gr_sqr()
            f_update_3_dp_sqr()
            f_update_4_params()
            
            if uidx % disp_freq == 0 and disp_proc:
                print('epoch %i, minibatch %i, train cost %f' % (epoch, uidx, cost))
    
            if uidx % validation_freq == 0:
                use_noise.set_value(0.)
                valid_y_pred = [model.predict_func(valid_x[idx_batch], valid_mask[idx_batch]) for idx_batch in valid_idx_batches]
                valid_y_pred = np.concatenate(valid_y_pred)
                this_validation_err = (valid_y_pred != valid_y).mean()
                print('epoch %i, minibatch %i, validation error %f %%' % (epoch, uidx, this_validation_err*100))
                
                if this_validation_err < best_validation_err:
                    if this_validation_err < best_validation_err*improvement_threshold:
                        patience = max(patience, uidx*patience_increase)                        
                    best_validation_err = this_validation_err
                    best_iter = uidx
                    model.save(model_save_fn)
                    
            if patience < uidx:
                done_looping = True
                break
        
    end_time = time.time()
    print('Optimization complete with best validation score of %f %%, at iter %d' % (best_validation_err * 100, best_iter))
    print('The code run for %d epochs, with %f epochs/sec' % (epoch, epoch / (end_time - start_time)))
    
    
    
Example #19
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_data = dataloader('train_shuf.txt', args.batch_size, args.bptt)
val_data = dataloader('val.txt', args.batch_size, args.bptt)

eval_batch_size = args.batch_size

###############################################################################
# Build the model
###############################################################################

ntokens = 27
model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)

optimizer = optim.SGD(model.parameters(), lr=args.lr)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
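repackage_hidden implements truncated backpropagation through time: detaching the hidden state at each batch boundary keeps the autograd graph from growing across the whole corpus. A minimal sketch of how it is typically used in the training loop (the batch iteration and model call are assumptions, not this script's actual loop):

def train_one_epoch(model, data, criterion, optimizer, batch_size, ntokens):
    model.train()
    hidden = model.init_hidden(batch_size)
    for inputs, targets in data:
        hidden = repackage_hidden(hidden)   # stop gradients at the batch boundary
        optimizer.zero_grad()
        output, hidden = model(inputs, hidden)
        loss = criterion(output.view(-1, ntokens), targets.view(-1))
        loss.backward()
        optimizer.step()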
Example #20
def train(args):
    # check compatibility if training is continued from previously saved model

    if args.init_from is None:
        print(args.init_from)
        data_loader = TextLoader(args)
        ckpt = ''
    else:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with codecs.open(os.path.join(args.init_from, 'config.pkl'),
                         'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with codecs.open(os.path.join(args.init_from, 'chars_vocab.pkl'),
                         'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)

        data_loader = TextLoader(args, chars=saved_chars, vocab=saved_vocab)

        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    args.vocab_size = data_loader.vocab_size
    args.letter_size = data_loader.letter_size
    args.word_vocab_size = data_loader.word_vocab_size

    os.makedirs(args.save_dir, exist_ok=True)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    if args.model == 'biLSTM':
        model = BiLSTM(args)
        train_bidirectional_model(model, data_loader, args, ckpt)
    elif args.model == 'biSRU':
        model = BiLSTM(args)
        train_bidirectional_model(model, data_loader, args, ckpt)
    elif args.model == 'stackBiLstm':
        model = StackedBiLstm(args)
        train_bidirectional_model(model, data_loader, args, ckpt)
    elif args.model == 'cnn3layers':
        model = Conv3LayerModel(args)
        train_cnn_model(model, data_loader, args, ckpt)
    elif args.model == 'conv1d':
        model = Conv1d3Layer(args)
        train_cnn_model(model, data_loader, args, ckpt)
    elif args.model == 'cnn6layers':
        model = Conv6LayerModel(args)
        train_cnn_model(model, data_loader, args, ckpt)
    elif args.model == 'cnn_lstm':
        model = ConvLSTMModel(args)
        train_one_forward_model(model, data_loader, args, ckpt)
    else:
        model = RNNModel(args)
        train_one_forward_model(model, data_loader, args, ckpt)
Example #21
def train_rnn(training_articles, testing_articles, n_epochs, batch_size,
              seq_length, char_skip, dropout_pkeep, force_retrain):
    print "[ INFO] Parsing training articles..."
    training_batch_generator = BatchGenerator(training_articles, batch_size,
                                              seq_length, char_skip)

    print "[ INFO] Parsing validation articles..."
    validation_batch_generator = BatchGenerator(testing_articles, batch_size,
                                                seq_length, char_skip)

    model_file = get_model_file()
    if model_file and not force_retrain:
        rnn_model = RNNModel.load_from_model_file(model_file)
        state_file = os.path.join(MODEL_SAVE_DIR, 'saved-vars.npz')
        if not os.path.exists(state_file):
            raise IOError("Numpy state file does not exist")
        saved_vars = np.load(state_file)
        istate = saved_vars['cell-state']
        training_batch_generator.restore_state_dict(**saved_vars)
        print "[ INFO] Resuming training from epoch %d, global step %d" % (
            training_batch_generator.n_epochs, rnn_model.training_step_num)
    else:
        print "[ INFO] Initializing RNN"
        rnn_model = RNNModel(max_seq_length=seq_length)
        rnn_model.init_network()
        istate = np.zeros(shape=(rnn_model.n_layers, 2, batch_size,
                                 rnn_model.cell_size))

    log_dir = os.path.join(
        LOG_DIR,
        'training_%s' % datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
    os.makedirs(log_dir)
    log_file = open(os.path.join(log_dir, 'log.txt'), 'w')

    validation_accuracies = list()
    validation_losses = list()
    validation_steps = list()

    while training_batch_generator.n_epochs < n_epochs:
        batch, labels, seq_length_arr, istate = training_batch_generator.get_batch(
            istate)
        pred, ostate, acc = rnn_model.process_training_batch(
            batch, labels, seq_length_arr, istate, dropout_pkeep)

        if rnn_model.training_step_num % DISPLAY_INTERVAL == 0:
            print "[ INFO] Accuracy at step %d (epoch %d): %.3f" % (
                rnn_model.training_step_num,
                training_batch_generator.n_epochs + 1, acc)
            print "[ INFO] Prediction of first sample in minibatch: %s" % idx_arr_to_str(
                pred[0])

        if rnn_model.training_step_num % TEXT_PREDICTION_LOG_INTERVAL == 0:
            log_file.write("Text prediction at step %d:\n" %
                           rnn_model.training_step_num)
            for i in range(batch_size):
                log_file.write(idx_arr_to_str(pred[i]) + '\n')
            log_file.write(
                "-----------------------------------------------------\n")

        if rnn_model.training_step_num % MODEL_SAVE_INTERVAL == 0:
            print "[ INFO] Saving model..."
            rnn_model.tf_saver.save(rnn_model.session,
                                    os.path.join(MODEL_SAVE_DIR, MODEL_PREFIX),
                                    global_step=rnn_model.training_step_num)

            # also save the cell state and counters of the BatchGenerator
            vars_to_store = training_batch_generator.get_state_dict()
            vars_to_store.update({'cell-state': ostate})
            np.savez(os.path.join(MODEL_SAVE_DIR, 'saved-vars.npz'),
                     **vars_to_store)

        if rnn_model.training_step_num % VALIDATION_INTERVAL == 0:
            print "[ INFO] Starting validation run"
            avg_loss, avg_accuracy = perform_validation_run(
                rnn_model, validation_batch_generator)
            validation_steps.append(rnn_model.training_step_num)
            validation_accuracies.append(avg_accuracy)
            validation_losses.append(avg_loss)

            plt.plot(validation_steps, validation_accuracies, label='accuracy')
            plt.plot(validation_steps, validation_losses, label='loss')

            plt.xlabel('Training Step')
            plt.yticks(np.arange(0., 1.05, 0.05))
            plt.legend(loc='upper left')
            plt.grid(True)
            plt.savefig(
                os.path.join(log_dir, 'validation_loss-accuracy-plot.png'))
            plt.close()

        istate = ostate

    log_file.close()
Example #22
import sys
import torch
import torch.nn as nn
sys.path.append("../d2l_func/")
from data_prepare import load_data_jay_song, data_iter_random, data_iter_consecutive, to_onehot
from model_train import train_rnn_pytorch
from predict import predict_rnn_pytorch
from rnn_model import RNNModel

if __name__ == "__main__":
    # load data
    corpus_index, char_to_idx, vocab_set, vocab_size = load_data_jay_song()
    # model
    hidden_num = 256
    rnn_layer = nn.LSTM(vocab_size, hidden_num)
    model = RNNModel(rnn_layer, vocab_size)
    model = model.cuda()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    params = {
        "epoch_num": 10,
        "model": model,
        "loss": loss,
        "optimizer": optimizer,
        "batch_size": 64,
        "num_step": 32,
        "corpus_index": corpus_index,
        "data_iter": data_iter_consecutive,
        "char_to_idx": char_to_idx,
        "vocab_set": vocab_set,
Example #23
    def build_model(self):
        """
        Builds graph of FCNN, called only during __init__. 
        """
        self.cnn_in = tf.placeholder(tf.float64, [None, self.cnn_input_size],
                                     name="cnn_input")

        if self.embed_type == "RNN":
            self.q_batch = tf.placeholder(tf.int32, [None, None],
                                          name="q_batch")
        elif self.embed_type == "GloVe":
            self.q_batch = tf.placeholder(tf.float64,
                                          [None, self.glove_embed_size])
        elif self.embed_type == "Word2Vec":
            self.q_batch = tf.placeholder(tf.float64,
                                          [None, self.word2vec_embed_size])

        self.labels = tf.placeholder(tf.int32, [None], name="labels")

        if self.embed_type == "RNN":
            self.q_batch = tf.stop_gradient(self.q_batch)
            self.one_hot = tf.one_hot(self.q_batch,
                                      self.vocab_size,
                                      dtype=tf.float64)
            rnn = RNNModel(self.one_hot)
            self.embed_output = rnn.output
            self.embed_output = tf.nn.l2_normalize(self.embed_output)
        elif self.embed_type == "GloVe":
            self.embed_output = tf.stop_gradient(self.q_batch)
        elif self.embed_type == "Word2Vec":
            self.embed_output = tf.stop_gradient(self.q_batch)

        self.cnn_l2_reg = tf.nn.l2_normalize(tf.stop_gradient(self.cnn_in))
        self.cnn_dense = tf.layers.dense(self.cnn_l2_reg,
                                         self.pointwise_layer_size,
                                         activation=self.activation_fn,
                                         name='cnn_in_layer')
        self.q_dense = tf.layers.dense(self.embed_output,
                                       self.pointwise_layer_size,
                                       activation=self.activation_fn,
                                       name='rnn_in_layer')
        self.pointwise_layer = tf.multiply(self.cnn_dense,
                                           self.q_dense,
                                           name="pointwise_layer")

        self.pointwise_layer = tf.layers.dropout(self.pointwise_layer,
                                                 self.dropout)
        self.prev_layer = self.pointwise_layer
        for layer_name, layer_nodes in self.net_struct.items():
            self.prev_layer = tf.layers.dense(self.prev_layer,
                                              layer_nodes,
                                              activation=self.activation_fn,
                                              name=layer_name)
            self.prev_layer = tf.layers.dropout(self.prev_layer, self.dropout)

        self.output = tf.layers.dense(self.prev_layer,
                                      self.output_size,
                                      activation=self.activation_fn,
                                      name="output")

        self.labels = tf.stop_gradient(self.labels)
        self.loss = tf.reduce_mean(
            self.loss_fn(labels=self.labels, logits=self.output))
        # self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
        self.global_step = tf.Variable(0, trainable=False)
        self.lr = tf.train.piecewise_constant(self.global_step,
                                              self.boundaries, self.values)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        self.train_op = self.optimizer.apply_gradients(
            self.grads_and_vars, global_step=self.global_step)
Example #24
def sample_text(sess, data_provider, iteration):
    model = RNNModel(data_provider.vocabulary_size, batch_size=1, sequence_length=1, hidden_layer_size=HIDDEN_LAYER_SIZE, cells_size=CELLS_SIZE, training=False)
    text = model.sample(sess, data_provider.chars, data_provider.vocabulary, TEXT_SAMPLE_LENGTH)
    with open(output_file, 'a', encoding='utf-8') as output:
        output.write(f'Iteration: {iteration}\n{text}\n\n')