def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
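
All of these examples read their hyperparameters from an args namespace. For context, a minimal entry point that could drive the train() above might look like the sketch below; the attribute names are the ones the function actually reads, but the defaults (and the model/rnn_size/num_layers values checked against a restored config) are illustrative assumptions, not values taken from any of the repositories in this listing.

import argparse

def main():
    parser = argparse.ArgumentParser()
    # paths and I/O
    parser.add_argument('--data_dir', default='data/tinyshakespeare')
    parser.add_argument('--save_dir', default='save')
    parser.add_argument('--log_dir', default='logs')
    parser.add_argument('--input_encoding', default=None)
    parser.add_argument('--init_from', default=None)
    # model architecture (checked against a saved config when resuming)
    parser.add_argument('--model', default='lstm')
    parser.add_argument('--rnn_size', type=int, default=256)
    parser.add_argument('--num_layers', type=int, default=2)
    # optimization and checkpointing
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--seq_length', type=int, default=25)
    parser.add_argument('--num_epochs', type=int, default=50)
    parser.add_argument('--save_every', type=int, default=1000)
    parser.add_argument('--learning_rate', type=float, default=0.002)
    parser.add_argument('--decay_rate', type=float, default=0.97)
    parser.add_argument('--gpu_mem', type=float, default=0.66)
    train(parser.parse_args())

if __name__ == '__main__':
    main()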
Example 2
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)
                    ) as sess:  # fareed gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())

        #fareed
        dot_rep = graph_to_dot(sess.graph)
        #s = Source(dot_rep, filename="test.gv", format="PNG")
        with open('./profs/rnn.dot', 'w') as fwr:
            fwr.write(str(dot_rep))

        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        operations_tensors = {}
        operations_names = tf.get_default_graph().get_operations()
        count1 = 0
        count2 = 0

        for operation in operations_names:
            operation_name = operation.name
            operations_info = tf.get_default_graph().get_operation_by_name(
                operation_name).values()
            if len(operations_info) > 0:
                if not (operations_info[0].shape.ndims is None):
                    operation_shape = operations_info[0].shape.as_list()
                    operation_dtype_size = operations_info[0].dtype.size
                    if not (operation_dtype_size is None):
                        operation_no_of_elements = 1
                        for dim in operation_shape:
                            if not (dim is None):
                                operation_no_of_elements = operation_no_of_elements * dim
                        total_size = operation_no_of_elements * operation_dtype_size
                        operations_tensors[operation_name] = total_size
                    else:
                        count1 = count1 + 1
                else:
                    count1 = count1 + 1
                    operations_tensors[operation_name] = -1

                #   print('no shape_1: ' + operation_name)
                #  print('no shape_2: ' + str(operations_info))
                #  operation_namee = operation_name + ':0'
                # tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
                # print('no shape_3:' + str(tf.shape(tensor)))
                # print('no shape:' + str(tensor.get_shape()))

            else:
                # print('no info :' + operation_name)
                # operation_namee = operation.name + ':0'
                count2 = count2 + 1
                operations_tensors[operation_name] = -1

                # try:
                #   tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
                # print(tensor)
                # print(tf.shape(tensor))
                # except:
                # print('no tensor: ' + operation_namee)
        print(count1)
        print(count2)

        with open('./profs/tensors_sz_32.txt', 'w') as f:
            for tensor, size in operations_tensors.items():
                f.write('"' + tensor + '"::' + str(size) + '\n')
        #end fareed

        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state
                }
                start = time.time()

                if b % 10 == 7:
                    summary, train_loss, state, _, _ = sess.run(
                        [
                            merged, model.cost, model.final_state,
                            model.train_op, model.inc_batch_pointer_op
                        ],
                        feed,
                        run_metadata=run_metadata,
                        options=options)
                    profile(run_metadata, b)
                    if b == 7:
                        options_mem = tf.profiler.ProfileOptionBuilder.time_and_memory(
                        )
                        options_mem["min_bytes"] = 0
                        options_mem["min_micros"] = 0
                        options_mem["output"] = 'file:outfile=./profs/mem.txt'
                        options_mem["select"] = ("bytes", "peak_bytes",
                                                 "output_bytes",
                                                 "residual_bytes")
                        mem = tf.profiler.profile(tf.get_default_graph(),
                                                  run_meta=run_metadata,
                                                  cmd="scope",
                                                  options=options_mem)

                else:
                    summary, train_loss, state, _, _ = sess.run([
                        merged, model.cost, model.final_state, model.train_op,
                        model.inc_batch_pointer_op
                    ], feed)
                    speed = time.time() - start
                    train_writer.add_summary(summary,
                                             e * data_loader.num_batches + b)

                if (e * data_loader.num_batches + b) % int(
                        args.batch_size / 10) == 0 and b % 10 != 7:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                """ if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path)) """
        train_writer.close()
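
Example 2 calls two helpers that are not part of the listing, graph_to_dot and profile. The sketches below are assumptions about what they do (serializing the graph to a Graphviz file, and dumping a Chrome trace from the collected RunMetadata), not the actual implementations used in that code.

from tensorflow.python.client import timeline


def graph_to_dot(graph):
    # emit a Graphviz digraph with one edge per tensor feeding an operation
    lines = ['digraph G {']
    for op in graph.get_operations():
        for inp in op.inputs:
            lines.append('  "{}" -> "{}";'.format(inp.name, op.name))
    lines.append('}')
    return '\n'.join(lines)


def profile(run_metadata, step):
    # convert the step's RunMetadata into a trace viewable at chrome://tracing
    tl = timeline.Timeline(run_metadata.step_stats)
    with open('./profs/timeline_{}.json'.format(step), 'w') as f:
        f.write(tl.generate_chrome_trace_format())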
Example 3
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"words_vocab.pkl")),"words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words==data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state,
                        model.batch_time: speed}
                summary, train_loss, state, _, _ = sess.run([merged, model.cost, model.final_state,
                                                             model.train_op, model.inc_batch_pointer_op], feed)
                train_writer.add_summary(summary, e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
Example 4
def train(args):
    tf.reset_default_graph()
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        try:
            # check if all necessary files exist
            assert os.path.isdir(
                args.init_from), " %s must be a path" % args.init_from
            assert os.path.isfile(
                os.path.join(args.init_from, "config.pkl")
            ), "config.pkl file does not exist in path %s" % args.init_from
            assert os.path.isfile(
                os.path.join(args.init_from, "words_vocab.pkl")
            ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
            ckpt = tf.train.get_checkpoint_state(args.init_from)
            assert ckpt, "No checkpoint found"
            assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

            # open old config and check if models are compatible
            with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
                saved_model_args = cPickle.load(f)
            need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
            for checkme in need_be_same:
                assert vars(saved_model_args)[checkme] == vars(
                    args
                )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

            # open saved vocab/dict and check if vocabs/dicts are compatible
            with open(os.path.join(args.init_from, 'words_vocab.pkl'),
                      'rb') as f:
                saved_words, saved_vocab = cPickle.load(f)
            assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
            assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        except:
            print("Could not init from old file")

    ## Dump new stuff
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        model_dict = {
            "model": model,
            "words": data_loader.words,
            "vocab": data_loader.vocab,
            "sess": sess
        }
        train_writer.add_graph(sess.graph)

        # Write graph quick
        writer = tf.summary.FileWriter(os.path.join(args.save_dir, "graph"),
                                       sess.graph)
        writer.close()

        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())

        # restore model
        if args.init_from is not None:
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except:
                print("Could not restore")

        # Epoch loop
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                try:
                    data_loader.pointer = model.batch_pointer.eval()
                    args.init_from = None
                except:
                    pass

            # Batch step loop
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y, last_words, syllables, topic_words = data_loader.next_batch(
                )

                # Concatenate Inputs
                #x = tf.concat([x[:,:,None],last_words[:,:,None]],2)
                if args.end_word_training:
                    feed = {
                        model.input_data: x,
                        model.targets: last_words,
                        model.bonus_features: last_words,
                        model.initial_state: state,
                        model.syllables: syllables,
                        model.topic_words: topic_words,
                        model.batch_time: speed
                    }
                elif args.syllable_training:
                    feed = {
                        model.input_data: x,
                        model.targets: last_words,
                        model.bonus_features: last_words,
                        model.initial_state: state,
                        model.syllables: syllables,
                        model.topic_words: topic_words,
                        model.batch_time: speed
                    }
                else:
                    feed = {
                        model.input_data: x,
                        model.targets: y,
                        model.bonus_features: last_words,
                        model.initial_state: state,
                        model.syllables: syllables,
                        model.topic_words: topic_words,
                        model.batch_time: speed
                    }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                #if (e * data_loader.num_batches + b) % args.save_every == 0 \
                #if b % 1000 in [1, 100] \
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                    #sample.main(save_dir = args.save_dir, output_path = "sample.txt", internal_call = True, model = model_dict)
                    python_path = "python"
                    #python_path = r"/usr/bin/python2.6/python"
                    if args.sample:
                        subprocess.call(
                            "python sample.py -e turtle -o sample.txt -s {}".
                            format(args.save_dir).split(),
                            shell=False)

        train_writer.close()
Example 5
    def run(self):
        data_loader = TextLoader(
            self.config.data_dir,
            self.config.batch_size,
            self.config.seq_length,
            self.config.input_encoding,
        )
        self.config.vocab_size = data_loader.vocab_size

        # check compatibility if training is continued from previously
        # saved model
        if self.config.init_from is not None:
            # check if all necessary files exist
            assert os.path.isdir(
                self.config.init_from), ('{} must be a path'.format(
                    self.config.init_from))
            assert os.path.isfile(
                os.path.join(self.config.init_from, 'config.pkl')), (
                    'config.pkl file does not exist in path {}'.format(
                        self.config.init_from))
            assert os.path.isfile(
                os.path.join(self.config.init_from, 'words_vocab.pkl')
            ), 'words_vocab.pkl file does not exist in path {}'.format(
                self.config.init_from)
            ckpt = tf.train.get_checkpoint_state(self.config.init_from)
            assert ckpt, 'No checkpoint found'
            assert ckpt.model_checkpoint_path, (
                'No model path found in checkpoint')

            # open old config and check if models are compatible
            with open(os.path.join(self.config.init_from, 'config.pkl'),
                      'rb') as f:
                saved_model_args = cPickle.load(f)
            need_be_same = ['model', 'rnn_size', 'num_layers', 'seq_length']
            for checkme in need_be_same:
                assert vars(saved_model_args)[checkme] == vars(
                    self.config)[checkme], (
                        'Command line argument and saved model disagree '
                        'on "{}".'.format(checkme))

            # open saved vocab/dict and check if vocabs/dicts are compatible
            with open(os.path.join(self.config.init_from, 'words_vocab.pkl'),
                      'rb') as f:
                saved_words, saved_vocab = cPickle.load(f)
            assert saved_words == data_loader.words, (
                'Data and loaded model disagree on word set!')
            assert saved_vocab == data_loader.vocab, (
                'Data and loaded model disagree on dictionary mappings!')

        with open(os.path.join(self.config.save_dir, 'config.pkl'), 'wb') as f:
            cPickle.dump(self.config, f)
        with open(os.path.join(self.config.save_dir, 'words_vocab.pkl'),
                  'wb') as f:
            cPickle.dump((data_loader.words, data_loader.vocab), f)

        model = Model(self.config)

        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(self.config.log_dir)
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self.config.gpu_mem)

        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options)) as sess:
            train_writer.add_graph(sess.graph)
            tf.global_variables_initializer().run()
            saver = tf.train.Saver(tf.global_variables())
            # restore model
            if self.config.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)
            for e in range(model.epoch_pointer.eval(), self.config.num_epochs):
                sess.run(
                    tf.assign(
                        model.lr,
                        self.config.learning_rate *
                        (self.config.decay_rate**e),
                    ))
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                speed = 0
                if self.config.init_from is None:
                    assign_op = model.epoch_pointer.assign(e)
                    sess.run(assign_op)
                if self.config.init_from is not None:
                    data_loader.pointer = model.batch_pointer.eval()
                    self.config.init_from = None
                for b in range(data_loader.pointer, data_loader.num_batches):
                    start = time.time()
                    x, y = data_loader.next_batch()
                    feed = {
                        model.input_data: x,
                        model.targets: y,
                        model.initial_state: state,
                        model.batch_time: speed,
                    }
                    summary, train_loss, state, _, _ = sess.run([
                        merged,
                        model.cost,
                        model.final_state,
                        model.train_op,
                        model.inc_batch_pointer_op,
                    ], feed)
                    train_writer.add_summary(summary,
                                             e * data_loader.num_batches + b)
                    speed = time.time() - start
                    if ((e * data_loader.num_batches + b) %
                            self.config.batch_size == 0):
                        print(
                            '{}/{} (epoch {}), train_loss = {:.3f}, '
                            'time/batch = {:.3f}'.format(
                                e * data_loader.num_batches + b,
                                self.config.num_epochs *
                                data_loader.num_batches,
                                e,
                                train_loss,
                                speed,
                            ), )
                    # save for the last result
                    if ((e * data_loader.num_batches + b) %
                            self.config.save_every == 0
                            or (e == self.config.num_epochs - 1
                                and b == data_loader.num_batches - 1)):
                        checkpoint_path = os.path.join(
                            self.config.save_dir,
                            'model-{:.3f}.ckpt'.format(train_loss),
                        )
                        saver.save(
                            sess,
                            checkpoint_path,
                            global_step=e * data_loader.num_batches + b,
                        )
                        print('model saved to {}'.format(checkpoint_path))
            train_writer.close()
Example 6
def train(args):
    '''start by getting the data_loader object'''
    data_loader = TextLoader(args.reverse, args.data_dir, args.test_split,
                             args.batch_size, args.seq_length,
                             args.input_encoding)
    '''some informative prints'''
    args.vocab_size = data_loader.vocab_size
    print("Train size: ", data_loader.num_batches * args.batch_size)
    if args.test_split > 0:
        print("Test size: ", data_loader.test_num_batches * args.batch_size)
    print("Vocab size: ", args.vocab_size)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
    '''save the config and vocab so sampling or resumed training can reload them'''
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)
    '''start up the model'''
    model = Model(args)
    '''if a test split is requested, get it'''
    if args.test_split > 0:
        test_x = data_loader.test_x
        test_y = data_loader.test_y
    '''set up summary merging, the TensorBoard writer, and GPU memory options'''
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    '''begin the session for training'''
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        # take a look at the learning rate schedule, if so desired (note parameters here should match the ones used)
        plot = False
        if plot:
            n = args.num_epochs * data_loader.num_batches
            n = 150000
            x = np.arange(n)
            y = np.zeros((n, 1))
            y = cosine_decay_restarts(
                args.learning_rate,
                x,  # shift down every epoch
                50000,  # check out this sweet graph https://github.com/tensorflow/tensorflow/pull/11749
                .9,  # doesn't hurt to look at the tf docs too
                .1,
                1e-12).eval()
            plt.figure()
            plt.plot(x, y)
            plt.title("Learning rate schedule")
            plt.show()
        '''attach the graph to the summary writer, initialize variables, and create a saver'''
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        '''fun fact: you can't put comments inside if-else clauses'''
        '''initialize from a previous model OR start from scratch, which means grabbing GloVe embeddings '''
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        else:
            print("Loading my knowledge of the English language...")
            embeddings = data_loader.get_embeddings()
            sess.run([model.embedding_init],
                     {model.embedding_placeholder: embeddings})
        '''iterate over the range of epochs specified'''
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            #sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) #this is the vanilla exponential decay
            '''learning rate decay is cosine annealing'''
            sess.run(
                tf.assign(
                    model.lr,
                    cosine_decay_restarts(
                        args.learning_rate,
                        e * data_loader.num_batches,  # shift down every epoch
                        20000,  # check out this sweet graph https://github.com/tensorflow/tensorflow/pull/11749
                        1,  # doesn't hurt to look at the tf docs too
                        .1,
                        1e-12)))
            '''reset the pointer to start from the beginning'''
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            '''iterate over the batches in the dataset'''
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                '''the feed dictionary gets passed to the model when tensorflow variables are computed'''
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed,
                    model.dropout: args.dropout
                }
                '''variables to be trained, either with or without word embeddings'''
                run_list_full = [
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ]
                run_list_no_W = [
                    merged, model.cost, model.final_state, model.train_op_no_W,
                    model.inc_batch_pointer_op
                ]
                # YES, TRAIN THE EMBEDDINGS
                if args.trainable_embeddings == 1:
                    summary, train_loss, state, _, _ = sess.run(
                        run_list_full, feed)
                # NO, DO NOT TRAIN THE EMBEDDINGS (train_op_no_W)
                elif args.trainable_embeddings == 0:
                    summary, train_loss, state, _, _ = sess.run(
                        run_list_no_W, feed)
                # it's been e epochs, so start training the embeddings
                elif e > args.trainable_embeddings:
                    summary, train_loss, state, _, _ = sess.run(
                        run_list_full, feed)
                # it hasn't been e epochs, don't train the embeddings
                else:
                    summary, train_loss, state, _, _ = sess.run(
                        run_list_no_W, feed)
                '''some diagnostics to be printed, and the model gets saved here too'''
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                    print("learning rate: ", model.lr.eval())

                    #TEST LOSS EVAL - evaluates batch by batch with same batch size as for training
                    if (args.test_split > 0):
                        test_loss = 0
                        batches_in_test = len(test_x)
                        save_state = state
                        state = sess.run(model.initial_state)
                        for i in range(batches_in_test):
                            feed = {
                                model.test_x: test_x[i],
                                model.test_y: test_y[i],
                                model.initial_state: state
                            }
                            loss, state, _ = sess.run([
                                model.test_cost, model.test_final_state,
                                model.inc_batch_pointer_op
                            ], feed)
                            test_loss += loss
                        test_loss = test_loss / batches_in_test
                        state = save_state
                        print("test_loss = {:.3f}".format(test_loss))
        '''one final evaluation of the entire dataset to check the loss'''

        data_loader.reset_batch_pointer()
        state = sess.run(model.initial_state)
        ovr_loss = 0
        start = time.time()
        for b in range(data_loader.pointer, data_loader.num_batches):
            x, y = data_loader.next_batch()
            feed = {
                model.input_data: x,
                model.targets: y,
                model.initial_state: state
            }
            train_loss, state, _ = sess.run(
                [model.cost, model.final_state, model.inc_batch_pointer_op],
                feed)
            ovr_loss += train_loss

        speed = time.time() - start
        print("ovr_train_loss = {:.3f}, time_to_eval = {:.3f}".format(
            ovr_loss / data_loader.num_batches, speed))
        '''lets you initialize a model without training it'''
        if args.num_epochs == 0:
            # no epoch loop ran, so checkpoint_path/e/b are undefined here; build the path and save at step 0
            checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=0)
            print("model saved to {}".format(checkpoint_path))

        train_writer.close()
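
Example 6 also assumes cosine_decay_restarts is already in scope. If it is TensorFlow's built-in schedule (an assumption; the listing does not show the import), the binding and signature would be roughly:

import tensorflow as tf

# tf.train.cosine_decay_restarts(learning_rate, global_step, first_decay_steps,
#                                t_mul=2.0, m_mul=1.0, alpha=0.0) returns a Tensor,
# which is why the example evaluates it with .eval() / sess.run inside the session.
cosine_decay_restarts = tf.train.cosine_decay_restarts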
Example 7
def run_model(args, test=True):
    data_loader = TextLoader(args.data_train_dir, args.batch_size,
                             args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()

        if test:
            # todo: this is very hacky... change it later
            test_data_loader = TextLoader(args.data_test_dir, args.batch_size,
                                          args.seq_length, args.input_encoding)

            # loop over the entire data set and generate the probabilities of the next word
            first_batch = True
            for b in range(test_data_loader.pointer,
                           test_data_loader.num_batches):
                x, y = test_data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, probs, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.probs,
                    model.train_op, model.inc_batch_pointer_op
                ], feed)

                # save probability vectors along with the text
                # print(np.shape(probs))
                # get probability and indices for the top k predictions
                k = 100
                prob_table_top_k, sorting_idx_table_top_k = get_top_k_probs_and_indices(
                    probs, k)

                # collect info
                if first_batch:
                    PROBS = prob_table_top_k
                    IDX = sorting_idx_table_top_k
                    first_batch = False
                else:
                    PROBS = np.vstack([PROBS, prob_table_top_k])
                    IDX = np.vstack([IDX, sorting_idx_table_top_k])

            # save the probability table and indices
            print(np.shape(PROBS))
            print('whole seq length = %d' % test_data_loader.full_text_len)
            np.savez(os.path.join(args.data_test_dir, 'probs'),
                     prob_table=PROBS,
                     idx_table=IDX)
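
get_top_k_probs_and_indices is not defined anywhere in this listing; a plausible NumPy sketch, assuming probs is a [num_positions, vocab_size] array of softmax outputs, would be:

import numpy as np


def get_top_k_probs_and_indices(probs, k):
    # sort each row in descending order of probability and keep the top k entries
    idx = np.argsort(probs, axis=1)[:, ::-1][:, :k]
    top_probs = np.take_along_axis(probs, idx, axis=1)
    return top_probs, idx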
Example 8
def train(args):
    # parse text data and record statistics
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        # load checkpoint
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # save arguments to config.pkl
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    # save words parsed by data_loader to words_vocab.pkl
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    # input output placeholders, loss_op, optimizer, train_op etc. are defined in model.py
    model = Model(args)
    """
    tf.summary.merge_all(key=tf.GraphKeys.SUMMARIES)
    Merges all summaries collected in the default graph.
    key: GraphKey used to collect the summaries. Defaults to GraphKeys.SUMMARIES.
    Returns:
    If no summaries were collected, returns None. Otherwise returns a scalar Tensor
    of type string containing the serialized Summary protocol buffer resulting from
    the merging.
    """
    merged = tf.summary.merge_all()
    # the FileWriter class provides a mechanism to create an event file in a given
    # directory and add summaries and events to it.
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    # Launch the graph in a session.
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Adds a Graph to the event file.
        train_writer.add_graph(sess.graph)
        # Run the Op that initializes global variables.
        tf.global_variables_initializer().run()
        # tf.global_variables() returns global variables.(A list of Variable objects)
        # The Saver class adds ops to save and restore variables to and from checkpoints.
        saver = tf.train.Saver(tf.global_variables())
        # Restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        # e: epoch number
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            # model.lr <- args.learning_rate * (args.decay_rate ** e)
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            # pointer <- 0
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                # Assign 0 to batch_pointer
                assign_op = model.batch_pointer.assign(0)
                sess.run(assign_op)
                # Assign e to the epoch_pointer
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                # This method wraps the provided summary in an Event protocol buffer and adds it to the event file.
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()