Example #1
import pickle

def train(fm: FileManager):
    # Build the project configuration around the chosen embedding model.
    cfg = ConfigProject(fm, args['embed_model'])
    # The training loader also reports the class count and class mapping.
    train_data, cfg['count_class'], cfg['classes'] = LoadDatasetManager(
        fm['train'], True).get_dataset()
    bm_train = BatchManager(train_data, cfg, StatusDatasets.Train)
    # Load the dev set with the class mapping learned from the training set.
    dev_data = LoadDatasetManager(fm['dev'],
                                  dict_classes=cfg['classes']).get_dataset()
    bm_dev = BatchManager(dev_data, cfg, StatusDatasets.Dev)
    # Persist the configuration so test() can reload it later.
    with open(fm['config'], 'wb') as f:
        pickle.dump(cfg, f)
    Trainer(cfg, bm_train, bm_dev, path=fm['model']).run()
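Here `fm` is assumed to map the keys 'train', 'dev', 'config', and 'model' to filesystem paths, and `args['embed_model']` names the embedding model chosen elsewhere in the program.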
Example #2
import pickle

def test(fm: FileManager):
    # Reload the configuration persisted by train().
    with open(fm['config'], 'rb') as f:
        cfg = pickle.load(f)
    test_data = LoadDatasetManager(fm['test'],
                                   dict_classes=cfg['classes']).get_dataset()
    bm_test = BatchManager(test_data, cfg, StatusDatasets.Test)
    res = Tester(cfg, bm_test, fm['model']).run()
    # Append the evaluation results to the run log.
    LogManager.write_log(cfg, res, './logs/log.txt')
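train() and test() communicate only through the pickled configuration: train() writes it to fm['config'] and test() reads it back. A minimal standalone sketch of that round-trip (the file name and config contents here are illustrative, not the project's):

import pickle

# Toy stand-in for the project's config object; the real contents differ.
cfg = {'count_class': 2, 'classes': {'ham': 0, 'spam': 1}}

# train() side: persist the config.
with open('config.pkl', 'wb') as f:
    pickle.dump(cfg, f)

# test() side: reload it and reuse the class mapping.
with open('config.pkl', 'rb') as f:
    restored = pickle.load(f)

assert restored['classes'] == cfg['classes']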
Example #3
import random

import numpy as np

def get_train_bm(message_length,
                 minimal_actual_text_length,
                 batch_size,
                 end_padding='~',
                 shuffle_examples=True,
                 num_workers=1,
                 word_code=False):
    maybe_download_and_extract()

    gen_adapt, spam_adapt = load_messages('adapt')
    gen_train, spam_train = load_messages('train')
    random.shuffle(spam_train)
    gen, spam = gen_adapt + gen_train, spam_adapt + spam_train

    data_len = min(len(gen), len(spam))
    gen, spam = gen[:data_len], spam[:data_len]
    messages = tuple(gen) + tuple(spam)
    indices = tuple(range(len(messages)))

    if not word_code:  # char code
        # pipeline is key -> message -> remove useless tags -> randomly cropped message of fixed length -> char_codes
        MESSAGE_TRAIN_PIPELINE = compose_ops([
            key_to_element(messages),
            remove_confusing_tags(),
            random_crop(message_length,
                        minimal_actual_text_length,
                        front_padding=None,
                        end_padding=end_padding),
            for_each(ord),
        ])
    else:
        maybe_build_vocab()
        # pipeline is key -> message -> word_code -> randomly cropped message of fixed length
        MESSAGE_TRAIN_PIPELINE = compose_ops([
            key_to_element(messages),
            to_word_code,
            random_crop(message_length,
                        minimal_actual_text_length,
                        front_padding=None,
                        end_padding=[VOCAB[PADDING_CHAR]]),
        ])
    LABEL_TRAIN_PIPELINE = compose_ops(
        [greater_than(len(gen) - 1),
         key_to_element({
             False: 0,
             True: 1
         })])
    SPAM_GEN_EXAMPLE_GETTER = for_each(
        parallelise_ops([MESSAGE_TRAIN_PIPELINE, LABEL_TRAIN_PIPELINE]))

    return BatchManager(SPAM_GEN_EXAMPLE_GETTER,
                        indices,
                        generic_batch_composer(np.int32, np.int32),
                        batch_size,
                        shuffle_examples=shuffle_examples,
                        num_workers=num_workers)
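Because `messages` is the genuine messages followed by the spam messages, an example's index alone determines its label: any index past the last genuine message is spam. A plain-Python sketch of LABEL_TRAIN_PIPELINE's logic, assuming greater_than(t) yields a function index -> index > t and key_to_element(d) yields key -> d[key]:

def label_for(index, num_genuine):
    # greater_than(len(gen) - 1): spam examples sit after the genuine ones.
    is_spam = index > num_genuine - 1
    # key_to_element({False: 0, True: 1}): map the boolean to a class id.
    return {False: 0, True: 1}[is_spam]

assert label_for(0, num_genuine=3) == 0  # a genuine message
assert label_for(3, num_genuine=3) == 1  # the first spam message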
Example #4
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from batch_manager import BatchManager

if __name__ == "__main__":
    batch_id = 'batch_power_cage_etl'
    # Project id and batch state ("BEGIN" or "END") come from the command line.
    project_id = sys.argv[1]
    batch_state = sys.argv[2]
    batch_mgr = BatchManager(project_id, batch_id)
    if batch_state in ["BEGIN", "END"]:
        batch_mgr.execute_batch(batch_state)
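The script takes the project id and batch state as positional arguments, e.g. python run_etl.py my_project BEGIN (script name hypothetical); any state other than BEGIN or END is silently ignored.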
Example #5
def make_batches(self):
    # Delegate batch construction to the BatchManager helper.
    batch_manager = BatchManager(self.n_tracks_per_batch,
                                 self.batches_path, self.first_execution)
    batch_manager.make_batches()
Example #6
# ===================================== #
#            Training Script            #
#                                       #
# ===================================== #

import numpy as np
import tensorflow as tf

# VariationalAutoencoder and BatchManager are presumably project-local modules.

if __name__ == "__main__":

    n_epochs = 100
    batch_size = 64

    with tf.Session() as sess:
        vae = VariationalAutoencoder(sess,
                                     batch_size,
                                     image_size=64,
                                     channels=3,
                                     latent_size=256)
        batch_manager = BatchManager("data/processed", resize=(64, 64))
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        print("# ========================= #")
        print("#                           #")
        print("#      Training Session     #")
        print("#                           #")
        print("# ========================= #")
        print("n_epochs          =", n_epochs)
        print("batch_size        =", batch_size)
        print("training_examples =", batch_manager.num_examples())
        print("batch_shape       =",
              np.shape(batch_manager.next_batch(batch_size)))
        print(
            "params            =",