def train(fm: FileManager):
    """Train a model from the paths held by `fm` and pickle the resulting config."""
    cfg = ConfigProject(fm, args['embed_model'])

    # Build the training batches; the dataset loader also reports the class set.
    train_data, cfg['count_class'], cfg['classes'] = LoadDatasetManager(
        fm['train'], True).get_dataset()
    bm_train = BatchManager(train_data, cfg, StatusDatasets.Train)

    # Build the dev batches, reusing the class dictionary discovered on the train set.
    dev_data = LoadDatasetManager(fm['dev'], dict_classes=cfg['classes']).get_dataset()
    bm_dev = BatchManager(dev_data, cfg, StatusDatasets.Dev)

    # Persist the config so that test() can restore the exact same setup.
    with open(fm['config'], 'wb') as f:
        pickle.dump(cfg, f)

    Trainer(cfg, bm_train, bm_dev, path=fm['model']).run()
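A minimal sketch of how this entry point might be called, assuming FileManager can be built from a mapping of the names used above ('train', 'dev', 'config', 'model') to paths and that args holds parsed command-line options; the concrete paths and embedding name are illustrative only.

# Hypothetical call site; FileManager's constructor and the paths below are
# assumptions inferred from the keys used in train(), not part of the original code.
args = {'embed_model': 'fasttext'}            # illustrative embedding choice
fm = FileManager({
    'train':  'data/train.csv',
    'dev':    'data/dev.csv',
    'config': 'output/config.pkl',
    'model':  'output/model.bin',
})
train(fm)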
def test(fm: FileManager):
    """Evaluate the trained model using the config pickled by train()."""
    with open(fm['config'], 'rb') as f:
        cfg = pickle.load(f)

    # Build the test batches with the class dictionary saved at training time.
    test_data = LoadDatasetManager(fm['test'], dict_classes=cfg['classes']).get_dataset()
    bm_test = BatchManager(test_data, cfg, StatusDatasets.Test)

    res = Tester(cfg, bm_test, fm['model']).run()
    LogManager.write_log(cfg, res, './logs/log.txt')
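Evaluation then reuses the config pickled by train(); again, the FileManager construction and paths are only a sketch, with the 'test', 'config', and 'model' keys taken from the function above.

# Hypothetical call site; the 'config' entry must point at the file written by train().
fm = FileManager({
    'test':   'data/test.csv',
    'config': 'output/config.pkl',
    'model':  'output/model.bin',
})
test(fm)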
def get_train_bm(message_length, minimal_actual_text_length, batch_size,
                 end_padding='~', shuffle_examples=True, num_workers=1,
                 word_code=False):
    maybe_download_and_extract()

    # Combine the 'adapt' and 'train' splits, then balance the two classes
    # by truncating the larger one to the size of the smaller.
    gen_adapt, spam_adapt = load_messages('adapt')
    gen_train, spam_train = load_messages('train')
    random.shuffle(spam_train)
    gen, spam = gen_adapt + gen_train, spam_adapt + spam_train
    data_len = min(len(gen), len(spam))
    gen, spam = gen[:data_len], spam[:data_len]

    messages = tuple(gen) + tuple(spam)
    indices = tuple(range(len(messages)))

    if not word_code:
        # Char-level pipeline:
        # key -> message -> remove useless tags -> random fixed-length crop -> char codes
        MESSAGE_TRAIN_PIPELINE = compose_ops([
            key_to_element(messages),
            remove_confusing_tags(),
            random_crop(message_length, minimal_actual_text_length,
                        front_padding=None, end_padding=end_padding),
            for_each(ord),
        ])
    else:
        maybe_build_vocab()
        # Word-level pipeline:
        # key -> message -> word codes -> random fixed-length crop
        MESSAGE_TRAIN_PIPELINE = compose_ops([
            key_to_element(messages),
            to_word_code,
            random_crop(message_length, minimal_actual_text_length,
                        front_padding=None, end_padding=[VOCAB[PADDING_CHAR]]),
        ])

    # Keys below len(gen) index genuine messages (label 0); the rest are spam (label 1).
    LABEL_TRAIN_PIPELINE = compose_ops([
        greater_than(len(gen) - 1),
        key_to_element({False: 0, True: 1}),
    ])

    SPAM_GEN_EXAMPLE_GETTER = for_each(
        parallelise_ops([MESSAGE_TRAIN_PIPELINE, LABEL_TRAIN_PIPELINE]))

    return BatchManager(SPAM_GEN_EXAMPLE_GETTER, indices,
                        generic_batch_composer(np.int32, np.int32), batch_size,
                        shuffle_examples=shuffle_examples, num_workers=num_workers)
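A short sketch of how get_train_bm might be invoked in its two modes; the numeric values below are illustrative choices, not defaults taken from the source.

# Char-level batches: 512-character crops padded with '~', each containing
# at least 200 characters of real text.
bm_chars = get_train_bm(message_length=512, minimal_actual_text_length=200,
                        batch_size=64)

# Word-level batches: the same call with word_code=True, which builds the vocabulary
# first and pads crops with VOCAB[PADDING_CHAR] instead of a literal character.
bm_words = get_train_bm(message_length=128, minimal_actual_text_length=30,
                        batch_size=64, word_code=True, num_workers=4)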
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Usage: python <this_script> <project_id> <BEGIN|END>
import sys

from batch_manager import BatchManager

if __name__ == "__main__":
    batch_id = 'batch_power_cage_etl'
    project_id = sys.argv[1]
    batch_state = sys.argv[2]

    batch_mgr = BatchManager(project_id, batch_id)
    # Only run the batch when a recognised state is passed on the command line.
    if batch_state in ["BEGIN", "END"]:
        batch_mgr.execute_batch(batch_state)
def make_batches(self):
    """Delegate batch creation to a BatchManager configured from this object's settings."""
    batch_manager = BatchManager(self.n_tracks_per_batch, self.batches_path,
                                 self.first_execution)
    batch_manager.make_batches()
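For context, a hedged sketch of the kind of class this method could sit in, inferred only from the three attributes it reads; the class name and constructor are hypothetical.

# Hypothetical enclosing class; only the attribute names come from the method above.
class BatchBuilder:
    def __init__(self, n_tracks_per_batch, batches_path, first_execution=True):
        self.n_tracks_per_batch = n_tracks_per_batch
        self.batches_path = batches_path
        self.first_execution = first_execution

    def make_batches(self):
        batch_manager = BatchManager(self.n_tracks_per_batch, self.batches_path,
                                     self.first_execution)
        batch_manager.make_batches()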
# Training Script
# =====================================

if __name__ == "__main__":
    n_epochs = 100
    batch_size = 64

    with tf.Session() as sess:
        vae = VariationalAutoencoder(sess, batch_size, image_size=64,
                                     channels=3, latent_size=256)
        batch_manager = BatchManager("data/processed", resize=(64, 64))

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        print("# ========================= #")
        print("#                           #")
        print("#     Training Session      #")
        print("#                           #")
        print("# ========================= #")
        print("n_epochs =", n_epochs)
        print("batch_size =", batch_size)
        print("training_examples =", batch_manager.num_examples())
        print("batch_shape =", np.shape(batch_manager.next_batch(batch_size)))
        print(
            "params =",