def main(args):
    datamodule = LanguageDataModule(root=args.dataset_path,
                                    languages=args.languages,
                                    batch_size=args.batch_size,
                                    num_workers=args.num_workers)
    model = LanguageModel(
        # layers=14,  # 10
        # blocks=1,  # 4
        skip_channels=32,  # 256
        end_channels=32,  # 256
        # uncomment for fast debug network
    )
    ckpt = torch.load(args.ckpt_path)
    model.load_state_dict(ckpt['state_dict'])
    trainer = pl.Trainer(
        # comment to run on cpu for local testing
        gpus=args.gpus,
        auto_select_gpus=True,
        # distributed_backend='ddp',
        benchmark=True,
        terminate_on_nan=True,
    )
    datamodule.setup()
    # trainer.fit(model, datamodule)
    results = trainer.test(model, datamodule.test_dataloader())

def rnn_main(dataset):
    model = LanguageModel(dataset.vocab).to(_flags.device())

    def sample():
        return dataset.sample_train(aug_ratio=FLAGS.aug_ratio)

    def score_utts(utts):
        fake = [((), utt) for utt in utts]
        batch = make_batch(fake, model.vocab, staged=False)
        mean = model(None, batch.out_data, None, None).item()
        tot = mean * sum(len(utt) - 1 for utt in utts)
        return tot

    def callback(i_epoch):
        model.eval()
        final = i_epoch == FLAGS.n_epochs - 1
        with hlog.task("eval_val", timer=False):
            val_acc = evaluate(score_utts, dataset.get_val(), dataset)
        if FLAGS.TEST and (final or FLAGS.test_curve):
            with hlog.task("eval_test", timer=False):
                evaluate(score_utts, dataset.get_test(), dataset)
        if (i_epoch + 1) % FLAGS.n_checkpoint == 0:
            torch.save(
                model.state_dict(),
                os.path.join(FLAGS.model_dir, "model.%05d.chk" % i_epoch))
        return val_acc

    train(dataset, model, sample, callback, staged=False)

def run(arguments) -> None:
    hyperparameters = LanguageModel.get_default_hyperparameters()
    hyperparameters["run_id"] = make_run_id(arguments)
    max_epochs = int(arguments.get("--max-num-epochs"))
    patience = int(arguments.get("--patience"))
    max_num_files = arguments.get("--max-num-files")

    # override hyperparams if flag is passed
    hypers_override = arguments.get("--hypers-override")
    if hypers_override is not None:
        hyperparameters.update(json.loads(hypers_override))

    save_model_dir = arguments["SAVE_DIR"]
    os.makedirs(save_model_dir, exist_ok=True)
    save_file = os.path.join(save_model_dir,
                             f"{hyperparameters['run_id']}_best_model.bin")

    print("Loading data ...")
    vocab = build_vocab_from_data_dir(
        data_dir=arguments["TRAIN_DATA_DIR"],
        vocab_size=hyperparameters["max_vocab_size"],
        max_num_files=max_num_files,
    )
    print(f"  Built vocabulary of {len(vocab)} entries.")
    train_data = load_data_from_dir(
        vocab,
        length=hyperparameters["max_seq_length"],
        data_dir=arguments["TRAIN_DATA_DIR"],
        max_num_files=max_num_files,
    )
    print(
        f"  Loaded {train_data.shape[0]} training samples from {arguments['TRAIN_DATA_DIR']}."
    )
    valid_data = load_data_from_dir(
        vocab,
        length=hyperparameters["max_seq_length"],
        data_dir=arguments["VALID_DATA_DIR"],
        max_num_files=max_num_files,
    )
    print(
        f"  Loaded {valid_data.shape[0]} validation samples from {arguments['VALID_DATA_DIR']}."
    )

    model = LanguageModel(hyperparameters, vocab)
    model.build([None, hyperparameters["max_seq_length"]])
    print(
        f"Constructed model, using the following hyperparameters: {json.dumps(hyperparameters)}"
    )

    train(
        model,
        train_data,
        valid_data,
        batch_size=hyperparameters["batch_size"],
        max_epochs=max_epochs,
        patience=patience,
        save_file=save_file,
    )

def generate_program_desc(self, do_test=False):
    """Generate the paddle program desc."""
    with fluid.program_guard(self.main_program_, self.startup_program_):
        self.input_model_ = LanguageModel()
        model_configs = self.trainer_config
        self.input_model_.build_model(model_configs)
        optimizer = fluid.optimizer.SGD(
            learning_rate=self.trainer_config["lr"],
            grad_clip=fluid.clip.GradientClipByGlobalNorm(
                clip_norm=self.trainer_config["max_grad_norm"]))
        optimizer.minimize(self.input_model_.get_model_loss())

    self.main_program_desc_ = self.main_program_.desc.serialize_to_string()
    self.startup_program_desc_ = self.startup_program_.desc.serialize_to_string()

    self.update_trainer_configs("loss_name",
                                self.input_model_.get_model_loss_name())
    self.update_trainer_configs("input_names",
                                self.input_model_.get_model_input_names())
    self.update_trainer_configs("target_names",
                                self.input_model_.get_target_names())
    self.update_trainer_configs("metrics",
                                self.input_model_.get_model_metrics())
    self.update_trainer_configs("show_metric", True)
    self.update_trainer_configs("max_training_steps", "inf")
    self.update_trainer_configs("shuffle", False)
    self.update_trainer_configs("main_program_desc", self.main_program_desc_)
    self.update_trainer_configs("startup_program_desc",
                                self.startup_program_desc_)

    if do_test:
        input_names = self.input_model_.get_model_input_names()
        target_var_names = self.input_model_.get_target_names()
        self.infer_program_ = self.main_program_._prune_with_input(
            feeded_var_names=input_names, targets=target_var_names)
        self.infer_program_ = self.infer_program_._inference_optimize(
            prune_read_op=True)
        fluid.io.prepend_feed_ops(self.infer_program_, input_names)
        fluid.io.append_fetch_ops(self.infer_program_, target_var_names)
        self.infer_program_.desc._set_version()
        fluid.core.save_op_compatible_info(self.infer_program_.desc)
        self.infer_program_desc_ = self.infer_program_.desc.serialize_to_string()
        self.update_trainer_configs("infer_program_desc",
                                    self.infer_program_desc_)

def main(_):
    # Load configuration.
    with open(FLAGS.config, 'r') as f:
        config = yaml.load(f)

    # Initialize CoNLL dataset.
    dataset = CoNLLDataset(fname=config['data']['train'], target='lm')

    # Initialize model.
    language_model = LanguageModel(
        vocab_size=len(dataset.token_vocab),
        embedding_dim=config['model']['embedding_dim'],
        hidden_size=config['model']['hidden_size'],
        num_layers=config['model']['num_layers'])
    if torch.cuda.is_available():
        language_model = language_model.cuda()

    # Initialize loss function. NOTE: Manually setting weight of padding to 0.
    weight = torch.ones(len(dataset.token_vocab))
    weight[0] = 0
    if torch.cuda.is_available():
        weight = weight.cuda()
    loss_function = torch.nn.NLLLoss(weight)
    optimizer = torch.optim.Adam(language_model.parameters())

    # Main training loop.
    data_loader = DataLoader(dataset,
                             batch_size=config['training']['batch_size'],
                             shuffle=True,
                             collate_fn=collate_annotations)
    losses = []
    i = 0
    for epoch in range(config['training']['num_epochs']):
        for batch in data_loader:
            inputs, targets, lengths = batch
            optimizer.zero_grad()
            outputs, _ = language_model(inputs, lengths=lengths)
            outputs = outputs.view(-1, len(dataset.token_vocab))
            targets = targets.view(-1)
            loss = loss_function(outputs, targets)
            loss.backward()
            optimizer.step()
            losses.append(loss.data[0])
            if (i % 100) == 0:
                average_loss = np.mean(losses)
                losses = []
                print('Iteration %i - Loss: %0.6f' % (i, average_loss), end='\r')
            if (i % 1000) == 0:
                torch.save(language_model, config['data']['checkpoint'])
            i += 1
    torch.save(language_model, config['data']['checkpoint'])

def graph(params):
    model = LanguageModel(
        params.vocab.size(),
        params.embed_size,
        params.hidden_size,
        params.nlayers,
        dropout=params.dropout,
        cell=params.cell,
    )
    loss = torch.nn.CrossEntropyLoss()
    return model, loss

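# A minimal usage sketch for graph(); `params`, `inputs`, and `targets` are
# hypothetical, and the assumption that the model maps inputs to (batch, vocab)
# logits is mine, not the project's documented API.
model, loss_fn = graph(params)
logits = model(inputs)                 # assumed output: (batch, vocab) logits
step_loss = loss_fn(logits, targets)   # targets: (batch,) of class indices
step_loss.backward()
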
def model_load(path, model=None, optimizer=None):
    config = LMConfig(os.path.join(path, 'config.json'))
    if model is None:
        model_to_load = LanguageModel(config)
    else:
        model_to_load = get_model(model)
        model_to_load.__init__(config)
    model_state_dict = torch.load(open(os.path.join(path, 'model.pt'), 'rb'),
                                  map_location=lambda s, l: s)
    model_to_load.load_state_dict(model_state_dict)
    if optimizer:
        optimizer_state_dict = torch.load(
            open(os.path.join(path, 'optimizer.pt'), 'rb'),
            map_location=lambda s, l: s)
        optimizer.load_state_dict(optimizer_state_dict)
    return model_to_load

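# A minimal usage sketch for model_load(); "checkpoints/best" is a hypothetical
# directory assumed to contain the config.json / model.pt (and optionally
# optimizer.pt) files that model_load() reads.
model = model_load("checkpoints/best")
model.eval()  # switch to inference mode before scoring
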
def freestyle(loc):
    # TODO
    # load data
    model_dir = Path(loc)
    settings = pickle.load(open(model_dir / 'settings.pkl', 'rb'))
    print(settings)

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    how_many = 100

    # load the models
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    fnames = os.listdir(model_dir / 'checkpoints')
    fname = fnames[-1]

    # load the model
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # monitor
    sents = [
        'The Standard ', 'non-abelian', 'silicon pixel detector',
        'estimate the', '[23] ATLAS'
    ]
    temperatures = [0.01 + 0.1 * i for i in range(11)]
    eval_stream = model_dir / 'evaluate_stream.txt'
    for temperature in temperatures:
        txt = '\nTemperature = {}'.format(temperature)
        utils.report(txt, eval_stream)
        for sent in sents:
            txt = generate.compose(model, vocab, emb, sent, temperature, how_many)
            utils.report(txt, eval_stream)

def build_LM(in_file: str) -> LanguageModel:
    """
    Build language models for each label.
    Each line in in_file contains a label and a string separated by a space.
    """
    print('building language models...')
    lm = LanguageModel()
    with open(in_file, encoding="utf8") as in_file_lines:
        for line in in_file_lines:
            (language, l) = line.split(" ", 1)
            for gram in create_grams(tokenize(l, NGRAM_SIZE - 1), NGRAM_SIZE):
                lm.add_gram(gram, language)
    return lm

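# A minimal usage sketch for build_LM(); "input.train.txt" is a hypothetical
# file whose lines follow the format described in the docstring, e.g.
#   english the quick brown fox jumps over the lazy dog
# (label first, then the text, separated by a single space).
lm = build_LM("input.train.txt")
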
def main(args):
    logger.info(f"Args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    spm_path = os.path.join('spm', args.spm, "spm.model")
    args.sample = parse_sample_options(args.sample)
    logger.info(f"Loading tokenizer from {spm_path}")
    tokenizer = Tokenizer(spm_path)
    args.ntoken = ntoken = len(tokenizer)
    logger.info(f"  Vocabulary size: {ntoken}")

    logger.info("Reading dataset")
    data = {}
    for x in ['train', 'valid', 'test']:
        data[x] = read_data(os.path.join(args.data_dir, f"{x}.query.txt"),
                            min_len=args.min_len)
        logger.info(f"  Number of {x:>5s} data: {len(data[x]):8d}")

    logger.info("Preparing model and optimizer")
    config = LMConfig(ntoken, args.ninp, args.nhid, args.nlayers,
                      args.dropouti, args.dropoutr, args.dropouth, args.dropouto)
    model = LanguageModel(config).to(device)
    params = get_params(model)
    logger.info(f"  Number of model parameters: {sum(p.numel() for p in params)}")
    optimizer = torch.optim.Adam(params)

    if args.resume:
        logger.info(f"Loading model from {args.resume}")
        model_load(args.resume, model, optimizer)
        model = model.to(device)

    if n_gpu > 1:
        logger.info("Making model data parallel")
        model = torch.nn.DataParallel(model, dim=1)

    train(model, optimizer, tokenizer, data['train'], data['valid'], args)
    test(model, tokenizer, data['test'], args)

def plot_losses(loc):
    # load data
    model_dir = Path(loc)
    settings = pickle.load(open(model_dir / 'settings.pkl', 'rb'))

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']
    n_epochs = settings['n_epochs']
    n_saves = settings['n_saves']
    criterion = nn.CrossEntropyLoss()

    # load the models
    models = []
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    for fname in os.listdir(model_dir / 'checkpoints'):
        model = LanguageModel(cell, input_size, hidden_size, output_size)
        model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
        model.eval()
        models.append(model)

    # prepare training and validation sets
    N = 10000
    splits = ['train', 'valid']
    gens = {
        split: generate.generate(split,
                                 token=token,
                                 max_len=max_len,
                                 small=small,
                                 batch_size=N)
        for split in splits
    }
    batch, labels = {}, {}
    for split in splits:
        for b, l in gens[split]:
            # one hot encode
            if token == 'character':
                b = generate.one_hot_encode(b, vocab)
            # or embed
            elif token == 'word':
                b = generate.w2v_encode(b, emb, vocab)
            batch[split], labels[split] = torch.Tensor(b), torch.Tensor(l).long()
            break

    # evaluate the models
    loss = {split: [] for split in splits}
    acc = {split: [] for split in splits}
    for i, model in enumerate(models):
        t0 = time.time()
        print(i)
        for split in splits:
            # loss
            outputs = model(batch[split])
            l = criterion(outputs, labels[split])
            loss[split].append(float(l))
            # accuracy
            _, preds = torch.max(outputs, 1)
            a = sum(preds == labels[split]) / float(N)
            acc[split].append(float(a))
        print('{:2.2f}s'.format(time.time() - t0))

    for split in splits:
        with open(model_dir / 'best_{}_acc.txt'.format(split), 'w') as handle:
            best = max(acc[split])
            handle.write('{}\n'.format(best))

    # plot both quantities
    for quantity, description in zip([loss, acc], ['Loss', 'Accuracy']):
        fig, ax = plt.subplots()
        for split in splits:
            xs = (1 + np.arange(len(quantity[split]))) / n_saves
            ax.plot(xs, quantity[split], label=split)
        ax.set_xlabel('Training epoch')
        if n_epochs > 1:
            ax.set_xlabel('Epoch')
        ax.set_ylabel(description)
        upper = ax.get_ylim()[1] if description == 'Loss' else 1
        ax.set_ylim(0, upper)
        ax.set_xlim(0, ax.get_xlim()[1])
        ax.set_title(model_dir.name, fontsize=7)
        ax.legend()
        ax.grid(alpha=0.5, which='both')
        plt.savefig(model_dir / '{}.pdf'.format(description))

test_input, test_label = get_batch(test_file_path,
                                   word_dict,
                                   batch_size=args.batch_size,
                                   bptt=args.bptt)
with open(test_pkl_path, 'wb') as f:
    pickle.dump({'data': test_input, 'label': test_label}, f)

with open(train_pkl_path, 'rb') as f:
    train_data = pickle.load(f)
with open(test_pkl_path, 'rb') as f:
    test_data = pickle.load(f)

model = LanguageModel(dict_size,
                      args.hidden_size,
                      args.hidden_size,
                      n_layer=1,
                      drop_rate=args.drop_rate,
                      adaptive_softmax=with_adaptive,
                      cutoff=cutoff_list)
model  # .cuda()
optimizer = optim.Adagrad(model.parameters(),
                          lr=args.learning_rate,
                          lr_decay=args.learning_rate_decay,
                          weight_decay=args.weight_decay)

if with_adaptive:
    print('Use adaptive softmax.')
    criterion = AdaptiveLoss(cutoff_list)
else:
    print('Use common softmax.')
    criterion = nn.CrossEntropyLoss()

input = data['input']
label = data['label']
vocab = len(data['worddic'])

if args.model == 'adasoft':
    adasoft = True
elif args.model == 'linear':
    adasoft = False

model = LanguageModel(vocab,
                      512,
                      512,
                      1,
                      adaptive_softmax=adasoft,
                      cutoff=[2000, 10000])
model.cuda()
optimizer = optim.Adagrad(model.parameters(),
                          lr=0.1,
                          lr_decay=1e-5,
                          weight_decay=1e-5)

if adasoft:
    criterion = AdaptiveLoss([2000, 10000, vocab + 1])
else:
    criterion = nn.CrossEntropyLoss()

def train(opt):
    # Read preprocessed data
    print_line()
    print('Loading training data ...')
    check_name = re.compile(r'.*\.prep\.train\.pt')
    assert os.path.exists(opt.train_data) or check_name.match(opt.train_data) is None
    train_dataset = torch.load(opt.train_data)
    train_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    print_line()
    print('Loading validation data ...')
    check_name = re.compile(r'.*\.prep\.val\.pt')
    assert os.path.exists(opt.val_data) or check_name.match(opt.val_data) is None
    val_dataset = torch.load(opt.val_data)
    val_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    # Build / load Model
    # Defaults for a new model; overwritten from train_info when reloading.
    model_start_epoch = 0
    model_start_batch = 0
    if opt.model_reload is None:
        print_line()
        print('Build new model...')
        model = LanguageModel(train_dataset.num_vocb,
                              dim_word=opt.dim_word,
                              dim_rnn=opt.dim_rnn,
                              num_layers=opt.num_layers,
                              dropout_rate=opt.dropout_rate)
        model.dictionary = train_dataset.dictionary
        print('Done')
        train_dataset.describe_dataset()
        val_dataset.describe_dataset()
    else:
        print_line()
        print('Loading existing model...')
        model = torch.load(opt.model_reload)
        print('done')
        train_dataset.change_dict(model.dictionary)
        val_dataset.change_dict(model.dictionary)
        model_start_epoch = model.train_info['epoch idx'] - 1
        model_start_batch = model.train_info['batch idx'] - 1

    # Use GPU / CPU
    print_line()
    if opt.cuda:
        model.cuda()
        print('Using GPU %d' % torch.cuda.current_device())
    else:
        print('Using CPU')

    # Criterion, mask padding
    criterion_weight = torch.ones(train_dataset.num_vocb + 1)
    criterion_weight[const.PAD] = 0
    criterion = nn.CrossEntropyLoss(weight=criterion_weight, size_average=False)
    if opt.cuda:
        criterion = criterion.cuda()

    # Optimizer
    lr = opt.lr
    optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    if model_start_epoch > opt.epoch:
        print('This model has already been trained for more than %d epochs; '
              'increase the epoch parameter if you want to continue'
              % (opt.epoch + 1))
        return

    print_line()
    print('')
    if opt.model_reload is None:
        print('Start training new model, will go through %d epochs' % opt.epoch)
    else:
        print('Continue existing model, from epoch %d, batch %d to epoch %d'
              % (model_start_epoch, model_start_batch, opt.epoch))
    print('')

    best_model = model.train_info

    if opt.save_freq == 0:
        opt.save_freq = train_dataset.num_batch - 1

    # Train
    model.train()
    for epoch_idx in range(model_start_epoch, opt.epoch):
        # New epoch
        acc_loss = 0
        acc_count = 0
        start_time = time.time()
        train_dataset.shuffle()

        print_line()
        print('Start epoch %d, learning rate %f ' % (epoch_idx + 1, lr))
        print_line('-')
        epoch_start_time = start_time

        # If a model was reloaded, continue from its last batch
        if epoch_idx == model_start_epoch and model_start_batch > 0:
            start_batch = model_start_batch
        else:
            start_batch = 0

        for batch_idx in range(start_batch, train_dataset.num_batch):
            # Generate batch data
            batch_data, batch_lengths, target_words = train_dataset[batch_idx]
            if opt.cuda:
                batch_data = batch_data.cuda()
                batch_lengths = batch_lengths.cuda()
                target_words = target_words.cuda()
            batch_data = Variable(batch_data, requires_grad=False)
            batch_lengths = Variable(batch_lengths, requires_grad=False)
            target_words = Variable(target_words, requires_grad=False)

            optimizer.zero_grad()

            # Forward
            output_flat = model.forward(batch_data, batch_lengths)

            # Calculate loss
            loss = criterion(output_flat, target_words.view(-1))

            # Backward
            loss.backward()

            # Prevent gradient explosion
            torch.nn.utils.clip_grad_norm(model.parameters(), opt.clip)

            # Update parameters
            optimizer.step()

            # Accumulate loss
            acc_loss += loss.data
            acc_count += batch_lengths.data.sum()

            # Display progress
            if batch_idx % opt.display_freq == 0:
                average_loss = acc_loss[0] / acc_count.item()
                print('Epoch : %d, Batch : %d / %d, Loss : %f, Perplexity : %f, Time : %f'
                      % (epoch_idx + 1, batch_idx, train_dataset.num_batch,
                         average_loss, math.exp(average_loss),
                         time.time() - start_time))
                acc_loss = 0
                acc_count = 0
                start_time = time.time()

            # Save and validate if necessary
            if (1 + batch_idx) % opt.save_freq == 0:
                print_line('-')
                print('Pause training for save and validate.')
                model.eval()
                val_loss = evaluate(model=model,
                                    eval_dataset=val_dataset,
                                    cuda=opt.cuda,
                                    criterion=criterion)
                model.train()
                print('Validation Loss : %f' % val_loss)
                print('Validation Perplexity : %f' % math.exp(val_loss))

                model_savename = opt.model_name + '-e_' + str(epoch_idx + 1) \
                    + '-b_' + str(batch_idx + 1) \
                    + '-ppl_' + str(int(math.exp(val_loss))) + '.pt'
                model.val_loss = val_loss
                model.val_ppl = math.exp(val_loss)
                model.epoch_idx = epoch_idx + 1
                model.batch_idx = batch_idx + 1
                model.train_info['val loss'] = val_loss
                model.train_info['train loss'] = math.exp(val_loss)
                model.train_info['epoch idx'] = epoch_idx + 1
                model.train_info['batch idx'] = batch_idx + 1
                model.train_info['val ppl'] = math.exp(model.val_loss)
                model.train_info['save name'] = model_savename

                try:
                    torch.save(model, model_savename)
                except:
                    print('Failed to save model!')

                if model.val_loss < best_model['val loss']:
                    print_line('-')
                    print('New best model on validation set')
                    best_model = model.train_info
                    shutil.copy2(best_model['name'], opt.model_name + '.best.pt')

                print_line('-')
                print('Save model at %s' % model_savename)
                print_line('-')
                print('Continue Training...')

        print_line('-')
        print('Epoch %d finished, took %d s'
              % (epoch_idx + 1, time.time() - epoch_start_time))

        # Update lr if needed
        lr *= opt.lr_decay
        optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    # Finish training
    print_line()
    print(' ')
    print('Finish training %d epochs!' % opt.epoch)
    print(' ')
    print_line()
    print('Best model:')
    print('Epoch : %d, Batch : %d, Loss : %f, Perplexity : %f'
          % (best_model['epoch idx'], best_model['batch idx'],
             best_model['val loss'], best_model['val ppl']))
    print_line('-')
    print('Save best model at %s' % (opt.model_name + '.best.pt'))
    shutil.copy2(best_model['name'], opt.model_name + '.best.pt')
    print_line()

def detach_hidden(h):
    """Detach hidden states from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(detach_hidden(v) for v in h)


torch.backends.cudnn.benchmark = True
torch.manual_seed(0)
np.random.seed(0)

labels = Labels()

model = LanguageModel(128, 512, 256, len(labels), n_layers=3, dropout=0.3)
model.cuda()

bptt = 8
batch_size = 64

root = '/open-stt-e2e/data/'

train = [
    root + 'asr_public_phone_calls_1.csv',
    root + 'asr_public_phone_calls_2_aa.csv',
    root + 'asr_public_phone_calls_2_ab.csv',
    root + 'public_youtube1120_aa.csv',
    root + 'public_youtube1120_ab.csv',
    root + 'public_youtube1120_ac.csv',
    root + 'public_youtube1120_hq.csv',
    root + 'public_youtube700_aa.csv',
    root + 'public_youtube700_ab.csv'
]

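# A minimal truncated-BPTT sketch showing how detach_hidden() is typically
# used; `chunks` (an iterable of (inputs, targets) pairs of length `bptt`) and
# the model(inputs, hidden) -> (outputs, hidden) signature are assumptions,
# not taken from this script.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
hidden = None
for inputs, targets in chunks:
    if hidden is not None:
        hidden = detach_hidden(hidden)  # cut the graph at the chunk boundary
    outputs, hidden = model(inputs, hidden)
    loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
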
trainSet, vocab = creatDataSet('./data', 'ptb.train.txt')
testSet, _ = creatDataSet('./data', 'ptb.test.txt')
validSet, _ = creatDataSet('./data', 'ptb.valid.txt')
word2idx, idx2word = word2index(vocab)

### Parameters Set ##########
VOCAB_SIZE = len(word2idx)
EMBEDDING_SIZE = 128
HIDDEN_SIZE = 1024
N_LAYERS = 1
DROPOUT_P = 0.5
BATCH_SIZE = 20
SEQ_LENGTH = 30
EPOCH = 40
LEARNING_RATE = 0.01
#############################

train_data = batchify(prepare_sequence(trainSet, word2idx), BATCH_SIZE)
test_data = batchify(prepare_sequence(testSet, word2idx), BATCH_SIZE)
valid_data = batchify(prepare_sequence(validSet, word2idx), BATCH_SIZE)

model = LanguageModel(VOCAB_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE, N_LAYERS,
                      DROPOUT_P).to(device)
model.weight_init()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

trainModel(model, train_data, valid_data, BATCH_SIZE, SEQ_LENGTH, EPOCH)
testModel(model, test_data, BATCH_SIZE, SEQ_LENGTH)

                lr=0.0001,
                teacher_forcing_ratio=1.0,
                seed=1,
                max_len=428,
                worker_num=1)

torch.manual_seed(config.seed)
torch.cuda.manual_seed_all(config.seed)
cuda = config.use_cuda and torch.cuda.is_available()
device = torch.device('cuda' if cuda else 'cpu')

model = LanguageModel(n_class=len(char2id),
                      n_layers=config.n_layers,
                      rnn_cell='lstm',
                      hidden_size=config.hidden_size,
                      dropout_p=config.dropout_p,
                      max_length=config.max_len,
                      sos_id=SOS_token,
                      eos_id=EOS_token,
                      device=device)
model.flatten_parameters()
model = nn.DataParallel(model).to(device)

for param in model.parameters():
    param.data.uniform_(-0.08, 0.08)

# Prepare loss
weight = torch.ones(len(char2id)).to(device)
perplexity = Perplexity(weight, PAD_token, device)
optimizer = optim.Adam(model.module.parameters(), lr=config.lr)

from model import LanguageModel
import argparse

new = LanguageModel()

parser = argparse.ArgumentParser()
parser.add_argument("--length", default=100, type=int, help="text length")
parser.add_argument("--file",
                    default="bred.txt",
                    type=str,
                    help="file in which the text will be written")
parser.add_argument("--print",
                    default=False,
                    type=bool,
                    help="display text on screen or not")
args = parser.parse_args()

result = new.generate(args.length, args.file)
if args.print and result == 'Successfully':
    with open(args.file, 'r') as file:
        print(file.read())
else:
    print(result)

def plot_switch_prob(loc):
    # load settings
    model_dir = Path(loc)
    settings = pickle.load(open(model_dir / 'settings.pkl', 'rb'))
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']

    # load the final model
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    fnames = os.listdir(model_dir / 'checkpoints')
    fname = fnames[-1]

    # load the model
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # prepare the base and replacement batch
    N = 100
    gen = generate.generate('valid',
                            token=token,
                            max_len=max_len,
                            small=small,
                            batch_size=N)
    base_batch, _ = next(gen)
    repl_batch, _ = next(gen)

    # compute the average KL divs over the batch
    depths = [i for i in range(max_len)]
    switch_probs = [
        compute_switch_prob(model, base_batch, repl_batch, keep_depth, vocab, emb)
        for keep_depth in depths
    ]

    # make the plot
    fig, ax = plt.subplots()
    ax.plot(depths, switch_probs, 'tomato')
    ax.plot(depths, [0.01] * len(depths), 'k')
    ax.set_yscale('log')
    ax.set_ylim(0.001, 1)
    ax.set_xlim(0, max_len)
    ax.set_title('Probability of switching predicted character\n{}'.format(
        model_dir.name), fontsize=7)
    ax.set_xlabel('sequence keep-depth')
    ax.set_ylabel('Probability')
    ax.grid()
    plt.savefig(model_dir / 'SwitchProbability.pdf')

dataset.save(dataset_specific_info)

params = {}
# take account of the 0 token for padding
params['vocab_size'] = dataset.vocab_size + 1
params['num_classes'] = dataset.vocab_size
params['batch_size'] = batch_size
params['valid_batch_size'] = valid_batch_size
params['seq_len'] = seq_len
params['hidden_dim'] = hidden_size
params['num_layers'] = num_layers
params['embed_size'] = embed_size
params['directoryOutLogs'] = directoryOutLogs

model = LanguageModel(params)
model.compile()

eval_softmax = 5
total_time_training = 0
total_time_valid = 0
loss_list = ''
perp_list = ''
wps_list = ''
time_per_batch = ''
time_per_epoch = ''

for epoch in range(num_epochs):
    dataset.set_data_dir(data_dir)
    dataset.set_batch_size(batch_size)

def detach_hidden(h):
    """Detach hidden states from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(detach_hidden(v) for v in h)


torch.backends.cudnn.benchmark = True
torch.manual_seed(0)
np.random.seed(0)

labels = Labels()
num_labels = len(labels)

model = LanguageModel(128, 512, 256, num_labels, n_layers=3, dropout=0.3)
model.cuda()

bptt = 8
batch_size = 32

train = [
    '/media/lytic/STORE/ru_open_stt_wav/text/public_youtube1120_hq.txt',
    '/media/lytic/STORE/ru_open_stt_wav/text/public_youtube1120.txt',
    '/media/lytic/STORE/ru_open_stt_wav/text/public_youtube700.txt'
]

test = [
    '/media/lytic/STORE/ru_open_stt_wav/text/asr_calls_2_val.txt',
    '/media/lytic/STORE/ru_open_stt_wav/text/buriy_audiobooks_2_val.txt',
    '/media/lytic/STORE/ru_open_stt_wav/text/public_youtube700_val.txt'
]

EPOCH = 40
RESCHEDULED = False

train_data, vocab_train = creatDataSet('./data', 'ptb.train.txt')
valid_data, _ = creatDataSet('./data', 'ptb.valid.txt')
test_data, _ = creatDataSet('./data', 'ptb.test.txt')

vocab = list(set(vocab_train))
word2idx, idx2word = word2index(vocab)

trainSet = batchify(prepare_seq(train_data, word2idx), BATCH_SIZE)
testSet = batchify(prepare_seq(test_data, word2idx), BATCH_SIZE // 2)
validSet = batchify(prepare_seq(valid_data, word2idx), BATCH_SIZE // 2)

model = LanguageModel(len(word2idx), EMBED_SIZE, HIDDEN_SIZE, NUM_LAYER, 0.5).to(device)
model.init_weight()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)


def trainModel(trainSet, validSet):
    for epoch in range(EPOCH):
        total_loss = 0
        losses = []
        hidden = model.init_hidden(BATCH_SIZE)
        total = ceil((trainSet.size(1) - SEQ_LENGTH) / SEQ_LENGTH)
        model.train()
        for i, batch in enumerate(getBatch(trainSet, SEQ_LENGTH)):
            view_bar(i, total, epoch + 1, EPOCH)
            inputs, targets = batch
            hidden = model.detach_hidden(hidden)

def train(settings, model_dir):
    # training and sampling
    temperature = 0.5
    how_many = 70
    vocab = generate.get_vocab(args.token, small=args.small)

    # create the vocab, model, (and embedding)
    if args.token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif args.token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    model = LanguageModel(args.cell, input_size, args.hidden_size, output_size)

    # create criterion and optimiser
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    # create the validation set
    n_valid = 10000
    valid_gen = generate.generate('valid',
                                  token=args.token,
                                  max_len=args.max_len,
                                  small=args.small,
                                  batch_size=n_valid)
    for valid_batch, valid_labels in valid_gen:
        # one hot encode
        if args.token == 'character':
            valid_batch = generate.one_hot_encode(valid_batch, vocab)
        # or embed
        elif args.token == 'word':
            valid_batch = generate.w2v_encode(valid_batch, emb, vocab)
        valid_batch = torch.Tensor(valid_batch)
        valid_labels = torch.Tensor(valid_labels).long()
        break

    # how many epochs do we need?
    batches_per_epoch = generate.get_n_batches_in_epoch('train', args.token,
                                                        args.batch_size,
                                                        args.max_len, args.small)

    # training settings
    every_n = int(batches_per_epoch / args.n_saves) if not args.debug else 50
    running_loss = 0
    training_losses = []
    valid_losses = []
    t0 = time.time()

    # dump the settings
    pickle.dump(settings, open(model_dir / 'settings.pkl', 'wb'))
    out_stream = model_dir / 'out_stream.txt'

    # run the training loop
    for epoch in range(1, args.n_epochs + 1):
        opening = ['', '#' * 20,
                   '# Epoch {} (t={:2.2f}h)'.format(epoch, (time.time() - t0) / 3600.),
                   '#' * 20, '']
        for txt in opening:
            utils.report(txt, out_stream)

        # create the generator for each epoch
        train_gen = generate.generate('train',
                                      token=args.token,
                                      max_len=args.max_len,
                                      small=args.small,
                                      batch_size=args.batch_size)
        for i, (batch, labels) in enumerate(train_gen):
            # one hot encode
            if args.token == 'character':
                batch = generate.one_hot_encode(batch, vocab)
            # or embed
            elif args.token == 'word':
                batch = generate.w2v_encode(batch, emb, vocab)

            # turn into torch tensors
            batch = torch.Tensor(batch)
            labels = torch.Tensor(labels).long()

            # zero the gradients
            optimizer.zero_grad()

            # forward and backward pass and optimisation step
            outputs = model(batch)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # monitor the losses
            running_loss += loss
            if i % every_n == (every_n - 1):
                # append the training losses
                training_losses.append(float(running_loss / every_n))
                running_loss = 0

                # compute the valid loss
                valid_outputs = model(valid_batch)
                valid_losses.append(float(criterion(valid_outputs, valid_labels)))

                # monitor progress
                monitor = ['\n{}/{} done'.format(i + 1, batches_per_epoch)]
                monitor.append(generate.compose(model, vocab, emb,
                                                'The Standard Model of',
                                                temperature, how_many))
                for m in monitor:
                    utils.report(m, out_stream)

                # save the model
                torch.save(model.state_dict(),
                           model_dir / 'checkpoints' / 'epoch{}_step_{}.pt'.format(
                               epoch, round(i / every_n)))

            if i >= 1000 and args.debug:
                break

    # save information
    dt = time.time() - t0
    time_txt = '\ntime taken: {:2.2f}h\n'.format(dt / 3600.)
    utils.report(time_txt, out_stream)
    utils.report(str(dt / 3600.), model_dir / 'time.txt')
    loss_dict = {'train': training_losses, 'valid': valid_losses, 'time_taken': dt}
    pickle.dump(loss_dict, open(model_dir / 'losses.pkl', 'wb'))

    # evaluate
    evaluate.plot_losses(model_dir)

def main():
    '''
    Main function that coordinates the entire process.
    Parses arguments that specify the exercise and the experiment that should be run.
    Initializes the model and the checkpoint managers.
    '''
    parser = argparse.ArgumentParser(
        description='Define configuration of experiments')
    parser.add_argument('--mode',
                        type=str,
                        nargs='+',
                        choices=['train', 'evaluate', 'generate'],
                        required=True)
    parser.add_argument('--experiment',
                        type=str,
                        choices=['a', 'b', 'c'],
                        required=True)
    parser.add_argument('--id', type=str, required=False)
    parser.add_argument('--epochs', type=int, default=EPOCHS, required=False)
    args = parser.parse_args()

    # Setting Experiment Id
    if args.id is None:
        exp_id = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        print(f"No Experiment Id Set, Creating New: {exp_id}")
    else:
        exp_id = args.id
        print(f"Using Experiment Id: {exp_id}")

    # Setting Directories
    base_dir = f"{OUTPUT_DIR}/exp_{args.experiment}/{exp_id}"
    log_dir = f"{base_dir}/logs"
    submission_dir = f"{base_dir}/submissions"
    if not os.path.exists(submission_dir):
        os.makedirs(submission_dir)
    ckpt_dir = f"{base_dir}/ckpts"
    print(f"Experiment Directory: {base_dir}")

    print(f"Using Tensorflow Version: {tf.__version__}")

    print("Building Vocabulary...")
    build_vocab(input_file=PATH_TRAIN,
                output_file=PATH_VOCAB,
                top_k=VOCAB_SIZE,
                special=SPECIAL)
    word2id, id2word = build_vocab_lookup(PATH_VOCAB, "<unk>")

    # Setting Experiment Specific Configurations
    if args.experiment == 'a':
        lstm_hidden_state_size = 512
        word_embeddings = None
    elif args.experiment == 'b':
        lstm_hidden_state_size = 512
        word_embeddings = load_embedding(dim_embedding=EMBEDDING_SIZE,
                                         vocab_size=VOCAB_SIZE)
    elif args.experiment == 'c':
        lstm_hidden_state_size = 1024
        word_embeddings = load_embedding(dim_embedding=EMBEDDING_SIZE,
                                         vocab_size=VOCAB_SIZE)
    else:
        raise ValueError(f"Unknown Experiment {args.experiment}")

    print('Initializing Model...')
    model = LanguageModel(vocab_size=VOCAB_SIZE,
                          sentence_length=SENTENCE_LENGTH,
                          embedding_size=EMBEDDING_SIZE,
                          hidden_state_size=lstm_hidden_state_size,
                          output_size=LSTM_OUTPUT_SIZE,
                          batch_size=BATCH_SIZE,
                          word_embeddings=word_embeddings,
                          index_to_word_table=id2word)

    print('Initializing Optimizer...')
    optimizer = tf.keras.optimizers.Adam()

    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
    manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=5)
    if manager.latest_checkpoint:
        print(f"Restoring Model from {manager.latest_checkpoint}...")
        ckpt.restore(manager.latest_checkpoint)
        model_loaded = True
    else:
        print("Initializing Model from Scratch")
        model_loaded = False

    if "train" in args.mode:
        print("Starting Training...")
        train_summary_writer = tf.summary.create_file_writer(f"{log_dir}/train")
        with train_summary_writer.as_default():
            train(ckpt=ckpt,
                  manager=manager,
                  model=model,
                  optimizer=optimizer,
                  word2id=word2id,
                  id2word=id2word,
                  epochs=args.epochs)
        model_loaded = True

    if "evaluate" in args.mode:
        print("Starting Evaluation...")
        assert model_loaded, 'model must be loaded from checkpoint in order to be evaluated'
        test_summary_writer = tf.summary.create_file_writer(f"{log_dir}/evaluate")
        with test_summary_writer.as_default():
            evaluate(model=model,
                     word2id=word2id,
                     id2word=id2word,
                     step=optimizer.iterations,
                     path_submission=f"{submission_dir}/group35.perplexity{args.experiment.upper()}")

    if "generate" in args.mode:
        print("Starting Generation...")
        assert model_loaded, 'model must be loaded from checkpoint in order to start generation'
        generate_summary_writer = tf.summary.create_file_writer(f"{log_dir}/generate")
        with generate_summary_writer.as_default():
            generate(word2id,
                     id2word,
                     model=model,
                     path_submission=f"{submission_dir}/group35.continuation")

from dataset import TextDataLoaderIterator
from model import LanguageModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

data_dir = 'data/Gutenberg/split/'
txt_files = [data_dir + file_name for file_name in os.listdir(data_dir)][:5]

if __name__ == '__main__':
    # checkpoint = torch.load('models/lm/latest.pth')

    model = LanguageModel(n_vocab=10000).to(device)
    # model.load_state_dict(checkpoint['model_state_dict'])

    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                        factor=0.95,
                                                        patience=100,
                                                        min_lr=1e-6)
    # lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])

    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(f"runs/{time.strftime('%Y%m%d-%I:%M%p', time.localtime())}")
    dummy_input = torch.LongTensor([[1]]).to(device)
    writer.add_graph(model, dummy_input)

    # global_step = checkpoint['global_step']
    global_step = 0

    for epoch in range(10):

if __name__ == "__main__": uniqueService = UniqueService(APP_DBUS_NAME, APP_OBJECT_NAME) app = QApplication(sys.argv) tray_icon = SystemTrayIcon( QIcon(os.path.join(get_parent_dir(__file__), "image", "trayicon.png")), app) tray_icon.show() (constant.TRAYAREA_TOP, constant.TRAYAREA_BOTTOM) = tray_icon.get_trayarea() plugin = Plugin() source_lang_model = LanguageModel() dest_lang_model = LanguageModel() word_engine_name = setting_config.get_translate_config("word_engine") words_engine_name = setting_config.get_translate_config("words_engine") translate_simple = imp.load_source( "translate_simple", plugin.get_plugin_file(word_engine_name)).Translate() translate_long = imp.load_source( "translate_long", plugin.get_plugin_file(words_engine_name)).Translate() word_translate_model = plugin.get_word_model( setting_config.get_translate_config("src_lang"), setting_config.get_translate_config("dst_lang")) words_translate_model = plugin.get_words_model( setting_config.get_translate_config("src_lang"),