def compute_loss_one_batch(model):
    """Pop one prepared batch from ``model.megabatch`` and return its loss.

    If the megabatch queue is empty, it is first refilled: minibatch index
    lists are drawn from ``model.mb`` (optionally growing the megabatch size
    on an annealing schedule), the referenced examples get their embeddings
    populated, everything is torchified (and moved to GPU when ``model.gpu``),
    and ``get_pairs_batch`` is called across the whole megabatch before the
    results are packed into ``Batch`` objects.

    Returns the value of ``model.loss_function`` for the popped batch, or
    ``None`` when ``model.mb`` has been exhausted and no batch could be built.
    """
    if len(model.megabatch) == 0:
        # --- Refill phase: collect minibatch index lists from model.mb. ---
        if model.megabatch_anneal == 0:
            # Fixed-size megabatch: take up to max_megabatch_size minibatches.
            for i in range(model.max_megabatch_size):
                if model.curr_idx < len(model.mb):
                    # model.mb entries are pairs; element [1] holds the
                    # example indices used below.
                    model.megabatch.append(model.mb[model.curr_idx][1])
                    model.curr_idx += 1
        else:
            # Annealed size: grow by one (up to the cap) each time
            # megabatch_anneal minibatches have been consumed.
            if model.increment and model.curr_megabatch_size < model.max_megabatch_size:
                model.curr_megabatch_size += 1
                model.increment = False
                print("Increasing megabatch size to {0}".format(
                    model.curr_megabatch_size))
            for i in range(model.curr_megabatch_size):
                if model.curr_idx < len(model.mb):
                    model.megabatch.append(model.mb[model.curr_idx][1])
                    model.curr_idx += 1
                    if model.curr_idx % model.megabatch_anneal == 0:
                        # Flag a size increase for the *next* refill.
                        model.increment = True
        # --- Resolve indices into (src, tgt) example pairs and make sure
        # both sides have embeddings populated. ---
        megabatch = []
        for n, i in enumerate(model.megabatch):
            arr = [model.data[t] for t in i]
            example_arr = []
            for j in arr:
                example = (j[0], j[1])
                if len(example[0].embeddings) > 0 and len(
                        example[1].embeddings) > 0:
                    # Already populated on an earlier epoch/pass; reuse as-is.
                    example_arr.append(example)
                    continue
                example[0].populate_embeddings(model.vocab, model.zero_unk,
                                               model.ngrams, model.scramble_rate)
                if not model.share_vocab:
                    # Separate target-side vocabulary (vocab_fr).
                    example[1].populate_embeddings(model.vocab_fr, model.zero_unk,
                                                   model.ngrams, model.scramble_rate)
                else:
                    example[1].populate_embeddings(model.vocab, model.zero_unk,
                                                   model.ngrams, model.scramble_rate)
                example_arr.append(example)
            megabatch.append(example_arr)
        model.megabatch = megabatch
        if len(model.megabatch) == 0:
            # Data exhausted: nothing left to build a batch from.
            return None
        # --- Torchify each minibatch (move to GPU when requested). ---
        sents1_list = []
        sents2_list = []
        sents1_lengths_list = []
        sents2_lengths_list = []
        for j in model.megabatch:
            sents1 = [i[0] for i in j]
            sents2 = [i[1] for i in j]
            sents_1_torch, lengths_1_torch = model.torchify_batch(sents1)
            if model.gpu:
                sents_1_torch = sents_1_torch.cuda()
                lengths_1_torch = lengths_1_torch.cuda()
            sents_2_torch, lengths_2_torch = model.torchify_batch(sents2)
            if model.gpu:
                sents_2_torch = sents_2_torch.cuda()
                lengths_2_torch = lengths_2_torch.cuda()
            sents1_list.append(sents_1_torch)
            sents2_list.append(sents_2_torch)
            sents1_lengths_list.append(lengths_1_torch)
            sents2_lengths_list.append(lengths_2_torch)
        # NOTE(review): p1/p2 presumably hold mined paraphrase/negative pairs
        # for each minibatch — confirm against get_pairs_batch's definition.
        p1_sents_list, p1_lengths_list, p2_sents_list, p2_lengths_list, = get_pairs_batch(
            model, sents1_list, sents1_lengths_list, sents2_list, sents2_lengths_list)
        # --- Pack one Batch object per minibatch back onto the queue. ---
        model.megabatch = []
        for i in range(len(p1_sents_list)):
            new_batch = Batch()
            new_batch.g1 = sents1_list[i]
            new_batch.g1_l = sents1_lengths_list[i]
            new_batch.g2 = sents2_list[i]
            new_batch.g2_l = sents2_lengths_list[i]
            new_batch.p1 = p1_sents_list[i]
            new_batch.p1_l = p1_lengths_list[i]
            new_batch.p2 = p2_sents_list[i]
            new_batch.p2_l = p2_lengths_list[i]
            model.megabatch.append(new_batch)
    # Consume one batch per call; remaining batches stay queued for later calls.
    curr_batch = model.megabatch.pop(0)
    g1, g2, p1, p2 = model.forward(curr_batch)
    return model.loss_function(g1, g2, p1, p2)
def compute_loss_one_batch(model):
    """Return the loss for the next queued batch, rebuilding the queue if empty.

    The refill pipeline: draw minibatch index lists from ``model.mb`` (with an
    optional annealing schedule that grows the megabatch), wrap each referenced
    example pair in ``BigExample``, torchify every minibatch (moving tensors to
    GPU when ``model.gpu``), run ``get_pairs_batch`` over the whole megabatch,
    and pack the results into ``Batch`` objects.

    Returns ``model.loss_function(...)`` for the popped batch, or ``None`` when
    ``model.mb`` is exhausted.
    """
    if not model.megabatch:
        # Stage 1: gather minibatch index lists for the next megabatch.
        if model.megabatch_anneal == 0:
            # Fixed megabatch size.
            for _ in range(model.max_megabatch_size):
                if model.curr_idx < len(model.mb):
                    model.megabatch.append(model.mb[model.curr_idx][1])
                    model.curr_idx += 1
        else:
            # Annealed: bump the size once per megabatch_anneal consumed
            # minibatches, until max_megabatch_size is reached.
            if model.increment and model.curr_megabatch_size < model.max_megabatch_size:
                model.curr_megabatch_size += 1
                model.increment = False
                print("Increasing megabatch size to {0}".format(model.curr_megabatch_size))
            for _ in range(model.curr_megabatch_size):
                if model.curr_idx < len(model.mb):
                    model.megabatch.append(model.mb[model.curr_idx][1])
                    model.curr_idx += 1
                    if model.curr_idx % model.megabatch_anneal == 0:
                        # Request a size increase on the next refill.
                        model.increment = True

        # Stage 2: resolve indices and wrap each side of the pair.
        wrapped_megabatch = []
        for index_list in model.megabatch:
            pair_list = []
            for item in [model.data[t] for t in index_list]:
                wrapped_pair = (
                    BigExample(item[0], model.vocab, model.rev_vocab, model.scramble_rate),
                    BigExample(item[1], model.vocab, model.rev_vocab, model.scramble_rate),
                )
                if model.args.debug:
                    print("Logging Pairing: {0} {1}".format(item[0].sentence, item[1].sentence))
                pair_list.append(wrapped_pair)
            wrapped_megabatch.append(pair_list)
        model.megabatch = wrapped_megabatch

        if not model.megabatch:
            # No data left to build batches from.
            return None

        # Stage 3: torchify each minibatch side, moving to GPU when enabled.
        sents1_list, sents1_lengths_list = [], []
        sents2_list, sents2_lengths_list = [], []
        for pair_list in model.megabatch:
            side1 = [pair[0] for pair in pair_list]
            side2 = [pair[1] for pair in pair_list]
            t1, l1 = model.torchify_batch(side1)
            t2, l2 = model.torchify_batch(side2)
            if model.gpu:
                t1, l1 = t1.cuda(), l1.cuda()
                t2, l2 = t2.cuda(), l2.cuda()
            sents1_list.append(t1)
            sents1_lengths_list.append(l1)
            sents2_list.append(t2)
            sents2_lengths_list.append(l2)

        # Stage 4: mine pairs across the megabatch and pack Batch objects.
        p1_sents_list, p1_lengths_list, p2_sents_list, p2_lengths_list = get_pairs_batch(
            model, sents1_list, sents1_lengths_list, sents2_list, sents2_lengths_list)
        model.megabatch = []
        packed = zip(sents1_list, sents1_lengths_list,
                     sents2_list, sents2_lengths_list,
                     p1_sents_list, p1_lengths_list,
                     p2_sents_list, p2_lengths_list)
        for g1, g1_l, g2, g2_l, p1, p1_l, p2, p2_l in packed:
            batch = Batch()
            batch.g1, batch.g1_l = g1, g1_l
            batch.g2, batch.g2_l = g2, g2_l
            batch.p1, batch.p1_l = p1, p1_l
            batch.p2, batch.p2_l = p2, p2_l
            model.megabatch.append(batch)

    # One batch is consumed per call; the rest stay queued.
    curr_batch = model.megabatch.pop(0)
    g1, g2, p1, p2 = model.forward(curr_batch)
    return model.loss_function(g1, g2, p1, p2)