def batchop(datapoints, VOCAB, GENDER, config, for_prediction=False, *args, **kwargs):
    """Assemble a ``(gender, input sequence) -> output sequence`` batch.

    Each datapoint contributes its ``id``, its ``gender`` looked up in
    ``GENDER``, and its ``in_sequence`` / ``out_sequence`` tokens looked up
    in ``VOCAB``. Both token sequences are framed by ``VOCAB['GO']`` and
    ``VOCAB['EOS']``.

    ``for_prediction`` and any extra positional/keyword arguments are
    accepted but not used by this function.

    Returns:
        ``(indices, (gender, in_sequence), out_sequence)``. The third item is
        the bare value, not a 1-tuple. Both sequence values are
        ``LongVar(config, pad_seq(...))`` with dimensions 0 and 1 swapped.
        ``LongVar`` and ``pad_seq`` are defined elsewhere.
    """
    def frame(tokens):
        # GO id, then the token ids in order, then the EOS id.
        ids = [VOCAB['GO']]
        ids.extend(VOCAB[tok] for tok in tokens)
        ids.append(VOCAB['EOS'])
        return ids

    indices = [point.id for point in datapoints]

    # One row per datapoint; fields are evaluated in the order
    # gender, input, output for each datapoint.
    rows = [
        (GENDER[point.gender], frame(point.in_sequence), frame(point.out_sequence))
        for point in datapoints
    ]

    gender_var = LongVar(config, [row[0] for row in rows])
    source_var = LongVar(config, pad_seq([row[1] for row in rows])).transpose(0, 1)
    target_var = LongVar(config, pad_seq([row[2] for row in rows])).transpose(0, 1)

    return indices, (gender_var, source_var), target_var
def batchop2(datapoints, VOCAB, GENDER, config, for_prediction=False, *args, **kwargs):
    """Assemble a gender/input-sequence batch, optionally with output sequences.

    Each datapoint contributes its ``id``, its ``gender`` looked up in
    ``GENDER``, and its ``in_sequence`` tokens looked up in ``VOCAB`` and
    framed by ``VOCAB['GO']`` / ``VOCAB['EOS']``. ``out_sequence`` is encoded
    the same way, but only when ``for_prediction`` is truthy.

    NOTE(review): output sequences are built only when ``for_prediction`` is
    truthy, and the default (``False``) returns an empty target tuple. That
    reads inverted relative to the flag's name; confirm against callers
    before changing it.

    Returns:
        ``(indices, (gender, in_sequence), out_sequence)`` when
        ``for_prediction`` is truthy (third item is the bare value, not a
        1-tuple), otherwise ``(indices, (gender, in_sequence), ())``.
        Sequence values are ``LongVar(config, pad_seq(...))`` with
        dimensions 0 and 1 swapped.
    """
    def frame(tokens):
        # GO id, then the token ids in order, then the EOS id.
        ids = [VOCAB['GO']]
        ids.extend(VOCAB[tok] for tok in tokens)
        ids.append(VOCAB['EOS'])
        return ids

    indices = [point.id for point in datapoints]

    genders, sources, targets = [], [], []
    for point in datapoints:
        genders.append(GENDER[point.gender])
        sources.append(frame(point.in_sequence))
        if for_prediction:
            targets.append(frame(point.out_sequence))

    inputs = (
        LongVar(config, genders),
        LongVar(config, pad_seq(sources)).transpose(0, 1),
    )

    if not for_prediction:
        return indices, inputs, ()

    return indices, inputs, LongVar(config, pad_seq(targets)).transpose(0, 1)
def predict_batchop(datapoints, VOCAB, LABELS, config, *args, **kwargs):
    """Assemble a target-less ``(story, question)`` batch.

    Each datapoint contributes its ``id`` and its ``story`` and ``q`` tokens
    looked up in ``VOCAB``. ``LABELS`` and any extra arguments are accepted
    but not used by this function.

    Returns:
        ``(indices, (story, question), ())`` where ``story`` and ``question``
        are ``LongVar(config, pad_seq(...))`` values (both helpers are
        defined elsewhere).
    """
    indices = [point.id for point in datapoints]

    # Encode story then question for each datapoint in turn.
    encoded = [
        ([VOCAB[tok] for tok in point.story], [VOCAB[tok] for tok in point.q])
        for point in datapoints
    ]

    story_var = LongVar(config, pad_seq([pair[0] for pair in encoded]))
    question_var = LongVar(config, pad_seq([pair[1] for pair in encoded]))

    return indices, (story_var, question_var), ()
def batchop(datapoints, VOCAB, LABELS, *args, **kwargs):
    """Assemble a ``(story, question) -> answer`` batch.

    Each datapoint contributes its ``id``, its ``story`` and ``q`` tokens
    looked up in ``VOCAB``, and its ``a`` value looked up in ``LABELS``.
    Extra positional/keyword arguments are ignored.

    Returns:
        ``(indices, (story, question), answer)``. The third item is the bare
        ``LongVar`` value, not a 1-tuple. ``story`` and ``question`` go
        through ``pad_seq`` before ``LongVar``; ``answer`` does not.
    """
    indices = [point.id for point in datapoints]

    # Encode story, question and answer for each datapoint in turn.
    encoded = [
        (
            [VOCAB[tok] for tok in point.story],
            [VOCAB[tok] for tok in point.q],
            LABELS[point.a],
        )
        for point in datapoints
    ]

    story_var = LongVar(pad_seq([row[0] for row in encoded]))
    question_var = LongVar(pad_seq([row[1] for row in encoded]))
    answer_var = LongVar([row[2] for row in encoded])

    return indices, (story_var, question_var), answer_var
def plot_accuracies(
        epoch_limit, min_epoch_count, max_epoch_count, accuracies,
        plot_title='Combined Accuracy',
        plot_filepath='combined_accuracy_heatmap.png',
        labels=None,
        y_offsets=None,
        ylabel='Accuracy',
        xlabel='Epoch',
        ylim=(0, 1),
        moving_avg=0,
):
    """Plot one mean-accuracy curve per task, save the figure and show it.

    Args:
        epoch_limit, min_epoch_count, max_epoch_count: accepted but not used
            by this function.
        accuracies: iterable of ``(task_name, acc)`` pairs. ``acc`` is passed
            through ``pad_seq`` and averaged over axis 0 to give the plotted
            curve (presumably one accuracy series per run -- confirm with
            callers).
        plot_title: figure title.
        plot_filepath: path handed to ``plt.savefig``.
        labels, y_offsets: accepted for backward compatibility. They have no
            effect on the figure: previously they only fed a text-label
            name/position that was computed and then never drawn. The
            defaults are now ``None`` rather than shared mutable dicts.
        ylabel, xlabel: axis labels.
        ylim: ``(low, high)`` passed to ``plt.ylim``; a falsy value leaves
            the y-limits untouched.
        moving_avg: when truthy, each curve is passed through
            ``calc_moving_avg(curve, moving_avg)`` before plotting.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))

    # Hide the frame around the plot area.
    for side in ('top', 'bottom', 'right', 'left'):
        ax.spines[side].set_visible(False)

    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Tick the x axis at integer positions only.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    plotted_any = False
    for i, (task_name, acc) in enumerate(accuracies):
        p = np.asarray(pad_seq(acc)).mean(axis=0)
        log.debug('p.shape: {}'.format(p.shape))

        if moving_avg:
            p = calc_moving_avg(p, moving_avg)

        # The colour index is offset by one, as in the original palette use.
        plt.plot(p, lw=2.5, color=colors[i + 1], label=task_name)
        plotted_any = True

    # Build the legend once, and only if a curve exists, instead of
    # re-creating it on every loop iteration.
    if plotted_any:
        plt.legend(loc='lower right')

    fig.suptitle(plot_title, fontsize=18, ha='center')

    if ylim:
        plt.ylim(*ylim)

    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.savefig(plot_filepath, bbox_inches='tight')
    plt.show()
def batchop(datapoints, WORD2INDEX, *args, **kwargs):
    """Assemble a story/question/answer batch with extended-vocabulary ids.

    Each datapoint contributes its ``id`` and its ``story``, ``q`` and ``a``
    tokens looked up in ``WORD2INDEX``, each followed by
    ``WORD2INDEX['EOS']``. Two further encodings, of ``story`` and ``a``,
    replace every token whose id equals ``WORD2INDEX['UNK']`` with
    ``len(WORD2INDEX) + slot``. ``slot`` is the word's position among that
    datapoint's out-of-vocabulary words, collected from
    ``d.story + d.q + d.a``.

    Slots follow first-occurrence order. The earlier ``list(set(...))``
    ordering depended on string hashing, so the extended ids could differ
    between interpreter runs. A dict lookup also replaces the per-token
    ``list.index`` scan.

    Returns:
        ``(indices, (story, question),
        (answer, extvocab_story, extvocab_answer, extvocab_size))`` where
        ``extvocab_size`` is the largest per-datapoint count of distinct
        out-of-vocabulary words (0 for an empty batch) and the other values
        are ``LongVar(pad_seq(...))`` results (helpers defined elsewhere).
    """
    indices = [d.id for d in datapoints]
    story = []
    question = []
    answer = []
    extvocab_story = []
    extvocab_answer = []

    UNK = WORD2INDEX['UNK']

    def build_oov(d, WORD2INDEX):
        # Map each distinct OOV word of ``d`` to its slot: 0, 1, 2, ... in
        # order of first appearance across story, question and answer.
        slots = {}
        for w in d.story + d.q + d.a:
            if WORD2INDEX[w] == UNK and w not in slots:
                slots[w] = len(slots)
        return slots

    def extend(words, slots):
        # In-vocabulary ids are kept; OOV words get ids past the vocabulary.
        base = len(WORD2INDEX)
        ids = []
        for w in words:
            index = WORD2INDEX[w]
            ids.append(base + slots[w] if index == UNK else index)
        ids.append(WORD2INDEX['EOS'])
        return ids

    extvocab_size = 0
    for d in datapoints:
        story.append([WORD2INDEX[w] for w in d.story] + [WORD2INDEX['EOS']])
        question.append([WORD2INDEX[w] for w in d.q] + [WORD2INDEX['EOS']])
        answer.append([WORD2INDEX[w] for w in d.a] + [WORD2INDEX['EOS']])

        oov = build_oov(d, WORD2INDEX)
        extvocab_story.append(extend(d.story, oov))
        extvocab_answer.append(extend(d.a, oov))

        extvocab_size = max(extvocab_size, len(oov))

    story = LongVar(pad_seq(story))
    question = LongVar(pad_seq(question))
    answer = LongVar(pad_seq(answer))
    extvocab_answer = LongVar(pad_seq(extvocab_answer))
    extvocab_story = LongVar(pad_seq(extvocab_story))

    batch = indices, (story, question), (answer, extvocab_story, extvocab_answer, extvocab_size)
    return batch
def batchop(datapoints, VOCAB, config, *args, **kwargs):
    """Assemble a target-less batch holding one encoded sequence per datapoint.

    Each datapoint contributes its ``id`` and its ``sequence`` tokens looked
    up in ``VOCAB``. Extra positional/keyword arguments are ignored.

    Returns:
        ``(indices, (sequence,), ())`` where ``sequence`` is
        ``LongVar(config, pad_seq(...))`` (both helpers defined elsewhere).
    """
    indices = [point.id for point in datapoints]
    token_ids = [[VOCAB[tok] for tok in point.sequence] for point in datapoints]
    packed = LongVar(config, pad_seq(token_ids))
    return indices, (packed,), ()
def batchop(datapoints, VOCAB, GENDER, config, *args, **kwargs):
    """Assemble a ``(gender, input sequence) -> output sequence`` batch.

    Each datapoint contributes its ``id``, its ``in_sequence`` and
    ``out_sequence`` tokens looked up in ``VOCAB`` (each framed by
    ``VOCAB['GO']`` and ``VOCAB['EOS']``), and its ``gender`` looked up in
    ``GENDER``. Extra positional/keyword arguments are ignored.

    Returns:
        ``(indices, (gender, in_sequence), out_sequence)``. The third item is
        the bare value, not a 1-tuple. Both sequence values are
        ``LongVar(config, pad_seq(...))`` with dimensions 0 and 1 swapped.
    """
    def frame(tokens):
        # GO id, then the token ids in order, then the EOS id.
        ids = [VOCAB['GO']]
        ids.extend(VOCAB[tok] for tok in tokens)
        ids.append(VOCAB['EOS'])
        return ids

    indices = [point.id for point in datapoints]

    sources, targets, genders = [], [], []
    for point in datapoints:
        sources.append(frame(point.in_sequence))
        targets.append(frame(point.out_sequence))
        genders.append(GENDER[point.gender])

    source_var = LongVar(config, pad_seq(sources)).transpose(0, 1)
    target_var = LongVar(config, pad_seq(targets)).transpose(0, 1)
    gender_var = LongVar(config, genders)

    return indices, (gender_var, source_var), target_var
def batchop(datapoints, VOCAB, config, *args, **kwargs):
    """Assemble a ``(word1, word2) -> existence`` batch.

    Each datapoint contributes its ``id``, the two items of its ``pair``
    (each encoded element by element through ``VOCAB``) and its
    ``existence`` value. Extra positional/keyword arguments are ignored.

    The earlier ``max_len`` computation over ``d.max_token_len`` has been
    removed. Its result was never used, yet it required that attribute on
    every datapoint and raised ``ValueError`` for an empty batch before any
    real work was done.

    Returns:
        ``(indices, (word1, word2), existence)``. The third item is the bare
        ``LongVar`` value, not a tuple. ``word1`` and ``word2`` go through
        ``pad_seq`` before ``LongVar``; ``existence`` does not.
    """
    indices = [d.id for d in datapoints]

    word1 = []
    word2 = []
    existence = []
    for d in datapoints:
        w1, w2 = d.pair
        word1.append([VOCAB[i] for i in w1])
        word2.append([VOCAB[i] for i in w2])
        existence.append(d.existence)

    word1 = LongVar(config, pad_seq(word1))
    word2 = LongVar(config, pad_seq(word2))
    existence = LongVar(config, existence)

    batch = indices, (word1, word2), existence
    return batch
def batchop(datapoints, VOCAB, config, *args, **kwargs):
    """Assemble a batch whose target is the input shifted by one step.

    Each datapoint contributes its ``id`` and its ``sequence`` tokens looked
    up in ``VOCAB``. The result of ``LongVar(config, pad_seq(...))`` has its
    first two dimensions swapped and is then sliced along dimension 0.
    Extra positional/keyword arguments are ignored.

    Returns:
        ``(indices, inputs, targets)`` where ``inputs`` is everything but the
        last slice along dimension 0 and ``targets`` is everything but the
        first. Both are bare values, not tuples.
    """
    indices = [point.id for point in datapoints]
    token_ids = [[VOCAB[tok] for tok in point.sequence] for point in datapoints]

    swapped = LongVar(config, pad_seq(token_ids)).transpose(1, 0)

    inputs = swapped[:-1]
    targets = swapped[1:]
    return indices, inputs, targets
def batchop(datapoints, WORD2INDEX, *args, **kwargs):
    """Assemble a ``(context, question, answer length) -> answer positions`` batch.

    For each datapoint:
      * ``context`` tokens are looked up in ``WORD2INDEX`` and followed by
        ``WORD2INDEX['EOS']``;
      * ``q`` tokens are looked up in ``WORD2INDEX`` (no EOS is appended);
      * the answer positions are ``a_positions`` followed by
        ``len(context)``, which is the index of the appended EOS;
      * the answer length is ``len(a_positions) + 1``.

    Extra positional/keyword arguments are ignored.

    Returns:
        ``(indices, (context, question, answer_lengths), (answer_positions,))``
        with every non-index value converted by ``np.array``. All but
        ``answer_lengths`` go through ``pad_seq`` (defined elsewhere) first.
    """
    indices = [point.id for point in datapoints]

    contexts, questions, positions, lengths = [], [], [], []
    for point in datapoints:
        contexts.append([WORD2INDEX[tok] for tok in point.context] + [WORD2INDEX['EOS']])
        questions.append([WORD2INDEX[tok] for tok in point.q])

        lengths.append(len(point.a_positions) + 1)

        span = list(point.a_positions)
        span.append(len(point.context))
        positions.append(span)

    padded_contexts = pad_seq(contexts)
    padded_questions = pad_seq(questions)
    padded_positions = pad_seq(positions)

    inputs = (
        np.array(padded_contexts),
        np.array(padded_questions),
        np.array(lengths),
    )
    targets = (np.array(padded_positions),)
    return indices, inputs, targets
def batchop(datapoints, VOCAB, GENDER, config, *args, **kwargs):
    """Assemble a target-less ``(gender, sequence)`` batch.

    Each datapoint contributes its ``id``, its ``sequence`` tokens looked up
    in ``VOCAB`` and framed by ``VOCAB['GO']`` / ``VOCAB['EOS']``, and its
    ``gender`` looked up in ``GENDER``. Extra positional/keyword arguments
    are ignored.

    Returns:
        ``(indices, (gender, sequence), ())`` where ``sequence`` is
        ``LongVar(config, pad_seq(...))`` (no dimension swap) and ``gender``
        is ``LongVar(config, ...)`` over the per-datapoint gender ids.
    """
    def frame(tokens):
        # GO id, then the token ids in order, then the EOS id.
        ids = [VOCAB['GO']]
        ids.extend(VOCAB[tok] for tok in tokens)
        ids.append(VOCAB['EOS'])
        return ids

    indices = [point.id for point in datapoints]

    # Sequence first, then gender, for each datapoint in turn.
    rows = [(frame(point.sequence), GENDER[point.gender]) for point in datapoints]

    sequence_var = LongVar(config, pad_seq([row[0] for row in rows]))
    gender_var = LongVar(config, [row[1] for row in rows])

    return indices, (gender_var, sequence_var), ()
def batchop(datapoints, VOCAB, LABELS, *args, **kwargs):
    """Assemble a ``sentence -> label`` batch.

    Each datapoint contributes its ``id``, its ``sentence`` tokens looked up
    in ``VOCAB`` and followed by ``VOCAB['EOS']``, and its ``label`` looked
    up in ``LABELS``. Extra positional/keyword arguments are ignored.

    Returns:
        ``(indices, (sentence,), (label,))``; both the input and the target
        are 1-tuples. ``sentence`` is ``LongVar(pad_seq(...))`` and ``label``
        is ``LongVar(...)`` over the per-datapoint label ids.
    """
    indices = [point.id for point in datapoints]

    # Sentence (with trailing EOS) first, then label, per datapoint.
    rows = [
        ([VOCAB[tok] for tok in point.sentence] + [VOCAB['EOS']], LABELS[point.label])
        for point in datapoints
    ]

    sentence_var = LongVar(pad_seq([row[0] for row in rows]))
    label_var = LongVar([row[1] for row in rows])

    return indices, (sentence_var,), (label_var,)
def batchop(datapoints, VOCAB, LABELS, config, for_prediction=False, *args, **kwargs):
    """Assemble a ``sequence -> label`` batch, skipping labels for prediction.

    Each datapoint contributes its ``id`` and its ``sequence`` tokens looked
    up in ``VOCAB``. Unless ``for_prediction`` is truthy, its ``label`` is
    also looked up in ``LABELS``. Extra positional/keyword arguments are
    ignored.

    Returns:
        ``(indices, (sequence,), label)``. ``sequence`` is
        ``LongVar(config, pad_seq(...))``. ``label`` is
        ``LongVar(config, ...)`` over the label ids, or a plain empty list
        when ``for_prediction`` is truthy. It is the bare value, not a
        1-tuple.
    """
    indices = [point.id for point in datapoints]

    sequences, labels = [], []
    for point in datapoints:
        sequences.append([VOCAB[tok] for tok in point.sequence])
        if for_prediction:
            continue
        labels.append(LABELS[point.label])

    packed = LongVar(config, pad_seq(sequences))

    # In prediction mode the (empty) Python list is returned as-is.
    target = labels if for_prediction else LongVar(config, labels)

    return indices, (packed,), target