                        input_length=MAX_LEN, trainable=False)
ant_prem = antonym_embed(premise)
ant_hypo = antonym_embed(hypothesis)

if args.timedist:
    antonym_translate = TimeDistributed(
        Dense(ANT_SENT_HIDDEN_SIZE, activation=ACTIVATION))
    ant_prem = antonym_translate(ant_prem)
    ant_hypo = antonym_translate(ant_hypo)

if args.align_op_we is not None:
    alignment = _align(prem, hypo, normalize=True)
    alignment_aggr_1 = _aggregate(alignment, args.align_op_we, axis=1)
    alignment_aggr_2 = _aggregate(alignment, args.align_op_we, axis=2)
    prem_reps.append(alignment_aggr_1)
    # prem_reps.append(add(prem, -alignment_aggr_1))
    prem_reps.append(multiply([prem, alignment_aggr_1]))
    hypo_reps.append(alignment_aggr_2)
    # hypo_reps.append(add(hypo, -alignment_aggr_2))
    hypo_reps.append(multiply([hypo, alignment_aggr_2]))

if args.align_op_ae is not None:
    alignment = _align(ant_prem, ant_hypo, normalize=True)
    alignment_aggr_1 = _aggregate(alignment, args.align_op_ae, axis=1)
    alignment_aggr_2 = _aggregate(alignment, args.align_op_ae, axis=2)
    prem_reps.append(multiply([prem, alignment_aggr_1]))
    hypo_reps.append(multiply([hypo, alignment_aggr_2]))
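# --- Assumed helper sketches (not part of the original source) ---
# The fragments above and below call _align, _aggregate and _softalign without
# defining them; the code here is only a minimal guess at plausible Keras
# implementations, assuming a TensorFlow backend. The Align layer used in the
# CNN fragment presumably wraps the same computation as _align in a custom
# Keras layer. The actual helpers may differ.
from keras import backend as K
from keras.layers import Lambda


def _align(x, y, normalize=False):
    """Dot-product alignment matrix between two (batch, len, dim) tensors,
    returning (batch, len_x, len_y); with normalize=True the word vectors are
    L2-normalised first, so the scores are cosine similarities."""
    def _score(tensors):
        a, b = tensors
        if normalize:
            a = K.l2_normalize(a, axis=-1)
            b = K.l2_normalize(b, axis=-1)
        return K.batch_dot(a, b, axes=[2, 2])
    return Lambda(_score)([x, y])


def _aggregate(x, op, axis=1):
    """Collapse one axis of a tensor with SUM / MAX / AVG pooling."""
    ops = {"SUM": K.sum, "MAX": K.max, "AVG": K.mean}
    return Lambda(lambda t: ops[op](t, axis=axis))(x)


def _softalign(x, alignment, transpose=False):
    """Attention-weighted sum of x under the alignment scores, as used by the
    soft-alignment fragment further below."""
    def _attend(tensors):
        sent, align = tensors
        if transpose:
            align = K.permute_dimensions(align, (0, 2, 1))
        weights = K.softmax(align)
        return K.batch_dot(weights, sent)
    return Lambda(_attend)([x, alignment])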
# read in embedding and translate
print("> fetching word embedding")
embedding_matrix = get_embedding_matrix(args.embedding, VOCAB,
                                        EMBED_HIDDEN_SIZE, tokenizer)
embed = Embedding(VOCAB, EMBED_HIDDEN_SIZE, weights=[embedding_matrix],
                  input_length=MAX_LEN, trainable=False)
prem = embed(premise)
hypo = embed(hypothesis)

translate_1 = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))
translate_2 = TimeDistributed(Dense(200, activation=ACTIVATION))

t_prem = translate_1(prem)
t_hypo = translate_1(hypo)
t_prem = _aggregate(t_prem, "SUM", axis=1)
t_hypo = _aggregate(t_hypo, "SUM", axis=1)

a_prem = translate_2(prem)
a_hypo = translate_2(hypo)
align = Align(normalize=True)([a_prem, a_hypo])
print(align.get_shape())

conv1 = Conv1D(32, kernel_size=3, activation='relu')(align)
conv2 = Conv1D(64, 3, activation='relu')(conv1)
pool = MaxPooling1D(pool_size=2)(conv2)
flattened = Flatten()(pool)

# assert len(reps) > 0, "no sentence representations, hence no output of the translation layer"
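# Assumed sketch (not part of the original source) of the get_embedding_matrix
# helper called above: it presumably loads a GloVe/word2vec-style text file and
# builds a (VOCAB x EMBED_HIDDEN_SIZE) weight matrix indexed by the tokenizer's
# word index. The file format and the handling of unknown words are assumptions.
import numpy as np


def get_embedding_matrix(embedding_path, vocab_size, embed_dim, tokenizer):
    # Read pretrained vectors: one "word v1 v2 ... vN" entry per line.
    vectors = {}
    with open(embedding_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            vectors[parts[0]] = np.asarray(parts[1:], dtype='float32')
    # Words without a pretrained vector keep an all-zero row.
    matrix = np.zeros((vocab_size, embed_dim))
    for word, idx in tokenizer.word_index.items():
        if idx < vocab_size and word in vectors:
            matrix[idx] = vectors[word]
    return matrix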
prem = translate(prem)
hypo = translate(hypo)

alignment = _align(prem, hypo, normalize=True)
prem_c = _softalign(prem, alignment, transpose=True)
hypo_c = _softalign(hypo, alignment)

prem = concatenate([prem, hypo_c], axis=-1)
hypo = concatenate([hypo, prem_c], axis=-1)

translate2 = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))
prem = translate2(prem)
hypo = translate2(hypo)

prem = _aggregate(prem, "SUM", axis=1)
hypo = _aggregate(hypo, "SUM", axis=1)

joint = concatenate([prem, hypo])
joint = Dropout(DP)(joint)
for i in range(3):
    joint = Dense(2 * SENT_HIDDEN_SIZE, activation=ACTIVATION,
                  kernel_regularizer=l2(L2))(joint)
    joint = Dropout(DP)(joint)
    joint = BatchNormalization()(joint)

pred = Dense(3, activation='softmax')(joint)

model = Model(inputs=[premise, hypothesis], outputs=pred)
model.compile(optimizer=OPTIMIZER,
                        input_length=MAX_LEN, trainable=False)
ant_prem = antonym_embed(premise)
ant_hypo = antonym_embed(hypothesis)

if args.timedist:
    antonym_translate = TimeDistributed(
        Dense(ANT_SENT_HIDDEN_SIZE, activation=ACTIVATION))
    ant_prem = antonym_translate(ant_prem)
    ant_hypo = antonym_translate(ant_hypo)

reps = []  # sentence representations

if args.agg_we is not None:
    prem_aggr = _aggregate(prem, args.agg_we, axis=1)
    hypo_aggr = _aggregate(hypo, args.agg_we, axis=1)
    reps.append(prem_aggr)
    reps.append(hypo_aggr)

if args.align_op_we is not None:
    alignment = _align(prem, hypo, normalize=True)
    alignment_aggr_1 = _aggregate(alignment, args.align_op_we, axis=1)
    alignment_aggr_2 = _aggregate(alignment, args.align_op_we, axis=2)
    reps.append(alignment_aggr_1)
    reps.append(alignment_aggr_2)

if args.agg_ae is not None:
    ant_prem_aggr = _aggregate(ant_prem, args.agg_ae, axis=1)
    ant_hypo_aggr = _aggregate(ant_hypo, args.agg_ae, axis=1)
    reps.append(ant_prem_aggr)
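# (Fragment ends here.) Judging from the commented-out assert in the second
# fragment ("no sentence representations, hence no output of the translation
# layer"), the collected `reps` are presumably concatenated downstream and fed
# into a dense classifier head like the one in the third fragment
# (Dropout -> Dense stack -> 3-way softmax over the entailment labels).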