input_length=MAX_LEN,
                              trainable=False)

    ant_prem = antonym_embed(premise)
    ant_hypo = antonym_embed(hypothesis)

    if args.timedist:
        antonym_translate = TimeDistributed(
            Dense(ANT_SENT_HIDDEN_SIZE, activation=ACTIVATION))

        ant_prem = antonym_translate(ant_prem)
        ant_hypo = antonym_translate(ant_hypo)

# Optional alignment-based sentence features, selected via CLI flags.
# NOTE(review): prem/hypo, ant_prem/ant_hypo, prem_reps/hypo_reps and the
# helpers _align/_aggregate are defined earlier in the file — assumed to be
# Keras tensors / graph-building helpers; confirm against the full source.
if args.align_op_we is not None:
    # Word-embedding branch: pairwise alignment between premise and hypothesis.
    alignment = _align(prem, hypo, normalize=True)
    # Collapse the (premise x hypothesis) alignment matrix along each axis,
    # producing one attention summary per sentence.
    alignment_aggr_1 = _aggregate(alignment, args.align_op_we, axis=1)
    alignment_aggr_2 = _aggregate(alignment, args.align_op_we, axis=2)
    prem_reps.append(alignment_aggr_1)
    #prem_reps.append(add(prem,-alignment_aggr_1))
    # Gate each sentence representation by its alignment summary.
    prem_reps.append(multiply(prem, alignment_aggr_1))
    hypo_reps.append(alignment_aggr_2)
    #hypo_reps.append(add(hypo, -alignment_aggr_2))
    hypo_reps.append(multiply(hypo, alignment_aggr_2))

if args.align_op_ae is not None:
    # Antonym-embedding branch: same alignment scheme on ant_prem/ant_hypo,
    # but the gating multiplies into the *word*-embedding tensors prem/hypo.
    alignment = _align(ant_prem, ant_hypo, normalize=True)
    alignment_aggr_1 = _aggregate(alignment, args.align_op_ae, axis=1)
    alignment_aggr_2 = _aggregate(alignment, args.align_op_ae, axis=2)
    prem_reps.append(multiply(prem, alignment_aggr_1))
    hypo_reps.append(multiply(hypo, alignment_aggr_2))
# read in embedding and translate
print("> fetching word embedding")
# Load pretrained word vectors and wrap them in a frozen Embedding layer
# shared by both inputs.
embedding_matrix = get_embedding_matrix(args.embedding, VOCAB, EMBED_HIDDEN_SIZE, tokenizer)
embed = Embedding(VOCAB, EMBED_HIDDEN_SIZE, weights=[embedding_matrix], input_length=MAX_LEN, trainable=False)

prem = embed(premise)
hypo = embed(hypothesis)

# Two independent per-timestep projections: translate_1 feeds the summed
# sentence vectors, translate_2 feeds the alignment/CNN branch.
translate_1 = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))
translate_2 = TimeDistributed(Dense(200, activation=ACTIVATION))  # NOTE(review): 200 is hard-coded — presumably a tuned size; confirm

t_prem = translate_1(prem)
t_hypo = translate_1(hypo)

# Sum over the time axis (axis=1) to obtain fixed-size sentence vectors.
t_prem = _aggregate(t_prem, "SUM", axis=1)
t_hypo = _aggregate(t_hypo, "SUM", axis=1)

a_prem = translate_2(prem)
a_hypo = translate_2(hypo)

# Pairwise alignment matrix between the two projected sequences.
# NOTE(review): Align is a project-local layer — assumed to produce a
# (batch, len_prem, len_hypo)-like similarity matrix; confirm its definition.
align = Align(normalize=True)([a_prem, a_hypo])
print(align.get_shape())  # debug output: shape of the alignment matrix

# Small 1-D CNN over the alignment matrix, flattened into a feature vector.
conv1 = Conv1D(32, kernel_size=3, activation='relu')(align)
conv2 = Conv1D(64, 3, activation='relu')(conv1)
pool = MaxPooling1D(pool_size=2)(conv2)
flattened = Flatten()(pool)

# assert len(reps) > 0, "no sentence representations, hence no output of the translation layer"
# Decomposable-attention-style attend/compare/aggregate pipeline.
# NOTE(review): `translate` is not defined in this fragment — assumed to be
# a shared TimeDistributed(Dense(...)) created earlier in the file; confirm.
prem = translate(prem)
hypo = translate(hypo)

# Attend: soft-align each sentence against the other via the alignment matrix.
alignment = _align(prem, hypo, normalize=True)
prem_c = _softalign(prem, alignment, transpose=True)  # premise attended by hypothesis
hypo_c = _softalign(hypo, alignment)                  # hypothesis attended by premise

# Compare: pair each sentence with the other's soft-aligned counterpart.
prem = concatenate([prem, hypo_c], axis=-1)
hypo = concatenate([hypo, prem_c], axis=-1)

# Project the concatenated pairs back down to SENT_HIDDEN_SIZE per timestep.
translate2 = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))

prem = translate2(prem)
hypo = translate2(hypo)

# Aggregate: sum over the time axis into fixed-size sentence vectors.
prem = _aggregate(prem, "SUM", axis=1)
hypo = _aggregate(hypo, "SUM", axis=1)

# Classifier head: concatenate the two sentence vectors and run a 3-layer
# MLP with dropout + batch normalization after every dense layer.
joint = concatenate([prem, hypo])
joint = Dropout(DP)(joint)
for i in range(3):
    joint = Dense(2 * SENT_HIDDEN_SIZE,
                  activation=ACTIVATION,
                  kernel_regularizer=l2(L2))(joint)
    joint = Dropout(DP)(joint)
    joint = BatchNormalization()(joint)

# 3-way softmax — presumably the NLI labels entailment/contradiction/neutral;
# confirm against the dataset loading code.
pred = Dense(3, activation='softmax')(joint)

model = Model(inputs=[premise, hypothesis], outputs=pred)
model.compile(optimizer=OPTIMIZER,
# ===== Example #4 (original scrape marker: "예제 #4", vote count "0") =====
                              input_length=MAX_LEN,
                              trainable=False)

    ant_prem = antonym_embed(premise)
    ant_hypo = antonym_embed(hypothesis)

    if args.timedist:
        antonym_translate = TimeDistributed(
            Dense(ANT_SENT_HIDDEN_SIZE, activation=ACTIVATION))

        ant_prem = antonym_translate(ant_prem)
        ant_hypo = antonym_translate(ant_hypo)

reps = []  # sentence representations, concatenated later into the classifier input
# Optional aggregated word-embedding features, selected via CLI flag.
if args.agg_we is not None:
    prem_aggr = _aggregate(prem, args.agg_we, axis=1)
    hypo_aggr = _aggregate(hypo, args.agg_we, axis=1)
    reps.append(prem_aggr)
    reps.append(hypo_aggr)

# Optional alignment features on the word embeddings: collapse the
# (premise x hypothesis) alignment matrix along each axis into one
# attention summary per sentence.
if args.align_op_we is not None:
    alignment = _align(prem, hypo, normalize=True)
    alignment_aggr_1 = _aggregate(alignment, args.align_op_we, axis=1)
    alignment_aggr_2 = _aggregate(alignment, args.align_op_we, axis=2)
    reps.append(alignment_aggr_1)
    reps.append(alignment_aggr_2)

if args.agg_ae is not None:
    ant_prem_aggr = _aggregate(ant_prem, args.agg_ae, axis=1)
    ant_hypo_aggr = _aggregate(ant_hypo, args.agg_ae, axis=1)
    reps.append(ant_prem_aggr)