Example #1
    def test_evaluation_with_scoring(self):
        """
        Evaluate the grammar on all examples, collecting metrics:
        
        semantics oracle accuracy: # of examples where at least one parse was
        correct.

        semantics accuracy: # of examples where the parse at position 0 was
        correct.
        """
        arithmetic_grammar = Grammar(self.arithmetic_rules)

        from executor import Executor

        arithmetic_model = Model(grammar=arithmetic_grammar,
                                 feature_fn=Parse.operator_precedence_features,
                                 weights=self.weights,
                                 executor=Executor.execute)

        from experiment import evaluate_model

        metrics = evaluate_model(model=arithmetic_model,
                                 examples=self.one_parse_examples +
                                 self.two_parse_examples)
        self.assertEqual(metrics['semantics oracle accuracy'], 17)
        self.assertEqual(metrics['semantics accuracy'], 16)  # Improvement
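For reference, the two metrics differ only in where they look in the ranked parse list. A minimal sketch of the distinction, assuming a hypothetical ranked_parses(model, example) helper that returns parses sorted by model score (an illustration, not SippyCup's actual implementation):

def semantics_metrics(model, examples, ranked_parses):
    oracle = top1 = 0
    for example in examples:
        ranked = ranked_parses(model, example)
        # Oracle accuracy: credit if ANY parse carries the gold semantics.
        if any(p.semantics == example.semantics for p in ranked):
            oracle += 1
        # Plain accuracy: credit only if the top-ranked parse is correct.
        if ranked and ranked[0].semantics == example.semantics:
            top1 += 1
    return {'semantics oracle accuracy': oracle,
            'semantics accuracy': top1}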
Example #2
    def test_feature_function(self):
        from collections import defaultdict

        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)

        def empty_denotation_feature(parse):
            features = defaultdict(float)
            if parse.denotation == ():
                features['empty_denotation'] += 1.0
            return features

        weights = {'empty_denotation': -1.0}

        model = Model(grammar=grammar,
                      feature_fn=empty_denotation_feature,
                      weights=weights,
                      executor=self.geobase.executor().execute)
        metric_values = evaluate_model(model=model,
                                       examples=geo880_train_examples,
                                       metrics=denotation_match_metrics(),
                                       print_examples=False)
        self.assertEqual(235, metric_values['denotation accuracy'])
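The empty_denotation feature works because the model scores each parse linearly: the dot product of its feature vector with the weights. With a weight of -1.0, any parse whose denotation is empty is pushed down the ranking. A minimal sketch of that scoring rule, assuming the linear feature_fn/weights interface shown above:

def score(feature_fn, weights, parse):
    # Linear model: sum of feature values times their corresponding weights.
    return sum(value * weights.get(name, 0.0)
               for name, value in feature_fn(parse).items())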
Example #3
    def test_evaluate_model(self):
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        # Set print_examples=True and look for 'what state has the shortest
        # river?' in the output.
        evaluate_model(model=model,
                       examples=geo880_train_examples[:10],
                       metrics=denotation_match_metrics(),
                       print_examples=False)
Example #4
            feed_dict = {stories[0]: trainX[0],
                         queries[0]: trainX[1],
                         stories[1]: trainX[2],
                         queries[1]: trainX[3],
                         answers[0]: trainY[0],
                         answers[1]: trainY[1]}
            return sess.run(loss_op, feed_dict=feed_dict)

        def predictf(trainX):
            feed_dict = {stories[0]: trainX[0],
                         queries[0]: trainX[1],
                         stories[1]: trainX[2],
                         queries[1]: trainX[3]}
            return sess.run(logits, feed_dict=feed_dict)
        
        def savef(filepath):
            return modelsaver.save(sess, filepath)
        
        def restoref(filepath):
            modelsaver.restore(sess, filepath)

    return model(fitf=fitf,
                 testf=testf,
                 predictf=predictf,
                 savef=savef,
                 restoref=restoref)

ver, qas, repeat = handlesysargs('en/hn', None, 10)
#embedding_matrix, word_idx = pre_train_embedding(EMBEDDING_SIZE, ver)

def wmethod():
    global word_idx
    return word_idx
evaluate_model(compile_model, ver, qas, pad=1, wmethod='concat', flatten=0, repeat=repeat, E2E=1)
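The model(fitf=..., restoref=...) call above bundles the session-bound closures into a single object, so callers never touch the TensorFlow session directly. A minimal sketch of such a wrapper, using a hypothetical ModelWrapper name (the real model class is defined elsewhere in this codebase):

class ModelWrapper:
    """Bundles session-bound closures behind one uniform interface."""
    def __init__(self, fitf, testf, predictf, savef, restoref):
        self.fit = fitf          # run a training step
        self.test = testf        # compute loss on held-out data
        self.predict = predictf  # run inference
        self.save = savef        # checkpoint the session
        self.restore = restoref  # restore from a checkpoint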
Example #5
    
    # Using last C as W per adjacent weight tying
    # func = lambda x:tf.matmul(x, tf.transpose(embedlayer.get_weights()[0], [1,0]))
    # dl = Lambda(func)(newu)
    
    pred = Activation('softmax')(dl)
    
    model = Model(input=[story_input, query_input], output=[pred])
    
    # opt = Adam(lr=0.001,
    #            beta_1=0.9,
    #            beta_2=0.999,
    #            epsilon=1e-08,
    #            decay=0.0)
    
    opt = SGD(lr=0.0,
              momentum=0.0,
              decay=0.0,
              nesterov=False)
    
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, [LearningRateScheduler(step_decay)]
    # return model, None

ver = 'en'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, pad=1, wmethod=None, flatten=0, word=0, repeat=repeat)
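step_decay is passed to LearningRateScheduler above but not shown. A common shape for such a schedule, offered as an assumption about what this codebase uses (Keras calls the function with the epoch index and applies the returned rate, which is why SGD's lr can start at 0.0):

import math

def step_decay(epoch, initial_lr=0.1, drop=0.5, epochs_per_drop=25.0):
    # Halve the learning rate every epochs_per_drop epochs.
    return initial_lr * math.pow(drop, math.floor(epoch / epochs_per_drop))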
Example #6
    hnques = RNN(EMBED_HIDDEN_SIZE, return_sequences=False)(hnques)
    hnques = RepeatVector(story_maxlen2)(hnques)

    enres = merge([ensent, enques], mode='sum')

    hnres = merge([hnsent, hnques], mode='sum')

    srnn = RNN(EMBED_HIDDEN_SIZE, return_sequences=False)
    enrnnout = srnn(enres)
    hnrnnout = srnn(hnres)

    do = Dropout(0.3)
    endoout = do(enrnnout)
    hndoout = do(hnrnnout)

    enout = Dense(vocab_size1, activation='softmax')(endoout)
    hnout = Dense(vocab_size2, activation='softmax')(hndoout)

    model = Model([ensinput, enqinput, hnsinput, hnqinput], [enout, hnout])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, None


ver = 'en/hn'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, wmethod='concat', repeat=repeat)
Example #7
        def restoref(filepath):
            modelsaver.restore(sess, filepath)

    return model(fitf=fitf,
                 testf=testf,
                 predictf=predictf,
                 savef=savef,
                 restoref=restoref)


ver, qas, repeat = handlesysargs('en', None, 10)
embedding_matrix, word_idx = pre_train_embedding(EMBEDDING_SIZE,
                                                 ver,
                                                 method='external')


def wmethod():
    global word_idx
    return word_idx


evaluate_model(compile_model,
               ver,
               qas,
               pad=1,
               wmethod=wmethod,
               flatten=0,
               repeat=repeat,
               E2E=1)
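pre_train_embedding with method='external' presumably loads pretrained vectors and returns a matrix aligned with word_idx. A generic sketch of that pattern, with a hypothetical load_vectors helper standing in for the external source:

import numpy as np

def build_embedding_matrix(word_idx, embedding_size, load_vectors):
    # Row 0 is reserved for padding; words without a pretrained vector
    # keep the zero vector.
    vectors = load_vectors()  # hypothetical: maps word -> np.ndarray
    matrix = np.zeros((len(word_idx) + 1, embedding_size))
    for word, idx in word_idx.items():
        if word in vectors:
            matrix[idx] = vectors[word]
    return matrix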
Example #8
    do = Dropout(0.3, seed=_seed)
    endoout = do(enrnnout)
    hndoout = do(hnrnnout)

    dense = Dense(vocab_size, activation=None, kernel_initializer=INITIALIZER)
    enout = dense(endoout)
    hnout = dense(hndoout)

    enout = Activation('softmax')(enout)
    hnout = Activation('softmax')(hnout)

    model = Model([ensinput, enqinput, hnsinput, hnqinput], [enout, hnout])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return modelwrapper(model)


ver = 'en/hn'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model,
               ver,
               qas,
               pad=1,
               wmethod='concat',
               repeat=repeat,
               stop_early=0)
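Note that calling the same Dense instance on both dropout outputs, as above, ties the output weights across the English and Hindi heads; constructing two Dense layers would give each head its own weight matrix. The same sharing pattern in isolation, with hypothetical sizes:

from keras.layers import Dense, Input
from keras.models import Model

en_features = Input(shape=(64,))   # hypothetical feature widths
hn_features = Input(shape=(64,))
shared = Dense(100)                # one layer object = one weight matrix
tied = Model([en_features, hn_features],
             [shared(en_features), shared(hn_features)])  # both reuse it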
Example #9
    model = Model(
        input=[story_input1, query_input1, story_input2, query_input2],
        output=preds)

    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

    # opt = SGD(lr=0.0,
    #           momentum=0.0,
    #           decay=0.0,
    #           nesterov=False)

    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # return model, [LearningRateScheduler(step_decay)]
    return model, None


ver = 'en/hn'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model,
               ver,
               qas,
               pad=1,
               wmethod='concat',
               flatten=0,
               repeat=repeat,
               limit=500)
Example #10
best_mpr = 1
for epoch in range(args.epochs):
    model.fit(item_user, show_progress=False)
    if args.m != 'bpr':
        loss = implicit._als.calculate_loss(item_user.T.tocsr(),
                                            model.user_factors,
                                            model.item_factors,
                                            model.regularization)
        print('Epoch {}, Loss {}'.format(epoch, loss))

    if not test_mode:
        R_hat = model.user_factors @ (model.item_factors.T)
        top_N_recs = np.array(np.argsort(-R_hat,
                                         axis=1)[:, :args.top_n]).tolist()
        MAP, rec_at_k, mpr_all, mpr_mask = evaluate_model(
            R_hat, top_N_recs, data['val'], data['val_masked'])
        del R_hat
        wandb.log({
            'MAP@N': MAP,
            'Recall@N': rec_at_k,
            'MPR (all)': mpr_all,
            'MPR (new)': mpr_mask
        })
        if mpr_mask < best_mpr:
            wandb.run.summary["best_mpr"] = mpr_mask
            best_mpr = mpr_mask

if test_mode:
    # R_hat is never built (and top_N_recs never computed) in the epoch loop
    # when test_mode is set, so rebuild both here before evaluating.
    R_hat = model.user_factors @ model.item_factors.T
    top_N_recs = np.array(np.argsort(-R_hat, axis=1)[:, :args.top_n]).tolist()
    MAP, rec_at_k, mpr_all, mpr_mask = evaluate_model(R_hat, top_N_recs,
                                                      data['test'],
                                                      data['test_masked'])
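The mean percentile rank (MPR) logged above measures where held-out items land in each user's ranking: 0.0 means every held-out item is ranked first, 0.5 is what random ranking would give. A minimal sketch of the metric, assuming R_hat is a dense score matrix and the held-out data maps user indices to item indices (not necessarily how evaluate_model computes it):

import numpy as np

def mean_percentile_rank(R_hat, heldout):
    # heldout: dict mapping user index -> iterable of held-out item indices.
    ranks = []
    for user, items in heldout.items():
        order = np.argsort(-R_hat[user])         # item ids, best score first
        position = np.empty_like(order)
        position[order] = np.arange(len(order))  # rank of each item id
        for item in items:
            ranks.append(position[item] / (len(order) - 1))
    return float(np.mean(ranks))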
Example #11
    #qrnn = Dropout(0.3, seed=_seed)(qrnn)
    #qrnn = RNN(query_maxlen, return_sequences=False,
    #           kernel_initializer=INITIALIZER,
    #           bias_initializer=INITIALIZER,
    #           recurrent_initializer=RINITIALIZER)(qinput)
    qrnn = RepeatVector(story_maxlen)(qinput)
    qrnn = Flatten()(qrnn)

    out = merge([sentrnn, qrnn], mode='sum')
    #out = RNN(EMBED_HIDDEN_SIZE, return_sequences=False,
    #          kernel_initializer=INITIALIZER,
    #          bias_initializer=INITIALIZER,
    #          recurrent_initializer=RINITIALIZER)(out)
    #out = Dropout(0.3, seed=_seed)(out)
    out = Dense(vocab_size,
                kernel_initializer=INITIALIZER,
                activation='softmax')(out)

    model = Model([sentinp, qinput], [out])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return modelwrapper(model)


ver = 'en'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, repeat=repeat)
Example #12
            feed_dict = {stories: trainX[0],
                         queries: trainX[1],
                         answers: trainY[0]}
            return sess.run(loss_op, feed_dict=feed_dict)

        def predictf(trainX):
            feed_dict = {stories: trainX[0],
                         queries: trainX[1]}
            return sess.run(logits, feed_dict=feed_dict)
        
        def savef(filepath):
            return modelsaver.save(sess, filepath)
        
        def restoref(filepath):
            modelsaver.restore(sess, filepath)

    return model(fitf=fitf,
                 testf=testf,
                 predictf=predictf,
                 savef=savef,
                 restoref=restoref)


ver, qas, repeat = handlesysargs('en', None, 10)
#embedding_matrix, word_idx = pre_train_embedding(EMBEDDING_SIZE, ver)

def wmethod():
    global word_idx
    return word_idx

evaluate_model(compile_model, ver, qas, pad=1, wmethod=None, flatten=0, repeat=repeat, E2E=1, stop_early=1)
Example #13
    do = Dropout(0.3)
    endoout = do(enrnnout)
    hndoout = do(hnrnnout)
    shuffleddoout = do(shuffledrnnout)

    enout = Dense(vocab_size1, activation='softmax')(endoout)
    hnout = Dense(vocab_size2, activation='softmax')(hndoout)
    shuffledout = Dense(vocab_size3, activation='softmax')(shuffleddoout)

    model = Model([
        ensinput, enqinput, hnsinput, hnqinput, shuffledsinput, shuffledqinput
    ], [enout, hnout, shuffledout])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, None


ver = 'en/hn/shuffled'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model,
               ver,
               qas,
               pad=1,
               wmethod='concat',
               flatten=1,
               repeat=repeat)
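A model with three softmax heads like this one expects its targets as a list aligned with [enout, hnout, shuffledout]. A sketch of the training call, with hypothetical array names (each target's width must match the corresponding head's vocabulary size):

model.fit([ens_x, enq_x, hns_x, hnq_x, sh_x, shq_x],  # six inputs, in order
          [en_y, hn_y, sh_y],                         # one target per head
          batch_size=32,
          epochs=40)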