Example #1
def graph_input_sts(si0, si1, y, f0=None, f1=None):
    """ Produce Keras task specification from vocab-vectorized sentences. """
    import pysts.loader as loader
    gr = {'si0': si0, 'si1': si1, 'classes': loader.sts_labels2categorical(y)}
    if f0 is not None:
        gr['f0'] = f0
        gr['f1'] = f1
    return gr
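
A minimal usage sketch, assuming pysts (from the dataset-sts project) is importable and that si0/si1 are zero-padded word-index matrices with gold scores y on the usual 0-5 STS scale:

import numpy as np
si0 = np.array([[1, 4, 2, 0], [3, 5, 0, 0]])  # two vectorized sentences, zero-padded
si1 = np.array([[1, 4, 7, 0], [6, 5, 2, 0]])
y = np.array([4.2, 1.5])                      # gold relatedness scores
gr = graph_input_sts(si0, si1, y)
print(sorted(gr.keys()))                      # ['classes', 'si0', 'si1']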
Example #2
def graph_input_sts(si0, si1, y, f0=None, f1=None, s0=None, s1=None):
    """ Produce Keras task specification from vocab-vectorized sentences. """
    import pysts.loader as loader
    gr = {'si0': si0, 'si1': si1, 'classes': loader.sts_labels2categorical(y)}
    if f0 is not None:
        gr['f0'] = f0
        gr['f1'] = f1
    if s0 is not None:
        # This is useful for non-neural baselines
        gr['s0'] = s0
        gr['s1'] = s1
    return gr
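
The extra s0/s1 keyword arguments let the same dict also carry the raw token lists, which non-neural baselines need because they operate on words rather than vocabulary indices. A short sketch of that call (the token data is made up):

s0 = [['a', 'dog', 'runs'], ['the', 'cat', 'sleeps']]
s1 = [['a', 'dog', 'sprints'], ['a', 'bird', 'flies']]
gr = graph_input_sts(si0, si1, y, s0=s0, s1=s1)
# gr now also maps 's0'/'s1' to the untouched token sequences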
Example #4
def predict(self, gr):
    scores = []
    for i in range(len(gr['s0'])):
        s0 = [self._norm(w) for w in gr['s0'][i]]
        s1 = [self._norm(w) for w in gr['s1'][i]]
        scores.append([self._score(s0, s1)])
    scores = np.array(scores)
    if self.output == 'score':
        return {'score': scores}
    elif self.output == 'binary':
        # XXX: we should tune the threshold to maximize accuracy
        scores0 = scores - np.min(scores)
        return {'score': scores0 * 5. / np.max(scores0)}
    elif self.output == 'classes':
        scores0 = scores - np.min(scores)
        return {'classes': loader.sts_labels2categorical(scores0 * 5. / np.max(scores0))}
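
A hedged usage sketch for this predict() (the BaselineScorer name and its constructor are hypothetical stand-ins for whatever class defines the method; numpy as np and pysts.loader as loader are assumed imported at module scope):

gr = graph_input_sts(si0, si1, y, s0=s0, s1=s1)  # predict() reads the raw tokens in gr['s0']/gr['s1']
baseline = BaselineScorer(output='score')        # hypothetical constructor
res = baseline.predict(gr)
print(res['score'].shape)                        # (n_pairs, 1): one similarity score per sentence pair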
Example #5
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Benchmark yu1412 on semantic relatedness regression-classification (sts)")
    parser.add_argument("-N", help="GloVe dim", type=int, default=300)
    parser.add_argument("--sick", help="whether to run on SICK2014 inst. of sts2012-14/15 dataset", type=int, default=0)
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.sick == 1:
        Xtrain, ytrain = load_set(glove, 'data/sts/sick2014/SICK_train.txt', loader.load_sick2014)
        Xtest, ytest = load_set(glove, 'data/sts/sick2014/SICK_test_annotated.txt', loader.load_sick2014)
    else:
        Xtrain, ytrain = load_set(glove, 'data/sts/semeval-sts/all/201[0-4]*')
        Xtest, ytest = load_set(glove, 'data/sts/semeval-sts/all/2015*')

    model = prep_model(glove)
    model.compile(loss={'classes': 'categorical_crossentropy'}, optimizer='adam')
    model.fit({'e0': Xtrain[0], 'e1': Xtrain[1], 'classes': loader.sts_labels2categorical(ytrain)},
              batch_size=20, nb_epoch=100,
              validation_data={'e0': Xtest[0], 'e1': Xtest[1], 'classes': loader.sts_labels2categorical(ytest)})
    ev.eval_sts(model.predict({'e0': Xtrain[0], 'e1': Xtrain[1]})['classes'], ytrain, 'Train')
    ev.eval_sts(model.predict({'e0': Xtest[0], 'e1': Xtest[1]})['classes'], ytest, 'Test')


"""
Performance tuning (100 iters) on sick2014:

  * Just elementwise-mul:

    4500/4500 [==============================] - 0s - loss: 1.2332 - acc: 0.5016 - val_loss: 1.1926 - val_acc: 0.4770
    Train Pearson: 0.721662
    Train Spearman: 0.593711
    Train MSE: 0.529364
"""
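
The 'classes' output is a distribution over the six integer relatedness labels 0-5, while Pearson/Spearman need one scalar per pair, so the natural conversion is the expectation of that distribution. A minimal sketch of that step (the helper name classes_to_score is hypothetical; pysts presumably provides an equivalent that ev.eval_sts uses internally):

import numpy as np

def classes_to_score(p):
    # p: (n_pairs, 6) softmax distribution over labels 0..5
    return np.dot(p, np.arange(6))  # expected label = scalar STS score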
Example #8
    model1.add(RepeatVector(6))  # [nclass]

    model = Sequential()
    model.add(Merge([model0, model1], mode='dot', dot_axes=([2], [2])))
    model.add(Flatten())  # 6x6 matrix with cross-activations -> 36 vector
    model.add(Dense(6, W_regularizer=l2(l2reg)))  # 36 vector -> 6 vector, ugh
    model.add(Activation('softmax'))
    return model
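
The dot-mode Merge contracts the two 6-row activation maps over their feature axis, giving the 6x6 matrix of cross-activations that Flatten() turns into a 36-vector for Dense(6). A minimal numpy illustration of that contraction on a single sample (the 50-dim feature size is an assumption):

import numpy as np
a = np.random.rand(6, 50)             # model0 output: 6 rows of activations
b = np.random.rand(6, 50)             # model1 output after RepeatVector(6)
cross = np.einsum('if,jf->ij', a, b)  # dot over the feature axis -> 6x6 matrix
flat = cross.reshape(36)              # what Flatten() feeds into Dense(6)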


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Benchmark yu1412 on semantic relatedness regression-classification (sts)")
    parser.add_argument("-N", help="GloVe dim", type=int, default=300)
    parser.add_argument("--sick", help="whether to run on SICK2014 inst. of sts2012-14/15 dataset", type=int, default=0)
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.sick == 1:
        Xtrain, ytrain = load_set(glove, 'data/sts/sick2014/SICK_train.txt', loader.load_sick2014)
        Xtest, ytest = load_set(glove, 'data/sts/sick2014/SICK_test_annotated.txt', loader.load_sick2014)
    else:
        Xtrain, ytrain = load_set(glove, 'data/sts/semeval-sts/all/201[0-4]*')
        Xtest, ytest = load_set(glove, 'data/sts/semeval-sts/all/2015*')

    model = prep_model(glove)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.fit(Xtrain, loader.sts_labels2categorical(ytrain), batch_size=80, nb_epoch=200, show_accuracy=True,
              validation_data=(Xtest, loader.sts_labels2categorical(ytest)))
    ev.eval_sts(model.predict_proba(Xtrain), ytrain, 'Train')
    ev.eval_sts(model.predict_proba(Xtest), ytest, 'Test')
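
Note that these snippets target the legacy Keras 0.x/1.x API (nb_epoch, show_accuracy, predict_proba, Graph-style dict inputs). On current Keras the compile/fit calls above would look roughly like this (argument names only; the model-building code would need separate porting):

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])  # replaces show_accuracy=True
model.fit(Xtrain, loader.sts_labels2categorical(ytrain),
          batch_size=80, epochs=200,  # nb_epoch was renamed to epochs
          validation_data=(Xtest, loader.sts_labels2categorical(ytest)))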