Code example #1
    def test_model(self):
        model = load_model()

        arr = numpy.array([[3, 92.6, 109.3, 2, 12, 26],
                           [2, 10.4, 43.5, 3, 26, 5]])

        score = model.predict(arr)

        self.assertAlmostEqual(score[0], 244.9)
        self.assertAlmostEqual(score[1], 89.9)
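
A minimal harness this test appears to assume, sketched for context; the module providing load_model is hypothetical, and the real project may organize it differently:

import unittest
import numpy

from model_utils import load_model  # hypothetical import; not shown in the example


class ModelTest(unittest.TestCase):

    def test_model(self):
        ...  # body as shown above


if __name__ == '__main__':
    unittest.main()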
Code example #2
def evaluate_model():
    # Parse the JSON request body: expects {"model": ..., "data": ...}.
    payload = request.get_json(force=True)
    model_name = str(payload.get('model'))
    data = payload.get('data')

    nlp = load_model(model_name)
    test_data = load_train_data(data)

    scores = evaluate(nlp, test_data)

    return json.dumps(scores)
Code example #3
def predict():
    # Parse the JSON request body: expects {"model": ..., "data": [{"text": ...}, ...]}.
    payload = request.get_json(force=True)
    model_name = str(payload.get('model'))
    data = payload.get('data')

    nlp = load_model(model_name)

    for item in data:
        # Annotate each item in place with a confidence score and entity spans.
        doc = predict_model(nlp, str(item['text']))
        conf = confidence(nlp, doc)
        item['confidence'] = conf
        ents = []
        for ent in doc.ents:
            ents.append({
                "start": ent.start_char,
                "end": ent.end_char,
                "label": ent.label_
            })
        item['ents'] = ents

    return json.dumps(data)
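
Both handlers above read a JSON body of the form {"model": ..., "data": ...}. A minimal client sketch, assuming the handlers are registered on a Flask app served locally (URL, port, model name, and text are illustrative):

import requests

payload = {
    "model": "my_ner_model",  # illustrative model name
    "data": [{"text": "Apple is looking at buying a U.K. startup."}],
}
resp = requests.post("http://localhost:5000/predict", json=payload)
for item in resp.json():
    print(item["confidence"], item["ents"])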
Code example #4
File: evaluate.py  Project: aojiu/COMS-4705-NLP
def evaluate_models(files, verbose=True):
    '''
        Evaluates multiple models at a time. Returns results in a list where
        each item is a dict containing
        { "wordsim" : WordSim353 correlation,
          "bats" : a dictionary of BATS scores (see eval_bats() for details),
          "msr" : MSR paraphrase performance }.
    '''
    results = []

    for f in files:
        if verbose: print('[evaluate_models] Reading ' + f)
        model = load_model(f)
        matrix, vocab, indices = collect(model)
        r = {}
        if verbose: print('[evaluate_models] Evaluating on WordSim...')
        r['wordsim'] = eval_wordsim(model)
        if verbose: print('[evaluate_models] Evaluating on BATS...')
        r['bats'] = eval_bats(model, matrix, vocab, indices)
        if verbose: print('[evaluate_models] Evaluating on MSRPC...')
        r['msr'] = eval_msr(model)
        results.append(r)

    return results
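
A sketch of how evaluate_models might be driven, assuming the model files exist and that eval_wordsim and eval_msr return printable scores as the docstring suggests (paths are illustrative):

models = ['w2v-300.bin', 'svd-2-100.txt']  # illustrative paths
for name, r in zip(models, evaluate_models(models)):
    print(name, 'wordsim:', r['wordsim'], 'msr:', r['msr'])
    for category, score in r['bats'].items():  # per-category BATS scores
        print('  %s: %s' % (category, score))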
Code example #5
File: evaluate.py  Project: aojiu/COMS-4705-NLP
import argparse  # needed by the CLI block below; load_model, collect and the eval_* helpers are defined earlier in evaluate.py


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Evaluate a single trained model.')
    parser.add_argument(
        'path',
        metavar='filename',
        type=str,
        help='the path to the file containing your trained model')
    args = parser.parse_args()

    print('[evaluate] Loading model...')
    model = load_model(args.path)

    print('[evaluate] Collecting matrix...')
    matrix, vocab, indices = collect(model)

    print('[evaluate] WordSim353 correlation:')
    ws = eval_wordsim(model)
    print(ws)

    print('[evaluate] BATS accuracies:')
    bats = eval_bats(model, matrix, vocab, indices)
    print(bats)

    print('[evaluate] MSR accuracy:')
    msr = eval_msr(model)
    print(msr)
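
Presumably the script is invoked from a shell as python evaluate.py path/to/model.bin; argparse prints a usage message and exits if the positional path argument is missing.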
Code example #6
def evaluate_models(files, bert_embedding=None, verbose=True):
    '''
        Evaluates multiple models at a time. Returns results in a list where
        each item is a dict containing
        { "wordsim" : WordSim353 correlation,
          "bats" : a dictionary of BATS scores (see eval_bats() for details),
          "msr" : MSR paraphrase performance }.
    '''
    print("evaluating bert")

    results_bert = []
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert = BertModel.from_pretrained('bert-base-uncased')
    # get the vocab list (note: despite its name, `files` is used as a single model path here)
    model = load_model(files)
    matrix, vocab, indices = collect(model)
    # Tokenize each vocab word; max_length=3 keeps exactly [CLS] <first wordpiece> [SEP].
    pt_batch = tokenizer(vocab,
                         padding=True,
                         truncation=True,
                         max_length=3,
                         return_tensors="pt")
    # get embedding for each word using bert for bats
    try:
        wv = np.loadtxt(bert_embedding)
        print("BERT embedding loaded")
    except Exception:
        # No usable cached embedding (bert_embedding is None, missing, or unreadable).
        print("BERT embedding not given. Running BERT inference.")
        wv = np.array([])
        for elem in pt_batch["input_ids"]:
            # Position 1 of last_hidden_state is the word's first subtoken embedding.
            output = bert(input_ids=elem.view(1, 3))
            wv_new = output.last_hidden_state[:, 1, :].detach().numpy()
            wv = wv_new if len(wv) == 0 else np.vstack((wv, wv_new))

    r_bert = {}
    if verbose: print('[evaluate_bert] Evaluating on WordSim...')
    r_bert['wordsim'] = eval_wordsim_bert(bert, tokenizer)
    if verbose: print('[evaluate_bert] Evaluating on BATS...')
    r_bert['bats'] = eval_bats_bert(wv, vocab, indices, bert, tokenizer)
    if verbose: print('[evaluate_bert] Evaluating on MSRPC...')
    r_bert['msr'] = eval_msr_bert(model, bert, tokenizer)
    results_bert.append(r_bert)

    return results_bert
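
Since the inference loop above recomputes every word vector on each run, the bert_embedding parameter suggests a caching workflow; a minimal sketch, assuming a call like np.savetxt('bert_embedding.txt', wv) is added at the end of the except branch (file names are illustrative):

results = evaluate_models('vectors.bin', bert_embedding='bert_embedding.txt')
print(results[0]['wordsim'], results[0]['msr'])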
Code example #7
File: eval_svd.py  Project: Axl824/NLP-coursework
from process import load_model
from evaluate import collect, eval_wordsim, eval_bats, eval_msr

windows = [2, 5, 10]
dims = [100, 300, 1000]

for window in windows:
    for dim in dims:
        print("SVD model\tdimension: %d\twindow size: %d" % (dim, window))
        path = 'svd/svd-' + str(window) + '-' + str(dim) + '.txt'

        model = load_model(path)
        print('[evaluate] Collecting matrix...')
        matrix, vocab, indices = collect(model)

        print('[evaluate] WordSim353 correlation:')
        ws = eval_wordsim(model)
        print(ws)

        print('[evaluate] BATS accuracies:')
        bats = eval_bats(model, matrix, vocab, indices)
        print(bats)

        print('[evaluate] MSR accuracy:')
        msr = eval_msr(model)
        print(msr)
        print()
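
The same sweep can also collect scores instead of printing them, e.g. with evaluate_models from example #4; a sketch that assumes evaluate_models is importable from evaluate and that eval_wordsim returns a scalar correlation:

from evaluate import evaluate_models  # assumed export; see example #4

paths = ['svd/svd-%d-%d.txt' % (w, d) for w in windows for d in dims]
all_results = evaluate_models(paths, verbose=False)
best_path, best = max(zip(paths, all_results), key=lambda pr: pr[1]['wordsim'])
print('best by WordSim353:', best_path, best['wordsim'])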