Example #1
def predict(model, settings, to_predict, elmo, vocabs):
    pred_path = settings.dir + to_predict.split("/")[-1] + ".pred"
    entries, predicted, other_predicted = model.predict(to_predict, elmo)
    f1, _ = sc.score(*zip(*((entry[1][settings.pt].numpy(),
                             predicted[entry[0]].numpy())
                            for entry in entries)))
    print("F1 is {:.2%}".format(f1))

    if len(other_predicted) > 0:
        other_f1, _ = sc.score(*zip(*((entry[1][settings.ot].numpy(),
                                       other_predicted[entry[0]].numpy())
                                      for entry in entries)))
        print("Other F1 is {:.2%}".format(other_f1))
    with open(pred_path, "w") as fh:
        for sentence in cd.read_col_data(to_predict):
            pred = predicted[sentence.id].numpy()
            if settings.target_style == "scope-":
                cue_matrix = sentence.make_matrix("cues", True, vocabs[settings.td["cue"]].w2i)
                pred = np.maximum(pred, cue_matrix)
            #pred = other_predicted[sentence.id].numpy()
            sentence.update_parse(pred, settings.target_style, vocabs[settings.pt].i2w)
            if len(other_predicted) > 0:
                pred = other_predicted[sentence.id].numpy()
                # NOTE sem == sem hopefully
                if settings.target_style == settings.other_target_style:
                    sentence.update_parse(pred, "syn", vocabs[settings.pt].i2w)
                else:
                    sentence.update_parse(pred, settings.other_target_style, vocabs[settings.pt].i2w)
            print(sentence, file=fh)
    return True
Example #2
def main():
    parser = argparse.ArgumentParser(
        description=
        'Take the probability files from several models and do model ensembling.'
    )
    parser.add_argument('--prob_dir',
                        type=str,
                        default='tmp/ensemble/',
                        dest='prob_dir',
                        action='store',
                        help='The dir where the prob files locate.')
    parser.add_argument('--files',
                        type=str,
                        dest='files',
                        action='store',
                        required=True,
                        help='The list of filenames, separated by comma.')
    parser.add_argument('--key_file',
                        type=str,
                        dest='key_file',
                        action='store',
                        required=True,
                        help='Where to find the key file.')

    args = parser.parse_args()

    if not os.path.exists(args.prob_dir):
        raise Exception('Probability file dir does not exist at ' +
                        args.prob_dir)
    prob_files = args.files.split(',')
    if len(prob_files) <= 1:
        raise Exception(
            'Need to provide more than one model prediction file.')
    prob_files = [os.path.join(args.prob_dir, x) for x in prob_files]

    prob_matrices = []
    for fname in prob_files:
        print "Reading prediction prob file at: " + fname
        pm = read_prob_file(fname)
        prob_matrices.append(pm)

    print "Doing majority vote to generate final predictions..."
    preds = majority_vote(prob_matrices)

    # convert preds from index to labels
    label2id = data_utils.LABEL_TO_ID
    id2label = dict([(v, k) for k, v in label2id.items()])
    preds = [id2label[x] for x in preds]

    # write pred file
    pred_file = args.prob_dir + '/ensemble.prediction.tmp'
    with open(pred_file, 'w') as outfile:
        for p in preds:
            outfile.write(p + '\t1.0\n')

    # score
    scorer.score(args.key_file, [pred_file], 1, True)
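The read_prob_file and majority_vote helpers are defined elsewhere. As a rough sketch of the voting step (assuming each probability matrix is a num_examples x num_classes NumPy array; this is not the original implementation), a per-example majority vote over each model's argmax could look like:

import numpy as np
from collections import Counter

def majority_vote(prob_matrices):
    # Hypothetical sketch: stack each model's per-example argmax,
    # giving an array of shape (num_models, num_examples).
    votes = np.stack([pm.argmax(axis=1) for pm in prob_matrices])
    # For each example, keep the class index chosen by the most models.
    return [Counter(col).most_common(1)[0][0] for col in votes.T]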
Example #3
def solve(problems):
    files = {'a': 'a_example.txt',
             'b': 'b_read_on.txt',
             'c': 'c_incunabula.txt',
             'd': 'd_tough_choices.txt',
             'e': 'e_so_many_books.txt',
             'f': 'f_libraries_of_the_world.txt'}

    pool = Pool()  # assumed to be a thread pool; multiprocessing cannot pickle the lambdas below
    results = {}
    for f in problems:
        run_file = files[f]
        days_left, remaining_libs = reader.read('./inputs/' + run_file)
        outputs = []
        while days_left > 0 and len(remaining_libs) > 0:
            # Tuning:
            # For b, c, f: 50 is better than 0
            # For e: 0 is better than 50
            scores = pool.map(lambda x: x.get_score(days_left),
                              remaining_libs)
            next_lib = remaining_libs[np.argmax(scores)]
            _ = pool.map(lambda x: x.scan_copy(), next_lib.books.values())
            remaining_libs.remove(next_lib)
            next_lib.books = next_lib.avail_books(days_left)
            if not next_lib.books:
                continue
            _ = pool.map(lambda x: x.remove_dupes(next_lib.books.keys()),
                         remaining_libs)

            days_left = days_left - next_lib.signup
            outputs.append(next_lib)

        writer.write('./outputs/' + run_file, outputs)
        results[f] = scorer.score(run_file)
    return results
Example #4
def simulate_game(players, deck, hand, table):
    hand += [deck.pop() for i in range(CARDS_IN_HAND - len(hand))]
    table += [deck.pop() for i in range(CARDS_IN_RIVER - len(table))]
    player_hands = [[deck.pop() for i in range(CARDS_IN_HAND)]
                    for i in range(players - 1)]
    your_score = scorer.score(hand + table)
    player_scores = [scorer.score(h + table) for h in player_hands]

    if not player_scores:
        return GameOutcome.WIN

    max_player = max(player_scores)
    if your_score < max_player:
        return GameOutcome.LOSS
    elif your_score > max_player:
        return GameOutcome.WIN
    return GameOutcome.TIE
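A hedged usage sketch for simulate_game follows; the 52-integer deck encoding and the empty starting hand/table are assumptions for illustration, and scorer.score would need to accept whatever card representation the real deck uses.

import random

def estimate_win_rate(players, trials=10000):
    # Hypothetical Monte Carlo wrapper: deal a fresh shuffled deck per trial
    # and count how often simulate_game reports a win.
    wins = 0
    for _ in range(trials):
        deck = list(range(52))  # stand-in card encoding
        random.shuffle(deck)
        if simulate_game(players, deck, hand=[], table=[]) is GameOutcome.WIN:
            wins += 1
    return wins / trials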
Example #5
def cl_corner_bite_render(compressed, size):
    num, board, revealed, constraints, _ = cl_corner_bite(size)

    tile_size = 10
    points = '-{s},-{s},{s},-{s},{s},{s},-{s},{s}'.format(s=0.96 * tile_size /
                                                          2)

    nodes = []
    columns = []

    for i in range(num):
        board[i][1] = compressed[i]
        nodes.append(
            dict(
                id=i,
                neighbors=board[i][2],
                position=((i % size) * tile_size, (i // size) * tile_size),
                has_mine=compressed[i] == '*',
                secret=compressed[i] == '?',
                revealed=i in revealed,
                points=points,
            ))

    for j in range(size):
        # horizontal column hints
        constraints[2 * j][0] = sum(
            [compressed[i] == '*' for i in constraints[2 * j][1]])
        columns.append(
            dict(
                ids=constraints[2 * j][1],
                text_location=(-tile_size, j * tile_size),
            ))

        # vertical column hints
        constraints[2 * j + 1][0] = sum(
            [compressed[i] == '*' for i in constraints[2 * j + 1][1]])
        columns.append(
            dict(
                ids=constraints[2 * j + 1][1],
                text_location=(j * tile_size, -tile_size),
            ))

    constraints[-1][0] = compressed.count('*')

    result = Puzzle(board, revealed, constraints).solve()
    scored = score(result, 'seqnum')
    title = f'CL Corner Bite {size}x{size} with score {scored}'
    tile_text = 'CoB'

    return dict(
        title=title,
        tile_text=tile_text,
        nodes=nodes,
        columns=columns,
        scored=scored,
    )
Example #6
def clone(compressed, filename):
    contents = None
    with open(filename) as f:
        contents = f.read()

    puzzle, name, reverse_id_map = load(contents)
    board = puzzle.board
    revealed = puzzle.revealed
    constraints = puzzle.og_constraints

    num = len(board)
    board.sort(
        key=lambda c: c[0] in revealed)  # praise be to Python's stable sort

    if not compressed:
        return dict(
            num=num,
            board=board,
            revealed=revealed,
            constraints=constraints,
        )
    else:
        replace_cells(board, revealed, constraints, compressed)
        puzzle = Puzzle(board, revealed, constraints)
        result = puzzle.solve()
        scored = score(result, 'seqnum')
        title = f'Cloned "{name}" with score {scored}'
        tile_text = 'CLO'

        data = extract(contents)
        num_revealed = 0
        for index, node in enumerate(data['nodes']):
            if index in revealed:
                node['revealed'] = True
                num_revealed += 1
            else:
                node['has_mine'] = compressed[index - num_revealed] == '*'
                node['secret'] = compressed[index - num_revealed] == '?'

        return dict(
            title=title,
            tile_text=tile_text,
            scored=scored,
            nodes=data['nodes'],
            columns=data['columns'],
            colors=data['colors'],
        )
Example #7
def main():
    books, libraries, num_days = readFile(sys.argv[1])
    for l in libraries:
        l.tot_score = sum(books[b] for b in l.books)

    file_id = sys.argv[1][5]
    if file_id == 'a' or file_id == 'b':
        libraries.sort(key=lambda x: x.su_time)
    elif file_id == 'c':
        libraries.sort(key=lambda x: reward_func(x, 1, 0, 34))
    elif file_id == 'd':
        libraries = sort_by_diff(libraries)
    elif file_id == 'e':
        libraries.sort(key=lambda x: reward_func(x, 1, 45900, 45900))
    elif file_id == 'f':
        libraries.sort(key=lambda x: reward_func(x, 1, 7000, 7000))

    libraries = scan_books(books, libraries, num_days)
    outputFile(sys.argv[1], libraries)
    print(score(books, libraries))
Example #8
def run(level_id=None, verbose=False):
  # a shorthand to make it easy to test the latest puzzle you generated
  if level_id == '-1':
    filenames = ['../latest.puz']

  else:
    filenames = []
    # if you've just cloned the repo, this test/index file does not exist!
    # this file solely consists of lines in the form "[id] [filename]"
    # where filenames are in that same folder
    with open('test/index') as index:
      for line in index.read().split('\n'):
        if not line.strip():
          continue  # tolerate blank lines, e.g. a trailing newline
        id, name = line.strip().split(' ')
        filenames.append(name)

        if level_id and id == level_id:
          filenames = [name]
          break

  print('filenames:', filenames)
  for index, filename in enumerate(filenames):
    with open(f'test/{filename}') as level:
      puzzle, name, reverse_id_map = load(level.read(), verbose=verbose)

    st = time.time()
    result = puzzle.solve()
    et = time.time()

    if verbose:
      print('result:', result)
      print('')
      for step in result['summary']:
        print(' ', step)

    if verbose and not result['solved']:
      flagged = ','.join([reverse_id_map[cell_id] for cell_id in result['flagged']])
      revealed = ','.join([reverse_id_map[cell_id] for cell_id in result['revealed']])
      print(f'P:_:{flagged}:{revealed}:n')

    scored = score(result, 'seqnum')
    print(f'{index + 1:3} {filename:20}: {et - st:.3f} seconds, solved {result["solved"]}, score {scored:.3f} - {name}')
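The harness above has no entry point in this snippet; a minimal, hypothetical one (the script name and flag handling are assumptions) might be:

if __name__ == '__main__':
  import sys
  args = sys.argv[1:]
  run(level_id=args[0] if args and args[0] != '-v' else None,
      verbose='-v' in args)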
Example #9
def check_cutoff(nrl, cutoff=0.4):
    #old code
    #max_res = most_common(nrl)
    #n_mcr = nrl.count(max_res)
    #return n_mcr/len(nrl) > cutoff

    max_res = most_common(nrl)
    order, matrix = sc.blosum62()
    score_list = []
    for res in nrl:
        current_score = sc.score(max_res, res, order, matrix)
        score_list.append(current_score)
    if not score_list:
        return False
    avg_score = sum(score_list) / len(score_list)
    #print avg_score
    return avg_score > cutoff
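most_common and the sc scoring module come from elsewhere in the project. A plausible stand-in for most_common, assuming it simply returns the most frequent residue in the list, is:

from collections import Counter

def most_common(nrl):
    # Hypothetical helper: most frequent element of the residue list.
    return Counter(nrl).most_common(1)[0][0]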
Example #10
 max_score = 0
 best_sector_threshs = []
 best_noise_threshs = []
 for i in range(5, 60, 5):
     # Print to help track progress
     print(str(i))
     for j in range(0, 100, 4):
         score = 0
         scorer.set_params(i, j)
         out_of_images = False
         cnt = 0
         while not out_of_images:
             img = cv2.imread(dirPath + 'cells/' + str(cnt) + '.png')
             if img is not None:
                 expected = int(expecteds[cnt])
                 # Ignore rejected cells
                 if 9 > expected > 0:
                     actual = scorer.score(img)
                     difference = abs(expected - actual)
                     if difference == 0:
                         score += EXACT_WEIGHTING
                     elif difference == 1:
                         score += ONE_OFF_WEIGHTING
                 cnt += 1
             else:
                 out_of_images = True
         if score > max_score:
             max_score = score
             best_sector_threshs = [i]
             best_noise_threshs = [j]
         elif score == max_score:
             best_sector_threshs.append(i)
             best_noise_threshs.append(j)
Example #11
def holey_render(compressed, size):
    num, board, revealed, constraints, _ = holey(size)
    size = 2 * size + 1

    tile_size = 10
    points = '-{s},-{s},{s},-{s},{s},{s},-{s},{s}'.format(s=0.96 * tile_size /
                                                          2)

    nodes = []
    columns = []
    mapped = dict()

    for i in range(size**2):
        c = compressed[i] if i < len(compressed) else '.'
        board[i][1] = c
        mapped[board[i][0]] = c

        nodes.append(
            dict(
                id=board[i][0],
                neighbors=board[i][2],
                position=((board[i][0] % size) * tile_size,
                          (board[i][0] // size) * tile_size),
                has_mine=c == '*',
                secret=c == '?',
                revealed=board[i][0] in revealed,
                points=points,
            ))

    for j in range(size // 2 + 1):
        # horizontal column hints
        constraints[2 * j][0] = sum(
            [mapped[i] == '*' for i in constraints[2 * j][1]])
        columns.append(
            dict(
                ids=constraints[2 * j][1],
                text_location=(-tile_size, 2 * j * tile_size),
            ))

        # vertical column hints
        constraints[2 * j + 1][0] = sum(
            [mapped[i] == '*' for i in constraints[2 * j + 1][1]])
        columns.append(
            dict(
                ids=constraints[2 * j + 1][1],
                text_location=(2 * j * tile_size, -tile_size),
            ))

    constraints[-1][0] = compressed.count('*')

    result = Puzzle(board, revealed, constraints).solve()
    scored = score(result, 'seqnum')
    title = f'Holey {size}x{size} with score {scored}'
    tile_text = 'HOL'

    return dict(
        title=title,
        tile_text=tile_text,
        nodes=nodes,
        columns=columns,
        scored=scored,
    )
Example #12
def L_shape_grid(compressed, size, depth):
    """
    33
    13
  2114
  2244
  """
    size, depth = int(size), int(depth)
    side_length = 2 * size * 2**depth
    points = '-{s},-{s},{s},-{s},{s},{s},-{s},{s}'.format(s=0.96 / 2)
    cindex = 0

    board = []
    revealed = []
    constraints = []
    nodes = []
    columns = []
    colors = [
        dict(ids=[], color='RED', is_dark=False),
        dict(ids=[], color='ORANGE', is_dark=False),
        dict(ids=[], color='GREEN', is_dark=False),
        dict(ids=[], color='BLUE', is_dark=False),
    ]

    id_map = dict()
    for y in range(side_length):
        for x in range(side_length):
            if x < side_length // 2 - 1 and y < side_length // 2 - 1:
                continue
            else:
                id_map[(x, y)] = len(id_map)

    pos_to_id = lambda x, y: id_map.get((x, y))

    for y in range(side_length):
        for x in range(side_length):
            if x < side_length // 2 - 1 and y < side_length // 2 - 1:
                continue

            cell_id = pos_to_id(x, y)
            neighbors = []

            for dy in range(-1, 2):
                for dx in range(-1, 2):
                    if dx == dy == 0:
                        continue

                    neighbor_id = pos_to_id(x + dx, y + dy)
                    if neighbor_id is not None:  # id 0 is a valid cell
                        neighbors.append(neighbor_id)

            if x < side_length // 2 and y < side_length // 2:
                what = '.'
                revealed.append(cell_id)
            else:
                what = compressed[cindex] if compressed else ''
                cindex += 1

                temp_x, temp_y, threshold = x, y, side_length // 4

                for i in range(depth):
                    if threshold <= temp_x < 3 * threshold and threshold <= temp_y < 3 * threshold:
                        if i == depth - 1:
                            colors[0]['ids'].append(cell_id)
                        else:
                            temp_x -= threshold
                            temp_y -= threshold

                    elif temp_x < threshold * 2:
                        if i == depth - 1:
                            colors[1]['ids'].append(cell_id)
                        else:
                            temp_x, temp_y = temp_y - threshold * 2, threshold * 2 - temp_x - 1

                    elif temp_y < threshold * 2:
                        if i == depth - 1:
                            colors[2]['ids'].append(cell_id)
                        else:
                            temp_x, temp_y = threshold * 2 - temp_y - 1, temp_x - threshold * 2

                    else:
                        if i == depth - 1:
                            colors[3]['ids'].append(cell_id)
                        else:
                            temp_x -= threshold * 2
                            temp_y -= threshold * 2

                    threshold //= 2

            board.append([cell_id, what, neighbors])

            if compressed:
                nodes.append(
                    dict(
                        id=cell_id,
                        neighbors=neighbors,
                        position=(x, y),
                        has_mine=what == '*',
                        secret=what == '?',
                        revealed=cell_id in revealed,
                        points=points,
                    ))

    constraints.append([
        sum([c[1] == '*' for c in board]),
        [c[0] for c in board if c[0] not in revealed]
    ])

    for color in colors:
        constraints.append(
            [sum(board[n][1] == '*' for n in color['ids']), color['ids']])

    num = 3 * side_length**2 // 4
    board.sort(
        key=lambda c: c[0] in revealed)  # praise be to Python's stable sort

    if not compressed:
        return dict(
            num=num,
            board=board,
            revealed=revealed,
            constraints=constraints,
        )
    else:
        result = Puzzle(board, revealed, constraints).solve()
        scored = score(result, 'seqnum')
        title = f'L-shape {size}-{depth} with score {scored}'
        tile_text = 'L'

        return dict(
            title=title,
            tile_text=tile_text,
            nodes=nodes,
            columns=columns,
            colors=colors,
            scored=scored,
        )
Example #13
    return clf


def load_files():
    train = np.loadtxt("learning.tab")
    test = np.loadtxt("test.tab")

    X_train = train[:, 1:-1]
    y_train = train[:, -1]
    X_test = test[:, 1:-1]
    y_test = test[:, -1]

    # pairs
    names_train = train[:, 0]
    names_test = test[:, 0]

    return X_train, y_train, X_test, y_test, names_train, names_test


if __name__ == "__main__":
    X_train, y_train, X_test, y_test, train, test = load_files()
    estimator = svc(X_train, y_train)

    y_train_predict = estimator.predict(X_train)
    y_test_predict = estimator.predict(X_test)

    print(score(y_test_predict))

    np.savetxt("learning.pred.tab", y_train_predict)
    np.savetxt("test.pred.tab", y_test_predict)
Example #14
    def optimize(self, smiles, vina_conf, vina_log_path,
                 log=None, mu=32, lam=64, generation=1000, seed=0, verbose=True):

        np.random.seed(seed)
        gene_length = 300

        # Initialize population
        print("Initializing Population....")

        # Generation 0, start from input smiles
        initial_smiles = np.random.choice(smiles, mu+lam) 
        initial_smiles = [util.canonicalize(s) for s in initial_smiles]
        initial_genes = [self.encode(s, max_len=gene_length)
                        for s in initial_smiles]
        initial_scores = []
        print(r"|0%--------------------50%-------------------100%|")
        for i, s in enumerate(initial_smiles):
            initial_scores.append(
                scorer.score_vina(s, conf_path=vina_conf, log_path=vina_log_path))
            print("*" * int(50 * i / (mu + lam)), end='\r')
        print()

        population = []
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                    initial_smiles):
            population.append((score, smiles, gene))

        # Select top $mu$ smiles as generation 0
        population = sorted(population, key=lambda x: x[0], reverse=True)[:mu]
    

        # Start!
        print("Generation Start!")
        all_smiles = [p[1] for p in population]
        all_result = []

        for epoch in range(generation):
            
            new_population = []
            # For each mutation in each generation in range $lamda$
            for _ in range(lam):
                # random select one smi/gene in top $mu$ smiles
                p = population[np.random.randint(mu)] 
                p_gene = p[2]
                c_gene = util.mutation(p_gene)

                c_smiles = util.canonicalize(self.decode(c_gene))
                if c_smiles not in all_smiles:
                    c_score = scorer.score(c_smiles)
                    c = (c_score, c_smiles, c_gene)
                    new_population.append(c)
                    all_smiles.append(c_smiles)

            population.extend(new_population)
            all_result.extend(new_population)
            population = sorted(population,
                                key=lambda x: x[0], reverse=True)[:mu]

            if epoch % 15 == 0 and verbose:
                # Log on screen
                self._log(epoch, population, population_size=len(all_smiles))

        print("\nFinished!")

        if log:
            try:
                self._log_file(log, all_result)
                print("Log file write into %s" % log)
            except Exception:
                print("Failed writing log to %s" % log)
        
        return all_result
Example #15
def evaluate(model,
             device,
             eval_dataloader,
             eval_label_ids,
             num_labels,
             id2label,
             verbose=True,
             raw_data=None):
    model.eval()
    eval_loss = 0
    nb_eval_steps = 0
    preds = []
    for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        label_ids = label_ids.to(device)
        with torch.no_grad():
            logits, _ = model(input_ids, segment_ids, input_mask, labels=None)
        loss_fct = CrossEntropyLoss()
        tmp_eval_loss = loss_fct(logits.view(-1, num_labels),
                                 label_ids.view(-1))
        eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if len(preds) == 0:
            preds.append(logits.detach().cpu().numpy())
        else:
            preds[0] = np.append(preds[0],
                                 logits.detach().cpu().numpy(),
                                 axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds[0], axis=1).reshape(-1)
    pred_labels = [id2label[pred_id] for pred_id in preds]
    eval_labels = [
        id2label[label_id] for label_id in eval_label_ids.numpy().reshape(-1)
    ]
    _, indices = score(eval_labels, pred_labels, verbose=verbose)

    structure_parts = compute_structure_parts(raw_data)
    compute_structure_errors(structure_parts,
                             preds=pred_labels,
                             gold_labels=eval_labels)

    wrong_indices = indices['wrong_indices']
    correct_indices = indices['correct_indices']
    wrong_relations = indices['wrong_predictions']
    correct_predictions = indices['correct_predictions']
    all_predictions = indices['all_predictions']
    wrong_ids = [d['id'] for d in raw_data[wrong_indices]]
    correct_ids = [d['id'] for d in raw_data[correct_indices]]
    all_ids = [d['id'] for d in raw_data]
    print('Num Correct: {} | Num Wrong: {}'.format(len(correct_indices),
                                                   len(wrong_indices)))
    print('Wrong Predictions:')
    print(Counter(wrong_relations))
    # save_dir = os.path.join(cfg_dict['test_save_dir'], cfg_dict['id'])
    save_dir = '/home/ec2-user/apex/SpanBERT/indices_dir/tacred/'
    os.makedirs(save_dir, exist_ok=True)
    print('saving to: {}'.format(save_dir))
    np.savetxt(os.path.join(save_dir, 'correct_ids.txt'),
               correct_ids,
               fmt='%s')
    np.savetxt(os.path.join(save_dir, 'wrong_ids.txt'), wrong_ids, fmt='%s')
    np.savetxt(os.path.join(save_dir, 'wrong_predictions.txt'),
               wrong_relations,
               fmt='%s')
    np.savetxt(os.path.join(save_dir, 'correct_predictions.txt'),
               correct_predictions,
               fmt='%s')
    np.savetxt(os.path.join(save_dir, 'all_predictions.txt'),
               all_predictions,
               fmt='%s')
    np.savetxt(os.path.join(save_dir, 'all_ids.txt'), all_ids, fmt='%s')

    ids = [instance['id'] for instance in raw_data]
    formatted_data = []
    for instance_id, pred, gold in zip(ids, pred_labels, eval_labels):
        formatted_data.append({
            "id": instance_id.replace("'", '"'),
            "label_true": gold.replace("'", '"'),
            "label_pred": pred.replace("'", '"')
        })

    id2preds = {d['id']: pred for d, pred in zip(raw_data, pred_labels)}
    json.dump(id2preds, open(os.path.join(save_dir, 'id2preds.json'), 'w'))

    with open(os.path.join(save_dir, 'spanbert_tacred.jsonl'), 'w') as handle:
        print('Saving to: {}'.format(
            os.path.join(save_dir, 'spanbert_tacred.jsonl')))
        for instance in formatted_data:
            line = "{}\n".format(instance)
            handle.write(line)

    result = compute_f1(preds, eval_label_ids.numpy())
    result['accuracy'] = simple_accuracy(preds, eval_label_ids.numpy())
    result['eval_loss'] = eval_loss
    if verbose:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
    return preds, result
Example #16
 def compute_score(self):
     """Scores the job entry as defined in scorefile.csv"""
     self.preprocess_bodystring()
     self.score, self.score_hits = scorer.score(self.processed_tokens)
Example #17
    if v != '': continue
    if k in semantic_preditions:
        binary_predictions[k] = semantic_preditions[k]
    else:
        binary_predictions[k] = 'no_relation'

predictions = []
for i in range(0, len(binary_predictions)):
    assert binary_predictions[i] != ''
    predictions.append(binary_predictions[i])

gold = []
with open(y['gold_file'], 'r') as data:
    for d in data:
        gold.append(d.strip())

if not os.path.exists('saved_models/depot-all-recent/'):
    os.mkdir('saved_models/depot-all-recent/')

out_file = 'saved_models/depot-all-recent/predictions.txt'
with open(out_file, 'w') as out_f:
    for i, p in enumerate(predictions):
        out_f.write('%d %s\n' % (i, p))

p, r, f1 = scorer.score(gold, predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    'test', p, r, f1))

print("Evaluation ended.")
Example #18
"""
Score a directory of cells and write the results to a file
"""

import cv2

import scorer

SECTOR_THRESHOLD = 25
NOISE_THRESHOLD = 32
CELL_DIR_PATH = 'img/all_cells/cells/'
RESULTS_FILE = 'img/actual_results.txt'

if __name__ == '__main__':
    scorer.set_params(SECTOR_THRESHOLD, NOISE_THRESHOLD)
    cnt = 0
    with open(RESULTS_FILE, 'w') as results_file:
        while True:
            img = cv2.imread(CELL_DIR_PATH + str(cnt) + '.png')
            if img is None:
                break  # no more numbered cell images
            score = scorer.score(img)
            results_file.write('%s \n' % score)
            cnt += 1
Example #19
    newline='')
writer = csv.writer(csvfile)
writer.writerow(["sentence", "idx", "predict", "gold"])
for i in tqdm(range(len(error))):
    for j in range(len(error[i])):
        inside = 0
        for k in range(len(error[i][j])):
            inside = 1
            sentence = " ".join(
                [vocab.id2word[g] for g in error[i][j][k]["token"]])
            sub = []
            obj = []
            for it, g in enumerate(error[i][j][k]["sub_pos"]):
                if g == 0 and it < len(error[i][j][k]["token"]):
                    sub.append(vocab.id2word[error[i][j][k]["token"][it]])
            for it, g in enumerate(error[i][j][k]["obj_pos"]):
                if g == 0 and it < len(error[i][j][k]["token"]):
                    obj.append(vocab.id2word[error[i][j][k]["token"][it]])
            predict = id2label[error[i][j][k]["preds"]]
            gold = id2label[error[i][j][k]["label"]]
            writer.writerow([sentence, idx, predict, gold])
        if inside == 1:
            writer.writerow("")

predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

print("Evaluation ended.")
Example #20
            6: 'classification',
            7: 'classification',
            8: 'classification',
        }
        for i in tests.keys():
            start_total_time = time.time()
            folder = r'..\..\check_' + str(i) + '_' + tests[i][0] + '\\'
            argv = [
                '--train-csv', folder + 'train.csv',
                '--test-csv', folder + 'test.csv',
                '--prediction-csv', folder + 'prediction.csv',
                '--test-target-csv', folder + 'test-target.csv',
                '--model-dir', '.',
                # '--nrows', '5000' if i in [3, 4, 5, 6, 7] else '500' if i in [8] else '-1',
                '--mode', tests[i]]
            args = parser.parse_args(argv)

            log('processing', folder)
            model_config = train(args)
            X, _, _ = preprocess_test_data(args, model_config)
            prediction = predict(X, model_config['model'])
            score(args, prediction=prediction)

            log('all datasets time: {}'.format(time.time() - start_total_time))
            log_trail('=', '\n\n')

    except BaseException as e:
        log('EXCEPTION:', e)
        log(traceback.format_exc())
        exit(1)
Example #21
    return clf


def load_files():
    train = np.loadtxt('learning.tab')
    test = np.loadtxt('test.tab')

    X_train = train[:, 1:-1]
    y_train = train[:, -1]
    X_test = test[:, 1:-1]
    y_test = test[:, -1]

    # pairs
    names_train = train[:, 0]
    names_test = test[:, 0]

    return X_train, y_train, X_test, y_test, names_train, names_test


if __name__ == '__main__':
    X_train, y_train, X_test, y_test, train, test = load_files()
    estimator = svc(X_train, y_train)

    y_train_predict = estimator.predict(X_train)
    y_test_predict = estimator.predict(X_test)

    print(score(y_test_predict))

    np.savetxt('learning.pred.tab', y_train_predict)
    np.savetxt('test.pred.tab', y_test_predict)
Example #22
    def train(self):
        settings = self.settings

        print("Training is starting for {} epochs using ".format(
            settings.epochs) +
              "{} with the following settings:".format(self.device))
        print()
        for key, val in settings.__dict__.items():
            print("{}: {}".format(key, val))
        print(flush=True)

        train_dataloader = self._init_training_data(settings.train)
        best_f1 = 0
        best_f1_epoch = 1 + self.epoch_offset

        for epoch in range(1 + self.epoch_offset,
                           settings.epochs + 1 + self.epoch_offset):
            start_time = time.time()
            total_loss, sequences_trained = self._run_train_epoch(
                train_dataloader, epoch, not settings.quiet,
                not settings.disable_gradient_clip)
            total_time = round(time.time() - start_time, 2)
            print("#" * 50)
            print("Epoch {}".format(epoch))
            print("loss {}".format(total_loss))
            print("execution time {}s".format(total_time) \
            + " ({} trained sequences/s)".format(round(sequences_trained/(total_time))))
            print("#" * 50, flush=True)
            if not settings.disable_val_eval:
                entries, predicted, other_predicted = self.predict(
                    settings.val, settings.elmo_dev)
                #a,d,b,c = zip(*((entry[0], len(entry[4]), entry[1].numpy().shape, predicted[entry[0]].numpy().shape) for entry in entries))
                #print([(x,w,y,z) for x,w,y,z in zip(a,d,b,c) if y!=z])
                f1, _ = sc.score(*zip(*((entry[1][self.pt].numpy(),
                                         predicted[entry[0]].numpy())
                                        for entry in entries)))
                print("Primary Dev F1 on epoch {} is {:.2%}".format(epoch, f1))

                if len(other_predicted) > 0:
                    other_f1, _ = sc.score(*zip(
                        *((entry[1][self.ot].numpy(),
                           other_predicted[entry[0]].numpy())
                          for entry in entries)))
                    print("Secondary Dev F1 on epoch {} is {:.2%}".format(
                        epoch, other_f1))
                #f1 = sc.score()
                improvement = f1 > best_f1
                elapsed = epoch - best_f1_epoch
                es_active = settings.early_stopping > 0

                if (es_active and not improvement
                        and elapsed == settings.early_stopping):
                    print("Have not seen any improvement for {} epochs".format(
                        elapsed))
                    print("Best F1 was {} seen at epoch #{}".format(
                        best_f1, best_f1_epoch))
                    break
                else:
                    if improvement:
                        best_f1 = f1
                        best_f1_epoch = epoch
                        print("Saving {} model".format(best_f1_epoch))
                        self.save("best_model.save", epoch)
                    else:
                        print("Have not seen any improvement for {} epochs".
                              format(elapsed))
                    print("Best F1 was {:.2%} seen at epoch #{}".format(
                        best_f1, best_f1_epoch))

            if settings.enable_train_eval:
                entries, predicted, other_predicted = self.predict(
                    settings.train, settings.elmo_train)
                train_f1, _ = sc.score(*zip(*((entry[1][self.pt].numpy(),
                                               predicted[entry[0]].numpy())
                                              for entry in entries)))
                print("Sem Train F1 on epoch {} is {:.2%}".format(
                    epoch, train_f1))

                if len(other_predicted) > 0:
                    other_train_f1, _ = sc.score(*zip(
                        *((entry[1][self.ot].numpy(),
                           other_predicted[entry[0]].numpy())
                          for entry in entries)))
                    print("Syn Train F1 on epoch {} is {:.2%}".format(
                        epoch, other_train_f1))

            if settings.save_every:
                self.save("{}_epoch{}.save".format(int(time.time()), epoch),
                          epoch)
            else:
                self.save("last_epoch.save", epoch)
Example #23
import solver
import parser
import scorer
import writer
import time
import glob
#import optimize

#solution = solver.solve(parser.parse('datasets/a_example.txt'))
#print(solution)

for idx, filename in enumerate(sorted(glob.glob('datasets/*'))):
    dataset = parser.parse(filename)
    start_time = time.time()
    solution = solver.solve(dataset)
    # print(solution)
    print("--- %.10f seconds ---" % (time.time() - start_time))
    score = scorer.score(solution, dataset)
    #print('Score for %s: %s (%s pizzas for %s person)' % (
    #    filename[9:], score, dataset['nOfPizzas'], 2*dataset['nOfTwo']+3*dataset['nOfThree']+4*dataset['nOfFour']))
    writer.writing(solution, filename[9] + '.txt')

#Optimazation Part here
#opt_solution = optimize.solve(dataset)
#print(opt_solution)
#score = opt_scorer.score(opt_solution, dataset)

# dataset = parser.parse('datasets/a_example')
# solution = solver.solve(dataset)
# print(solution)
# score = scorer.score(solution, dataset)
# print("Score =",score)
Example #24
def train():
    # print training info
    print _get_training_info()

    # dealing with files
    print "Loading data from files..."
    train_loader = data_utils.DataLoader(
        os.path.join(FLAGS.data_dir, 'train.vocab%d.id' % FLAGS.vocab_size),
        FLAGS.batch_size,
        FLAGS.sent_len,
        subsample=FLAGS.subsample,
        unk_prob=FLAGS.corrupt_rate
    )  # use a subsample of the data if specified
    # load cv dataset
    dev_loaders = []
    test_loaders = []
    for i in range(100):
        dev_loader = data_utils.DataLoader(
            os.path.join(FLAGS.data_dir, 'cv',
                         'dev.vocab%d.id.%d' % (FLAGS.vocab_size, i)),
            FLAGS.batch_size, FLAGS.sent_len)
        test_loader = data_utils.DataLoader(
            os.path.join(FLAGS.data_dir, 'cv',
                         'test.vocab%d.id.%d' % (FLAGS.vocab_size, i)),
            FLAGS.batch_size, FLAGS.sent_len)
        dev_loaders.append(dev_loader)
        test_loaders.append(test_loader)

    max_steps = train_loader.num_batches * FLAGS.num_epoch

    print "# Examples in training data:"
    print train_loader.num_examples

    # load label2id mapping and create inverse mapping
    label2id = data_utils.LABEL_TO_ID
    id2label = dict([(v, k) for k, v in label2id.iteritems()])

    key = random.randint(1e5, 1e6 - 1)  # get a random 6-digit int
    test_key_file_list = []
    test_prediction_file_list = []
    dev_key_file_list = []
    dev_prediction_file_list = []
    for i in range(100):
        test_key_file = os.path.join(
            FLAGS.train_dir,
            str(key) + '.shuffled.test.key.tmp.%d' % i)
        test_prediction_file = os.path.join(
            FLAGS.train_dir,
            str(key) + '.shuffled.test.prediction.tmp.%d' % i)
        dev_key_file = os.path.join(FLAGS.train_dir,
                                    str(key) + '.shuffled.dev.key.tmp.%d' % i)
        dev_prediction_file = os.path.join(
            FLAGS.train_dir,
            str(key) + '.shuffled.dev.prediction.tmp.%d' % i)
        test_key_file_list.append(test_key_file)
        test_prediction_file_list.append(test_prediction_file)
        dev_key_file_list.append(dev_key_file)
        dev_prediction_file_list.append(dev_prediction_file)
        test_loaders[i].write_keys(test_key_file, id2label=id2label)
        dev_loaders[i].write_keys(dev_key_file, id2label=id2label)

    with tf.Graph().as_default():
        print "Constructing model %s..." % (FLAGS.model)
        with tf.variable_scope('model', reuse=None):
            m = _get_model(is_train=True)
        with tf.variable_scope('model', reuse=True):
            mdev = _get_model(is_train=False)

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=2)
        save_path = os.path.join(FLAGS.train_dir, 'model.ckpt')

        config = tf.ConfigProto()
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_mem, allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(device_count={"GPU": 1},
                                                gpu_options=gpu_options))
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph=sess.graph)
        sess.run(tf.initialize_all_variables())

        if FLAGS.use_pretrain:
            print "Use pretrained embeddings to initialize model ..."
            emb_file = os.path.join(
                FLAGS.data_dir,
                "emb-v%d-d%d.npy" % (FLAGS.vocab_size, FLAGS.hidden_size))
            if not os.path.exists(emb_file):
                raise Exception("Pretrained vector file does not exist at: " +
                                emb_file)
            pretrained_embedding = np.load(emb_file)
            m.assign_embedding(sess, pretrained_embedding)

        current_lr = FLAGS.init_lr
        global_step = 0
        training_history = []
        dev_f_history = []
        test_f_history = []
        best_dev_scores = []
        best_test_scores = []

        def eval_once(mdev, sess, data_loader):
            data_loader.reset_pointer()
            predictions = []
            confidences = []
            dev_loss = 0.0
            for _ in xrange(data_loader.num_batches):
                x_batch, y_batch, x_lens = data_loader.next_batch()
                feed = _get_feed_dict(mdev,
                                      x_batch,
                                      y_batch,
                                      x_lens,
                                      use_pos=(FLAGS.pos_size > 0),
                                      use_ner=(FLAGS.ner_size > 0),
                                      use_deprel=(FLAGS.deprel_size > 0))
                loss_value, pred, conf = sess.run(
                    [mdev.loss, mdev.prediction, mdev.confidence],
                    feed_dict=feed)
                predictions += list(pred)
                confidences += list(conf)
                dev_loss += loss_value
            dev_loss /= data_loader.num_batches
            return dev_loss, predictions, confidences

        print "Start training with %d epochs, and %d steps per epoch..." % (
            FLAGS.num_epoch, train_loader.num_batches)
        for epoch in xrange(FLAGS.num_epoch):
            train_loss = 0.0
            train_loader.reset_pointer()
            m.assign_lr(sess, current_lr)
            for _ in xrange(train_loader.num_batches):
                global_step += 1
                start_time = time.time()
                x_batch, y_batch, x_lens = train_loader.next_batch()
                feed = _get_feed_dict(m,
                                      x_batch,
                                      y_batch,
                                      x_lens,
                                      use_pos=(FLAGS.pos_size > 0),
                                      use_ner=(FLAGS.ner_size > 0),
                                      use_deprel=(FLAGS.deprel_size > 0))
                _, loss_value = sess.run([m.train_op, m.loss], feed_dict=feed)
                duration = time.time() - start_time
                train_loss += loss_value
                assert not np.isnan(loss_value), "Model loss is NaN."

                if global_step % FLAGS.log_step == 0:
                    format_str = (
                        '%s: step %d/%d (epoch %d/%d), loss = %.6f (%.3f sec/batch), lr: %.6f'
                    )
                    print format_str % (datetime.now(), global_step, max_steps,
                                        epoch + 1, FLAGS.num_epoch, loss_value,
                                        duration, current_lr)

            # summary loss after each epoch
            train_loss /= train_loader.num_batches
            summary_writer.add_summary(_summary_for_scalar(
                'eval/training_loss', train_loss),
                                       global_step=epoch)
            # do CV on test set and use average score
            avg_dev_loss = 0.0
            avg_test_loss = 0.0
            avg_dev_f = 0.0
            avg_dev_p = 0.0
            avg_dev_r = 0.0
            avg_test_f = 0.0
            avg_test_p = 0.0
            avg_test_r = 0.0
            for i in range(100):
                dev_loss, dev_preds, dev_confs = eval_once(
                    mdev, sess, dev_loaders[i])
                avg_dev_loss += dev_loss
                summary_writer.add_summary(_summary_for_scalar(
                    'eval/dev_loss%d' % i, dev_loss),
                                           global_step=epoch)
                _write_prediction_file(dev_preds, dev_confs, id2label,
                                       dev_prediction_file_list[i])
                # print "Evaluating on dev set..."
                dev_prec, dev_recall, dev_f = scorer.score(
                    dev_key_file_list[i], [dev_prediction_file_list[i]],
                    FLAGS.f_measure)
                avg_dev_f += dev_f
                avg_dev_p += dev_prec
                avg_dev_r += dev_recall

                test_loss, test_preds, test_confs = eval_once(
                    mdev, sess, test_loaders[i])
                avg_test_loss += test_loss
                summary_writer.add_summary(_summary_for_scalar(
                    'eval/test_loss%d' % i, test_loss),
                                           global_step=epoch)
                _write_prediction_file(test_preds, test_confs, id2label,
                                       test_prediction_file_list[i])
                # print "Evaluating on test set..."
                test_prec, test_recall, test_f = scorer.score(
                    test_key_file_list[i], [test_prediction_file_list[i]],
                    FLAGS.f_measure)
                avg_test_f += test_f
                avg_test_p += test_prec
                avg_test_r += test_recall
            avg_dev_loss /= 100
            avg_test_loss /= 100
            avg_dev_f /= 100
            avg_dev_p /= 100
            avg_dev_r /= 100
            avg_test_f /= 100
            avg_test_p /= 100
            avg_test_r /= 100
            print "Epoch %d: training_loss = %.6f" % (epoch + 1, train_loss)
            print "Epoch %d: avg_dev_loss = %.6f, avg_dev_f-%g = %.6f" % (
                epoch + 1, avg_dev_loss, FLAGS.f_measure, avg_dev_f)
            print "Epoch %d: avg_test_loss = %.6f, avg_test_f-%g = %.6f" % (
                epoch + 1, avg_test_loss, FLAGS.f_measure, avg_test_f)

            # decrease learning rate if dev_f does not increase after an epoch
            if len(dev_f_history) > 10 and avg_dev_f <= dev_f_history[-1]:
                current_lr *= FLAGS.lr_decay
            training_history.append(train_loss)

            # save the model when best f score is achieved on dev set
            if len(dev_f_history) == 0 or (len(dev_f_history) > 0
                                           and avg_dev_f > max(dev_f_history)):
                saver.save(sess, save_path, global_step=epoch)
                print "\tmodel saved at epoch %d, with best dev dataset f-%g score %.6f" % (
                    epoch + 1, FLAGS.f_measure, avg_dev_f)
                best_dev_scores = [avg_dev_p, avg_dev_r, avg_dev_f]
                best_test_scores = [avg_test_p, avg_test_r, avg_test_f]
            dev_f_history.append(avg_dev_f)
            test_f_history.append(avg_test_f)

            # stop learning if lr is too low
            if current_lr < 1e-6:
                break
        # saver.save(sess, save_path, global_step=epoch)
        print "Training ended with %d epochs." % epoch
        print "\tBest dev scores achieved (P, R, F-%g):\t%.3f\t%.3f\t%.3f" % tuple(
            [FLAGS.f_measure] + [x * 100 for x in best_dev_scores])
        print "\tBest test scores achieved on best dev scores (P, R, F-%g):\t%.3f\t%.3f\t%.3f" % tuple(
            [FLAGS.f_measure] + [x * 100 for x in best_test_scores])

    # clean up
    for dev_key_file, dev_prediction_file, test_key_file, test_prediction_file in zip(
            dev_key_file_list, dev_prediction_file_list, test_key_file_list,
            test_prediction_file_list):
        if os.path.exists(dev_key_file):
            os.remove(dev_key_file)
        if os.path.exists(dev_prediction_file):
            os.remove(dev_prediction_file)
        if os.path.exists(test_key_file):
            os.remove(test_key_file)
        if os.path.exists(test_prediction_file):
            os.remove(test_prediction_file)
Example #25
def evaluate():
    print "Building graph and loading model..."
    with tf.Graph().as_default():
        ### the first model will be doing the full batches (a residual of examples will be left)
        with tf.variable_scope('model'):
            m = _get_model(is_train=False)
        saver = tf.train.Saver(tf.all_variables())

        config = tf.ConfigProto()
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(device_count={"GPU": 1},
                                                gpu_options=gpu_options))
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            raise IOError("Loading checkpoint file failed!")

        print "====> Evaluating on %s data" % FLAGS.eval_set
        print "Loading %s data..." % FLAGS.eval_set
        loader = data_utils.DataLoader(
            os.path.join(FLAGS.data_dir,
                         '%s.vocab%d.id' % (FLAGS.eval_set, FLAGS.vocab_size)),
            FLAGS.batch_size, FLAGS.sent_len
        )  # load test data with batch_size 1; this is too slow

        # load label2id mapping and create inverse mapping
        label2id = data_utils.LABEL_TO_ID
        id2label = dict([(v, k) for k, v in label2id.iteritems()])

        # key = random.randint(1e5, 1e6-1) # get a random 6-digit int
        test_key_file = os.path.join(FLAGS.train_dir,
                                     'shuffled.%s.key.tmp' % FLAGS.eval_set)
        test_prediction_file = os.path.join(
            FLAGS.train_dir, 'shuffled.%s.prediction.tmp' % FLAGS.eval_set)
        test_prob_file = os.path.join(FLAGS.train_dir,
                                      'shuffled.%s.probs.tmp' % FLAGS.eval_set)
        loader.write_keys(test_key_file,
                          id2label=id2label,
                          include_residual=True
                          )  # write shuffled key to file, used by scorer

        test_loss = .0
        print "Evaluating on %d test examples with full batch..." % (
            loader.num_batches * loader.batch_size)
        preds, confs = [], []
        all_probs = np.zeros([loader.num_examples, FLAGS.num_class])
        for i in range(loader.num_batches):
            x, y, x_lens = loader.next_batch()
            feed = _get_feed_dict(m,
                                  x,
                                  y,
                                  x_lens,
                                  use_pos=(FLAGS.pos_size > 0),
                                  use_ner=(FLAGS.ner_size > 0),
                                  use_deprel=(FLAGS.deprel_size > 0))
            loss_value, predictions, confidences, probs = sess.run(
                [m.loss, m.prediction, m.confidence, m.probs], feed_dict=feed)
            test_loss += loss_value
            preds += list(predictions)
            confs += list(confidences)
            all_probs[i * loader.batch_size:(i + 1) *
                      loader.batch_size, :] = probs

        ### second model will do the residuals with one batch
        if loader.num_residual > 0:
            print "Evaluating on an residual of %d examples..." % loader.num_residual
            x, y, x_lens = loader.get_residual()
            feed = _get_feed_dict(m,
                                  x,
                                  y,
                                  x_lens,
                                  use_pos=(FLAGS.pos_size > 0),
                                  use_ner=(FLAGS.ner_size > 0),
                                  use_deprel=(FLAGS.deprel_size > 0))
            loss_value, predictions, confidences, probs = sess.run(
                [m.loss, m.prediction, m.confidence, m.probs], feed_dict=feed)
            test_loss += loss_value
            preds += list(predictions)
            confs += list(confidences)
            all_probs[loader.num_batches * loader.batch_size:, :] = probs

        if not FLAGS.use_confidence:
            confs = [1.0] * len(confs)

        _write_prediction_file(preds, confs, all_probs, id2label,
                               test_prediction_file, test_prob_file)
        test_loss /= loader.num_examples
        print "%s: test_loss = %.6f" % (datetime.now(), test_loss)

        prec, recall, f1 = scorer.score(test_key_file, [test_prediction_file],
                                        verbose=True)

    # clean up
    if FLAGS.cleanup and os.path.exists(test_key_file):
        os.remove(test_key_file)
    if FLAGS.cleanup and os.path.exists(test_prediction_file):
        os.remove(test_prediction_file)