# Example #1
def test_get_batches():
    """Exercise batch.get_batches: exact_epoch=True yields every example once
    (with a trailing partial batch); exact_epoch=False drops the partial batch."""
    data = {
        'data0': [[i] * 2 for i in range(10)],
        'data1': [[i] * 3 for i in range(10)],
    }

    batches = list(batch.get_batches(data, batch_size=3, exact_epoch=True))

    # Expected shuffled order per batch: (data0 rows, data1 rows).
    expected = [
        ([[6, 6], [3, 3], [0, 0]], [[6, 6, 6], [3, 3, 3], [0, 0, 0]]),
        ([[4, 4], [5, 5], [2, 2]], [[4, 4, 4], [5, 5, 5], [2, 2, 2]]),
        ([[1, 1], [9, 9], [8, 8]], [[1, 1, 1], [9, 9, 9], [8, 8, 8]]),
        ([[7, 7]], [[7, 7, 7]]),
    ]
    for got, (exp0, exp1) in zip(batches, expected):
        assert (got['data0'] == np.array(exp0)).all()
        assert (got['data1'] == np.array(exp1)).all()

    assert len(batches) == 4

    # Without exact_epoch, the final partial batch is discarded.
    batches = list(batch.get_batches(data, batch_size=3, exact_epoch=False))
    assert len(batches) == 3
 def dataset_generator(self, dataset: List[Tuple[QASetting, List[Answer]]],
                       is_eval: bool) -> Iterable[Mapping[TensorPort, np.ndarray]]:
     """Preprocess *dataset* and return it as batches keyed by tensor port."""
     corpus = self.preprocess(dataset)
     # Wire each preprocessed corpus column to the port that consumes it.
     port_feed = {
         Ports.Input.multiple_support: corpus["support"],
         Ports.Input.question: corpus["question"],
         Ports.Input.atomic_candidates: corpus["candidates"],
         Ports.Targets.candidate_labels: corpus["targets"],
     }
     return get_batches(port_feed)
# Example #3
 def dataset_generator(
         self,
         dataset: List[Tuple[QASetting, List[Answer]]],
         is_eval: bool,
         test_time=False) -> Iterable[Mapping[TensorPort, np.ndarray]]:
     """Preprocess *dataset* (optionally in test-time mode) and batch it.

     Batch size is taken from self.config['batch_size'].
     """
     corpus = self.preprocess(dataset, test_time=test_time)
     # Map each corpus column onto its consuming tensor port.
     port_feed = {
         Ports.Input.question: corpus["question"],
         Ports.Input.atomic_candidates: corpus["candidates"],
         Ports.Targets.target_index: corpus["answers"],
     }
     return get_batches(port_feed, batch_size=self.config['batch_size'])
    def dataset_generator(self, dataset: List[Tuple[QASetting, List[Answer]]],
                          is_eval: bool) -> Iterable[Mapping[TensorPort, np.ndarray]]:
        """Run the preprocessing pipeline on *dataset* and return port-keyed batches."""
        # Only the corpus itself is needed here; the pipeline's other outputs
        # (vocab artifacts) are discarded.
        corpus, _, _, _ = preprocess_with_pipeline(
            dataset, self.shared_vocab_config.vocab, self.answer_vocab,
            use_single_support=True, sepvocab=True)

        # Map every preprocessed column onto the tensor port that reads it.
        port_feed = {
            Ports.Input.single_support: corpus["support"],
            Ports.Input.question: corpus["question"],
            Ports.Targets.candidate_idx: corpus["answers"],
            Ports.Input.question_length: corpus['question_lengths'],
            Ports.Input.support_length: corpus['support_lengths'],
            Ports.Input.sample_id: corpus['ids'],
        }

        return get_batches(port_feed)
# Example #5
    if c == True:
        for k, batch in enumerate(batches):
            for kk, seq2 in enumerate(batch["relation_matrices"]):
                for kkk, seq in enumerate(seq2):
                    for kkkk, tok in enumerate(seq):
                        batches[k]["relation_matrices"][kk][kkk][kkkk] = randint(0, len(rel_type_vocab) - 1)
    return batches


# Script entry point: run a random-relation baseline over the test set and
# score its predictions against the original annotations.
if __name__ == "__main__":

    reset_output_dir()

    # Read the annotated instances (presumably brat-style .ann files from
    # test_dir — confirm against read_ann) and build the vocabulary from them.
    vocab = Vocab()
    instances = read_ann(test_dir)
    fill_vocab(instances, vocab)
    batchable = convert_to_batchable_format(instances, vocab)  #[:2]

    #print(batchable)
    batches = list(get_batches(batchable))#[:2]
    # random baseline
    # c=True replaces every entry of the relation matrices with a random
    # relation-type id (see the randint loop in randomBaseline above).
    batches = randomBaseline(batches, a=False, b=False, c=True)

    # Write the randomized predictions back out as annotation files in /tmp.
    for batch in batches:
        convert_batch_to_ann(batch, instances, "/tmp")

    # Evaluate predictions against gold; remove_anno selects which annotation
    # kinds are ignored during scoring.
    remove_anno = "keys"  # "", "types", "rel" or "keys"
    calculateMeasures(test_dir, "/tmp/", remove_anno = remove_anno)

# print(instances[0].labels)