def test_get_batches():
    """Check batching of a two-column dataset.

    With ``exact_epoch=True`` every example appears exactly once, so ten
    examples at batch size 3 yield four batches (the last one partial).
    With ``exact_epoch=False`` the trailing partial batch is dropped.
    The concrete row order below assumes the shuffle inside
    ``batch.get_batches`` is deterministically seeded.
    """
    data = {
        'data0': [[i] * 2 for i in range(10)],
        'data1': [[i] * 3 for i in range(10)],
    }

    # Expected example ids per batch, in the (seeded) shuffled order.
    expected_ids = [
        [6, 3, 0],
        [4, 5, 2],
        [1, 9, 8],
        [7],
    ]

    batches = list(batch.get_batches(data, batch_size=3, exact_epoch=True))
    for got, ids in zip(batches, expected_ids):
        assert (got['data0'] == np.array([[i] * 2 for i in ids])).all()
        assert (got['data1'] == np.array([[i] * 3 for i in ids])).all()
    assert len(batches) == 4

    batches = list(batch.get_batches(data, batch_size=3, exact_epoch=False))
    assert len(batches) == 3
def dataset_generator(self, dataset: List[Tuple[QASetting, List[Answer]]],
                      is_eval: bool) -> Iterable[Mapping[TensorPort, np.ndarray]]:
    """Preprocess *dataset* and return an iterable of batched feed dicts.

    Each yielded mapping associates a tensor port with the corresponding
    preprocessed numpy column. ``is_eval`` is accepted for interface
    compatibility; preprocessing here does not branch on it.
    """
    corpus = self.preprocess(dataset)
    # Wire each preprocessed column to its tensor port and batch them
    # with the module-level default batch size.
    return get_batches({
        Ports.Input.multiple_support: corpus["support"],
        Ports.Input.question: corpus["question"],
        Ports.Input.atomic_candidates: corpus["candidates"],
        Ports.Targets.candidate_labels: corpus["targets"],
    })
def dataset_generator(
        self,
        dataset: List[Tuple[QASetting, List[Answer]]],
        is_eval: bool,
        test_time=False) -> Iterable[Mapping[TensorPort, np.ndarray]]:
    """Preprocess *dataset* and return batched feed dicts keyed by tensor port.

    ``test_time`` is forwarded to preprocessing (presumably to skip
    target construction at inference time — verify against preprocess).
    Batch size comes from ``self.config['batch_size']``.
    """
    corpus = self.preprocess(dataset, test_time=test_time)
    feed = {
        Ports.Input.question: corpus["question"],
        Ports.Input.atomic_candidates: corpus["candidates"],
        Ports.Targets.target_index: corpus["answers"],
    }
    return get_batches(feed, batch_size=self.config['batch_size'])
def dataset_generator(self, dataset: List[Tuple[QASetting, List[Answer]]],
                      is_eval: bool) -> Iterable[Mapping[TensorPort, np.ndarray]]:
    """Run the shared preprocessing pipeline and return batched feed dicts.

    Uses a single support per example and separate vocabularies for
    questions/support and answers. Only the first element of the
    pipeline result (the corpus) is consumed here.
    """
    corpus = preprocess_with_pipeline(dataset,
                                      self.shared_vocab_config.vocab,
                                      self.answer_vocab,
                                      use_single_support=True,
                                      sepvocab=True)[0]
    # Map each corpus column (tokens, lengths, ids, targets) onto its port.
    feed = {
        Ports.Input.single_support: corpus["support"],
        Ports.Input.question: corpus["question"],
        Ports.Targets.candidate_idx: corpus["answers"],
        Ports.Input.question_length: corpus['question_lengths'],
        Ports.Input.support_length: corpus['support_lengths'],
        Ports.Input.sample_id: corpus['ids'],
    }
    return get_batches(feed)
# NOTE(review): this fragment is the tail of a function (the enclosing `def`
# is not visible here) followed by the script entry point.
# `c == True` would be more idiomatic as `if c:` or `if c is True:` — the only
# visible caller passes a bool, so behavior is unchanged either way.
if c == True:
    # Randomize every relation label in-place: each token-pair entry of each
    # batch's "relation_matrices" tensor is replaced by a random relation id.
    for k, batch in enumerate(batches):
        for kk, seq2 in enumerate(batch["relation_matrices"]):
            for kkk, seq in enumerate(seq2):
                for kkkk, tok in enumerate(seq):
                    # Uniform draw over the relation-type vocabulary.
                    batches[k]["relation_matrices"][kk][kkk][kkkk] = randint(0, len(rel_type_vocab) - 1)
return batches


if __name__ == "__main__":
    # Evaluation driver: read annotated test data, build batches, apply the
    # random baseline, write predictions as .ann files, and score them.
    reset_output_dir()
    vocab = Vocab()
    instances = read_ann(test_dir)
    fill_vocab(instances, vocab)
    batchable = convert_to_batchable_format(instances, vocab) #[:2]
    #print(batchable)
    batches = list(get_batches(batchable))#[:2]
    # random baseline: only relation labels (c=True) are randomized here.
    batches = randomBaseline(batches, a=False, b=False, c=True)
    for batch in batches:
        # Write one .ann prediction file per batch into /tmp for scoring.
        convert_batch_to_ann(batch, instances, "/tmp")
    remove_anno = "keys"  # "", "types", "rel" or "keys"
    calculateMeasures(test_dir, "/tmp/", remove_anno = remove_anno)
    # print(instances[0].labels)