# Merge per-chunk pickle outputs into a single dict.
# NOTE(review): `output` and `out_dir` are not defined in the visible portion of
# this file — this loop appears to be the tail of a chunk-merging routine whose
# header lies outside this view; confirm its enclosing scope before refactoring.
for fn in os.listdir(out_dir):
    chunk_path = os.path.join(out_dir, fn)
    with open(chunk_path, 'rb') as f:
        chunk = pickle.load(f)
        output.update(chunk)


if __name__ == '__main__':
    # Command-line interface for KG generation from OIE6 output.
    parser = argparse.ArgumentParser('Generate knowledge graphs from OIE6 output.')
    parser.add_argument('--dataset', default='hotpot_qa', help='trivia_qa or hotpot_qa')
    parser.add_argument(
        '-debug', default=False, action='store_true',
        help='If true, run on tiny portion of train dataset')
    parser.add_argument('--dtypes', default=None)
    args = parser.parse_args()

    dataset = dataset_factory(args.dataset)

    # Two spaCy pipelines: a stripped-down one for tokenization only, and a
    # full pipeline (parser enabled) for noun-phrase chunking.
    print('Loading Spacy...')
    spacy_tokenizer = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger', 'textcat'])
    chunker = spacy.load('en_core_web_lg')

    data_dir = os.path.join('..', 'data', dataset.name)

    print('Loading ALBERT...')
    # getting the list of GPUs available
    if torch.cuda.is_available():
        DEVICE = torch.device('cuda')
        device_ids = list(range(torch.cuda.device_count()))
        gpus = len(device_ids)
        print('GPU detected')
    else:
        DEVICE = torch.device("cpu")
def __init__(self, dataset_str, dtype):
    """Load one split of a dataset plus its precomputed knowledge graphs.

    :param dataset_str: dataset identifier handed to ``dataset_factory``
        and used as the directory name under ``../data``.
    :param dtype: split name (e.g. ``'train'``); indexes the factory result
        and selects the pickled KG file ``kg_<dtype>.pk``.
    """
    self.dataset = dataset_factory(dataset_str)[dtype]
    # Knowledge graphs were serialized ahead of time, one pickle per split.
    kg_path = os.path.join('..', 'data', dataset_str, 'kg_{}.pk'.format(dtype))
    with open(kg_path, 'rb') as f:
        self.kgs = pickle.load(f)