# Build a single ConceptNet query: the CLI input event paired with the
# "Causes" relation.
sample_inputs = []
val = {"e1": args.input, "r": "Causes"}
sample_inputs.append(val)

# Drop any stale sampling settings before reconfiguring the sampler below.
if "bs" in opt.eval:
    opt.eval.pop("bs")
if "k" in opt.eval:
    opt.eval.pop("k")

# Choose the decoding strategy from the CLI flag, e.g. "beam-5" or "greedy".
if "beam" in args.sampling_algorithm:
    opt.eval.sample = "beam"
    # Beam width is encoded after the dash, e.g. "beam-10" -> 10.
    opt.eval.bs = int(args.sampling_algorithm.split("-")[1])
    sampler = BeamSampler(opt, data_loader)
else:
    opt.eval.sample = "greedy"
    opt.eval.bs = 1
    sampler = GreedySampler(opt, data_loader)

# Run COMET over every queued (event, relation) pair and collect the outputs.
outputs = []
for input_case in tqdm(sample_inputs):
    e1 = input_case["e1"]
    r = input_case["r"]
    output = interactive.get_conceptnet_sequence(
        e1, model, sampler, data_loader, text_encoder, r)
    outputs.append(output)

# Use a context manager so the output file is flushed and closed even if
# json.dump raises (the original leaked the handle returned by open()).
with open(args.output_file, "w") as f:
    json.dump(outputs, f)
# Relations to request from COMET/ConceptNet for every input event.
relation = ['AtLocation', 'HasPrerequisite', 'Desires', 'UsedFor']
sampling_algorithm = args.sampling_algorithm  # e.g. "beam-10" or "greedy"

# %%
sampler = interactive.set_sampler(opt, sampling_algorithm, data_loader)

# If any requested relation is unknown, fall back to querying all relations.
# NOTE(review): this rebinds `relation` (the loop's own iterable) to the
# string "all" and breaks immediately — works, but is easy to misread.
for rel in relation:
    if rel not in data.conceptnet_data.conceptnet_relations:
        relation = "all"
        break

start = time.time()
logging.info('Getting Outputs from ConceptNet...')
# One COMET generation per input event, all using the same relation set.
outputs = [
    interactive.get_conceptnet_sequence(input_event, comet_model, sampler,
                                        data_loader, text_encoder, relation)
    for input_event in input_events
]
logging.info('Total Time Used: ' + str(time.time() - start) + "s")

# %% [markdown]
# use bert-as-service to embed the output phrases from conceptnet

# %%
start = time.time()
logging.info('Getting Happy Elements Encodings...')
# Assumes sampling_algorithm is of the form "beam-N" — TODO confirm; a plain
# "greedy" value would raise IndexError here.
num_beam = int(sampling_algorithm.split('-')[1])
# NOTE(review): this statement is truncated in the visible chunk; the inner
# list that `beams` iterates over continues beyond this view.
embeddings = [
    beams for x in outputs for beams in [
def _get_result(self, event, category):
    """Query COMET for `event` under `category` and return the cleaned beams."""
    generated = interactive.get_conceptnet_sequence(
        event,
        self._model,
        self._sampler,
        self._data_loader,
        self._text_encoder,
        category,
    )
    return self.all_beams_cleanup(generated)
]
# Cache of COMET outputs keyed by the candidate n-gram text, so each phrase
# is generated at most once across the whole dataset.
results_cache = {}
for sample in tqdm.tqdm(raw_data):
    # sample[0]: dialogue turns formatted as "speaker: utterance".
    for turn in sample[0]:
        content = turn.split(':')[1].strip()
        sents = nltk.sent_tokenize(content)
        for sent in sents:
            # Drop the final character (trailing punctuation) before the
            # comma split.
            sent = sent.strip()[:-1]
            subsents = sent.split(',')
            for subsent in subsents:
                tokens = tokenizer.tokenize(subsent)
                candidates = get_ngrams(tokens)
                for cand in candidates:
                    if cand not in results_cache:
                        outputs = interactive.get_conceptnet_sequence(
                            cand, model, sampler, data_loader, text_encoder,
                            relation)
                        results_cache[cand] = outputs
    # sample[1]: the QA pairs attached to this dialogue sample.
    for qa in sample[1]:
        # NOTE(review): `ques` (question minus its final char) is computed
        # but never used — `subqs` splits the raw question instead; confirm
        # whether `ques.split(',')` was intended.
        ques = qa['question'][:-1]
        subqs = qa['question'].split(',')
        for subq in subqs:
            subq = subq.strip()
            tokens = tokenizer.tokenize(subq)
            candidates = get_ngrams(tokens)
            for cand in candidates:
                if cand not in results_cache:
                    outputs = interactive.get_conceptnet_sequence(
                        cand, model, sampler, data_loader, text_encoder,
                        relation)
                    results_cache[cand] = outputs
def _get_sequence(self, event, relations, sampler):
    """Run COMET generation for `event` over `relations` using `sampler`."""
    result = interactive.get_conceptnet_sequence(
        event, self._model, sampler,
        self._data_loader, self._text_encoder, relations)
    return result