def dump_line(self, outputs: JsonDict) -> str:  # pylint: disable=no-self-use
    """Serialize *outputs* as a single JSON line, keeping only the
    citation-intent fields.

    Override this method if you want something other than JSON-lines output.
    Note: *outputs* is filtered in place.
    """
    keep = ('citedPaperId', 'citingPaperId', 'excerptCitationIntents')
    for key in list(outputs):
        if key not in keep:
            del outputs[key]
    return json.dumps(outputs, cls=JsonFloatEncoder) + "\n"
def dump_line(self, outputs: JsonDict) -> str:  # pylint: disable=no-self-use
    """Serialize *outputs* as a single JSON line, keeping only the
    citation-prediction fields.

    Override this method if you want something other than JSON-lines output.
    Note: *outputs* is filtered in place.
    """
    keep = ('citation_id', 'prediction', 'probabilities', 'citation_text')
    for key in list(outputs):
        if key not in keep:
            del outputs[key]
    return json.dumps(outputs, cls=JsonFloatEncoder) + "\n"
def dump_line(self, output: JsonDict) -> str:  # pylint: disable=no-self-use
    """Serialize a parse prediction as one JSON line containing the
    sentence id, the tokens, and the extracted parse spans.

    Mutates *output* in place by dropping bulky, frontend-only fields.
    """
    output.pop('class_probabilities', None)
    tree = output['hierplane_tree']
    tree.pop('linkNameToLabel', None)
    tree.pop('nodeTypeToStyle', None)
    # Each span is a 4-tuple: (start, end (exclusive), span_text, span_label).
    spans = self.get_parse_spans(tree, [])
    metadata = output['metadata']
    record = {
        'sentence_id': metadata.get('sentence_id'),
        'tokens': metadata['tokens'],
        'spans': spans,
    }
    return json.dumps(record) + "\n"
def predict_json(self, inputs: JsonDict) -> JsonDict:
    """Predict nearest-neighbor tracks for a JSON request.

    If ``inputs`` carries a ``track_id``, look the track up in the label
    vocabulary and return its nearest neighbors from the prebuilt index.
    Otherwise run the model on the inputs and, when an index is available,
    return the neighbors of the predicted logits; with no index, return the
    raw model output.  ``n`` (default 10) controls how many neighbors are
    returned and is consumed from ``inputs``.
    """
    num_neighbors = inputs.pop('n', 10)

    if 'track_id' in inputs:
        if self.index is None:
            raise AttributeError("Please build an index before searching by track.")
        label_to_index = self.vocab.get_token_to_index_vocabulary("labels")
        item_idx = label_to_index[inputs['track_id']]
        # Ask for one extra neighbor: the nearest item is the query itself.
        neighbor_ids = self.index.get_nns_by_item(item_idx, num_neighbors + 1)[1:]
        return self.neighbors_to_tracks(neighbor_ids)

    outputs = self.predict_instance(self._json_to_instance(inputs))
    outputs['inputs'] = inputs
    if self.index:
        neighbor_ids = self.index.get_nns_by_vector(outputs.get('logits'), num_neighbors)
        return self.neighbors_to_tracks(neighbor_ids)
    return outputs
def get_most_important_part(self, instance: Instance, output: JsonDict):
    """Decode the attention argmax into the token span it points at.

    ``output["alphas"]`` holds one attention weight per candidate span,
    where spans are enumerated in row-major order as (i, j) for
    i in [1, len-2] and j in [i, len-2] (first and last tokens are
    excluded — presumably sentence-boundary markers; confirm upstream).

    Mutates ``output`` in place: removes ``"alphas"`` and adds
    ``"best_span"`` (the span text), ``"nom"`` (flat index reached),
    ``"ij"`` (span bounds), ``"break"`` (whether the span was found)
    and ``"val"`` (the winning attention weight).  Returns ``output``.
    """
    if "alphas" not in output:
        return output
    alphas = output.pop("alphas")
    best_span = argmax(alphas)
    tokens = instance["tokens"].tokens
    length = len(tokens)
    nom = 0            # flat index of the span currently being visited
    span = (0, 0)
    flag = False       # set once the best span has been located
    for i in range(1, length - 1):
        for j in range(i, length - 1):
            span = (i, j)
            if nom == best_span:
                flag = True
                break
            nom += 1
        if flag:
            # Bug fix: without this outer break, later outer iterations
            # immediately re-matched (nom stops incrementing) and kept
            # overwriting ``span``, so the reported span collapsed to
            # (length - 2, length - 2) regardless of the argmax.
            break
    i, j = span
    best_tokens = tokens[i:j + 1]
    output["best_span"] = " ".join(token.text for token in best_tokens)
    output["nom"] = nom
    output["ij"] = [i, j]
    output["break"] = flag
    output["val"] = alphas[best_span]
    # Return output on this path too, for consistency with the early return.
    return output
def predict(self, inputs: JsonDict):
    """Run prediction, accepting ``"passage"`` as an alias for ``"context"``
    for compatibility with other reading-comprehension models."""
    try:
        inputs["context"] = inputs.pop("passage")
    except KeyError:
        pass  # no alias present; use inputs as-is
    return super().predict(inputs)