def setUp(self):
    """Pick the expected-tree fixture and build the converter under test.

    Universal-dependency test cases compare against ``trees_ud``; all
    others compare against the Stanford-dependency fixture ``trees_sd``.
    """
    self.trees = trees_ud if self.universal else trees_sd
    # Only pass a backend when the test class pins one; otherwise let
    # get_instance() choose its default.
    options = {'version': self.version, 'download_if_missing': True}
    if self.backend is not None:
        options['backend'] = self.backend
    self.sd = StanfordDependencies.get_instance(**options)
def test_convert_doc_no_jpype(self):
    """Conversion via the subprocess backend must not add 'lemma' infons.

    The subprocess backend (unlike JPype) cannot lemmatize, so the
    converted annotations should carry no ``lemma`` key.
    """
    converter = NegBioPtb2DepConverter()
    # Force the non-JPype backend on the converter's internals.
    converter._backend = 'subprocess'
    converter._sd = StanfordDependencies.get_instance(backend=converter._backend)
    text = 'No pneumothorax.'
    tree = '(S1 (S (S (NP (DT No) (NN pneumothorax))) (. .)))'
    doc = text_to_bioc([text], type='d/p/s')
    sentence = doc.passages[0].sentences[0]
    sentence.infons['parse tree'] = tree
    # Idiomatic call syntax instead of the explicit `__call__` the
    # original used.
    doc = converter(doc)
    sentence = doc.passages[0].sentences[0]
    assert 'lemma' not in sentence.annotations[1].infons
def __init__(self, document_as_string):
    """Construct a document from a string representation.

    The format must follow the CoNLL format, see
    http://conll.cemantix.org/2012/data.html.

    Args:
        document_as_string (str): A representation of a document in
            the CoNLL format.
    """
    # Use the module-level logger consistently (the original mixed
    # `logging.info` with `logger.warning`).
    logger.info("Create CONLL_doc")
    identifier = " ".join(document_as_string.split("\n")[0].split(" ")[2:])
    self.document_table = CoNLLDocument.__string_to_table(
        document_as_string)

    in_sentence_ids = [int(i) for i in self.__extract_from_column(2)]
    indexing_start = in_sentence_ids[0]
    if indexing_start != 0:
        # Lazy %-args; also fixes the missing space the original's
        # implicit string concatenation produced ("input,transformed").
        logger.warning(
            "Detected %d-based indexing for tokens in sentences in input, "
            "transformed to 0-based indexing.", indexing_start)
        in_sentence_ids = [i - indexing_start for i in in_sentence_ids]

    sentence_spans = CoNLLDocument.__extract_sentence_spans(
        in_sentence_ids)

    temp_tokens = self.__extract_from_column(3)
    temp_pos = self.__extract_from_column(4)
    temp_ner = self.__extract_ner()
    temp_speakers = self.__extract_from_column(9)

    coref = CoNLLDocument.__get_span_to_id(self.__extract_from_column(-1))

    parses = [
        CoNLLDocument.get_parse(span, self.__extract_from_column(5),
                                temp_pos, temp_tokens)
        for span in sentence_spans
    ]
    sd = StanfordDependencies.get_instance()
    # "NOPARSE" placeholders would break conversion, so substitute a
    # plain "S" root before converting.
    dep_trees = sd.convert_trees(
        [parse.replace("NOPARSE", "S") for parse in parses])

    # One tuple per sentence: (tokens, pos, ner, speakers, parse, dep tree).
    # Spans are inclusive, hence the `+ 1` on the end index.
    sentences = [
        (temp_tokens[span.begin:span.end + 1],
         temp_pos[span.begin:span.end + 1],
         temp_ner[span.begin:span.end + 1],
         temp_speakers[span.begin:span.end + 1],
         parses[i],
         dep_trees[i])
        for i, span in enumerate(sentence_spans)
    ]
    # Removed the leftover debug statements the original carried
    # (`print("WEDFWEFW")` and a commented-out print).
    super(CoNLLDocument, self).__init__(identifier, sentences, coref)
def test_jpype_backend_creation(self):
    """Requesting the JPype backend must not silently fall back."""
    instance = StanfordDependencies.get_instance(
        backend='jpype', version='3.5.2', download_if_missing=True)
    is_jpype = isinstance(instance, JPypeBackend)
    assert is_jpype, "Fell back to another backend due to a JPype error"
def test_subprocess_backend_creation(self):
    """Requesting the subprocess backend must yield a SubprocessBackend."""
    instance = StanfordDependencies.get_instance(
        backend='subprocess', version='3.4.1', download_if_missing=True)
    assert isinstance(instance, SubprocessBackend)
def setUp(self):
    """Build the converter under test from this case's configuration."""
    options = {'version': self.version, 'download_if_missing': True}
    # A backend is only forwarded when the test class pins one.
    if self.backend is not None:
        options['backend'] = self.backend
    self.sd = StanfordDependencies.get_instance(**options)
def test_jpype_backend_creation():
    """get_instance(backend='jpype') must not silently fall back."""
    instance = StanfordDependencies.get_instance(
        backend='jpype', version='3.4.1', download_if_missing=True)
    is_jpype = isinstance(instance, JPypeBackend)
    assert is_jpype, "Fell back to another backend due to a JPype error"
def test_subprocess_backend_creation():
    """get_instance(backend='subprocess') must yield a SubprocessBackend."""
    instance = StanfordDependencies.get_instance(
        backend='subprocess', version='3.4.1', download_if_missing=True)
    assert isinstance(instance, SubprocessBackend)