def parse_semantic_tag(tokenstring):
    """ Extension that appends the SEMANTIC tag to the output of the parser.

    The words of the parsed sentence are sent to the tagger server on port
    6066; the SEMANTIC column of its reply is appended to the original tags.

    tokenstring -- the parser output; must provide split(), as an
                   MBSP.TokenString does.

    Returns the joined token string with the SEMANTIC tag added to each token.
    The client is disconnected even when tagging raises an exception.
    """
    client = MBSP.Mbt(port=6066)
    try:
        # Find the semantic tag of words in the sentence.
        # Example: "macrophage/NN/I-NP/O/O/O/macrophage".
        s1 = tokenstring.split()
        # => [[[u'macrophage', u'NN', u'I-NP', u'O', u'O', u'O', u'macrophage']]]
        # Only the bare words are sent to the server.
        s2 = s1.reduce([MBSP.WORD])
        # => [[[u'macrophage']]]
        s2 = MBSP.TokenString(client.send(s2.join()), tags=[MBSP.WORD, SEMANTIC])
        # => macrophage/protein
        s2 = s2.split()
        # => [[[u'macrophage', u'protein']]]
        # Move the SEMANTIC column from the server reply onto the original tokens.
        s1.tags.append(SEMANTIC, values=s2.tags.pop(s2.tags.index(SEMANTIC)))
        # => [[[u'macrophage', u'NN', u'I-NP', u'O', u'O', u'O', u'macrophage', u'protein']]]
        return s1.join()
        # => macrophage/NN/I-NP/O/O/O/macrophage/protein
    finally:
        # Release the connection on both the success and the error path.
        client.disconnect()
def update_pos_tag(tokenstring):
    """ Event handler that fires when the MBSP parser is done tagging and chunking.
    Updates the part-of-speech tags from a specialized biomedical corpus.

    The words of the sentence are sent to the tagger server on port 6065; the
    reply supplies new part-of-speech tags, and the chunk tags of the original
    input are carried over onto it.

    tokenstring -- the tagged and chunked parser output; must provide split(),
                   as an MBSP.TokenString does.

    Returns the updated string (word/part-of-speech/chunk) to the parser.
    The client is disconnected even when retagging raises an exception.
    """
    client = MBSP.Mbt(port=6065)
    try:
        # Retag the part-of-speech tags with the GENIA corpus.
        # Example: "TGF-beta1-transcribing/NN/I-NP macrophages/NNS/I-NP"
        s1 = tokenstring.split()
        # => [[[u'TGF-beta1-transcribing', u'NN', u'I-NP'], [u'macrophages', u'NNS', u'I-NP']]]
        # Only the bare words are sent to the server.
        s2 = s1.reduce([MBSP.WORD])
        # => [[[u'TGF-beta1-transcribing'], [u'macrophages']]]
        s2 = MBSP.TokenString(client.send(s2.join()), tags=[MBSP.WORD, MBSP.PART_OF_SPEECH])
        # => TGF-beta1-transcribing/JJ macrophages/NNS
        s2 = s2.split()
        # => [[[u'TGF-beta1-transcribing', u'JJ'], [u'macrophages', u'NNS']]]
        # Keep the original chunk tags: move the CHUNK column onto the retagged tokens.
        s2.tags.append(MBSP.CHUNK, values=s1.tags.pop(s1.tags.index(MBSP.CHUNK)))
        # => [[[u'TGF-beta1-transcribing', u'JJ', u'I-NP'], [u'macrophages', u'NNS', u'I-NP']]]
        return s2.join()
        # => TGF-beta1-transcribing/JJ/I-NP macrophages/NNS/I-NP
    finally:
        # Release the connection on both the success and the error path.
        client.disconnect()