def __init__(self, fileobject, lines_to_ignore, chunkmap):
    """Set up the reader by delegating to ChunkedNLMOutput.__init__.

    The caller-supplied fileobject, lines_to_ignore and chunkmap are
    forwarded unchanged; this class pins the per-line type to MetamapLine
    and the line-set type to MetamapLineList.
    """
    ChunkedNLMOutput.__init__(
        self,
        fileobject=fileobject,
        lines_to_ignore=lines_to_ignore,
        chunkmap=chunkmap,
        type_of_lines=MetamapLine,
        type_of_line_set=MetamapLineList,
    )
def is_ignorable(self, a_line):
    """Decide whether a_line should be skipped.

    For METAMAP fielded output only concept lines are processed. A line is
    ignorable when any of the following holds:

    * the base-class check (ChunkedNLMOutput.is_ignorable) rejects it;
    * it has fewer than three tab-separated fields, so there is no type
      field to inspect;
    * its third tab-separated field, lower-cased and stripped, is not 'c'.

    Returns True if the line must be ignored, False otherwise.
    """
    # Run the inherited tests first so the pre-existing behaviour is kept.
    if ChunkedNLMOutput.is_ignorable(self, a_line):
        return True

    try:
        type_field = a_line.split('\t')[2]
    except IndexError:
        # No type field at all: this line is not wanted.
        return True

    # Only lines whose type is 'c' are kept.
    return type_field.lower().strip() != 'c'
def __iter__(self):
    """Iterate over line sets, expanding every MappingLine into its concepts.

    Builds on ChunkedNLMOutput.__iter__: each line set it produces is
    repackaged into a new LineList carrying the same set_id. Within a set,
    every MappingLine is replaced (in place, order preserved) by the items
    produced by its iter_concepts(); any other line is kept as-is.
    """
    for original_set in ChunkedNLMOutput.__iter__(self):
        expanded = []
        for entry in original_set.lines:
            if not isinstance(entry, MappingLine):
                expanded.append(entry)
                continue
            # A MappingLine bundles several concepts; flatten them out.
            expanded.extend(entry.iter_concepts())
        yield LineList(original_set.set_id, expanded)
def __iter__(self):
    """Yield one LineList per line set, with MappingLines split into concepts.

    The line sets come from ChunkedNLMOutput.__iter__. Each MappingLine in
    a set is substituted by the concepts obtained from iter_concepts();
    lines of any other type pass through untouched. The resulting list is
    wrapped in a LineList that reuses the original set_id.
    """
    parent_iterator = ChunkedNLMOutput.__iter__(self)
    for lineset in parent_iterator:
        repackaged = []
        for current in lineset.lines:
            if isinstance(current, MappingLine):
                # One MappingLine may hold several concepts; add each one.
                repackaged.extend(concept for concept in current.iter_concepts())
            else:
                repackaged.append(current)
        yield LineList(lineset.set_id, repackaged)
def __init__(self, fileobject, lines_to_ignore, chunkmap):
    """Set up the reader by delegating to ChunkedNLMOutput.__init__.

    fileobject, lines_to_ignore and chunkmap are forwarded unchanged.
    Lines are built through SemrepOutput.line_factory, which is handed to
    the base class as its type_of_lines.
    """
    ChunkedNLMOutput.__init__(
        self,
        fileobject,
        chunkmap=chunkmap,
        lines_to_ignore=lines_to_ignore,
        type_of_lines=SemrepOutput.line_factory,
    )
def __init__(self, fileobject, lines_to_ignore=DEFAULT_LINES_TO_IGNORE,
             chunkmap=FakeChunkmap()):
    """Set up the reader by delegating to ChunkedNLMOutput.__init__.

    fileobject is required; lines_to_ignore falls back to
    DEFAULT_LINES_TO_IGNORE and chunkmap to a FakeChunkmap instance. The
    per-line type is pinned to MtiLine.
    """
    # NOTE(review): the FakeChunkmap() default is created once, when the
    # function is defined, and shared by every call that omits chunkmap.
    # Presumably harmless if FakeChunkmap is stateless -- confirm.
    ChunkedNLMOutput.__init__(
        self,
        fileobject=fileobject,
        lines_to_ignore=lines_to_ignore,
        chunkmap=chunkmap,
        type_of_lines=MtiLine,
    )