def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None):
    """Align the witnesses of ``collation`` and return the result in the requested format.

    The witnesses are aligned into a fresh ``VariantGraph``; depending on
    ``output`` either the graph itself, a rendering of it, or an alignment
    table derived from it is returned.

    Args:
        collation: the collation object handed to the aligner and the alignment table.
        output: one of "svg", "graph", "json", "html", "html2" or "table".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when truthy, ``join`` is applied to the graph after alignment.
        near_match: forwarded to the aligner.
        astar: when truthy, ``ExperimentalAstarAligner`` is used instead of ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.

    Raises:
        Exception: if ``output`` is not one of the recognised values. The check
            happens only after the alignment and the table have been built.
    """
    # pick the alignment algorithm
    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=near_match, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=near_match,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter,
        )

    # build the variant graph
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)

    # join parallel segments
    if segmentation:
        join(variant_graph)

    # graph-based outputs do not need an alignment table
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    if output == "graph":
        return variant_graph

    # every remaining output format is derived from the alignment table
    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        return export_alignment_table_as_json(alignment_table)
    if output == "html":
        return display_alignment_table_as_HTML(alignment_table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(alignment_table, collation)
    if output == "table":
        return alignment_table
    raise Exception("Unknown output type: " + output)
def collate_pretokenized_json(json, output='table', layout='horizontal', **kwargs):
    """Collate witnesses that were tokenized by the caller.

    Takes more or less the same arguments as ``collate()``, with restrictions:
    only the output types 'json', 'table' and 'html2' are accepted, and
    segmentation must be off (it is forced to ``False`` before collating).

    Args:
        json: mapping with a "witnesses" sequence; every witness is added to the
            collation and must carry a "tokens" sequence.
        output: 'json', 'table' or 'html2'.
        layout: layout of the returned token-level alignment table.
        **kwargs: forwarded to ``collate()``.

    Raises:
        UnsupportedError: for any other output type, or when a truthy
            ``segmentation`` keyword is supplied.
    """
    if output not in ('json', 'table', 'html2'):
        raise UnsupportedError("Output type " + output + " not supported for pretokenized collation")
    if kwargs.get('segmentation'):
        raise UnsupportedError("Segmented output not supported for pretokenized collation")
    kwargs['segmentation'] = False

    # Register every witness with the collation and remember its original
    # token objects so they can be put back into the table afterwards.
    collation = Collation()
    tokenized_witnesses = []
    for witness in json["witnesses"]:
        collation.add_witness(witness)
        tokenized_witnesses.append(witness["tokens"])

    at = collate(collation, output="table", **kwargs)
    if output == "html2":
        return visualizeTableVerticallyWithColors(at, collation)

    # record, per column of the collated table, whether it shows variation
    has_variation_array = [column.variant for column in at.columns]

    # Rebuild the table row by row, replacing every filled cell with the next
    # original token of the corresponding witness.
    tokenized_at = AlignmentTable(collation, layout=layout)
    for row, tokenized_witness in zip(at.rows, tokenized_witnesses):
        new_row = Row(row.header)
        tokenized_at.rows.append(new_row)
        token_counter = 0
        for cell in row.cells:
            if cell:
                new_row.cells.append(tokenized_witness[token_counter])
                token_counter += 1
            else:
                new_row.cells.append(None)

    # Carry the variation information over so the pretokenized table holds the
    # same information as a regular alignment table.
    tokenized_at.has_rank_variation = has_variation_array

    if output == "json":
        return export_alignment_table_as_json(tokenized_at)
    if output == "table":
        # reduce the JSON token objects to their "t" value
        for row in tokenized_at.rows:
            row.cells = [cell["t"] if cell else None for cell in row.cells]
        return tokenized_at
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None):
    """Align the witnesses of ``collation`` and return the result in the requested format.

    The witnesses are aligned into a fresh ``VariantGraph``; depending on
    ``output`` either the graph itself, a rendering of it, or an alignment
    table derived from it is returned.

    Args:
        collation: the collation object handed to the aligner and the alignment table.
        output: one of "svg", "graph", "json", "html", "html2" or "table".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when truthy, ``join`` is applied to the graph after alignment.
        near_match: forwarded to the aligner.
        astar: when truthy, ``ExperimentalAstarAligner`` is used instead of ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.

    Raises:
        Exception: if ``output`` is not one of the recognised values. The check
            happens only after the alignment and the table have been built.
    """
    # pick the alignment algorithm
    if astar:
        aligner = ExperimentalAstarAligner(collation, near_match=near_match, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=near_match,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter,
        )

    # build the variant graph
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)

    # join parallel segments
    if segmentation:
        join(variant_graph)

    # graph-based outputs do not need an alignment table
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    if output == "graph":
        return variant_graph

    # every remaining output format is derived from the alignment table
    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        return export_alignment_table_as_json(alignment_table)
    if output == "html":
        return display_alignment_table_as_HTML(alignment_table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(alignment_table, collation)
    if output == "table":
        return alignment_table
    raise Exception("Unknown output type: " + output)
def collate_pretokenized_json(json, output='table', layout='horizontal', **kwargs):
    """Collate witnesses that were tokenized by the caller.

    Takes more or less the same arguments as ``collate()``, with restrictions:
    only the output types 'json', 'table' and 'html2' are accepted, and
    segmentation must be off (it is forced to ``False`` before collating).

    Args:
        json: mapping with a "witnesses" sequence; every witness is added to the
            collation and must carry a "tokens" sequence.
        output: 'json', 'table' or 'html2'.
        layout: layout of the returned token-level alignment table.
        **kwargs: forwarded to ``collate()``.

    Raises:
        UnsupportedError: for any other output type, or when a truthy
            ``segmentation`` keyword is supplied.
    """
    if output not in ('json', 'table', 'html2'):
        raise UnsupportedError("Output type " + output + " not supported for pretokenized collation")
    if kwargs.get('segmentation'):
        raise UnsupportedError("Segmented output not supported for pretokenized collation")
    kwargs['segmentation'] = False

    # Register every witness with the collation and remember its original
    # token objects so they can be put back into the table afterwards.
    collation = Collation()
    tokenized_witnesses = []
    for witness in json["witnesses"]:
        collation.add_witness(witness)
        tokenized_witnesses.append(witness["tokens"])

    at = collate(collation, output="table", **kwargs)
    if output == "html2":
        return visualizeTableVerticallyWithColors(at, collation)

    # record, per column of the collated table, whether it shows variation
    has_variation_array = [column.variant for column in at.columns]

    # Rebuild the table row by row, replacing every filled cell with the next
    # original token of the corresponding witness.
    tokenized_at = AlignmentTable(collation, layout=layout)
    for row, tokenized_witness in zip(at.rows, tokenized_witnesses):
        new_row = Row(row.header)
        tokenized_at.rows.append(new_row)
        token_counter = 0
        for cell in row.cells:
            if cell:
                new_row.cells.append(tokenized_witness[token_counter])
                token_counter += 1
            else:
                new_row.cells.append(None)

    # Carry the variation information over so the pretokenized table holds the
    # same information as a regular alignment table.
    tokenized_at.has_rank_variation = has_variation_array

    if output == "json":
        return export_alignment_table_as_json(tokenized_at)
    if output == "table":
        # reduce the JSON token objects to their "t" value
        for row in tokenized_at.rows:
            row.cells = [cell["t"] if cell else None for cell in row.cells]
        return tokenized_at
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False,
            scheduler=None):
    """Align the witnesses of ``collation`` and return the result in the requested format.

    Args:
        collation: a collation object, or a dict with a "witnesses" sequence from
            which a ``Collation`` is built.
        output: "svg", "svg_simple", "graph", "json", "html", "html2", "table",
            "xml" or "tei".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when truthy, ``join`` is applied to the graph and the
            ranking is recomputed. Must be falsy when ``near_match`` is truthy.
        near_match: when truthy, every rank below the end vertex is
            post-processed with ``process_rank`` after alignment. The aligner
            itself is always run with ``near_match=False``.
        astar: when truthy, ``ExperimentalAstarAligner`` is used instead of ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.
        svg_output: forwarded to ``display_variant_graph_as_SVG``.
        indent: forwarded to ``export_alignment_table_as_tei``.
        scheduler: handed to ``process_rank`` during near matching. When
            ``None`` (the default) a new ``Scheduler`` is created for this call.

    Raises:
        SegmentationError: if ``near_match`` and ``segmentation`` are both truthy
            (note that ``segmentation`` defaults to ``True``).
        Exception: if ``output`` is not one of the recognised values.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        # A ``Scheduler()`` default in the signature would be created once, at
        # definition time, and shared by every call; build one per call instead.
        if scheduler is None:
            scheduler = Scheduler()
        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)
        # do-while loop to avoid looping through ranking while modifying it;
        # process_rank returns the rank from which to continue downwards
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if not rank > 0:
                break

    # join parallel segments; the ranking is only recomputed here because
    # joining changes the graph (near matching never reaches this branch)
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False,
            scheduler=None):
    """Align the witnesses of ``collation`` and return the result in the requested format.

    Args:
        collation: a collation object, or a dict with a "witnesses" sequence from
            which a ``Collation`` is built.
        output: "svg", "svg_simple", "graph", "json", "html", "html2", "table",
            "xml" or "tei".
        layout: forwarded to ``AlignmentTable``.
        segmentation: when truthy, ``join`` is applied to the graph and the
            ranking is recomputed. Must be falsy when ``near_match`` is truthy.
        near_match: when truthy, every rank below the end vertex is
            post-processed with ``process_rank`` after alignment. The aligner
            itself is always run with ``near_match=False``.
        astar: when truthy, ``ExperimentalAstarAligner`` is used instead of ``EditGraphAligner``.
        detect_transpositions: forwarded to ``EditGraphAligner`` only.
        debug_scores: forwarded to the aligner.
        properties_filter: forwarded to ``EditGraphAligner`` only.
        svg_output: forwarded to ``display_variant_graph_as_SVG``.
        indent: forwarded to ``export_alignment_table_as_tei``.
        scheduler: handed to ``process_rank`` during near matching. When
            ``None`` (the default) a new ``Scheduler`` is created for this call.

    Raises:
        SegmentationError: if ``near_match`` and ``segmentation`` are both truthy
            (note that ``segmentation`` defaults to ``True``).
        Exception: if ``output`` is not one of the recognised values.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(collation,
                                     near_match=False,
                                     detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores,
                                     properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Segmentation not supported for near matching; raise exception if necessary
        if segmentation:
            raise SegmentationError('segmentation must be set to False for near matching')
        # A ``Scheduler()`` default in the signature would be created once, at
        # definition time, and shared by every call; build one per call instead.
        if scheduler is None:
            scheduler = Scheduler()
        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)
        # do-while loop to avoid looping through ranking while modifying it;
        # process_rank returns the rank from which to continue downwards
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if not rank > 0:
                break

    # join parallel segments; the ranking is only recomputed here because
    # joining changes the graph (near matching never reaches this branch)
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)