예제 #1
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None):
    """Align the witnesses of ``collation`` and return the result in the ``output`` format.

    A fresh VariantGraph is filled by the selected aligner, optionally passed
    through ``join()``, and then returned either directly or converted into an
    alignment table representation.

    Args:
        collation: The collation handed to the aligner and to AlignmentTable.
        output: One of "svg", "graph", "json", "html", "html2" or "table".
        layout: Forwarded to AlignmentTable.
        segmentation: When true, ``join()`` is applied to the graph after alignment.
        near_match: Forwarded to the aligner.
        astar: When true, ExperimentalAstarAligner is used instead of EditGraphAligner.
        detect_transpositions: Forwarded to EditGraphAligner only.
        debug_scores: Forwarded to the aligner.
        properties_filter: Forwarded to EditGraphAligner only.

    Returns:
        Whatever the renderer for ``output`` produces; the graph itself for
        "graph" and the AlignmentTable itself for "table".

    Raises:
        Exception: If ``output`` is none of the recognised values. This is only
            detected after the alignment has been carried out.
    """
    # The A* aligner accepts fewer options than the edit-graph aligner.
    if astar:
        aligner = ExperimentalAstarAligner(
            collation, near_match=near_match, debug_scores=debug_scores)
    else:
        aligner = EditGraphAligner(
            collation,
            near_match=near_match,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)

    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        # join parallel segments
        join(variant_graph)

    # Graph-based outputs are served before any alignment table is built.
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    elif output == "graph":
        return variant_graph

    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        return export_alignment_table_as_json(alignment_table)
    elif output == "html":
        return display_alignment_table_as_HTML(alignment_table)
    elif output == "html2":
        return visualizeTableVerticallyWithColors(alignment_table, collation)
    elif output == "table":
        return alignment_table
    raise Exception("Unknown output type: " + output)
예제 #2
0
def collate_pretokenized_json(json,
                              output='table',
                              layout='horizontal',
                              **kwargs):
    """Collate pretokenized witnesses and map the alignment back onto the input tokens.

    The witnesses are aligned through ``collate()`` with segmentation switched
    off. The resulting table is then rebuilt so that every occupied cell holds
    the matching token from the witness's own "tokens" sequence (taken in
    order), and every empty cell holds None.

    Args:
        json: Mapping with a "witnesses" entry; each witness is added to a new
            Collation and must provide a "tokens" sequence.
        output: 'json', 'table' or 'html2'.
        layout: Layout given to the rebuilt AlignmentTable. It is not passed to
            the internal ``collate()`` call.
        **kwargs: Forwarded to ``collate()``; 'segmentation' is forced to False.

    Returns:
        For 'html2' the result of visualizeTableVerticallyWithColors on the
        table returned by ``collate()``; for 'json' the JSON export of the
        rebuilt table; for 'table' the rebuilt table with every token replaced
        by its "t" entry.

    Raises:
        UnsupportedError: If ``output`` is not one of the supported values or a
            truthy 'segmentation' keyword is supplied.
    """
    supported_outputs = ['json', 'table', 'html2']
    if output not in supported_outputs:
        raise UnsupportedError("Output type " + output +
                               " not supported for pretokenized collation")
    if kwargs.get('segmentation'):
        raise UnsupportedError(
            "Segmented output not supported for pretokenized collation")
    kwargs['segmentation'] = False

    # Register every witness and remember its original tokens, in witness order.
    collation = Collation()
    witness_tokens = []
    for witness in json["witnesses"]:
        collation.add_witness(witness)
        witness_tokens.append(witness["tokens"])

    aligned = collate(collation, output="table", **kwargs)
    if output == "html2":
        return visualizeTableVerticallyWithColors(aligned, collation)

    # Variation flag of every column of the aligned table.
    column_variation = [column.variant for column in aligned.columns]

    rebuilt_table = AlignmentTable(collation, layout=layout)
    for aligned_row, tokens in zip(aligned.rows, witness_tokens):
        rebuilt_row = Row(aligned_row.header)
        rebuilt_table.rows.append(rebuilt_row)
        # Occupied cells consume the witness tokens one by one, in order.
        consumed = 0
        for cell in aligned_row.cells:
            if cell:
                rebuilt_row.cells.append(tokens[consumed])
                consumed += 1
            else:
                rebuilt_row.cells.append(None)

    # Carry the variation information over to the rebuilt table.
    rebuilt_table.has_rank_variation = column_variation

    if output == "json":
        return export_alignment_table_as_json(rebuilt_table)
    if output == "table":
        # Reduce each token object to its "t" entry.
        for rebuilt_row in rebuilt_table.rows:
            rebuilt_row.cells = [
                cell["t"] if cell else None for cell in rebuilt_row.cells
            ]
        return rebuilt_table
예제 #3
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None):
    """Collate ``collation`` into a variant graph and render it as ``output``.

    Args:
        collation: The collation handed to the aligner and to AlignmentTable.
        output: "svg" or "graph" for graph-based results; "json", "html",
            "html2" or "table" for results derived from an AlignmentTable.
        layout: Forwarded to AlignmentTable.
        segmentation: When true, ``join()`` is run on the graph after alignment.
        near_match: Forwarded to whichever aligner is used.
        astar: Selects ExperimentalAstarAligner instead of EditGraphAligner.
        detect_transpositions: Only forwarded to EditGraphAligner.
        debug_scores: Forwarded to whichever aligner is used.
        properties_filter: Only forwarded to EditGraphAligner.

    Returns:
        The graph, the alignment table, or the value produced by the renderer
        that corresponds to ``output``.

    Raises:
        Exception: If ``output`` matches none of the known formats (raised
            after the graph and the table have already been built).
    """
    # Options shared by both aligners; the edit-graph aligner takes two more.
    aligner_options = {"near_match": near_match, "debug_scores": debug_scores}
    if astar:
        aligner_class = ExperimentalAstarAligner
    else:
        aligner_class = EditGraphAligner
        aligner_options["detect_transpositions"] = detect_transpositions
        aligner_options["properties_filter"] = properties_filter
    aligner = aligner_class(collation, **aligner_options)

    # build graph
    variant_graph = VariantGraph()
    aligner.collate(variant_graph, collation)
    if segmentation:
        # join parallel segments
        join(variant_graph)

    # Outputs that only need the graph.
    if output == "svg":
        return display_variant_graph_as_SVG(variant_graph)
    if output == "graph":
        return variant_graph

    # Everything else is derived from the alignment table.
    alignment_table = AlignmentTable(collation, variant_graph, layout)
    if output == "json":
        return export_alignment_table_as_json(alignment_table)
    if output == "html":
        return display_alignment_table_as_HTML(alignment_table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(alignment_table, collation)
    if output == "table":
        return alignment_table
    raise Exception("Unknown output type: " + output)
예제 #4
0
def collate_pretokenized_json(json, output='table', layout='horizontal', **kwargs):
    """Align pretokenized witnesses and return a table of the original input tokens.

    ``collate()`` is called with segmentation disabled. Its table is used as a
    template: for each witness row, occupied cells are filled with the
    witness's own tokens (in order) and empty cells with None.

    Args:
        json: Mapping whose "witnesses" entry is iterated; each witness is
            added to a new Collation and must expose a "tokens" sequence.
        output: 'json', 'table' or 'html2'.
        layout: Layout for the rebuilt AlignmentTable (the internal
            ``collate()`` call does not receive it).
        **kwargs: Passed on to ``collate()`` with 'segmentation' set to False.

    Returns:
        'html2': result of visualizeTableVerticallyWithColors for the table
        returned by ``collate()``. 'json': JSON export of the rebuilt table.
        'table': the rebuilt table, each token reduced to its "t" entry.

    Raises:
        UnsupportedError: For any other ``output`` value, or when a truthy
            'segmentation' keyword is passed.
    """
    if output not in ('json', 'table', 'html2'):
        raise UnsupportedError("Output type " + output + " not supported for pretokenized collation")
    if kwargs.get('segmentation', False):
        raise UnsupportedError("Segmented output not supported for pretokenized collation")
    kwargs['segmentation'] = False

    # Build the collation and keep each witness's token sequence alongside it.
    collation = Collation()
    original_tokens = []
    for witness in json["witnesses"]:
        collation.add_witness(witness)
        original_tokens.append(witness["tokens"])

    template = collate(collation, output="table", **kwargs)
    if output == "html2":
        return visualizeTableVerticallyWithColors(template, collation)

    # Per-column variation flags, copied onto the rebuilt table further down.
    variation_flags = [column.variant for column in template.columns]

    token_table = AlignmentTable(collation, layout=layout)
    for template_row, tokens in zip(template.rows, original_tokens):
        token_row = Row(template_row.header)
        token_table.rows.append(token_row)
        next_token = 0
        for cell in template_row.cells:
            if not cell:
                token_row.cells.append(None)
                continue
            # An occupied cell takes the next unused token of this witness.
            token_row.cells.append(tokens[next_token])
            next_token += 1

    token_table.has_rank_variation = variation_flags

    if output == "json":
        return export_alignment_table_as_json(token_table)
    if output == "table":
        # Replace each token object by its "t" entry.
        for token_row in token_table.rows:
            token_row.cells = [cell["t"] if cell else None for cell in token_row.cells]
        return token_table
예제 #5
0
def collate(collation,
            output="table",
            layout="horizontal",
            segmentation=True,
            near_match=False,
            astar=False,
            detect_transpositions=False,
            debug_scores=False,
            properties_filter=None,
            svg_output=None,
            indent=False,
            scheduler=None):
    """Align the witnesses of ``collation`` and return the result as ``output``.

    Args:
        collation: A Collation, or a dict with a "witnesses" entry from which a
            Collation is built by adding each witness.
        output: "svg", "svg_simple" or "graph" for graph-based results; "json",
            "html", "html2", "table", "xml" or "tei" for results derived from
            an AlignmentTable.
        layout: Forwarded to AlignmentTable.
        segmentation: When true, ``join()`` is applied to the graph and the
            ranking is recomputed. Must be False when ``near_match`` is true.
        near_match: When true, ``process_rank()`` is run over the ranks below
            the end vertex's rank after the alignment. The aligner itself is
            always created with ``near_match=False``.
        astar: Selects ExperimentalAstarAligner instead of EditGraphAligner.
        detect_transpositions: Only forwarded to EditGraphAligner.
        debug_scores: Forwarded to whichever aligner is used.
        properties_filter: Only forwarded to EditGraphAligner.
        svg_output: Forwarded to display_variant_graph_as_SVG.
        indent: Forwarded to export_alignment_table_as_tei.
        scheduler: Forwarded to ``process_rank()``. When None, a new
            Scheduler is created for this call.

    Returns:
        The graph, the alignment table, or the value produced by the renderer
        that corresponds to ``output``.

    Raises:
        SegmentationError: If ``near_match`` and ``segmentation`` are both true.
        Exception: If ``output`` matches none of the known formats.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # Segmentation is not supported for near matching. This depends on the
    # arguments only, so reject it before doing any alignment work.
    if near_match and segmentation:
        raise SegmentationError(
            'segmentation must be set to False for near matching')

    # assume collation is collation (by now); no error trapping
    if not astar:
        algorithm = EditGraphAligner(
            collation,
            near_match=False,
            detect_transpositions=detect_transpositions,
            debug_scores=debug_scores,
            properties_filter=properties_filter)
    else:
        algorithm = ExperimentalAstarAligner(collation,
                                             near_match=False,
                                             debug_scores=debug_scores)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)
    if near_match:
        # A Scheduler() default in the signature would be created once, at
        # definition time, and shared by every call; create one per call.
        if scheduler is None:
            scheduler = Scheduler()

        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)

        # do-while loop to avoid looping through ranking while modifying it;
        # process_rank() returns the rank from which to continue downwards.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking,
                                witness_count)
            rank -= 1
            if rank <= 0:
                break

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)
    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph
    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)
예제 #6
0
def collate(collation, output="table", layout="horizontal", segmentation=True, near_match=False, astar=False,
            detect_transpositions=False, debug_scores=False, properties_filter=None, svg_output=None, indent=False,
            scheduler=None):
    """Collate ``collation`` into a variant graph and render it as ``output``.

    Args:
        collation: A Collation, or a dict with a "witnesses" entry from which a
            Collation is built by adding each witness.
        output: "svg", "svg_simple" or "graph" for graph-based results; "json",
            "html", "html2", "table", "xml" or "tei" for results derived from
            an AlignmentTable.
        layout: Forwarded to AlignmentTable.
        segmentation: When true, ``join()`` is applied to the graph and the
            ranking is recomputed. Must be False when ``near_match`` is true.
        near_match: When true, ``process_rank()`` is run over the ranks below
            the end vertex's rank after the alignment. The aligner itself is
            always created with ``near_match=False``.
        astar: Selects ExperimentalAstarAligner instead of EditGraphAligner.
        detect_transpositions: Only forwarded to EditGraphAligner.
        debug_scores: Forwarded to whichever aligner is used.
        properties_filter: Only forwarded to EditGraphAligner.
        svg_output: Forwarded to display_variant_graph_as_SVG.
        indent: Forwarded to export_alignment_table_as_tei.
        scheduler: Forwarded to ``process_rank()``. When None, a new
            Scheduler is created for this call.

    Returns:
        The graph, the alignment table, or the value produced by the renderer
        that corresponds to ``output``.

    Raises:
        SegmentationError: If ``near_match`` and ``segmentation`` are both true.
        Exception: If ``output`` matches none of the known formats.
    """
    # collation may be collation or json; if it's the latter, use it to build a real collation
    if isinstance(collation, dict):
        json_collation = Collation()
        for witness in collation["witnesses"]:
            json_collation.add_witness(witness)
        collation = json_collation

    # Segmentation is not supported for near matching. The check needs only the
    # arguments, so fail before the alignment is computed.
    if near_match and segmentation:
        raise SegmentationError('segmentation must be set to False for near matching')

    # assume collation is collation (by now); no error trapping
    if astar:
        algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores)
    else:
        algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions,
                                     debug_scores=debug_scores, properties_filter=properties_filter)

    # build graph
    graph = VariantGraph()
    algorithm.collate(graph, collation)
    ranking = VariantGraphRanking.of(graph)

    if near_match:
        # Do not use Scheduler() as a signature default: it would be evaluated
        # once at definition time and the same instance reused by every call.
        if scheduler is None:
            scheduler = Scheduler()

        highest_rank = ranking.byVertex[graph.end]
        witness_count = len(collation.witnesses)

        # do-while loop to avoid looping through ranking while modifying it;
        # process_rank() returns the rank from which to continue downwards.
        rank = highest_rank - 1
        while True:
            rank = process_rank(scheduler, rank, collation, ranking, witness_count)
            rank -= 1
            if rank <= 0:
                break

    # join parallel segments
    if segmentation:
        join(graph)
        ranking = VariantGraphRanking.of(graph)

    # check which output format is requested: graph or table
    if output == "svg" or output == "svg_simple":
        return display_variant_graph_as_SVG(graph, svg_output, output)
    if output == "graph":
        return graph

    # create alignment table
    table = AlignmentTable(collation, graph, layout, ranking)
    if output == "json":
        return export_alignment_table_as_json(table)
    if output == "html":
        return display_alignment_table_as_HTML(table)
    if output == "html2":
        return visualizeTableVerticallyWithColors(table, collation)
    if output == "table":
        return table
    if output == "xml":
        return export_alignment_table_as_xml(table)
    if output == "tei":
        return export_alignment_table_as_tei(table, indent)
    raise Exception("Unknown output type: " + output)