Beispiel #1
0
def _main_lozenge_graph(args):
    """Draw the lozenge shaped subgraphs of each document and count them

    For every discourse-stage document in the corpus, a graph is built
    (with CDUs stripped if `args.strip_cdus` is set) and each of its nodes
    is passed to `_maybe_lozenge`. Whenever that returns a (nodes, edges)
    pair, the document's lozenge count goes up by one, its edge count by
    the number of edges returned, and the nodes are remembered. The graph
    restricted to the remembered nodes is written out via
    `write_dot_graph` unless its dot rendering has no nodes.

    Finally, per-document counts (for documents with at least one lozenge)
    and the overall totals are printed to standard output.
    """
    corpus = read_corpus(args, preselected={'stage': ['discourse', 'units']})
    output_dir = get_output_dir(args)
    discourse_keys = sorted(k for k in corpus if k.stage == 'discourse')

    loz_count = Counter()
    loz_edges = Counter()
    for doc_key in discourse_keys:
        full_gra = stacgraph.Graph.from_doc(corpus, doc_key)
        if args.strip_cdus:
            full_gra = full_gra.without_cdus(sloppy=True)

        # nodes that take part in at least one lozenge
        loz_nodes = set()
        for node in full_gra.nodes():
            found = _maybe_lozenge(full_gra, node)
            if found is None:
                continue
            member_nodes, member_edges = found
            loz_count[doc_key] += 1
            loz_edges[doc_key] += len(member_edges)
            loz_nodes |= member_nodes

        # only keep the lozenge nodes in what we draw
        dot_gra = stacgraph.DotGraph(full_gra.copy(loz_nodes))
        if dot_gra.get_nodes():
            write_dot_graph(doc_key,
                            output_dir,
                            dot_gra,
                            run_graphviz=args.draw)

    for doc_key in sorted(loz_count):
        edge_total = '({})'.format(loz_edges[doc_key])
        print(doc_key, loz_count[doc_key], edge_total)
    print('TOTAL lozenges:', sum(loz_count.values()))
    print('TOTAL edges in lozenges:', sum(loz_edges.values()))
Beispiel #2
0
def generate_graphs(settings):
    """
    Draw SVG graphs for each of the documents in the corpus

    Works in two passes over the discourse-stage keys of `settings.corpus`:

    1. write a `.dot` file (at the path given by
       `settings.report.subreport_path`) for every document whose graph
       has at least one node; documents with duplicate annotation ids are
       skipped with a warning on stderr
    2. if `settings.draw` is set, run graphviz `dot` on each `.dot` file
       that exists, producing an `.svg` file next to it

    If graphviz cannot be launched, a message is printed to stderr and
    the remaining documents are not drawn.
    """
    discourse_only = [k for k in settings.corpus if k.stage == 'discourse']
    report = settings.report

    # generate dot files
    for k in discourse_only:
        # work out the path before entering the try block: the warning
        # below mentions it, and the exception can be raised while the
        # graph is being built (ie. before anything else has happened)
        dot_file = report.subreport_path(k, '.dot')
        try:
            gra = egr.DotGraph(egr.Graph.from_doc(settings.corpus, k))
            create_dirname(dot_file)
            if gra.get_nodes():
                with codecs.open(dot_file, 'w', encoding='utf-8') as fout:
                    print(gra.to_string(), file=fout)
        except educe.graph.DuplicateIdException:
            warning = ("Couldn't graph %s because it has duplicate "
                       "annotation ids") % dot_file
            print(warning, file=sys.stderr)

    # attempt to graphviz them
    try:
        print("Generating graphs... (you can safely ^-C here)",
              file=sys.stderr)
        for k in discourse_only:
            dot_file = report.subreport_path(k, '.dot')
            svg_file = report.subreport_path(k, '.svg')
            if fp.exists(dot_file) and settings.draw:
                # argument list rather than a shell string: paths with
                # spaces or shell metacharacters are passed through
                # untouched, and a missing `dot` executable surfaces as
                # the OSError handled below
                subprocess.call(['dot', '-T', 'svg',
                                 '-o', svg_file, dot_file])
    except OSError as oops:
        print("Couldn't run graphviz. (%s)" % oops, file=sys.stderr)
        print("You should install it for easier sanity check debugging.",
              file=sys.stderr)
Beispiel #3
0
def main(args):
    """
    Subcommand main.

    Restricts the corpus to the discourse and units stages, then writes
    one dot graph per discourse-stage document, keeping only what the
    `_keep` predicate built from `args.rel_types` lets through. Documents
    whose graph comes out empty, or which have duplicate annotation ids,
    are reported on stderr and skipped.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    args.stage = 'discourse|units'
    corpus = read_corpus(args, verbose=True)
    output_dir = get_output_dir(args)

    discourse_keys = sorted(key for key in corpus if key.stage == 'discourse')
    for key in discourse_keys:
        try:
            keep_pred = _keep(corpus[key], args.rel_types)
            gra = stacgraph.Graph.from_doc(corpus, key, pred=keep_pred)
            dot_gra = stacgraph.DotGraph(gra)
            if not dot_gra.get_nodes():
                # nothing to draw for this document
                print("Skipping %s (empty graph)" % key, file=sys.stderr)
                continue
            write_dot_graph(key, output_dir, dot_gra, run_graphviz=args.draw)
        except graph.DuplicateIdException:
            print("WARNING: %s has duplicate annotation ids" % key,
                  file=sys.stderr)
Beispiel #4
0
def dump_graph(dump_filename, graph):
    """
    Write a dot graph and run graphviz on it

    The graph is saved as `dump_filename + '.dot'` (parent directories
    are created via `mk_parent_dirs`), after which graphviz `dot` is
    asked to render it to `dump_filename + '.svg'`. If graphviz cannot
    be launched, a warning is printed to stderr and the dot file is
    left in place.
    """
    dot_graph = stac_gr.DotGraph(graph)
    dot_file = dump_filename + '.dot'
    svg_file = dump_filename + '.svg'
    mk_parent_dirs(dot_file)
    with codecs.open(dot_file, 'w', encoding='utf-8') as dotf:
        print(dot_graph.to_string(), file=dotf)
    print("Creating %s" % svg_file, file=sys.stderr)
    try:
        # argument list rather than a shell string, so that filenames
        # containing spaces or shell metacharacters are passed verbatim
        subprocess.call(['dot', '-T', 'svg', '-o', svg_file, dot_file])
    except OSError as oops:
        # without a shell in between, a missing `dot` executable raises
        # instead of just returning a non-zero status; drawing is best
        # effort, so warn and carry on
        print("Couldn't run graphviz. (%s)" % oops, file=sys.stderr)
Beispiel #5
0
def _main_rel_graph(args):
    """
    Draw graphs showing relation instances between EDUs

    One dot graph is written per document: every document in the corpus
    if `args.live` is set, only the discourse-stage ones otherwise.

    Options read from `args`:

    * `highlight`: annotations whose local id matches one of these
      (after conversion by `anno_id_from_tuple`) get an orange
      'highlight' feature before the graph is built
    * `strip_cdus` / `strip_mode`: if set, CDUs are removed from the
      graph using the given mode
    * `split`: additionally write one numbered graph per connected
      component
    * `draw`: passed on to `write_dot_graph` as `run_graphviz`

    Documents with an empty graph or with duplicate annotation ids are
    reported on stderr and skipped.
    """
    args.stage = 'discourse|units'
    corpus = _read_corpus(args)
    output_dir = get_output_dir(args)

    if args.live:
        keys = corpus
    else:
        keys = [k for k in corpus if k.stage == 'discourse']

    # the ids to highlight are the same for every document, so convert
    # them once up front instead of once per document
    if args.highlight:
        highlights = [anno_id_from_tuple(x) for x in args.highlight]
    else:
        highlights = []

    for k in sorted(keys):
        if highlights:
            for anno in corpus[k].annotations():
                if anno.local_id() in highlights:
                    anno.features['highlight'] = 'orange'
        try:
            gra = stacgraph.Graph.from_doc(corpus, k)
            if args.strip_cdus:
                gra = gra.without_cdus(mode=args.strip_mode)
            dot_gra = stacgraph.DotGraph(gra)
            if not dot_gra.get_nodes():
                print("Skipping %s (empty graph)" % k, file=sys.stderr)
                continue
            write_dot_graph(k, output_dir, dot_gra, run_graphviz=args.draw)
            if args.split:
                # one extra graph per connected component, numbered from 1
                ccs = gra.connected_components()
                for part, nodes in enumerate(ccs, 1):
                    gra2 = gra.copy(nodes)
                    write_dot_graph(k,
                                    output_dir,
                                    stacgraph.DotGraph(gra2),
                                    part=part,
                                    run_graphviz=args.draw)
        except graph.DuplicateIdException:
            warning = "WARNING: %s has duplicate annotation ids" % k
            print(warning, file=sys.stderr)
Beispiel #6
0
def _main_rfc_graph(args):
    """
    Draw graphs with the frontier and the violations highlighted

    `args.rfc` selects the class used to analyse each graph ('basic' for
    `BasicRfc`, 'mlast' for `ThreadedRfc`); anything else raises
    NotImplementedError. For each document (all of them if `args.live`
    is set, only the discourse-stage ones otherwise):

    * within each connected component taken on its own, whatever
      `frontier()` returns is highlighted in green
    * on the whole graph, whatever `violations()` returns is highlighted
      in red

    The resulting graph is written out via `write_dot_graph`, unless it
    is empty, in which case a message is printed to stderr instead.
    """
    args.stage = 'discourse|units'
    corpus = _read_corpus(args)
    output_dir = get_output_dir(args)

    rfc_classes = {'basic': BasicRfc,
                   'mlast': ThreadedRfc}
    mk_rfc = rfc_classes.get(args.rfc)
    if mk_rfc is None:
        raise NotImplementedError

    keys = corpus if args.live else\
        [k for k in corpus if k.stage == 'discourse']

    for key in sorted(keys):
        gra = stacgraph.Graph.from_doc(corpus, key)

        # frontier: computed separately for each connected component
        for component in gra.connected_components():
            frontier = mk_rfc(gra.copy(component)).frontier()
            for node in frontier:
                gra.annotation(node).features['highlight'] = 'green'

        # violations: computed on the graph as a whole
        for link in mk_rfc(gra).violations():
            gra.annotation(link).features['highlight'] = 'red'

        dot_gra = stacgraph.DotGraph(gra)
        if not dot_gra.get_nodes():
            print("Skipping %s (empty graph)" % key, file=sys.stderr)
            continue
        write_dot_graph(key, output_dir, dot_gra, run_graphviz=args.draw)