def __init__(self, tokenizer=None, alpino=None, graph_aligner=None):
    """
    Initialize the aligner's collaborators.

    @keyword tokenizer: tokenizer instance, or None to use the default
    @keyword alpino: Alpino parser instance, or None to use the default
    @keyword graph_aligner: graph aligner instance, or None to use the default
    """
    # NOTE(review): the init_* helpers are defined elsewhere in this class;
    # their call order is assumed significant — do not reorder.
    self.init_tokenizer(tokenizer)
    self.init_alpino(alpino)
    self.init_graph_xml_parser()
    self.init_graph_aligner(graph_aligner)
    self.init_others()
    # a pair of graphbank dummies, which are needed when creating a new
    # GraphMapping instance
    self._graphbanks = Pair(GraphBank("", "alpino"),
                            GraphBank("", "alpino"))
def test_equal(self):
    """
    Two graphbanks compare equal when they are loaded from the same
    underlying file, even via different relative paths; banks loaded
    from different files compare unequal.
    """
    def load(path):
        # load a graphbank from the given path in alpino format
        bank = GraphBank(path, "alpino")
        bank.load()
        return bank

    gb1 = load("data/source-gb-1.xml")
    # identical path: equal
    self.assertTrue(gb1 == load("data/source-gb-1.xml"))
    # same file reached through a different relative path: still equal
    self.assertTrue(gb1 == load("../../test/gb/data/source-gb-1.xml"))
    # a different file: not equal
    self.assertFalse(gb1 == load("data/target-gb-1.xml"))
def gb_stats(files, format, with_empty_nodes=False, with_failed_parses=False,
             with_punc=False, threshold=0):
    """
    Collect statistics over a series of graphbank files.

    @PARAM files: sequence of graphbank filenames
    @PARAM format: graphbank format (e.g. "alpino"); note this parameter
        shadows the builtin of the same name, but is kept for
        backward compatibility with keyword callers
    @KEYWORD with_empty_nodes: include empty nodes in the counts
    @KEYWORD with_failed_parses: include failed parses in the counts
    @KEYWORD with_punc: include punctuation in the counts
    @KEYWORD threshold: threshold passed through to graph_stats
    @RETURN: a summarized GbStatsTable with one row per file
    """
    gb_table = GbStatsTable(size=len(files))
    # fix: removed dead local `gb_row` that was assigned but never used
    for i, fn in enumerate(files):
        bank = GraphBank(file_path=fn, format=format)
        bank.load()
        # row i of the table accumulates the stats for file fn
        graph_stats(bank, gb_table, i, with_empty_nodes, with_failed_parses,
                    with_punc, with_unaligned_roots=True, threshold=threshold,
                    with_unaligned_graphs=True)
    gb_table.summarize()
    return gb_table
def test__iter__(self):
    """Iterating over a loaded graphbank yields each of its graphs."""
    bank = GraphBank("data/source-gb-1.xml", "alpino")
    bank.load()
    # materialize the iterator; this fixture contains three graphs
    self.assertEqual(len(list(bank)), 3)
def test_init_1(self):
    """A loaded graphbank reports the number of graphs it contains."""
    bank = GraphBank("data/source-gb-1.xml", "alpino")
    bank.load()
    # this fixture contains three graphs
    self.assertEqual(len(bank), 3)
def pgc_from_ptc(text_corpus_file, source_graphbank_file,
                 target_graphbank_file, focus_tags=Pair("s", "s"),
                 graph_formats=Pair("alpino", "alpino"), relations=RELATIONS,
                 min_token_diff=0, max_token_len=99999):
    """
    Create a new parallel graph corpus from a parallel text corpus and a
    pair of graphbanks

    @PARAM text_corpus_file: parallel text corpus filename
    @PARAM source_graphbank_file: source graphbank filename
    @PARAM target_graphbank_file: target graphbank filename

    @KEYWORD focus_tags: pair of focus tags
    @KEYWORD graph_formats: pair of graphbank formats
    @KEYWORD relations: list of alignment relations
    @KEYWORD min_token_diff: minimum number of different tokens
    @KEYWORD max_token_len: maximum number of tokens per focus element

    @RETURN: ParallelGraphCorpus object
    """
    # fix: docstring previously documented non-existent parameters
    # (source_bank/target_bank) and contained typos; code is unchanged.

    # read parallel text corpus
    text_corpus = HitaextDoc(file=text_corpus_file)
    doc_trees = text_corpus.get_doc_trees(search=True)

    # read graph banks
    source_bank = GraphBank(source_graphbank_file, graph_formats.source)
    source_bank.load()
    target_bank = GraphBank(target_graphbank_file, graph_formats.target)
    target_bank.load()
    graph_banks = Pair(source_bank, target_bank)

    # create an empty parallel graph corpus
    graph_corpus = ParallelGraphCorpus(relations=relations)

    for alignment in text_corpus.alignment:
        # only consider alignments between the requested focus tags
        if (alignment.get("from_tag") != focus_tags.source or
            alignment.get("to_tag") != focus_tags.target):
            continue

        source_tokens = _get_elem_tokens(doc_trees.source, focus_tags.source,
                                         alignment.get("from_id"))
        target_tokens = _get_elem_tokens(doc_trees.target, focus_tags.target,
                                         alignment.get("to_id"))

        # skip pairs where either side exceeds the token-length limit
        if (len(source_tokens) > max_token_len or
            len(target_tokens) > max_token_len):
            continue

        # optionally skip pairs whose token difference is below the minimum
        if (min_token_diff and
            _token_diff(source_tokens, target_tokens) < min_token_diff):
            continue

        # the crucial assumption is that id's of the aligned focus
        # elements in the marked-up text have corresponding graphs with
        # the same id in the graph banks
        source_graph_id = alignment.get("from_id")
        target_graph_id = alignment.get("to_id")
        graphs = Pair(source_bank.get_graph(source_graph_id),
                      target_bank.get_graph(target_graph_id))
        graph_pair = GraphPair(graph_banks, graphs)
        graph_corpus.append(graph_pair)

    return graph_corpus