def main(config):
    """Parse an induced grammar, optionally dump it, and Viterbi-parse a text file.

    @param config: namespace with attributes `grammar`, `output`, `textfile`,
        `output_parse`, and `number_parses` (presumably from argparse — confirm).
    """
    grammar_string = parse_induced_grammar(config.grammar)
    if config.output:
        with open(config.output, 'w') as f:
            f.write(grammar_string)
    grammar = PCFG.fromstring(grammar_string)
    # NOTE(review): mutates a private nltk attribute to force the start symbol;
    # original author was unsure whether this is allowed or breaks things.
    grammar._start = Nonterminal('TOP')
    if config.textfile:
        # Create directory for parse trees if it does not already exist.
        # exist_ok avoids the check-then-create race of the original.
        os.makedirs(config.output_parse, exist_ok=True)
        parser = ViterbiParser(grammar)
        with open(config.textfile, 'r') as f:
            lines = f.read().splitlines()
        for i, line in enumerate(lines):
            if i == config.number_parses:
                break
            print(f"Parsing sentence {i+1}")
            sent = line.split()
            for t in parser.parse(sent):
                TreeView(t)._cframe.print_to_file(f"{config.output_parse}/tree_{i}")
def pcfg_data_likelihood(cfg_path, weights, data, counts, epsilon=1e-10):
    """Compute the log-likelihood of the real programs dataset using
    PCFG with user-specified weights.

    @param cfg_path: string
                     path to PCFG dump
    @param weights: np.array
                    parameters of CFG.
    @param data: list of code segments
                 each code segment is a list of strings (space-sep)
    @param counts: each data point is not weighted equally;
                   we weight by occurrence
    @param epsilon: default to use for empty trees [default: 1e-10]
    @return log_lik: float
                     (negated) log likelihood of dataset.
    """
    # Space of possible integers: parts of the language require a countably
    # infinite number of possibilities. We only care about encoding the real
    # program space, so we explicitly model only the integers in the real set.
    integer_domain = get_integer_domain(data)
    pcfg = build_pcfg(cfg_path, weights, integer_domain, True)
    parser = ViterbiParser(pcfg)

    log_like = 0
    missing = 0
    for code, cnt in zip(data, counts):
        generator = parser.parse(code)
        try:
            # An unparseable program yields an empty iterator; treat that the
            # same as a missing generator instead of leaking StopIteration.
            tree = next(generator) if generator is not None else None
        except StopIteration:
            tree = None
        if tree is not None:
            ll = tree.logprob()
        else:
            # This program is not covered by the pCFG.
            ll = np.log(epsilon)
            # Fix: the original incremented `missing` on EVERY iteration;
            # only uncovered programs should be counted here.
            missing += 1
        log_like += -ll * cnt
    return log_like
def parse_command(self, seqs, keep=3):
    """Viterbi-parse every sequence, keep the most probable trees, and
    render all accumulated parses into one merged PDF.

    @param seqs: iterable of (sequence, id) pairs.
    @param keep: number of top trees to retain per sequence; -1 keeps all.
    """
    nonterminals = get_nonterminals(self._pcfg)
    viterbi = ViterbiParser(self._pcfg)
    for seq, id in seqs:
        candidates = []
        for option in get_parse_options(seq, nonterminals):
            try:
                candidates.extend((t, option) for t in viterbi.parse(option))
            except ValueError:
                print(option)
        print(candidates)
        # Most probable trees first.
        candidates.sort(key=lambda pair: -pair[0].prob())
        print(seq, sum(pair[0].prob() for pair in candidates), len(candidates))
        if keep != -1:
            candidates = candidates[:keep]
            print('now', len(candidates))
        for tree, option in candidates:
            self._parsed_trees.append((option, tree, id))
    print(len(seqs), len(self._parsed_trees))
    output_files = []
    for i, (option, tree, ind) in enumerate(list(self._parsed_trees)):
        rendered = save_tree(tree, None, 'parse{}'.format(i), postscript=False,
                             prob=tree.prob(), csb_id=ind)
        output_files.append(rendered)
    merge_pdfs(output_files, 'merged_parse.pdf')
def parse(parser: ViterbiParser, sentence):
    """Parse one tokenized sentence, printing each tree and the elapsed time."""
    started = time.time()
    parser.trace(trace=1)
    for tree in parser.parse(sentence):
        print(tree)
    print(
        f"Time elapsed for sentence of length {len(sentence)}: {time.time() - started}"
    )
def parsing(sample, g):
    """Viterbi-parse each tokenized sentence in `sample` with grammar `g`,
    printing the sentence and the log-probability of each parse found.

    @param sample: iterable of token lists.
    @param g: an nltk PCFG.
    """
    from nltk.parse.viterbi import ViterbiParser
    from nltk.draw.tree import draw_trees
    parser = ViterbiParser(g)
    for s in sample:
        # Fix: converted Python 2 print statements to Python 3 calls,
        # consistent with the rest of the file.
        print(" ".join(s))
        # Fix: ViterbiParser.parse returns an iterator of trees, not a single
        # tree-or-None, so iterate instead of truth-testing the result.
        for t in parser.parse(s):
            print(t.logprob())
def parse_treebank(parser: ViterbiParser, sentences, limit=3):
    """Parse the first `limit` treebank sentences, printing trees and timings.

    @param parser: a trained ViterbiParser.
    @param sentences: treebank file ids accepted by `treebank.parsed_sents`.
    @param limit: how many sentences to parse [default: 3, matching the
        original hard-coded slice — now generalized as a parameter].
    """
    start_time = time.time()
    parser.trace(trace=1)
    for sentence in treebank.parsed_sents(sentences[:limit]):
        # Use the gold tree's leaves as the token input.
        tokens = sentence.leaves()
        for tree in parser.parse(tokens):
            print(tree)
        print(
            f"Time elapsed for sentence of length {len(tokens)}: {time.time() - start_time}"
        )
def test_PCFG(grammar, shapes=False):
    '''
    Test whether the grammar can parse a sentence
    '''
    if shapes:
        sent = "in the middle center is a green square".split()
    else:
        sent = "2 2 2 12 2 12 2 2 12 2".split()
    parser = ViterbiParser(grammar)
    # Draw every parse tree the grammar admits for the canned sentence.
    for tree in parser.parse(sent):
        tree.draw()
def run_parser(corpus):
    """
    Runs the parser on a corpus.

    @param corpus: List of lists with input tokens
    """
    for sentence in corpus:
        parser = Parser(getGrammar(sentence))
        tree = parser.parse(splitSentence(sentence))
        extractDepParse(tree, sentence)
def sanity_test():
    """Unit Test to make sure this stuff is working.

    Builds the teacher PCFG for problem 1 and CKY-parses a tiny
    program; this function should NOT break.
    """
    from ..rubric_utils.load_params import (
        get_pcfg_params,
        get_pcfg_path,
        get_codeorg_data_root,
    )
    data_root = get_codeorg_data_root(1, 'raw')
    theta = get_pcfg_params(1, author='teacher', random=False)
    cfg_path = get_pcfg_path(1, author='teacher')
    data, _counts = load_real_asts(data_root, 1, True)
    domain = get_integer_domain(data)
    # CKY parser for p-cfgs...
    parser = ViterbiParser(build_pcfg(cfg_path, theta, domain, False))
    tree = next(parser.parse(['Move', '(', '50', ')']))
    # print(tree.logprob())
    print(tree)
def analyse_viterbi(pcfg, messages):
    """Infer the Viterbi parses of a set of messages and evaluate coverage.

    Computes per-message log2 likelihoods, tree depths, failed parses,
    parse coverage, and the mean log2 likelihood of the parsed messages.

    @param pcfg: an nltk PCFG.
    @param messages: iterable of messages; each message is an iterable of symbols.
    @return eval_stats: dict with keys 'log2likelihoods', 'unparsed_count',
        'parsed_count', 'failedparses', 'coverage', 'average_log2likelihood'.
    """
    # Terminals are the rhs symbols of lexical productions (rhs starts with a str).
    terminals = {
        prod.rhs()[0]
        for prod in pcfg.productions()
        if isinstance(prod.rhs()[0], str)
    }

    parser = ViterbiParser(pcfg)
    message_count = len(messages)
    trees = []
    tree_depths = []
    logprobs = []
    failed_parses = []
    for sent in messages:
        sent = list(sent)
        # Only attempt a parse when every symbol is in the grammar's lexicon;
        # otherwise ViterbiParser would raise on the unknown token.
        if all(sym in terminals for sym in sent):
            tree_list = list(parser.parse(sent))
        else:
            tree_list = []
        if len(tree_list) == 1:
            # Parsed: Viterbi yields (at most) the single best tree.
            tree = tree_list[0]
            trees.append(to_parse_string(tree))
            tree_depths.append(tree_depth(tree))
            # Convert the tree's natural log to log base 2 for description length.
            logprobs.append(tree.logprob() / np.log(2))
        else:
            # Unified NO_PARSE branch (the original duplicated this block).
            logprobs.append(None)
            tree_depths.append(None)
            failed_parses.append(sent)

    # Final statistics over successfully parsed messages only.
    parsed_logprobs = list(ignore_none(logprobs))
    parsed_count = len(parsed_logprobs)
    unparsed_count = message_count - parsed_count

    eval_stats = {
        'log2likelihoods': logprobs,  # corresponds to {data: frequencies}
        'unparsed_count': unparsed_count,
        'parsed_count': parsed_count,
        'failedparses': failed_parses,
    }

    # Evaluation coverage (guard against an empty message set).
    coverage = parsed_count / message_count if message_count else float('nan')
    eval_stats['coverage'] = coverage * 100
    # Fix: the original called mean() on a list containing Nones (raises);
    # average over the filtered values, with NaN when nothing parsed.
    eval_stats['average_log2likelihood'] = (
        mean(parsed_logprobs) if parsed_logprobs else float('nan')
    )
    return eval_stats
def parse(parser: ViterbiParser, sentence):
    """Yield every parse tree the parser produces for *sentence*."""
    yield from parser.parse(sentence)