def pass0(seg, extra_grammar_paths=[], glue_grammar_paths=[], pass_through=True, default_symbol='X', goal_str='GOAL', start_str='S', max_span=-1, n_goal=0, saving={}, redo=True, log=dummyfunc) -> 'Hypergraph':
    """
    Parse the segment with the source side of the grammar (pass 0).

    No scoring happens at this stage, not even local scoring (TODO: it could).

    Steps
        1. Build a hypergraph view of every available grammar
        2. Build the input DFA
        3. Parse the input DFA

    :param seg: the segment to parse (its `id` is used in log messages)
    :param extra_grammar_paths: forwarded to `make_grammar_hypergraph`
    :param glue_grammar_paths: forwarded to `make_grammar_hypergraph`
    :param pass_through: forwarded to `make_grammar_hypergraph`
    :param default_symbol: forwarded to `make_grammar_hypergraph`
    :param goal_str: goal symbol handed to `GoalRuleMaker`
    :param start_str: start symbol, used both for `GoalRuleMaker` and to fetch the parsing root
    :param max_span: forwarded to `HieroConstraints`
    :param n_goal: forwarded to `GoalRuleMaker` as `n`
    :param saving: mapping consulted under the key 'forest' for reuse/persistence
        (presumably step name -> file path; confirm against the caller)
    :param redo: forwarded to `is_step_complete`
    :param log: logging callable invoked as `log(format, *args)`
    :return: source forest
    """
    # NOTE(review): the list/dict defaults are shared between calls; this function
    # itself never mutates them, they are only read or passed along.

    # Reuse a previously stored forest when the step is reported as complete.
    if is_step_complete('forest', saving, redo):
        return unpickle_it(saving['forest'])

    # Otherwise we have to parse from scratch, starting from the grammars.
    log('[%d] Make hypergraph view of all available grammars', seg.id)
    grammar_hg = make_grammar_hypergraph(
        seg,
        extra_grammar_paths=extra_grammar_paths,
        glue_grammar_paths=glue_grammar_paths,
        pass_through=pass_through,
        default_symbol=default_symbol,
    )

    # Parse the source lattice; the arguments are prepared in the same order
    # in which the parser call would evaluate them.
    log('[%d] Parse source DFA', seg.id)
    goal_rules = GoalRuleMaker(goal_str=goal_str, start_str=start_str, n=n_goal)
    source_dfa = make_input_dfa(seg)
    root = grammar_hg.fetch(Nonterminal(start_str))
    input_view = goal_rules.get_iview()
    span_constraint = HieroConstraints(grammar_hg, source_dfa, max_span)
    source_forest = parse_dfa(
        grammar_hg,
        root,
        source_dfa,
        input_view,
        bottomup=True,
        constraint=span_constraint,
    )

    # Persist the forest only when a slot for it was provided.
    if 'forest' in saving:
        pickle_it(saving['forest'], source_forest)
    return source_forest
def biparse(seg: SegmentMetaData, options: SimpleNamespace, joint_model: ModelView, conditional_model: ModelView, workingdir=None, redo=True, log=dummyfunc) -> SimpleNamespace:
    """
    Biparse a segment using a local model.

    The work is split in two stages:
        1. the source is parsed and scored with a joint model
        2. source and target are bi-parsed and scored with a conditional model

    Keeping the stages apart lets each model be factorised differently with
    respect to local/nonlocal components. For instance, an LM may be a local
    (tractable) component of a conditional model while being a nonlocal
    (intractable) component of a joint model.

    Implementation detail: bi-parsing is a cascade of intersections with
    projections in between.

    :param seg: a segment
    :param options: parsing options (reads extra_grammars, glue_grammars, pass_through,
        default_symbol, goal, start and max_span)
    :param joint_model: a factorised view of the joint model, only its local components are used
    :param conditional_model: a factorised view of the conditional model, only its local
        components are used; when None the conditional stage is skipped
    :param workingdir: where to save files (nothing is saved or reused when falsy)
    :param redo: whether or not previously saved computation should be discarded
    :param log: a logging function invoked as `log(format, *args)`
    :return: result.{joint,conditional}.{forest,components} for the respective local model
    :raises ValueError: if the input lattice cannot be parsed
    """
    # Without a working directory there is nothing to reuse and nothing to store.
    saving = preprocessed_training_files('{0}/{1}'.format(workingdir, seg.id)) if workingdir else {}

    result = SimpleNamespace(joint=SimpleNamespace(), conditional=SimpleNamespace())
    joint_only = conditional_model is None

    # Reuse: joint distribution alone, when no conditional model was requested.
    if joint_only and all(is_step_complete(name, saving, redo)
                          for name in ('joint.forest', 'joint.components')):
        log('[%d] Reusing joint and conditional distributions from files', seg.id)
        result.joint.forest = unpickle_it(saving['joint.forest'])
        result.joint.components = unpickle_it(saving['joint.components'])
        result.conditional.forest = None
        result.conditional.components = []
        return result

    # Reuse: both distributions.
    every_step = ('joint.forest', 'joint.components', 'conditional.forest', 'conditional.components')
    if all(is_step_complete(name, saving, redo) for name in every_step):
        log('[%d] Reusing joint and conditional distributions from files', seg.id)
        result.joint.forest = unpickle_it(saving['joint.forest'])
        result.joint.components = unpickle_it(saving['joint.components'])
        result.conditional.forest = unpickle_it(saving['conditional.forest'])
        result.conditional.components = unpickle_it(saving['conditional.components'])
        return result

    # 1. Grammar: a hypergraph view of all available grammars
    #    (from here on we need to decode for sure).
    log('[%d] Make hypergraph view of all available grammars', seg.id)
    grammar_hg = make_grammar_hypergraph(
        seg,
        extra_grammar_paths=options.extra_grammars,
        glue_grammar_paths=options.glue_grammars,
        pass_through=options.pass_through,
        default_symbol=options.default_symbol,
    )

    # 2. Joint distribution - Step 1: parse the source lattice
    n_goal = 0
    log('[%d] Parse source DFA', seg.id)
    goal_maker = GoalRuleMaker(goal_str=options.goal, start_str=options.start, n=n_goal)
    source_dfa = make_input_dfa(seg)
    root = grammar_hg.fetch(Nonterminal(options.start))
    input_view = goal_maker.get_iview()
    span_constraint = HieroConstraints(grammar_hg, source_dfa, options.max_span)
    source_forest = parse_dfa(
        grammar_hg,
        root,
        source_dfa,
        input_view,
        bottomup=True,
        constraint=span_constraint,
    )
    if not source_forest:
        raise ValueError('I cannot parse the input lattice: i) make sure your grammar has glue rules; ii) make sure it handles OOVs')

    # 3. Target projection of the forest
    log('[%d] Project target rules', seg.id)
    target_forest = make_target_forest(source_forest)

    # 4. Joint distribution - Step 2: scoring
    log('[%d] Joint model: (exact) local scoring', seg.id)
    result.joint = exact_rescoring(joint_model.local_model(), target_forest, goal_maker, log)

    # Store the joint distribution for whichever slots are available.
    if 'joint.forest' in saving:
        pickle_it(saving['joint.forest'], result.joint.forest)
    if 'joint.components' in saving:
        pickle_it(saving['joint.components'], result.joint.components)

    if joint_only:
        result.conditional.forest = None
        result.conditional.components = []
        return result

    # 5. Conditional distribution - Step 1: parse the reference lattice
    log('[%d] Parse reference DFA', seg.id)
    reference_dfa = make_reference_dfa(seg)
    goal_maker.update()
    reference_forest = parse_dfa(
        result.joint.forest,
        0,
        reference_dfa,
        goal_maker.get_oview(),
        bottomup=False,
    )

    if reference_forest:
        # 6. Conditional distribution - Step 2: scoring
        log('[%d] Conditional model: exact (local) scoring', seg.id)
        result.conditional = exact_rescoring(conditional_model.local_model(), reference_forest, goal_maker, log)
    else:
        # The reference cannot be parsed: expose the empty forest with no components.
        log('[%d] References cannot be parsed', seg.id)
        result.conditional.forest = reference_forest
        result.conditional.components = []

    # Store the conditional distribution, whichever branch produced it.
    if 'conditional.forest' in saving:
        pickle_it(saving['conditional.forest'], result.conditional.forest)
    if 'conditional.components' in saving:
        pickle_it(saving['conditional.components'], result.conditional.components)

    return result