def parse_and_augment_proof_step(self, proof_step, out=True):
    '''
    Collect the tree data for one proof step, with randomization and
    augmentation applied.

    One unconstrained variable of the step's proposition is picked at random
    as the target; it is labelled 'TARGET_UC' and every other unconstrained
    variable is labelled 'UC'.  (The augmentation makes some odd choices,
    notably in how distinct unconstrained variables are tracked; they are
    deliberately left as they are for now.)

    inputs:
        proof_step: the step to process; its prop, tree, context and
            unconstrained attributes are read
        out: when true, the output tree for the target variable and its
            graph structure are prepared as well

    Sets self.to_prove_trees, self.known_trees, self.random_replacement_dict,
    self.known_graph_structure and self.to_prove_graph_structure, plus
    self.out_tree and self.out_graph_structure when out is true.
    '''
    prop = proof_step.prop

    # Label the unconstrained variables of prop.  The non-target variables
    # are not distinguished from one another yet.
    free_vars = prop.unconstrained_variables
    uv_dict = dict.fromkeys(free_vars, 'UC')
    target_index = np.random.choice(len(free_vars))
    uv_dict[free_vars[target_index]] = 'TARGET_UC'

    if out:
        self.out_tree = proof_step.unconstrained[target_index].copy()

    # Fit prop to the proof step; after this every variable should be
    # covered either by fit or by uv_dict.
    fit = data_utils.prop_applies_to_statement(
        proof_step.tree, prop, proof_step.context)

    to_prove = []
    for hyp in prop.hyps:
        if hyp.type != 'e':
            continue
        substituted = hyp.tree.copy().replace(fit)
        to_prove.append(substituted.replace_values(uv_dict))
    self.to_prove_trees = to_prove

    known = []
    for hyp in proof_step.context.hyps:
        if hyp.type == 'e':
            known.append(hyp.tree.copy())
    self.known_trees = known

    # Draw the random replacement dictionary and remember it.
    renaming = self.config.lm.random_replacement_dict_f(
        f=proof_step.context.f)
    self.random_replacement_dict = renaming

    # Apply the replacements in place: output tree first (if any), then the
    # trees to prove, then the known trees.
    trees_to_rename = [self.out_tree] if out else []
    trees_to_rename = trees_to_rename + self.to_prove_trees + self.known_trees
    for tree in trees_to_rename:
        tree.replace_values(renaming)

    # Finally build the graph structures.
    hyp_section_symbols = dict(
        start_symbol=None,
        intermediate_symbol='END_OF_HYP',
        end_symbol='END_OF_SECTION')
    self.known_graph_structure = TreeInformation(
        self.known_trees, **hyp_section_symbols)
    self.to_prove_graph_structure = TreeInformation(
        self.to_prove_trees, **hyp_section_symbols)

    if out:
        self.out_graph_structure = TreeInformation(
            [self.out_tree],
            start_symbol='START_OUTPUT',
            intermediate_symbol=None,
            end_symbol=None)
def get_correct(context, t):
    '''
    Return the correct hypothesis trees for proof step t.

    inputs:
        context: the un-standardized context
        t: the proof step; its prop, tree and unconstrained attributes
            are read

    The fit of t.prop to t.tree is extended with the step's own choices for
    the unconstrained variables and then substituted into a copy of the tree
    of every 'e'-type hypothesis of the proposition.
    '''
    prop = t.prop
    fit = data.prop_applies_to_statement(t.tree, prop, context)
    assert fit is not None  # the correct fit needs to work

    # Add the substitutions this step used for the unconstrained variables.
    for variable, subtree in zip(prop.unconstrained_variables, t.unconstrained):
        fit[variable] = subtree

    essential_hyps = (h for h in prop.hyps if h.type == 'e')
    return [h.tree.copy().replace(fit) for h in essential_hyps]
def __init__(self, tree, context, prop, config, vs, args, gen_model,
             allowed_constructors=None, beam_search_state=None,
             return_replacement_dict=False, gt_step=None):
    '''
    Build a beam search state, either fresh or as a copy of another state.

    When beam_search_state is None a new state is built from the other
    arguments.  Otherwise the persistent fields are taken from
    beam_search_state (lists, used_symbols and the fit dictionaries are
    copied with [:] / .copy(), everything else is shared by reference) and
    the other arguments are ignored.

    inputs:
        tree, context, prop: stored, and passed to
            data_utils.prop_applies_to_statement to compute the initial fit
        config: the config object; config.lm is kept as self.lm
        vs: a set of variable objects, for all the models we are
            ensembling over
        args, gen_model: stored as given
        allowed_constructors: forwarded to self.get_all_allowed_symbols
        beam_search_state: a BeamSearchState to copy
        return_replacement_dict: stored as given
        gt_step: stored as given
    '''
    source = beam_search_state
    is_fresh = source is None

    self.request = []

    if is_fresh:
        self.total_symbols = 0
        self.return_replacement_dict = return_replacement_dict

        # basic configuration stuff
        self.random_replacement_dict = None
        self.args = args
        self.gen_model = gen_model
        self.vs = vs
        self.config = config
        self.lm = self.config.lm
        self.tree = tree
        self.context = context
        self.prop = prop
        self.value = 0
        self.complete = False
        self.gt_step = gt_step
        self.gt_string = None
        self.allowed_constructors = self.get_all_allowed_symbols(
            allowed_constructors)

        # the current state (non-vector)
        self.string = []
        self.position_into_arity_stack = [0]
        self.parent_arity_stack = [-1]
        self.next_symbol = None

        # information about the current state vectors
        self.h = None
        self.parent_stack = [None]
        self.left_sibling_stack = [None]
        self.logits = None
        self.logit_order = None
        self.parent_symbol_stack = None
    else:
        self.total_symbols = source.total_symbols
        self.return_replacement_dict = source.return_replacement_dict

        # basic configuration stuff
        self.random_replacement_dict = source.random_replacement_dict
        self.args = source.args
        self.gen_model = source.gen_model
        self.vs = source.vs
        self.config = source.config
        self.lm = self.config.lm
        self.tree = source.tree
        self.context = source.context
        self.prop = source.prop
        self.value = source.value
        self.complete = source.complete
        self.gt_step = source.gt_step
        self.gt_string = source.gt_string
        self.allowed_constructors = source.allowed_constructors

        # the current state (non-vector); lists are copied so the two
        # states can diverge
        self.string = source.string[:]
        self.position_into_arity_stack = source.position_into_arity_stack[:]
        self.parent_arity_stack = source.parent_arity_stack[:]
        self.next_symbol = source.next_symbol

        # information about the current state vectors
        self.h = source.h
        self.parent_stack = source.parent_stack[:]
        self.left_sibling_stack = source.left_sibling_stack[:]
        self.logits = source.logits
        self.logit_order = source.logit_order
        # A fresh state starts with parent_symbol_stack = None, so the
        # source may still hold None here; slicing None would raise.
        symbol_stack = source.parent_symbol_stack
        self.parent_symbol_stack = (
            None if symbol_stack is None else symbol_stack[:])

    # the information for returning symbols
    # this is immediately forgotten
    self.returned_symbols = 0

    # this state information dies when we copy.
    self.exhausted = False
    self.new_wff_added = not PERMIT_NEW_WFFS
    self.new_set_added = not PERMIT_NEW_SETS
    self.new_class_added = not PERMIT_NEW_CLASSES
    self.next_vclass = None

    # information about the current model
    # this is kept until we move to the next unconstrained variable
    if is_fresh:
        self.current_models = None
        self.disallowed_symbols = None
        # NOTE: this shares context.hyp_symbols instead of copying it (an
        # earlier version copied); states copied from this one do get
        # their own copy.
        self.used_symbols = context.hyp_symbols
        self.this_ua = None
        self.model_complete = True

        # NOTE(review): assumes prop_applies_to_statement found a fit
        # (i.e. did not return None) -- confirm against callers.
        self.fit = data_utils.prop_applies_to_statement(tree, prop, context)
        self.fit_initial = self.fit.copy()
        # labels of the 'f' hypotheses that the fit does not cover yet,
        # visited in random order
        self.remaining_uas = [x.label for x in prop.hyps
                              if x.type == 'f' and x.label not in self.fit]
        np.random.shuffle(self.remaining_uas)
        self.set_up_next_model()
        if not self.complete:
            self.determine_next_vclass()
    else:
        self.current_models = source.current_models
        self.disallowed_symbols = source.disallowed_symbols
        self.used_symbols = source.used_symbols.copy()
        self.this_ua = source.this_ua
        self.model_complete = source.model_complete

        self.remaining_uas = source.remaining_uas[:]
        self.fit = source.fit.copy()
        self.fit_initial = source.fit_initial.copy()
def __init__(self, tree, context, prop, config, vs,
             allowed_constructors=None, beam_search_state=None,
             return_replacement_dict=False):
    '''
    Build a beam search state, either fresh or as a copy of another state.

    When beam_search_state is None a new state is built from the other
    arguments.  Otherwise the persistent fields are taken from
    beam_search_state (lists, used_symbols and the fit dictionary are
    copied with [:] / .copy(), everything else is shared by reference) and
    the other arguments are ignored.

    inputs:
        tree, context, prop: stored, and passed to
            data_utils.prop_applies_to_statement to compute the initial fit
        config: the config object; config.lm is kept as self.lm
        vs: a set of variable objects, for all the models we are
            ensembling over
        allowed_constructors: forwarded to self.get_all_allowed_symbols
        beam_search_state: a BeamSearchState to copy
        return_replacement_dict: stored as given
    '''
    source = beam_search_state
    is_fresh = source is None

    if is_fresh:
        self.total_symbols = 0
        self.return_replacement_dict = return_replacement_dict

        # basic configuration stuff
        self.vs = vs
        self.config = config
        self.lm = self.config.lm
        self.tree = tree
        self.context = context
        self.prop = prop
        self.value = 0
        self.complete = False
        self.allowed_constructors = self.get_all_allowed_symbols(
            allowed_constructors)

        # the current state (non-vector)
        self.string = []
        self.position_into_arity_stack = [0]
        self.parent_arity_stack = [-1]
        self.next_symbol = None

        # information about the current state vectors
        self.h = None
        self.parent_stack = [None]
        self.left_sibling_stack = [None]
        self.logits = None
        self.logit_order = None
        self.parent_symbol_stack = None
    else:
        self.total_symbols = source.total_symbols
        self.return_replacement_dict = source.return_replacement_dict

        # basic configuration stuff
        self.vs = source.vs
        self.config = source.config
        self.lm = self.config.lm
        self.tree = source.tree
        self.context = source.context
        self.prop = source.prop
        self.value = source.value
        self.complete = source.complete
        self.allowed_constructors = source.allowed_constructors

        # the current state (non-vector); lists are copied so the two
        # states can diverge
        self.string = source.string[:]
        self.position_into_arity_stack = source.position_into_arity_stack[:]
        self.parent_arity_stack = source.parent_arity_stack[:]
        self.next_symbol = source.next_symbol

        # information about the current state vectors
        self.h = source.h
        self.parent_stack = source.parent_stack[:]
        self.left_sibling_stack = source.left_sibling_stack[:]
        self.logits = source.logits
        self.logit_order = source.logit_order
        # A fresh state starts with parent_symbol_stack = None, so the
        # source may still hold None here; slicing None would raise.
        symbol_stack = source.parent_symbol_stack
        self.parent_symbol_stack = (
            None if symbol_stack is None else symbol_stack[:])

    # the information for returning symbols
    # this is immediately forgotten
    self.returned_symbols = 0

    # this state information dies when we copy.
    self.exhausted = False
    self.new_wff_added = not PERMIT_NEW_WFFS
    self.new_set_added = not PERMIT_NEW_SETS
    self.new_class_added = not PERMIT_NEW_CLASSES
    self.next_vclass = None

    # information about the current model
    # this is kept until we move to the next unconstrained variable
    if is_fresh:
        self.current_models = None
        self.disallowed_symbols = None
        # NOTE: this shares context.hyp_symbols instead of copying it (an
        # earlier version copied); states copied from this one do get
        # their own copy.
        self.used_symbols = context.hyp_symbols
        self.this_ua = None
        self.model_complete = True

        # NOTE(review): assumes prop_applies_to_statement found a fit
        # (i.e. did not return None) -- confirm against callers.
        self.fit = data_utils.prop_applies_to_statement(tree, prop, context)
        # labels of the 'f' hypotheses that the fit does not cover yet,
        # visited in random order
        self.remaining_uas = [x.label for x in prop.hyps
                              if x.type == 'f' and x.label not in self.fit]
        np.random.shuffle(self.remaining_uas)
        self.set_up_next_model()
        if not self.complete:
            self.determine_next_vclass()
    else:
        self.current_models = source.current_models
        self.disallowed_symbols = source.disallowed_symbols
        self.used_symbols = source.used_symbols.copy()
        self.this_ua = source.this_ua
        self.model_complete = source.model_complete

        self.remaining_uas = source.remaining_uas[:]
        self.fit = source.fit.copy()