def __init__(self, sm: SemanticModel, karma_sm: KarmaModel):
    """Our semantic model and the Karma semantic model represent the same semantic
    structure; only the names of the data nodes differ. We want to build a mapping
    from data-node ids in karma_sm to data-node ids in our sm such that, if we
    replace the names of data nodes in karma_sm with the names of data nodes in
    our sm, the new tree still captures the same semantics as our sm.

    Our conjecture: the semantics of a data node does not change as long as the
    path from the root node to this data node does not change.
    """
    try:
        result = f1_precision_recall(sm.graph, karma_sm.graph,
                                     DataNodeMode.IGNORE_LABEL_DATA_NODE)
    except PermutationExploding:
        logging.error("PermutationExploding at source: %s", sm.id)
        raise

    bijection = result['_bijection']
    try:
        assert (result['f1'] == 1.0
                and None not in bijection.prime2x
                and None not in bijection.x2prime
                and len(bijection.x2prime) == len(bijection.prime2x))

        # mapping from karma node's id to our sm node's id
        alignment: Dict[int, int] = {}
        for n_prime in karma_sm.graph.iter_class_nodes():
            n = sm.graph.get_node_by_id(bijection.prime2x[n_prime.id])
            edges: Dict[bytes, List[GraphLink]] = _(n.iter_outgoing_links()) \
                .imap(lambda e: (e.label, e)).group_by_key().todict()

            e_primes: List[GraphLink]
            for lbl, e_primes in _(n_prime.iter_outgoing_links()) \
                    .imap(lambda e: (e.label, e)).group_by_key().get_value():
                assert len(e_primes) == len(edges[lbl])
                e_primes = [e for e in e_primes if e.get_target_node().is_data_node()]
                es = [e for e in edges[lbl] if e.get_target_node().is_data_node()]
                assert len(e_primes) == len(es)

                # order doesn't matter because it doesn't change the semantics
                for ep, e in zip(e_primes, es):
                    alignment[ep.target_id] = e.target_id

        self.alignment = alignment
        self.sm = sm
        self.karma_sm = karma_sm
    except Exception:
        sm.graph.render2pdf("/tmp/sm.pdf")
        karma_sm.graph.render2pdf("/tmp/karma_sm.pdf")
        logging.error("Error when trying to build alignment between models. Source = %s", sm.id)
        raise
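# A hypothetical usage sketch (the enclosing class is not shown in this fragment,
# so the name `NodeAlignment` below is assumed): `alignment` maps each data-node
# id in `karma_sm` to the id of the semantically equivalent data node in `sm`.
#
#   aligner = NodeAlignment(sm, karma_sm)
#   for karma_id, our_id in aligner.alignment.items():
#       karma_name = karma_sm.graph.get_node_by_id(karma_id).label
#       our_name = sm.graph.get_node_by_id(our_id).label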
def pred_type(self, col: Column, top_n: int) -> List[Tuple[Tuple[bytes, bytes], float]]:
    X = []
    refcols = [refcol for refcol in self.stype_db.train_columns if refcol.id != col.id]
    j = self.stype_db.col2idx[col.id]
    for refcol in refcols:
        iref = self.stype_db.col2idx[refcol.id]
        X.append(self.stype_db.similarity_matrix[j, iref])

    result = self.model.predict_proba(X)[:, 1]
    result = _(zip(result, (self.stype_db.col2types[rc.id] for rc in refcols))) \
        .sort(key=lambda x: x[0], reverse=True)

    top_k_st = {}
    for score, stype in result:
        if stype not in top_k_st:
            top_k_st[stype] = score
            if len(top_k_st) == top_n:
                break

    return sorted(top_k_st.items(), reverse=True, key=lambda x: x[1])
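# Hypothetical usage sketch (`typer` and `column` are assumed names): `pred_type`
# returns ((class_uri, predicate), score) pairs, sorted by score descending.
#
#   for (class_uri, predicate), score in typer.pred_type(column, top_n=5):
#       print(class_uri, predicate, score)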
def filter_unlikely_graph(g: MergeGraph) -> bool:
    settings = Settings.get_instance()
    max_n_duplications = settings.mrf_max_n_duplications
    max_n_duplication_types = settings.mrf_max_n_duplication_types

    for n in g.iter_class_nodes():
        # FILTER: middle nodes (exactly one incoming and one outgoing link, where
        # the outgoing link points to another class node)
        if n.n_incoming_links == 1 and n.n_outgoing_links == 1:
            link = next(iter(n.iter_outgoing_links()))
            if link.get_target_node().is_class_node():
                return False

        # FILTER: at most max_n_duplications links per label, and at most
        # max_n_duplication_types duplicated labels per node
        n_duplication_types = 0
        for e_lbl, es in _(n.iter_outgoing_links()).imap(
                lambda e: (e.label, e)).group_by_key().get_value():
            if len(es) > max_n_duplications:
                return False
            if len(es) > 1:
                n_duplication_types += 1
        if n_duplication_types > max_n_duplication_types:
            return False

    return True
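# Minimal pruning sketch (`candidate_graphs` is assumed to be an iterable of
# MergeGraph): keep only the candidate graphs that pass the filters above.
#
#   likely_graphs = [g for g in candidate_graphs if filter_unlikely_graph(g)]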
def predict_log_probs(self, examples: List[Example]):
    log_probs = []
    for es in _(examples).isplit(self.max_n_tasks):
        varss = [self.get_variables(e) for e in es]
        # varss = self.parallel_get_variables(es)
        factorss = [self.model.get_factors(vars) for vars in varss]
        inferences = [self.inference(f, v) for f, v in zip(factorss, varss)]
        desired_assignments = [
            {var: var.domain.encode_value(True) for var in vars}
            for vars in varss
        ]
        logZs = parallel_marginal_inference(
            inferences, n_threads=Settings.get_instance().parallel_gmtk_n_threads)
        log_probs += [
            sum(f.score_assignment(desired_assignments[i]) for f in factorss[i]) - logZs[i]
            for i in range(len(es))
        ]
    return log_probs
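# Each returned value is log P(all variables = True | example): the unnormalized
# score of the all-True assignment minus the log partition function logZ. A hedged
# sketch for recovering probabilities (`annotator` is an assumed name; requires
# `import math`):
#
#   probs = [math.exp(lp) for lp in annotator.predict_log_probs(examples)]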
def get_latest_model_id(workdir) -> int:
    model_ids = _(workdir.iterdir()) \
        .ifilter(lambda e: e.is_dir() and e.name.startswith('exp_no_')) \
        .map(lambda e: int(e.name.replace('exp_no_', '')))
    if len(model_ids) == 0:
        return -1
    return max(model_ids)
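# Usage sketch: experiment folders are named `exp_no_<id>`, so the next free id
# is one past the latest (the path below is illustrative only):
#
#   next_id = get_latest_model_id(Path("debug/dataset/models")) + 1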
def get_example_detail(model_prefix, no_sample, link_id, model: LogLinearModel,
                       graphs: List[List[TripleLabel]]):
    for graph in graphs:
        example = graph[0].triple.example
        if not example.model_id.startswith(model_prefix) or example.no_sample != no_sample:
            continue

        var = _(graph).first(lambda v: v.triple.link.id == link_id)
        assignment = {v: v.get_label_value() for v in graph}
        factors = model.get_factors(graph)
        print(var.triple.link.label)

        for lbl in [False, True]:
            assignment[var] = var.domain.encode_value(lbl)
            print("When value of variable is: ", assignment[var].val)
            score = 0.0
            for factor in factors:
                if factor.touch(var):
                    score += factor.score_assignment(assignment)
                    if isinstance(factor, TripleFactorTemplate.TripleFactor):
                        features = factor.assignment2features(assignment)
                        print('\t. Factor features: ',
                              [(var.triple.features.domain.get_category(idx), features[idx])
                               for idx in var.triple.features.get_active_index()])
                    else:
                        print('\t. Factor features: ',
                              factor.assignment2features(assignment).tolist())
            print('\t. Score = ', score)
        break
def parallel_get_variables(self, examples: List[Example]):
    assert False, "Not ready to use yet"
    ParallelAnnotator.get_instance().annotate(examples)
    return [
        _(self.example_annotator.example2vars(example))
            .imap(lambda x: self.example_annotator.build_triple_features(x, self.tf_domain))
            .map(lambda x: x.label)
        for example in examples
    ]
def get_variables(self, example: Example):
    if self.example_annotator is None:
        # TODO: should handle the top_k_semantic_types configuration, and check
        # whether training_sources has been changed!!
        self.example_annotator = ExampleAnnotator(self.dataset, example.training_sources)

    self.example_annotator.annotate(example)
    return _(self.example_annotator.example2vars(example)) \
        .imap(lambda x: self.example_annotator.build_triple_features(x, self.tf_domain)) \
        .map(lambda x: x.label)
def pred_full_stype(
        self, col: Column, top_n: int
) -> List[Tuple[Tuple[bytes, bytes], float, Dict[Optional[Tuple[bytes, bytes]], float]]]:
    X = []
    refcols = [refcol for refcol in self.stype_db.train_columns if refcol.id != col.id]
    j = self.stype_db.col2idx[col.id]
    for refcol in refcols:
        iref = self.stype_db.col2idx[refcol.id]
        X.append(self.stype_db.similarity_matrix[j, iref])

    result = self.model.predict_proba(X)[:, 1]
    result = _(zip(result, (self.stype_db.col2dnodes[rc.id] for rc in refcols))) \
        .sort(key=lambda x: x[0], reverse=True)

    # top_k_st maps each stype to its score and to the scores of its parent stypes
    top_k_st: Dict[Tuple[bytes, bytes],
                   Tuple[float, Dict[Optional[Tuple[bytes, bytes]], float]]] = {}
    for score, dnode in result:
        link = dnode.get_first_incoming_link()
        parent = link.get_source_node()
        parent_link = parent.get_first_incoming_link()
        if parent_link is None:
            parent_stype = None
        else:
            parent_stype = (parent_link.get_source_node().label, parent_link.label)

        stype = (parent.label, link.label)
        if stype not in top_k_st:
            if len(top_k_st) == top_n:
                # ignore a stype that doesn't make it into the top k
                continue
            top_k_st[stype] = (score, {parent_stype: score})
        elif parent_stype not in top_k_st[stype][1]:
            # keep looping until we collect enough parent stypes (default: top 3);
            # if we have already seen parent_stype, we don't need to update its
            # score because results are sorted, so the recorded score is the greatest
            top_k_st[stype][1][parent_stype] = score

    return sorted([(stype, score, parent_stypes)
                   for stype, (score, parent_stypes) in top_k_st.items()],
                  reverse=True, key=lambda x: x[1])
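# Hypothetical usage sketch (`typer` and `column` are assumed names): unlike
# `pred_type`, each prediction also carries the scores of its candidate parent
# stypes (the key is None when the source class has no parent).
#
#   for stype, score, parent_stypes in typer.pred_full_stype(column, top_n=5):
#       print(stype, score, parent_stypes)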
def feature_extraction(self, graph: Graph, stype_score: Dict[int, Optional[float]]):
    node2features = {}
    for node in graph.iter_class_nodes():
        prob_data_nodes = _(node.iter_outgoing_links()) \
            .imap(lambda x: x.get_target_node()) \
            .ifilter(lambda x: x.is_data_node()) \
            .reduce(lambda a, b: a + (stype_score[b.id] or 0), 0)
        similar_nodes = graph.iter_nodes_by_label(node.label)
        minimum_merged_cost = min(
            get_merged_cost(node, similar_node, self.multival_predicate)
            for similar_node in similar_nodes)
        node2features[node.id] = [('prob_data_nodes', prob_data_nodes),
                                  ('minimum_merged_cost', minimum_merged_cost)]
    return node2features
def evaluate(map_examples) -> Optional[ConfusionMatrix]:
    if len(map_examples) == 0:
        return None
    return _(map_examples) \
        .imap(lambda e: (e.get_map_assignment(), e.get_target_assignment())) \
        .imap(lambda e: Evaluation.get_confusion_matrix(*e)) \
        .reduce(operator.add)
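# Usage sketch: aggregate the MAP assignments of all examples into one confusion
# matrix (as done during training below):
#
#   cm = evaluate(train_map_examples)
#   if cm is not None:
#       print(cm.precision_recall_fbeta(class_idx=1))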
def render_factor_graph(model_or_factors: Union[LogLinearModel, List[Factor]],
                        vars: List[TripleLabel], fpath: str):
    """Render a factor graph for debugging."""
    if isinstance(model_or_factors, LogLinearModel):
        factors = model_or_factors.get_factors(vars)
    else:
        factors = model_or_factors

    def get_fnode_lbl(fnode: Union[TripleLabel, Factor]) -> bytes:
        if isinstance(fnode, Factor):
            label = fnode.__class__.__name__
        else:
            s = fnode.triple.link.get_source_node()
            t = fnode.triple.link.get_target_node()
            label = "%s:%s--%s:%s" % (s.id, s.label.decode('utf-8'),
                                      t.id, t.label.decode('utf-8'))
        return label.encode('utf-8')

    class Node(GraphNode):
        def __init__(self, fnode: Union[TripleLabel, Factor]) -> None:
            super().__init__()
            self.fnode = fnode

        def get_dot_format(self, max_text_width: int):
            label = self.get_printed_label(max_text_width).encode('unicode_escape').decode()
            if isinstance(self.fnode, Variable):
                return '"%s"[style="filled",color="white",fillcolor="gold",label="%s"];' % (
                    self.id, label)
            return '"%s"[shape="plaintext",style="filled",fillcolor="lightgray",label="%s"];' % (
                self.id, label)

    class Link(GraphLink):
        var2factor = "var2factor"
        var2var = "var2var"

        def __init__(self, link_type: str) -> None:
            super().__init__()
            self.link_type = link_type

        def get_dot_format(self, max_text_width: int):
            label = self.get_printed_label(max_text_width).encode('unicode_escape').decode()
            if self.link_type == Link.var2factor:
                return '"%s" -> "%s"[dir=none,color="brown",fontcolor="black",label="%s"];' % (
                    self.source_id, self.target_id, label)
            return '"%s" -> "%s"[color="brown",style="dashed",fontcolor="black",label="%s"];' % (
                self.source_id, self.target_id, label)

    # build the graph
    g = Graph()
    fnode2id: Dict[Union[Variable, Factor], int] = _(vars, factors) \
        .enumerate().imap(lambda v: (v[1], v[0])).todict()
    _(vars, factors).forall(lambda fnode: g.real_add_new_node(
        Node(fnode), GraphNodeType.CLASS_NODE, get_fnode_lbl(fnode)))

    for factor in factors:
        for var in factor.unobserved_variables:
            g.real_add_new_link(Link(Link.var2factor), GraphLinkType.UNSPECIFIED, b"",
                                fnode2id[var], fnode2id[factor])
    for var in vars:
        if var.triple.parent is not None:
            g.real_add_new_link(Link(Link.var2var), GraphLinkType.UNSPECIFIED, b"",
                                fnode2id[var.triple.parent.label], fnode2id[var])

    for var in vars:
        var.myid = "%s: %s" % (fnode2id[var], g.get_node_by_id(fnode2id[var]).label)
    for factor in factors:
        factor.myid = fnode2id[factor]

    g.render2pdf(fpath)
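# Debugging sketch (output path is illustrative): render the factor graph of the
# first training example; passing the model lets the function build the factors
# itself.
#
#   render_factor_graph(model, train_graphs[0], "/tmp/factor_graph.pdf")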
def train_model(dataset: str, train_sids: List[str], manual_seed: int,
                train_examples: List[Example], test_examples: List[Example],
                args: TrainingArgs, basedir: Path):
    DenseTensorFunc.manual_seed(manual_seed)
    tf_domain = GrowableBinaryVectorDomain()
    timer = pyutils.progress.Timer().start()
    input_train_examples = train_examples
    input_test_examples = test_examples

    # BUILD VARIABLES NEEDED FOR THE TRAINING
    example_annotator = ExampleAnnotator(dataset, train_sids,
                                         training_examples=train_examples)
    train_examples = sequential_map(example_annotator.annotate, train_examples)
    train_examples = _(train_examples) \
        .imap(example_annotator.example2vars) \
        .submap(partial(example_annotator.build_triple_features, domain=tf_domain))
    pairwise_domain = example_annotator.build_pairwise_domain()

    # Freeze the domain now; we've added all feature values observed in the training data
    tf_domain.freeze()

    test_examples = sequential_map(example_annotator.annotate, test_examples)
    test_examples = _(test_examples) \
        .imap(example_annotator.example2vars) \
        .submap(partial(example_annotator.build_triple_features, domain=tf_domain))

    logger.info("Preprocessing takes %s" % timer.lap().get_total_time())

    # build random variables
    train_graphs = _(train_examples).submap(lambda t: t.label)
    test_graphs = _(test_examples).submap(lambda t: t.label)

    # build models, select inference method
    model = TemplateLogLinearModel([
        TripleFactorTemplate(*TripleFactorTemplate.get_default_args(tf_domain)),
        SubstructureFactorTemplate(*SubstructureFactorTemplate.get_default_args(
            pairwise_domain, example_annotator.get_obj_props())),
        # ExternalModelFactorTemplate(*ExternalModelFactorTemplate.get_default_weights())
    ])
    # or load a previous training run
    # model_dir = config.fsys.debug.as_path() + "/%s/models/exp_no_2" % dataset
    # model, ___, state_dict = deserialize(model_dir + '/gmtk_model.bin')

    inference = BeliefPropagation.get_constructor(InferProb.MARGINAL)
    map_inference = BeliefPropagation.get_constructor(InferProb.MAP)

    train_nll_examples = _(train_graphs).map(
        lambda vars: NegativeLogLikelihoodExample(vars, model.get_factors(vars), inference))
    train_map_examples = _(train_nll_examples).map(
        lambda example: MAPAssignmentExample.from_nll_example(example, map_inference))
    test_nll_examples = _(test_graphs).map(
        lambda vars: NegativeLogLikelihoodExample(vars, model.get_factors(vars), inference))
    test_map_examples = _(test_nll_examples).map(
        lambda example: MAPAssignmentExample.from_nll_example(example, map_inference))

    # select training method/parameters, and evaluation
    n_epoch = args.n_epoch
    params = args.optparams
    mini_batch_size = args.mini_batch_size
    n_switch = args.n_switch
    global_step = 0
    require_closure = False

    if args.optimizer == 'SGD':
        optimizer = PyTorchOptimizer.SGD(parameters=model.get_parameters(), **params)
    elif args.optimizer == 'ADAM':
        optimizer = PyTorchOptimizer.Adam(parameters=model.get_parameters(), **params)
    elif args.optimizer == 'LBFGS':
        optimizer = PyTorchOptimizer.LBFGS(parameters=model.get_parameters(), **params)
        require_closure = True
    else:
        assert False, "Unknown optimizer: %s" % args.optimizer

    # optimizer.optimizer.load_state_dict(state_dict)
    for template in model.templates:
        if hasattr(template, 'after_update_weights'):
            optimizer.register_on_step(template.after_update_weights)

    logger.info(args.to_string())
    logger.info("Template info: \n%s" % "\n".join(
        "\t" + template.get_info() for template in model.templates))
    logger.info("Train size: %s, Test size: %s",
                len(train_nll_examples), len(test_nll_examples))
    reporter = TensorBoard(log_dir=basedir)

    # cast to list to keep train_map_examples & train_nll_examples aligned with
    # each other (a batch example may shuffle)
    if args.parallel_training:
        batch_nll_example = ParallelBatchExample(list(train_nll_examples), 0)
    else:
        batch_nll_example = BatchExample(list(train_nll_examples), 0)

    # *********************************************** DEBUG CODE
    # for i, triples in enumerate(train_examples):
    #     example = triples[0].example
    #     if example.model_id.startswith("s03") and example.no_sample == 29:
    #         example.pred_sm.render()
    #         render_factor_graph(model.get_factors(train_graphs[i]), train_graphs[i],
    #                             config.fsys.debug.as_path() + "/tmp/factor_graph.pdf")
    #         exit(0)
    #
    # render_factor_graph(train_nll_examples[0].factors, train_nll_examples[0].variables,
    #                     config.fsys.debug.as_path() + "/tmp/factor_graph.pdf")
    #
    # loss_val_accum = ValueAccumulator()
    # gradient_accum = Tensor1AccumulatorDict()
    # for weights in model.get_parameters():
    #     gradient_accum.track_obj(weights, DenseTensorFunc.zeros_like(weights.val))
    # **********************************************************

    progress = pyutils.progress.Progress(n_epoch)
    progress.start()

    if n_switch > 0:
        examples = list(batch_nll_example.split_random(mini_batch_size))
    else:
        examples = [batch_nll_example]

    cm_train, cm_test = None, None
    loss_history = []
    param_hists = []

    for i in range(n_epoch):
        logger.info("Iter %s" % i)
        if i >= n_switch:
            examples = [batch_nll_example]
        if args.shuffle_mini_batch and 0 < i < n_switch:
            examples = batch_nll_example.split_random(mini_batch_size)

        average_loss_val = []
        if not require_closure:
            for example in examples:
                optimizer.zero_grad()
                example.accumulate_value_and_gradient(
                    optimizer.get_value_accumulator(),
                    optimizer.get_gradient_accumulator())
                optimizer.average(example.size())
                logger.info("Accum loss: %.10f" %
                            optimizer.get_value_accumulator().get_value())
                average_loss_val.append(optimizer.get_value_accumulator().get_value())

                # *********************************************** DEBUG GRADIENT
                # numerical_gradient = NumericalGradient(1e-5)
                # for j, e in enumerate(example.examples):
                #     print(f"\rExample {j}/{len(example.examples)}", end="", flush=True)
                #     gradient_accum.clear()
                #     loss_val_accum.clear()
                #     e.accumulate_value_and_gradient(loss_val_accum, gradient_accum)
                #     for template in model.templates:
                #         for weights in template.get_weights():
                #             gradient = gradient_accum.get_value(weights)
                #             approx_gradients = numerical_gradient.compute_gradient(weights, lambda: nll_func(e))
                #             try:
                #                 np.testing.assert_almost_equal(gradient.numpy(), approx_gradients.numpy(), 6)
                #             except Exception:
                #                 logger.exception("Incorrect gradient...")
                #                 print(template, weights.val.tolist())
                #                 print(["%11.8f" % x for x in gradient.tolist()])
                #                 print(["%11.8f" % x for x in approx_gradients.tolist()])
                #                 print(["%11d" % int(np.isclose(x, y, rtol=0, atol=1e-6)) for x, y in zip(gradient, approx_gradients)])
                #                 # raise
                #     print("\n")
                # **************************************************************

                optimizer.step()
                reporter.loss_val(optimizer.get_value_accumulator().get_value(),
                                  global_step)
                global_step += 1
        else:
            for example in examples:
                def closure():
                    optimizer.zero_grad()
                    example.accumulate_value_and_gradient(
                        optimizer.get_value_accumulator(),
                        optimizer.get_gradient_accumulator())
                    optimizer.average(example.size())
                    optimizer.copy_grad()
                    return optimizer.get_value_accumulator().get_value()

                optimizer.step(closure)
                logger.info("Accum loss: %.10f" %
                            optimizer.get_value_accumulator().get_value())
                average_loss_val.append(optimizer.get_value_accumulator().get_value())
                reporter.loss_val(optimizer.get_value_accumulator().get_value(),
                                  global_step)
                global_step += 1

        if len(average_loss_val) > 1:
            logger.info("Average accum loss: %.10f" % np.average(average_loss_val))
        if optimizer.get_value_accumulator().get_value() < 0:
            break

        if i % args.n_iter_eval == 0 or i == n_epoch - 1:
            cm_train = evaluate(train_map_examples)
            cm_test = evaluate(test_map_examples) or cm_train
            logger.info('train (class_idx=0): %s',
                        cm_train.precision_recall_fbeta(class_idx=0))
            logger.info('train (class_idx=1): %s',
                        cm_train.precision_recall_fbeta(class_idx=1))
            logger.info('test (class_idx=0): %s',
                        cm_test.precision_recall_fbeta(class_idx=0))
            logger.info('test (class_idx=1): %s',
                        cm_test.precision_recall_fbeta(class_idx=1))
            reporter.precision_recall_fbeta(cm_train, global_step, group='train')
            reporter.precision_recall_fbeta(cm_test, global_step, group='test')

        loss_history.append(np.average(average_loss_val))
        param_hists.append(model.clone_parameters())
        if len(param_hists) > 3:
            param_hists.pop(0)

        if args.optimizer == "ADAM" and len(loss_history) > 4 and all(
                x - y > 0 for x, y in zip(loss_history[-3:], loss_history[-4:-1])):
            logger.info("Loss increased for 3 consecutive epochs. Stop training!")
            break

        progress.finish_one()

    if args.report_final_loss:
        loss_val_accum = ValueAccumulator()
        batch_nll_example.accumulate_value_and_gradient(loss_val_accum, None)
        logger.info("Average accum loss: %.10f" %
                    (loss_val_accum.get_value() / batch_nll_example.size()))

    logger.info("\n\r%s" % progress.summary())
    cm_train.pretty_print("** TRAIN **", precision_recall_fbeta=True,
                          output_stream=logger.info)
    cm_test.pretty_print("** TEST **", precision_recall_fbeta=True,
                         output_stream=logger.info)

    # save model and move everything into another folder for storage
    reporter.close()
    reporter.export(basedir / 'tensorboard_raw.json')

    # clear all caches
    for template in model.templates:
        if isinstance(template, CachedTemplateFactorConstructor):
            template.clear_cache()

    assert len(param_hists) == len(loss_history[-3:])
    min_loss, min_params, min_idx = min(
        zip(loss_history[-3:], param_hists, [-3, -2, -1]), key=lambda x: x[0])
    logger.info("Select parameters at index: %d. Loss = %s", min_idx, min_loss)
    model.update_parameters(min_params)

    serialize((model, tf_domain, pairwise_domain, optimizer.optimizer.state_dict()),
              basedir / 'gmtk_model.bin')
    save_evaluation_result(zip(train_map_examples, train_nll_examples),
                           basedir / 'train.output.json')
    save_evaluation_result(zip(test_map_examples, test_nll_examples),
                           basedir / 'test.output.json')
    serializeJSON(input_train_examples, basedir / "train.json")
    serializeJSON(input_test_examples, basedir / "test.json")

    # attempt to copy the log file
    try:
        logger.handlers[1].flush()
        shutil.copy(logger.handlers[1].file_handler.baseFilename,
                    str(basedir / "train.log"))
    except Exception:
        logger.exception("Cannot backup log...")

    model_id = get_latest_model_id(basedir) + 1
    move_current_files(basedir, model_id)
    logger.info("Save model %s", model_id)
    return model, tf_domain, pairwise_domain, optimizer.optimizer.state_dict()
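# Hypothetical invocation sketch; all argument values below are illustrative:
#
#   model, tf_domain, pairwise_domain, opt_state = train_model(
#       dataset="dataset", train_sids=train_sids, manual_seed=120,
#       train_examples=train_examples, test_examples=test_examples,
#       args=TrainingArgs(...), basedir=Path("debug/dataset/models"))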
def build_triple_features(self, triple: Triple,
                          domain: GrowableBinaryVectorDomain[str]) -> Triple:
    json_features = triple.example.link2features[triple.link.id]
    features = TripleFeatures(domain)
    triple_target = triple.target
    triple_source = triple.source
    triple_source_label = triple_source.label.decode('utf-8')

    # if 'multi_val_prob' in json_features:
    #     features += ("multi_val_prob", max(json_features['multi_val_prob'], 0.01))
    #     features += ("single_val_prob", max(1 - json_features['multi_val_prob'], 0.01))

    if json_features['p_link_given_so'] is not None:
        if triple_target.is_data_node():
            name = "(%s---%s)" % (triple_source_label,
                                  triple.link.label.decode("utf-8"))
            features += (f'{name}=True.p_semantic_type',
                         max(json_features['p_link_given_so'], 0.01))
            features += (f'{name}=False.p_semantic_type',
                         max(1 - json_features['p_link_given_so'], 0.01))
            # features += (f'{name}=True.delta_p_semantic_type',
            #              min(json_features['delta_stype_score'], -0.01))
            # features += (f'{name}=False.delta_p_semantic_type',
            #              max(-1 * json_features['delta_stype_score'], 0.01))
            features += (f'{name}=True.delta_p_semantic_type',
                         max(json_features['delta_stype_score'], 0.01))
            features += (f'{name}=False.delta_p_semantic_type',
                         max(-1 * json_features['delta_stype_score'], 0.01))
            features += (f'{name}=True.ratio_p_semantic_type',
                         1 / json_features['ratio_stype_score'])
            features += (f'{name}=False.ratio_p_semantic_type',
                         json_features['ratio_stype_score'])
            # features += (f'{name}=True.norm-p_semantic_type',
            #              max(json_features['p_link_given_so'] / json_features['total_stype_score'], 0.01))
            # features += (f'{name}=False.norm-p_semantic_type',
            #              max(1 - (json_features['p_link_given_so'] / json_features['total_stype_score']), 0.01))
            features += (f'{name}-order={json_features["stype_order"]}', 1)
            features += (f'{triple_source_label}=True.p_triple',
                         max(json_features['p_triple'], 0.01))
            features += (f'{triple_source_label}=False.p_triple',
                         max(1 - json_features['p_triple'], 0.01))
        else:
            features += (f'{triple_source_label}=True.p_triple',
                         max(json_features['p_triple'], 0.01))
            features += (f'{triple_source_label}=False.p_triple',
                         max(1 - json_features['p_triple'], 0.01))

    # if json_features['local_constraint'] is not None:
    #     features += (f"class={triple_source_label}=True.local_constraint",
    #                  max(json_features['local_constraint'], 0.01))
    #     features += (f"class={triple_source_label}=False.local_constraint",
    #                  max(1 - json_features['local_constraint'], 0.01))
    # if json_features['global_constraint'] is not None:
    #     features += (f"class={triple_source_label}=True.global_constraint",
    #                  max(json_features['global_constraint'], 0.01))
    #     features += (f"class={triple_source_label}=False.global_constraint",
    #                  max(1 - json_features['global_constraint'], 0.01))

    if json_features['stype_prob'] is not None:
        features += ("True.stype_prob", max(json_features['stype_prob'], 0.01))
        features += ("False.stype_prob", max(1 - json_features['stype_prob'], 0.01))

    if triple.target.is_class_node() and _(triple.siblings).all(
            lambda t: t.target.is_class_node()):
        # if domain.has_value("source_node_no_data_child") or not domain.is_frozen:
        features += (f"class={triple_source_label}.source_node_no_data_child", 1)
    if triple.target.is_class_node() and len(triple.siblings) == 0:
        # if domain.has_value("no_siblings") or not domain.is_frozen:
        features += (f"class={triple_source_label}.no_siblings", 1)
    if triple.parent is None:
        # if domain.has_value("no_parent_&_no_siblings") or not domain.is_frozen:
        features += (f"class={triple_source_label}.no_parent_&_no_siblings", 1)

    triple.features = features
    return triple
topK = 20
class_idx = 0

assert len(model.templates) == 2
triple_factor = model.templates[0]
triple_factor_weights = triple_factor.weights.view(2, -1)
features = [(tf_domain.get_category(i), x, triple_factor_weights[1, i])
            for i, x in enumerate(triple_factor_weights[0, :])]
features.sort(key=lambda x: x[1], reverse=True)
for f in features:
    print(f)

substructure = model.templates[1].weights
print(substructure)

# re-populate data and output evaluation
train_examples, test_examples = load_data(data_source)
_(train_examples, test_examples).iflatten().forall(
    lambda x: build_triple_features(x, tf_domain))
train_graphs = _(train_examples).submap(lambda t: t.label)
test_graphs = _(test_examples).submap(lambda t: t.label)

# get a detailed explanation of one link
get_example_detail('s00', 0, 'L014', model, test_graphs)

# NOTE: uncomment the code below to run the full evaluation
# inference = BeliefPropagation.get_constructor(InferProb.MARGINAL)  # , max_iter=5
# map_inference = BeliefPropagation.get_constructor(InferProb.MAP)  # , max_iter=5
#
# train_nll_examples = _(train_graphs).map(lambda vars: NegativeLogLikelihoodExample(vars, model, inference))
# train_map_examples = _(train_nll_examples).map(lambda example: MAPAssignmentExample(example, map_inference))
# test_nll_examples = _(test_graphs).map(lambda vars: NegativeLogLikelihoodExample(vars, model, inference))
# test_map_examples = _(test_nll_examples).map(lambda example: MAPAssignmentExample(example, map_inference))