def weight_corpus(corpus_inst, timbl_out, weight_func):
    """
    Weight corpus instances

    Pairs every instance of every graph_inst in corpus_inst with the next
    item of Timbl output and stores the predicted class and its weight in
    the instance's "pred_relation" and "pred_weight" fields. Instances are
    updated in place; nothing is returned.

    If the Timbl output runs out before the instances do, the remaining
    instances are silently left untouched.

    @param corpus_inst: CorpusInst instance
    @param timbl_out: iterable over Timbl output; every item must unpack
    into two values, of which only the first (the instance string) is used
    @param weight_func: weighting function, called with the keyword
    arguments "category" and "distribution"
    """
    # Take a single iterator up front, so the Timbl output is consumed
    # sequentially across all graphs. Without this, a re-iterable argument
    # (e.g. a list) would restart from its first item for every graph_inst.
    # For a true iterator, iter() simply returns the object itself.
    timbl_iter = iter(timbl_out)

    for graph_inst in corpus_inst:
        # graph_inst must stay the first argument of zip: arguments are
        # advanced left to right, so when the graph is exhausted no item is
        # pulled from (and lost for the next graph in) the Timbl output.
        for inst, (inst_str, _k_nn_list) in zip(graph_inst, timbl_iter):
            # Only the predicted class and the distribution are needed
            _feats_str, _true_class, pred_class, distrib_str, _distance = \
                parse_inst(inst_str, with_distrib=True, with_distance=True)

            inst["pred_relation"] = pred_class
            inst["pred_weight"] = weight_func(
                category=pred_class,
                distribution=parse_distrib(distrib_str))
def classify(self, instances):
    """
    Add predicted class and associated weight to instances

    Every instance is converted to a tab-separated string, passed to the
    Timbl client for classification, and then updated in place: the
    "pred_relation" field receives the predicted category and the
    "pred_weight" field the value returned by self.weight_func.

    @param instances: numpy.ndarray instance
    """
    for record in instances:
        # Assumes that last field in instance is the true class
        field_strings = map(self._to_str, record)
        response = self._client.classify("\t".join(field_strings))

        category = response["CATEGORY"]
        record["pred_relation"] = category

        # The Timbl client is lazy and leaves the distribution as an
        # unparsed string, so parse_distrib is used to obtain an iterator
        # over (class, count) pairs
        class_counts = parse_distrib(response["DISTRIBUTION"])
        record["pred_weight"] = self.weight_func(
            category=category,
            distribution=class_counts)