Beispiel #1
0
def weight_corpus(corpus_inst, timbl_out, weight_func):
    """
    Store the predicted relation and its weight on every corpus instance.

    Instances are paired, in order, with successive items of timbl_out.
    Pairing inside a graph stops silently as soon as either the graph or
    timbl_out runs out of items.

    @param corpus_inst: CorpusInst instance; iterated to obtain graph
    instances, each of which is iterated to obtain the instances to update

    @param timbl_out: iterator over Timbl output, yielding pairs whose
    first element is the instance string (the second element is unused)

    @param weight_func: weighting function, called with the keyword
    arguments "category" and "distribution"
    """
    for graph_inst in corpus_inst:
        for inst, timbl_record in zip(graph_inst, timbl_out):
            inst_str, _neighbours = timbl_record
            parsed = parse_inst(inst_str,
                                with_distrib=True,
                                with_distance=True)
            # Only the predicted class and the distribution string are
            # needed; the remaining fields are unpacked and discarded
            _feats, _true_class, pred_class, distrib_str, _dist = parsed

            inst["pred_relation"] = pred_class
            class_distrib = parse_distrib(distrib_str)
            inst["pred_weight"] = weight_func(category=pred_class,
                                              distribution=class_distrib)
Beispiel #2
0
 def classify(self, instances):
      """
      Classify every instance with the Timbl client and store the predicted
      class ("pred_relation") and its weight ("pred_weight") on the instance.
      
      @param instances: numpy.ndarray instance 
      """
      for inst in instances:
          # Timbl receives the instance as one tab-separated line; the last
          # field of the instance is assumed to be the true class
          fields = [self._to_str(value) for value in inst]
          timbl_reply = self._client.classify("\t".join(fields))
          category = timbl_reply["CATEGORY"]
          inst["pred_relation"] = category
          # The client returns the distribution as an unparsed string, so
          # parse_distrib is used to obtain an iterator over
          # (class, count) pairs
          class_counts = parse_distrib(timbl_reply["DISTRIBUTION"])
          inst["pred_weight"] = self.weight_func(
              category=category,
              distribution=class_counts)