class FeatureExtractionFlowClassifierReducer:
    """Classify tab-separated feature records and report label agreement.

    Every record is classified with a ``POSContextSequenceModeler`` and the
    outcome is written to stdout as the ``str()`` of a tuple, one per line.
    """

    def __init__(self, saved_model_name):
        """Create the modeler and ask it to load the saved model.

        Args:
            saved_model_name: Model name or "/"-separated path. Only the
                component after the last "/" is passed to ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def process(self, line):
        """Classify one input line and write the comparison to stdout.

        The line must contain at least three tab-separated fields: the word,
        the expected category, and a Python literal holding the features.
        Blank lines are ignored. The emitted tuple is
        ``(word, result, category, flag, feature_dict, line)`` where ``flag``
        is "MATCH" when ``str(category) == str(result)`` and "NO-MATCH"
        otherwise.

        Any exception (missing fields, unparsable literal, classifier error)
        is reported on stderr together with the offending line; it is never
        propagated, so one bad record does not stop the caller.

        Args:
            line: Raw input record.
        """
        try:
            if line.strip() == "":
                return
            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]
            # literal_eval only accepts Python literals, so the feature
            # text cannot execute arbitrary code.
            result = self._instance.classify(literal_eval(feature_dict))
            flag = "MATCH" if str(category) == str(result) else "NO-MATCH"
            record = (word, result, category, flag, feature_dict, line)
            # Wrapping in a 1-tuple keeps %-formatting from unpacking record.
            sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: the latter is deprecated and
            # is empty for exceptions built from several arguments.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
class FeatureExtractionFlowClassifierReducer:
    """Classify tab-separated feature records and report label agreement.

    Every record is classified with a ``POSContextSequenceModeler`` and the
    outcome is written to stdout as the ``str()`` of a tuple, one per line.
    """

    def __init__(self, saved_model_name):
        """Create the modeler and ask it to load the saved model.

        Args:
            saved_model_name: Model name or "/"-separated path. Only the
                component after the last "/" is passed to ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def process(self, line):
        """Classify one input line and write the comparison to stdout.

        The line must contain at least three tab-separated fields: the word,
        the expected category, and a Python literal holding the features.
        Blank lines are ignored. The emitted tuple is
        ``(word, result, category, flag, feature_dict, line)`` where ``flag``
        is "MATCH" when ``str(category) == str(result)`` and "NO-MATCH"
        otherwise.

        Any exception (missing fields, unparsable literal, classifier error)
        is reported on stderr together with the offending line; it is never
        propagated, so one bad record does not stop the caller.

        Args:
            line: Raw input record.
        """
        try:
            if line.strip() == "":
                return
            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]
            # literal_eval only accepts Python literals, so the feature
            # text cannot execute arbitrary code.
            result = self._instance.classify(literal_eval(feature_dict))
            flag = "MATCH" if str(category) == str(result) else "NO-MATCH"
            record = (word, result, category, flag, feature_dict, line)
            # Wrapping in a 1-tuple keeps %-formatting from unpacking record.
            sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: the latter is deprecated and
            # is empty for exceptions built from several arguments.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
class MaltParsedFeatureExtractionFlowReducer:
    """Emit the records whose positive-label probability meets a threshold.

    Records are scored through ``self._instance.entity.prob_classify`` and
    only the positive ones are written to stdout, one tuple per line.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model, and store the threshold.

        Args:
            saved_model_name: Model name or "/"-separated path. Only the
                component after the last "/" is passed to ``load_model``.
            threshold_value: Minimum value of ``pos_prob * 100`` for a
                record to be treated as positive.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one input line and print it when it is a positive instance.

        The line must contain at least three tab-separated fields: the word,
        a Python literal holding the features, and a third field that is
        copied to the output unchanged. Blank lines and lines with fewer
        than three fields are skipped. Positive records are written as
        ``(word, "1", feature_dict, third_field, pos_prob)``.

        Both label probabilities are logged to stderr for every scored
        record. Exceptions raised while parsing or scoring are reported on
        stderr and not propagated.

        Args:
            line: Raw input record.
        """
        if line.strip() == "":
            return
        try:
            fields = line.split("\t")
            # Three fields are read below, so require all three up front
            # instead of failing with IndexError on two-field lines.
            if len(fields) < 3:
                return
            word = fields[0]
            feature_dict = fields[1]
            text = fields[2]
            # NOTE(review): prob_classify looks like an NLTK-style API
            # returning a distribution with .prob(label) - confirm.
            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))
            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                record = (word, "1", feature_dict, text, pos_prob)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: the latter is deprecated and
            # is empty for exceptions built from several arguments.
            sys.stderr.write("%s\n" % (ex,))
class MaltParsedFeatureExtractionFlowReducer:
    """Emit the records whose positive-label probability meets a threshold.

    Records are scored through ``self._instance.entity.prob_classify`` and
    only the positive ones are written to stdout, one tuple per line.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model, and store the threshold.

        Args:
            saved_model_name: Model name or "/"-separated path. Only the
                component after the last "/" is passed to ``load_model``.
            threshold_value: Minimum value of ``pos_prob * 100`` for a
                record to be treated as positive.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one input line and print it when it is a positive instance.

        The line must contain at least three tab-separated fields: the word,
        a Python literal holding the features, and a third field that is
        copied to the output unchanged. Blank lines and lines with fewer
        than three fields are skipped. Positive records are written as
        ``(word, "1", feature_dict, third_field, pos_prob)``.

        Both label probabilities are logged to stderr for every scored
        record. Exceptions raised while parsing or scoring are reported on
        stderr and not propagated.

        Args:
            line: Raw input record.
        """
        if line.strip() == "":
            return
        try:
            fields = line.split("\t")
            # Three fields are read below, so require all three up front
            # instead of failing with IndexError on two-field lines.
            if len(fields) < 3:
                return
            word = fields[0]
            feature_dict = fields[1]
            text = fields[2]
            # NOTE(review): prob_classify looks like an NLTK-style API
            # returning a distribution with .prob(label) - confirm.
            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))
            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                record = (word, "1", feature_dict, text, pos_prob)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: the latter is deprecated and
            # is empty for exceptions built from several arguments.
            sys.stderr.write("%s\n" % (ex,))
class CrossValidationFeatureExtractionFlowReducer:
    """Compare thresholded classifier decisions with the expected labels.

    Records are scored through ``self._instance.entity.prob_classify``; the
    decision and its agreement with the expected category are written to
    stdout as the ``str()`` of a tuple, one per line.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model, and store the threshold.

        Args:
            saved_model_name: Model name or "/"-separated path. Only the
                component after the last "/" is passed to ``load_model``.
            threshold_value: Minimum value of ``pos_prob * 100`` for a
                record to be labelled "1"; anything lower is labelled "-1".
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one input line and write the comparison to stdout.

        The line must contain at least three tab-separated fields: the word,
        the expected category, and a Python literal holding the features.
        Blank lines are ignored. The emitted tuple is
        ``(word, result, category, flag, feature_dict, line)`` where ``flag``
        is "MATCH" when the whitespace-stripped category equals the decided
        label and "NO-MATCH" otherwise.

        Both label probabilities are logged to stderr for every scored
        record. Any exception is reported on stderr together with the
        offending line and is not propagated.

        Args:
            line: Raw input record.
        """
        try:
            if line.strip() == "":
                return
            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]
            # NOTE(review): prob_classify looks like an NLTK-style API
            # returning a distribution with .prob(label) - confirm.
            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))
            if float(pos_prob) * 100 >= float(self.threshold):
                result = "1"
            else:
                result = "-1"
            # The category is compared stripped but emitted as received.
            flag = "MATCH" if str(category.strip()) == result else "NO-MATCH"
            record = (word, result, category, flag, feature_dict, line)
            # Wrapping in a 1-tuple keeps %-formatting from unpacking record.
            sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: the latter is deprecated and
            # is empty for exceptions built from several arguments.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
class CrossValidationFeatureExtractionFlowReducer:
    """Compare thresholded classifier decisions with the expected labels.

    Records are scored through ``self._instance.entity.prob_classify``; the
    decision and its agreement with the expected category are written to
    stdout as the ``str()`` of a tuple, one per line.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model, and store the threshold.

        Args:
            saved_model_name: Model name or "/"-separated path. Only the
                component after the last "/" is passed to ``load_model``.
            threshold_value: Minimum value of ``pos_prob * 100`` for a
                record to be labelled "1"; anything lower is labelled "-1".
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one input line and write the comparison to stdout.

        The line must contain at least three tab-separated fields: the word,
        the expected category, and a Python literal holding the features.
        Blank lines are ignored. The emitted tuple is
        ``(word, result, category, flag, feature_dict, line)`` where ``flag``
        is "MATCH" when the whitespace-stripped category equals the decided
        label and "NO-MATCH" otherwise.

        Both label probabilities are logged to stderr for every scored
        record. Any exception is reported on stderr together with the
        offending line and is not propagated.

        Args:
            line: Raw input record.
        """
        try:
            if line.strip() == "":
                return
            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]
            # NOTE(review): prob_classify looks like an NLTK-style API
            # returning a distribution with .prob(label) - confirm.
            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))
            if float(pos_prob) * 100 >= float(self.threshold):
                result = "1"
            else:
                result = "-1"
            # The category is compared stripped but emitted as received.
            flag = "MATCH" if str(category.strip()) == result else "NO-MATCH"
            record = (word, result, category, flag, feature_dict, line)
            # Wrapping in a 1-tuple keeps %-formatting from unpacking record.
            sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: the latter is deprecated and
            # is empty for exceptions built from several arguments.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))