def __init__(self, saved_model_name, threshold_value=85):
    """Load a saved model and remember the decision threshold.

    :param saved_model_name: model name, optionally prefixed by a
        "/"-separated path; only the text after the last "/" is handed to
        ``load_model``.
    :param threshold_value: stored unchanged on ``self.threshold``.
    """
    self._instance = POSContextSequenceModeler()
    # Keep only the final path component of the given name.
    base_name = saved_model_name.rsplit("/", 1)[-1]
    self._instance.load_model(name=base_name)
    self.threshold = threshold_value
class FeatureExtractionFlowClassifierReducer:
    """Classify tab-separated feature lines with a previously saved model.

    Each line handed to :meth:`process` is expected to look like
    ``word<TAB>category<TAB>feature_dict``. The feature dict literal is
    parsed, classified by the loaded ``POSContextSequenceModeler`` and the
    outcome is printed to stdout as a tuple. Failures are reported on stderr
    and never propagate to the caller.
    """

    def __init__(self, saved_model_name):
        """Load the saved model.

        :param saved_model_name: model name, optionally prefixed by a
            "/"-separated path; only the last component is passed to
            ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def _log(self, *items):
        """Write *items* to stderr, space-separated, ending with a newline."""
        sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    def process(self, line):
        """Classify one input line and print the comparison tuple.

        Blank lines are ignored. For every other line the tuple
        ``(word, result, category, flag, feature_dict, line)`` is printed,
        where ``flag`` is ``"MATCH"`` when the string forms of the expected
        category and the classifier result are equal, else ``"NO-MATCH"``.

        :param line: raw input line with at least three tab-separated fields.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word, category, feature_dict = fields[0], fields[1], fields[2]
                # literal_eval is presumably ast.literal_eval imported at
                # file level -- it only accepts Python literals.
                features = literal_eval(feature_dict)
                result = self._instance.classify(features)
                if str(category) == str(result):
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"
                feature_category_tuple = (word, result, category, flag,
                                          feature_dict, line)
                print(feature_category_tuple)
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is blank whenever
            # the exception was built with other than exactly one argument
            # (e.g. the SyntaxError raised for a malformed feature dict).
            self._log(str(ex), "\n for :", line)
class FeatureExtractionFlowClassifierReducer:
    """Classify tab-separated feature lines with a previously saved model.

    Each line handed to :meth:`process` is expected to look like
    ``word<TAB>category<TAB>feature_dict``. The feature dict literal is
    parsed, classified by the loaded ``POSContextSequenceModeler`` and the
    outcome is printed to stdout as a tuple. Failures are reported on stderr
    and never propagate to the caller.
    """

    def __init__(self, saved_model_name):
        """Load the saved model.

        :param saved_model_name: model name, optionally prefixed by a
            "/"-separated path; only the last component is passed to
            ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def _log(self, *items):
        """Write *items* to stderr, space-separated, ending with a newline."""
        sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    def process(self, line):
        """Classify one input line and print the comparison tuple.

        Blank lines are ignored. For every other line the tuple
        ``(word, result, category, flag, feature_dict, line)`` is printed,
        where ``flag`` is ``"MATCH"`` when the string forms of the expected
        category and the classifier result are equal, else ``"NO-MATCH"``.

        :param line: raw input line with at least three tab-separated fields.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word, category, feature_dict = fields[0], fields[1], fields[2]
                # literal_eval is presumably ast.literal_eval imported at
                # file level -- it only accepts Python literals.
                features = literal_eval(feature_dict)
                result = self._instance.classify(features)
                if str(category) == str(result):
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"
                feature_category_tuple = (word, result, category, flag,
                                          feature_dict, line)
                print(feature_category_tuple)
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is blank whenever
            # the exception was built with other than exactly one argument
            # (e.g. the SyntaxError raised for a malformed feature dict).
            self._log(str(ex), "\n for :", line)
def __init__(self, saved_model_name, threshold_value = 85):
    """Load a saved model and remember the decision threshold.

    :param saved_model_name: model name, optionally prefixed by a
        "/"-separated path; only the text after the last "/" is handed to
        ``load_model``.
    :param threshold_value: stored unchanged on ``self.threshold``.
    """
    self._instance = POSContextSequenceModeler()
    # Keep only the final path component of the given name.
    base_name = saved_model_name.rsplit("/", 1)[-1]
    self._instance.load_model(name=base_name)
    self.threshold = threshold_value
class MaltParsedFeatureExtractionFlowReducer:
    """Emit the input lines that a saved model scores as positive.

    Each line handed to :meth:`process` is expected to look like
    ``word<TAB>feature_dict<TAB>text``. The probability the loaded model
    assigns to label ``'1'`` is compared, as a percentage, with the
    configured threshold; only lines reaching the threshold are printed.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Load the saved model and store the threshold.

        :param saved_model_name: model name, optionally prefixed by a
            "/"-separated path; only the last component is passed to
            ``load_model``.
        :param threshold_value: minimum percentage (compared against
            ``prob('1') * 100``) for a line to be reported as positive.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def _log(self, *items):
        """Write *items* to stderr, space-separated, ending with a newline."""
        sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    def process(self, line):
        """Score one input line and print it when it is positive.

        Blank lines and lines with fewer than three tab-separated fields are
        skipped. Both class probabilities are logged to stderr. A positive
        line is printed to stdout as
        ``(word, "1", feature_dict, text, pos_prob)``. Errors are logged to
        stderr and never propagate.

        :param line: raw input line.
        """
        if line.strip() == "":
            return
        try:
            fields = line.split("\t")
            # Three fields are read below, so require all three up front;
            # the old ``count("\t") >= 1`` guard let two-field lines through
            # only to fail with an IndexError.
            if len(fields) < 3:
                return
            word, feature_dict, text = fields[0], fields[1], fields[2]
            # literal_eval is presumably ast.literal_eval imported at file
            # level -- it only accepts Python literals.
            features = literal_eval(feature_dict)

            prob_dist = self._instance.entity.prob_classify(features)
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            self._log(" pos_prob:", pos_prob)
            self._log(" neg_prob:", neg_prob)

            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                feature_category_tuple = (word, "1", feature_dict, text,
                                          pos_prob)
                print(feature_category_tuple)
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is blank whenever
            # the exception was built with other than exactly one argument.
            self._log(str(ex))
class MaltParsedFeatureExtractionFlowReducer:
    """Emit the input lines that a saved model scores as positive.

    Each line handed to :meth:`process` is expected to look like
    ``word<TAB>feature_dict<TAB>text``. The probability the loaded model
    assigns to label ``'1'`` is compared, as a percentage, with the
    configured threshold; only lines reaching the threshold are printed.
    """

    def __init__(self, saved_model_name, threshold_value = 85):
        """Load the saved model and store the threshold.

        :param saved_model_name: model name, optionally prefixed by a
            "/"-separated path; only the last component is passed to
            ``load_model``.
        :param threshold_value: minimum percentage (compared against
            ``prob('1') * 100``) for a line to be reported as positive.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def _log(self, *items):
        """Write *items* to stderr, space-separated, ending with a newline."""
        sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    def process(self, line):
        """Score one input line and print it when it is positive.

        Blank lines and lines with fewer than three tab-separated fields are
        skipped. Both class probabilities are logged to stderr. A positive
        line is printed to stdout as
        ``(word, "1", feature_dict, text, pos_prob)``. Errors are logged to
        stderr and never propagate.

        :param line: raw input line.
        """
        if line.strip() == "":
            return
        try:
            fields = line.split("\t")
            # Three fields are read below, so require all three up front;
            # the old ``count("\t") >= 1`` guard let two-field lines through
            # only to fail with an IndexError.
            if len(fields) < 3:
                return
            word, feature_dict, text = fields[0], fields[1], fields[2]
            # literal_eval is presumably ast.literal_eval imported at file
            # level -- it only accepts Python literals.
            features = literal_eval(feature_dict)

            prob_dist = self._instance.entity.prob_classify(features)
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            self._log(" pos_prob:", pos_prob)
            self._log(" neg_prob:", neg_prob)

            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                feature_category_tuple = (word, "1", feature_dict, text,
                                          pos_prob)
                print(feature_category_tuple)
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is blank whenever
            # the exception was built with other than exactly one argument.
            self._log(str(ex))
def model(self):
    """Train and save models for the feature set(s) named on the command line.

    ``sys.argv`` is parsed through ``self.arg_obj``; the
    ``feature_set_location`` argument selects the mode:

    * a file: it is trained on and saved under the ``model_name`` argument,
      which is required in this mode;
    * a directory: every ``.txt`` file in it is trained on and saved as
      ``<text before the first dot>.model``.

    Models are saved with ``location="trained_models"``. When the parameter
    check fails, or ``model_name`` is missing in file mode, the help text is
    shown and nothing is trained.
    """
    self.arg_obj.parse(sys.argv)
    if not self.check_params():
        self.show_help()
        # NOTE(review): show_help() may already exit the process; if it
        # returns, stop here instead of training with invalid parameters.
        return

    args = self.arg_obj.args
    location = args["feature_set_location"]

    if os.path.isfile(location):
        # A single feature set file needs an explicit model name to save as.
        if "model_name" not in args:
            self.show_help()
            # Without this return the lookup below raises KeyError whenever
            # show_help() returns normally.
            return
        model_name = args["model_name"]
        modeler = POSContextSequenceModeler(feature_set_location=location)
        modeler.train()
        modeler.save_model(name=model_name, location="trained_models")
        print("ModelingStub: modeling done for given feature set file.")

    if os.path.isdir(location):
        print("ModelingStub: looking into feature set directory...")
        # Only feature set files with a .txt extension are considered.
        for file_name in os.listdir(location):
            if not file_name.endswith('.txt'):
                continue
            model_name = file_name.split(".")[0] + ".model"
            modeler = POSContextSequenceModeler(
                feature_set_location=os.path.join(location, file_name))
            modeler.train()
            print("ModelingStub: trained the model.about to save.")
            modeler.save_model(name=model_name, location="trained_models")
            print("%s %s" % ("ModelingStub: modeling done for:", file_name))
        print("ModelingStub: modeling done for all files in directory provided.")
class CrossValidationFeatureExtractionFlowReducer:
    """Compare thresholded model predictions with the expected category.

    Each line handed to :meth:`process` is expected to look like
    ``word<TAB>category<TAB>feature_dict``. The probability the loaded model
    assigns to label ``'1'`` is compared, as a percentage, with the
    configured threshold to pick ``"1"`` or ``"-1"``, and the prediction is
    printed next to the expected category.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Load the saved model and store the threshold.

        :param saved_model_name: model name, optionally prefixed by a
            "/"-separated path; only the last component is passed to
            ``load_model``.
        :param threshold_value: minimum percentage (compared against
            ``prob('1') * 100``) for a line to be predicted as ``"1"``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def _log(self, *items):
        """Write *items* to stderr, space-separated, ending with a newline."""
        sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    def process(self, line):
        """Predict one input line and print the comparison tuple.

        Blank lines are ignored. Both class probabilities are logged to
        stderr. The tuple
        ``(word, result, category, flag, feature_dict, line)`` is printed to
        stdout, where ``flag`` is ``"MATCH"`` when the whitespace-stripped
        category equals the prediction, else ``"NO-MATCH"``. Errors are
        logged to stderr and never propagate.

        :param line: raw input line with at least three tab-separated fields.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word, category, feature_dict = fields[0], fields[1], fields[2]
                # literal_eval is presumably ast.literal_eval imported at
                # file level -- it only accepts Python literals.
                features = literal_eval(feature_dict)

                prob_dist = self._instance.entity.prob_classify(features)
                pos_prob = prob_dist.prob('1')
                neg_prob = prob_dist.prob('-1')
                self._log(" pos_prob:", pos_prob)
                self._log(" neg_prob:", neg_prob)

                if float(pos_prob) * 100 >= float(self.threshold):
                    result = "1"
                else:
                    result = "-1"

                if str(category.strip()) == result:
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"
                feature_category_tuple = (word, result, category, flag,
                                          feature_dict, line)
                print(feature_category_tuple)
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is blank whenever
            # the exception was built with other than exactly one argument
            # (e.g. the SyntaxError raised for a malformed feature dict).
            self._log(str(ex), "\n for :", line)
class CrossValidationFeatureExtractionFlowReducer:
    """Compare thresholded model predictions with the expected category.

    Each line handed to :meth:`process` is expected to look like
    ``word<TAB>category<TAB>feature_dict``. The probability the loaded model
    assigns to label ``'1'`` is compared, as a percentage, with the
    configured threshold to pick ``"1"`` or ``"-1"``, and the prediction is
    printed next to the expected category.
    """

    def __init__(self, saved_model_name, threshold_value = 85):
        """Load the saved model and store the threshold.

        :param saved_model_name: model name, optionally prefixed by a
            "/"-separated path; only the last component is passed to
            ``load_model``.
        :param threshold_value: minimum percentage (compared against
            ``prob('1') * 100``) for a line to be predicted as ``"1"``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def _log(self, *items):
        """Write *items* to stderr, space-separated, ending with a newline."""
        sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    def process(self, line):
        """Predict one input line and print the comparison tuple.

        Blank lines are ignored. Both class probabilities are logged to
        stderr. The tuple
        ``(word, result, category, flag, feature_dict, line)`` is printed to
        stdout, where ``flag`` is ``"MATCH"`` when the whitespace-stripped
        category equals the prediction, else ``"NO-MATCH"``. Errors are
        logged to stderr and never propagate.

        :param line: raw input line with at least three tab-separated fields.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word, category, feature_dict = fields[0], fields[1], fields[2]
                # literal_eval is presumably ast.literal_eval imported at
                # file level -- it only accepts Python literals.
                features = literal_eval(feature_dict)

                prob_dist = self._instance.entity.prob_classify(features)
                pos_prob = prob_dist.prob('1')
                neg_prob = prob_dist.prob('-1')
                self._log(" pos_prob:", pos_prob)
                self._log(" neg_prob:", neg_prob)

                if float(pos_prob) * 100 >= float(self.threshold):
                    result = "1"
                else:
                    result = "-1"

                if str(category.strip()) == result:
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"
                feature_category_tuple = (word, result, category, flag,
                                          feature_dict, line)
                print(feature_category_tuple)
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is blank whenever
            # the exception was built with other than exactly one argument
            # (e.g. the SyntaxError raised for a malformed feature dict).
            self._log(str(ex), "\n for :", line)
def __init__(self, saved_model_name):
    """Load a saved model into a fresh ``POSContextSequenceModeler``.

    :param saved_model_name: model name, optionally prefixed by a
        "/"-separated path; only the text after the last "/" is handed to
        ``load_model``.
    """
    self._instance = POSContextSequenceModeler()
    # Keep only the final path component of the given name.
    base_name = saved_model_name.rsplit("/", 1)[-1]
    self._instance.load_model(name=base_name)
def model(self):
    """Train and save models for the feature set(s) named on the command line.

    ``sys.argv`` is parsed through ``self.arg_obj``; the
    ``feature_set_location`` argument selects the mode:

    * a file: it is trained on and saved under the ``model_name`` argument,
      which is required in this mode;
    * a directory: every ``.txt`` file in it is trained on and saved as
      ``<text before the first dot>.model``.

    Models are saved with ``location="trained_models"``. When the parameter
    check fails, or ``model_name`` is missing in file mode, the help text is
    shown and nothing is trained.
    """
    self.arg_obj.parse(sys.argv)
    if not self.check_params():
        self.show_help()
        # NOTE(review): show_help() may already exit the process; if it
        # returns, stop here instead of training with invalid parameters.
        return

    args = self.arg_obj.args
    location = args["feature_set_location"]

    if os.path.isfile(location):
        # A single feature set file needs an explicit model name to save as.
        if "model_name" not in args:
            self.show_help()
            # Without this return the lookup below raises KeyError whenever
            # show_help() returns normally.
            return
        model_name = args["model_name"]
        modeler = POSContextSequenceModeler(feature_set_location=location)
        modeler.train()
        modeler.save_model(name=model_name, location="trained_models")
        print("ModelingStub: modeling done for given feature set file.")

    if os.path.isdir(location):
        print("ModelingStub: looking into feature set directory...")
        # Only feature set files with a .txt extension are considered.
        for file_name in os.listdir(location):
            if not file_name.endswith('.txt'):
                continue
            model_name = file_name.split(".")[0] + ".model"
            modeler = POSContextSequenceModeler(
                feature_set_location=os.path.join(location, file_name))
            modeler.train()
            print("ModelingStub: trained the model.about to save.")
            modeler.save_model(name=model_name, location="trained_models")
            print("%s %s" % ("ModelingStub: modeling done for:", file_name))
        print("ModelingStub: modeling done for all files in directory provided.")
def __init__(self, saved_model_name):
    """Load a saved model into a fresh ``POSContextSequenceModeler``.

    :param saved_model_name: model name, optionally prefixed by a
        "/"-separated path; only the text after the last "/" is handed to
        ``load_model``.
    """
    self._instance = POSContextSequenceModeler()
    # Keep only the final path component of the given name.
    base_name = saved_model_name.rsplit("/", 1)[-1]
    self._instance.load_model(name=base_name)