Exemple #1
0
 def __init__(self, saved_model_name, threshold_value=85):
     """Load a saved model and remember the decision threshold.

     :param saved_model_name: model name or ``/``-separated path; only the
         segment after the last ``/`` is handed to ``load_model``.
     :param threshold_value: stored unchanged on ``self.threshold``.
     """
     self._instance = POSContextSequenceModeler()
     # Keep only the final path component as the model name.
     model_basename = saved_model_name.rsplit("/", 1)[-1]
     self._instance.load_model(name=model_basename)
     self.threshold = threshold_value
class FeatureExtractionFlowClassifierReducer:
    """Classify tab-separated feature records and report label agreement.

    Each record handed to :meth:`process` is expected to have the form
    ``word<TAB>category<TAB>feature_dict`` where ``feature_dict`` is a Python
    literal that ``literal_eval`` can parse.
    """

    def __init__(self, saved_model_name):
        """Load the saved model used for classification.

        :param saved_model_name: model name or ``/``-separated path; only the
            segment after the last ``/`` is handed to ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def process(self, line):
        """Classify one record and print the comparison tuple to stdout.

        The printed tuple is ``(word, result, category, flag, feature_dict,
        line)``; ``flag`` is ``"MATCH"`` when the predicted label equals the
        given category (both compared as strings), otherwise ``"NO-MATCH"``.
        Blank lines are ignored.  Failures are reported on stderr together
        with the offending line instead of being raised.

        :param line: raw input record.
        """
        try:
            if line.strip() == "":
                return

            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]

            result = self._instance.classify(literal_eval(feature_dict))

            if str(category) == str(result):
                flag = "MATCH"
            else:
                flag = "NO-MATCH"

            feature_category_tuple = (word, result, category, flag,
                                      feature_dict, line)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(feature_category_tuple)
        except Exception as ex:
            # Broad catch is deliberate: one bad record must not abort the
            # stream.  Format the exception itself rather than ``ex.message``,
            # which is deprecated, empty for multi-argument exceptions and
            # missing on Python 3.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
Exemple #3
0
class FeatureExtractionFlowClassifierReducer:
    """Classify tab-separated feature records and report label agreement.

    Each record handed to :meth:`process` is expected to have the form
    ``word<TAB>category<TAB>feature_dict`` where ``feature_dict`` is a Python
    literal that ``literal_eval`` can parse.
    """

    def __init__(self, saved_model_name):
        """Load the saved model used for classification.

        :param saved_model_name: model name or ``/``-separated path; only the
            segment after the last ``/`` is handed to ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def process(self, line):
        """Classify one record and print the comparison tuple to stdout.

        The printed tuple is ``(word, result, category, flag, feature_dict,
        line)``; ``flag`` is ``"MATCH"`` when the predicted label equals the
        given category (both compared as strings), otherwise ``"NO-MATCH"``.
        Blank lines are ignored.  Failures are reported on stderr together
        with the offending line instead of being raised.

        :param line: raw input record.
        """
        try:
            if line.strip() == "":
                return

            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]

            result = self._instance.classify(literal_eval(feature_dict))

            if str(category) == str(result):
                flag = "MATCH"
            else:
                flag = "NO-MATCH"

            feature_category_tuple = (word, result, category, flag,
                                      feature_dict, line)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(feature_category_tuple)
        except Exception as ex:
            # Broad catch is deliberate: one bad record must not abort the
            # stream.  Format the exception itself rather than ``ex.message``,
            # which is deprecated, empty for multi-argument exceptions and
            # missing on Python 3.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
 def __init__(self, saved_model_name, threshold_value=85):
     """Load a saved model and remember the decision threshold.

     :param saved_model_name: model name or ``/``-separated path; only the
         segment after the last ``/`` is handed to ``load_model``.
     :param threshold_value: stored unchanged on ``self.threshold``.
     """
     self._instance = POSContextSequenceModeler()
     # Keep only the final path component as the model name.
     model_basename = saved_model_name.rsplit("/", 1)[-1]
     self._instance.load_model(name=model_basename)
     self.threshold = threshold_value
Exemple #5
0
class MaltParsedFeatureExtractionFlowReducer:
    """Emit records whose positive-class probability reaches a threshold.

    Each record handed to :meth:`process` is expected to have the form
    ``word<TAB>feature_dict<TAB>text`` where ``feature_dict`` is a Python
    literal that ``literal_eval`` can parse.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Load the saved model and remember the decision threshold.

        :param saved_model_name: model name or ``/``-separated path; only the
            segment after the last ``/`` is handed to ``load_model``.
        :param threshold_value: percentage (compared against
            ``probability * 100``) the ``'1'`` label must reach.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one record and print it to stdout if it is positive.

        The probabilities of labels ``'1'`` and ``'-1'`` are written to
        stderr.  When the ``'1'`` probability times 100 is at least
        ``self.threshold`` the tuple ``(word, "1", feature_dict, text,
        pos_prob)`` is printed; negative records produce no stdout output.
        Blank lines and lines with fewer than three tab-separated fields are
        skipped.  Failures are reported on stderr instead of being raised.

        :param line: raw input record.
        """
        if line.strip() == "":
            return
        try:
            fields = line.split("\t")
            # Three fields are read below.  The earlier guard only required
            # one tab, so two-field lines fell through to an accidental
            # "list index out of range" instead of being skipped.
            if len(fields) < 3:
                return

            word = fields[0]
            feature_dict = fields[1]
            source_text = fields[2]

            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                feature_category_tuple = (word, "1", feature_dict,
                                          source_text, pos_prob)
                # Single-argument print(...) is the same on Python 2 and 3.
                print(feature_category_tuple)
        except Exception as ex:
            # Broad catch is deliberate: one bad record must not abort the
            # stream.  Format the exception itself rather than ``ex.message``,
            # which is deprecated, empty for multi-argument exceptions and
            # missing on Python 3.
            sys.stderr.write("%s\n" % (ex,))
class MaltParsedFeatureExtractionFlowReducer:
    """Emit records whose positive-class probability reaches a threshold.

    Each record handed to :meth:`process` is expected to have the form
    ``word<TAB>feature_dict<TAB>text`` where ``feature_dict`` is a Python
    literal that ``literal_eval`` can parse.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Load the saved model and remember the decision threshold.

        :param saved_model_name: model name or ``/``-separated path; only the
            segment after the last ``/`` is handed to ``load_model``.
        :param threshold_value: percentage (compared against
            ``probability * 100``) the ``'1'`` label must reach.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one record and print it to stdout if it is positive.

        The probabilities of labels ``'1'`` and ``'-1'`` are written to
        stderr.  When the ``'1'`` probability times 100 is at least
        ``self.threshold`` the tuple ``(word, "1", feature_dict, text,
        pos_prob)`` is printed; negative records produce no stdout output.
        Blank lines and lines with fewer than three tab-separated fields are
        skipped.  Failures are reported on stderr instead of being raised.

        :param line: raw input record.
        """
        if line.strip() == "":
            return
        try:
            fields = line.split("\t")
            # Three fields are read below.  The earlier guard only required
            # one tab, so two-field lines fell through to an accidental
            # "list index out of range" instead of being skipped.
            if len(fields) < 3:
                return

            word = fields[0]
            feature_dict = fields[1]
            source_text = fields[2]

            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                feature_category_tuple = (word, "1", feature_dict,
                                          source_text, pos_prob)
                # Single-argument print(...) is the same on Python 2 and 3.
                print(feature_category_tuple)
        except Exception as ex:
            # Broad catch is deliberate: one bad record must not abort the
            # stream.  Format the exception itself rather than ``ex.message``,
            # which is deprecated, empty for multi-argument exceptions and
            # missing on Python 3.
            sys.stderr.write("%s\n" % (ex,))
    def model(self):
        """Train and save models from a feature-set file or directory.

        Parses ``sys.argv`` through ``self.arg_obj`` and reads its
        ``feature_set_location`` argument.

        * If it is a file, one model is trained from it and saved under the
          ``model_name`` argument; help is shown when that argument is
          missing.
        * If it is a directory, every file in it ending with ``.txt`` is
          trained and saved as ``<name before the first dot>.model``.

        Models are saved with ``location="trained_models"``.
        """
        self.arg_obj.parse(sys.argv)

        if not self.check_params():
            self.show_help()

        args = self.arg_obj.args
        feature_set_location = args["feature_set_location"]

        if os.path.isfile(feature_set_location):
            # A single file needs an explicit model name to save under.
            # ``in`` replaces the deprecated dict.has_key().
            if "model_name" not in args:
                self.show_help()
                # NOTE(review): show_help() presumably exits; if it returns,
                # stop here instead of failing on the missing key below.
                return

            modeler = POSContextSequenceModeler(
                feature_set_location=feature_set_location)
            modeler.train()
            modeler.save_model(name=args["model_name"],
                               location="trained_models")

            print("ModelingStub: modeling done for given feature set file.")

        if os.path.isdir(feature_set_location):
            print("ModelingStub: looking into feature set directory...")

            # Only feature-set files with a .txt extension are considered.
            txt_files = [fn for fn in os.listdir(feature_set_location)
                         if fn.endswith('.txt')]

            for file_name in txt_files:
                path = os.path.join(feature_set_location, file_name)
                # Model name is the part of the file name before the first dot.
                model_name = file_name.split(".")[0] + ".model"

                modeler = POSContextSequenceModeler(feature_set_location=path)
                modeler.train()
                print("ModelingStub: trained the model.about to save.")
                modeler.save_model(name=model_name, location="trained_models")
                print("ModelingStub: modeling done for: %s" % (file_name,))

            print("ModelingStub: modeling done for all files in directory provided.")
class CrossValidationFeatureExtractionFlowReducer:
    """Threshold-classify labelled records and report label agreement.

    Each record handed to :meth:`process` is expected to have the form
    ``word<TAB>category<TAB>feature_dict`` where ``feature_dict`` is a Python
    literal that ``literal_eval`` can parse.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Load the saved model and remember the decision threshold.

        :param saved_model_name: model name or ``/``-separated path; only the
            segment after the last ``/`` is handed to ``load_model``.
        :param threshold_value: percentage (compared against
            ``probability * 100``) the ``'1'`` label must reach.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one record and print the comparison tuple to stdout.

        The probabilities of labels ``'1'`` and ``'-1'`` are written to
        stderr.  The predicted label is ``"1"`` when the ``'1'`` probability
        times 100 is at least ``self.threshold`` and ``"-1"`` otherwise.  The
        printed tuple is ``(word, result, category, flag, feature_dict,
        line)`` with ``flag`` set to ``"MATCH"`` when the whitespace-stripped
        category equals the prediction, else ``"NO-MATCH"``.  Blank lines are
        ignored; failures are reported on stderr with the offending line.

        :param line: raw input record.
        """
        try:
            if line.strip() == "":
                return

            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]

            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

            if float(pos_prob) * 100 >= float(self.threshold):
                result = "1"
            else:
                result = "-1"

            # ``result`` is a bare literal, so only the category needs
            # stripping before the comparison.
            if category.strip() == result:
                flag = "MATCH"
            else:
                flag = "NO-MATCH"

            feature_category_tuple = (word, result, category, flag,
                                      feature_dict, line)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(feature_category_tuple)
        except Exception as ex:
            # Broad catch is deliberate: one bad record must not abort the
            # stream.  Format the exception itself rather than ``ex.message``,
            # which is deprecated, empty for multi-argument exceptions and
            # missing on Python 3.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
class CrossValidationFeatureExtractionFlowReducer:
    """Threshold-classify labelled records and report label agreement.

    Each record handed to :meth:`process` is expected to have the form
    ``word<TAB>category<TAB>feature_dict`` where ``feature_dict`` is a Python
    literal that ``literal_eval`` can parse.
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Load the saved model and remember the decision threshold.

        :param saved_model_name: model name or ``/``-separated path; only the
            segment after the last ``/`` is handed to ``load_model``.
        :param threshold_value: percentage (compared against
            ``probability * 100``) the ``'1'`` label must reach.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one record and print the comparison tuple to stdout.

        The probabilities of labels ``'1'`` and ``'-1'`` are written to
        stderr.  The predicted label is ``"1"`` when the ``'1'`` probability
        times 100 is at least ``self.threshold`` and ``"-1"`` otherwise.  The
        printed tuple is ``(word, result, category, flag, feature_dict,
        line)`` with ``flag`` set to ``"MATCH"`` when the whitespace-stripped
        category equals the prediction, else ``"NO-MATCH"``.  Blank lines are
        ignored; failures are reported on stderr with the offending line.

        :param line: raw input record.
        """
        try:
            if line.strip() == "":
                return

            fields = line.split("\t")
            word = fields[0]
            category = fields[1]
            feature_dict = fields[2]

            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

            if float(pos_prob) * 100 >= float(self.threshold):
                result = "1"
            else:
                result = "-1"

            # ``result`` is a bare literal, so only the category needs
            # stripping before the comparison.
            if category.strip() == result:
                flag = "MATCH"
            else:
                flag = "NO-MATCH"

            feature_category_tuple = (word, result, category, flag,
                                      feature_dict, line)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(feature_category_tuple)
        except Exception as ex:
            # Broad catch is deliberate: one bad record must not abort the
            # stream.  Format the exception itself rather than ``ex.message``,
            # which is deprecated, empty for multi-argument exceptions and
            # missing on Python 3.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
Exemple #10
0
 def __init__(self, saved_model_name):
     """Load the saved model used by this object.

     :param saved_model_name: model name or ``/``-separated path; only the
         segment after the last ``/`` is handed to ``load_model``.
     """
     self._instance = POSContextSequenceModeler()
     # Keep only the final path component as the model name.
     model_basename = saved_model_name.rsplit("/", 1)[-1]
     self._instance.load_model(name=model_basename)
Exemple #11
0
    def model(self):
        """Train and save models from a feature-set file or directory.

        Parses ``sys.argv`` through ``self.arg_obj`` and reads its
        ``feature_set_location`` argument.

        * If it is a file, one model is trained from it and saved under the
          ``model_name`` argument; help is shown when that argument is
          missing.
        * If it is a directory, every file in it ending with ``.txt`` is
          trained and saved as ``<name before the first dot>.model``.

        Models are saved with ``location="trained_models"``.
        """
        self.arg_obj.parse(sys.argv)

        if not self.check_params():
            self.show_help()

        args = self.arg_obj.args
        feature_set_location = args["feature_set_location"]

        if os.path.isfile(feature_set_location):
            # A single file needs an explicit model name to save under.
            # ``in`` replaces the deprecated dict.has_key().
            if "model_name" not in args:
                self.show_help()
                # NOTE(review): show_help() presumably exits; if it returns,
                # stop here instead of failing on the missing key below.
                return

            modeler = POSContextSequenceModeler(
                feature_set_location=feature_set_location)
            modeler.train()
            modeler.save_model(name=args["model_name"],
                               location="trained_models")

            print("ModelingStub: modeling done for given feature set file.")

        if os.path.isdir(feature_set_location):
            print("ModelingStub: looking into feature set directory...")

            # Only feature-set files with a .txt extension are considered.
            txt_files = [fn for fn in os.listdir(feature_set_location)
                         if fn.endswith('.txt')]

            for file_name in txt_files:
                path = os.path.join(feature_set_location, file_name)
                # Model name is the part of the file name before the first dot.
                model_name = file_name.split(".")[0] + ".model"

                modeler = POSContextSequenceModeler(feature_set_location=path)
                modeler.train()
                print("ModelingStub: trained the model.about to save.")
                modeler.save_model(name=model_name, location="trained_models")
                print("ModelingStub: modeling done for: %s" % (file_name,))

            print("ModelingStub: modeling done for all files in directory provided.")
 def __init__(self, saved_model_name):
     """Load the saved model used by this object.

     :param saved_model_name: model name or ``/``-separated path; only the
         segment after the last ``/`` is handed to ``load_model``.
     """
     self._instance = POSContextSequenceModeler()
     # Keep only the final path component as the model name.
     model_basename = saved_model_name.rsplit("/", 1)[-1]
     self._instance.load_model(name=model_basename)