Exemple #1
0
class FeatureExtractionFlowClassifierReducer(object):
    """Classify tab-separated feature records and report label agreement.

    Each record is classified through ``self._instance.classify`` and the
    predicted label is compared with the label carried by the record.
    """

    def __init__(self, saved_model_name):
        """Create the modeler and load the saved model.

        Args:
            saved_model_name: Model name, optionally prefixed by a
                "/"-separated path. Only the text after the last "/" is
                passed to ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def process(self, line):
        """Classify one input record and write the outcome to stdout.

        Args:
            line: A record of at least three tab-separated fields: word,
                expected category, and a Python-literal feature dict
                (parsed with ``literal_eval`` -- presumably
                ``ast.literal_eval``; confirm against the file's imports).

        Blank lines are ignored.  For every other line the tuple
        ``(word, result, category, flag, feature_dict, line)`` is written
        to stdout, where ``flag`` is "MATCH" when the string forms of the
        predicted and expected labels are equal and "NO-MATCH" otherwise.
        Any exception is reported on stderr together with the offending
        line instead of being raised.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word = fields[0]
                category = fields[1]
                feature_dict = fields[2]

                result = self._instance.classify(literal_eval(feature_dict))
                if str(category) == str(result):
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"

                record = (word, result, category, flag, feature_dict, line)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is deprecated and
            # is empty for multi-argument exceptions such as the SyntaxError
            # raised for a malformed feature dict.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
class FeatureExtractionFlowClassifierReducer(object):
    """Classify tab-separated feature records and report label agreement.

    Each record is classified through ``self._instance.classify`` and the
    predicted label is compared with the label carried by the record.
    """

    def __init__(self, saved_model_name):
        """Create the modeler and load the saved model.

        Args:
            saved_model_name: Model name, optionally prefixed by a
                "/"-separated path. Only the text after the last "/" is
                passed to ``load_model``.
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])

    def process(self, line):
        """Classify one input record and write the outcome to stdout.

        Args:
            line: A record of at least three tab-separated fields: word,
                expected category, and a Python-literal feature dict
                (parsed with ``literal_eval`` -- presumably
                ``ast.literal_eval``; confirm against the file's imports).

        Blank lines are ignored.  For every other line the tuple
        ``(word, result, category, flag, feature_dict, line)`` is written
        to stdout, where ``flag`` is "MATCH" when the string forms of the
        predicted and expected labels are equal and "NO-MATCH" otherwise.
        Any exception is reported on stderr together with the offending
        line instead of being raised.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word = fields[0]
                category = fields[1]
                feature_dict = fields[2]

                result = self._instance.classify(literal_eval(feature_dict))
                if str(category) == str(result):
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"

                record = (word, result, category, flag, feature_dict, line)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is deprecated and
            # is empty for multi-argument exceptions such as the SyntaxError
            # raised for a malformed feature dict.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
Exemple #3
0
class MaltParsedFeatureExtractionFlowReducer(object):
    """Emit the records whose positive-class probability meets a threshold.

    Probabilities come from ``self._instance.entity.prob_classify``; the
    labels queried are '1' (positive) and '-1' (negative).
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model and store the threshold.

        Args:
            saved_model_name: Model name, optionally prefixed by a
                "/"-separated path. Only the text after the last "/" is
                passed to ``load_model``.
            threshold_value: Minimum positive probability, expressed as a
                percentage (it is compared with ``pos_prob * 100``).
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one record and write it to stdout if it is positive.

        Args:
            line: A record of at least three tab-separated fields: word,
                a Python-literal feature dict, and a third field that is
                echoed unchanged in the output.

        Blank lines and lines with fewer than three fields are skipped.
        Both probabilities are logged to stderr.  When
        ``pos_prob * 100 >= threshold`` the tuple
        ``(word, "1", feature_dict, third_field, pos_prob)`` is written to
        stdout; negative records produce no stdout output.  Any exception
        is reported on stderr instead of being raised.
        """
        if line.strip() == "":
            return
        try:
            # Three fields are read below, so at least two tabs are needed.
            if line.count("\t") < 2:
                return
            fields = line.split("\t")
            word = fields[0]
            feature_dict = fields[1]
            source_text = fields[2]

            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                record = (word, "1", feature_dict, source_text, pos_prob)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is deprecated and
            # is empty for multi-argument exceptions.
            sys.stderr.write("%s\n" % (ex,))
class MaltParsedFeatureExtractionFlowReducer(object):
    """Emit the records whose positive-class probability meets a threshold.

    Probabilities come from ``self._instance.entity.prob_classify``; the
    labels queried are '1' (positive) and '-1' (negative).
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model and store the threshold.

        Args:
            saved_model_name: Model name, optionally prefixed by a
                "/"-separated path. Only the text after the last "/" is
                passed to ``load_model``.
            threshold_value: Minimum positive probability, expressed as a
                percentage (it is compared with ``pos_prob * 100``).
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Score one record and write it to stdout if it is positive.

        Args:
            line: A record of at least three tab-separated fields: word,
                a Python-literal feature dict, and a third field that is
                echoed unchanged in the output.

        Blank lines and lines with fewer than three fields are skipped.
        Both probabilities are logged to stderr.  When
        ``pos_prob * 100 >= threshold`` the tuple
        ``(word, "1", feature_dict, third_field, pos_prob)`` is written to
        stdout; negative records produce no stdout output.  Any exception
        is reported on stderr instead of being raised.
        """
        if line.strip() == "":
            return
        try:
            # Three fields are read below, so at least two tabs are needed.
            if line.count("\t") < 2:
                return
            fields = line.split("\t")
            word = fields[0]
            feature_dict = fields[1]
            source_text = fields[2]

            prob_dist = self._instance.entity.prob_classify(
                literal_eval(feature_dict))
            pos_prob = prob_dist.prob('1')
            neg_prob = prob_dist.prob('-1')
            sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
            sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

            # Output only positive instances.
            if float(pos_prob) * 100 >= float(self.threshold):
                record = (word, "1", feature_dict, source_text, pos_prob)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is deprecated and
            # is empty for multi-argument exceptions.
            sys.stderr.write("%s\n" % (ex,))
class CrossValidationFeatureExtractionFlowReducer(object):
    """Threshold-classify labelled records and report label agreement.

    Probabilities come from ``self._instance.entity.prob_classify``; the
    labels queried are '1' (positive) and '-1' (negative).
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model and store the threshold.

        Args:
            saved_model_name: Model name, optionally prefixed by a
                "/"-separated path. Only the text after the last "/" is
                passed to ``load_model``.
            threshold_value: Minimum positive probability, expressed as a
                percentage (it is compared with ``pos_prob * 100``).
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Classify one labelled record and write the outcome to stdout.

        Args:
            line: A record of at least three tab-separated fields: word,
                expected category, and a Python-literal feature dict.

        Blank lines are ignored.  Both probabilities are logged to stderr.
        The predicted label is "1" when ``pos_prob * 100 >= threshold`` and
        "-1" otherwise.  The tuple
        ``(word, result, category, flag, feature_dict, line)`` is written
        to stdout, where ``flag`` is "MATCH" when the whitespace-stripped
        category equals the predicted label and "NO-MATCH" otherwise.  Any
        exception is reported on stderr together with the offending line
        instead of being raised.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word = fields[0]
                category = fields[1]
                feature_dict = fields[2]

                prob_dist = self._instance.entity.prob_classify(
                    literal_eval(feature_dict))
                pos_prob = prob_dist.prob('1')
                neg_prob = prob_dist.prob('-1')
                sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
                sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

                if float(pos_prob) * 100 >= float(self.threshold):
                    result = "1"
                else:
                    result = "-1"

                if str(category.strip()) == result:
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"

                record = (word, result, category, flag, feature_dict, line)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is deprecated and
            # is empty for multi-argument exceptions such as the SyntaxError
            # raised for a malformed feature dict.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))
class CrossValidationFeatureExtractionFlowReducer(object):
    """Threshold-classify labelled records and report label agreement.

    Probabilities come from ``self._instance.entity.prob_classify``; the
    labels queried are '1' (positive) and '-1' (negative).
    """

    def __init__(self, saved_model_name, threshold_value=85):
        """Create the modeler, load the saved model and store the threshold.

        Args:
            saved_model_name: Model name, optionally prefixed by a
                "/"-separated path. Only the text after the last "/" is
                passed to ``load_model``.
            threshold_value: Minimum positive probability, expressed as a
                percentage (it is compared with ``pos_prob * 100``).
        """
        self._instance = POSContextSequenceModeler()
        self._instance.load_model(name=saved_model_name.split("/")[-1])
        self.threshold = threshold_value

    def process(self, line):
        """Classify one labelled record and write the outcome to stdout.

        Args:
            line: A record of at least three tab-separated fields: word,
                expected category, and a Python-literal feature dict.

        Blank lines are ignored.  Both probabilities are logged to stderr.
        The predicted label is "1" when ``pos_prob * 100 >= threshold`` and
        "-1" otherwise.  The tuple
        ``(word, result, category, flag, feature_dict, line)`` is written
        to stdout, where ``flag`` is "MATCH" when the whitespace-stripped
        category equals the predicted label and "NO-MATCH" otherwise.  Any
        exception is reported on stderr together with the offending line
        instead of being raised.
        """
        try:
            if line.strip() != "":
                fields = line.split("\t")
                word = fields[0]
                category = fields[1]
                feature_dict = fields[2]

                prob_dist = self._instance.entity.prob_classify(
                    literal_eval(feature_dict))
                pos_prob = prob_dist.prob('1')
                neg_prob = prob_dist.prob('-1')
                sys.stderr.write(" pos_prob: %s\n" % (pos_prob,))
                sys.stderr.write(" neg_prob: %s\n" % (neg_prob,))

                if float(pos_prob) * 100 >= float(self.threshold):
                    result = "1"
                else:
                    result = "-1"

                if str(category.strip()) == result:
                    flag = "MATCH"
                else:
                    flag = "NO-MATCH"

                record = (word, result, category, flag, feature_dict, line)
                sys.stdout.write("%s\n" % (record,))
        except Exception as ex:
            # str(ex) rather than ex.message: ``message`` is deprecated and
            # is empty for multi-argument exceptions such as the SyntaxError
            # raised for a malformed feature dict.
            sys.stderr.write("%s \n for : %s\n" % (ex, line))