Python parseSentence Examples

Programming Language: Python

Namespace/Package Name: util

Method/Function: parseSentence

Examples at hotexamples.com: 6

Python parseSentence - 6 examples found. These are the top-rated real-world Python examples of util.parseSentence, extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

 def _extrParamW2V(self, model, reprList, ques):
     """
     Extract parameter words from a question using a word2vec model.

     Every word of the parsed question that is in the model vocabulary is
     compared, via ``model.similarity``, with each representative word that
     is also in the vocabulary. The per-word score is the maximum or the
     average of those similarities, selected by ``self.W2VCalcMethod``
     ('max' or 'avg').

     model    -- word2vec model exposing ``vocab`` and ``similarity``
     reprList -- iterable of representative words
     ques     -- question text, passed through ``ut.parseSentence``

     Returns a list of ``(word, score)`` tuples whose score is greater
     than ``self.THR``. Words that cannot be scored (no representative
     word in the vocabulary) are skipped.

     Raises Exception if ``model`` is None, and ValueError if
     ``self.W2VCalcMethod`` is neither 'max' nor 'avg'.
     """
     if model is None:
         raise Exception("W2VM Not Found")
     parsedQues = ut.parseSentence(ques)

     # The normalised representative words do not depend on the question
     # word, so they are computed once instead of once per word.
     knownReprs = []
     for w in reprList:
         replRepr = ut.replNum(w)
         if replRepr in model.vocab:
             knownReprs.append(replRepr)

     res = []
     for word in parsedQues.split(' '):
         replWord = ut.replNum(word)
         if replWord not in model.vocab:
             continue
         if not knownReprs:
             # Nothing to compare against: max() of an empty list and the
             # division by len() would both raise.
             continue
         sims = [model.similarity(r, replWord) for r in knownReprs]
         if self.W2VCalcMethod == 'max':
             score = max(sims)
         elif self.W2VCalcMethod == 'avg':
             score = sum(sims) / len(sims)
         else:
             # Previously fell through and reused a stale or unbound result.
             raise ValueError(
                 "Unknown W2VCalcMethod: {!r}".format(self.W2VCalcMethod))
         print('{} : {}'.format(word.encode('utf-8'), score))
         if score > self.THR:
             res.append((word, score))
     return res

Example #2

Show file

 def _buildIndvW2VM(self, cat, corpus):
     """
     Train the word2vec model of one category from its corpus.

     Each corpus entry is passed through ``ut.parseSentence`` and
     ``ut.replNum`` and then split on single spaces into tokens. The
     trained model is stored in ``self.indvW2VM[cat]`` and the object is
     saved afterwards.
     """
     tokenized = []
     for sentence in corpus:
         normalized = ut.replNum(ut.parseSentence(sentence))
         tokenized.append(normalized.split(' '))

     w2vModel = gensim.models.Word2Vec(
         tokenized, min_count=1, size=100, workers=12)
     self.indvW2VM[cat] = w2vModel
     self.save()

Example #3

Show file

 def _buildAllW2VM(self, allCorpus):
     """
     Train the shared word2vec model from the corpus of every category.

     NOTE: indvW2V holds one model per category, whereas allW2V is shared
     among categories, so it does not have to be rebuilt often.

     Every sentence in every value of ``allCorpus`` is passed through
     ``ut.parseSentence`` and ``ut.replNum`` and split on single spaces.
     The trained model is stored in ``self.allW2VM`` and the object is
     saved afterwards.
     """
     tokenized = []
     for corpus in allCorpus.values():
         for sentence in corpus:
             normalized = ut.replNum(ut.parseSentence(sentence))
             tokenized.append(normalized.split(' '))

     w2vModel = gensim.models.Word2Vec(
         tokenized, min_count=1, size=100, workers=12)
     self.allW2VM = w2vModel
     self.save()

Example #4

Show file

    def _learnParam(self, cat, feat, rawReprList):
        """
        Learn representative words of a feature.

        The raw representative words are joined with spaces, passed through
        ``ut.parseSentence`` and split on single spaces again. The resulting
        words are appended to ``self.reprDict[cat][feat]``, creating the
        category and feature entries when they do not exist yet, and the
        object is saved afterwards.

        cat         -- category key in ``self.reprDict``
        feat        -- feature key inside the category
        rawReprList -- iterable of raw representative word strings
        """
        reprList = ut.parseSentence(' '.join(rawReprList)).split(' ')

        # setdefault replaces the dict.has_key() checks: has_key is
        # deprecated (removed in Python 3) and needed two lookups per level.
        self.reprDict.setdefault(cat, {}).setdefault(feat, []).extend(reprList)

        self.save()

Example #5

Show file

    def build(self, allCorpus):
        """
        Build the classifier model from corpus.

        allCorpus -- dict mapping each category to its list of questions

        The questions are vectorized with a unigram+bigram TfidfVectorizer,
        the best ``C`` for a LinearSVC is searched with GridSearchCV, and a
        CalibratedClassifierCV wrapping a LinearSVC with that ``C`` is fitted
        and stored in ``self.clfModel``. ``self.categories`` and
        ``self.vectorizer`` are set as well, and the object is saved at the
        end.

        Raises ValueError if ``allCorpus`` is empty or a category has no
        questions.
        """
        if not allCorpus:
            raise ValueError('allCorpus is empty')

        # NOTE: each category has a different amount of corpus, so every
        # category is truncated to the size of the smallest one.
        cntPerCat = min(map(len, allCorpus.values()))
        if cntPerCat == 0:
            # Previously surfaced later as an opaque unpacking error.
            raise ValueError('every category needs at least one question')

        # Question and category lists for sklearn, built with extend()
        # instead of the quadratic sum(list_of_lists, []).
        quesList = []
        catList = []
        for cat, corpus in allCorpus.items():
            quesList.extend(corpus[:cntPerCat])
            catList.extend([cat] * cntPerCat)

        # Shuffle questions and categories together to build a better model.
        combined = list(zip(quesList, catList))
        random.shuffle(combined)
        quesList = [q for q, _ in combined]
        catList = [c for _, c in combined]

        # Materialize the keys so that the categories are indexable.
        self.categories = list(allCorpus.keys())
        catIndex = dict((cat, i) for i, cat in enumerate(self.categories))

        # We use TfidfVectorizer with unigrams and bigrams.
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2))
        Xlist = self.vectorizer.fit_transform(
            [ut.replNum(ut.parseSentence(x)) for x in quesList])
        Ylist = [catIndex[c] for c in catList]
        print('build prepared')

        # Search the best model.
        svc_param = {'C': np.logspace(-2, 0, 20)}
        print('build start!')
        gs_svc = GridSearchCV(LinearSVC(), svc_param, cv=5, n_jobs=8)
        gs_svc.fit(Xlist, Ylist)
        print(gs_svc.best_params_)
        print('score : ' + str(gs_svc.best_score_))
        print('make model using C parameter...')
        svm = LinearSVC(C=gs_svc.best_params_['C'])
        # Wrapped in CalibratedClassifierCV; predict() calls predict_proba
        # on this model.
        self.clfModel = CalibratedClassifierCV(base_estimator=svm)

        # Build the model.
        self.clfModel.fit(Xlist, Ylist)
        # Save the model.
        self.save()

Example #6

Show file

    def predict(self, ques):
        """
        Predict the category of a question.

        ques -- question text; anything that is not ``unicode`` is decoded
                as UTF-8 first

        Returns a list of ``(category, probability)`` tuples, one per entry
        of ``self.categories``, sorted by probability in descending order.

        Raises Exception if the vectorizer, the categories or the classifier
        model is not set yet.
        """
        # isinstance (rather than an exact type comparison) also leaves
        # subclasses of unicode undecoded.
        if not isinstance(ques, unicode):
            ques = ques.decode('utf-8')
        if (self.vectorizer is None or self.categories is None
                or self.clfModel is None):
            raise Exception('contextClf Not built yet')

        parsedQues = ut.parseSentence(ques)
        testX = self.vectorizer.transform([ut.replNum(parsedQues)])

        # Predict the probabilities; row 0 belongs to the single question.
        probs = self.clfModel.predict_proba(testX)[0]

        # Pair every category with its probability and sort, best first.
        res = [(cat, probs[i]) for i, cat in enumerate(self.categories)]
        return sorted(res, key=operator.itemgetter(1), reverse=True)