Beispiel #1
0
    def getTopics(self):
        """Retrain the classifier from the crawled pages and persist it.

        Steps, in order:

        1. ``traversePages("setTopicandTerms", 'topicmodel')`` supplies an
           object exposing ``topics`` and ``terms``.
        2. A fresh ``Classifier`` is initialised from those and passed through
           ``traversePages("retrainClassifier", 'article', ...)``; the
           ``classifier`` attribute of that result is used from then on.
        3. Weights are set, any previous ``classvectors.json`` is removed and
           the model is written out with ``tojson("classvectors")``.

        Returns:
            The literal string ``"test2"``.
        """
        result = self.traversePages("setTopicandTerms", 'topicmodel')

        classifier = Classifier()
        classifier.init(result.topics, result.terms).MinKey(2)

        classifier = self.traversePages("retrainClassifier", 'article',
                                        classifier).classifier
        classifier.setweights()

        # A missing file is not an error here: on a first run there is nothing
        # to delete yet. Any other OSError (permissions, ...) still propagates.
        try:
            os.remove("classvectors.json")
        except FileNotFoundError:
            pass

        # NOTE(review): presumably tojson() appends ".json" and so rewrites the
        # file removed above -- confirm against Classifier.tojson.
        classifier.tojson("classvectors")
        return "test2"
Beispiel #2
0
    def getCategory(self, query, option, alltopicsandkeys):
        """Classify *query* and return the outcome as a JSON string.

        Args:
            query: The document text to classify.
            option: When equal to ``1``, the query's own class vector is merged
                into the loaded model the first time a topic scores above zero.
            alltopicsandkeys: Object exposing ``topics`` and ``terms``; ``terms``
                is iterated with ``.items()``.

        Returns:
            ``json.dumps`` of a dict with the keys ``categoriesconfidence``
            (per-topic ``goodtopicscore``), ``categoriestop3``
            (``Counter.most_common(3)`` of the predicted topics),
            ``categories`` (the predicted topics), ``document`` (the query)
            and ``categorieswordmatch`` (``str()`` of each term vector).

        Side effects:
            Loads ``classvectors.json`` and always writes the (possibly
            merged) model back via ``tojson("classvectors")``.
        """
        topicdata = alltopicsandkeys
        combiner = Merger()
        model = Classifier()
        model.init(topicdata.topics, topicdata.terms).MinKey(2)
        model.load('classvectors.json')

        confidence = {}
        merged = False
        for topic, terms in topicdata.terms.items():
            score = model.goodtopicscore(terms, query.lower())
            confidence[topic] = score
            if option == 1 and score > 0 and not merged:
                # Merge at most once; topics scored after this point are
                # evaluated against the merged model.
                query_model = self.docToclassvector(query, topicdata)
                model = combiner.merge([model, query_model])
                merged = True

        model.tojson("classvectors")
        predicted = model.predict('model', query).getTopics()
        top_three = Counter(predicted).most_common(3)

        word_match = {
            name: str(vector) for name, vector in model.termVectors.items()
        }

        # Key order matches the order in which the fields are serialised.
        return json.dumps({
            'categoriesconfidence': confidence,
            'categoriestop3': top_three,
            'categories': predicted,
            'document': query,
            'categorieswordmatch': word_match,
        })