def getTopics(self):
    """Rebuild the classifier from the crawled pages and write it out.

    Steps, in order:
      1. ``traversePages("setTopicandTerms", 'topicmodel')`` supplies an
         object exposing ``topics`` and ``terms``.
      2. A fresh ``Classifier`` is initialised from those two attributes and
         ``MinKey(2)`` is applied to the result of ``init``.
      3. ``traversePages("retrainClassifier", 'article', classifier)`` is
         run and its ``classifier`` attribute replaces the local one.
      4. ``setweights()`` is called, any existing ``classvectors.json`` is
         deleted, and the classifier is exported with
         ``tojson("classvectors")``.

    Returns:
        The literal string ``"test2"``.
    """
    result = self.traversePages("setTopicandTerms", 'topicmodel')

    classifier = Classifier()
    classifier.init(result.topics, result.terms).MinKey(2)

    retrained = self.traversePages("retrainClassifier", 'article', classifier)
    classifier = retrained.classifier
    classifier.setweights()

    # The export file may not exist yet (e.g. on the very first run); a
    # missing file must not abort the rebuild.
    try:
        os.remove("classvectors.json")
    except FileNotFoundError:
        pass

    # NOTE(review): presumably tojson("classvectors") recreates
    # classvectors.json -- confirm against Classifier.tojson.
    classifier.tojson("classvectors")
    return "test2"
def getCategory(self, query, option, alltopicsandkeys):
    """Score *query* against every topic and return the result as JSON.

    Args:
        query: Document text to categorise; ``query.lower()`` is used for
            the per-topic scoring, the unmodified text for prediction.
        option: When equal to ``1``, the first topic that scores above zero
            triggers a one-time merge of the query's class vector (from
            ``self.docToclassvector``) into the classifier.
        alltopicsandkeys: Object exposing ``topics`` and ``terms``;
            ``terms`` must provide ``.items()`` yielding
            ``(topic_key, term_list)`` pairs.

    Returns:
        A JSON string with the keys ``categoriesconfidence`` (topic key ->
        ``goodtopicscore`` value), ``categoriestop3`` (three most common
        predicted topics with counts), ``categories`` (all predicted
        topics), ``document`` (the original query) and
        ``categorieswordmatch`` (topic key -> ``str`` of its term vector).
    """
    result = alltopicsandkeys
    merger = Merger()

    classifier = Classifier()
    classifier.init(result.topics, result.terms).MinKey(2)
    classifier.load('classvectors.json')

    lowered_query = query.lower()  # loop-invariant, computed once
    confidence = {}
    merged = False
    for topic, terms in result.terms.items():
        score = classifier.goodtopicscore(terms, lowered_query)
        confidence[topic] = score
        # Merge the query vector at most once, on the first positive score.
        # Later topics are then scored against the merged classifier.
        if option == 1 and score > 0 and not merged:
            queryvector = self.docToclassvector(query, result)
            classifier = merger.merge([classifier, queryvector])
            merged = True

    # NOTE(review): the model is written back on every call, even when no
    # merge happened -- confirm this is intended.
    classifier.tojson("classvectors")

    predicted_topics = classifier.predict('model', query).getTopics()
    # Three most frequent predicted topics.
    top_three = Counter(predicted_topics).most_common(3)

    # Keys are inserted in a fixed order so the JSON layout stays stable.
    outcome = {}
    outcome['categoriesconfidence'] = confidence
    outcome['categoriestop3'] = top_three
    outcome['categories'] = predicted_topics
    outcome['document'] = query
    outcome['categorieswordmatch'] = {
        topic: str(vector) for topic, vector in classifier.termVectors.items()
    }
    return json.dumps(outcome)