def api():
    """Preprocess the ``variable`` request argument and return it as JSON.

    Reads ``request.args['variable']``, converts it with ``str`` and passes
    it to ``preprocess``. The JSON payload built by ``jsonify`` has three
    keys:

    * ``date``     -- the current local date formatted as ``dd/mm/YYYY``
    * ``response`` -- whatever ``preprocess`` returned
    * ``message``  -- a fixed thank-you string
    """
    # Run the preprocessing before taking the timestamp, so the date
    # reflects the moment the response is assembled.
    processed = preprocess(str(request.args['variable']))
    today = datetime.datetime.now().strftime("%d/%m/%Y")
    return jsonify(
        date=today,
        response=processed,
        message="Thanks For Using Text Process",
    )
def train(self, text, label):
    """Add one labelled sample to ``self.training_data`` and save it.

    ``text`` is run through ``preprocess``; the result is iterated as a
    sequence of words. The method then:

    * increments the global ``total_data_set`` counter,
    * increments the ``docs`` counter of ``label`` (creating the label's
      record on first sight),
    * for every word, increments the label's ``total_keywords`` counter and
      the per-word count under ``keywords``,
    * calls ``self.save()`` once all counters are updated.
    """
    words = preprocess(text)

    store = self.training_data
    store['total_data_set'] += 1

    labels = store['labels']
    if label not in labels:
        # First sample for this label: start from an empty record. ``docs``
        # starts at 0 because it is bumped unconditionally just below.
        labels[label] = {'docs': 0, 'keywords': {}, 'total_keywords': 0}
    record = labels[label]
    record['docs'] += 1

    counts = record['keywords']
    for word in words:
        record['total_keywords'] += 1
        counts[word] = counts.get(word, 0) + 1

    self.save()
def classify(self, text):
    """Return the trained label with the highest score for ``text``.

    ``text`` is run through ``preprocess`` and the result is treated as a
    sized sequence of words. For every label in
    ``self.training_data['labels']`` the score is::

        log10(1 + docs / total_data_set)
        + sum(log10((count(word) + 1) / (docs + len(words))) for each word)

    where ``count(word)`` is the label's stored keyword count (0 when the
    word was never seen for that label).

    The per-label scores are printed to stdout before returning.

    If no sample has been trained yet (``total_data_set == 0``) a message
    is printed and the process is terminated through ``sys.exit(0)``.
    """
    training = self.training_data
    total_docs = training['total_data_set']
    if total_docs == 0:
        # NOTE(review): this terminates the interpreter with a *success*
        # status; kept as-is because callers may rely on it.
        print('Training data empty: Please train the system before executing classify')
        sys.exit(0)

    data = preprocess(text)
    label_weights = {}
    for label, stats in training['labels'].items():
        docs = stats['docs']
        keywords = stats['keywords']

        # float() forces true division. With Python 2 integer division the
        # ratio floors to 0 for every label that does not own all
        # documents, which makes the prior meaningless.
        prior = math.log10(1 + float(docs) / total_docs)

        # Loop-invariant denominator, as a float for the same reason: a
        # floored ratio of 0 would make math.log10 raise ValueError.
        # NOTE(review): this is not textbook Laplace smoothing (which would
        # use total_keywords + vocabulary size); formula kept unchanged.
        denominator = float(docs + len(data))

        term_weight = 0
        for word in data:
            term_weight += math.log10((keywords.get(word, 0) + 1) / denominator)

        label_weights[label] = prior + term_weight

    print(label_weights)
    # Same tie-breaking as max() over the items: the first label in
    # iteration order with the highest score wins.
    return max(label_weights, key=label_weights.get)