def find_wh(text):
    """Return the WH type whose keywords occur in ``text``, or None.

    The text is lower-cased, stripped and passed to ``utils.tokenizer``.
    A WH type matches when at least one of its keywords from ``WH_TYPES``
    is found in the tokenizer's result.

    Args:
        text: Raw input string.

    Returns:
        The first matching key of ``WH_TYPES`` (in the mapping's iteration
        order), or None when no keyword matches.
    """
    words = utils.tokenizer(text.lower().strip())

    # Every match carries the same weight, so ranking the matches cannot
    # separate them; the winner would be decided by hash order. Returning
    # the first match in mapping order keeps the result reproducible.
    # ``items()`` is used because it exists on both Python 2 and 3 mappings.
    for wh_type, keywords in WH_TYPES.items():
        if any(keyword in words for keyword in keywords):
            return wh_type
    return None
def classify(text):
    """Return the category whose keywords occur in ``text``, or None.

    Processing steps, in order:
      1. ``utils.replace_slack_mentions`` is applied to the raw text.
      2. The result is lower-cased, stripped and tokenized with
         ``utils.tokenizer``.
      3. Each token is passed through ``corrector.correct``.
      4. The corrected tokens are compared against the keywords of every
         category in ``category_map``.

    Args:
        text: Raw input string.

    Returns:
        The first key of ``category_map`` (in the mapping's iteration
        order) that has at least one keyword among the corrected tokens,
        or None when nothing matches.
    """
    text = utils.replace_slack_mentions(text)

    # Correct all the spelling mistakes
    words = [
        corrector.correct(word)
        for word in utils.tokenizer(text.lower().strip())
    ]

    # Find exact matches of keywords. Every match carries the same weight,
    # so ranking the matches cannot separate them; the winner would be
    # decided by hash order. Returning the first match in mapping order
    # keeps the result reproducible. ``items()`` is used because it exists
    # on both Python 2 and 3 mappings.
    for category, keywords in category_map.items():
        if any(keyword in words for keyword in keywords):
            return category
    return None