Esempio n. 1
0
def find_wh(text):
    """Return the WH type whose keywords occur in ``text``, or ``None``.

    The text is lower-cased, stripped and handed to ``utils.tokenizer``. A WH
    type from ``WH_TYPES`` matches when at least one of its keywords is
    contained in the tokenizer's result.

    Args:
        text: The string to inspect.

    Returns:
        The first key of ``WH_TYPES`` (in that mapping's iteration order)
        with a matching keyword, or ``None`` when nothing matches.
    """
    # NOTE(review): assumes utils.tokenizer returns a collection of tokens,
    # so ``in`` below is a whole-token membership test -- confirm.
    words = utils.tokenizer(text.lower().strip())

    # ``items()`` exists on both Python 2 and Python 3; ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3.
    for wh_type, keywords in WH_TYPES.items():
        # Every match used to receive the same weight, so the winner was
        # decided by set iteration order. Returning the first match in
        # WH_TYPES order keeps the result independent of hash ordering.
        if any(keyword in words for keyword in keywords):
            return wh_type
    return None
Esempio n. 2
0
def classify(text):
    """Return the category whose keywords occur in ``text``, or ``None``.

    Processing steps, in order:

    1. ``utils.replace_slack_mentions`` is applied to the raw text.
    2. The result is lower-cased, stripped and tokenized with
       ``utils.tokenizer``.
    3. Every token is passed through ``corrector.correct``.
    4. A category from ``category_map`` matches when at least one of its
       keywords equals one of the corrected tokens.

    Args:
        text: The message to classify.

    Returns:
        The first key of ``category_map`` (in that mapping's iteration
        order) with a matching keyword, or ``None`` when nothing matches.
    """
    text = utils.replace_slack_mentions(text)

    # Correct spelling mistakes token by token before matching keywords.
    words = [
        corrector.correct(word)
        for word in utils.tokenizer(text.lower().strip())
    ]

    # ``items()`` exists on both Python 2 and Python 3; ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3.
    for category, keywords in category_map.items():
        # Only exact keyword/token matches count. Every match used to get the
        # same weight, so the winner was decided by set iteration order;
        # returning the first match in category_map order keeps the result
        # independent of hash ordering.
        if any(keyword in words for keyword in keywords):
            return category
    return None