def process_product(product_name_dict, cat_model, dang_model, logger):
    """Categorise one product record, consulting the cache first.

    The product name is read from ``product_name_dict['prd']``. Its cache
    key is built from the name with ``ALPHA_NUM_REGEX`` matches removed and
    lower-cased, so the key depends on the product name only.

    Args:
        product_name_dict: Mapping describing the product. ``'prd'`` holds
            the product name and ``'wbn'`` (optional, defaults to ``""``) is
            forwarded to the dangerous-goods check. The mapping itself is
            never modified; a deep copy is returned.
        cat_model: Category model bundle, forwarded to ``predict_category``.
        dang_model: Object exposing ``dg_keywords``.
        logger: Logger forwarded to the prediction helpers.

    Returns:
        A deep copy of ``product_name_dict`` with an extra ``'result'`` key:

        * ``{'invalid_product_name': True}`` when ``'prd'`` is missing or
          falsy;
        * the prediction dict plus ``'cached': False`` on a cache miss;
        * the cached prediction with a freshly computed ``'dg'`` value and
          ``'cached': True`` on a cache hit;
        * whatever falsy value ``predict_category`` returned (``None`` when
          it failed) -- in that case nothing is written to the cache.
    """
    # NOTE(review): `process_product` is defined a second time further down
    # in this file; the later definition wins at import time.
    product_name = product_name_dict.get('prd', "")

    # Copy before branching: the invalid-name path needs it too. It used to
    # be created only inside the valid-name branch, which made the
    # fall-through raise UnboundLocalError.
    final_result = copy.deepcopy(product_name_dict)

    if not product_name:
        final_result['result'] = {'invalid_product_name': True}
        return final_result

    product_name_clean = re.sub(ALPHA_NUM_REGEX, '', product_name).lower()
    # The double colon is kept on purpose: changing the key format would
    # orphan entries that are already cached.
    product_name_key = 'catfight:' + ':' + product_name_clean
    results_cache = r.get(product_name_key)
    wbn = product_name_dict.get('wbn', "")

    if not results_cache:
        results = predict_category(product_name.encode('ascii', 'ignore'),
                                   wbn, cat_model, dang_model, logger)
        if results:
            # Stored before the 'cached' flag is added, so the flag never
            # ends up inside the cached payload.
            # NOTE(review): (key, value, ttl) is the legacy redis-py
            # ``Redis.setex`` argument order; newer clients expect
            # (key, ttl, value) -- confirm against the client behind `r`.
            r.setex(product_name_key, json.dumps(results), CACHE_EXPIRY)
            results['cached'] = False
    else:
        results = json.loads(results_cache)
        clean_product_name = " ".join(
            re.findall(CLEAN_PRODUCT_NAME_REGEX, product_name.lower()))
        # The cached 'dg' value is always overwritten. Presumably this is
        # because the check also receives `wbn`, which is not part of the
        # cache key -- confirm.
        dg_report = predict_dangerous(clean_product_name, wbn,
                                      results['cat'],
                                      dang_model.dg_keywords, logger)
        results['dg'] = dg_report['dangerous']
        results['cached'] = True

    final_result['result'] = results
    return final_result
def process_product(product_name_dict, cat_model, dang_model, logger):
    """Categorise one product record, consulting the cache first.

    The product name is read from ``product_name_dict['prd']``. Its cache
    key is built from the name with ``ALPHA_NUM_REGEX`` matches removed and
    lower-cased, so the key depends on the product name only.

    Args:
        product_name_dict: Mapping describing the product. ``'prd'`` holds
            the product name and ``'wbn'`` (optional, defaults to ``""``) is
            forwarded to the dangerous-goods check. The mapping itself is
            never modified; a deep copy is returned.
        cat_model: Category model bundle, forwarded to ``predict_category``.
        dang_model: Object exposing ``dg_keywords``.
        logger: Logger forwarded to the prediction helpers.

    Returns:
        A deep copy of ``product_name_dict`` with an extra ``'result'`` key:

        * ``{'invalid_product_name': True}`` when ``'prd'`` is missing or
          falsy;
        * the prediction dict plus ``'cached': False`` on a cache miss;
        * the cached prediction with a freshly computed ``'dg'`` value and
          ``'cached': True`` on a cache hit;
        * whatever falsy value ``predict_category`` returned (``None`` when
          it failed) -- in that case nothing is written to the cache.
    """
    product_name = product_name_dict.get('prd', "")

    # Copy before branching: the invalid-name path needs it too. It used to
    # be created only inside the valid-name branch, which made the
    # fall-through raise UnboundLocalError.
    final_result = copy.deepcopy(product_name_dict)

    if not product_name:
        final_result['result'] = {'invalid_product_name': True}
        return final_result

    product_name_clean = re.sub(ALPHA_NUM_REGEX, '', product_name).lower()
    # The double colon is kept on purpose: changing the key format would
    # orphan entries that are already cached.
    product_name_key = 'catfight:' + ':' + product_name_clean
    results_cache = r.get(product_name_key)
    wbn = product_name_dict.get('wbn', "")

    if not results_cache:
        results = predict_category(product_name.encode('ascii', 'ignore'),
                                   wbn, cat_model, dang_model, logger)
        if results:
            # Stored before the 'cached' flag is added, so the flag never
            # ends up inside the cached payload.
            # NOTE(review): (key, value, ttl) is the legacy redis-py
            # ``Redis.setex`` argument order; newer clients expect
            # (key, ttl, value) -- confirm against the client behind `r`.
            r.setex(product_name_key, json.dumps(results), CACHE_EXPIRY)
            results['cached'] = False
    else:
        results = json.loads(results_cache)
        clean_product_name = " ".join(
            re.findall(CLEAN_PRODUCT_NAME_REGEX, product_name.lower()))
        # The cached 'dg' value is always overwritten. Presumably this is
        # because the check also receives `wbn`, which is not part of the
        # cache key -- confirm.
        dg_report = predict_dangerous(clean_product_name, wbn,
                                      results['cat'],
                                      dang_model.dg_keywords, logger)
        results['dg'] = dg_report['dangerous']
        results['cached'] = True

    final_result['result'] = results
    return final_result
def predict_category(product_name, wbn, cat_model, dang_model, logger):
    """Predict the category, sub-category and dangerous-goods flag.

    The classifiers are fed the lower-cased product name as-is; the
    regex-cleaned name is used only for the dangerous-goods check.

    Args:
        product_name: Product name; must support ``.lower()``.
        wbn: Forwarded unchanged to ``predict_dangerous``.
        cat_model: Object exposing ``vectorizer``, ``clf_bayes``,
            ``clf_chi``, ``clf_rf``, the per-category mappings
            ``second_level_vectorizer``, ``second_level_clf_bayes``,
            ``second_level_clf_fpr``, ``second_level_clf_rf``, and the
            containers ``second_level_cat_names_set_nb`` /
            ``second_level_cat_names_set_rf``.
        dang_model: Object exposing ``dg_keywords``.
        logger: Logger; ``logger.error`` is called on failure.

    Returns:
        ``{'cat': ..., 'scat': ..., 'dg': ...}`` on success, where ``'scat'``
        is ``""`` when the first-level category is in neither second-level
        set. ``None`` when any exception is raised inside the prediction;
        the exception is logged and passed to
        ``sentry_client.captureException`` rather than propagated.
    """
    # NOTE(review): `predict_category` is defined a second time further down
    # in this file; the later definition wins at import time.
    try:
        l_product_name = product_name.lower()
        clean_product_name = " ".join(
            re.findall(CLEAN_PRODUCT_NAME_REGEX, l_product_name))

        vectorizer = cat_model.vectorizer
        clf_bayes = cat_model.clf_bayes
        clf_chi = cat_model.clf_chi
        clf_rf = cat_model.clf_rf

        second_level_vectorizer = cat_model.second_level_vectorizer
        second_level_clf_bayes = cat_model.second_level_clf_bayes
        second_level_clf_fpr = cat_model.second_level_clf_fpr
        second_level_clf_rf = cat_model.second_level_clf_rf

        # Vectorise once and share the features between all three
        # first-level classifiers (this used to be recomputed per call).
        features = vectorizer.transform([l_product_name])
        class1 = clf_bayes.predict(features)[0]
        class2_prob_vector = clf_chi.predict_proba(features)[0]
        class3_prob_vector = clf_rf.predict_proba(features)[0]

        # A probability vector whose entries are all equal expresses no
        # preference, so it is mapped to the catch-all label.
        # NOTE(review): the argmax of clf_chi / clf_rf probabilities is
        # looked up in clf_bayes.classes_; that is only right if all three
        # classifiers share the same class ordering -- confirm.
        others = "Delhivery_Others"
        if len(np.unique(class2_prob_vector)) == 1:
            class2 = others
        else:
            class2 = clf_bayes.classes_[np.argmax(class2_prob_vector)]
        if len(np.unique(class3_prob_vector)) == 1:
            class3 = others
        else:
            class3 = clf_bayes.classes_[np.argmax(class3_prob_vector)]

        # Priority chain equivalent to the former if/elif ladder: take
        # class3 unless it is the catch-all, then class2 unless it is the
        # catch-all, otherwise fall back to class1.
        if class3 != others:
            first_level = class3
        elif class2 != others:
            first_level = class2
        else:
            first_level = class1

        def pick_second_level(proba_clf):
            # Shared by both second-level branches. Uses `proba_clf` for the
            # probabilities, and falls back to the per-category bayes
            # prediction when those probabilities are all equal.
            sub_features = second_level_vectorizer[first_level].transform(
                [l_product_name])
            prob_vector = proba_clf.predict_proba(sub_features)[0]
            bayes_clf = second_level_clf_bayes[first_level]
            if len(np.unique(prob_vector)) == 1:
                return bayes_clf.predict(sub_features)[0]
            return bayes_clf.classes_[np.argmax(prob_vector)]

        second_level = ""
        if first_level in cat_model.second_level_cat_names_set_nb:
            second_level = pick_second_level(
                second_level_clf_fpr[first_level])
        elif first_level in cat_model.second_level_cat_names_set_rf:
            second_level = pick_second_level(
                second_level_clf_rf[first_level])

        dg_report = predict_dangerous(clean_product_name, wbn, first_level,
                                      dang_model.dg_keywords, logger)

        return {
            'cat': first_level,
            'scat': second_level,
            'dg': dg_report['dangerous'],
        }

    except Exception as err:
        logger.error('Exception {} occurred against product: {}'.format(
            err, product_name))
        sentry_client.captureException(message="predict.py: Exception occured",
                                       extra={
                                           "error": err,
                                           "product_name": product_name
                                       })
        # Best-effort contract: callers receive None instead of an exception.
        return None
def predict_category(product_name, wbn, cat_model, dang_model, logger):
    """Predict the category, sub-category and dangerous-goods flag.

    The classifiers are fed the lower-cased product name as-is; the
    regex-cleaned name is used only for the dangerous-goods check.

    Args:
        product_name: Product name; must support ``.lower()``.
        wbn: Forwarded unchanged to ``predict_dangerous``.
        cat_model: Object exposing ``vectorizer``, ``clf_bayes``,
            ``clf_chi``, ``clf_rf``, the per-category mappings
            ``second_level_vectorizer``, ``second_level_clf_bayes``,
            ``second_level_clf_fpr``, ``second_level_clf_rf``, and the
            containers ``second_level_cat_names_set_nb`` /
            ``second_level_cat_names_set_rf``.
        dang_model: Object exposing ``dg_keywords``.
        logger: Logger; ``logger.error`` is called on failure.

    Returns:
        ``{'cat': ..., 'scat': ..., 'dg': ...}`` on success, where ``'scat'``
        is ``""`` when the first-level category is in neither second-level
        set. ``None`` when any exception is raised inside the prediction;
        the exception is logged and passed to
        ``sentry_client.captureException`` rather than propagated.
    """
    try:
        l_product_name = product_name.lower()
        clean_product_name = " ".join(
            re.findall(CLEAN_PRODUCT_NAME_REGEX, l_product_name))

        vectorizer = cat_model.vectorizer
        clf_bayes = cat_model.clf_bayes
        clf_chi = cat_model.clf_chi
        clf_rf = cat_model.clf_rf

        second_level_vectorizer = cat_model.second_level_vectorizer
        second_level_clf_bayes = cat_model.second_level_clf_bayes
        second_level_clf_fpr = cat_model.second_level_clf_fpr
        second_level_clf_rf = cat_model.second_level_clf_rf

        # Vectorise once and share the features between all three
        # first-level classifiers (this used to be recomputed per call).
        features = vectorizer.transform([l_product_name])
        class1 = clf_bayes.predict(features)[0]
        class2_prob_vector = clf_chi.predict_proba(features)[0]
        class3_prob_vector = clf_rf.predict_proba(features)[0]

        # A probability vector whose entries are all equal expresses no
        # preference, so it is mapped to the catch-all label.
        # NOTE(review): the argmax of clf_chi / clf_rf probabilities is
        # looked up in clf_bayes.classes_; that is only right if all three
        # classifiers share the same class ordering -- confirm.
        others = "Delhivery_Others"
        if len(np.unique(class2_prob_vector)) == 1:
            class2 = others
        else:
            class2 = clf_bayes.classes_[np.argmax(class2_prob_vector)]
        if len(np.unique(class3_prob_vector)) == 1:
            class3 = others
        else:
            class3 = clf_bayes.classes_[np.argmax(class3_prob_vector)]

        # Priority chain equivalent to the former if/elif ladder: take
        # class3 unless it is the catch-all, then class2 unless it is the
        # catch-all, otherwise fall back to class1.
        if class3 != others:
            first_level = class3
        elif class2 != others:
            first_level = class2
        else:
            first_level = class1

        def pick_second_level(proba_clf):
            # Shared by both second-level branches. Uses `proba_clf` for the
            # probabilities, and falls back to the per-category bayes
            # prediction when those probabilities are all equal.
            sub_features = second_level_vectorizer[first_level].transform(
                [l_product_name])
            prob_vector = proba_clf.predict_proba(sub_features)[0]
            bayes_clf = second_level_clf_bayes[first_level]
            if len(np.unique(prob_vector)) == 1:
                return bayes_clf.predict(sub_features)[0]
            return bayes_clf.classes_[np.argmax(prob_vector)]

        second_level = ""
        if first_level in cat_model.second_level_cat_names_set_nb:
            second_level = pick_second_level(
                second_level_clf_fpr[first_level])
        elif first_level in cat_model.second_level_cat_names_set_rf:
            second_level = pick_second_level(
                second_level_clf_rf[first_level])

        dg_report = predict_dangerous(clean_product_name, wbn, first_level,
                                      dang_model.dg_keywords, logger)

        return {
            'cat': first_level,
            'scat': second_level,
            'dg': dg_report['dangerous'],
        }

    except Exception as err:
        logger.error(
            'Exception {} occurred against product: {}'.format(
                err, product_name))
        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "product_name": product_name})
        # Best-effort contract: callers receive None instead of an exception.
        return None