예제 #1
0
def get_category(list_product_names, job_id):
    """Categorise every product record of a job and collect the responses.

    Each record is checked with ``validate_product_args``. Valid records are
    handed to ``process_product``; invalid ones produce the validation error
    merged with the record's own fields. A record whose handling raises is
    logged, reported to Sentry and left out of the output, so the remaining
    records are still processed.

    Args:
        list_product_names: Iterable of product record dicts. When it is
            empty or ``None`` the output contains only
            ``ERROR_CODE['MissingProductList']``.
        job_id: Identifier of the originating job; used only in the log
            message and the Sentry report.

    Returns:
        list: Results and error responses, in input order.
    """
    output_list = []
    logger.info("Request received {}".format(list_product_names))

    if not list_product_names:
        output_list.append(ERROR_CODE['MissingProductList'])
    else:
        for product_name_dict in list_product_names:
            try:
                valid_record, error_response = validate_product_args(
                    product_name_dict)
                if valid_record:
                    output_list.append(
                        process_product(product_name_dict, cat_model,
                                        dang_model, logger))
                else:
                    # Echo the input fields back next to the error and append
                    # the response exactly once per record. Appending inside
                    # the per-key loop duplicated the entry for every field
                    # and emitted nothing for a record with no fields.
                    error_response.update(product_name_dict)
                    output_list.append(error_response)
            except Exception as err:
                logger.error(
                    'get_category:Exception {} occurred against input: {} for job_id {}'.
                    format(err, list_product_names, job_id))
                sentry_client.captureException(
                    message="Exception occurred against input in get_category",
                    extra={"error": err, "job_id": job_id,
                           "product_name_dict": product_name_dict})

    logger.info("Result produced {}".format(output_list))

    return output_list
예제 #2
0
def get_category():
    """Predict categories for the JSON list of products in the current request.

    The request body is read with ``request.get_json()`` and iterated as a
    list of dicts. For each element the ``'product_name'`` value is looked up
    in the cache ``r`` under a normalised key; on a miss the prediction is
    computed with ``predict_category`` and stored with ``CACHE_EXPIRY``.
    Elements without a product name are flagged with
    ``'invalid_product_name'``. Every result carries the element's ``'wbn'``
    value under ``'waybill'``.

    Returns:
        Response: JSON list with one result dict per input element, or a
        JSON error body with status 500 when processing fails.
    """
    # Bound before the try so the error handler can always reference it,
    # even when reading or parsing the request body is what failed.
    list_product_names = None
    try:
        list_product_names = list(request.get_json())
        output_list = []

        for product_name_dict in list_product_names:
            app.logger.info("Request received {}".format(product_name_dict))
            results = {}

            product_name = product_name_dict.get('product_name', "")
            if product_name:
                product_name_clean = re.sub(ALPHA_NUM_REGEX, '',
                                            product_name).lower()
                product_name_key = 'catfight:' + ':' + product_name_clean
                results_cache = r.get(product_name_key)
                if results_cache:
                    results = json.loads(results_cache)
                    results['cached'] = True
                else:
                    # A falsy prediction (e.g. None after an internal
                    # failure) is normalised to an empty dict so the waybill
                    # can still be attached instead of failing the batch.
                    results = predict_category(
                        product_name.encode('ascii', 'ignore'), cat_model,
                        dang_model, app.logger) or {}
                    if results:
                        # NOTE(review): if `r` is a redis-py client, the
                        # order of the value/expiry arguments of setex
                        # depends on the client class and version - confirm.
                        r.setex(product_name_key, json.dumps(results),
                                CACHE_EXPIRY)
                        results['cached'] = False
            else:
                results['invalid_product_name'] = True

            results['waybill'] = product_name_dict.get('wbn', None)

            app.logger.info("Result produced {}".format(results))

            output_list.append(results)

        return Response(json.dumps(output_list), mimetype='application/json')

    except Exception as err:
        app.logger.error('Exception {} occurred against payload: {}'.format(
            err, list_product_names))

        sentry_client.captureException(message="predict.py: Exception occured",
                                       extra={
                                           "error": err,
                                           "payload": list_product_names
                                       })
        # A view must return a response; falling through with None would
        # make Flask raise a second, unrelated error for the same request.
        return Response(json.dumps({'error': 'Internal server error'}),
                        status=500, mimetype='application/json')
def get_category():
    """Return category predictions for the products posted in the request.

    The JSON body (``request.get_json()``) is treated as a list of dicts.
    Each dict's ``'product_name'`` is normalised into a cache key; a cached
    entry in ``r`` is reused, otherwise ``predict_category`` is called and a
    non-empty prediction is cached with ``CACHE_EXPIRY``. Dicts without a
    product name get ``'invalid_product_name'``. The dict's ``'wbn'`` value
    is copied to ``'waybill'`` on every result.

    Returns:
        Response: JSON list of result dicts (one per input element), or a
        JSON error body with status 500 if anything raises.
    """
    # Defined up front: the except handler logs this value, and it would be
    # unbound if request.get_json() or list() raised.
    list_product_names = None
    try:
        list_product_names = list(request.get_json())
        output_list = []

        for product_name_dict in list_product_names:
            app.logger.info("Request received {}".format(product_name_dict))
            results = {}

            product_name = product_name_dict.get('product_name', "")
            if not product_name:
                results['invalid_product_name'] = True
            else:
                product_name_clean = re.sub(ALPHA_NUM_REGEX, '', product_name).lower()
                product_name_key = 'catfight:' + ':' + product_name_clean
                results_cache = r.get(product_name_key)
                if results_cache:
                    results = json.loads(results_cache)
                    results['cached'] = True
                else:
                    # Guard against a falsy prediction (e.g. None on an
                    # internal failure): keep a dict so the waybill
                    # assignment below cannot raise and abort the batch.
                    results = predict_category(product_name.encode('ascii', 'ignore'),
                                               cat_model, dang_model, app.logger) or {}
                    if results:
                        # NOTE(review): if `r` is a redis-py client, verify
                        # the value/expiry argument order expected by setex.
                        r.setex(product_name_key, json.dumps(results), CACHE_EXPIRY)
                        results['cached'] = False

            results['waybill'] = product_name_dict.get('wbn', None)

            app.logger.info("Result produced {}".format(results))

            output_list.append(results)

        return Response(json.dumps(output_list), mimetype='application/json')

    except Exception as err:
        app.logger.error(
            'Exception {} occurred against payload: {}'.format(
                err, list_product_names))

        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "payload": list_product_names})
        # Return an explicit error response rather than None, which Flask
        # rejects with an additional exception.
        return Response(json.dumps({'error': 'Internal server error'}),
                        status=500, mimetype='application/json')
예제 #4
0
def get_products():
    """
    Worker loop: fetch jobs from the Disque queue ``catfight_input``, split
    each job into its vendor and product payload, run ``get_category`` on the
    products and publish the results to ``catfight_output``.

    A job is acknowledged only after its results have been queued. Any
    exception is logged and reported to Sentry, and the loop continues; the
    function never returns.
    """
    while True:
        # Reset on every pass so the error handler never hits unbound names
        # (failure before the first job is read) or reports a stale job.
        job_id = None
        job = None
        try:
            jobs = client.get_job([catfight_input])
            for _queue_name, job_id, job in jobs:
                job_data = json.loads(job)
                vendor = job_data['vendor']
                # The payload is itself a JSON-encoded string inside the job.
                products = json.loads(job_data['payload'])

                results = get_category(products, job_id)
                if results:
                    results_dict = {
                        'vendor': vendor,
                        'catfight_results': results,
                    }
                    second_job_id = client.add_job(catfight_output,
                                                   json.dumps(results_dict),
                                                   retry=5)
                    client.ack_job(job_id)
                    logger.info("Successfully fetched from Disque queue catfight_input GET Job ID {} with job {}".
                                format(job_id, job))
                    logger.info("Successfully added to Disque queue catfight_output with Job ID {} and job {}".
                                format(second_job_id, job))
                else:
                    # NOTE(review): the job is not acknowledged on this
                    # path - confirm that redelivery is intended.
                    logger.info("No results found for Job ID {} with job {}".
                                format(job_id, job))
        except Exception as e:
            # Logged at ERROR level, like the other exception handlers here.
            logger.error("Function get_products failed for Job ID {} with job {} with error {}".
                         format(job_id, job, e))
            sentry_client.captureException(
                message="get_products failed",
                extra={"error": e})
예제 #5
0
# Application logger: writes to CATFIGHT_LOGGING_PATH, rotating at
# 200,000,000 bytes per file and keeping 10 backups.
logger = logging.getLogger('Catfight App')
handler = RotatingFileHandler(CATFIGHT_LOGGING_PATH, maxBytes=200000000,
                              backupCount=10)
formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.info("Loading Process Started")

# Load both models once at import time. A failure is logged and reported to
# Sentry but not re-raised, so the module still imports.
try:
    dang_model = dangerousModel()
    cat_model = categoryModel()
except Exception as err:
    logger.error("Error {} while loading models".format(err))
    sentry_client.captureException(
        message="service_category_disque : Failed to load models",
        extra={"error": err})
else:
    # Report completion only when both models actually loaded; logging it
    # after a failure made the log claim a successful start-up.
    logger.info("Loading Process Complete")

# Error identifiers placed in responses for invalid input.
ERROR_CODE = {
    'MissingProductName': 'MissingProduct',
    'MissingProductList': 'MissingProductList',
    'MissingWBN': 'MissingWBN',
}

def validate_product_args(record):
    """Check that a product record carries a product name.

    Args:
        record: Product record dict; the product name is read from ``'prd'``.

    Returns:
        tuple: ``(is_valid, error_response)``. ``(True, {})`` when ``'prd'``
        is present and truthy, otherwise
        ``(False, {'error': ERROR_CODE['MissingProductName']})``.
    """
    # NOTE(review): ERROR_CODE also defines 'MissingWBN', but no waybill
    # check is visible in this function - confirm whether one is required.
    if not record.get('prd', None):
        return False, {'error': ERROR_CODE['MissingProductName']}
    # Always return the pair: callers unpack two values, so falling off the
    # end (returning None) made every call fail.
    return True, {}
def predict_category(product_name, wbn, cat_model, dang_model, logger):
    """Predict the category, sub-category and dangerous flag of a product.

    First level: the lower-cased name is vectorised once and scored by
    ``cat_model.clf_bayes`` (class1), ``clf_chi`` (class2) and ``clf_rf``
    (class3). A probability vector whose entries are all equal counts as
    "Delhivery_Others". class3 wins unless it is "Delhivery_Others"; then
    class1 and class2 are reconciled, with class2 preferred on disagreement.

    Second level: only when the first-level category belongs to one of
    ``cat_model``'s second-level name sets; otherwise it stays ``""``.

    Args:
        product_name: Product name text.
        wbn: Waybill value, forwarded to ``predict_dangerous``.
        cat_model: Object exposing the vectorizers, classifiers and
            second-level category name sets read below.
        dang_model: Object exposing ``dg_keywords``.
        logger: Logger used here and forwarded to ``predict_dangerous``.

    Returns:
        dict: ``{'cat', 'scat', 'dg'}``, or ``None`` if any step raises (the
        error is logged and reported to Sentry).
    """
    try:
        l_product_name = product_name.lower()
        product_words = re.findall(CLEAN_PRODUCT_NAME_REGEX, l_product_name)
        clean_product_name = " ".join(product_words)

        vectorizer = cat_model.vectorizer
        clf_bayes = cat_model.clf_bayes
        clf_chi = cat_model.clf_chi
        clf_rf = cat_model.clf_rf

        second_level_vectorizer = cat_model.second_level_vectorizer
        second_level_clf_bayes = cat_model.second_level_clf_bayes
        second_level_clf_fpr = cat_model.second_level_clf_fpr
        second_level_clf_rf = cat_model.second_level_clf_rf

        # Vectorise once; the same features feed all three classifiers.
        features = vectorizer.transform([l_product_name])
        class1 = clf_bayes.predict(features)[0]
        class2_prob_vector = clf_chi.predict_proba(features)[0]
        class3_prob_vector = clf_rf.predict_proba(features)[0]

        # NOTE(review): labels for clf_chi / clf_rf probabilities are looked
        # up in clf_bayes.classes_ - presumably all three share one class
        # ordering; confirm against how the models are trained.
        if len(np.unique(class2_prob_vector)) == 1:
            class2 = "Delhivery_Others"
        else:
            class2 = clf_bayes.classes_[np.argmax(class2_prob_vector)]
        if len(np.unique(class3_prob_vector)) == 1:
            class3 = "Delhivery_Others"
        else:
            class3 = clf_bayes.classes_[np.argmax(class3_prob_vector)]

        if class3 != "Delhivery_Others":
            first_level = class3
        elif class1 == class2:
            first_level = class1
        elif class1 == "Delhivery_Others":
            first_level = class2
        elif class2 == "Delhivery_Others":
            first_level = class1
        else:
            first_level = class2

        second_level = ""

        # Both second-level paths run the same steps and differ only in
        # which classifier supplies the probabilities.
        use_fpr = first_level in cat_model.second_level_cat_names_set_nb
        use_rf = (not use_fpr and
                  first_level in cat_model.second_level_cat_names_set_rf)
        if use_fpr or use_rf:
            proba_models = second_level_clf_fpr if use_fpr else second_level_clf_rf
            second_features = second_level_vectorizer[first_level].transform(
                [l_product_name])
            prob_vector = proba_models[first_level].predict_proba(
                second_features)[0]
            bayes_model = second_level_clf_bayes[first_level]
            if len(np.unique(prob_vector)) == 1:
                # Uninformative probabilities: fall back to the Bayes model.
                second_level = bayes_model.predict(second_features)[0]
            else:
                second_level = bayes_model.classes_[np.argmax(prob_vector)]

        dg_report = predict_dangerous(clean_product_name, wbn, first_level,
                                      dang_model.dg_keywords, logger)

        return {
            'cat': first_level,
            'scat': second_level,
            'dg': dg_report['dangerous'],
        }

    except Exception as err:
        logger.error('Exception {} occurred against product: {}'.format(
            err, product_name))
        sentry_client.captureException(message="predict.py: Exception occured",
                                       extra={
                                           "error": err,
                                           "product_name": product_name
                                       })
        # Callers test the result for truthiness, so signal failure explicitly.
        return None
def predict_category(product_name, wbn, cat_model, dang_model, logger):
    """Classify a product name and flag whether it is dangerous.

    The lower-cased name is vectorised once with ``cat_model.vectorizer`` and
    passed to three first-level classifiers. A probability vector with all
    entries equal is treated as "Delhivery_Others". The ``clf_rf`` label is
    used unless it is "Delhivery_Others"; otherwise the ``clf_bayes`` and
    ``clf_chi`` labels are reconciled, preferring ``clf_chi`` when they
    differ and neither is "Delhivery_Others".

    A sub-category is predicted only when the chosen category appears in
    ``cat_model.second_level_cat_names_set_nb`` or ``..._set_rf``; otherwise
    it is the empty string.

    Args:
        product_name: Product name text.
        wbn: Waybill value, passed through to ``predict_dangerous``.
        cat_model: Holder of the category vectorizers and classifiers.
        dang_model: Holder of ``dg_keywords``.
        logger: Logger for error reporting; also given to
            ``predict_dangerous``.

    Returns:
        dict: Keys ``'cat'``, ``'scat'`` and ``'dg'``; ``None`` when an
        exception occurs (it is logged and sent to Sentry).
    """
    try:
        l_product_name = product_name.lower()
        product_words = re.findall(CLEAN_PRODUCT_NAME_REGEX, l_product_name)
        clean_product_name = " ".join(product_words)

        vectorizer = cat_model.vectorizer
        clf_bayes = cat_model.clf_bayes
        clf_chi = cat_model.clf_chi
        clf_rf = cat_model.clf_rf

        second_level_vectorizer = cat_model.second_level_vectorizer
        second_level_clf_bayes = cat_model.second_level_clf_bayes
        second_level_clf_fpr = cat_model.second_level_clf_fpr
        second_level_clf_rf = cat_model.second_level_clf_rf

        # One transform shared by all first-level classifiers instead of
        # re-vectorising the same text for each of them.
        first_features = vectorizer.transform([l_product_name])
        class1 = clf_bayes.predict(first_features)[0]
        class2_prob_vector = clf_chi.predict_proba(first_features)[0]
        class3_prob_vector = clf_rf.predict_proba(first_features)[0]

        # NOTE(review): clf_bayes.classes_ is used to label probabilities
        # produced by clf_chi and clf_rf - this assumes identical class
        # ordering across the three models; verify.
        if len(np.unique(class2_prob_vector)) == 1:
            class2 = "Delhivery_Others"
        else:
            class2 = clf_bayes.classes_[np.argmax(class2_prob_vector)]
        if len(np.unique(class3_prob_vector)) == 1:
            class3 = "Delhivery_Others"
        else:
            class3 = clf_bayes.classes_[np.argmax(class3_prob_vector)]

        if class3 != "Delhivery_Others":
            first_level = class3
        elif class1 == class2:
            first_level = class1
        elif class1 == "Delhivery_Others":
            first_level = class2
        elif class2 == "Delhivery_Others":
            first_level = class1
        else:
            first_level = class2

        second_level = ""

        # The "nb" and "rf" name sets select which classifier provides the
        # second-level probabilities; everything else is shared.
        in_nb_set = first_level in cat_model.second_level_cat_names_set_nb
        in_rf_set = (not in_nb_set and
                     first_level in cat_model.second_level_cat_names_set_rf)
        if in_nb_set or in_rf_set:
            proba_source = second_level_clf_fpr if in_nb_set else second_level_clf_rf
            sub_features = second_level_vectorizer[first_level].transform([l_product_name])
            prob_vector = proba_source[first_level].predict_proba(sub_features)[0]
            sub_bayes = second_level_clf_bayes[first_level]
            if len(np.unique(prob_vector)) == 1:
                # All probabilities equal: let the Bayes model decide.
                second_level = sub_bayes.predict(sub_features)[0]
            else:
                second_level = sub_bayes.classes_[np.argmax(prob_vector)]

        dg_report = predict_dangerous(clean_product_name, wbn, first_level,
                                      dang_model.dg_keywords, logger)

        result = {}
        result['cat'] = first_level
        result['scat'] = second_level
        result['dg'] = dg_report['dangerous']
        return result

    except Exception as err:
        logger.error(
            'Exception {} occurred against product: {}'.format(
                err, product_name))
        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "product_name": product_name})
        # Explicit failure value; callers check the result for truthiness.
        return None