Example #1
0
def generate_response(return_content_par, status_par):
    """Build and return a Flask response wrapping the given content.

    The payload is serialised with json.dumps, then a chain of string
    replacements strips the quotes/backslashes around embedded "[...]" and
    "{...}" fragments so that values which arrived as JSON-encoded *strings*
    are re-parsed (via json.loads) into real JSON structures before being
    embedded under the 'content' key.

    NOTE(review): the replace chain is order-dependent and also strips every
    backslash from legitimate string content — confirm payloads never need
    escape sequences (e.g. quotes or newlines inside values).

    Parameters:
        return_content_par: payload to embed under the 'content' key.
        status_par: HTTP status code as a *string* (e.g. '200', '204') —
            it is compared to '204' and passed through int() below.

    Aborts with HTTP 500 if either JSON step fails.
    """

    # The numbered debug tags ("1".."9") trace how far execution progressed.
    if RESPONSE_DEBUG:
        logger.debug("1")

    logger.info('generate_response(return_content, status)')

    if RESPONSE_DEBUG:
        logger.debug("2")

    return_content = return_content_par

    if RESPONSE_DEBUG:
        logger.debug("3")
        logger.debug(repr(return_content))

    try:
        if RESPONSE_DEBUG:
            logger.debug("4")
            logger.debug(repr(return_content))

        # Serialise, then unquote embedded JSON fragments (see docstring).
        return_content = json.dumps(return_content_par).replace("\"[", "[").replace("]\"", "]").replace('\\', '').replace("\"{", "{").replace("}\"", "}")
    except Exception as argument:
        if RESPONSE_DEBUG:
            logger.debug("5")
            logger.debug(repr(return_content))
        abort(500, ERROR_RESPONSE_JSON_PARSE + ' json.dumps(): ' + repr(argument).replace("u'", "").replace("'", ""))

    try:
        if RESPONSE_DEBUG:
            logger.debug("6")
            logger.debug(repr(return_content))
        # Re-parse the mangled string back into Python structures.
        return_content = json.loads(return_content)
        if RESPONSE_DEBUG:
            logger.debug("7")
            logger.debug(repr(return_content))
    except Exception as argument:
        if RESPONSE_DEBUG:
            logger.debug("8")
            logger.debug(repr(return_content))
        abort(500, ERROR_RESPONSE_JSON_PARSE + ' json.loads(): ' + repr(argument).replace("u'", "").replace("'", ""))

    #Log response content
    if RESPONSE_DEBUG:
        logger.debug("9")
        logger.debug(repr(return_content))
    logger.debug('Response Status: ' + str(status_par) + ', Content: ' + repr(return_content))

    # Because 204 has no body content
    if status_par == '204':
        response = make_response()
        response.status_code = 204
    else:
        response = make_response(jsonify({'requestURL': request.url, 'requestMethod': request.method, 'requestStatusCode': status_par, 'content': return_content}))
        response.headers['Content-Type'] = 'application/json'
        response.status_code = int(status_par)

    logger.debug('Response: ' + repr(response))
    return response
Example #2
0
 def __init__(self):
     """Load the input CSV and pull training configuration from ModelConfig.

     NOTE(review): self.filename is read here but never assigned in this
     method — it must be set beforehand (e.g. by a subclass __init__);
     confirm against callers.
     """
     self.input = pd.read_csv(os.path.join('../input', self.filename))
     self.training_schema = ModelConfig.training_schema
     # Column names in the order declared by the training schema.
     self.training_data_columns = list(self.training_schema.keys())
     self.training_impute = ModelConfig.training_impute
     self.model_features = ModelConfig.model_features
     self.cat_cols = ModelConfig.cat_cols
     # Dropped the pointless f-prefix: the string has no placeholders.
     logger.info('Preprocessing starts...')
Example #3
0
 def predict(self):
     """Run the trained model and save predictions to '../prediction'.

     Builds a two-column frame ['sku', 'prediction'] from self.id_ and the
     raw model labels, maps label 0 -> "Yes" and 1 -> "No", and writes it
     as 'output_<filename>'.

     NOTE(review): confirm the 0/1 -> Yes/No mapping matches the label
     encoding used at training time.
     """
     self.prediction = self.model.predict(self.predict_data)
     self.output = pd.DataFrame(list(zip(self.id_, self.prediction)),
                                columns=['sku', 'prediction'])
     self.output["prediction"] = self.output["prediction"].map({
         0: "Yes",
         1: "No"
     })
     # Build the output path once instead of repeating the join in the log.
     output_path = os.path.join('../prediction', 'output_' + self.filename)
     self.output.to_csv(output_path, index=False)
     logger.info(f"\n Predicted file saved at {output_path}... \n")
Example #4
0
def predict():
    """Flask endpoint: accept an uploaded CSV, run the pipeline, return predictions.

    Expects a multipart upload under the 'file' field. Saves the file to
    '../input' with a timestamp suffix, runs the data-quality/imputation
    step and prediction, then streams back the prediction CSV. On failure
    the error is logged and returned as a plain-text response.
    """

    #Files uploaded
    f = request.files['file']

    if not f.filename:
        logger.error('No file uploaded')
        return "No file uploaded"

    # Validate the *last* extension. The original split('.')[1] rejected
    # names like 'data.v2.csv' and raised IndexError on names with no dot.
    base, ext = os.path.splitext(f.filename)
    if ext.lower() != '.csv':
        logger.error(f'Non csv file received: {f.filename}')
        return "Uploaded non-csv file."

    try:

        logger.info(f'\n\n\n File received {f.filename}...\n')

        # Timestamp the stored copy so repeated uploads do not collide.
        time_ = datetime.now().strftime("%d_%m_%H_%M")
        filename = base + '_' + time_ + '.csv'
        f.save(os.path.join('../input', filename))

        train = Train(filename)

        # Data Quality Check and imputation with proper values if required
        train.process_data()

        # Prediction
        train.predict()

        logger.info(" \n -------- SUCCESS --------")

        # Once control reaches error add suffix 'SUFFIX' to log filename

        return send_file(os.path.join('../prediction', 'output_' + filename),
                         mimetype='text/csv',
                         attachment_filename='backorder_prediction.csv',
                         as_attachment=True)

    except Exception as e:
        logger.error(e, exc_info=True)
        logger.error(" \n -------- FAILURE --------")
        # Once control reaches error add suffix 'FAILURE' to log filename
        return Response("Error Occurred! %s" % e)
Example #5
0
 def process_data(self):
     """Run the full preprocessing pipeline on self.input.

     Quality-checks the data, imputes missing values, encodes categorical
     columns, saves the transformed frame to '../transformation', and sets
     self.predict_data via the fitted feature transformer restricted to
     the model's feature columns.
     """
     # Dropped the pointless f-prefixes below: no placeholders in the strings.
     logger.info('\n Data Quality Check Starts... \n')
     super().data_quality_check()
     logger.info('\n Data Imputation Starts... \n')
     self.imputation()
     logger.info('\n Encoding Categorical Starts... \n')
     self.encode_categorical(cat_cols=None)
     # Build the output path once instead of repeating the join in the log.
     transformed_path = os.path.join('../transformation',
                                     'transformed_' + self.filename)
     self.input.to_csv(transformed_path, index=False)
     logger.info(f"\n Transformed file saved at {transformed_path}... \n")
     # Keep only the model's feature columns, in training order.
     self.predict_data = self.feature_transform.transform(
         self.input[self.model_features])
Example #6
0
    def data_quality_check(self):
        """Validate self.input against the training schema, coercing dtypes.

        Keeps only the training columns, coerces each column to its declared
        dtype ('str' lower-cased, 'int', 'float'), masks negative values in
        selected numeric columns, and rejects the frame on any of: missing
        required columns, null or non-int 'sku', more than 30% null values
        overall, or categorical values other than 'yes'/'no'.

        Raises:
            ValueError: on any failed check above.
        """
        def _2str(x):
            # NaN != NaN, so this guard leaves missing values untouched.
            if x == x:
                try:
                    x = str(x)
                    x = x.lower()
                except (ValueError, TypeError):  # was a bare except
                    x = np.nan
            return x

        def _2int(x):
            try:
                x = int(x)
            except (ValueError, TypeError):  # non-numeric -> missing
                x = np.nan
            return x

        def _2float(x):
            try:
                x = float(x)
            except (ValueError, TypeError):
                # col_name is the schema-loop variable at call time.
                raise ValueError(
                    f'{col_name} contains {x} , expected only float')
            return x

        col_required = [
            col for col in self.training_data_columns
            if col not in self.input.columns.tolist()
        ]

        # If all required columns are not found
        if len(col_required) > 0:
            raise ValueError(
                f'Following columns was not passed {col_required}.\n Note: Columns name are case sensitive. '
            )

        self.input = self.input[self.training_data_columns]

        # id can not be blank. .any() is equivalent to the original ratio
        # check and avoids ZeroDivisionError on an empty frame.
        if self.input['sku'].isna().any():
            raise ValueError('sku is an identifier and can not be null.')

        # id can not be float
        if self.input.sku.dtype != 'int':
            raise ValueError(f'sku should be int found {self.input.sku.dtype}')

        logger.info('Checking for schema compatibility...')

        # Check for datatype of each columns
        for col_name, data_type in self.training_schema.items():
            if data_type == 'str':
                self.input[col_name] = self.input[col_name].apply(
                    lambda x: _2str(x))
            elif data_type == 'int':
                self.input[col_name] = self.input[col_name].apply(
                    lambda x: _2int(x))
            elif data_type == 'float':
                self.input[col_name] = self.input[col_name].apply(
                    lambda x: _2float(x))
            else:
                raise ValueError(
                    'Unexpected dtype %s specified for columns %s' %
                    (data_type, col_name))

        # converting negative values into null values
        for col in ['national_inv', 'perf_6_month_avg', 'perf_12_month_avg']:
            self.input[col] = self.input[col].mask(self.input[col] < 0)

        # Check for null values (overall percentage across the whole frame)
        na_count = (self.input.isna().sum().sum() * 100 /
                    (self.input.shape[0] * self.input.shape[1])).astype('int')
        if na_count > 30:
            raise ValueError(
                f'{na_count}% null values. Insufficient data for prediction.')

        logger.info(
            f'{na_count}% null values. Null values details below \n {self.input.isna().sum()/self.input.shape[0]}'
        )

        # Check for specific value in categorical variables
        for col in self.cat_cols:
            val = [
                val_ for val_ in self.input[col].unique().tolist()
                if val_ not in ['yes', 'no']
            ]

            if len(val) > 0:
                raise ValueError(
                    f'{col} has value {val} ; expected only yes,no values ')
Example #7
0
        # Data Quality Check and imputation with proper values if required
        train.process_data()

        # Prediction
        train.predict()

        logger.info(" \n -------- SUCCESS --------")

        # Once control reaches error add suffix 'SUFFIX' to log filename

        return send_file(os.path.join('../prediction', 'output_' + filename),
                         mimetype='text/csv',
                         attachment_filename='backorder_prediction.csv',
                         as_attachment=True)

    except Exception as e:
        logger.error(e, exc_info=True)
        logger.error(" \n -------- FAILURE --------")
        # Once control reaches error add suffix 'FAILURE' to log filename
        return Response("Error Occurred! %s" % e)


if __name__ == "__main__":
    # Serve the Flask app with simple_server — presumably wsgiref's
    # dev server (single-threaded, not for production) — TODO confirm.
    host = '0.0.0.0'
    port = 5000
    httpd = simple_server.make_server(host, port, app)
    # Lazy %-args: the logging module formats only if the record is emitted.
    logger.info("Serving on %s %d", host, port)
    httpd.serve_forever()