def generate_response(return_content_par, status_par):
    """Build the standard JSON envelope response for this API.

    Wraps *return_content_par* in a dict carrying the request URL, method and
    status, serialises it, and returns a Flask response with status
    *status_par*.  A '204' status returns an empty body, per HTTP semantics.

    The dumps/replace/loads round-trip below un-wraps nested JSON that arrived
    as *strings* (e.g. '"[...]"' or '"{...}"') so it is emitted as real JSON.
    NOTE(review): the blanket .replace('\\', '') also strips legitimate escape
    characters from string values — confirm payloads never contain them.

    Raises (via abort): HTTP 500 when serialisation or re-parsing fails.
    """
    if RESPONSE_DEBUG:
        logger.debug("1")
    logger.info('generate_response(return_content, status)')
    if RESPONSE_DEBUG:
        logger.debug("2")
    return_content = return_content_par
    if RESPONSE_DEBUG:
        logger.debug("3")
        logger.debug(repr(return_content))
    try:
        if RESPONSE_DEBUG:
            logger.debug("4")
            logger.debug(repr(return_content))
        # Serialise, then strip the quotes around embedded stringified JSON
        # so nested lists/objects come out as structure, not strings.
        return_content = json.dumps(return_content_par).replace("\"[", "[").replace("]\"", "]").replace('\\', '').replace("\"{", "{").replace("}\"", "}")
    except Exception as argument:
        if RESPONSE_DEBUG:
            logger.debug("5")
            logger.debug(repr(return_content))
        abort(500, ERROR_RESPONSE_JSON_PARSE + ' json.dumps(): ' + repr(argument).replace("u'", "").replace("'", ""))
    try:
        if RESPONSE_DEBUG:
            logger.debug("6")
            logger.debug(repr(return_content))
        # Re-parse the massaged string back into Python structures; this is
        # also a validity check that the textual surgery above produced JSON.
        return_content = json.loads(return_content)
        if RESPONSE_DEBUG:
            logger.debug("7")
            logger.debug(repr(return_content))
    except Exception as argument:
        if RESPONSE_DEBUG:
            logger.debug("8")
            logger.debug(repr(return_content))
        abort(500, ERROR_RESPONSE_JSON_PARSE + ' json.loads(): ' + repr(argument).replace("u'", "").replace("'", ""))
    #Log response content
    if RESPONSE_DEBUG:
        logger.debug("9")
        logger.debug(repr(return_content))
    logger.debug('Response Status: ' + str(status_par) + ', Content: ' + repr(return_content))
    # Because 204 has no body content
    # NOTE(review): comparison is against the *string* '204' — an int 204
    # would take the else branch; confirm callers always pass strings.
    if status_par == '204':
        response = make_response()
        response.status_code = 204
    else:
        response = make_response(jsonify({'requestURL': request.url, 'requestMethod': request.method, 'requestStatusCode': status_par, 'content': return_content}))
        response.headers['Content-Type'] = 'application/json'
        response.status_code = int(status_par)
    logger.debug('Response: ' + repr(response))
    return response
def __init__(self):
    """Cache the training-time configuration and load the uploaded CSV.

    Copies the schema, imputation, feature and categorical-column settings
    from ModelConfig onto the instance, then reads '../input/<filename>'
    into ``self.input``.
    """
    # Static configuration first; the column list is derived from the schema.
    self.training_schema = ModelConfig.training_schema
    self.training_data_columns = list(self.training_schema.keys())
    self.training_impute = ModelConfig.training_impute
    self.model_features = ModelConfig.model_features
    self.cat_cols = ModelConfig.cat_cols
    # NOTE(review): assumes self.filename was set before __init__ runs
    # (e.g. by a subclass or class attribute) — confirm against the caller.
    self.input = pd.read_csv(os.path.join('../input', self.filename))
    logger.info(f'Preprocessing starts...')
def predict(self):
    """Score the prepared features and write the predictions CSV.

    Produces ``self.output`` with columns ['sku', 'prediction'] where the
    model's 0/1 output is mapped to 'Yes'/'No', and saves it under
    '../prediction/output_<filename>'.
    """
    self.prediction = self.model.predict(self.predict_data)
    # Pair every sku with its raw model output (zip truncates to the shorter).
    rows = list(zip(self.id_, self.prediction))
    self.output = pd.DataFrame(rows, columns=['sku', 'prediction'])
    # Translate the numeric class labels into the user-facing strings.
    # NOTE(review): 0 -> "Yes", 1 -> "No" — confirm this matches the
    # label encoding used at training time.
    self.output["prediction"] = self.output["prediction"].map({0: "Yes", 1: "No"})
    destination = os.path.join('../prediction', 'output_' + self.filename)
    self.output.to_csv(destination, index=False)
    logger.info(f"\n Predicted file saved at {destination}... \n")
def predict():
    """Flask endpoint: accept an uploaded CSV, run the pipeline, return predictions.

    Expects a multipart upload under the field 'file'.  Saves the file with a
    timestamp suffix into '../input', runs quality checks and prediction via
    Train, and streams back '../prediction/output_<filename>' as a CSV
    attachment.  On any pipeline failure returns an error Response instead.
    """
    #Files uploaded
    f = request.files['file']
    if len(f.filename) == 0:
        logger.error(f'No file uploaded')
        return "No file uploaded"
    # BUG FIX: the previous check used f.filename.split('.')[1], which raised
    # IndexError for names without a dot and mis-read names like 'a.b.csv'.
    # splitext looks only at the final extension; .lower() also accepts '.CSV'.
    stem, ext = os.path.splitext(f.filename)
    if ext.lower() != '.csv':
        logger.error(f'Non csv file received: {f.filename}')
        return "Uploaded non-csv file."
    try:
        logger.info(f'\n\n\n File received {f.filename}...\n')
        time_ = datetime.now().strftime("%d_%m_%H_%M")
        # SECURITY: basename() strips any client-supplied directory components
        # so an uploaded name like '../../x' cannot escape '../input'.
        filename = os.path.basename(stem) + '_' + time_ + '.csv'
        f.save(os.path.join('../input', filename))
        train = Train(filename)
        # Data Quality Check and imputation with proper values if required
        train.process_data()
        # Prediction
        train.predict()
        logger.info(" \n -------- SUCCESS --------")
        # Once control reaches error add suffix 'SUFFIX' to log filename
        return send_file(os.path.join('../prediction', 'output_' + filename),
                         mimetype='text/csv',
                         attachment_filename='backorder_prediction.csv',
                         as_attachment=True)
    except Exception as e:
        logger.error(e, exc_info=True)
        logger.error(" \n -------- FAILURE --------")
        # Once control reaches error add suffix 'FAILURE' to log filename
        return Response("Error Occurred! %s" % e)
def process_data(self):
    """Run the full preprocessing pipeline on the uploaded frame.

    Steps: schema/quality validation, imputation, categorical encoding,
    persisting the transformed frame to '../transformation', and finally
    building ``self.predict_data`` via the fitted feature transformer.
    """
    logger.info(f'\n Data Quality Check Starts... \n')
    super().data_quality_check()
    logger.info(f'\n Data Imputation Starts... \n')
    self.imputation()
    logger.info(f'\n Encoding Categorical Starts... \n')
    self.encode_categorical(cat_cols=None)
    # Persist the cleaned/encoded frame for traceability before transforming.
    transformed_path = os.path.join('../transformation', 'transformed_' + self.filename)
    self.input.to_csv(transformed_path, index=False)
    logger.info(f"\n Transformed file saved at {transformed_path}... \n")
    self.predict_data = self.feature_transform.transform(self.input[self.model_features])
def data_quality_check(self):
    """Validate ``self.input`` against the training-time schema.

    Checks, in order: all required columns present; 'sku' non-null and of
    integer dtype; every column coercible to its schema dtype; negative
    values in selected numeric columns nulled out; overall null ratio below
    30%; categorical columns restricted to {'yes', 'no'}.

    Raises:
        ValueError: on any of the violations above.
    """

    def _2str(x):
        # NaN != NaN, so missing values are passed through untouched.
        if x == x:
            try:
                x = str(x).lower()
            except (TypeError, ValueError):  # was a bare except: — narrowed
                x = np.nan
        return x

    def _2int(x):
        # Non-coercible values become null instead of failing the upload.
        try:
            x = int(x)
        except (TypeError, ValueError):  # was a bare except: — narrowed
            x = np.nan
        return x

    def _2float(x, col_name):
        # Explicit col_name parameter (previously read the enclosing loop
        # variable via closure, which is fragile).
        try:
            x = float(x)
        except (TypeError, ValueError):
            raise ValueError(
                f'{col_name} contains {x} , expected only float')
        return x

    col_required = [
        col for col in self.training_data_columns
        if col not in self.input.columns.tolist()
    ]
    # If all required columns are not found
    if col_required:
        raise ValueError(
            f'Following columns was not passed {col_required}.\n Note: Columns name are case sensitive. '
        )
    self.input = self.input[self.training_data_columns]
    # id can not be blank.
    # FIX: the old test divided the null count by the row count before
    # comparing with > 0 — the division was a no-op; compare the count itself.
    if self.input['sku'].isna().sum() > 0:
        raise ValueError('sku is an identifier and can not be null.')
    # id can not be float (dtype equality with 'int' matches the platform int)
    if self.input.sku.dtype != 'int':
        raise ValueError(f'sku should be int found {self.input.sku.dtype}')
    logger.info(f'Checking for schema compatibility...')
    # Check for datatype of each column per the training schema.
    for col_name, data_type in self.training_schema.items():
        if data_type == 'str':
            self.input[col_name] = self.input[col_name].apply(_2str)
        elif data_type == 'int':
            self.input[col_name] = self.input[col_name].apply(_2int)
        elif data_type == 'float':
            # Bind col_name now so the message names the right column.
            self.input[col_name] = self.input[col_name].apply(
                lambda x, c=col_name: _2float(x, c))
        else:
            raise ValueError('Unexpected dtype %s specified for columns %s' %
                             (data_type, col_name))
    # Converting negative values into null values (mask keeps NaN where True).
    for col in ['national_inv', 'perf_6_month_avg', 'perf_12_month_avg']:
        self.input[col] = self.input[col].mask(self.input[col] < 0)
    # Overall percentage of null cells, truncated to int.
    na_count = int(self.input.isna().sum().sum() * 100 /
                   (self.input.shape[0] * self.input.shape[1]))
    if na_count > 30:
        raise ValueError(
            f'{na_count}% null values. Insufficient data for prediction.')
    logger.info(
        f'{na_count}% null values. Null values details below \n {self.input.isna().sum()/self.input.shape[0]}'
    )
    # Check for specific value in categorical variables.
    for col in self.cat_cols:
        val = [
            val_ for val_ in self.input[col].unique().tolist()
            if val_ not in ['yes', 'no']
        ]
        if len(val) > 0:
            raise ValueError(
                f'{col} has value {val} ; expected only yes,no values ')
# Data Quality Check and imputation with proper values if required train.process_data() # Prediction train.predict() logger.info(" \n -------- SUCCESS --------") # Once control reaches error add suffix 'SUFFIX' to log filename return send_file(os.path.join('../prediction', 'output_' + filename), mimetype='text/csv', attachment_filename='backorder_prediction.csv', as_attachment=True) except Exception as e: logger.error(e, exc_info=True) logger.error(" \n -------- FAILURE --------") # Once control reaches error add suffix 'FAILURE' to log filename return Response("Error Occurred! %s" % e) if __name__ == "__main__": #port = int(os.getenv('PORT', 5000)) #print("Starting app on port %d" % port) #app.run(debug=True, port=port, host='0.0.0.0') host = '0.0.0.0' port = 5000 httpd = simple_server.make_server(host, port, app) logger.info("Serving on %s %d" % (host, port)) httpd.serve_forever()