def predict(job_id, payload):
    """Run the model of an ML job against caller-supplied input.

    Args:
        job_id: Value handed to ``MLJob.get_model`` to obtain the model.
        payload: Mapping with two required keys:
            ``input_type`` -- ``'csv'`` or ``'dataset'``.
            ``data`` -- for ``'csv'``, base64-encoded comma-separated text;
            for ``'dataset'``, the value passed to
            ``DatasetManager.get_dataset``.

    Returns:
        For ``'csv'``: ``{'data': <base64-encoded bytes of the prediction
        rendered as CSV without the index>}``.
        For ``'dataset'``: ``{'cols': ..., 'rows': ...}`` as produced by
        ``df_to_cols_rows`` from the prediction.

    Raises:
        KeyError: If ``payload`` lacks ``data`` or ``input_type`` (raised
            before anything is logged).
        RuntimeError: If ``input_type`` is neither ``'csv'`` nor ``'dataset'``.
        Exception: Any other failure while loading the model, decoding the
            input or predicting is logged and re-raised unchanged.
    """
    # Looked up outside the ``try`` so a malformed payload surfaces as a plain
    # KeyError rather than being reported as a prediction failure.
    data = payload['data']
    input_type = payload['input_type']
    try:
        model = MLJob.get_model(job_id)
        if input_type == 'csv':
            csv_data = BytesIO(base64.b64decode(data))
            df = pd.read_csv(csv_data, sep=",")
            output_data = model.predict(df).to_csv(index=False)
            return {'data': base64.b64encode(output_data.encode('utf-8'))}
        if input_type == 'dataset':
            df = DatasetManager.get_dataset(data).get_df()
            # A separate name keeps the ``payload`` argument intact.
            result = {}
            result["cols"], result["rows"] = df_to_cols_rows(model.predict(df))
            return result
        # Not logged here: the handler below logs this error once, together
        # with its traceback.
        raise RuntimeError(
            f'input type {input_type} is not supported for prediction')
    except Exception as e:
        # NOTE(review): this writes the complete ``data`` value to the log,
        # which for CSV input is the whole base64 blob -- confirm that is wanted.
        logger.exception(
            f'failed to do prediction for data={data} id={job_id} error={e}'
        )
        # Bare ``raise`` re-raises the active exception as-is.
        raise
def query(self, query_str, query_type=QUERY_TYPE_NORMAL, to_payload=False):
    """Filter this dataset with a pandas query expression or a SQL statement.

    Args:
        query_str: The expression or statement to run. An empty string
            short-circuits to ``self.get_payload()``.
        query_type: ``QUERY_TYPE_NORMAL`` evaluates ``query_str`` with
            ``self.df.query``; ``QUERY_TYPE_SQL`` runs it through ``sqldf``
            with the data frame exposed under the table name ``dataset``.
        to_payload: When true, return a dict with ``"cols"`` and ``"rows"``
            built by ``df_to_cols_rows`` instead of the raw query result.

    Returns:
        ``self.get_payload()`` for an empty query (regardless of
        ``to_payload``), ``None`` for an unsupported ``query_type``,
        otherwise the query result or its cols/rows dict.
    """
    if self.df is None:
        # presumably populates self.df -- confirm against _load()
        self._load()

    if query_str == '':
        return self.get_payload()

    if query_type == QUERY_TYPE_NORMAL:
        # http://jose-coto.com/query-method-pandas
        query_result = self.df.query(query_str)
    elif query_type == QUERY_TYPE_SQL:
        # TODO: integrate with https://github.com/yhat/pandasql/
        # Hand the SQL engine an explicit table mapping instead of locals(),
        # so ``dataset`` is the only name a statement can resolve and no
        # unrelated local (self, query_str, ...) is offered as a table.
        # NOTE(review): assumes sqldf is pandasql's and takes a
        # name -> object mapping as its second argument -- confirm.
        query_result = sqldf(query_str, {'dataset': self.df})
    else:
        logger.warning(f'query type {query_type} is not supported')
        return None

    if not to_payload:
        return query_result

    payload = {}
    payload["cols"], payload["rows"] = df_to_cols_rows(query_result)
    return payload
def test_sqlquery():
    """A ``LIMIT 10`` SQL query on the iris dataset yields exactly ten rows."""
    iris = DatasetManager.get_dataset('iris')
    result = iris.query('SELECT * FROM dataset LIMIT 10;', 'sql')
    assert result is not None

    # Only the rows matter here; the column part is discarded.
    _, rows = df_to_cols_rows(result)
    assert rows is not None
    assert len(rows) == 10
def _validate(self):
    """Predict over a 365-period future frame and record the outcome.

    Asks ``self.model`` for a future data frame of 365 periods, predicts on
    it, converts the ``ds`` column to strings, and stores the frame -- split
    by ``df_to_cols_rows`` -- in ``self.validation_result['forecast']`` under
    the keys ``'cols'`` and ``'rows'``.
    """
    horizon = self.model.make_future_dataframe(periods=365)
    prediction = self.model.predict(horizon)
    prediction['ds'] = prediction['ds'].astype(str)

    columns, records = df_to_cols_rows(pd.DataFrame(data=prediction))
    self.validation_result['forecast'] = {'cols': columns, 'rows': records}
def get_payload(self):
    """Return this dataset's payload dict, building and caching it on first use.

    The payload holds ``"id"`` and ``"name"`` (both ``self.name``) plus
    ``"cols"`` and ``"rows"`` as produced by ``df_to_cols_rows`` from
    ``self.df``. Once built it is kept in ``self.payload`` and returned as-is
    by later calls.

    Returns:
        The cached payload dict.
    """
    # Return value unused; presumably called to make sure self.df is
    # loaded -- confirm against get_df().
    self.get_df()
    if self.payload is None:
        # Replace null cells with None (presumably so they serialise as
        # null rather than NaN -- confirm with the consumers of the payload).
        self.df = self.df.where(pd.notnull(self.df), None)
        cols, rows = df_to_cols_rows(self.df)
        # Assign the cache only after every field is computed: if anything
        # above raises, self.payload stays None and the next call retries
        # instead of returning a half-filled dict forever.
        self.payload = {
            "id": self.name,
            "name": self.name,
            "cols": cols,
            "rows": rows,
        }
        logger.debug('payload is filled ')
    return self.payload