예제 #1
0
 def predict(job_id, payload):
     """Run the model stored for ``job_id`` on caller-supplied input.

     Args:
         job_id: Identifier handed to ``MLJob.get_model`` to fetch the model.
         payload: Mapping with two required keys:
             ``input_type`` -- either ``'csv'`` or ``'dataset'``.
             ``data`` -- for ``'csv'``, a base64-encoded comma-separated
             document; for ``'dataset'``, the identifier handed to
             ``DatasetManager.get_dataset``.

     Returns:
         For ``'csv'``: ``{'data': <bytes>}`` where the bytes are the base64
         encoding of the UTF-8 CSV text of the prediction (no index column).
         For ``'dataset'``: ``{'cols': ..., 'rows': ...}`` as produced by
         ``df_to_cols_rows`` from the prediction.

     Raises:
         KeyError: If ``payload`` lacks ``'data'`` or ``'input_type'`` (raised
             before the logged ``try`` block).
         RuntimeError: If ``input_type`` is neither ``'csv'`` nor ``'dataset'``.
         Exception: Any other failure is logged with its traceback and
             re-raised unchanged.
     """
     data = payload['data']
     input_type = payload['input_type']
     try:
         model = MLJob.get_model(job_id)
         if input_type == 'csv':
             csv_data = BytesIO(base64.b64decode(data))
             df = pd.read_csv(csv_data, sep=",")
             df_prediction = model.predict(df)
             output_data = df_prediction.to_csv(index=False)
             # NOTE(review): b64encode returns bytes, not str -- confirm the
             # caller's serializer accepts bytes.
             return {'data': base64.b64encode(output_data.encode('utf-8'))}
         elif input_type == 'dataset':
             dataset = DatasetManager.get_dataset(data)
             df = dataset.get_df()
             df_prediction = model.predict(df)
             # Use a fresh local instead of rebinding the ``payload`` argument.
             cols, rows = df_to_cols_rows(df_prediction)
             return {"cols": cols, "rows": rows}
         else:
             # The handler below logs this error (message included), so it is
             # not logged separately here -- that produced duplicate records.
             raise RuntimeError(
                 f'input type {input_type} is not supported for prediction'
             )
     except Exception as e:
         # NOTE(review): for 'csv' input ``data`` is the whole base64 document,
         # so this record can be very large.
         logger.exception(
             f'failed to do prediction for data={data} id={job_id} error={e}'
         )
         # Bare ``raise`` re-raises the active exception as-is.
         raise
예제 #2
0
    def query(self, query_str, query_type=QUERY_TYPE_NORMAL, to_payload=False):
        """Run a query against this object's DataFrame (``self.df``).

        Args:
            query_str: Query text. An empty string short-circuits and returns
                ``self.get_payload()`` regardless of ``to_payload``.
            query_type: ``QUERY_TYPE_NORMAL`` evaluates ``query_str`` with
                ``DataFrame.query``; ``QUERY_TYPE_SQL`` runs it through
                ``sqldf`` with the DataFrame exposed as the table ``dataset``.
            to_payload: When true, the result is converted with
                ``df_to_cols_rows`` into a ``{"cols": ..., "rows": ...}`` dict.

        Returns:
            The query result as returned by ``DataFrame.query`` / ``sqldf``,
            or its cols/rows dict when ``to_payload`` is true; the full
            payload for an empty query; ``None`` (after a logged warning) when
            ``query_type`` is not supported.
        """
        if self.df is None:
            self._load()

        if query_str == '':
            return self.get_payload()

        if query_type == QUERY_TYPE_NORMAL:
            # http://jose-coto.com/query-method-pandas
            # NOTE(review): DataFrame.query evaluates an expression string;
            # confirm query_str never comes from an untrusted source.
            query_result = self.df.query(query_str)
        elif query_type == QUERY_TYPE_SQL:
            # TODO: integrate with https://github.com/yhat/pandasql/
            # Hand sqldf an explicit environment holding only the table the
            # SQL is expected to reference, rather than every local name
            # (including ``self``) via locals().
            query_result = sqldf(query_str, {'dataset': self.df})
        else:
            logger.warning(f'query type {query_type} is not supported')
            return None

        if to_payload:
            cols, rows = df_to_cols_rows(query_result)
            return {"cols": cols, "rows": rows}

        return query_result
예제 #3
0
def test_sqlquery():
    """A ``LIMIT 10`` SQL query on the 'iris' dataset yields exactly ten rows."""
    iris = DatasetManager.get_dataset('iris')
    result = iris.query('SELECT * FROM dataset LIMIT 10;', 'sql')
    assert result is not None

    # Only the row part of the (cols, rows) pair is checked.
    _, rows = df_to_cols_rows(result)
    assert rows is not None
    assert len(rows) == 10
예제 #4
0
 def _validate(self):
     """Predict over a 365-period future frame and record it as cols/rows.

     Asks ``self.model`` for a future dataframe of 365 periods, predicts on
     it, casts the ``'ds'`` column to ``str``, and stores the converted result
     under ``self.validation_result['forecast']`` as
     ``{'cols': ..., 'rows': ...}``.
     """
     future_frame = self.model.make_future_dataframe(periods=365)
     predicted = self.model.predict(future_frame)
     # Cast 'ds' to str on the prediction itself before it is converted.
     predicted['ds'] = predicted['ds'].astype(str)
     cols, rows = df_to_cols_rows(pd.DataFrame(data=predicted))
     self.validation_result['forecast'] = {'cols': cols, 'rows': rows}
예제 #5
0
    def get_payload(self):
        """Return the cached payload dict for this object, building it on first use.

        The payload has the keys ``"id"`` and ``"name"`` (both ``self.name``)
        plus ``"cols"`` and ``"rows"`` from ``df_to_cols_rows(self.df)``.

        Side effect: on the first build ``self.df`` is replaced by
        ``self.df.where(pd.notnull(self.df), None)``, i.e. non-null values are
        kept and ``None`` is requested for the rest.
        NOTE(review): whether numeric columns actually end up holding ``None``
        depends on the pandas version -- confirm against the pinned version.

        Returns:
            The payload dict; the same object is returned on later calls.
        """
        self.get_df()

        if self.payload is None:
            self.df = self.df.where(pd.notnull(self.df), None)
            cols, rows = df_to_cols_rows(self.df)
            # Assign the cache only once every field is available, so a
            # failure above cannot leave a half-filled payload behind that
            # later calls would return as if it were complete.
            self.payload = {
                "id": self.name,
                "name": self.name,
                "cols": cols,
                "rows": rows,
            }
            logger.debug('payload is filled ')

        return self.payload