def delete_dataset(id):
    dataset = DatasetManager.get_dataset(id)
    domain = 'datasets'
    ConfigurationManager.remove_section(domain, id)
    try:
        dataset.delete()
    except Exception:
        logger.warning(f'failed to delete dataset {id}')
def list_jobs():
    job_base_dir = ConfigurationManager.get_confs('mljob').get('job', 'dir')
    try:
        job_ids = [
            file
            for file in os.listdir(job_base_dir)
            if os.path.isdir(os.path.join(job_base_dir, file))
        ]
        results = []
        for job_id in job_ids:
            try:
                logger.debug(f'found job with id={job_id}')
                item = {}
                item['id'] = job_id
                status = MLJob.get_status_by_id(job_id)
                item['status'] = status.name
                meta = MLJob.get_meta(job_id)
                for key in ['type', 'name']:
                    item[key] = meta[key]
                results.append(item)
            except Exception:
                logger.exception(f'failed to retrieve job id={job_id}')
        return results
    except Exception:
        logger.exception('failed to list jobs')
        return []
def getdbClient():
    dbconf = ConfigurationManager.get_confs('database')
    for section in dbconf.sections():
        if section == 'mongodb':
            host = dbconf.get(section, 'host')
            port = int(dbconf.get(section, 'port'))
            return MongoClient(host, port)
def getPoolConfig():
    dbconf = ConfigurationManager.get_confs('database')
    for section in dbconf.sections():
        if section == 'redis':
            host = dbconf.get(section, 'host')
            port = int(dbconf.get(section, 'port'))
            return {'host': host, 'port': port}
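# A minimal sketch of the 'database' configuration the two helpers above
# read. The section and option names come from the code; the hosts and
# ports below are illustrative assumptions, not project values:
#
#   [mongodb]
#   host = localhost
#   port = 27017
#
#   [redis]
#   host = localhost
#   port = 6379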
def drawWordCloud(request):
    try:
        appname = request.json['appname']
        connection = request.json['name']
        channel = request.json['channel']
        sentiment = request.json['sentiment']
        startTime = request.json['startTime'].replace('/', '-')
        endTime = request.json['endTime'].replace('/', '-')
        keywordColumn = request.json['keywordColumn']
        result = DataSet().getConnectionPredictedByCondition(
            connection, startTime, endTime)
        if len(result) > 0:
            df = pd.DataFrame(result)
            df['keywords_counts'] = df[keywordColumn].apply(lambda x: len(x))
            df = df[df['keywords_counts'] > 0]
            comments_keyword = df[(df[channel] == appname)
                                  & (df['sentimental'] == sentiment)][keywordColumn]
            # keywords = ','
            # keywords = keywords.join(comments_keyword.apply(lambda x: str(x[0])))
            server_config = ConfigurationManager.get_confs('server')
            filepath = server_config.get('server', 'wordcloudPath')
            filename = 'wordcloud' + datetime.now().strftime('%Y%m%d%H%M%S')
            data = frequency_wordcloud(
                list(comments_keyword), filepath, filename, sentiment)
            filename_suffix = filename + '.jpeg'
            return response.json(
                {'message': 'succeeded to generate wordcloud',
                 'result': filename_suffix},
                status=200)
        else:
            return response.json(
                {'message': 'there is no record during the period',
                 'result': []},
                status=200)
    except Exception:
        logger.exception('failed to draw wordCloud')
        return response.json({'message': 'failed to draw wordCloud'}, status=500)
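# A hedged sketch of the JSON body drawWordCloud() expects. The keys are
# exactly the ones read above; the values are illustrative assumptions:
#
#   {
#       "appname": "my-app",
#       "name": "my-connection",
#       "channel": "channel",
#       "sentiment": "positive",
#       "startTime": "2020/01/01",
#       "endTime": "2020/01/31",
#       "keywordColumn": "keywords"
#   }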
async def uploadModelFile(request):
    server_config = ConfigurationManager.get_confs('server')
    filepath = server_config.get('server', 'filePath')
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    test_file = request.files.get('file')
    file_parameters = {
        'body': test_file.body,
        'name': test_file.name,
        'type': test_file.type,
    }
    try:
        file_path = filepath + file_parameters.get('name')
        with open(file_path, 'wb') as f:
            f.write(file_parameters['body'])
        logger.debug('file written to disk')
        return response.json(
            {
                "message": 'Upload file successfully',
                "file_names": list(request.files.keys()),
                "success": True,
            },
            status=200)
    except Exception:
        logger.exception('failed to write uploaded file')
        return response.json(
            {
                "message": 'Upload file failed',
                "file_names": list(request.files.keys()),
                "success": False,
            },
            status=500)
async def saveModel(request):
    try:
        if 'content' in request.json.keys():
            filename = request.json['content']
        else:
            filename = 'Service with specific functionality'
        server_config = ConfigurationManager.get_confs('server')
        path = server_config.get('server', 'filePath')
        department = request.json['department']
        team = request.json['team']
        version = request.json['version']
        description = request.json['description']
        features = request.json['features']
        name = request.json['name']
        model = ModelService.addModel(
            filename=filename,
            path=path,
            department=department,
            team=team,
            version=version,
            description=description,
            features=features,
            isDeployed=False,
            name=name,
        )
        return response.json({'message': 'Add model successfully'}, status=200)
    except Exception:
        logger.exception('failed to add model')
        return response.json({'message': 'Add model failed'}, status=500)
def _handle_job_option(self):
    job_config = ConfigurationManager.get_confs('mljob')
    # read the default auto_ml options from the mljob configuration
    default_keys = [
        'time_left_for_this_task',
        'per_run_time_limit',
        'initial_configurations_via_metalearning',
        'ensemble_size',
        'ensemble_nbest',
        'ensemble_memory_limit',
        'ml_memory_limit',
    ]
    default_job_config = {
        key: job_config.getint('auto_ml', key) for key in default_keys
    }
    # caller-provided options win; fall back to the configured defaults
    for key, value in default_job_config.items():
        if key not in self.job_option:
            self.job_option[key] = value
    self.job_option['tmp_folder'] = os.path.join(self.job_dir, 'tmp')
    self.job_option['output_folder'] = os.path.join(self.job_dir, 'output')
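# A minimal sketch of the [auto_ml] section of the mljob configuration
# consumed above. The option names come from the code; the values are
# illustrative assumptions:
#
#   [auto_ml]
#   time_left_for_this_task = 120
#   per_run_time_limit = 30
#   initial_configurations_via_metalearning = 25
#   ensemble_size = 50
#   ensemble_nbest = 50
#   ensemble_memory_limit = 1024
#   ml_memory_limit = 3072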
def get_dataset(id):
    config = ConfigurationManager.get_confs('datasets')
    content = config.get(id, 'content')
    name = config.get(id, 'name')
    description = config.get(id, 'description')
    dataset_type = config.get(id, 'type')
    dataset_class = get_dataset_class(dataset_type)
    dataset = dataset_class(id, name, content, description)
    return dataset
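# A minimal sketch of the 'datasets' configuration get_dataset() reads:
# one section per dataset id carrying the four options fetched above.
# The section name and values are illustrative assumptions:
#
#   [some-dataset-id]
#   name = iris
#   type = csv
#   content = iris.csv
#   description = sample dataset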
def _handle_validation_option(self):
    validation_config = ConfigurationManager.get_confs('mljob')
    default_validation = {}
    default_validation['test_size'] = validation_config.getfloat(
        'validation_option', 'test_size')
    default_validation['random_state'] = validation_config.getint(
        'validation_option', 'random_state')
    default_validation['shuffle'] = validation_config.getboolean(
        'validation_option', 'shuffle')
    for key in default_validation:
        if key not in self.validation_option:
            self.validation_option[key] = default_validation[key]
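# The matching [validation_option] section, again a sketch with assumed
# values; the option names and types mirror the getfloat/getint/getboolean
# calls above:
#
#   [validation_option]
#   test_size = 0.2
#   random_state = 42
#   shuffle = true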
def create_job(job_payload):
    job_type = job_payload['type']
    job_option = {}
    job_option_attrs = [
        'name',
        'dataset',
        'features',
        'targets',
        'job_option',
        'validation_option',
    ]
    for key in job_option_attrs:
        if key not in job_payload:
            job_payload[key] = {}
        job_option[key] = job_payload[key]
    if job_type == 'AutoClassificationJob':
        job = AutoClassificationJob(**job_option)
    elif job_type == 'AutoRegressionJob':
        job = AutoRegressionJob(**job_option)
    elif job_type == 'TimeSerialsForecastsJob':
        job = TimeSerialsForecastsJob(**job_option)
    else:
        raise RuntimeError(f'job type={job_type} not supported!')
    is_multi_process = ConfigurationManager.get_confs('mljob').getboolean(
        'job', 'multi_processes')
    if is_multi_process:
        # run training in a new process
        try:
            logger.debug(f'start new process to train ml job={job.id}')
            p = Process(target=job.train)
            p.start()
            # p.join()
            # TODO: update training status using web socket
        except Exception:
            logger.exception(f'failed to run ml job process for job={job.id}')
    else:
        try:
            logger.debug(f'start new thread to train ml job {job.id}')
            _thread.start_new_thread(job.train, ())
            # TODO: update training status using web socket
        except Exception:
            logger.exception(f'failed to run ml job thread for job={job.id}')
    return job
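# A hedged usage sketch for create_job(): 'type' selects the job class and
# the remaining keys become constructor options. The dataset id and column
# names below are illustrative assumptions, not project values.
def _example_create_job():
    payload = {
        'type': 'AutoClassificationJob',
        'name': 'demo classification job',
        'dataset': 'iris',  # assumed dataset id
        'features': ['sepal_length', 'sepal_width'],  # assumed feature columns
        'targets': ['species'],  # assumed target column
    }
    job = create_job(payload)
    print(job.id, job.get_status())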
from dataplay.confsvc.manager import ConfigurationManager
from dataplay.datasvc.registry import DatasetTypeRegistry

dataset_type_config = ConfigurationManager.get_confs('dataset_type')

dataset_registry = DatasetTypeRegistry()
for section in dataset_type_config.sections():
    module_name = dataset_type_config.get(section, 'module')
    class_name = dataset_type_config.get(section, 'class')
    dataset_registry.register(section, class_name, module_name)
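# A minimal sketch of the 'dataset_type' configuration this loop walks:
# one section per dataset type, naming the module and class to register.
# The 'csv' section name and dotted paths are illustrative assumptions:
#
#   [csv]
#   module = dataplay.datasvc.csv
#   class = CsvDataset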
async def handle_request(request, file):
    server_config = ConfigurationManager.get_confs('server')
    filepath = server_config.get('server', 'wordcloudPath')
    return await response.file(filepath + file)
class MLJob(ABC):
    base_dir = ConfigurationManager.get_confs('mljob').get('job', 'dir')

    def __init__(self, name, dataset):
        self.id = str(uuid.uuid4())
        self.name = name
        self.dataset_id = dataset
        self.dataset = DatasetManager.get_dataset(dataset)
        self.df = self.dataset.get_df()
        self.job_dir = os.path.join(MLJob.base_dir, self.id)
        self.metadata = {}
        self._init()

    @abstractmethod
    def train(self):
        return NotImplemented

    @abstractmethod
    def predict(self, df):
        return NotImplemented

    def _build_meta(self):
        self.metadata['name'] = self.name
        self.metadata['dataset_id'] = self.dataset_id

    def _save_meta(self):
        self._build_meta()
        meta_file = os.path.join(self.job_dir, 'meta.json')
        with FileLock(meta_file):
            with open(meta_file, 'w') as f:
                f.write(json.dumps(self.metadata))

    def _save_model(self):
        logger.debug(
            f'save model for class={type(self).__name__} id={self.id} name={self.name}'
        )
        model_file = os.path.join(self.job_dir, 'model.joblib')
        dump(self, model_file)
        logger.debug('save model complete')

    @staticmethod
    def get_meta(id):
        meta_file = os.path.join(MLJob.base_dir, id, 'meta.json')
        with FileLock(meta_file):
            with open(meta_file) as f:
                return json.loads(f.read())

    @staticmethod
    def get_model(id):
        model_file = os.path.join(MLJob.base_dir, id, 'model.joblib')
        model = load(model_file)
        return model

    def _init(self):
        if os.path.isdir(self.job_dir):
            logger.error(f'job dir {self.job_dir} already exists')
            raise RuntimeError(f'job {self.id} already exists')
        try:
            os.makedirs(self.job_dir)
            self._update_status(MLJobStatus.INITIALIZED)
            self._save_meta()
        except OSError:
            logger.error(f'failed to create job dir {self.job_dir}')
        else:
            logger.debug(f'successfully created the directory {self.job_dir}')

    def _update_status(self, status):
        try:
            status_file = os.path.join(self.job_dir, 'status')
            with FileLock(status_file):
                with open(status_file, 'w') as f:
                    f.write(str(status.value))
        except Exception:
            raise RuntimeError(f'failed to update status for ml job {self.id}')

    @staticmethod
    def get_status_by_id(id):
        status_file = os.path.join(MLJob.base_dir, id, 'status')
        with FileLock(status_file):
            with open(status_file) as f:
                status_value = f.read()
        return MLJobStatus(int(status_value))

    def get_status(self):
        return MLJob.get_status_by_id(self.id)

    @staticmethod
    def delete_job_by_id(job_id):
        job_dir = os.path.join(MLJob.base_dir, job_id)
        try:
            shutil.rmtree(job_dir)
        except Exception:
            logger.exception(f'failed to delete job dir {job_dir}')
        else:
            logger.debug(f'successfully deleted the directory {job_dir}')

    def clean(self):
        MLJob.delete_job_by_id(self.id)
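# A hedged sketch of a concrete MLJob subclass, showing the contract the
# abstract base implies: implement train() and predict(), and persist via
# the inherited helpers. DummyMeanJob and the MLJobStatus member used here
# are illustrative assumptions, not part of the project.
class DummyMeanJob(MLJob):
    def train(self):
        # remember the per-column mean of the training dataframe
        self.mean_ = self.df.mean(numeric_only=True)
        self._save_model()
        self._save_meta()
        self._update_status(MLJobStatus.SUCCESS)  # assumed status member

    def predict(self, df):
        # predict the stored training mean for every input row
        return [self.mean_] * len(df)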
import os

from dataplay.confsvc.manager import ConfigurationManager

server_config = ConfigurationManager.get_confs('server')
filepath = server_config.get('server', 'datasetPath')

# CSV_DATASET_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'dataset', 'csv')
CSV_DATASET_PATH = filepath

QUERY_TYPE_NORMAL = 'query'
QUERY_TYPE_SQL = 'sql'
QUERY_TYPES = [QUERY_TYPE_NORMAL, QUERY_TYPE_SQL]
    interface=RedisSessionInterface(expiry=600, sessioncookie=True, httponly=True))

# Add cors extension
CORS(app, automatic_options=True, supports_credentials=True)

# app.blueprint(openapi_blueprint)
app.blueprint(swagger_blueprint)

app.config.API_VERSION = '1.0.0'
app.config.API_TITLE = 'Dataplay API'
app.config.API_DESCRIPTION = 'Dataplay API'
app.config.API_CONTACT_EMAIL = '*****@*****.**'
app.config.API_PRODUCES_CONTENT_TYPES = ['application/json']

server_config = ConfigurationManager.get_confs('server')
app.config.HOST = server_config.get('server', 'host')
app.config.port = 8888
app.config.DEBUG = server_config.getboolean('server', 'debug')
app.config.WORKERS = server_config.getint('server', 'workers')

dataset_type_config = ConfigurationManager.get_confs('dataset_type')
dataset_registry = DatasetTypeRegistry()
for section in dataset_type_config.sections():
    module_name = dataset_type_config.get(section, 'module')
    class_name = dataset_type_config.get(section, 'class')
    dataset_registry.register(section, class_name, module_name)

app.blueprint(file_svc)
app.blueprint(dataset_svc, url_prefix=PREFIX)
app.blueprint(user_svc, url_prefix=PREFIX)