def main(unused_argv):
    """Run the model job named by the job_id flag and report the outcome.

    Looks up the job, its project and the project owner, executes the model,
    and on failure prints/emits/persists the error; on success emits a
    completion notification to the owner.
    """
    job_id = FLAGS.job_id
    # NOTE(review): looks like a placeholder/default flag value meaning
    # "no job id supplied" — confirm where this literal originates.
    if job_id == "59ae047e0c11f35fafebc422":
        raise ValueError('no job_id flag')

    job = job_business.get_by_job_id(job_id)
    # project id
    project_id = job.project.id
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    # user ID of the project owner — the notification target
    user_ID = ownership.user.user_ID

    run_args = job.run_args
    try:
        run_model(run_args['conf'], run_args['project_id'],
                  run_args['data_source_id'], run_args['model_id'],
                  job_id, **run_args['kwargs'])
    except Exception:
        # if error: send error, save error and report it
        exc_type, exc_value, exc_traceback = sys.exc_info()
        message = {
            'error': repr(traceback.format_exception(exc_type, exc_value,
                                                     exc_traceback)),
            'type': 'model'
        }
        print(message)
        emit_error(message, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job, error=message, status=300)
    else:
        message = {
            'project_name': project.name,
            'type': 'model',
            'complete': True,
            'content': 'Model job completed in project ' + project.name
        }
        emit_success(message, str(project_id), job_id=job_id, user_ID=user_ID)
def custom_model(conf, model_fn, input_data, **kw):
    """Prepare the run context for a user-defined model and delegate.

    Resolves the project owner, extracts estimator/fit/evaluate sections
    from the config and hands everything to custom_model_help.

    :param conf: config dict; may hold 'estimator', 'fit' and 'evaluate'
    :param model_fn: model-building function to run
    :param input_data: data forwarded to the helper
    :param kw: project_id / job_id / result_sds / result_dir / logging
    :return: whatever custom_model_help returns
    :raises RuntimeError: when no result_sds is supplied
    """
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    user_ID = ownership.user.user_ID

    result_sds = kw.pop('result_sds', None)
    result_dir = kw.pop('result_dir', None)
    estimator_conf = conf.get('estimator', None)
    fit_conf = conf.get('fit', {})
    evaluate_conf = conf.get('evaluate', {})

    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')

    logging_flag = kw.pop('logging', True)
    return custom_model_help(model_fn, input_data, project_id, job_id,
                             user_ID, result_dir, result_sds,
                             estimator_conf, fit_conf, evaluate_conf,
                             logging_flag)
def check_private(owned, owned_type):
    """Return True when the owned item is private, False when public.

    :param owned: the owned object (e.g. a project)
    :param owned_type: ownership category string (e.g. 'project')
    :return: True for private, False for public
    """
    ownership = ownership_business.get_ownership_by_owned_item(owned,
                                                               owned_type)
    return ownership.private is True
def remove_user_request_by_id(user_request_id, user_ID):
    """Delete a user request after verifying the caller owns it.

    :param user_request_id: id of the request to remove
    :param user_ID: id of the user asking for the removal
    :return: result of user_request_business.remove_by_id
    :raises ValueError: when the request belongs to a different user
    """
    user_request = user_request_business.get_by_user_request_id(
        user_request_id)
    # ownership check: only the owner may delete
    ownership = ownership_business.get_ownership_by_owned_item(
        user_request, 'user_request')
    if ownership.user.user_ID != user_ID:
        raise ValueError('this request not belong to this user, cannot delete')
    return user_request_business.remove_by_id(user_request_id)
def run_job():
    """Flask handler: run the toolkit or model job named in the request body.

    Expects JSON with 'section_id' (the job id) and 'project_id'. On failure
    the error is printed, emitted to the project owner, saved on the job and
    re-raised; on success a notification is emitted and the result returned.

    :return: (JSON response, HTTP status) tuple
    """
    data = request.get_json()
    job_id = data['section_id']
    project_id = data["project_id"]
    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID of the project owner (notification target)
    user_ID = ow.user.user_ID
    # renamed from `type` — do not shadow the builtin
    job_type = None
    try:
        if job_obj.toolkit:
            job_type = 'toolkit'
            complete = True
            content = 'Toolkit job completed in project ' + project.name
            result = job_service.run_toolkit_job(project_id=project_id,
                                                 job_obj=job_obj)
        elif job_obj.model:
            job_type = 'model'
            complete = False
            content = 'Model job successfully created in project ' + \
                      project.name
            result = job_service.run_model_job(project_id=project_id,
                                               job_obj=job_obj)
        else:
            return jsonify(
                {"response": 'no model and toolkit in job object'}), 400
        result = json_utility.convert_to_json(result)
    except Exception:
        # if error: send error, save error and re-raise
        exc_type, exc_value, exc_traceback = sys.exc_info()
        message = {
            'error': repr(traceback.format_exception(exc_type, exc_value,
                                                     exc_traceback)),
            'type': job_type
        }
        print(message)
        emit_error(message, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job_obj, error=message, status=300)
        # bare raise keeps the original traceback intact (was `raise e`)
        raise
    else:
        message = {
            'project_name': project.name,
            'type': job_type,
            'complete': complete,
            'content': content
        }
        emit_success(message, str(project_id), job_id=job_id, user_ID=user_ID)
        return jsonify({"response": {"result": result}}), 200
def update_request_answer(request_answer_id, user_id, answer):
    """Update an answer's content; allowed only for its owner.

    :param request_answer_id: id of the answer to update
    :param user_id: id of the user attempting the update
    :param answer: new answer content
    :raises RuntimeError: when the answer belongs to a different user
    """
    request_answer = request_answer_business.get_by_request_answer_id(
        request_answer_id)
    ownership = ownership_business.get_ownership_by_owned_item(
        request_answer, 'request_answer')
    # guard: reject updates from non-owners
    if ownership.user.user_ID != user_id:
        raise RuntimeError(
            'this request not belong to this user, cannot update')
    request_answer_business.update_request_answer_by_id(
        request_answer_id=request_answer_id, answer=answer)
def update_user_request_comments(user_request_comments_id, user_ID, comments):
    """Update a request comment; allowed only for its owner.

    :param user_request_comments_id: id of the comment to update
    :param user_ID: id of the user attempting the update
    :param comments: new comment content
    :raises RuntimeError: when the comment belongs to a different user
    """
    user_request_comments = comments_business.get_by_user_request_comments_id(
        user_request_comments_id)
    ownership = ownership_business.get_ownership_by_owned_item(
        user_request_comments, 'user_request_comments')
    # guard: reject updates from non-owners
    if ownership.user.user_ID != user_ID:
        raise RuntimeError(
            'this request not belong to this user, cannot update')
    comments_business.update_user_request_comments_by_id(
        user_request_comments_id=user_request_comments_id,
        comments=comments)
def accept_request_answer(user_request_id, user_ID, request_answer_id):
    """Mark an answer as accepted on a user request; owner-only.

    :param user_request_id: id of the request being answered
    :param user_ID: id of the user attempting the update
    :param request_answer_id: id of the answer to accept
    :raises RuntimeError: when the request belongs to a different user
    """
    user_request = user_request_business.get_by_user_request_id(
        user_request_id)
    ownership = ownership_business.get_ownership_by_owned_item(
        user_request, 'user_request')
    # guard: reject updates from non-owners
    if ownership.user.user_ID != user_ID:
        raise RuntimeError(
            'this request not belong to this user, cannot update')
    user_request_business.update_user_request_by_id(
        user_request_id=user_request_id,
        accept_answer=ObjectId(request_answer_id))
def to_code():
    """Flask handler: translate a model job into source code.

    Expects JSON with 'section_id' (the job id) and 'project_id'.

    :return: (JSON response with generated code, HTTP 200) tuple
    """
    data = request.get_json()
    job_id = data['section_id']
    project_id = data["project_id"]
    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID of the project owner — NOTE(review): currently unused below;
    # kept because the attribute access may dereference the owner record.
    user_ID = ow.user.user_ID
    code = job_service.model_job_to_code(project_id=project_id,
                                         job_obj=job_obj)
    return jsonify({"response": {"code": code}}), 200
def get_results_dir_by_job_id(job_id, user_ID, checkpoint='final'):
    """Resolve the directory and filename of a job's training result.

    :param job_id: id of the training job
    :param user_ID: id of the requesting user (checked against ownership)
    :param checkpoint: checkpoint basename, e.g. 'final' or 'best'
    :return: (result_dir, filename) tuple
    :raises ValueError: when the project is private and owned by another user
    """
    project = job_business.get_by_job_id(job_id).project
    project_name = project.name
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    # private projects are only readable by their owner
    if ownership.private and ownership.user.user_ID != user_ID:
        raise ValueError('Authentication failed')
    # results always live under the *owner's* directory
    user_ID = ownership.user.user_ID
    # join all components with os.path.join, consistent with run_model
    # (previously mixed '+' concatenation with join)
    result_dir = os.path.join(user_directory, user_ID, project_name, job_id)
    filename = '{}.hdf5'.format(checkpoint)
    return result_dir, filename
def remove_project_by_id(project_id, user_ID):
    """Remove a project owned by user_ID, its hub user and its files.

    :param project_id: object_id of project to remove
    :param user_ID: id of the user requesting the removal
    :return: result of project_business.remove_by_id
    :raises ValueError: when the project belongs to a different user
    """
    project = project_business.get_by_id(project_id)
    # ownership check: only the owner may delete
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    if user_ID != ownership.user.user_ID:
        raise ValueError('project not belong to this user, cannot delete')
    # tear down the temporary jupyterhub user first
    delete_hub_user(user_ID, project.name)
    # then remove the project's directory on disk, if present
    project_dir = UPLOAD_FOLDER + user_ID + '/' + project.name
    if os.path.isdir(project_dir):
        shutil.rmtree(project_dir)
    # finally drop the project object itself
    return project_business.remove_by_id(project_id)
def mlp(conf, input, **kw):
    """Train and evaluate an MLP via mlp_main inside the shared TF graph.

    :param conf: config dict with 'fit' and 'evaluate' sections
    :param input: dict with x_tr/y_tr (train) and x_te/y_te (held-out)
    :param kw: result_sds / project_id / result_dir / job_id
    :return: whatever mlp_main returns
    """
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    result_dir = kw.pop('result_dir', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    user_ID = ownership.user.user_ID
    fit_conf = conf['fit']
    eval_conf = conf['evaluate']
    x_train, y_train = input['x_tr'], input['y_tr']
    # validation and test reuse the same held-out split here
    x_val, y_val = input['x_te'], input['y_te']
    x_test, y_test = input['x_te'], input['y_te']
    with graph.as_default():
        return mlp_main(result_sds, project_id, job_id, user_ID, result_dir,
                        x_train, y_train, x_val, y_val, x_test, y_test,
                        fit_conf, eval_conf)
def update_project(project_id, name, description, is_private=True,
                   related_fields=None, tags=None, related_tasks=None,
                   done_indices=None):
    """Update an existing project and its ownership privacy flag.

    (The previous docstring said "Create a new project"; this function
    updates one that already exists.)

    :param project_id: id of the project to update
    :param name: str new project name
    :param description: str new description
    :param is_private: boolean, stored on the ownership record
    :param related_fields: list, defaults to [] when omitted
    :param tags: list, defaults to [] when omitted
    :param related_tasks: list, defaults to [] when omitted
    :param done_indices: list, defaults to [] when omitted
    """
    # Mutable default arguments ([]) replaced with None sentinels to avoid
    # sharing one list object across calls.
    related_fields = [] if related_fields is None else related_fields
    tags = [] if tags is None else tags
    related_tasks = [] if related_tasks is None else related_tasks
    done_indices = [] if done_indices is None else done_indices

    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    ownership_business.update_by_id(ow['id'], private=is_private)
    project_business.update_by_id(project_id, name=name,
                                  description=description,
                                  update_time=datetime.utcnow(),
                                  related_fields=related_fields,
                                  tags=tags,
                                  related_tasks=related_tasks,
                                  done_indices=done_indices)
def fork(project_id, new_user_ID): """ fork project :param project_id: :param new_user_ID: :return: """ # get project project = project_business.get_by_id(project_id) # get ownership, and check privacy ownership = ownership_business.get_ownership_by_owned_item( project, 'project') if ownership.private is True: raise NameError('forked project is private, fork failed') if ownership.user.user_ID == new_user_ID: raise NameError('you are forking your self project') # get user object user = UserBusiness.get_by_user_ID(new_user_ID) # copy and save project project_cp = project_business.copy(project) # create ownership relation ownership_business.add(user, True, project=project_cp) # copy staging data sets sds_array = staging_data_set_business.get_by_project_id(project_id, False) for sds in sds_array: staging_data_service.copy_staging_data_set(sds, project_cp) # copy jobs and save jobs = project.jobs jobs_cp = [] for job in jobs: # get source sds if hasattr(job, 'staging_data_set') and job.staging_data_set: sds_cp = staging_data_set_business.get_by_name_and_project( job.staging_data_set.name, job.staging_data_set.project) # sds_cp = staging_data_service.copy_staging_data_set( # job.staging_data_set, project_cp) else: sds_cp = None # copy job job_cp = job_business.copy_job(job, project_cp, sds_cp) if not job_cp: continue jobs_cp.append(job_cp) # copy result staging data set by job and bind to project try: # get result sds result_sds = staging_data_set_business.get_by_job_id(job['id']) # bind job to sds staging_data_set_business.update_job_by_name_and_project( result_sds.name, result_sds.project, job_cp) # staging_data_service.copy_staging_data_set(result_sds, project_cp, # belonged_job=job_cp) except DoesNotExist: pass project_business.update_by_id(project_cp['id'], jobs=jobs_cp) project_cp.reload() return project_cp
def unpublish_project(project_id):
    """Make a project private again (undo publishing).

    :param project_id: id of the project to unpublish
    :return: result of ownership_business.update_by_id
    """
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    return ownership_business.update_by_id(ownership['id'], private=True)
def get_by_id(project_id):
    """Fetch a project and annotate it with its privacy flag.

    :param project_id: id of the project to fetch
    :return: the project object, with is_private set from its ownership
    """
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    project.is_private = ownership.private
    return project
def run_model(conf, project_id, data_source_id, model_id, job_id, **kwargs):
    """
    run model by model_id and the parameter config

    Dispatches on the model category: neural-network, unstructured and
    hyperopt models call their entry function directly; every other
    category is wrapped by models.custom_model, with the entry function
    passed along as model_fn.

    :param conf: config dict (non-NN branches read its 'fit' section)
    :param project_id:
    :param data_source_id:
    :param model_id:
    :param job_id:
    :param kwargs: forwarded to the input managers
    :return: result of job_service.run_code
    """
    model = model_business.get_by_model_id(model_id)
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    # results are stored under the owner's directory, per project and job
    result_dir = os.path.join(user_directory, ownership.user.user_ID,
                              project.name, job_id)
    # import model function
    if model['category'] == ModelType['neural_network']:
        # keras nn
        f = getattr(models, model.entry_function)
        input_dict = manage_nn_input(conf, data_source_id, **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, f, job_id, input_dict,
                                    result_dir=result_dir)
    elif model['category'] == ModelType['unstructured']:
        # input from folder (no staging data source id passed to run_code)
        f = getattr(models, model.entry_function)
        input_dict = model_input_manager_unstructured(conf, data_source_id,
                                                      **kwargs)
        return job_service.run_code(conf, project_id, None, model, f,
                                    job_id, input_dict,
                                    file_id=data_source_id,
                                    result_dir=result_dir)
    elif model['category'] == ModelType['hyperopt']:
        f = getattr(models, model.entry_function)
        fit = conf.get('fit', None)
        data_fields = fit.get('data_fields', [[], []])
        input_dict = model_input_manager_custom_supervised(data_fields,
                                                           data_source_id,
                                                           model.name,
                                                           **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, f, job_id, input_dict,
                                    result_dir=result_dir)
    else:
        # custom models: the entry function becomes model_fn inside
        # models.custom_model
        f = models.custom_model
        model_fn = getattr(models, model.entry_function)
        fit = conf.get('fit', None)
        if model['category'] == ModelType['custom_supervised']:
            data_fields = fit.get('data_fields', [[], []])
            input_dict = model_input_manager_custom_supervised(data_fields,
                                                               data_source_id,
                                                               model.name,
                                                               **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn,
                                        input_dict, result_dir=result_dir)
        if model['category'] == ModelType['unsupervised']:
            x_cols = fit.get('data_fields', [])
            input_dict = model_input_manager_unsupervised(x_cols,
                                                          data_source_id,
                                                          model.name,
                                                          **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn,
                                        input_dict, result_dir=result_dir)
        # NOTE(review): unreachable — hyperopt is already handled by the
        # elif branch above, so control can never reach here; confirm
        # which of the two hyperopt code paths is intended.
        if model['category'] == ModelType['hyperopt']:
            data_fields = fit.get('data_fields', [[], []])
            input_dict = model_input_manager_custom_supervised(data_fields,
                                                               data_source_id,
                                                               model.name,
                                                               **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn,
                                        input_dict, result_dir=result_dir)
def keras_seq(conf, input, **kw):
    """
    a general implementation of sequential model of keras

    Builds a Sequential model from conf['layers'], compiles it with
    conf['compile'], trains with conf['fit'] and evaluates with
    conf['evaluate'], logging per-epoch/per-batch progress and saving
    best/latest weights plus the final model under result_dir.

    :param conf: config dict with 'layers', 'compile', 'fit', 'evaluate'
    :param input: dict with x_tr/y_tr (train) and x_te/y_te (held-out)
    :param kw: result_sds / project_id / job_id / result_dir
    :return: {'score': ..., 'history': ...}
    :raises RuntimeError: when result_sds or project_id is missing
    """
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    print('conf')
    print(conf)
    result_dir = kw.pop('result_dir', None)
    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')
    if project_id is None:
        raise RuntimeError('no project id passed to model')
    with graph.as_default():
        model = Sequential()
        ls = conf['layers']
        comp = conf['compile']
        f = conf['fit']
        e = conf['evaluate']
        x_train = input['x_tr']
        y_train = input['y_tr']
        # validation and test reuse the same held-out split
        x_val = input['x_te']
        y_val = input['y_te']
        x_test = input['x_te']
        y_test = input['y_te']
        training_logger = logger_service.TrainingLogger(f['args']['epochs'],
                                                        project_id,
                                                        job_id, user_ID,
                                                        result_sds)
        # TODO add validator
        # loop to add layers
        for l in ls:
            # get layer class from keras
            layer_class = getattr(layers, l['name'])
            # add layer
            model.add(layer_class(**l['args']))
        # compile with the user-supplied optimizer/loss/metrics args
        model.compile(**comp['args'])
        # callback to save metrics through the training logger
        batch_print_callback = LambdaCallback(
            on_epoch_begin=lambda epoch, logs:
                training_logger.log_epoch_begin(epoch, logs),
            on_epoch_end=lambda epoch, logs:
                training_logger.log_epoch_end(epoch, logs),
            on_batch_end=lambda batch, logs:
                training_logger.log_batch_end(batch, logs))
        # checkpoint to save best weight
        best_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'best.hdf5')),
            save_weights_only=True,
            verbose=1, save_best_only=True)
        # checkpoint to save latest weight
        general_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'latest.hdf5')),
            save_weights_only=True,
            verbose=1)
        # training
        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            callbacks=[batch_print_callback,
                                       best_checkpoint,
                                       general_checkpoint],
                            verbose=0,
                            **f['args'])
        # testing
        score = model.evaluate(x_test, y_test, **e['args'])
        config = model.get_config()
        # persist model config, final score and per-epoch history
        logger_service.log_train_end(result_sds,
                                     model_config=config,
                                     score=score,
                                     history=history.history)
        keras_saved_model.save_model(result_dir, model)
        return {'score': score, 'history': history.history}