def read_tasks_with_self_and_modules(cls, user_ID, module_id, module_version,
                                     task_status):
    """Return the decorated tasks for a user/module pair, filtered by status.

    ``task_status`` is translated to the key expected by
    ``CSTaskBusiness.read_tasks_with_self_and_modules``:
    ``'all'`` -> ``None``, ``'doing'`` -> ``0``, ``'training'`` -> ``1`` and
    anything else (treated as "done") -> ``2``.

    Tasks whose ``total_count`` is not positive are left out. Each remaining
    task is passed through ``cls._add_extra_info_with_task``.

    Returns:
        A list (possibly empty) of decorated tasks.
    """
    status_keys = {'all': None, 'doing': 0, 'training': 1}
    # Any unrecognised status falls back to the "done" key.
    status_key = status_keys.get(task_status, 2)

    tasks = CSTaskBusiness.read_tasks_with_self_and_modules(
        user_ID, module_id, module_version, status_key)

    if not (tasks and len(tasks) > 0):
        return []

    return [
        cls._add_extra_info_with_task(task)
        for task in tasks
        if task.total_count > 0
    ]
def get_evalution_csv(cls, task_id, evaluation):
    """Generate the results CSV of a task for one evaluation round.

    Args:
        task_id: identifier handed to ``CSTaskBusiness.read_task_by_id``.
        evaluation: round number; converted with ``int()``.

    Returns:
        Whatever ``CSTaskService._generate_csv_file_for_results`` returns
        (used as a file path by the callers in this file).
    """
    task = CSTaskBusiness.read_task_by_id(task_id)
    # Only the in-memory task is pointed at the requested round; it is not
    # saved here.
    task.evaluation = int(evaluation)
    return CSTaskService._generate_csv_file_for_results(
        task, task.dataset, task.sponsor.user_ID)
def get_task_detail(cls, task_id):
    """Collect per-round CSV and marking statistics for a task.

    For every round from 1 up to the task's current ``evaluation`` the
    results CSV is generated and the marking counters are computed.

    Returns:
        A dict keyed by round number, each value holding ``file_name``,
        ``file_path``, ``samples``, ``need_marked_times`` and
        ``marked_times``; or ``False`` when the task has no rounds.
    """
    task = CSTaskBusiness.read_task_by_id(task_id)

    history = {}
    # The upper bound is fixed before the loop starts, so reassigning
    # task.evaluation inside the loop does not affect the iteration.
    for round_no in range(1, task.evaluation + 1):
        # NOTE(review): results are read for ``round_no + 1`` while the CSV
        # below is generated for ``round_no`` -- confirm this offset is
        # intended.
        results = CSResultBusiness.read_all_by_id(task, round_no + 1)

        # The CSV generator takes the round from the task object; the change
        # is in-memory only (no save here).
        task.evaluation = round_no
        file_path = cls._generate_csv_file_for_results(
            task, task.dataset, task.sponsor.user_ID)
        file_name = str(task.id) + '_' + str(task.evaluation) + '.csv'

        samples, need_marked_times, marked_times = \
            cls._compute_sample_marked_count(results)

        history[round_no] = {
            'file_name': file_name,
            'file_path': file_path,
            'samples': samples,
            'need_marked_times': need_marked_times,
            'marked_times': marked_times
        }

    return history if history else False
def _read_directory(cls, path):
    """Walk ``path`` and describe every file found under it.

    The part of each file name before the first ``'_'`` is used as a task id
    and looked up through ``CSTaskBusiness.read_task_by_id``.

    Args:
        path: root directory handed to ``os.walk``.

    Returns:
        A list of dicts with keys ``file_path``, ``file_name``, ``info``
        (number of lines in the file, read as UTF-8), ``class_type`` and
        ``classes``.
    """
    results = []
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            task_id = file_name.split('_')[0]
            task = CSTaskBusiness.read_task_by_id(task_id)

            # Count lines inside a context manager so the handle is closed
            # deterministically instead of being left to the garbage
            # collector.
            with open(file_path, encoding='utf-8') as handle:
                line_count = sum(1 for _ in handle)

            results.append({
                'file_path': file_path,
                'file_name': file_name,
                'info': line_count,
                'class_type': task.task_type,
                'classes': task.classes
            })
    return results
def fulfil_task(cls, task_id, user_ID):
    """Mark a task as done and export its results CSV into the module folder.

    Steps:
        1. Set ``task.status`` to 2 (the "done" code used by the status
           filters in this file) and save.
        2. Generate the results CSV and copy it to
           ``<module.path>/<CS_PATH>/<task.id>/``.
        3. Set ``task.user_operation`` when the task has reached round 3 or
           fewer samples were marked than ``task.total_count``.

    Args:
        task_id: identifier of the task.
        user_ID: user id forwarded to the CSV generator.

    Returns:
        ``(csv_file, task, dest_path)`` where ``task`` has been passed
        through ``cls._add_extra_info_with_task``.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    dataset = ProjectBusiness.get_by_id(task.dataset.id)

    task.status = 2
    task.save()

    csv_file = cls._generate_csv_file_for_results(task, dataset, user_ID)

    dest_dir = os.path.join(task.module.path, CS_PATH, str(task.id))
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(dest_dir, exist_ok=True)
    dest_path = os.path.join(dest_dir, os.path.basename(csv_file))
    shutil.copyfile(csv_file, dest_path)

    # NOTE(review): other callers in this file pass the task object rather
    # than task.id to read_all_by_id -- confirm both are accepted.
    results = CSResultBusiness.read_all_by_id(task.id, task.evaluation)
    sample_marked_for_user, _need_marked_times, _marked_times = \
        cls._compute_sample_marked_count(results)

    if task.evaluation >= 3 or sample_marked_for_user < task.total_count:
        task.user_operation = True
    task.save()

    task = cls._add_extra_info_with_task(task)
    print('back')
    return csv_file, task, dest_path
def _read_tasks_for_status(cls, dataset_id):
    """Aggregate crowdsourcing statistics over all tasks of a dataset.

    Tasks whose ``total_count`` is not positive are ignored.

    Returns:
        A 5-tuple ``(doing_count, done_count, total_count, disputed_ids,
        user_count)``:

        * ``doing_count`` / ``done_count`` -- number of tasks with status 0 /
          status 2;
        * ``total_count`` -- sum of ``task.total_count``;
        * ``disputed_ids`` -- unique ``_id`` values of analysed results that
          carry more than one distinct label;
        * ``user_count`` -- number of distinct ``user_ID`` values found in
          the results of the current round of each task.
    """
    tasks = CSTaskBusiness.read_tasks_with_dataset(dataset_id, None)

    disputed_ids = []
    annotators = set()
    doing_count = 0
    done_count = 0
    total_count = 0

    for task in tasks:
        if task.total_count <= 0:
            continue

        round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
        task_results, _ = cls._analyse_labels(round_results)

        annotators.update(
            user.user_ID for user in round_results.distinct('user'))

        for entry in task_results:
            # More than one distinct label means the annotators disagree.
            if len(np.unique(entry.get('labels'))) > 1:
                disputed_ids.append(entry.get('_id'))

        total_count += task.total_count
        if task.status == 2:
            done_count += 1
        elif task.status == 0:
            doing_count += 1

    return (doing_count, done_count, total_count,
            np.unique(disputed_ids).tolist(), len(annotators))
def read_need_to_task(cls, user_ID, task_id, evaluation=1):
    """List the samples a user still has to label for a task round.

    Only results whose ``label`` is the empty string are returned.

    Args:
        user_ID: id of the labelling user.
        task_id: id of the task.
        evaluation: round number (defaults to 1).

    Returns:
        ``(part_data, classes)`` where ``part_data`` is a list of dicts with
        keys ``_id``, ``url`` and ``filename`` and ``classes`` is
        ``task.classes``.
    """
    results = CSResultBusiness.read_by_user_ID_and_task_ID(
        user_ID, task_id, evaluation)
    task = CSTaskBusiness.get_task_by_id(task_id)

    unlabeled = [entry for entry in results if entry.label == '']

    part_data = []
    for entry in unlabeled:
        segments = entry.sample.split('/')
        # Start at the first 'datasets' segment, or at the beginning of the
        # path when there is none.
        start = segments.index('datasets') if 'datasets' in segments else 0

        # Directory part (everything but the last segment) is re-rooted
        # under the module path; the last segment is the file name.
        entry.url = os.path.join(task.module.path, *segments[start:-1])
        entry.filename = segments[-1]

        part_data.append({
            '_id': str(entry.id),
            'url': entry.url,
            'filename': entry.filename
        })

    return part_data, task.classes
def _dispatch(cls, non_exactly, task_id, evaluation):
    """Assign every sample to a random group of users and notify them.

    Phase 1 draws, for each sample, a group size from ``cls._choose_people``
    and then that many distinct users (the task sponsor is never chosen).
    Phase 2 stores one empty-label result per (user, sample) pair and sends
    a single ``'cs_task_invited'`` message to all invited users.

    Args:
        non_exactly: indexable collection of samples to dispatch.
        task_id: id of the task the samples belong to.
        evaluation: round number recorded on each created result.

    Raises:
        ValueError: propagated from ``random.sample`` when the drawn group
            size exceeds the number of candidate users.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)

    # Everybody except the sponsor is a candidate.
    candidates = [
        user for user in UserBusiness.get_all()
        if user.user_ID != task.sponsor.user_ID
    ]
    candidate_indexes = list(range(len(candidates)))

    # Phase 1: decide all assignments before touching the database.
    assignments = []
    for sample_idx in range(len(non_exactly)):
        group_size = random.sample(cls._choose_people, 1)[-1]
        picked = random.sample(candidate_indexes, group_size)
        group = [candidates[i] for i in picked]
        assignments.append((group, non_exactly[sample_idx]))

    # Phase 2: insert the pending results into the database.
    invited_users = []
    for group, sample in assignments:
        for user in group:
            if user.id not in invited_users:
                invited_users.append(user.id)
            CSResultBusiness.add_result(task_id, evaluation, user.user_ID,
                                        sample, '')

    message = MessageService.create_message(
        UserBusiness.get_by_user_ID('admin'),
        'cs_task_invited',
        invited_users,
        project=task.dataset,
        task=task,
    )
    print('invited:', message.to_mongo())
def cancel(cls, task_id):
    """Cancel a task: terminate its jobs and mark it as done.

    Returns:
        The task after ``cls._add_extra_info_with_task``.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)

    # Terminate every job attached to the task.
    for job in JobService.business.get_by_task_id(task):
        JobService.business.terminate(job.id)

    # Status 2 ("done") so the last information stays visible to the user.
    task.status = 2
    task.user_operation = False
    task.save()

    return cls._add_extra_info_with_task(task)
def add_task(cls, dataset_id, module_id, user_ID, module_version,
             dataset_version, task_type='classification', data_type='image',
             evaluation=0, desc='', user_token=None, device='cpu'):
    """Create a crowdsourcing task and start its probability job.

    The dataset is inserted into the module (via
    ``cls._insert_module_and_parse_json``), the task record is created, a
    ``create.pkl`` file pointing at the dataset location is written to
    ``<module.path>/results/<task.id>/`` and finally
    ``cls._create_job_for_get_probability`` is invoked.

    Args:
        dataset_id, module_id: project ids of the dataset and the module.
        user_ID: id of the user creating the task.
        module_version, dataset_version: versions to use; a dataset version
            of ``'current'`` is written as an empty suffix in the pickle.
        task_type, data_type, evaluation, desc, device: forwarded to
            ``CSTaskBusiness.add_task``.
        user_token: forwarded to the job creation call.

    Returns:
        The created task.
    """
    module = ProjectBusiness.get_by_id(module_id)
    dataset = ProjectBusiness.get_by_id(dataset_id)

    # Insert the data into the module and read back its configuration.
    config = cls._insert_module_and_parse_json(dataset, module,
                                               dataset_version)
    task = CSTaskBusiness.add_task(dataset_id, user_ID, 0, 0, module_id,
                                   task_type, data_type, evaluation, desc,
                                   module_version, dataset_version,
                                   classes=config.get('classes'),
                                   device=device)

    insert_version = '' if dataset_version == 'current' else dataset_version

    pickle_dir = os.path.join(module.path, 'results', str(task.id))
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(pickle_dir, exist_ok=True)
    des_pickle_path = os.path.join(pickle_dir, 'create.pkl')

    # Build the path before opening the file so a failure here cannot leave
    # an empty create.pkl behind.
    user_module_read_dataset_path = os.path.join(
        './datasets',
        dataset.user.user_ID + '-' + dataset.name + '-' + insert_version)
    with open(des_pickle_path, 'wb') as file:
        pickle.dump({'filename': user_module_read_dataset_path}, file)
    print('dump to pickle:', user_module_read_dataset_path)
    print('pickle path:', des_pickle_path)

    # Marked as temporary in the original comments: kept for testing only,
    # its return value is not needed.
    cls._create_job_for_get_probability(task, module, user_ID, user_token,
                                        des_pickle_path, device)
    return task
def read_all_task(cls, dataset_id, dataset_version=None, task_status='all'):
    """Return the decorated tasks of a dataset, filtered by status.

    An empty-string ``dataset_version`` is treated as ``None``.
    ``task_status`` ``'all'`` reads every task; ``'doing'`` and
    ``'training'`` map to status keys 0 and 1, anything else (treated as
    "done") to 2.

    Tasks whose ``total_count`` is not positive are left out. Each remaining
    task is passed through ``cls._add_extra_info_with_task``.

    Returns:
        A list (possibly empty) of decorated tasks.
    """
    version = None if dataset_version == '' else dataset_version
    print(dataset_id, version, task_status)

    if task_status == 'all':
        tasks = CSTaskBusiness.read_tasks_with_dataset(dataset_id, version)
    else:
        # Any unrecognised status falls back to the "done" key.
        status_key = {'doing': 0, 'training': 1}.get(task_status, 2)
        tasks = CSTaskBusiness.read_tasks_with_dataset_and_key(
            dataset_id, status_key, version)

    if not (tasks and len(tasks) > 0):
        return []

    return [
        cls._add_extra_info_with_task(task)
        for task in tasks
        if task.total_count > 0
    ]
def _add_marked_count(cls, task_id):
    """Recompute and persist ``marked_count`` for a task.

    A result counts as marked when all of its labels are identical and that
    single label is not the empty string. The task is saved only when the
    count actually changed.

    Returns:
        The task.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    results = CSResultBusiness.read_all_by_id(task, task.evaluation)
    task_results, _ = cls._analyse_labels(results)

    count = 0
    for entry in task_results:
        distinct_labels = np.unique(entry.get('labels'))
        if len(distinct_labels) == 1 and distinct_labels[-1] != '':
            count += 1

    if task.marked_count != count:
        task.marked_count = count
        task.save()
    return task
def train(cls, task_id, user_token):
    """Export the current results CSV into the module and start training.

    The CSV is always generated and copied to ``<module.path>/<CS_PATH>/``.
    Training is only started while ``task.evaluation`` is below 3; in that
    case the task status is set to 1 (the "training" code used by the status
    filters in this file) and saved before the job is launched.

    Args:
        task_id: id of the task.
        user_token: token forwarded to ``cls._load_to_job_for_train``.

    Returns:
        ``(task, new_path)`` when training was started, with ``task`` passed
        through ``cls._add_extra_info_with_task``; otherwise
        ``({'msg': ...}, new_path)``.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    module = ModuleBusiness.get_by_id(task.module.id)

    old_path = cls._generate_csv_file_for_results(task, task.dataset,
                                                  task.sponsor.user_ID)

    target_dir = os.path.join(module.path, CS_PATH)
    print(target_dir)
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(target_dir, exist_ok=True)

    new_path = os.path.join(target_dir, os.path.basename(old_path))
    shutil.copyfile(old_path, new_path)

    if task.evaluation < 3:
        task.status = 1
        task.save()
        cls._load_to_job_for_train(task.module, task, task.sponsor.user_ID,
                                   user_token)
        task = cls._add_extra_info_with_task(task)
        return task, new_path

    return {
        'msg': 'The current task is achieve the max iterator times.'
    }, new_path
def need_csv_file(cls, task_id, user_ID):
    """Generate the results CSV of a task, copying it for the sponsor.

    The CSV is regenerated on every call (overwriting the previous one).
    When ``user_ID`` is the task sponsor the file is also copied to
    ``<module.path>/<CS_PATH>/<task.id>/``.

    Args:
        task_id: id of the task.
        user_ID: id of the requesting user.

    Returns:
        ``(csv_file, task, dest_path)`` for the sponsor, otherwise
        ``(csv_file, task)``. Callers must handle both shapes.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    dataset = ProjectBusiness.get_by_id(task.dataset.id)

    csv_file = cls._generate_csv_file_for_results(task, dataset,
                                                  task.sponsor.user_ID)

    if task.sponsor.user_ID != user_ID:
        return csv_file, task

    dest_dir = os.path.join(task.module.path, CS_PATH, str(task.id))
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(dest_dir, exist_ok=True)
    dest_path = os.path.join(dest_dir, os.path.basename(csv_file))
    shutil.copyfile(csv_file, dest_path)
    return csv_file, task, dest_path
def read_single_task(cls, task_id):
    """Load one task and return it after ``cls._add_extra_info_with_task``."""
    return cls._add_extra_info_with_task(
        CSTaskBusiness.read_task_by_id(task_id))
def job_call_the_filter(cls, task_id, if_success=True):
    """Callback run after a module job finishes for a task.

    On success the job's ``sample.pkl`` (under
    ``<module.path>/results/<task.id>/``) is loaded, filtered through
    ``cls._filter_probabilities`` and, if anything is left to label, a new
    round is dispatched. In every other outcome the task is set to status 2
    with ``user_operation`` enabled and the sponsor is notified.

    Args:
        task_id: id of the task the job belongs to.
        if_success: whether the job finished successfully.

    Returns:
        The task when a new round was dispatched; ``None`` otherwise.

    Raises:
        RuntimeError: wraps any exception other than ``FileNotFoundError``
            raised while processing the pickle, after the task has been
            marked and the sponsor notified.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    if if_success:
        # Read the pickle produced by the module job.
        module = ProjectBusiness.get_by_id(task.module.id)
        file_path = os.path.join(module.path, 'results', str(task.id),
                                 'sample.pkl')
        try:
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; this presumes the module output is trusted -- confirm.
            with open(file_path, 'rb') as file:
                non_exactly = pickle.load(file)
            # More than two entries is treated as an invalid pickle layout
            # (only 'results' and 'max_choose' are read below).
            if len(non_exactly.values()) > 2:
                task.status = 2
                task.user_operation = True
                task.save()
                _ = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_pickle_file_invalid', [task.sponsor],
                    user=task.sponsor,
                    task=task,
                    project=task.module)
                return
            # NOTE(review): this reports 'cs_task_done' and returns when
            # 'results' is NON-empty, which leaves the filtering/dispatch
            # code below reachable only for empty results -- the comparison
            # looks inverted; confirm the intended condition.
            if len(non_exactly['results']) > 0:
                task.user_operation = True
                task.save()
                _ = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_done', [task.sponsor],
                    user=task.sponsor,
                    task=task,
                    project=task.module)
                return
            unmarked_set = cls._filter_probabilities(
                [non_exactly['results'], non_exactly['max_choose']])
            if len(unmarked_set) > 0:
                # Start the next round: status 0 ("doing"), bump the round
                # number and dispatch the remaining samples.
                task.status = 0
                task.evaluation += 1
                task.total_count += len(unmarked_set)
                cls._dispatch(unmarked_set, task_id, task.evaluation)
                task.save()
                # Send a notification to the sponsor.
                message = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_start', [task.sponsor],
                    user=task.sponsor,
                    task=task,
                    project=task.module)
                return task
            else:
                # Nothing left to label: finish the task.
                task.status = 2
                task.user_operation = True
                task.save()
                # Send a notification to the sponsor.
                message = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_done', [task.sponsor],
                    user=task.sponsor,
                    task=task,
                    project=task.module)
                print('no unlabeled:', message.to_mongo())
        except FileNotFoundError as e:
            # The module did not create the pickle file: finish the task and
            # notify the sponsor.
            task.status = 2
            task.user_operation = True
            task.save()
            # NOTE(review): ``task`` is rebound to the decorated object
            # before ``task.sponsor`` / ``task.module`` are read below --
            # confirm the decorated object still exposes them.
            task = cls._add_extra_info_with_task(task)
            message = MessageService.create_message(
                UserBusiness.get_by_user_ID('admin'),
                'cs_task_pickle_file_not_found',
                [task.sponsor],
                task=task,
                project=task.module,
            )
            print("file not found:", message.to_mongo())
        except Exception as e:
            # Any other failure: finish the task, notify the sponsor and
            # re-raise as RuntimeError.
            print(e)
            task.status = 2
            task.user_operation = True
            task.save()
            task = cls._add_extra_info_with_task(task)
            message = MessageService.create_message(
                UserBusiness.get_by_user_ID('admin'),
                'cs_task_job_error', [task.sponsor],
                task=task,
                user=task.sponsor,
                project=task.module)
            print('exception:', e, 'message:', message.to_mongo())
            raise RuntimeError(e)
    else:
        # The job itself failed: finish the task.
        task.status = 2
        task.user_operation = True
        # Only notify when the failure happened after the first round.
        if task.evaluation > 1:
            message = MessageService.create_message(
                UserBusiness.get_by_user_ID('admin'),
                'cs_task_job_error',
                [task.sponsor],
                user=task.sponsor,
                task=task,
                project=task.module,
            )
            print('train, has exception:', message.to_mongo())
        task.save()