Exemple #1
0
    def read_tasks_with_self_and_modules(cls, user_ID, module_id,
                                         module_version, task_status):
        """Read the tasks of a user/module pair, filtered by status.

        ``task_status`` selects the filter value handed to
        ``CSTaskBusiness.read_tasks_with_self_and_modules``:
        ``'all'`` -> ``None``, ``'doing'`` -> ``0``, ``'training'`` -> ``1``
        and any other value -> ``2`` (treated as "done").

        Tasks whose ``total_count`` is not positive are skipped; every
        remaining task is passed through ``cls._add_extra_info_with_task``.

        :return: list of the processed tasks (empty when nothing matches).
        """
        # Translate the textual status into the filter value first, so the
        # business layer is queried from a single place.
        if task_status == 'all':
            status_filter = None
        elif task_status == 'doing':
            status_filter = 0
        elif task_status == 'training':
            status_filter = 1
        else:
            # Everything else is handled as "done".
            status_filter = 2

        tasks = CSTaskBusiness.read_tasks_with_self_and_modules(
            user_ID, module_id, module_version, status_filter)

        if not tasks or len(tasks) <= 0:
            return []

        return [
            cls._add_extra_info_with_task(task) for task in tasks
            if not task.total_count <= 0
        ]
Exemple #2
0
    def get_evalution_csv(cls, task_id, evaluation):
        """Generate the results CSV of a task for one evaluation round.

        The task is loaded by id and its in-memory ``evaluation`` attribute
        is overwritten with ``int(evaluation)`` before the CSV is generated.
        The task is not saved by this method.

        :return: the value returned by
            ``CSTaskService._generate_csv_file_for_results`` (used by callers
            as the file path).
        """
        task = CSTaskBusiness.read_task_by_id(task_id)
        # Point the task at the requested round for the CSV generation only.
        task.evaluation = int(evaluation)
        return CSTaskService._generate_csv_file_for_results(
            task, task.dataset, task.sponsor.user_ID)
Exemple #3
0
 def get_task_detail(cls, task_id):
     """Build per-round detail information for a task.

     For every round ``1..task.evaluation`` the results CSV is generated
     (with ``task.evaluation`` temporarily set to that round in memory) and
     the marked-sample statistics are computed.

     :return: dict keyed by round number whose values hold ``file_name``,
         ``file_path``, ``samples``, ``need_marked_times`` and
         ``marked_times``; ``False`` when there is no round at all.
     """
     task = CSTaskBusiness.read_task_by_id(task_id)
     rounds = {}
     # The range is evaluated once, so reassigning task.evaluation inside
     # the loop does not change the number of iterations.
     for round_no in range(1, task.evaluation + 1):
         # NOTE(review): results are read for ``round_no + 1`` while the CSV
         # and file name below use ``round_no`` - confirm this offset is
         # intended.
         round_results = CSResultBusiness.read_all_by_id(task, round_no + 1)
         task.evaluation = round_no
         csv_path = cls._generate_csv_file_for_results(
             task, task.dataset, task.sponsor.user_ID)
         csv_name = '{}_{}.csv'.format(task.id, task.evaluation)
         samples, need_marked_times, marked_times = \
             cls._compute_sample_marked_count(round_results)
         rounds[round_no] = {
             'file_name': csv_name,
             'file_path': csv_path,
             'samples': samples,
             'need_marked_times': need_marked_times,
             'marked_times': marked_times
         }
     return rounds if rounds else False
Exemple #4
0
 def _read_directory(cls, path):
     """Collect information about every file found below ``path``.

     The directory tree is walked recursively. For each file, the part of
     its name before the first ``'_'`` is used as the task id to load the
     task, and the number of lines in the file is counted (the file is read
     as UTF-8 text).

     :param path: root directory to walk.
     :return: list of dicts with the keys ``file_path``, ``file_name``,
         ``info`` (line count), ``class_type`` (``task.task_type``) and
         ``classes`` (``task.classes``).
     """
     results = []
     for root, _dirs, files in os.walk(path):
         for file_name in files:
             file_path = os.path.join(root, file_name)
             task_id = file_name.split('_')[0]
             task = CSTaskBusiness.read_task_by_id(task_id)
             # Count the lines inside a context manager so the handle is
             # closed right away instead of leaking one descriptor per file.
             with open(file_path, encoding='utf-8') as handle:
                 line_count = sum(1 for _ in handle)
             results.append({
                 'file_path': file_path,
                 'file_name': file_name,
                 'info': line_count,
                 'class_type': task.task_type,
                 'classes': task.classes
             })
     return results
Exemple #5
0
 def fulfil_task(cls, task_id, user_ID):
     """Mark a task as complete and export its results CSV.

     Steps, in order: set ``status`` to 2 and save; generate the results
     CSV; copy it to ``<task.module.path>/<CS_PATH>/<task id>/``; set
     ``user_operation`` to ``True`` when the task is at round 3 or later or
     fewer samples are marked than ``task.total_count``; save again.

     :return: tuple ``(csv_file, task, dest_path)`` where ``task`` has been
         passed through ``cls._add_extra_info_with_task``.
     """
     task = CSTaskBusiness.get_task_by_id(task_id)
     dataset = ProjectBusiness.get_by_id(task.dataset.id)

     # Status 2 marks the task as complete; persist before exporting.
     task.status = 2
     task.save()

     csv_file = cls._generate_csv_file_for_results(task, dataset, user_ID)

     # Copy the CSV into the per-task folder of the module.
     task_dir = os.path.join(task.module.path, CS_PATH, str(task.id))
     if not os.path.exists(task_dir):
         os.makedirs(task_dir)
     csv_name = csv_file.split('/')[-1]
     dest_path = os.path.join(task_dir, csv_name)
     shutil.copyfile(csv_file, dest_path)

     round_results = CSResultBusiness.read_all_by_id(task.id, task.evaluation)
     marked_samples, _need_marked_times, _marked_times = \
         cls._compute_sample_marked_count(round_results)

     # Finished: decide whether the user-operation flag is raised.
     if task.evaluation >= 3 or marked_samples < task.total_count:
         task.user_operation = True
     task.save()

     task = cls._add_extra_info_with_task(task)
     print('back')
     return csv_file, task, dest_path
Exemple #6
0
 def _read_tasks_for_status(cls, dataset_id):
     """Aggregate status statistics over all tasks of a dataset.

     Tasks whose ``total_count`` is not positive are ignored. For each
     remaining task the results of its current round are analysed.

     :return: tuple ``(doing_count, done_count, total_count, ids, users)``:
         number of tasks with status 0, number with status 2, the summed
         ``total_count``, the unique ``_id`` values of analysed results
         carrying more than one distinct label, and the number of distinct
         user IDs found in the results.
     """
     tasks = CSTaskBusiness.read_tasks_with_dataset(dataset_id, None)

     disputed_ids = []
     user_ids = []
     done_count = doing_count = total_count = 0

     for task in tasks:
         if task.total_count <= 0:
             continue

         round_results = CSResultBusiness.read_all_by_id(
             task, task.evaluation)
         analysed, _ = cls._analyse_labels(round_results)

         user_ids.extend(
             [marker.user_ID for marker in round_results.distinct('user')])

         # Keep only the entries whose labels disagree.
         disputed_ids.extend([
             entry.get('_id') for entry in analysed
             if len(np.unique(entry.get('labels'))) > 1
         ])

         total_count += task.total_count
         if task.status == 2:
             done_count += 1
         elif task.status == 0:
             doing_count += 1

     unique_ids = np.unique(disputed_ids).tolist()
     return (doing_count, done_count, total_count, unique_ids,
             len(set(user_ids)))
Exemple #7
0
 def read_need_to_task(cls, user_ID, task_id, evaluation=1):
     """List the samples a user still has to label for a task round.

     Only results whose ``label`` is the empty string are kept. For each of
     them the sample path is split on ``'/'``; the directory part starting
     at the first ``'datasets'`` component (or at the beginning when there
     is none) is joined onto ``task.module.path`` and stored on the result
     as ``url``, and the last component is stored as ``filename``.

     :return: tuple ``(part_data, task.classes)`` where ``part_data`` is a
         list of dicts with the keys ``_id``, ``url`` and ``filename``.
     """
     assigned = CSResultBusiness.read_by_user_ID_and_task_ID(
         user_ID, task_id, evaluation)
     task = CSTaskBusiness.get_task_by_id(task_id)

     pending = [entry for entry in assigned if entry.label == '']

     part_data = []
     for entry in pending:
         parts = entry.sample.split('/')
         # Start at the first 'datasets' component, falling back to the
         # beginning of the path when it is absent.
         start = parts.index('datasets') if 'datasets' in parts else 0
         entry.url = os.path.join(task.module.path, *parts[start:-1])
         entry.filename = parts[-1]
         part_data.append({
             '_id': str(entry.id),
             'url': entry.url,
             'filename': entry.filename
         })
     return part_data, task.classes
Exemple #8
0
 def _dispatch(cls, non_exactly, task_id, evaluation):
     """Assign every sample to randomly chosen users and invite them.

     The task sponsor is excluded from the candidates. For each sample a
     group size is drawn from ``cls._choose_people`` and that many distinct
     candidates are picked at random. After all draws are done, one empty
     result (label ``''``) is added per (user, sample) pair and a
     ``'cs_task_invited'`` message is created for the invited users.

     :param non_exactly: indexable collection of samples to dispatch.
     :param task_id: id of the task the samples belong to.
     :param evaluation: round number stored with each created result.
     """
     task = CSTaskBusiness.get_task_by_id(task_id)
     sample_total = len(non_exactly)

     # Never send the samples to the sponsor of the task.
     candidates = [
         user for user in UserBusiness.get_all()
         if user.user_ID != task.sponsor.user_ID
     ]
     candidate_indices = list(range(len(candidates)))

     # Phase 1: draw the assignees of every sample before touching the
     # database.
     assignments = []
     for sample_idx in range(sample_total):
         group_size = random.sample(cls._choose_people, 1)[-1]
         chosen = random.sample(candidate_indices, group_size)
         assignees = [candidates[i] for i in chosen]
         assignments.append((assignees, non_exactly[sample_idx]))

     # Phase 2: insert the empty results and collect the invited user ids,
     # keeping first-seen order without duplicates.
     invited_users = []
     for assignees, sample in assignments:
         for user in assignees:
             if user.id not in invited_users:
                 invited_users.append(user.id)
             CSResultBusiness.add_result(task_id, evaluation, user.user_ID,
                                         sample, '')

     message = MessageService.create_message(
         UserBusiness.get_by_user_ID('admin'),
         'cs_task_invited',
         invited_users,
         project=task.dataset,
         task=task,
     )
     print('invited:', message.to_mongo())
Exemple #9
0
 def cancel(cls, task_id):
     """Cancel a task: terminate its jobs and mark it as finished.

     Every job returned by ``JobService.business.get_by_task_id`` is
     terminated, then the task is saved with ``status`` 2 and
     ``user_operation`` ``False``.

     :return: the task after ``cls._add_extra_info_with_task``.
     """
     task = CSTaskBusiness.get_task_by_id(task_id)

     # Stop the jobs attached to this task.
     job_business = JobService.business
     for job in job_business.get_by_task_id(task):
         job_business.terminate(job.id)

     # Mark the task as finished so the latest information stays visible.
     task.status = 2
     task.user_operation = False
     task.save()
     return cls._add_extra_info_with_task(task)
Exemple #10
0
    def add_task(cls,
                 dataset_id,
                 module_id,
                 user_ID,
                 module_version,
                 dataset_version,
                 task_type='classification',
                 data_type='image',
                 evaluation=0,
                 desc='',
                 user_token=None,
                 device='cpu'):
        """Create a task and start the job that computes probabilities.

        Steps, in order: load module and dataset; call
        ``cls._insert_module_and_parse_json`` to obtain the config; create
        the task record (using ``config.get('classes')``); write
        ``<module.path>/results/<task id>/create.pkl`` containing the
        relative dataset path under the key ``'filename'``; call
        ``cls._create_job_for_get_probability``.

        :return: the newly created task.
        """
        module = ProjectBusiness.get_by_id(module_id)
        dataset = ProjectBusiness.get_by_id(dataset_id)

        # Insert the data and read the parsed configuration.
        config = cls._insert_module_and_parse_json(dataset, module,
                                                   dataset_version)
        task = CSTaskBusiness.add_task(
            dataset_id, user_ID, 0, 0, module_id, task_type, data_type,
            evaluation, desc, module_version, dataset_version,
            classes=config.get('classes'),
            device=device)

        # The 'current' version is written as an empty suffix.
        version_suffix = ('' if dataset_version == 'current'
                          else dataset_version)

        task_result_dir = os.path.join(module.path, 'results', str(task.id))
        if not os.path.exists(task_result_dir):
            os.makedirs(task_result_dir)
        des_pickle_path = os.path.join(task_result_dir, 'create.pkl')

        with open(des_pickle_path, 'wb') as pickle_file:
            dataset_dir_name = '-'.join(
                [dataset.user.user_ID, dataset.name, version_suffix])
            user_module_read_dataset_path = os.path.join(
                './datasets', dataset_dir_name)
            pickle.dump({'filename': user_module_read_dataset_path},
                        pickle_file)
        print('dump to pickle:', user_module_read_dataset_path)
        print('pickle path:', des_pickle_path)

        # Temporary, for testing only and meant to be removed later; the
        # return value is not needed.
        cls._create_job_for_get_probability(task, module, user_ID, user_token,
                                            des_pickle_path, device)
        return task
Exemple #11
0
    def read_all_task(cls,
                      dataset_id,
                      dataset_version=None,
                      task_status='all'):
        """Read the tasks of a dataset, filtered by status.

        An empty ``dataset_version`` string is normalised to ``None``.
        ``'all'`` queries ``CSTaskBusiness.read_tasks_with_dataset``; any
        other status queries ``read_tasks_with_dataset_and_key`` with the
        key ``0`` (``'doing'``), ``1`` (``'training'``) or ``2`` (anything
        else, treated as "done").

        Tasks whose ``total_count`` is not positive are skipped; every
        remaining task is passed through ``cls._add_extra_info_with_task``.

        :return: list of the processed tasks (empty when nothing matches).
        """
        if dataset_version == '':
            dataset_version = None
        print(dataset_id, dataset_version, task_status)

        if task_status == 'all':
            tasks = CSTaskBusiness.read_tasks_with_dataset(
                dataset_id, dataset_version)
        else:
            if task_status == 'doing':
                status_key = 0
            elif task_status == 'training':
                status_key = 1
            else:
                # Everything else is handled as "done".
                status_key = 2
            tasks = CSTaskBusiness.read_tasks_with_dataset_and_key(
                dataset_id, status_key, dataset_version)

        if not tasks or len(tasks) <= 0:
            return []

        return [
            cls._add_extra_info_with_task(task) for task in tasks
            if not task.total_count <= 0
        ]
Exemple #12
0
    def _add_marked_count(cls, task_id):
        """Refresh ``task.marked_count`` from the current round's results.

        An analysed result counts as marked when its ``labels`` contain
        exactly one distinct value and that value is not the empty string.
        The task is saved only when the count actually changed.

        :return: the task.
        """
        task = CSTaskBusiness.get_task_by_id(task_id)
        round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
        analysed, _ = cls._analyse_labels(round_results)

        marked = 0
        for entry in analysed:
            distinct_labels = np.unique(entry.get('labels'))
            if len(distinct_labels) == 1 and distinct_labels[-1] != '':
                marked += 1

        # Avoid a needless write when nothing changed.
        if task.marked_count != marked:
            task.marked_count = marked
            task.save()

        return task
Exemple #13
0
 def train(cls, task_id, user_token):
     """Export the results CSV into the module and start a training job.

     The CSV generated for the task is copied to
     ``<module.path>/<CS_PATH>/``. While ``task.evaluation`` is below 3 the
     task is saved with ``status`` 1 and ``cls._load_to_job_for_train`` is
     called; otherwise nothing else is changed.

     :return: ``(task, new_path)`` when training was started (``task``
         passed through ``cls._add_extra_info_with_task``), or
         ``({'msg': ...}, new_path)`` once the round limit is reached.
     """
     task = CSTaskBusiness.get_task_by_id(task_id)
     module = ModuleBusiness.get_by_id(task.module.id)

     # Generate the CSV that is handed to the module.
     old_path = cls._generate_csv_file_for_results(task, task.dataset,
                                                   task.sponsor.user_ID)

     # Copy it into the crowdsourcing folder of the module.
     target_dir = os.path.join(module.path, CS_PATH)
     print(target_dir)
     if not os.path.exists(target_dir):
         os.makedirs(target_dir)
     csv_name = old_path.split('/')[-1]
     new_path = os.path.join(target_dir, csv_name)
     shutil.copyfile(old_path, new_path)

     # Round limit reached: report it instead of training again.
     if task.evaluation >= 3:
         return {
             'msg': 'The current task is achieve the max iterator times.'
         }, new_path

     task.status = 1
     task.save()
     # Hand the task over to the job interface.
     cls._load_to_job_for_train(task.module, task, task.sponsor.user_ID,
                                user_token)
     task = cls._add_extra_info_with_task(task)
     return task, new_path
Exemple #14
0
 def need_csv_file(cls, task_id, user_ID):
     """Generate the results CSV of a task, copying it for the sponsor.

     The CSV is always generated with the sponsor's user ID. Only when
     ``user_ID`` is the sponsor is it also copied to
     ``<task.module.path>/<CS_PATH>/<task id>/``.

     :return: ``(csv_file, task, dest_path)`` for the sponsor, otherwise
         ``(csv_file, task)``; note the differing tuple length.
     """
     task = CSTaskBusiness.get_task_by_id(task_id)
     dataset = ProjectBusiness.get_by_id(task.dataset.id)
     # Regenerated on every call, overwriting the previous file.
     csv_file = cls._generate_csv_file_for_results(task, dataset,
                                                   task.sponsor.user_ID)

     # Anyone but the sponsor only gets the generated file.
     if task.sponsor.user_ID != user_ID:
         return csv_file, task

     task_dir = os.path.join(task.module.path, CS_PATH, str(task.id))
     if not os.path.exists(task_dir):
         os.makedirs(task_dir)
     csv_name = csv_file.split('/')[-1]
     dest_path = os.path.join(task_dir, csv_name)
     shutil.copyfile(csv_file, dest_path)
     return csv_file, task, dest_path
Exemple #15
0
    def read_single_task(cls, task_id):
        """Load one task by id and pass it through
        ``cls._add_extra_info_with_task``.

        :return: the value returned by ``cls._add_extra_info_with_task``.
        """
        loaded = CSTaskBusiness.read_task_by_id(task_id)
        return cls._add_extra_info_with_task(loaded)
Exemple #16
0
    def job_call_the_filter(cls, task_id, if_success=True):
        """Handle the end of a job for a task and decide the next step.

        When ``if_success`` is true, ``<module.path>/results/<task id>/sample.pkl``
        is unpickled and, depending on its content:

        * more than two values in the unpickled mapping: the task is saved
          with ``status`` 2 / ``user_operation`` True, a
          ``'cs_task_pickle_file_invalid'`` message is created and ``None``
          is returned;
        * non-empty ``'results'``: same state change with a
          ``'cs_task_done'`` message, ``None`` is returned;
        * otherwise ``cls._filter_probabilities`` is called; a non-empty
          outcome sets ``status`` 0, increments ``evaluation``, adds to
          ``total_count``, dispatches the samples, saves, creates a
          ``'cs_task_start'`` message and returns the task; an empty
          outcome marks the task done (``'cs_task_done'``).

        A missing pickle file marks the task with ``status`` 2 and creates a
        ``'cs_task_pickle_file_not_found'`` message. Any other exception
        raised inside the ``try`` marks the task with ``status`` 2, creates
        a ``'cs_task_job_error'`` message and is re-raised wrapped in
        ``RuntimeError``.

        When ``if_success`` is false the task is saved with ``status`` 2 /
        ``user_operation`` True, and a ``'cs_task_job_error'`` message is
        created only if ``task.evaluation > 1``.

        :param task_id: id of the task the job belonged to.
        :param if_success: whether the job finished successfully.
        :return: the task when new samples were dispatched, otherwise
            ``None``.
        :raises RuntimeError: on any exception other than
            ``FileNotFoundError`` raised while processing the pickle.
        """
        task = CSTaskBusiness.get_task_by_id(task_id)
        if if_success:
            # Read the pickle stored in the module's results folder.
            module = ProjectBusiness.get_by_id(task.module.id)
            file_path = os.path.join(module.path, 'results', str(task.id),
                                     'sample.pkl')
            try:
                with open(file_path, 'rb') as file:
                    # NOTE(review): pickle.load can execute arbitrary code;
                    # this file is presumably produced by the user's module
                    # job - confirm it can be trusted.
                    non_exactly = pickle.load(file)
                    # More than two entries: the pickle is treated as
                    # invalid and the task is closed.
                    if len(non_exactly.values()) > 2:
                        task.status = 2
                        task.user_operation = True
                        task.save()
                        _ = MessageService.create_message(
                            UserBusiness.get_by_user_ID('admin'),
                            'cs_task_pickle_file_invalid', [task.sponsor],
                            user=task.sponsor,
                            task=task,
                            project=task.module)
                        return

                    # NOTE(review): the task is closed as done whenever
                    # 'results' is NON-empty, so the filtering below only
                    # ever sees an empty 'results' - confirm the comparison
                    # is not inverted.
                    if len(non_exactly['results']) > 0:
                        task.status = 2
                        task.user_operation = True
                        task.save()
                        _ = MessageService.create_message(
                            UserBusiness.get_by_user_ID('admin'),
                            'cs_task_done', [task.sponsor],
                            user=task.sponsor,
                            task=task,
                            project=task.module)
                        return
                    unmarked_set = cls._filter_probabilities(
                        [non_exactly['results'], non_exactly['max_choose']])
                    if len(unmarked_set) > 0:
                        # Start a new round: status 0, next evaluation, and
                        # the new samples are added to the total.
                        task.status = 0
                        task.evaluation += 1
                        task.total_count += len(unmarked_set)
                        # Dispatch happens before the save; if it raises, the
                        # generic handler below closes the task.
                        cls._dispatch(unmarked_set, task_id, task.evaluation)
                        task.save()
                        # send notification to user
                        message = MessageService.create_message(
                            UserBusiness.get_by_user_ID('admin'),
                            'cs_task_start', [task.sponsor],
                            user=task.sponsor,
                            task=task,
                            project=task.module)
                        return task
                    else:
                        # Nothing left to label: close the task.
                        task.status = 2
                        task.user_operation = True
                        # send notification to user
                        task.save()
                        message = MessageService.create_message(
                            UserBusiness.get_by_user_ID('admin'),
                            'cs_task_done', [task.sponsor],
                            user=task.sponsor,
                            task=task,
                            project=task.module)
                        print('no unlabeled:', message.to_mongo())
            except FileNotFoundError as e:
                # The module did not create the pickle file: close the task
                # and send a notification to the user.
                task.status = 2
                task.user_operation = True
                task.save()
                task = cls._add_extra_info_with_task(task)
                message = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_pickle_file_not_found',
                    [task.sponsor],
                    task=task,
                    project=task.module,
                )
                print("file not found:", message.to_mongo())
            except Exception as e:
                # Any other failure inside the try block (including errors
                # from the calls made there): close the task, notify, then
                # re-raise as RuntimeError.
                print(e)
                task.status = 2
                task.user_operation = True
                task.save()
                task = cls._add_extra_info_with_task(task)
                message = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_job_error', [task.sponsor],
                    task=task,
                    user=task.sponsor,
                    project=task.module)
                print('exception:', e, 'message:', message.to_mongo())
                raise RuntimeError(e)
        else:
            # The job failed: close the task.
            task.status = 2
            task.user_operation = True
            # The error message is only sent when the task is past its
            # first round.
            if task.evaluation > 1:
                message = MessageService.create_message(
                    UserBusiness.get_by_user_ID('admin'),
                    'cs_task_job_error',
                    [task.sponsor],
                    user=task.sponsor,
                    task=task,
                    project=task.module,
                )
                print('train, has exception:', message.to_mongo())
            task.save()