예제 #1
0
 def marked_sample(cls, result_id, label):
     """Store ``label`` on one result record and refresh its task's progress.

     Side effects visible in this method:

     * the result is saved with the new label;
     * the task's ``marked_count`` is refreshed via ``cls._add_marked_count``;
     * a ``cs_task_done`` message is sent to the task sponsor whenever the
       number of marked samples reaches ``task.total_count``;
     * the task is saved with ``status = 2`` once ``marked_count`` equals
       ``total_count``.

     :param result_id: id of the result record being labelled
     :param label: label value to store on the record
     :return: the task decorated by ``cls._add_extra_info_with_task`` when
         the task has just been closed, otherwise ``None``
     """
     labelled_result = CSResultBusiness.read_by_id(result_id)
     labelled_result.label = label
     labelled_result.save()

     # Every label is taken into account; the helper only saves the task
     # when the recomputed count differs from the stored one.
     task = cls._add_marked_count(labelled_result.task.id)

     round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
     marked_samples, _need_times, _done_times = \
         cls._compute_sample_marked_count(round_results)

     # Labelling is complete: tell the sponsor (sent in the name of 'admin').
     if marked_samples >= task.total_count:
         sender = UserBusiness.get_by_user_ID('admin')
         MessageService.create_message(
             sender,
             'cs_task_done',
             [task.sponsor],
             user=task.sponsor,
             task=labelled_result.task,
             project=labelled_result.task.module,
         )

     # The task is only closed when every sample has an agreed label.
     if task.marked_count != task.total_count:
         return None
     task.status = 2
     task.save()
     return cls._add_extra_info_with_task(task)
예제 #2
0
 def _dispatch(cls, non_exactly, task_id, evaluation):
     """Randomly assign each sample to users and invite them to the task.

     Every user returned by ``UserBusiness.get_all()`` except the task
     sponsor is a candidate. For each sample a head-count is drawn from
     ``cls._choose_people`` and that many distinct candidates are picked.
     One empty-label result record is created per (sample, user) pair and a
     single ``cs_task_invited`` message is sent to all picked users.

     If fewer candidates exist than the drawn head-count, the sample is
     dispatched to all available candidates instead of raising
     ``ValueError`` from ``random.sample``.

     :param non_exactly: iterable of samples that need to be labelled
     :param task_id: id of the crowd-sourcing task
     :param evaluation: evaluation round the result records belong to
     :return: None
     """
     task = CSTaskBusiness.get_task_by_id(task_id)

     # Never dispatch a task to the user who sponsored it.
     sponsor_user_ID = task.sponsor.user_ID
     candidates = [
         user for user in UserBusiness.get_all()
         if user.user_ID != sponsor_user_ID
     ]

     # Decide all assignments first, so nothing is written to the database
     # until the random draw for every sample has succeeded.
     assignments = []
     for sample in non_exactly:
         wanted = random.sample(cls._choose_people, 1)[-1]
         # random.sample raises ValueError when asked for more items than
         # the population holds, so cap the head-count.
         head_count = min(wanted, len(candidates))
         assignments.append((sample, random.sample(candidates, head_count)))

     # Insert the result records and collect each invited user id once,
     # in first-seen order.
     invited_users = []
     already_invited = set()
     for sample, users in assignments:
         for user in users:
             if user.id not in already_invited:
                 already_invited.add(user.id)
                 invited_users.append(user.id)
             CSResultBusiness.add_result(task_id, evaluation, user.user_ID,
                                         sample, '')

     # Invite everybody with one message, sent in the name of 'admin'.
     message = MessageService.create_message(
         UserBusiness.get_by_user_ID('admin'),
         'cs_task_invited',
         invited_users,
         project=task.dataset,
         task=task,
     )
     print('invited:', message.to_mongo())
예제 #3
0
 def get_task_detail(cls, task_id):
     """Collect the CSV file and labelling statistics of every round of a task.

     For each evaluation round ``1 .. task.evaluation`` a CSV file is
     (re)generated and the marked-sample statistics of that same round are
     computed.

     :param task_id: id of the crowd-sourcing task
     :return: dict mapping round number to a dict with the keys
         ``file_name``, ``file_path``, ``samples``, ``need_marked_times``
         and ``marked_times``; ``False`` when the task has no round
     """
     task = CSTaskBusiness.read_task_by_id(task_id)
     former = {}
     for round_no in range(1, task.evaluation + 1):
         # Read the results of the round being described. The statistics
         # must come from the same round as the CSV stored next to them;
         # reading ``round_no + 1`` skipped round 1 and always queried a
         # round past ``task.evaluation`` for the last entry.
         # NOTE(review): assumes result rounds use the same numbering as
         # task.evaluation, as the other callers of read_all_by_id do.
         results = CSResultBusiness.read_all_by_id(task, round_no)

         # The CSV helper reads task.evaluation, so point the in-memory task
         # at this round (the task is not saved here). After the last
         # iteration the attribute is back at its original value.
         task.evaluation = round_no
         file_path = cls._generate_csv_file_for_results(
             task, task.dataset, task.sponsor.user_ID)
         file_name = '{}_{}.csv'.format(task.id, task.evaluation)

         marked_samples, need_marked_times, marked_times = \
             cls._compute_sample_marked_count(results)
         former[round_no] = {
             'file_name': file_name,
             'file_path': file_path,
             'samples': marked_samples,
             'need_marked_times': need_marked_times,
             'marked_times': marked_times
         }
     return former if former else False
예제 #4
0
 def fulfil_task(cls, task_id, user_ID):
     """Close a task, export its results to CSV and copy the CSV to the module.

     The task is saved with ``status = 2`` before the CSV is generated. The
     CSV is then copied into ``<task.module.path>/<CS_PATH>/<task.id>/``.
     ``task.user_operation`` is set when the task has reached round 3 or
     later, or when fewer samples were marked than ``task.total_count``.

     :param task_id: id of the crowd-sourcing task
     :param user_ID: user id used in the path of the generated CSV
     :return: tuple ``(csv_file, task, dest_path)``: the generated CSV path,
         the task decorated by ``cls._add_extra_info_with_task`` and the
         path of the copy
     """
     task = CSTaskBusiness.get_task_by_id(task_id)
     dataset = ProjectBusiness.get_by_id(task.dataset.id)

     # Mark the task as completed.
     task.status = 2
     task.save()

     # Export the results of the current round.
     csv_file = cls._generate_csv_file_for_results(task, dataset, user_ID)

     # Copy the export next to the module, creating the folder on demand.
     target_dir = os.path.join(task.module.path, CS_PATH, str(task.id))
     if not os.path.exists(target_dir):
         os.makedirs(target_dir)
     dest_path = os.path.join(target_dir, csv_file.rsplit('/', 1)[-1])
     shutil.copyfile(csv_file, dest_path)

     round_results = CSResultBusiness.read_all_by_id(task.id, task.evaluation)
     marked_samples, _need_times, _done_times = \
         cls._compute_sample_marked_count(round_results)

     # Finished: flag the task when the rounds are used up or when not
     # every sample has been marked.
     rounds_used_up = task.evaluation >= 3
     not_fully_marked = marked_samples < task.total_count
     if rounds_used_up or not_fully_marked:
         task.user_operation = True
     task.save()

     # Attach the display-only information.
     task = cls._add_extra_info_with_task(task)
     print('back')
     return csv_file, task, dest_path
예제 #5
0
 def _read_tasks_for_status(cls, dataset_id):
     """Summarise the crowd-sourcing tasks of one dataset.

     Tasks whose ``total_count`` is not positive are ignored.

     :param dataset_id: id of the dataset whose tasks are inspected
     :return: tuple ``(doing_count, done_count, total_count, sample_ids,
         user_count)``: the number of tasks with status 0, the number of
         tasks with status 2, the sum of ``total_count``, the unique
         ``_id`` values of analysed entries carrying more than one distinct
         label, and the number of distinct users found in the results
     """
     tasks = CSTaskBusiness.read_tasks_with_dataset(dataset_id, None)

     disputed_ids = []
     labellers = set()
     in_progress = 0
     finished = 0
     sample_total = 0

     for task in tasks:
         if task.total_count <= 0:
             continue

         round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
         analysed, _ = cls._analyse_labels(round_results)

         # Who has result records in the current round of this task.
         labellers.update(
             user.user_ID for user in round_results.distinct('user'))

         # Keep the entries whose labels are not all identical.
         for entry in analysed:
             if len(np.unique(entry.get('labels'))) > 1:
                 disputed_ids.append(entry.get('_id'))

         sample_total += task.total_count
         if task.status == 2:
             finished += 1
         elif task.status == 0:
             in_progress += 1

     unique_ids = np.unique(disputed_ids).tolist()
     return in_progress, finished, sample_total, unique_ids, len(labellers)
예제 #6
0
 def read_need_to_task(cls, user_ID, task_id, evaluation=1):
     """List the samples of a task that a user has not labelled yet.

     A result counts as unlabelled when its ``label`` is the empty string.
     The sample path is split on ``'/'``; the directory part, starting at
     the first ``'datasets'`` component (or at the first component when
     there is none), is joined onto ``task.module.path`` to form the URL.

     :param user_ID: user whose result records are read
     :param task_id: id of the crowd-sourcing task
     :param evaluation: evaluation round to read, defaults to 1
     :return: tuple ``(part_data, classes)`` where ``part_data`` is a list
         of dicts with the keys ``_id``, ``url`` and ``filename`` and
         ``classes`` is ``task.classes``
     """
     results = CSResultBusiness.read_by_user_ID_and_task_ID(
         user_ID, task_id, evaluation)
     task = CSTaskBusiness.get_task_by_id(task_id)

     part_data = []
     for result in results:
         if result.label != '':
             continue

         parts = result.sample.split('/')
         start = parts.index('datasets') if 'datasets' in parts else 0

         # Everything but the last component is the directory of the sample.
         result.url = os.path.join(task.module.path, *parts[start:-1])
         result.filename = parts[-1]
         part_data.append({
             '_id': str(result.id),
             'url': result.url,
             'filename': result.filename
         })
     return part_data, task.classes
예제 #7
0
    def _add_extra_info_with_task(cls, task):
        """Decorate ``task`` in memory with display-only attributes.

        Nothing is saved here; the attributes are only set on the object.
        Added or overwritten attributes: ``invited_users``, ``user_ID``,
        ``username``, ``avatar_url``, ``accept_user_count``,
        ``sample_marked_for_user``, ``need_marked_times``, ``marked_times``,
        ``percentage``, ``dataset_version``, ``module_version`` and
        ``dataset_name``.

        :param task: the crowd-sourcing task to decorate
        :return: the same ``task`` object
        """
        results = CSResultBusiness.read_all_by_id(task, task.evaluation)

        # Every distinct user owning a result record in the current round.
        invited = [
            {
                "username": user.username,
                "user_ID": user.user_ID,
                "avatar_url": user.avatar_url
            }
            for user in results.distinct('user')
        ]
        task.invited_users = invited

        # Sponsor information.
        sponsor = task.sponsor
        task.user_ID = sponsor.user_ID
        task.username = sponsor.username
        task.avatar_url = sponsor.avatar_url

        # How many people take part. This used to be len(task.user_ID),
        # i.e. the number of characters of the sponsor's user id.
        # NOTE(review): counts the distinct users with result records;
        # confirm this is the intended meaning of "accept".
        task.accept_user_count = len(invited)

        # How many samples are marked.
        task.sample_marked_for_user, task.need_marked_times, task.marked_times = \
            cls._compute_sample_marked_count(results)
        # A zero total is replaced by 0.01 to avoid a division by zero.
        total_count = 0.01 if task.total_count == 0 else task.total_count
        task.percentage = int(task.sample_marked_for_user / total_count * 100)

        # Transform the version format: underscores become dots.
        task.dataset_version = task.dataset_version.replace('_', '.')
        task.module_version = task.module_version.replace('_', '.')

        # Dataset name.
        task.dataset_name = task.dataset.display_name
        return task
예제 #8
0
    def _add_marked_count(cls, task_id):
        """Recompute ``marked_count`` of a task and save it when it changed.

        An analysed entry is counted when all of its labels are identical
        and that single label is not the empty string.

        :param task_id: id of the crowd-sourcing task
        :return: the task, carrying the up-to-date ``marked_count``
        """
        task = CSTaskBusiness.get_task_by_id(task_id)
        round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
        analysed, _ = cls._analyse_labels(round_results)

        agreed = 0
        for entry in analysed:
            distinct_labels = np.unique(entry.get('labels'))
            if len(distinct_labels) == 1 and distinct_labels[-1] != '':
                agreed += 1

        # Only write to the database when the count actually changed.
        if task.marked_count != agreed:
            task.marked_count = agreed
            task.save()

        return task
예제 #9
0
    def _generate_csv_file_for_results(cls, task, dataset, user_ID):
        """Write the winning label of every sample of the current round to CSV.

        The file is stored at ``<dataset.dataset_path>/crowdsourcing/
        <user_ID>/<module name>/<task.dataset_version>/
        <task.id>_<task.evaluation>.csv``; the folder is created on demand.
        For each sample the non-empty label with the highest count wins (the
        first one encountered on a tie); a sample without any non-empty
        label gets ``'None'``. When there is no sample at all, a single
        placeholder row is written.

        :param task: the crowd-sourcing task whose results are exported
        :param dataset: object providing ``dataset_path``
        :param user_ID: user id used as a path component
        :return: full path of the written CSV file
        """
        # Build the target location.
        module = ProjectBusiness.get_by_id(task.module.id)
        relative_dir = os.path.join('crowdsourcing', user_ID, module.name,
                                    task.dataset_version)
        store_path = os.path.join(dataset.dataset_path, relative_dir)
        csv_file_name = str(task.id) + '_' + str(task.evaluation) + '.csv'

        # Read the results of the current round.
        results = CSResultBusiness.read_all_by_id(task, task.evaluation)

        if not os.path.exists(store_path):
            os.makedirs(store_path)
        full_path = os.path.join(store_path, csv_file_name)

        # Pick the winning label per sample.
        _, transform_tasks = cls._analyse_labels(results)
        rows = []
        for sample in transform_tasks:
            counts = sample['label']
            voted = [key for key in counts.keys() if key != '']
            # max() keeps the first of several equally frequent labels.
            winner = max(voted, key=counts.__getitem__) if voted else 'None'
            rows.append({'sample': sample['sample'], 'label': winner})

        if not rows:
            rows.append({'sample': 'no samples', 'label': 'no labels'})

        # Save without the index column.
        frame = pd.DataFrame(rows, index=np.arange(len(rows)))
        frame.to_csv(full_path, index=None)
        return full_path
예제 #10
0
 def add_result(cls, task_id, evaluation, user_id, sample, sample_label):
     """Create a result record through ``CSResultBusiness.add_result``.

     NOTE(review): the arguments are forwarded as ``(user_id, task_id,
     evaluation, sample, sample_label)``, while ``_dispatch`` calls the same
     business method as ``(task_id, evaluation, user_ID, sample, '')``.
     Confirm which order ``CSResultBusiness.add_result`` expects.

     :param task_id: id of the crowd-sourcing task
     :param evaluation: evaluation round of the record
     :param user_id: user the record is created for
     :param sample: the sample to be labelled
     :param sample_label: initial label of the record
     :return: whatever ``CSResultBusiness.add_result`` returns
     """
     created = CSResultBusiness.add_result(
         user_id, task_id, evaluation, sample, sample_label)
     return created
예제 #11
0
 def read_by_id(cls, result_id):
     """Return the result record with the given id.

     :param result_id: id of the result record
     :return: whatever ``CSResultBusiness.read_by_id`` returns
     """
     return CSResultBusiness.read_by_id(result_id)
예제 #12
0
    def read_by_user_and_sample(cls, user_ID, sample, task_id, evaluation):
        """Look up a result by user, sample, task and evaluation round.

        The looked-up value is also printed to stdout.

        :param user_ID: user owning the result
        :param sample: the sample the result refers to
        :param task_id: id of the crowd-sourcing task
        :param evaluation: evaluation round of the result
        :return: whatever ``CSResultBusiness.read_by_user_and_sample``
            returns
        """
        found = CSResultBusiness.read_by_user_and_sample(
            user_ID, sample, task_id, evaluation)
        print('result', found)
        return found
예제 #13
0
 def read_by_user_and_task(cls, user_ID, task_id, evaluation=1):
     """Return the results of one user for one task and evaluation round.

     :param user_ID: user owning the results
     :param task_id: id of the crowd-sourcing task
     :param evaluation: evaluation round to read, defaults to 1
     :return: whatever ``CSResultBusiness.read_by_user_ID_and_task_ID``
         returns
     """
     user_results = CSResultBusiness.read_by_user_ID_and_task_ID(
         user_ID, task_id, evaluation)
     return user_results
예제 #14
0
 def read_all(cls, task, evaluation):
     """Return all results of a task for one evaluation round.

     :param task: the crowd-sourcing task
     :param evaluation: evaluation round to read
     :return: whatever ``CSResultBusiness.read_all_by_id`` returns
     """
     round_results = CSResultBusiness.read_all_by_id(task, evaluation)
     return round_results