Code Example #1
def stem_dataset_sound_tags():
    logger.info('Start computing stem tags for FSD sounds')
    dataset = Dataset.objects.get(short_name='fsd')
    # Process all sounds atomically so a failure leaves no sound half-updated
    with transaction.atomic():
        for sound in dataset.sounds.all():
            tags = sound.extra_data['tags']
            stemmed_tags = [stem(tag) for tag in tags]
            sound.extra_data['stemmed_tags'] = stemmed_tags
            sound.save()
    logger.info('Finished computing stem tags for FSD sounds')
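
These examples assume a `stem` helper imported from elsewhere in the project. A minimal sketch of what such a helper could look like, assuming it wraps NLTK's Porter stemmer (the actual implementation in freesound-datasets may differ):

# Hypothetical sketch of the stem() helper used above; the real project may
# normalize tags differently.
from nltk.stem import PorterStemmer

_stemmer = PorterStemmer()

def stem(tag):
    # Lower-case the tag and reduce it to its stem, e.g. 'barking' -> 'bark'
    return _stemmer.stem(tag.lower())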
Code Example #2
    def handle(self, *args, **options):
        file_location = options['filepath']
        dataset_short_name = options['dataset_short_name']
        algorithm_name = options['algorithm_name']
        dataset = Dataset.objects.get(short_name=dataset_short_name)
        print('Loading data...')
        with open(file_location) as f:
            data = json.load(f)

        count = 0
        # Iterate all the sounds in chunks so we can do all transactions of a chunk atomically
        for chunk in chunks(list(data.keys()), 5000):
            with transaction.atomic():
                for sound_id in chunk:
                    sound_data = data[sound_id]
                    count += 1
                    # Progress indicator (count already includes the current sound)
                    sys.stdout.write('\rCreating sound %i of %i (%.2f%%)' %
                                     (count, len(data), 100.0 * count / len(data)))
                    sys.stdout.flush()
                    sound = Sound.objects.create(
                        name=sound_data['name'][:200],
                        freesound_id=sound_id,
                        extra_data={
                            'tags': sound_data['tags'],
                            'stemmed_tags': [stem(tag) for tag in sound_data['tags']],
                            'duration': sound_data['duration'],
                            'username': sound_data['username'],
                            'license': sound_data['license'],
                            'description': sound_data['description'],
                            'previews': sound_data['previews'],
                            'analysis': sound_data.get('analysis', {}),
                        })
                    sound_dataset = SoundDataset.objects.create(
                        dataset=dataset, sound=sound)

                    for node_id in sound_data['aso_ids']:
                        c = CandidateAnnotation.objects.create(
                            sound_dataset=sound_dataset,
                            type='AU',
                            algorithm=algorithm_name,
                            taxonomy_node=TaxonomyNode.objects.get(
                                node_id=node_id))
                        c.update_priority_score()
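
The command batches the input through a `chunks` helper so that each batch of 5000 sounds is committed in its own transaction. A minimal sketch of such a helper, assuming it simply yields fixed-size slices of a list (the project's own utility may be defined differently):

def chunks(items, chunk_size):
    # Yield successive chunk_size-sized slices of the input list.
    for i in range(0, len(items), chunk_size):
        yield items[i:i + chunk_size]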
Code Example #3
File: views.py Project: leegudala/freesound-datasets
def mapping_category(request, short_name, node_id):
    dataset = get_object_or_404(Dataset, short_name=short_name)
    if not dataset.user_is_maintainer(request.user):
        return HttpResponseRedirect(reverse('dataset', args=[dataset.short_name]))
    node_id = unquote(node_id)
    node = dataset.taxonomy.get_element_at_id(node_id)

    if request.method == 'POST':
        run_or_submit = dict(request.POST).get('run-or-submit', ['run'])[0]

        positive_tags_raw = dict(request.POST).get('positive-tags', [])  # e.g. ['dog, cat', 'dog']
        negative_tags_raw = dict(request.POST).get('negative-tags', [])
        preproc_positive = dict(request.POST).get('preproc-positive', ['true']) == ['true']
        preproc_negative = dict(request.POST).get('preproc-negative', ['false']) == ['true']

        positive_tags = [[stem(tag.replace(' ', '').lower()) if preproc_positive else tag.replace(' ', '').lower()
                          for tag in tags.split(',')]
                         for tags in positive_tags_raw if tags != '']  # e.g. [['dog', 'cat'], ['dog']]

        negative_tags = [stem(tag.replace(' ', '').lower()) if preproc_negative else tag.replace(' ', '').lower()
                         for tags in negative_tags_raw
                         for tag in tags.split(',') if tags != '']

        results = dataset.retrieve_sound_by_tags(positive_tags, negative_tags, preproc_positive, preproc_negative)
        candidates = list(node.candidate_annotations.values_list('sound_dataset__sound__freesound_id', flat=True))

        # Run the mapping strategy and return the retrieved sounds and some statistics
        if run_or_submit == 'run':
            quality_estimate = dataset.quality_estimate_mapping(results, node_id)
            freesound_ids = list(results.values_list('freesound_id', flat=True))
            shuffle(freesound_ids)
            quality_estimate['freesound_ids'] = freesound_ids
            quality_estimate['num_sounds'] = len(freesound_ids)
            num_common_sounds = len(list(set(candidates).intersection(set(freesound_ids))))

            stats = {
                'retrieved': quality_estimate,
                'mapping': node.quality_estimate,
                'num_common_sounds': num_common_sounds
            }
            return JsonResponse(stats)

        # Submit the retrieved sounds
        elif run_or_submit == 'submit':
            freesound_ids_str = dict(request.POST).get('freesound-ids', [None])[0]

            # Retrieved by Freesound IDs
            if freesound_ids_str:
                freesound_ids = freesound_ids_str.split(',')
                results = dataset.sounds.filter(freesound_id__in=freesound_ids)
                new_sounds = results.exclude(freesound_id__in=candidates)
                num_new_sounds = new_sounds.count()
                try:
                    with transaction.atomic():
                        for sound in new_sounds:
                            CandidateAnnotation.objects.create(
                                sound_dataset=sound.sounddataset_set.filter(dataset=dataset).first(),
                                type='MA',
                                algorithm='platform_manual: By Freesound ID',
                                taxonomy_node=node,
                                created_by=request.user
                            )
                except Exception:
                    return JsonResponse({'error': True})

                return JsonResponse({'error': False,
                                     'num_candidates_added': num_new_sounds,
                                     'num_candidates_deleted': 0})

            # Retrieved by the tag based query
            else:
                add_or_replace = dict(request.POST).get('add-or-replace', ['add'])[0]
                voted_negative = dict(request.POST).get('voted-negative', [])
                results = results.exclude(freesound_id__in=voted_negative)
                name_algorithm = str(positive_tags) + ' AND NOT ' + str(negative_tags)
                num_new_sounds = 0
                num_deleted = 0

                # Add the new candidates to the existing ones
                if add_or_replace == 'add':
                    new_sounds = results.exclude(freesound_id__in=candidates)
                    num_new_sounds = new_sounds.count()
                    try:
                        with transaction.atomic():
                            for sound in new_sounds:
                                CandidateAnnotation.objects.create(
                                    sound_dataset=sound.sounddataset_set.filter(dataset=dataset).first(),
                                    type='AU',
                                    algorithm='platform_mapping: {}'.format(name_algorithm),
                                    taxonomy_node=node,
                                    created_by=request.user
                                )
                    except Exception:
                        return JsonResponse({'error': True})

                # Replace the actual candidates with the retrieved ones (deletes only candidates never voted)
                elif add_or_replace == 'replace':
                    try:
                        with transaction.atomic():
                            new_sounds = results.exclude(freesound_id__in=candidates)
                            num_deleted = node.candidate_annotations.exclude(sound_dataset__sound__in=results)\
                                                                    .annotate(num_votes=Count('votes'))\
                                                                    .filter(num_votes=0)\
                                                                    .delete()[0]
                            num_new_sounds = new_sounds.count()
                            for sound in new_sounds:
                                CandidateAnnotation.objects.create(
                                    sound_dataset=sound.sounddataset_set.filter(dataset=dataset).first(),
                                    type='AU',
                                    algorithm='platform_mapping: {}'.format(name_algorithm),
                                    taxonomy_node=node,
                                    created_by=request.user
                                )
                    except Exception:
                        return JsonResponse({'error': True})

                return JsonResponse({'error': False,
                                     'num_candidates_added': num_new_sounds,
                                     'num_candidates_deleted': num_deleted})

    elif request.method == 'GET':
        mapping_rule = [dataset.taxonomy.data[node_id].get('fs_tags', ''),
                        dataset.taxonomy.data[node_id].get('omit_fs_tags', '')]
        platform_mapping_rules = list(set(node.candidate_annotations.exclude(type='MA')
                                          .values_list('algorithm', flat=True)))
        # Exclude the original tag_matching_mtg_1 rule; guard against it being absent
        if 'tag_matching_mtg_1' in platform_mapping_rules:
            platform_mapping_rules.remove('tag_matching_mtg_1')
        platform_mapping_rules_formated = [(m.split(' AND NOT ')[0].split('platform_mapping: ')[1],
                                            m.split(' AND NOT ')[1])
                                           for m in platform_mapping_rules]
        return render(request, 'monitor/mapping_category.html', {
            'dataset': dataset,
            'node': node,
            'mapping_rule': mapping_rule,
            'platform_mapping_rules': platform_mapping_rules_formated
        })
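
For reference, the 'run' branch of this view can be exercised with Django's test client roughly as follows; the URL name, taxonomy node id, and form values below are illustrative assumptions rather than values taken from the project:

# Hypothetical call to the view's 'run' branch from a test; assumes the view
# is routed under a URL named 'mapping_category' and that maintainer_user is
# an existing account that is a maintainer of the dataset.
from django.test import Client
from django.urls import reverse

client = Client()
client.force_login(maintainer_user)
response = client.post(
    reverse('mapping_category', args=['fsd', '/m/0dgw9r']),  # node id is illustrative
    data={
        'run-or-submit': 'run',
        'positive-tags': 'dog, bark',
        'negative-tags': 'cat',
        'preproc-positive': 'true',
        'preproc-negative': 'false',
    })
stats = response.json()  # keys: 'retrieved', 'mapping', 'num_common_sounds'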