Example 1
def store_feature_values(ids, feature, values_arr):
    index_filename = data_path('binary/features',
                               '{}.idx'.format(feature.name),
                               for_url=False)
    value_filename = data_path('binary/features',
                               '{}.val'.format(feature.name),
                               for_url=False)

    ensure_parent_folder_exists(index_filename)
    binstorage.store(ids, values_arr, index_filename, value_filename)
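
A minimal usage sketch (the FakeFeature stand-in, the ids and the value arrays are hypothetical; in the app, feature is a Django Feature model instance):

import numpy as np

class FakeFeature:
    """Stand-in for the Feature model -- store_feature_values only reads .name."""
    name = 'mfcc'

# ids and values must be parallel: one value array per segment id
ids = np.array([101, 102, 103], dtype=np.int32)
values = [np.random.rand(13) for _ in ids]

store_feature_values(ids, FakeFeature(), values)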
Example 2
def _cached_get_segment_audio_data(audio_file_name, database_id, fs, start,
                                   end):
    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              '{}.wav'.format(audio_file_name))
    chunk = wavfile.read_segment(wav_file_path,
                                 start,
                                 end,
                                 normalised=False,
                                 mono=True)

    audio_segment = pydub.AudioSegment(chunk.tobytes(),
                                       frame_rate=fs,
                                       sample_width=chunk.dtype.itemsize,
                                       channels=1)

    audio_segment = _match_target_amplitude(audio_segment)

    out = io.BytesIO()
    audio_segment.export(out, format=settings.AUDIO_COMPRESSED_FORMAT)
    binary_content = out.getvalue()
    out.close()

    response = HttpResponse()
    response.write(binary_content)
    response['Content-Type'] = 'audio/' + settings.AUDIO_COMPRESSED_FORMAT
    response['Content-Length'] = len(binary_content)
    return response
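
The heart of this view is turning a raw PCM chunk into a compressed blob; a standalone sketch of just that step, assuming 'ogg' as the compressed format and a synthetic sine chunk in place of wavfile.read_segment's output:

import io
import numpy as np
import pydub

fs = 16000
t = np.arange(fs) / fs
# one second of a 440 Hz sine, 16-bit PCM
chunk = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

segment = pydub.AudioSegment(chunk.tobytes(),
                             frame_rate=fs,
                             sample_width=chunk.dtype.itemsize,
                             channels=1)

buf = io.BytesIO()
segment.export(buf, format='ogg')  # requires ffmpeg on the PATH
blob = buf.getvalue()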
Example 3
def wav_path(audio_file, for_url=False):
    if audio_file.is_original():
        database_id = str(audio_file.database.id)
        file_name = audio_file.name + '.wav'
    else:
        database_id = str(audio_file.original.database.id)
        file_name = audio_file.original.name + '.wav'
    return data_path('audio/wav/{}'.format(database_id), file_name, for_url)
Example 4
def audio_path(audio_file, ext, for_url=False):
    if audio_file.is_original():
        database_id = str(audio_file.database.id)
        file_name = audio_file.name + '.' + ext
    else:
        database_id = str(audio_file.original.database.id)
        file_name = audio_file.original.name + '.' + ext
    return data_path('audio/{}/{}'.format(ext, database_id), file_name,
                     for_url)
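
Both helpers (Examples 3 and 4) resolve to audio/<ext>/<database id>/<name>.<ext> under the data root; a sketch with a SimpleNamespace standing in for the AudioFile model (the id and name are hypothetical):

from types import SimpleNamespace

audio_file = SimpleNamespace(database=SimpleNamespace(id=42),
                             name='song',
                             is_original=lambda: True,
                             original=None)

wav_path(audio_file)           # e.g. <data root>/audio/wav/42/song.wav
audio_path(audio_file, 'mp3')  # e.g. <data root>/audio/mp3/42/song.mp3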
Example 5
def get_movement_data(request):
    """
    Return a playable audio segment given the segment id
    :param request: must specify segment-id, this is the ID of a Segment object to be played
    :return: a binary blob specified as audio/ogg (or whatever the format is), playable and volume set to -10dB
    """
    movement_id = get_or_error(request.POST, 'movement-id')
    skeleton_file = data_path('movements',
                              '{}.skeleton'.format(movement_id),
                              for_url=False)

    # skeleton_file = 'user_data/movements/S017C003P020R002A060.skeleton'
    frames = read_skeleton_file(skeleton_file)
    num_frames = len(frames)
    rgb_files = []
    for i in range(1, num_frames + 1):
        rgb_file = data_path('movements',
                             '{}_rgb/{}_rgb-{:04d}.jpg'.format(
                                 movement_id, movement_id, i),
                             for_url=True)
        rgb_files.append(rgb_file)

    return dict(frames=frames, imgs=rgb_files)
Example 6
def merge_audio_chunks(request):
    """
    This action should be called after the last audio chunk is uploaded.
    It will merge all the saved chunks (foo.wav__0, foo.wav__1, etc...) into foo.wav
    and import the result into the database
    :param request:
    :return:
    """
    user = request.user
    params = request.POST
    name = params['name']
    chunk_count = int(params['chunkCount'])
    max_fs = int(request.POST.get('browser-fs', 0))

    if name.lower().endswith('.wav'):
        name = name[:-4]

    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              name + '.wav')

    with open(wav_file_path, 'wb') as combined_file:
        for i in range(chunk_count):
            chunk_file_path = wav_file_path + '__' + str(i)
            with open(chunk_file_path, 'rb') as chunk_file:
                combined_file.write(chunk_file.read())

    size, comp, num_channels, fs, sbytes, block_align, bitrate, data, dtype = read_wav_info(
        wav_file_path)
    if comp == 3:
        warning('File is IEEE format. Converting to standard WAV')
        audio = pydub.AudioSegment.from_file(wav_file_path)
        audio.export(wav_file_path, format='wav')

    audio_file = _import_and_convert_audio_file(database, combined_file,
                                                max_fs)

    for i in range(chunk_count):
        chunk_file_path = wav_file_path + '__' + str(i)
        os.remove(chunk_file_path)

    added_files = AudioFile.objects.filter(id=audio_file.id)
    _, rows = get_sequence_info_empty_songs(added_files)
    return dict(origin='merge_audio_chunks',
                success=True,
                warning=None,
                payload=rows)
Example 7
def get_binstorage_locations(features, aggregators):
    """
    Deduce the locations of feature binary files and feature-aggregator binary files from their names
    Then return these locations in two dictionaries for lookup convenience
    :param features:
    :param aggregators:
    :return:
    """
    # feature to binstorage's files
    f2bs = {}
    # feature+aggregation to binstorage's files
    fa2bs = {}

    for feature in features:
        feature_name = feature.name
        index_filename = data_path('binary/features',
                                   '{}.idx'.format(feature_name),
                                   for_url=False)
        value_filename = data_path('binary/features',
                                   '{}.val'.format(feature_name),
                                   for_url=False)
        f2bs[feature] = (index_filename, value_filename)

        if feature not in fa2bs:
            fa2bs[feature] = {}
        for aggregator in aggregators:
            aggregator_name = aggregator.get_name()
            folder = os.path.join('binary', 'features', feature_name)

            index_filename = data_path(folder,
                                       '{}.idx'.format(aggregator_name),
                                       for_url=False)
            value_filename = data_path(folder,
                                       '{}.val'.format(aggregator_name),
                                       for_url=False)
            fa2bs[feature][aggregator] = (index_filename, value_filename)
    return f2bs, fa2bs
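
Looking up the returned dictionaries is then straightforward; a usage sketch, assuming features and aggregators as described by the function's parameters:

f2bs, fa2bs = get_binstorage_locations(features, aggregators)

for feature in features:
    index_filename, value_filename = f2bs[feature]  # raw per-feature storage
    for aggregator in aggregators:
        index_filename, value_filename = fa2bs[feature][aggregator]  # aggregated storage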
Example 8
def import_audio_chunk(request):
    """
    To facilitate sending big files, Dropzone allows uploading by chunk.
    Each chunk is uploaded in one request. This function saves the chunk to disk,
    using the chunk's index as a suffix appended to the file's name
    :param request:
    :return:
    """
    user = request.user
    params = request.POST

    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    file = File(file=request.FILES['file'])
    name = params['dzFilename']
    chunk_index = int(params['dzChunkIndex'])

    if name.lower().endswith('.wav'):
        name = name[:-4]

    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              name + '.wav')

    if chunk_index == 0:
        is_unique = not AudioFile.objects.filter(database=database,
                                                 name=name).exists()

        if not is_unique:
            raise CustomAssertionError(
                'Error: file {} already exists in this database'.format(name))

    chunk_file_path = wav_file_path + '__' + str(chunk_index)
    with open(chunk_file_path, 'wb') as f:
        f.write(file.read())

    return dict(origin='import_audio_chunk',
                success=True,
                warning=None,
                payload=None)
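
import_audio_chunk (this example) and merge_audio_chunks (Example 6) share a simple naming contract: chunk i of foo.wav is stored as foo.wav__i. A standalone re-assembly sketch of that contract (the function name is hypothetical):

import os

def merge_chunks(wav_file_path, chunk_count):
    """Concatenate wav_file_path__0 .. wav_file_path__<chunk_count - 1>, then delete the chunks."""
    with open(wav_file_path, 'wb') as combined:
        for i in range(chunk_count):
            chunk_path = '{}__{}'.format(wav_file_path, i)
            with open(chunk_path, 'rb') as chunk:
                combined.write(chunk.read())
            os.remove(chunk_path)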
Example 9
def history_path(fullname, for_url=False):
    return data_path('history', fullname, for_url)
Example 10
def pickle_path(objid, subdir=None, for_url=False):
    fullname = '{}.pkl'.format(objid)
    folder = 'pickle'
    if subdir:
        folder = os.path.join(folder, subdir)
    return data_path(folder, fullname, for_url)
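
For example (the object id and subdir are hypothetical):

pickle_path('tsne-50')                   # e.g. <data root>/pickle/tsne-50.pkl
pickle_path('tsne-50', subdir='models')  # e.g. <data root>/pickle/models/tsne-50.pkl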
Example 11
def _import_and_convert_audio_file(database,
                                   file,
                                   max_fs,
                                   real_fs=None,
                                   audio_file=None,
                                   track=None,
                                   start=None,
                                   end=None):

    file_already_exists = False
    if isinstance(file, BufferedWriter):
        file_already_exists = True
        name_ext = os.path.basename(file.name)
    else:
        name_ext = file.name

    if name_ext.lower().endswith('.wav'):
        name_no_ext = name_ext[:-4]
    else:
        name_no_ext = name_ext

    # Need a unique name (database-wide) for new file
    if audio_file is None:
        is_unique = not AudioFile.objects.filter(database=database,
                                                 name=name_no_ext).exists()
        if not is_unique:
            raise CustomAssertionError(
                'File {} already exists'.format(name_no_ext))
    elif audio_file.name != name_no_ext:
        raise CustomAssertionError(
            'Impossible! File names in your table and in the database don\'t match'
        )

    wav_name = data_path('audio/wav/{}'.format(database.id),
                         '{}.wav'.format(name_no_ext))
    name_compressed = data_path(
        'audio/{}/{}'.format(settings.AUDIO_COMPRESSED_FORMAT, database.id),
        '{}.{}'.format(name_no_ext, settings.AUDIO_COMPRESSED_FORMAT))

    fake_wav_name = wav_name + '.bak'

    if not file_already_exists:
        with open(wav_name, 'wb') as wav_file:
            wav_file.write(file.read())

    _fs, length, noc = get_wav_info(wav_name, return_noc=True)

    # If real_fs is provided, it is authoritative -- otherwise use the rate actually read from the file
    if real_fs is None:
        real_fs = _fs

    fake_fs = None
    # If real_fs is not what we read from the file, then the file is fake, and we must restore the original file
    # to do that we rename the wav file that we just stored (which is fake) to .bak, then change the sample rate
    # back to the original and store the original file as .wav
    if real_fs != _fs:
        os.rename(wav_name, fake_wav_name)
        change_fs_without_resampling(fake_wav_name, real_fs, wav_name)
        audio = pydub.AudioSegment.from_file(fake_wav_name)
        os.remove(fake_wav_name)

    # Otherwise, if real_fs is more than max_fs, we must create a fake file for the sake of converting to mp3:
    elif real_fs > max_fs:
        fake_fs = max_fs
        change_fs_without_resampling(wav_name, fake_fs, fake_wav_name)
        audio = pydub.AudioSegment.from_file(fake_wav_name)
        os.remove(fake_wav_name)
    # Otherwise the file is ordinary - no need to fake it
    else:
        audio = pydub.AudioSegment.from_file(wav_name)

    ensure_parent_folder_exists(name_compressed)
    audio.export(name_compressed, format=settings.AUDIO_COMPRESSED_FORMAT)

    if audio_file is None:
        if track is None:
            track = AudioTrack.objects.get_or_create(name='TBD')[0]
        individual = Individual.objects.get_or_create(name='TBD')[0]
        audio_file = AudioFile(name=name_no_ext,
                               length=length,
                               fs=real_fs,
                               database=database,
                               track=track,
                               start=start,
                               end=end,
                               fake_fs=fake_fs,
                               added=timezone.now(),
                               noc=noc,
                               individual=individual)
        audio_file.save()
        if track.name == 'TBD':
            track.name = str(audio_file.id)
            track.save()
        individual.name = str(audio_file.id)
        individual.save()
    else:
        audio_file.start = start
        audio_file.end = end
        audio_file.length = length
        audio_file.save()

    return audio_file
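
For intuition: change_fs_without_resampling rewrites only the declared sample rate, leaving the samples untouched, so playback speed changes but no data is lost. A minimal sketch of the same idea using the standard-library wave module (not the project's implementation):

import wave

def rewrite_header_fs(src, new_fs, dst):
    """Copy src to dst, changing only the declared frame rate (no resampling)."""
    with wave.open(src, 'rb') as reader:
        params = reader.getparams()
        frames = reader.readframes(params.nframes)
    with wave.open(dst, 'wb') as writer:
        writer.setparams(params._replace(framerate=new_fs))
        writer.writeframes(frames)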
Example 12
def spect_mask_path(spect_id, subdir=None, for_url=False):
    folder = 'spect/mask'
    fullname = '{}.png'.format(spect_id)
    if subdir:
        folder = os.path.join(folder, subdir)
    return data_path(folder, fullname, for_url)
Example 13
def import_audio_files(request):
    """
    Store uploaded files (only wav is accepted)
    :param request: must contain a list of files and the id of the database to be stored against
    :return:
    """
    user = request.user
    files = request.FILES.values()

    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    added_files = []
    not_importable_filenames = []
    importable_files = []

    for f in files:
        file = File(file=f)
        name = file.name
        if name.lower().endswith('.wav'):
            name = name[:-4]

        is_unique = not AudioFile.objects.filter(database=database,
                                                 name=name).exists()

        if not is_unique:
            not_importable_filenames.append(name)
        else:
            importable_files.append(file)

    if len(not_importable_filenames) > 0:
        raise CustomAssertionError(
            'Error: No files were imported because the following files already exist: {}'
            .format(', '.join(not_importable_filenames)))
    else:
        for file in importable_files:
            name = file.name
            if name.lower().endswith('.wav'):
                name = name[:-4]

            name_wav = data_path('audio/wav/{}'.format(database.id),
                                 '{}.wav'.format(name))
            name_compressed = data_path(
                'audio/{}/{}'.format(settings.AUDIO_COMPRESSED_FORMAT,
                                     database.id),
                '{}.{}'.format(name, settings.AUDIO_COMPRESSED_FORMAT))

            with open(name_wav, 'wb') as wav_file:
                wav_file.write(file.read())

            audio = pydub.AudioSegment.from_file(name_wav)

            ensure_parent_folder_exists(name_compressed)
            audio.export(name_compressed,
                         format=settings.AUDIO_COMPRESSED_FORMAT)

            fs = audio.frame_rate
            length = len(audio.raw_data) // audio.frame_width
            audio_file = AudioFile(name=name,
                                   length=length,
                                   fs=fs,
                                   database=database)
            added_files.append(audio_file)

        AudioFile.objects.bulk_create(added_files)
        added_files = AudioFile.objects.filter(
            database=database, name__in=[x.name for x in added_files])
        _, rows = get_sequence_info_empty_songs(added_files)
        return rows
Example 14
def import_audio_file(request):
    """
    Store uploaded file (only wav is accepted)
    :param request: must contain the file, the database id, the track id and the song item's metadata
    :return:
    """
    user = request.user
    f = request.FILES['file']

    database_id = get_or_error(request.POST, 'database-id')
    item = json.loads(get_or_error(request.POST, 'item'))
    track_id = get_or_error(request.POST, 'track-id')

    database = get_or_error(Database, dict(id=database_id))
    track = get_or_error(AudioTrack, dict(id=track_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    start = item['start']
    end = item['end']
    song_id = item['id']

    file = File(file=f)
    name = file.name
    if name.lower().endswith('.wav'):
        name = name[:-4]

    audio_file = None
    need_unique_name = True
    if not isinstance(song_id, str) or not song_id.startswith('new:'):
        audio_file = AudioFile.objects.filter(id=song_id).first()
        if audio_file and audio_file.name == name:
            need_unique_name = False

    if need_unique_name:
        is_unique = not AudioFile.objects.filter(database=database,
                                                 name=name).exists()
        if not is_unique:
            raise CustomAssertionError('File {} already exists'.format(name))

    name_wav = data_path('audio/wav/{}'.format(database.id),
                         '{}.wav'.format(name))
    name_compressed = data_path(
        'audio/{}/{}'.format(settings.AUDIO_COMPRESSED_FORMAT, database.id),
        '{}.{}'.format(name, settings.AUDIO_COMPRESSED_FORMAT))

    with open(name_wav, 'wb') as wav_file:
        wav_file.write(file.read())

    audio = pydub.AudioSegment.from_file(name_wav)

    ensure_parent_folder_exists(name_compressed)
    audio.export(name_compressed, format=settings.AUDIO_COMPRESSED_FORMAT)
    fs = audio.frame_rate
    length = len(audio.raw_data) // audio.frame_width

    if audio_file is None:
        audio_file = AudioFile(name=name,
                               length=length,
                               fs=fs,
                               database=database,
                               track=track,
                               start=start,
                               end=end)
    else:
        if audio_file.name != name:
            AudioFile.set_name([audio_file], name)
        audio_file.start = start
        audio_file.end = end
        audio_file.length = length
        audio_file.save()

    quality = item.get('quality', None)
    individual_name = item.get('individual', None)
    note = item.get('note', None)
    type = item.get('type', None)
    sex = item.get('sex', None)

    if individual_name is not None:
        individual = Individual.objects.filter(name=individual_name).first()
        if individual is None:
            individual = Individual.objects.create(name=individual_name,
                                                   gender=sex)
        elif sex is not None:
            individual.gender = sex
            individual.save()

        audio_file.individual = individual

    if quality:
        audio_file.quality = quality

    audio_file.save()
    audio_file_attrs = settings.ATTRS.audio_file
    if note:
        ExtraAttrValue.objects.update_or_create(
            user=user, owner_id=audio_file.id, attr=audio_file_attrs.note,
            defaults=dict(value=note))

    if type:
        ExtraAttrValue.objects.update_or_create(
            user=user, owner_id=audio_file.id, attr=audio_file_attrs.type,
            defaults=dict(value=type))

    return dict(id=audio_file.id, name=audio_file.name)
Example 15
def extract_database_measurements(arg=None, force=False):
    if isinstance(arg, int):
        task = get_or_wait(arg)
    else:
        task = arg
    runner = TaskRunner(task)
    try:
        runner.preparing()

        if isinstance(task, Task):
            cls, dm_id = task.target.split(':')
            dm_id = int(dm_id)
            assert cls == DataMatrix.__name__
            dm = DataMatrix.objects.get(id=dm_id)

            if dm.database:
                segments = Segment.objects.filter(
                    audio_file__database=dm.database)
                sids = segments.values_list('id', flat=True)
            else:
                sids = dm.tmpdb.ids
            features_hash = dm.features_hash
            aggregations_hash = dm.aggregations_hash
        else:
            sids = task.sids
            features_hash = task.features_hash
            aggregations_hash = task.aggregations_hash

        features = Feature.objects.filter(id__in=features_hash.split('-'))
        aggregations = Aggregation.objects.filter(
            id__in=aggregations_hash.split('-'))
        aggregators = [aggregator_map[x.name] for x in aggregations]

        # feature to binstorage's files
        f2bs = {}
        # feature+aggregation to binstorage's files
        fa2bs = {}

        for feature in features:
            feature_name = feature.name
            index_filename = data_path('binary/features',
                                       '{}.idx'.format(feature_name),
                                       for_url=False)
            value_filename = data_path('binary/features',
                                       '{}.val'.format(feature_name),
                                       for_url=False)
            f2bs[feature] = (index_filename, value_filename)

            if feature not in fa2bs:
                fa2bs[feature] = {}

            for aggregator in aggregators:
                aggregator_name = aggregator.get_name()
                folder = os.path.join('binary', 'features', feature_name)
                mkdirp(os.path.join(settings.MEDIA_URL, folder)[1:])

                index_filename = data_path(folder,
                                           '{}.idx'.format(aggregator_name),
                                           for_url=False)
                value_filename = data_path(folder,
                                           '{}.val'.format(aggregator_name),
                                           for_url=False)
                fa2bs[feature][aggregator] = (index_filename, value_filename)

        tids, f2tid2fvals = extract_segment_features_for_segments(
            runner, sids, features, f2bs, force)

        for feature, (index_filename, value_filename) in f2bs.items():
            _tids, _fvals = f2tid2fvals.get(feature, (None, None))
            if _tids:
                _tids = np.array(_tids, dtype=np.int32)
                ensure_parent_folder_exists(index_filename)
                binstorage.store(_tids, _fvals, index_filename, value_filename)

        runner.wrapping_up()
        child_task = task.__class__(user=task.user, parent=task)
        child_task.save()
        child_runner = TaskRunner(child_task)
        child_runner.preparing()

        aggregate_feature_values(child_runner, sids, f2bs, fa2bs, features,
                                 aggregators)
        child_runner.complete()

        if isinstance(task, Task):
            full_sids_path = dm.get_sids_path()
            full_bytes_path = dm.get_bytes_path()
            full_cols_path = dm.get_cols_path()

            data, col_inds = extract_rawdata(f2bs, fa2bs, tids, features,
                                             aggregators)

            ndarray_to_bytes(data, full_bytes_path)
            ndarray_to_bytes(np.array(sids, dtype=np.int32), full_sids_path)

            with open(full_cols_path, 'w', encoding='utf-8') as f:
                json.dump(col_inds, f)

            dm.ndims = data.shape[1]
            dm.save()
        runner.complete()

    except Exception as e:
        runner.error(e)
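
For reference, features_hash and aggregations_hash are simply dash-joined id lists; a sketch with hypothetical values:

features_hash = '3-7-12'
feature_ids = features_hash.split('-')  # ['3', '7', '12']
# Feature.objects.filter(id__in=feature_ids) then resolves the actual Feature rows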