Example 1
def calculate_similarity(task_id,
                         send_email='always',
                         raise_err=False,
                         *args,
                         **kwargs):
    task = get_or_wait(task_id)

    runner = TaskRunner(task, send_email=send_email)
    sim = None

    try:
        runner.preparing()

        # task.target is encoded as 'ClassName:id'
        cls, sim_id = task.target.split(':')
        sim_id = int(sim_id)
        assert cls == SimilarityIndex.__name__
        sim = SimilarityIndex.objects.get(id=sim_id)

        _calculate_similarity(sim, runner)

        runner.complete()
    except Exception as e:
        # a failed run leaves the index unusable, so discard it before
        # re-raising or recording the error
        if sim is not None:
            sim.delete()
        if raise_err:
            raise e
        runner.error(e)
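
Each example decodes task.target, a string of the form 'ClassName:id', and validates the class name before loading the model instance. A minimal sketch of that convention as a reusable helper (parse_target is hypothetical, not part of the original codebase):

def parse_target(target, expected_cls):
    # target looks like, e.g., 'SimilarityIndex:42'
    cls_name, _, raw_id = target.partition(':')
    if cls_name != expected_cls.__name__:
        raise ValueError('Expected target of type {}, got {}'.format(
            expected_cls.__name__, cls_name))
    return int(raw_id)

# e.g. sim_id = parse_target(task.target, SimilarityIndex)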
Example 2
def construct_ordination(task_id, send_email='always', raise_err=False, *args, **kwargs):
    task = get_or_wait(task_id)
    runner = TaskRunner(task, send_email=send_email)

    try:
        runner.preparing()

        cls, ord_id = task.target.split(':')
        ord_id = int(ord_id)
        assert cls == Ordination.__name__
        ord = Ordination.objects.get(id=ord_id)

        _construct_ordination(ord, runner)

        runner.complete()
    except Exception as e:
        if raise_err:
            raise e
        runner.error(e)
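
Both variants above take a raise_err switch: when it is true the exception propagates to the caller (useful when invoking the task synchronously, e.g. in a test), otherwise the failure is only recorded through runner.error(e). Illustrative usage (the task id is made up):

construct_ordination(task_id=123, raise_err=True)   # failure raises
construct_ordination(task_id=123)                   # failure goes to runner.error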
Example 3
def extract_database_measurements(arg=None,
                                  force=False,
                                  send_email='always',
                                  raise_err=False,
                                  *args,
                                  **kwargs):
    if isinstance(arg, int):
        task = get_or_wait(arg)
    else:
        task = arg

    # when settings.DEBUG is on, downgrade notification emails to error-only
    send_email = 'error-only' if settings.DEBUG else send_email
    runner = TaskRunner(task, send_email=send_email)
    try:
        runner.preparing()

        if isinstance(task, Task):
            cls, dm_id = task.target.split(':')
            dm_id = int(dm_id)
            assert cls == DataMatrix.__name__
            dm = DataMatrix.objects.get(id=dm_id)

            if dm.database:
                segments = Segment.objects.filter(
                    audio_file__database=dm.database)
                sids = segments.values_list('id', flat=True)
            else:
                sids = dm.tmpdb.ids
            features_hash = dm.features_hash
            aggregations_hash = dm.aggregations_hash
        else:
            sids = task.sids
            features_hash = task.features_hash
            aggregations_hash = task.aggregations_hash

        if len(sids) == 0:
            raise CustomAssertionError(
                "Measurements cannot be extracted because your database doesn't contain any segments."
            )

        segments = Segment.objects.filter(id__in=sids)
        tids = np.array(segments.values_list('tid', flat=True), dtype=np.int32)

        features = Feature.objects.filter(id__in=features_hash.split('-'))
        aggregations = Aggregation.objects.filter(
            id__in=aggregations_hash.split('-'))

        available_feature_names = feature_extractors.keys()
        disabled_feature_names = [
            x.name for x in features if x.name not in available_feature_names
        ]

        if disabled_feature_names:
            warning('Task #{}: Features {} are no longer available'.format(
                task.id, disabled_feature_names))
            features = [
                x for x in features if x.name in available_feature_names
            ]

        available_aggregator_names = aggregator_map.keys()
        disabled_aggregator_names = [
            x.name for x in aggregations
            if x.name not in available_aggregator_names
        ]

        if disabled_aggregator_names:
            warning('Task #{}: Aggregations {} are no longer available'.format(
                task.id, disabled_aggregator_names))
            aggregations = [
                x for x in aggregations if x.name in available_aggregator_names
            ]

        aggregators = [aggregator_map[x.name] for x in aggregations]

        extract_segment_features_for_segments(runner,
                                              sids,
                                              features,
                                              force=force)

        runner.wrapping_up()
        child_task = task.__class__(user=task.user, parent=task)
        child_task.save()
        child_runner = TaskRunner(child_task)
        child_runner.preparing()

        aggregate_feature_values(child_runner,
                                 tids,
                                 features,
                                 aggregators,
                                 force=force)
        child_runner.complete()

        if isinstance(task, Task):
            full_sids_path = dm.get_sids_path()
            full_bytes_path = dm.get_bytes_path()
            full_cols_path = dm.get_cols_path()

            data, col_inds = extract_rawdata(tids, features, aggregators)

            ndarray_to_bytes(data, full_bytes_path)
            ndarray_to_bytes(np.array(sids, dtype=np.int32), full_sids_path)

            with open(full_cols_path, 'w', encoding='utf-8') as f:
                json.dump(col_inds, f)

            dm.ndims = data.shape[1]
            dm.save()
        runner.complete()

    except Exception as e:
        if raise_err:
            raise e
        runner.error(e)
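
The lookups above imply the hash format: features_hash and aggregations_hash are '-'-separated lists of primary keys, since both are split on '-' and fed to id__in filters. A hypothetical builder for such a hash (the helper name is illustrative):

def make_ids_hash(objects):
    # inverse of the .split('-') lookups above
    return '-'.join(str(obj.id) for obj in objects)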
Example 4
def celery_task_test(task_id, *args, **kwargs):
    task = Task.objects.get(id=task_id)

    runner = TaskRunner(task)

    runner.preparing()

    sleep(5)
    num_ticks = 100
    runner.start(max=num_ticks)

    for _ in range(num_ticks):
        sleep(0.1)
        runner.tick()

    sleep(5)
    runner.wrapping_up()

    sleep(5)
    runner.complete()
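
This test exercises the full TaskRunner lifecycle: preparing, start(max=...), repeated tick calls, wrapping_up, then complete (with error as the failure path in the other examples). A minimal stand-in with only those methods, handy for dry-running the function outside the project; the real class presumably also persists progress on the Task and applies the send_email policy:

class StubTaskRunner:
    """Stub exposing only the lifecycle methods these examples call."""

    def __init__(self, task, send_email='always'):
        self.task = task
        self.send_email = send_email
        self.progress = 0
        self.max = None

    def preparing(self):
        print('preparing')

    def start(self, max=None):
        self.max = max
        print('started; expecting {} ticks'.format(max))

    def tick(self):
        self.progress += 1

    def wrapping_up(self):
        print('wrapping up after {} ticks'.format(self.progress))

    def complete(self):
        print('complete')

    def error(self, e):
        print('failed: {!r}'.format(e))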
Example 5
def calculate_similarity(task_id):
    task = get_or_wait(task_id)
    runner = TaskRunner(task)
    try:
        runner.preparing()

        cls, sim_id = task.target.split(':')
        sim_id = int(sim_id)
        assert cls == SimilarityIndex.__name__
        sim = SimilarityIndex.objects.get(id=sim_id)

        dm = sim.dm
        ord = sim.ord

        assert dm.task is None or dm.task.is_completed()
        assert ord is None or ord.task is None or ord.task.is_completed()

        if ord:
            sids_path = ord.get_sids_path()
            source_bytes_path = ord.get_bytes_path()
        else:
            sids_path = dm.get_sids_path()
            source_bytes_path = dm.get_bytes_path()

        runner.start()

        sids, sorted_order = _calculate_similarity(sids_path,
                                                   source_bytes_path)

        runner.wrapping_up()

        sim_sids_path = sim.get_sids_path()
        sim_bytes_path = sim.get_bytes_path()

        ndarray_to_bytes(sorted_order, sim_bytes_path)
        ndarray_to_bytes(sids, sim_sids_path)

        runner.complete()
    except Exception as e:
        runner.error(e)
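
ndarray_to_bytes and bytes_to_ndarray are opaque here, but the call sites (Example 6 reads back with bytes_to_ndarray(path, np.int32)) suggest a raw, headerless dump would satisfy both signatures. A sketch under that assumption; the project's real serializers may differ:

import numpy as np

def ndarray_to_bytes(arr, path):
    arr.tofile(path)  # raw dump: the reader must know the dtype

def bytes_to_ndarray(path, dtype):
    return np.fromfile(path, dtype=dtype)

sids = np.array([4, 8, 15], dtype=np.int32)
ndarray_to_bytes(sids, '/tmp/sids.bin')
assert (bytes_to_ndarray('/tmp/sids.bin', np.int32) == sids).all()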
Example 6
def construct_ordination(task_id):
    task = get_or_wait(task_id)
    runner = TaskRunner(task)
    try:
        runner.preparing()

        cls, ord_id = task.target.split(':')
        ord_id = int(ord_id)
        assert cls == Ordination.__name__
        ord = Ordination.objects.get(id=ord_id)

        dm = ord.dm
        method_name = ord.method
        ndims = ord.ndims
        param_kwargs = Ordination.params_to_kwargs(ord.params)

        assert dm.task is None or dm.task.is_completed()
        assert method_name in methods.keys(), 'Unknown method {}'.format(
            method_name)
        assert 2 <= ndims <= 3, 'Only support 2 or 3 dimensional ordination'

        runner.start()
        dm_sids_path = dm.get_sids_path()
        dm_bytes_path = dm.get_bytes_path()

        sids = bytes_to_ndarray(dm_sids_path, np.int32)
        dm_data = get_rawdata_from_binary(dm_bytes_path, len(sids))

        data = zscore(dm_data)
        # zero-variance columns come out of zscore as NaN (and bad input can
        # yield inf); zero them so the ordination method gets finite data
        data[np.where(np.isnan(data))] = 0
        data[np.where(np.isinf(data))] = 0

        method = methods[method_name]
        result = method(data, ndims, **param_kwargs)

        runner.wrapping_up()

        ord_sids_path = ord.get_sids_path()
        ord_bytes_path = ord.get_bytes_path()

        ndarray_to_bytes(result, ord_bytes_path)
        ndarray_to_bytes(sids, ord_sids_path)

        runner.complete()
    except Exception as e:
        runner.error(e)
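
The NaN/inf scrub after zscore is load-bearing: a zero-variance column divides by a zero standard deviation and comes back as NaN, which would poison the downstream ordination. A self-contained demonstration, assuming zscore is scipy.stats.zscore (whose behaviour matches):

import numpy as np
from scipy.stats import zscore

data = np.array([[1.0, 5.0],
                 [2.0, 5.0],
                 [3.0, 5.0]])  # second column is constant

z = zscore(data)              # constant column becomes NaN (0/0)
z[np.where(np.isnan(z))] = 0
z[np.where(np.isinf(z))] = 0
print(z)                      # finite everywhere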
Example 7
def extract_database_measurements(arg=None, force=False):
    if isinstance(arg, int):
        task = get_or_wait(arg)
    else:
        task = arg
    runner = TaskRunner(task)
    try:
        runner.preparing()

        if isinstance(task, Task):
            cls, dm_id = task.target.split(':')
            dm_id = int(dm_id)
            assert cls == DataMatrix.__name__
            dm = DataMatrix.objects.get(id=dm_id)

            if dm.database:
                segments = Segment.objects.filter(
                    audio_file__database=dm.database)
                sids = segments.values_list('id', flat=True)
            else:
                sids = dm.tmpdb.ids
            features_hash = dm.features_hash
            aggregations_hash = dm.aggregations_hash
        else:
            sids = task.sids
            features_hash = task.features_hash
            aggregations_hash = task.aggregations_hash

        features = Feature.objects.filter(id__in=features_hash.split('-'))
        aggregations = Aggregation.objects.filter(
            id__in=aggregations_hash.split('-'))
        aggregators = [aggregator_map[x.name] for x in aggregations]

        # maps each feature to its binstorage (index, value) file pair
        f2bs = {}
        # maps each feature and aggregation to its binstorage file pair
        fa2bs = {}

        for feature in features:
            feature_name = feature.name
            index_filename = data_path('binary/features',
                                       '{}.idx'.format(feature_name),
                                       for_url=False)
            value_filename = data_path('binary/features',
                                       '{}.val'.format(feature_name),
                                       for_url=False)
            f2bs[feature] = (index_filename, value_filename)

            if feature not in fa2bs:
                fa2bs[feature] = {}

            for aggregator in aggregators:
                aggregator_name = aggregator.get_name()
                folder = os.path.join('binary', 'features', feature_name)
                # MEDIA_URL starts with '/'; drop it so mkdirp receives a
                # relative path
                mkdirp(os.path.join(settings.MEDIA_URL, folder)[1:])

                index_filename = data_path(folder,
                                           '{}.idx'.format(aggregator_name),
                                           for_url=False)
                value_filename = data_path(folder,
                                           '{}.val'.format(aggregator_name),
                                           for_url=False)
                fa2bs[feature][aggregator] = (index_filename, value_filename)

        tids, f2tid2fvals = extract_segment_features_for_segments(
            runner, sids, features, f2bs, force)

        for feature, (index_filename, value_filename) in f2bs.items():
            _tids, _fvals = f2tid2fvals.get(feature, (None, None))
            if _tids:
                _tids = np.array(_tids, dtype=np.int32)
                ensure_parent_folder_exists(index_filename)
                binstorage.store(_tids, _fvals, index_filename, value_filename)

        runner.wrapping_up()
        child_task = task.__class__(user=task.user, parent=task)
        child_task.save()
        child_runner = TaskRunner(child_task)
        child_runner.preparing()

        aggregate_feature_values(child_runner, sids, f2bs, fa2bs, features,
                                 aggregators)
        child_runner.complete()

        if isinstance(task, Task):
            full_sids_path = dm.get_sids_path()
            full_bytes_path = dm.get_bytes_path()
            full_cols_path = dm.get_cols_path()

            data, col_inds = extract_rawdata(f2bs, fa2bs, tids, features,
                                             aggregators)

            ndarray_to_bytes(data, full_bytes_path)
            ndarray_to_bytes(np.array(sids, dtype=np.int32), full_sids_path)

            with open(full_cols_path, 'w', encoding='utf-8') as f:
                json.dump(col_inds, f)

            dm.ndims = data.shape[1]
            dm.save()
        runner.complete()

    except Exception as e:
        runner.error(e)
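
For reference, the two lookup tables this version threads through its helpers have the following shapes; plain strings stand in for the Feature and aggregator objects, and the paths are illustrative (the real ones come from data_path):

# f2bs: feature -> (index file, value file) for raw feature storage
f2bs = {
    'mfcc': ('binary/features/mfcc.idx', 'binary/features/mfcc.val'),
}
# fa2bs: feature -> aggregator -> (index file, value file) for aggregated values
fa2bs = {
    'mfcc': {
        'mean': ('binary/features/mfcc/mean.idx',
                 'binary/features/mfcc/mean.val'),
    },
}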