def calculate_similarity(task_id, send_email='always', raise_err=False, *args, **kwargs):
    """Run the similarity-index calculation targeted by a Task.

    The task's ``target`` is expected in the form ``'SimilarityIndex:<id>'``;
    the actual work is delegated to ``_calculate_similarity`` while progress
    is reported through a :class:`TaskRunner`.

    :param task_id: id of the Task to run (resolved via ``get_or_wait``)
    :param send_email: email policy forwarded to the TaskRunner
    :param raise_err: if True, re-raise a failure to the caller instead of
        only recording it on the runner
    """
    task = get_or_wait(task_id)
    runner = TaskRunner(task, send_email=send_email)
    sim = None
    try:
        runner.preparing()
        cls, sim_id = task.target.split(':')
        sim_id = int(sim_id)
        assert cls == SimilarityIndex.__name__
        sim = SimilarityIndex.objects.get(id=sim_id)
        _calculate_similarity(sim, runner)
        runner.complete()
    except Exception as e:
        # A half-built index is unusable - remove it before reporting the error.
        if sim is not None:
            sim.delete()
        if raise_err:
            # Bare ``raise`` preserves the original traceback
            # (``raise e`` would truncate it to this frame).
            raise
        runner.error(e)
def construct_ordination(task_id, send_email='always', raise_err=False, *args, **kwargs):
    """Run the ordination construction targeted by a Task.

    The task's ``target`` is expected in the form ``'Ordination:<id>'``;
    the actual work is delegated to ``_construct_ordination`` while progress
    is reported through a :class:`TaskRunner`.

    :param task_id: id of the Task to run (resolved via ``get_or_wait``)
    :param send_email: email policy forwarded to the TaskRunner
    :param raise_err: if True, re-raise a failure to the caller instead of
        only recording it on the runner
    """
    task = get_or_wait(task_id)
    runner = TaskRunner(task, send_email=send_email)
    try:
        runner.preparing()
        cls, ord_id = task.target.split(':')
        ord_id = int(ord_id)
        assert cls == Ordination.__name__
        ordination = Ordination.objects.get(id=ord_id)
        _construct_ordination(ordination, runner)
        runner.complete()
    except Exception as e:
        if raise_err:
            # Bare ``raise`` preserves the original traceback
            # (``raise e`` would truncate it to this frame).
            raise
        runner.error(e)
def extract_database_measurements(arg=None, force=False, send_email='always', raise_err=False, *args, **kwargs):
    """Extract acoustic measurements for the segments of a DataMatrix.

    ``arg`` is either a Task id (int, resolved via ``get_or_wait``) or a
    task-like object. For a real :class:`Task` the target
    (``'DataMatrix:<id>'``) determines which segments and which
    feature/aggregation sets to use; otherwise they are read directly off
    the task-like object (``sids``, ``features_hash``, ``aggregations_hash``).

    Feature values are extracted first, then aggregated under a child task,
    and - for real Tasks - the resulting matrix is serialised to the
    DataMatrix's sids/bytes/cols files.

    :param arg: Task id or task-like object
    :param force: if True, recompute values even if already extracted
    :param send_email: email policy (downgraded to 'error-only' when DEBUG)
    :param raise_err: if True, re-raise failures instead of only recording
        them on the runner
    :raises CustomAssertionError: when the database contains no segments
    """
    if isinstance(arg, int):
        task = get_or_wait(arg)
    else:
        task = arg
    send_email = 'error-only' if settings.DEBUG else send_email
    runner = TaskRunner(task, send_email=send_email)
    try:
        runner.preparing()
        if isinstance(task, Task):
            cls, dm_id = task.target.split(':')
            dm_id = int(dm_id)
            assert cls == DataMatrix.__name__
            dm = DataMatrix.objects.get(id=dm_id)
            if dm.database:
                segments = Segment.objects.filter(
                    audio_file__database=dm.database)
                sids = segments.values_list('id', flat=True)
            else:
                sids = dm.tmpdb.ids
            features_hash = dm.features_hash
            aggregations_hash = dm.aggregations_hash
        else:
            sids = task.sids
            features_hash = task.features_hash
            aggregations_hash = task.aggregations_hash

        if len(sids) == 0:
            raise CustomAssertionError(
                'Measurement cannot be extracted because your database doesn\'t contain any segments.'
            )

        segments = Segment.objects.filter(id__in=sids)
        tids = np.array(segments.values_list('tid', flat=True), dtype=np.int32)

        features = Feature.objects.filter(id__in=features_hash.split('-'))
        aggregations = Aggregation.objects.filter(
            id__in=aggregations_hash.split('-'))

        # Drop features whose extractor is no longer registered, warning so
        # the task owner can see what was skipped.
        available_feature_names = feature_extractors.keys()
        disabled_features_names = [
            x.name for x in features if x.name not in available_feature_names
        ]
        if len(disabled_features_names):
            warning('Task #{}: Features {} are no longer available'.format(
                task.id, disabled_features_names))
            features = [
                x for x in features if x.name in available_feature_names
            ]

        # Same pruning for aggregators that are no longer registered.
        available_aggregator_names = aggregator_map.keys()
        disabled_aggregators_names = [
            x.name for x in aggregations
            if x.name not in available_aggregator_names
        ]
        if len(disabled_aggregators_names):
            warning('Task #{}: Aggregation {} are no longer available'.format(
                task.id, disabled_aggregators_names))
            aggregations = [
                x for x in aggregations
                if x.name in available_aggregator_names
            ]

        aggregators = [aggregator_map[x.name] for x in aggregations]

        extract_segment_features_for_segments(runner, sids, features, force=force)
        runner.wrapping_up()

        # Aggregation runs as a child task with its own progress tracking.
        child_task = task.__class__(user=task.user, parent=task)
        child_task.save()
        child_runner = TaskRunner(child_task)
        child_runner.preparing()
        aggregate_feature_values(child_runner, tids, features, aggregators, force=force)
        child_runner.complete()

        if isinstance(task, Task):
            full_sids_path = dm.get_sids_path()
            full_bytes_path = dm.get_bytes_path()
            full_cols_path = dm.get_cols_path()

            data, col_inds = extract_rawdata(tids, features, aggregators)

            ndarray_to_bytes(data, full_bytes_path)
            ndarray_to_bytes(np.array(sids, dtype=np.int32), full_sids_path)
            with open(full_cols_path, 'w', encoding='utf-8') as f:
                json.dump(col_inds, f)

            dm.ndims = data.shape[1]
            dm.save()
        runner.complete()
    except Exception as e:
        if raise_err:
            # Bare ``raise`` preserves the original traceback.
            raise
        runner.error(e)
def celery_task_test(task_id, *args, **kwargs):
    """Exercise the full TaskRunner lifecycle with artificial delays.

    Smoke-test helper: walks a task through preparing -> start -> tick*N ->
    wrapping_up -> complete, sleeping between stages so progress reporting
    can be observed.

    :param task_id: id of an existing Task to drive
    """
    task = Task.objects.get(id=task_id)
    runner = TaskRunner(task)
    runner.preparing()
    sleep(5)
    # Renamed from ``max`` to avoid shadowing the builtin; the ``max=``
    # keyword name is part of the runner API and stays as-is.
    num_ticks = 100
    runner.start(max=num_ticks)
    for _ in range(num_ticks):
        sleep(0.1)
        runner.tick()
    sleep(5)
    runner.wrapping_up()
    sleep(5)
    runner.complete()
def calculate_similarity(task_id):
    """Build the similarity ordering for a SimilarityIndex (legacy variant).

    Reads source data either from the index's ordination (if any) or from
    its raw data matrix, computes the similarity ordering and serialises
    the result to the index's sids/bytes files. Failures are recorded on
    the TaskRunner rather than raised.

    :param task_id: id of the Task to run (resolved via ``get_or_wait``)
    """
    task = get_or_wait(task_id)
    runner = TaskRunner(task)
    try:
        runner.preparing()
        cls, sim_id = task.target.split(':')
        sim_id = int(sim_id)
        assert cls == SimilarityIndex.__name__
        sim = SimilarityIndex.objects.get(id=sim_id)
        dm = sim.dm
        # Renamed from ``ord`` to avoid shadowing the builtin.
        ordination = sim.ord

        # Both upstream artefacts must be finished before we can read them.
        assert dm.task is None or dm.task.is_completed()
        assert ordination is None or ordination.task is None or ordination.task.is_completed()

        # Prefer the ordination's (reduced) data when available, otherwise
        # fall back to the raw data matrix.
        if ordination:
            sids_path = ordination.get_sids_path()
            source_bytes_path = ordination.get_bytes_path()
        else:
            sids_path = dm.get_sids_path()
            source_bytes_path = dm.get_bytes_path()

        runner.start()
        sids, sorted_order = _calculate_similarity(sids_path, source_bytes_path)
        runner.wrapping_up()

        sim_sids_path = sim.get_sids_path()
        sim_bytes_path = sim.get_bytes_path()
        ndarray_to_bytes(sorted_order, sim_bytes_path)
        ndarray_to_bytes(sids, sim_sids_path)
        runner.complete()
    except Exception as e:
        runner.error(e)
def construct_ordination(task_id):
    """Compute an ordination (dimensionality reduction) for a DataMatrix.

    Loads the matrix, z-score normalises it, runs the configured ordination
    method and serialises the result to the ordination's sids/bytes files.
    Failures are recorded on the TaskRunner rather than raised.

    :param task_id: id of the Task to run (resolved via ``get_or_wait``)
    """
    task = get_or_wait(task_id)
    runner = TaskRunner(task)
    try:
        runner.preparing()
        cls, ord_id = task.target.split(':')
        ord_id = int(ord_id)
        assert cls == Ordination.__name__
        # Renamed from ``ord`` to avoid shadowing the builtin.
        ordination = Ordination.objects.get(id=ord_id)
        dm = ordination.dm
        method_name = ordination.method
        ndims = ordination.ndims
        param_kwargs = Ordination.params_to_kwargs(ordination.params)

        assert dm.task is None or dm.task.is_completed()
        assert method_name in methods.keys(), 'Unknown method {}'.format(
            method_name)
        assert 2 <= ndims <= 3, 'Only support 2 or 3 dimensional ordination'

        runner.start()
        dm_sids_path = dm.get_sids_path()
        dm_bytes_path = dm.get_bytes_path()
        sids = bytes_to_ndarray(dm_sids_path, np.int32)
        dm_data = get_rawdata_from_binary(dm_bytes_path, len(sids))

        # Normalise, then zero out non-finite entries so the ordination
        # method receives finite input (zscore yields NaN for zero-variance
        # columns).
        data = zscore(dm_data)
        data[np.where(np.isnan(data))] = 0
        data[np.where(np.isinf(data))] = 0

        method = methods[method_name]
        result = method(data, ndims, **param_kwargs)
        runner.wrapping_up()

        ord_sids_path = ordination.get_sids_path()
        ord_bytes_path = ordination.get_bytes_path()
        ndarray_to_bytes(result, ord_bytes_path)
        ndarray_to_bytes(sids, ord_sids_path)
        runner.complete()
    except Exception as e:
        runner.error(e)
def extract_database_measurements(arg=None, force=False):
    """Extract and aggregate feature values for a DataMatrix (legacy
    binstorage-backed variant).

    ``arg`` is either a Task id (int) or a task-like object. For a real
    :class:`Task` the target (``'DataMatrix:<id>'``) determines the segment
    ids and feature/aggregation sets; otherwise they are read off the
    task-like object. Per-feature values go into binstorage index/value
    file pairs; aggregation runs under a child task; for real Tasks the
    final matrix is written to the DataMatrix's sids/bytes/cols files.

    :param arg: Task id or task-like object
    :param force: forwarded to the feature extraction step
    """
    task = get_or_wait(arg) if isinstance(arg, int) else arg
    runner = TaskRunner(task)
    try:
        runner.preparing()
        if isinstance(task, Task):
            cls, dm_id = task.target.split(':')
            dm_id = int(dm_id)
            assert cls == DataMatrix.__name__
            dm = DataMatrix.objects.get(id=dm_id)
            if dm.database:
                segments = Segment.objects.filter(
                    audio_file__database=dm.database)
                sids = segments.values_list('id', flat=True)
            else:
                sids = dm.tmpdb.ids
            features_hash = dm.features_hash
            aggregations_hash = dm.aggregations_hash
        else:
            sids = task.sids
            features_hash = task.features_hash
            aggregations_hash = task.aggregations_hash

        features = Feature.objects.filter(id__in=features_hash.split('-'))
        aggregations = Aggregation.objects.filter(
            id__in=aggregations_hash.split('-'))
        aggregators = [aggregator_map[x.name] for x in aggregations]

        # feature -> (index file, value file) in binstorage
        f2bs = {}
        # feature -> aggregator -> (index file, value file)
        fa2bs = {}
        for feature in features:
            feature_name = feature.name
            index_filename = data_path('binary/features',
                                       '{}.idx'.format(feature_name),
                                       for_url=False)
            value_filename = data_path('binary/features',
                                       '{}.val'.format(feature_name),
                                       for_url=False)
            f2bs[feature] = (index_filename, value_filename)

            if feature not in fa2bs:
                fa2bs[feature] = {}
            for aggregator in aggregators:
                aggregator_name = aggregator.get_name()
                folder = os.path.join('binary', 'features', feature_name)
                # NOTE(review): builds a filesystem path by stripping the
                # leading character off MEDIA_URL + folder - presumably
                # MEDIA_URL mirrors a relative media directory; confirm
                # against deployment config.
                mkdirp(os.path.join(settings.MEDIA_URL, folder)[1:])
                index_filename = data_path(folder,
                                           '{}.idx'.format(aggregator_name),
                                           for_url=False)
                value_filename = data_path(folder,
                                           '{}.val'.format(aggregator_name),
                                           for_url=False)
                fa2bs[feature][aggregator] = (index_filename, value_filename)

        tids, f2tid2fvals = extract_segment_features_for_segments(
            runner, sids, features, f2bs, force)

        # Persist each feature's freshly extracted values into binstorage.
        for feature, (index_filename, value_filename) in f2bs.items():
            _tids, _fvals = f2tid2fvals.get(feature, (None, None))
            if _tids:
                _tids = np.array(_tids, dtype=np.int32)
                ensure_parent_folder_exists(index_filename)
                binstorage.store(_tids, _fvals, index_filename, value_filename)

        runner.wrapping_up()

        # Aggregation runs as a child task with its own progress tracking.
        child_task = task.__class__(user=task.user, parent=task)
        child_task.save()
        child_runner = TaskRunner(child_task)
        child_runner.preparing()
        aggregate_feature_values(child_runner, sids, f2bs, fa2bs, features,
                                 aggregators)
        child_runner.complete()

        if isinstance(task, Task):
            full_sids_path = dm.get_sids_path()
            full_bytes_path = dm.get_bytes_path()
            full_cols_path = dm.get_cols_path()

            data, col_inds = extract_rawdata(f2bs, fa2bs, tids, features,
                                             aggregators)

            ndarray_to_bytes(data, full_bytes_path)
            ndarray_to_bytes(np.array(sids, dtype=np.int32), full_sids_path)
            with open(full_cols_path, 'w', encoding='utf-8') as f:
                json.dump(col_inds, f)

            dm.ndims = data.shape[1]
            dm.save()
        runner.complete()
    except Exception as e:
        runner.error(e)