def __calculate_complete_required_descriptor_names(self, descriptor_names, only_leaf_descriptors=False):
    """Expand the given descriptor names into the complete list of names present in the layout.

    If descriptor_names is empty, all descriptors in the layout are used. Returns a list
    of names on success, or an error dict if the names do not match the layout.
    """
    if not descriptor_names:
        descriptor_names = self.descriptor_names['all'][:]
    try:
        structured_layout = generate_structured_dict_from_layout(self.descriptor_names['all'][:])
        processed_descriptor_names = []
        for name in descriptor_names:
            nested_descriptors = get_nested_dictionary_value(name.split('.')[1:], structured_layout)
            if not nested_descriptors:
                processed_descriptor_names.append(name)
            else:
                if only_leaf_descriptors:
                    # Only expand the name if the nested descriptors are statistics
                    if len(set(nested_descriptors.keys()).intersection(
                            ['min', 'max', 'dvar2', 'dmean2', 'dmean', 'var', 'dvar', 'mean'])) > 0:
                        for extra_name in nested_descriptors.keys():
                            processed_descriptor_names.append('%s.%s' % (name, extra_name))
                else:
                    # Return all nested descriptor names
                    extra_names = []
                    get_nested_descriptor_names(nested_descriptors, extra_names)
                    for extra_name in extra_names:
                        processed_descriptor_names.append('%s.%s' % (name, extra_name))
        processed_descriptor_names = list(set(processed_descriptor_names))
        return processed_descriptor_names
    except Exception:
        return {
            'error': True,
            'result': 'Wrong descriptor names, unable to create layout.',
            'status_code': sim_settings.BAD_REQUEST_CODE
        }
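
# Illustrative sketch of the expansion above (the layout shown is hypothetical): given a
# structured layout {'lowlevel': {'mfcc': {'mean': {}, 'var': {}}}}, passing
# descriptor_names=['.lowlevel.mfcc'] yields ['.lowlevel.mfcc.mean', '.lowlevel.mfcc.var'].
# With only_leaf_descriptors=True the expansion only happens when the nested keys are
# statistic names such as 'mean', 'var' or 'min'; other non-leaf names are skipped
# rather than recursed into.
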
def api_search(self, target_type, target, filter, preset_name, metric_descriptor_names,
               num_results, offset, in_ids):
    """Run a nearest-neighbour similarity search against the dataset.

    The target can be a sound id, a dict of descriptor values, or an uploaded analysis
    file. Returns a dict with 'error', 'result' and, on failure, a 'status_code'.
    """
    # Check if the index has sufficient points
    size = self.original_dataset.size()
    if size < sim_settings.SIMILARITY_MINIMUM_POINTS:
        msg = 'Not enough datapoints in the dataset (%s < %s).' % (
            size, sim_settings.SIMILARITY_MINIMUM_POINTS)
        logger.info(msg)
        return {
            'error': True,
            'result': msg,
            'status_code': sim_settings.SERVER_ERROR_CODE
        }

    # Get some dataset parameters that will be useful later
    trans_hist = self.transformations_history
    layout = self.original_dataset.layout()
    pca_layout = self.pca_dataset.layout()

    # Get normalization coefficients from the transformation history
    coeffs = None
    for i in range(0, len(trans_hist)):
        if trans_hist[-(i + 1)]['Analyzer name'] == 'normalize':
            coeffs = trans_hist[-(i + 1)]['Applier parameters']['coeffs']

    # Process target
    if target:
        if target_type == 'sound_id':
            query_point = str(target)
            if not self.original_dataset.contains(query_point):
                msg = "Sound with id %s doesn't exist in the dataset and cannot be set as " \
                      "similarity target." % query_point
                logger.info(msg)
                return {
                    'error': True,
                    'result': msg,
                    'status_code': sim_settings.NOT_FOUND_CODE
                }
            else:
                query = query_point

        elif target_type == 'descriptor_values':
            # Transform input params to the normalized feature space and add them to a
            # query point. If no params are specified in the target, the point is left
            # empty (probably random sounds are returned).
            feature_names = []
            query = Point()
            query.setLayout(layout)
            try:
                for param in target.keys():
                    # Only add numerical parameters. Non-numerical ones (like key) are
                    # only used as filters
                    if param in coeffs.keys():
                        feature_names.append(str(param))
                        value = target[param]
                        if coeffs:
                            a = coeffs[param]['a']
                            b = coeffs[param]['b']
                            if len(a) == 1:
                                norm_value = a[0] * value + b[0]
                            else:
                                norm_value = []
                                for i in range(0, len(a)):
                                    norm_value.append(a[i] * value[i] + b[i])
                            query.setValue(str(param), norm_value)
                        else:
                            query.setValue(str(param), value)
            except Exception:
                return {
                    'error': True,
                    'result': 'Invalid target (descriptor values could not be correctly parsed)',
                    'status_code': sim_settings.BAD_REQUEST_CODE
                }

            # Overwrite metric with the descriptors present in the target
            metric = DistanceFunctionFactory.create(
                'euclidean', layout, {'descriptorNames': feature_names})

        elif target_type == 'file':
            # Target is specified as the attached file.
            # Create a point with the data in 'descriptors_data' and search for it.
            target_file_parsing_type = '-'
            try:
                # Try directly loading the file
                p, query = Point(), Point()
                p.loadFromString(yaml.dump(target))
                if preset_name == 'pca':
                    query = self.pca_dataset.history().mapPoint(p)  # map point to pca dataset
                else:
                    query = self.original_dataset.history().mapPoint(p)  # map point to original dataset
                target_file_parsing_type = 'mapPoint'
            except Exception as e:
                logger.info('Unable to create gaia point from uploaded file (%s). '
                            'Trying adding descriptors one by one.' % e)
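                # Both this fallback and the 'descriptor_values' branch above apply the
                # normalization stored in the transformation history as an affine map per
                # descriptor: norm_value = a * value + b, element-wise for vector
                # descriptors. A sketch with hypothetical coefficients: a = [0.5],
                # b = [0.1] maps a raw value of 2.0 to 0.5 * 2.0 + 0.1 = 1.1.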
                # If direct loading does not work, add the descriptors one by one
                try:
                    query = Point()
                    # query.setLayout(layout)
                    feature_names = []
                    get_nested_descriptor_names(target, feature_names)
                    feature_names = ['.%s' % item for item in feature_names]
                    nonused_features = []
                    for param in feature_names:
                        if param in coeffs.keys():
                            value = get_nested_dictionary_value(param[1:].split('.'), target)
                            if coeffs:
                                try:
                                    a = coeffs[param]['a']
                                    b = coeffs[param]['b']
                                    if len(a) == 1:
                                        norm_value = a[0] * value + b[0]
                                    else:
                                        norm_value = []
                                        for i in range(0, len(a)):
                                            norm_value.append(a[i] * value[i] + b[i])
                                    query.setValue(str(param[1:]), norm_value)
                                except Exception:
                                    nonused_features.append(param)
                            else:
                                query.setValue(str(param[1:]), value)
                        else:
                            nonused_features.append(param)

                    if preset_name == 'pca':
                        query = self.pca_dataset.history().mapPoint(query)  # map point to pca dataset
                    else:
                        query = self.original_dataset.history().mapPoint(query)  # map point to original dataset
                    target_file_parsing_type = 'walkDict'
                except Exception as e:
                    logger.info('Unable to create gaia point from uploaded file and adding '
                                'descriptors one by one (%s)' % e)
                    return {
                        'error': True,
                        'result': 'Unable to create gaia point from uploaded file. Probably the '
                                  'file does not have the required layout. Are you using the '
                                  'correct version of Essentia\'s Freesound extractor?',
                        'status_code': sim_settings.SERVER_ERROR_CODE
                    }
    else:
        query = Point()  # Empty target
        if preset_name == 'pca':
            query.setLayout(pca_layout)
        else:
            query.setLayout(layout)

    # Process filter
    if filter:
        filter = parse_filter_list(filter, coeffs)
    else:
        filter = ""  # Empty filter

    # Log the search
    log_message = 'Similarity search'
    if target:
        if target_type == 'sound_id':
            log_target = '%s (sound id)' % str(target)
        elif target_type == 'descriptor_values':
            log_target = '%s (descriptor values)' % str(target)
        elif target_type == 'file':
            log_target = 'uploaded file (%s)' % target_file_parsing_type
        log_message += ' with target: %s' % log_target
    if filter:
        log_message += ' with filter: %s' % str(filter)
    logger.info(log_message)

    # If in_ids is specified, edit the filter accordingly
    if in_ids:
        if not filter:
            filter = 'WHERE point.id IN ("' + '", "'.join(in_ids) + '")'
        else:
            filter += ' AND point.id IN ("' + '", "'.join(in_ids) + '")'

    # Set query metric. Note that this reassignment also replaces the metric built
    # above for 'descriptor_values' targets.
    metric = self.metrics[preset_name]
    if metric_descriptor_names:
        metric = DistanceFunctionFactory.create(
            'euclidean', layout, {'descriptorNames': metric_descriptor_names})

    # Do query!
    try:
        if target_type == 'descriptor_values' and target:
            search = self.view.nnSearch(query, metric, str(filter))
        else:
            if preset_name == 'pca':
                search = self.view_pca.nnSearch(query, metric, str(filter))
            else:
                search = self.view.nnSearch(query, metric, str(filter))
        results = search.get(num_results, offset=offset)
        count = search.size()
    except Exception as e:
        logger.info('Similarity search failed (%s)' % e)
        return {
            'error': True,
            'result': 'Similarity server error',
            'status_code': sim_settings.SERVER_ERROR_CODE
        }

    note = None
    if target_type == 'file':
        if target_file_parsing_type == 'walkDict':
            note = 'The layout of the given analysis file differed from what we expected. Similarity results ' \
                   'might not be accurate. Was the file generated with the last version of Essentia\'s ' \
                   'Freesound extractor?'

    return {
        'error': False,
        'result': {
            'results': results,
            'count': count,
            'note': note
        }
    }
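
# A minimal usage sketch (assumes the enclosing class is Freesound's GaiaWrapper and that
# it can be instantiated with no arguments; both are assumptions here, as are the sound
# id and the 'lowlevel' preset name):
if __name__ == '__main__':
    wrapper = GaiaWrapper()
    response = wrapper.api_search(
        target_type='sound_id',
        target='1234',            # hypothetical sound id
        filter=None,
        preset_name='lowlevel',   # hypothetical key in wrapper.metrics
        metric_descriptor_names=None,
        num_results=10,
        offset=0,
        in_ids=None)
    if response['error']:
        print(response['result'])
    else:
        # Gaia's nnSearch results are (point_name, distance) pairs
        for sound_id, distance in response['result']['results']:
            print(sound_id, distance)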