def search_dataset(self, query_point, number_of_results, preset_name):
    preset_name = str(preset_name)
    query_point = str(query_point)
    logger.debug("NN search for point with name %s (preset = %s)" % (query_point, preset_name))
    size = self.original_dataset.size()
    if size < SIMILARITY_MINIMUM_POINTS:
        msg = "Not enough datapoints in the dataset (%s < %s)." % (size, SIMILARITY_MINIMUM_POINTS)
        logger.debug(msg)
        return {"error": True, "result": msg}
        # raise Exception('Not enough datapoints in the dataset (%s < %s).' % (size, SIMILARITY_MINIMUM_POINTS))

    if query_point.endswith(".yaml"):
        # The point doesn't exist in the dataset, so make a temporary point,
        # apply all the dataset's transformations to it and search for it
        p = Point()
        p.load(query_point)
        p1 = self.original_dataset.history().mapPoint(p)
        similar_sounds = self.view.nnSearch(p1, self.metrics[preset_name]).get(int(number_of_results))
    else:
        if not self.original_dataset.contains(query_point):
            msg = "Sound with id %s doesn't exist in the dataset." % query_point
            logger.debug(msg)
            return {"error": True, "result": msg}
            # raise Exception("Sound with id %s doesn't exist in the dataset." % query_point)
        similar_sounds = self.view.nnSearch(query_point, self.metrics[preset_name]).get(int(number_of_results))

    return {"error": False, "result": similar_sounds}
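# A usage sketch for search_dataset. The instance name `similarity_server`, the preset
# name 'lowlevel' and the sound id '1234' are hypothetical, for illustration only. The
# method accepts either the id of a sound already in the index, or a path to a .yaml
# analysis file for a sound that has not been indexed yet.
result = similarity_server.search_dataset('1234', 10, 'lowlevel')
if result['error']:
    print(result['result'])  # human-readable error message
else:
    # Gaia's nnSearch(...).get(n) returns (point name, distance) pairs
    for sound_id, distance in result['result']:
        print(sound_id, distance)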
def add_point(self, point_location, point_name):
    if self.original_dataset.contains(str(point_name)):
        self.original_dataset.removePoint(str(point_name))
    try:
        p = Point()
        p.load(str(point_location))
        p.setName(str(point_name))
        self.original_dataset.addPoint(p)
        size = self.original_dataset.size()
        logger.debug("Added point with name %s. Index has now %i points." % (str(point_name), size))
    except Exception:
        # Note: size must be read here because the exception may have been raised before
        # the assignment inside the try block, which would leave `size` undefined
        size = self.original_dataset.size()
        msg = "Point with name %s could NOT be added. Index has now %i points." % (str(point_name), size)
        logger.debug(msg)
        return {"error": True, "result": msg}

    # If, when adding a new point, we reach the minimum number of points for similarity, prepare the
    # dataset, save it and create the view and distance metrics. This will almost never happen: only
    # the first time we start the similarity server, when there is no index created yet and we add
    # 2000 points.
    if size == SIMILARITY_MINIMUM_POINTS:
        self.__prepare_original_dataset()
        self.__normalize_original_dataset()
        self.save_index(msg="(reaching 2000 points)")

        # build metrics for the different similarity presets
        self.__build_metrics()
        # create view
        view = View(self.original_dataset)
        self.view = view

    return {"error": False, "result": True}
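# __build_metrics() is not shown above; a minimal sketch of how per-preset metrics are
# typically built with Gaia's DistanceFunctionFactory follows. The preset names and
# descriptor patterns here are assumptions for illustration, not taken from the code above.
from gaia2 import DistanceFunctionFactory

def build_metrics_sketch(dataset):
    presets = {
        'lowlevel': {'descriptorNames': '*.lowlevel.*'},
        'spectral': {'descriptorNames': '*.spectral.*'},
    }
    metrics = {}
    for preset, params in presets.items():
        # 'euclidean' is one of the distance functions shipped with Gaia; the params
        # restrict which descriptors of the dataset layout the distance is computed over
        metrics[preset] = DistanceFunctionFactory.create('euclidean', dataset.layout(), params)
    return metrics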
def testEnumerateKey(self):
    db = testdata.loadTestDB()
    testdata.useEnumerate = True
    dbe = testdata.loadTestDB()

    # also make sure we can map single points correctly;
    # we need to load the point separately and not take it from the dataset,
    # to ensure that it'll have a different enum map
    p = Point()
    p.load('data/dataset_small/Vocal and Acapella/04 Blue Skies.mp3.sig')
    print(p.name())

    # also create a transfo that forwards enums after we did the enumerate transfo
    dbes = transform(dbe, 'select', {'descriptorNames': '*key*'})
    pe = dbes.history().mapPoint(p)

    self.assertEqual(p['key_mode'], pe['key_mode'])
    self.assertEqual(p['key_key'], pe['key_key'])

    self.assertNotEqual(db.layout(), dbe.layout())

    for p in db.points():
        pe = dbe.point(p.name())
        self.assertEqual(p.label('key_key'), pe.label('key_key'))
        self.assertEqual(p.label('key_mode'), pe.label('key_mode'))
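# The test above hinges on DataSet.history().mapPoint(), which replays every transformation
# applied to a dataset (select, enumerate, ...) on a single freshly loaded point, so that
# the point becomes comparable with points already inside the dataset. A minimal standalone
# sketch, with an illustrative file name:
from gaia2 import DataSet, Point, transform

ds = DataSet()
# ... points get added to ds here ...
ds = transform(ds, 'select', {'descriptorNames': '*key*'})

p = Point()
p.load('some_analysis_file.sig')    # raw point, still in the original layout
mapped = ds.history().mapPoint(p)   # now conforms to ds.layout()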
def testSecondChanceForLayoutEquality(self):
    '''ticket #21: points try to morph to adapt to the dataset if they cannot be naturally inserted'''
    ds = DataSet()
    p = Point()

    p.setName('Paris Hilton')
    p.load('data/04 - Cansei de Ser Sexy - Meeting Paris Hilton.mp3.sig')
    ds.addPoint(p)

    p.setName('2005')
    p.load('data/11_2005-fwyh.mp3.sig')
    ds.addPoint(p)

    self.assertEqual(ds.point('2005')['title'], '2005')
def add_point(self, point_location, point_name):
    if self.original_dataset.contains(str(point_name)):
        self.original_dataset.removePoint(str(point_name))

    p = Point()
    if os.path.exists(str(point_location)):
        try:
            p.load(str(point_location))
            p.setName(str(point_name))
            if self.original_dataset.size() <= sim_settings.SIMILARITY_MINIMUM_POINTS:
                # Add point to original_dataset because PCA dataset has not been created yet
                self.original_dataset.addPoint(p)
                msg = 'Added point with name %s. Index has now %i points.' % \
                      (str(point_name), self.original_dataset.size())
                logger.info(msg)
            else:
                # Add point to PCA dataset because it has already been created.
                # The PCA dataset will take care of adding the point to the original dataset as well.
                self.pca_dataset.addPoint(p)
                msg = 'Added point with name %s. Index has now %i points (pca index has %i points).' % \
                      (str(point_name), self.original_dataset.size(), self.pca_dataset.size())
                logger.info(msg)
        except Exception as e:
            msg = 'Point with name %s could NOT be added (%s).' % (str(point_name), str(e))
            logger.info(msg)
            return {'error': True, 'result': msg, 'status_code': sim_settings.SERVER_ERROR_CODE}
    else:
        msg = 'Point with name %s could NOT be added because analysis file does not exist (%s).' % \
              (str(point_name), str(point_location))
        logger.info(msg)
        return {'error': True, 'result': msg, 'status_code': sim_settings.SERVER_ERROR_CODE}

    if self.original_dataset.size() == sim_settings.SIMILARITY_MINIMUM_POINTS:
        # Do enumerate
        try:
            self.original_dataset = transform(self.original_dataset, 'enumerate',
                                              {'descriptorNames': ['.tonal.chords_progression']})
        except Exception:  # TODO: exception too broad here...
            logger.info('WARNING: enumerate transformation to .tonal.chords_progression could not be performed.')

    # If, when adding a new point, we reach the minimum number of points for similarity, do the setup
    # needed so that the dataset can be used for search. This includes preparing the dataset,
    # normalizing it, saving it, and creating the view and distance metrics. This will only happen
    # once, when the size of the dataset reaches SIMILARITY_MINIMUM_POINTS.
    if self.original_dataset.size() == sim_settings.SIMILARITY_MINIMUM_POINTS and not self.indexing_only_mode:
        self.__prepare_original_dataset()
        self.__normalize_original_dataset()
        self.transformations_history = self.original_dataset.history().toPython()
        self.save_index(msg="(reaching %i points)" % sim_settings.SIMILARITY_MINIMUM_POINTS)

        # TODO: the code below is repeated from the __load_dataset() method, should be moved into a util function
        # Build metrics for the different similarity presets, create a Gaia view
        self.__build_metrics()
        view = View(self.original_dataset)
        self.view = view

        # Compute PCA and create the pca view and metric
        # NOTE: this step may take a long time if the dataset is big, but it only needs to be
        # performed once when the similarity server is loaded.
        self.pca_dataset = transform(self.original_dataset, 'pca', {
            'descriptorNames': sim_settings.PCA_DESCRIPTORS,
            'dimension': sim_settings.PCA_DIMENSIONS,
            'resultName': 'pca'
        })
        self.pca_dataset.setReferenceDataSet(self.original_dataset)
        self.view_pca = View(self.pca_dataset)
        self.__build_pca_metric()

    return {'error': False, 'result': msg}
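# Once the pca view and metric exist, a nearest-neighbour query runs against the reduced
# space. Below is a minimal sketch of how __build_pca_metric() plausibly constructs the
# metric and how the pca view is then queried; `pca_dataset`, `view_pca`, the sound id
# and the result count are illustrative assumptions:
from gaia2 import DistanceFunctionFactory

pca_metric = DistanceFunctionFactory.create('euclidean', pca_dataset.layout(),
                                            {'descriptorNames': '*.pca'})
results = view_pca.nnSearch('1234', pca_metric).get(10)  # -> [(sound_id, distance), ...]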
def testNamespaceClash(self):
    p = Point()
    p.load('data/namespace_clash.sig')
def add_point(self, point_location, point_name):
    if self.original_dataset.contains(str(point_name)):
        self.original_dataset.removePoint(str(point_name))

    p = Point()
    if os.path.exists(str(point_location)):
        try:
            p.load(str(point_location))
            p.setName(str(point_name))
            if self.original_dataset.size() <= settings.SIMILARITY_MINIMUM_POINTS:
                # Add point to original_dataset
                self.original_dataset.addPoint(p)
                msg = 'Added point with name %s. Index has now %i points.' % (
                    str(point_name), self.original_dataset.size())
                logger.info(msg)
            else:
                # Add point to the pca dataset (as it has already been created).
                # The pca dataset will take care of adding it to the original one too.
                self.pca_dataset.addPoint(p)
                msg = 'Added point with name %s. Index has now %i points (pca index has %i points).' % (
                    str(point_name), self.original_dataset.size(), self.pca_dataset.size())
                logger.info(msg)
        except Exception as e:
            msg = 'Point with name %s could NOT be added (%s).' % (str(point_name), str(e))
            logger.info(msg)
            return {'error': True, 'result': msg, 'status_code': settings.SERVER_ERROR_CODE}
    else:
        msg = 'Point with name %s could NOT be added because analysis file does not exist (%s).' % (
            str(point_name), str(point_location))
        logger.info(msg)
        return {'error': True, 'result': msg, 'status_code': settings.SERVER_ERROR_CODE}

    if self.original_dataset.size() == settings.SIMILARITY_MINIMUM_POINTS:
        # Do enumerate
        try:
            self.original_dataset = transform(self.original_dataset, 'enumerate',
                                              {'descriptorNames': ['.tonal.chords_progression']})
        except Exception:
            logger.info('WARNING: enumerate transformation to .tonal.chords_progression could not be performed.')

    # If, when adding a new point, we reach the minimum points for similarity, prepare the dataset,
    # save it and create the view and distance metrics. This will almost never happen: only the first
    # time we start the similarity server, when there is no index created yet and we add 2000 points.
    if self.original_dataset.size() == settings.SIMILARITY_MINIMUM_POINTS and not self.indexing_only_mode:
        #self.__prepare_original_dataset()
        #self.__normalize_original_dataset()
        self.transformations_history = self.original_dataset.history().toPython()
        self.save_index(msg="(reaching 2000 points)")

        # build metrics for the different similarity presets
        self.__build_metrics()
        # create view
        view = View(self.original_dataset)
        self.view = view

        # do pca and create the pca view and metric
        self.pca_dataset = transform(self.original_dataset, 'pca', {
            'descriptorNames': settings.PCA_DESCRIPTORS,
            'dimension': settings.PCA_DIMENSIONS,
            'resultName': 'pca'
        })
        self.pca_dataset.setReferenceDataSet(self.original_dataset)
        self.view_pca = View(self.pca_dataset)
        self.__build_pca_metric()

    return {'error': False, 'result': msg}
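# Both add_point variants apply Gaia's 'enumerate' transform, which replaces repeated string
# labels (here, the chord progression descriptor) with integer enumerations to reduce memory
# use. A minimal standalone sketch; `ds` stands in for an already-populated dataset:
from gaia2 import transform

ds_enumerated = transform(ds, 'enumerate', {'descriptorNames': ['.tonal.chords_progression']})
# The layout changes (string -> enumeration), so points loaded afterwards must be mapped
# through ds_enumerated.history() before they can be added or compared.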