Exemple #1
0
    def search_dataset(self, query_point, number_of_results, preset_name):
        """Return the nearest neighbours of a query point for a given preset.

        ``query_point`` is either the id of a point already in the dataset or
        a path to a ``.yaml`` point file; in the latter case the point is
        loaded, mapped through the dataset's transformation history and then
        searched for.

        Returns a dict with an 'error' flag and a 'result' payload (either
        the search results or an error message).
        """
        preset_name = str(preset_name)
        query_point = str(query_point)
        logger.debug("NN search for point with name %s (preset = %s)" % (query_point, preset_name))

        # Refuse to search until the index holds enough points.
        size = self.original_dataset.size()
        if size < SIMILARITY_MINIMUM_POINTS:
            msg = "Not enough datapoints in the dataset (%s < %s)." % (size, SIMILARITY_MINIMUM_POINTS)
            logger.debug(msg)
            return {"error": True, "result": msg}

        metric = self.metrics[preset_name]
        if query_point.endswith(".yaml"):
            # External point: load it from file and apply the dataset's
            # transformation history before searching.
            raw_point = Point()
            raw_point.load(query_point)
            mapped_point = self.original_dataset.history().mapPoint(raw_point)
            similar_sounds = self.view.nnSearch(mapped_point, metric).get(int(number_of_results))
        else:
            # In-dataset point: it must already exist.
            if not self.original_dataset.contains(query_point):
                msg = "Sound with id %s doesn't exist in the dataset." % query_point
                logger.debug(msg)
                return {"error": True, "result": msg}
            similar_sounds = self.view.nnSearch(query_point, metric).get(int(number_of_results))

        return {"error": False, "result": similar_sounds}
Exemple #2
0
    def add_point(self, point_location, point_name):
        """Add (or replace) a point in the dataset from an analysis file.

        Any existing point with the same name is removed first. If, after a
        successful insertion, the dataset reaches SIMILARITY_MINIMUM_POINTS,
        the dataset is prepared, normalized and saved, and the metrics and
        view are (re)built so the index becomes searchable.

        Returns a dict with an 'error' flag and a 'result' payload.
        """
        if self.original_dataset.contains(str(point_name)):
            self.original_dataset.removePoint(str(point_name))
        try:
            p = Point()
            p.load(str(point_location))
            p.setName(str(point_name))
            self.original_dataset.addPoint(p)
            size = self.original_dataset.size()
            logger.debug("Added point with name %s. Index has now %i points." % (str(point_name), size))
        except Exception:
            # BUGFIX: 'size' was previously read here even though it is only
            # assigned after a successful addPoint(); a failure in load()/
            # setName()/addPoint() raised NameError inside this handler.
            # Query the dataset directly instead. Also narrowed from a bare
            # 'except:' so SystemExit/KeyboardInterrupt are not swallowed.
            size = self.original_dataset.size()
            msg = "Point with name %s could NOT be added. Index has now %i points." % (str(point_name), size)
            logger.debug(msg)
            return {"error": True, "result": msg}

        # If when adding a new point we reach the minimum points for similarity, prepare the dataset, save and create view and distance metrics
        #   This will most never happen, only the first time we start similarity server, there is no index created and we add 2000 points.
        if size == SIMILARITY_MINIMUM_POINTS:
            self.__prepare_original_dataset()
            self.__normalize_original_dataset()
            self.save_index(msg="(reaching 2000 points)")

            # build metrics for the different similarity presets
            self.__build_metrics()
            # create view
            view = View(self.original_dataset)
            self.view = view

        return {"error": False, "result": True}
Exemple #3
0
 def load_point(signame):
     """Build a gaia Point from a JSON descriptor file.

     The 'tags' entry is dropped from the metadata (when present) before the
     data is re-serialized to YAML and loaded into the point.
     """
     with open(signame, 'r') as sig:
         jsonsig = json.load(sig)
     metadata = jsonsig.get('metadata', {})
     if metadata.get('tags'):
         del metadata['tags']
     point = Point()
     point.loadFromString(yaml.dump(jsonsig))
     return point
Exemple #4
0
 def load_point(signame):
     """Load point data from JSON file.

     Reads the JSON signature, removes the metadata 'tags' field if set, and
     feeds the YAML serialization of the result into a new Point.
     """
     with open(signame, 'r') as sig:
         data = json.load(sig)
     if data.get('metadata', {}).get('tags'):
         del data['metadata']['tags']
     serialized = yaml.dump(data)
     p = Point()
     p.loadFromString(serialized)
     return p
Exemple #5
0
    def testSecondChanceForLayoutEquality(self):
        '''ticket #21: points try to morph to adapt to dataset if they cannot be naturally inserted'''
        ds = DataSet()
        p = Point()

        # first point defines the dataset layout
        p.setName('Paris Hilton')
        p.load('data/04 - Cansei de Ser Sexy - Meeting Paris Hilton.mp3.sig')
        ds.addPoint(p)

        # second sig file presumably has a slightly different layout (see the
        # ticket in the docstring); insertion should still succeed by
        # morphing the point to the dataset layout
        p.setName('2005')
        p.load('data/11_2005-fwyh.mp3.sig')
        ds.addPoint(p)

        # the morphed point kept its descriptor values
        self.assertEqual(ds.point('2005')['title'], '2005')
Exemple #6
0
    def testEnumerateKey(self):
        # load the same test DB twice: once as-is, once with the
        # testdata.useEnumerate flag enabled
        db = testdata.loadTestDB()

        testdata.useEnumerate = True
        dbe = testdata.loadTestDB()

        # also make sure we can map single points correctly
        # we need to load it separately and not take it from the dataset to ensure
        # that it'll have a different enum map
        p = Point()
        p.load('data/dataset_small/Vocal and Acapella/04 Blue Skies.mp3.sig')
        print(p.name())

        #also create a transfo that forwards enums after we did the enumerate transfo
        dbes = transform(dbe, 'select', { 'descriptorNames': '*key*' })
        pe = dbes.history().mapPoint(p)

        # the mapped point must preserve the key descriptors' values
        self.assertEqual(p['key_mode'], pe['key_mode'])
        self.assertEqual(p['key_key'],  pe['key_key'])

        # enumeration changes the layout...
        self.assertNotEqual(db.layout(), dbe.layout())

        # ...but labels read back identically from both datasets
        for p in db.points():
            pe = dbe.point(p.name())

            self.assertEqual(p.label('key_key'),
                             pe.label('key_key'))

            self.assertEqual(p.label('key_mode'),
                             pe.label('key_mode'))
Exemple #7
0
    def testAddToDataSetWithDifferentLayouts(self):
        # adding a point whose layout differs from the dataset's must raise,
        # and the reference counts of both layouts must stay consistent
        p1 = Point()
        p1.setLayout(self.l1) # +1, ref = 2
        p2 = Point()  # keeps its own (default) layout, distinct from l1

        ds = DataSet()
        ds.addPoint(p1) # +2 (dataset+pointcopy), ref = 4
        self.assertRaises(Exception, ds.addPoint, p2)
        self.assertEqual(p1.layout().ref(), 4)
        self.assertEqual(p2.layout().ref(), 1)
Exemple #8
0
    def testOwnershipIssues(self):
        # regression checks: objects must remain usable (no crash) after the
        # owner they came from goes away

        # 1: layout of a temp point becomes invalid
        Point().layout().descriptorNames()

        # 2: sample point of a dataset becomes invalid
        ds = testdata.loadSmallDB()
        p = ds.samplePoint()
        del ds
        p.name()  # must still work after the dataset has been deleted
Exemple #9
0
    def testForceIdentity(self):
        """The 'forceidentity' metric returns 0 when comparing a point with
        itself and otherwise delegates to the wrapped distance."""
        l = PointLayout()
        l.add('a', RealType, FixedLength, 1)

        p = Point()
        p.setLayout(l)

        # plain cosine similarity with a default value of 0.5
        # NOTE: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual instead.
        cd = MetricFactory.create('cosinesimilarity', p.layout(), { 'defaultValue': 0.5 })
        self.assertEqual(cd(p, p), 0.5)

        ficd = MetricFactory.create('forceidentity',
                                    p.layout(),
                                    { 'distance': 'cosinesimilarity',
                                      'params': { 'defaultValue': 0.5 }
                                      })

        # identical point: distance is forced to 0
        self.assertEqual(ficd(p, p), 0.0)

        # copy with a different name: falls back to the wrapped distance
        p2 = Point(p)
        p2.setName('p2')
        self.assertEqual(ficd(p, p2), 0.5)
Exemple #10
0
    def testComplexReferenceCounting(self):
        # track layout reference counts through copies, datasets and points;
        # the inline comments give the expected running count at each step
        ds = DataSet()
        self.assertEqual(ds.layout().ref(), 2) # 1 + 1 from temp object

        p = Point()
        p.setName('p1')
        lext = PointLayout(p.layout()) # +1, {lext,p}.ref = 2
        self.assertEqual(lext.ref(), 2)

        lext = p.layout().copy() # copy, lext.ref = 1; p.ref -= 1, = 1
        self.assertEqual(lext.ref(), 1)

        ds.addPoint(p) # +3 (dataset + pointcopy), ref = 3

        # the detached copy is unaffected by the insertion
        self.assertEqual(lext.ref(), 1)
        self.assertEqual(ds.layout().ref(), 4) # 3 + 1 temp object

        p2 = Point(p) # +1, {p,p2}.ref = 5
        p2.setName('p2')
        self.assertEqual(ds.layout().ref(), 5)
        ds.addPoint(p2)
        self.assertEqual(ds.layout().ref(), 6) # +1 pointcopy, ref = 6
Exemple #11
0
def PCA(x):
    """Run a PCA transform over the rows of ``x``.

    Each row becomes a point with a single real descriptor 'x'; the dataset
    is fixed in length and transformed with 'pca' (keeping the full input
    dimension). Returns the list of per-point 'pca' descriptor values.
    """
    layout = PointLayout()
    layout.add('x', RealType)

    points = []
    for index, row in enumerate(x):
        point = Point()
        point.setName('p%d' % index)
        point.setLayout(layout)
        point['x'] = row
        points.append(point)

    dataset = DataSet()
    dataset.addPoints(points)

    dataset = transform(dataset, 'fixlength')
    dataset = transform(dataset, 'pca', { 'dimension': len(x[0]), 'resultName': 'pca' })

    return [point['pca'] for point in dataset.points()]
Exemple #12
0
def newPoint(name):
    """Create a point named ``name`` with a single real descriptor 'a'."""
    layout = PointLayout()
    layout.add('a', RealType)

    point = Point()
    point.setName(name)
    point.setLayout(layout)
    return point
Exemple #13
0
    def testWeightedPearson(self):
        # WeightedPearson distance over two descriptors with weights 0.3/0.7
        ds = testdata.createSimpleDataSet()
        ds.point('p')['a.1'] = [ 0, 0 ] # need to have 2 values before fixing length
        p1 = transform(ds, 'fixlength').point('p')
        p2 = Point(p1)

        # weight keys '1' and 'c' — presumably matched against descriptor
        # names by suffix ('1' -> a.1); confirm against the metric docs
        dist = MetricFactory.create('WeightedPearson', p1.layout(), { 'weights': { '1': 0.3,
                                                                                   'c': 0.7 }
                                                                      })
        p1['a.1'] = [ 0.12, 2.71 ]
        p1['c'] = 4.32
        p2['1'] = [ 0.46, 1.12 ]
        p2['c'] = 2.4242

        # expected value precomputed externally; 6 decimal places
        self.assertAlmostEqual(dist(p1, p2), 0.038222129799, 6)
Exemple #14
0
    def testRhythmDistance(self):
        # Rhythm distance on descriptor 'a.1' for a few concrete value pairs
        p1 = testdata.createSimpleDataSet().point('p')
        p2 = Point(p1)

        dist = MetricFactory.create('Rhythm', p1.layout(), { 'descriptorName': 'a.1',
                                                             'indexRange': [ 1, 2, 4, 8 ],
                                                             'alpha': 0.8 })

        p1['a.1'] = 3
        p2['a.1'] = 2
        self.assertAlmostEqual(dist(p1, p1), 0.0)  # identity
        self.assertAlmostEqual(dist(p1, p2), 0.4)
        self.assertAlmostEqual(dist(p2, p1), 0.4)  # symmetry

        # non-integer values; expected results precomputed externally
        p1['a.1'] = 3.14
        self.assertAlmostEqual(dist(p1, p2), 0.344)

        p1['a.1'] = 6.23
        self.assertAlmostEqual(dist(p1, p2), 0.45312)
Exemple #15
0
def createSimpleDataSet():
    """Build a one-point dataset ('p') using the simple layout.

    Honours the module-level useFixedLength / useEnumerate flags by applying
    the corresponding transformations before returning.
    """
    global useFixedLength, useEnumerate
    point = Point()
    point.setName('p')
    point.setLayout(createSimpleLayout())

    ds = DataSet()
    ds.addPoint(point)

    if useFixedLength:
        ds = fixLength(ds)
    if useEnumerate:
        ds = enumerateStrings(ds)

    return ds
Exemple #16
0
    def testValueQuoting(self):
        """Quoted scalars parse as string labels; unquoted numbers parse as
        real values — whether bare or wrapped in a single-element list."""
        p = Point()

        # bare or quoted words → string label
        for serialized in ['a: label', 'a: "label"', 'a: [ label ]', 'a: [ "label" ]']:
            p.loadFromString(serialized)
            self.assertEqual(isinstance(p['a'], str), True)
            self.assertEqual(p['a'], 'label')

        # quoted numbers stay strings
        for serialized in ['a: "123"', 'a: [ "123" ]']:
            p.loadFromString(serialized)
            self.assertEqual(isinstance(p['a'], str), True)
            self.assertEqual(p['a'], '123')

        # bare numbers become floats
        for serialized in ['a: 123', 'a: [ 123 ]']:
            p.loadFromString(serialized)
            self.assertEqual(isinstance(p['a'], float), True)
            self.assertEqual(p['a'], 123)
Exemple #17
0
    def testCreatedInputSpace(self):
        """Searching a restricted input space gives the same results whether
        the subset was built by removing points from a result set or by
        creating an InputSpace from scratch; also checks thresholdLimit."""
        ds = testdata.createSimpleDataSet()
        ds.point('p')['a.1'] = 23.0

        # add p0..p4 with a.1 = 0.0 .. 4.0
        for i in range(5):
            p = Point()
            p.setName('p%d' % i)
            p.setLayout(ds.originalLayout())
            p['a.1'] = float(i)
            ds.addPoint(p)

        ds = transform(ds, 'fixlength')
        dist = MetricFactory.create('euclidean', ds.layout())
        v = View(ds)

        p = ds.point('p')

        # subset 1: full search result minus p2 and p4
        RS_remove = v.nnSearch(p, dist)
        RS_remove.removePoints(['p2', 'p4'])

        # subset 2: the same subset, constructed explicitly
        RS_create = InputSpace()
        RS_create.addPoints(ds, ['p', 'p0', 'p1', 'p3'])

        rsc = v.nnSearch(p, RS_remove, dist)
        rsa = v.nnSearch(p, RS_create, dist)

        # both restricted searches return identical neighbour lists
        self.assertEqual((('p', 0.), ('p3', 20.), ('p1', 22.), ('p0', 23.)),
                         v.nnSearch(p, rsc, dist).get(10))

        self.assertEqual((('p', 0.), ('p3', 20.), ('p1', 22.), ('p0', 23.)),
                         v.nnSearch(p, rsa, dist).get(10))

        # test thresholdLimit method
        self.assertEqual((('p', 0.), ),
                         v.nnSearch(p, rsa, dist).thresholdLimit(10).get(10))
        self.assertEqual((('p', 0.), ('p3', 20.)),
                         v.nnSearch(p, rsa, dist).thresholdLimit(20).get(10))
        self.assertEqual((('p', 0.), ('p3', 20.), ('p1', 22.)),
                         v.nnSearch(p, rsa,
                                    dist).thresholdLimit(22.01).get(10))
Exemple #18
0
def PCA(x):
    """Apply a PCA transformation to the vectors in ``x``.

    Builds one point per input row (descriptor 'x'), runs 'fixlength' and
    'pca' on the dataset, and returns the resulting 'pca' descriptor of
    every point as a list.
    """
    layout = PointLayout()
    layout.add('x', RealType)

    def make_point(idx, value):
        # one point per input row, named p0, p1, ...
        pt = Point()
        pt.setName('p%d' % idx)
        pt.setLayout(layout)
        pt['x'] = value
        return pt

    ds = DataSet()
    ds.addPoints([make_point(i, v) for i, v in enumerate(x)])

    ds = transform(ds, 'fixlength')
    ds = transform(ds, 'pca', {'dimension': len(x[0]), 'resultName': 'pca'})

    result = []
    for pt in ds.points():
        result.append(pt['pca'])
    return result
Exemple #19
0
    def testCenter(self):
        """The 'center' transform subtracts the per-descriptor mean from
        every point's value."""
        ds = testdata.createSimpleDataSet()
        l = testdata.createSimpleLayout()
        # replace the default point 'p' with four fresh points p0..p3
        for i in range(4):
            p = Point()
            p.setName('p%d' % i)
            p.setLayout(l)
            ds.addPoint(p)
        ds.removePoint('p')

        ds.point('p0')['a.1'] = [ 0, 1 ]
        ds.point('p1')['a.1'] = [ 4, 3 ]
        ds.point('p2')['a.1'] = [ 6, 9 ]
        ds.point('p3')['a.1'] = [ 2, 27 ]
        # mean = [ 3, 10 ]

        ds = transform(ds, 'fixlength')
        dsc = transform(ds, 'center', { 'descriptorNames': 'a.1' })

        # each centered value equals the original minus the mean above
        self.assertEqual(dsc.point('p0')['a.1'], (-3, -9))
        self.assertEqual(dsc.point('p1')['a.1'], ( 1, -7))
        self.assertEqual(dsc.point('p2')['a.1'], ( 3, -1))
        self.assertEqual(dsc.point('p3')['a.1'], (-1, 17))
Exemple #20
0
def readLibSVMDataSet(filename):
    """Parse a libsvm-format file into a gaia DataSet.

    Each input line is "<class> <dim>:<value> ...". Dimension indices may
    start at 0 or 1; the smallest index seen is mapped to position 0 of the
    'value' descriptor. Dimensions absent from a line default to 0.0.

    Returns a DataSet with one point per line, each holding a 'class' string
    descriptor and a fixed-dimension 'value' real descriptor.
    """
    # BUGFIX: the file handle was previously opened without being closed;
    # use a context manager (iterating the handle also avoids readlines()).
    with open(filename) as f:
        data = [l.split() for l in f]

    # parse "dim:value" tokens and track the index range actually used
    minidx = maxidx = 1
    for l in data:
        for i in range(1, len(l)):
            dim, value = l[i].split(':')
            l[i] = (int(dim), float(value))
            minidx = min(minidx, int(dim))
            maxidx = max(maxidx, int(dim))

    dimension = maxidx - minidx + 1

    layout = PointLayout()
    layout.add('class', StringType)
    layout.add('value', RealType)

    points = []
    for n, l in enumerate(data):
        p = Point()
        p.setLayout(layout)
        p.setName('instance_%06d' % n)

        p['class'] = l[0]
        # dense vector, zero-filled for missing dimensions
        desc = RealDescriptor(dimension, 0.0)
        for dim, value in l[1:]:
            desc[dim - minidx] = value
        p['value'] = desc

        points.append(p)

    ds = DataSet()
    ds.addPoints(points)

    return ds
Exemple #21
0
    def api_search(self, target_type, target, filter, preset_name,
                   metric_descriptor_names, num_results, offset, in_ids):
        """Perform a similarity search and return matching results.

        ``target_type`` selects how ``target`` is interpreted:
          - 'sound_id': id of a point already present in the dataset
          - 'descriptor_values': dict of descriptor values, normalized into
            a query point using the dataset's normalization coefficients
          - 'file': parsed content of an uploaded analysis file, loaded (or
            reconstructed descriptor by descriptor) into a query point

        ``filter``/``in_ids`` restrict the result set; ``preset_name`` picks
        the metric (and the PCA view when 'pca'); ``metric_descriptor_names``
        optionally overrides the metric with a euclidean one over the given
        descriptors. Returns a dict with 'error' and 'result' keys, plus
        'status_code' on error.
        """

        # Check if index has sufficient points
        size = self.original_dataset.size()
        if size < sim_settings.SIMILARITY_MINIMUM_POINTS:
            msg = 'Not enough datapoints in the dataset (%s < %s).' % (
                size, sim_settings.SIMILARITY_MINIMUM_POINTS)
            logger.info(msg)
            return {
                'error': True,
                'result': msg,
                'status_code': sim_settings.SERVER_ERROR_CODE
            }

        # Get some dataset parameters that will be useful later
        trans_hist = self.transformations_history
        layout = self.original_dataset.layout()
        pca_layout = self.pca_dataset.layout()
        coeffs = None  # Get normalization coefficients
        # scan the history backwards; the last 'normalize' step wins
        for i in range(0, len(trans_hist)):
            if trans_hist[-(i + 1)]['Analyzer name'] == 'normalize':
                coeffs = trans_hist[-(i + 1)]['Applier parameters']['coeffs']

        # Process target
        if target:
            if target_type == 'sound_id':
                query_point = str(target)
                if not self.original_dataset.contains(query_point):
                    msg = "Sound with id %s doesn't exist in the dataset and can not be set as similarity target." \
                          % query_point
                    logger.info(msg)
                    return {
                        'error': True,
                        'result': msg,
                        'status_code': sim_settings.NOT_FOUND_CODE
                    }
                else:
                    query = query_point

            elif target_type == 'descriptor_values':
                # Transform input params to the normalized feature space and add them to a query point
                # If there are no params specified in the target, the point is set as empty (probably random sounds
                # are returned)
                feature_names = []
                query = Point()
                query.setLayout(layout)
                try:
                    for param in target.keys():
                        # Only add numerical parameters. Non numerical ones (like key) are only used as filters
                        if param in coeffs.keys():
                            feature_names.append(str(param))
                            value = target[param]
                            if coeffs:
                                a = coeffs[param]['a']
                                b = coeffs[param]['b']
                                if len(a) == 1:
                                    norm_value = a[0] * value + b[0]
                                else:
                                    norm_value = []
                                    for i in range(0, len(a)):
                                        norm_value.append(a[i] * value[i] +
                                                          b[i])
                                query.setValue(str(param), norm_value)
                            else:
                                query.setValue(str(param), value)
                # narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
                # are not swallowed
                except Exception:
                    return {
                        'error': True,
                        'result':
                        'Invalid target (descriptor values could not be correctly parsed)',
                        'status_code': sim_settings.BAD_REQUEST_CODE
                    }

                # Overwrite metric with present descriptors in target
                # NOTE(review): this metric is unconditionally overwritten in
                # the 'Set query metric' section below — confirm which metric
                # is intended to win for 'descriptor_values' targets.
                metric = DistanceFunctionFactory.create(
                    'euclidean', layout, {'descriptorNames': feature_names})

            elif target_type == 'file':
                # Target is specified as the attached file
                # Create a point with the data in 'descriptors_data' and search for it
                target_file_parsing_type = '-'

                try:
                    # Try directly loading the file
                    p, query = Point(), Point()
                    p.loadFromString(yaml.dump(target))
                    if preset_name == 'pca':
                        query = self.pca_dataset.history().mapPoint(
                            p)  # map point to pca dataset
                    else:
                        query = self.original_dataset.history().mapPoint(
                            p)  # map point to original dataset
                    target_file_parsing_type = 'mapPoint'

                except Exception as e:
                    logger.info(
                        'Unable to create gaia point from uploaded file (%s). '
                        'Trying adding descriptors one by one.' % e)

                    # If does not work load descriptors one by one
                    try:
                        query = Point()
                        #query.setLayout(layout)

                        feature_names = []
                        get_nested_descriptor_names(target, feature_names)
                        feature_names = [
                            '.%s' % item for item in feature_names
                        ]
                        nonused_features = []

                        for param in feature_names:
                            if param in coeffs.keys():
                                value = get_nested_dictionary_value(
                                    param[1:].split('.'), target)
                                if coeffs:
                                    try:
                                        a = coeffs[param]['a']
                                        b = coeffs[param]['b']
                                        if len(a) == 1:
                                            norm_value = a[0] * value + b[0]
                                        else:
                                            norm_value = []
                                            for i in range(0, len(a)):
                                                norm_value.append(a[i] *
                                                                  value[i] +
                                                                  b[i])
                                        query.setValue(str(param[1:]),
                                                       norm_value)
                                    except Exception:
                                        nonused_features.append(param)
                                else:
                                    query.setValue(str(param[1:]), value)
                            else:
                                nonused_features.append(param)

                        if preset_name == 'pca':
                            query = self.pca_dataset.history().mapPoint(
                                query)  # map point to pca dataset
                        else:
                            # BUGFIX: previously mapped 'p' (the point whose
                            # direct load just failed above) instead of the
                            # 'query' point rebuilt descriptor by descriptor,
                            # discarding all the work done in this fallback.
                            query = self.original_dataset.history().mapPoint(
                                query)  # map point to original dataset

                        target_file_parsing_type = 'walkDict'

                    except Exception as e:
                        logger.info(
                            'Unable to create gaia point from uploaded file and adding descriptors one by '
                            'one (%s)' % e)
                        return {
                            'error':
                            True,
                            'result':
                            'Unable to create gaia point from uploaded file. Probably the '
                            'file does not have the required layout. Are you using the '
                            'correct version of Essentia\'s Freesound extractor?',
                            'status_code':
                            sim_settings.SERVER_ERROR_CODE
                        }
        else:
            query = Point()  # Empty target
            if preset_name == 'pca':
                query.setLayout(pca_layout)
            else:
                query.setLayout(layout)

        # Process filter
        if filter:
            filter = parse_filter_list(filter, coeffs)
        else:
            filter = ""  # Empty filter

        # log
        log_message = 'Similarity search'
        if target:
            if target_type == 'sound_id':
                log_target = '%s (sound id)' % str(target)
            elif target_type == 'descriptor_values':
                log_target = '%s (descriptor values)' % str(target)
            elif target_type == 'file':
                log_target = 'uploaded file (%s)' % target_file_parsing_type
            log_message += ' with target: %s' % log_target
        if filter:
            log_message += ' with filter: %s' % str(filter)
        logger.info(log_message)

        # if in_ids is specified, edit the filter accordingly
        if in_ids:
            if not filter:
                filter = 'WHERE point.id IN ("' + '", "'.join(in_ids) + '")'
            else:
                filter += ' AND point.id IN ("' + '", "'.join(in_ids) + '")'

        # Set query metric
        metric = self.metrics[preset_name]
        if metric_descriptor_names:
            metric = DistanceFunctionFactory.create(
                'euclidean', layout,
                {'descriptorNames': metric_descriptor_names})

        # Do query!
        try:
            if target_type == 'descriptor_values' and target:
                search = self.view.nnSearch(query, metric, str(filter))
            else:
                if preset_name == 'pca':
                    search = self.view_pca.nnSearch(query, metric, str(filter))
                else:
                    search = self.view.nnSearch(query, metric, str(filter))
            results = search.get(num_results, offset=offset)
            count = search.size()
        except Exception as e:
            return {
                'error': True,
                'result': 'Similarity server error',
                'status_code': sim_settings.SERVER_ERROR_CODE
            }

        note = None
        if target_type == 'file':
            if target_file_parsing_type == 'walkDict':
                note = 'The layout of the given analysis file differed from what we expected. Similarity results ' \
                       'might not be accurate. Was the file generated with the last version of Essentia\'s ' \
                       'Freesound extractor?'

        return {
            'error': False,
            'result': {
                'results': results,
                'count': count,
                'note': note
            }
        }
Exemple #22
0
def createDataSet():
    """Two-point dataset: p1.a = (0.0, 0.0), p2.a = (0.5, 1.0).

    Applies the testdata fixed-length / enumerate transformations when the
    corresponding flags are set.
    """
    layout = PointLayout()
    layout.add('a', RealType)

    ds = DataSet()

    for name, value in (('p1', (0.0, 0.0)), ('p2', (0.5, 1.0))):
        point = Point()
        point.setName(name)
        point.setLayout(layout)
        point['a'] = value
        ds.addPoint(point)

    if testdata.useFixedLength:
        ds = testdata.fixLength(ds)

    if testdata.useEnumerate:
        ds = testdata.enumerateStrings(ds)

    return ds
Exemple #23
0
    def add_point(self, point_location, point_name):
        """Add (or replace) a point in the index from an analysis file.

        Any existing point with ``point_name`` is replaced. While the index
        is below SIMILARITY_MINIMUM_POINTS the point is added straight to the
        original dataset; afterwards it goes through the PCA dataset, which
        also propagates it to the original dataset. When the minimum size is
        reached (and not in indexing-only mode) the dataset is prepared,
        normalized and saved, and the views/metrics are built.

        Returns a dict with 'error' and 'result' keys ('status_code' on
        error).
        """

        if self.original_dataset.contains(str(point_name)):
            self.original_dataset.removePoint(str(point_name))

        p = Point()
        if os.path.exists(str(point_location)):
            try:
                p.load(str(point_location))
                p.setName(str(point_name))
                if self.original_dataset.size(
                ) <= sim_settings.SIMILARITY_MINIMUM_POINTS:
                    # Add point to original_dataset because PCA dataset has not been created yet
                    self.original_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points.' % \
                          (str(point_name), self.original_dataset.size())
                    logger.info(msg)
                else:
                    # Add point to PCA dataset because it has been already created.
                    # PCA dataset will take care of adding the point to the original dataset as well.
                    self.pca_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points (pca index has %i points).' % \
                          (str(point_name), self.original_dataset.size(), self.pca_dataset.size())
                    logger.info(msg)

            except Exception as e:
                msg = 'Point with name %s could NOT be added (%s).' % (
                    str(point_name), str(e))
                logger.info(msg)
                return {
                    'error': True,
                    'result': msg,
                    'status_code': sim_settings.SERVER_ERROR_CODE
                }
        else:
            msg = 'Point with name %s could NOT be added because analysis file does not exist (%s).' % \
                  (str(point_name), str(point_location))
            logger.info(msg)
            return {
                'error': True,
                'result': msg,
                'status_code': sim_settings.SERVER_ERROR_CODE
            }

        if self.original_dataset.size(
        ) == sim_settings.SIMILARITY_MINIMUM_POINTS:
            # Do enumerate
            try:
                self.original_dataset = transform(
                    self.original_dataset, 'enumerate',
                    {'descriptorNames': ['.tonal.chords_progression']})
            except Exception:
                # BUGFIX: narrowed from a bare 'except:' (flagged by the
                # previous TODO), which would also swallow
                # SystemExit/KeyboardInterrupt.
                logger.info(
                    'WARNING: enumerate transformation to .tonal.chords_progression could not be performed.'
                )

        # If when adding a new point we reach the minimum points for similarity, do the needful so that the dataset
        # can be used for search. This includes preparing the dataset, normalizing it, saveing it and creating view and
        # distance metrics. This will only happen once when the size of the dataset reaches SIMILARITY_MINIMUM_POINTS.
        if self.original_dataset.size(
        ) == sim_settings.SIMILARITY_MINIMUM_POINTS and not self.indexing_only_mode:
            self.__prepare_original_dataset()
            self.__normalize_original_dataset()
            self.transformations_history = self.original_dataset.history(
            ).toPython()
            self.save_index(msg="(reaching %i points)" %
                            sim_settings.SIMILARITY_MINIMUM_POINTS)

            # TODO: the code below is repeated from __load_dataset() method, should be moved into a util function
            # Build metrics for the different similarity presets, create a Gaia view
            self.__build_metrics()
            view = View(self.original_dataset)
            self.view = view

            # Compute PCA and create pca view and metric
            # NOTE: this step may take a long time if the dataset is big, but it only needs to be performed once
            # when the similarity server is loaded-
            self.pca_dataset = transform(
                self.original_dataset, 'pca', {
                    'descriptorNames': sim_settings.PCA_DESCRIPTORS,
                    'dimension': sim_settings.PCA_DIMENSIONS,
                    'resultName': 'pca'
                })
            self.pca_dataset.setReferenceDataSet(self.original_dataset)
            self.view_pca = View(self.pca_dataset)
            self.__build_pca_metric()

        return {'error': False, 'result': msg}
Exemple #24
0
    def testSimplifyHistory(self):
        """simplifyHistory() collapses a chain of transformations into a
        minimal equivalent one (at most a 'remove' followed by a
        'fixlength'), and the simplified history still maps points."""
        ds = testdata.createSimpleDataSet()
        p = Point()
        p.setName('p2')
        p.setLayout(ds.layout())
        p['a.2'] = [1.2, 2.3]
        ds.addPoint(p)
        ds0 = ds.copy()

        # no transformations applied yet: simplified history is empty
        ds1 = ds.copy()
        ds1.simplifyHistory()
        self.assertEqual(ds1.history().size(), 0)

        # removevl simplifies to a plain 'remove' of the variable-length
        # descriptor a.2
        ds = transform(ds, 'removevl')
        ds2 = ds.copy()
        ds2.simplifyHistory()

        self.assertEqual(ds2.history().toPython(), [{
            'Analyzer name': 'remove',
            'Analyzer parameters': {
                'descriptorNames': ['.a.2']
            },
            'Applier name': 'removedesc',
            'Applier parameters': {
                'descriptorNames': ['.a.2']
            },
            'Additional info': {}
        }])

        # adding fixlength keeps the two-step shape: remove + fixlength
        ds = transform(ds, 'fixlength')
        ds3 = ds.copy()
        ds3.simplifyHistory()

        self.assertEqual(ds3.history().toPython(), [{
            'Analyzer name': 'remove',
            'Analyzer parameters': {
                'descriptorNames': ['.a.2']
            },
            'Applier name': 'removedesc',
            'Applier parameters': {
                'descriptorNames': ['.a.2']
            },
            'Additional info': {}
        }, {
            'Analyzer name': 'fixlength',
            'Analyzer parameters': {
                'descriptorNames': ['.a.1', '.b', '.c', '.d']
            },
            'Applier name': 'fixlengthapplier',
            'Applier parameters': {
                'descriptorNames': ['.a.1', '.b', '.c', '.d']
            },
            'Additional info': {}
        }])

        # a further remove is merged into the single 'remove' step
        ds = transform(ds, 'remove', {'descriptorNames': 'a.*'})
        ds4 = ds.copy()
        ds4.simplifyHistory()

        self.assertEqual(ds4.history().toPython(), [{
            'Analyzer name': 'remove',
            'Analyzer parameters': {
                'descriptorNames': ['.a.1', '.a.2']
            },
            'Applier name': 'removedesc',
            'Applier parameters': {
                'descriptorNames': ['.a.1', '.a.2']
            },
            'Additional info': {}
        }, {
            'Analyzer name': 'fixlength',
            'Analyzer parameters': {
                'descriptorNames': ['.b', '.c', '.d']
            },
            'Applier name': 'fixlengthapplier',
            'Applier parameters': {
                'descriptorNames': ['.b', '.c', '.d']
            },
            'Additional info': {}
        }])

        # 'select' is folded into the 'remove' of the complement descriptors
        ds = transform(ds, 'select', {'descriptorNames': ['b', 'c']})
        ds5 = ds.copy()
        ds5.simplifyHistory()

        self.assertEqual(ds5.history().toPython(), [{
            'Analyzer name': 'remove',
            'Analyzer parameters': {
                'descriptorNames': ['.a.1', '.a.2', '.d']
            },
            'Applier name': 'removedesc',
            'Applier parameters': {
                'descriptorNames': ['.a.1', '.a.2', '.d']
            },
            'Additional info': {}
        }, {
            'Analyzer name': 'fixlength',
            'Analyzer parameters': {
                'descriptorNames': ['.b', '.c']
            },
            'Applier name': 'fixlengthapplier',
            'Applier parameters': {
                'descriptorNames': ['.b', '.c']
            },
            'Additional info': {}
        }])

        # a point mapped through the simplified history keeps only b and c
        p2 = Point()
        p2.setLayout(ds0.layout())
        p2['b'] = 23
        p2['c'] = 78
        p2['a.2'] = [1, 2, 3, 4]

        p2m = ds5.history().mapPoint(p2)

        self.assertEqual(p2m.layout().descriptorNames(), ('.b', '.c'))
        self.assertEqual(p2m['b'], 23.)
        self.assertEqual(p2m['c'], 78.)
Exemple #25
0
    def query_dataset(self, query_parameters, number_of_results):
        """Run a content-based nearest-neighbour query against the dataset.

        :param query_parameters: dict with a 'target' mapping (descriptor name
            -> value) and a 'filter' (either a filter string or a structured
            filter list understood by parse_filter_list)
        :param number_of_results: maximum number of neighbours to return
        :return: dict with an 'error' flag and a 'result' payload (search
            results on success, error message on failure)
        """
        size = self.original_dataset.size()
        if size < SIMILARITY_MINIMUM_POINTS:
            msg = "Not enough datapoints in the dataset (%s < %s)." % (size, SIMILARITY_MINIMUM_POINTS)
            logger.debug(msg)
            return {"error": True, "result": msg}

        trans_hist = self.original_dataset.history().toPython()
        layout = self.original_dataset.layout()

        # Get normalization coefficients to transform the input data, taken
        # from the most recent "normalize" step of the transformation history.
        # BUGFIX: the original loop lacked a break, so it kept overwriting
        # coeffs and ended up with the *oldest* normalize step instead of the
        # last one the comment promised.
        coeffs = None
        for entry in reversed(trans_hist):
            if entry["Analyzer name"] == "normalize":
                coeffs = entry["Applier parameters"]["coeffs"]
                break

        ##############
        # PARSE TARGET
        ##############

        # Transform input params to the normalized feature space and add them
        # to a query point. If no params are specified in the target, the point
        # is left empty (probably random sounds are returned).
        q = Point()
        q.setLayout(layout)
        feature_names = []
        # BUGFIX: `param in coeffs.keys()` raised AttributeError when the
        # history contained no normalize step (coeffs is None); now we simply
        # skip target parsing in that case. The old inner `if coeffs` / `else`
        # was dead code, since the branch was only reachable with coeffs set.
        if coeffs is not None:
            for param, value in query_parameters["target"].items():
                # Only add numerical parameters. Non numerical ones (like key)
                # are only used as filters.
                if param not in coeffs:
                    continue
                feature_names.append(str(param))
                a = coeffs[param]["a"]
                b = coeffs[param]["b"]
                if len(a) == 1:
                    norm_value = a[0] * value + b[0]
                else:
                    # Vector descriptor: normalize each dimension separately.
                    norm_value = [a_i * v_i + b_i
                                  for a_i, v_i, b_i in zip(a, value, b)]
                q.setValue(str(param), norm_value)

        ##############
        # PARSE FILTER
        ##############

        # Renamed from `filter` to avoid shadowing the builtin.
        filter_string = ""
        # If some filter has been specified...
        if query_parameters["filter"]:
            # BUGFIX: the original tested `type(x[0:5]) == str`, a roundabout
            # (and slice-dependent) way of asking "is this a string?".
            if isinstance(query_parameters["filter"], str):
                filter_string = query_parameters["filter"]
            else:
                filter_string = self.parse_filter_list(query_parameters["filter"], coeffs)

        #############
        # DO QUERY!!!
        #############

        logger.debug(
            "Content based search with target: " + str(query_parameters["target"]) + " and filter: " + str(filter_string)
        )
        metric = DistanceFunctionFactory.create("euclidean", layout, {"descriptorNames": feature_names})
        # Looks like that depending on the version of gaia, variable filter must go after or before the metric
        # For the gaia version we have currently (sep 2012) in freesound: nnSearch(query,filter,metric)
        # results = self.view.nnSearch(q,str(filter),metric).get(int(number_of_results)) # <- Freesound
        results = self.view.nnSearch(q, metric, str(filter_string)).get(int(number_of_results))

        return {"error": False, "result": results}
Exemple #26
0
    def testSimple(self):
        """Nearest-neighbour search on a tiny dataset with known distances."""
        dataset = testdata.createSimpleDataSet()

        # Clone the reference point 'p' three times, perturbing 'a.1'/'a.2'
        # so the expected euclidean distances are 0, 1 and sqrt(2).
        for name, changes in (('p2', {}),
                              ('p3', {'a.1': 1}),
                              ('p4', {'a.1': 1, 'a.2': 1})):
            clone = Point(dataset.point('p'))
            clone.setName(name)
            for descriptor, value in changes.items():
                clone[descriptor] = value
            dataset.addPoint(clone)

        dataset = transform(dataset, 'fixlength')
        metric = MetricFactory.create('euclidean', dataset.layout())
        view = View(dataset)

        results = view.nnSearch('p', metric).get(10)
        # 'p' and its exact copy 'p2' both sit at distance 0 (either order).
        self.assertEqual(results[0][1], 0.0)
        self.assertEqual(results[1][1], 0.0)
        self.assertSearchResultEqual(results[2], ('p3', 1.0))
        self.assertSearchResultEqual(results[3], ('p4', math.sqrt(2)))
Exemple #27
0
 def testNamespaceClash(self):
     # Regression test: loading a point whose descriptor names clash across
     # namespaces must not raise (the load itself is the assertion).
     p = Point()
     p.load('data/namespace_clash.sig')
Exemple #28
0
    def testLayout(self):
        """Round-trip a dataset through disk and verify that both the layout
        and the stored descriptor values survive save/load."""
        layout = testdata.createSimpleLayout()

        handle, path = tempfile.mkstemp()
        os.close(handle)

        # Build two points sharing the same layout.
        first = Point()
        first.setName('p1')
        first.setLayout(layout)
        second = Point()
        second.setName('p2')
        second.setLayout(layout)

        first['a.1'] = 23
        self.assertEqual(first['a.1'], 23)
        # 'a.4' is not part of the layout, so setting it must fail.
        self.assertRaises(Exception, first.setValue, 'a.4', 34)

        original = DataSet()
        original.setName('ds1')
        original.addPoint(first)
        original.addPoint(second)
        original.save(path)

        # Reload and check that layout and value round-tripped.
        reloaded = DataSet()
        reloaded.load(path)
        self.assertEqual(layout, reloaded.layout())
        self.assertEqual(reloaded.point('p1')['a.1'], 23)

        # Clean up the temp file.
        os.remove(path)
Exemple #29
0
    def testMergePointsWithDifferentEnumerationMaps(self):
        """Ticket #74: when changing the layout of a point, the enumeration
        maps must also be correctly remapped so labels keep their meaning."""
        first = Point()
        first.setName('p1')
        first.setLayout(self.l1)
        first['d'] = 'hello'

        second = Point()
        second.setName('p2')
        second.setLayout(self.l1)
        second['d'] = 'world'

        dataset = DataSet()
        dataset.addPoint(first)
        dataset.addPoint(second)

        self.assertEqual(dataset.point('p1').label('d'), 'hello')
        self.assertEqual(dataset.point('p2').label('d'), 'world')

        # Enumerate 'd' with only p1 present, then re-add p2: its enum map
        # differs from the dataset's and must be translated on insertion.
        dataset.removePoint('p2')
        dataset = transform(dataset, 'enumerate', {'descriptorNames': 'd'})
        dataset.addPoint(second)

        self.assertEqual(dataset.point('p1').label('d'), 'hello')
        self.assertEqual(dataset.point('p2').label('d'), 'world')
Exemple #30
0
    def add_point(self, point_location, point_name):
        """Load an analysis file from disk and add it to the similarity index.

        Replaces any existing point with the same name. When the index reaches
        exactly ``settings.SIMILARITY_MINIMUM_POINTS`` points, the dataset is
        prepared for searching (enumerate transform, metrics, views and PCA).

        :param point_location: filesystem path of the serialized Point
        :param point_name: name (sound id) to index the point under
        :return: dict with an 'error' flag and a 'result' message; failures
            also carry a 'status_code'
        """
        if self.original_dataset.contains(str(point_name)):
            # Adding an already-indexed point means "update": drop the old one.
            self.original_dataset.removePoint(str(point_name))

        p = Point()
        if os.path.exists(str(point_location)):
            try:
                p.load(str(point_location))
                p.setName(str(point_name))
                if self.original_dataset.size(
                ) <= settings.SIMILARITY_MINIMUM_POINTS:
                    # Index still small: add directly to original_dataset.
                    self.original_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points.' % (
                        str(point_name), self.original_dataset.size())
                    logger.info(msg)
                else:
                    # Add point to the pca dataset (as it has already been
                    # created); it takes care of adding it to the original too.
                    self.pca_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points (pca index has %i points).' % (
                        str(point_name), self.original_dataset.size(),
                        self.pca_dataset.size())
                    logger.info(msg)

            except Exception as e:
                msg = 'Point with name %s could NOT be added (%s).' % (
                    str(point_name), str(e))
                logger.info(msg)
                return {
                    'error': True,
                    'result': msg,
                    'status_code': settings.SERVER_ERROR_CODE
                }
        else:
            msg = 'Point with name %s could NOT be added because analysis file does not exist (%s).' % (
                str(point_name), str(point_location))
            logger.info(msg)
            return {
                'error': True,
                'result': msg,
                'status_code': settings.SERVER_ERROR_CODE
            }

        if self.original_dataset.size() == settings.SIMILARITY_MINIMUM_POINTS:
            # Apply the enumerate transform exactly when the dataset reaches
            # the minimum size, before the metrics/views are built below.
            try:
                self.original_dataset = transform(
                    self.original_dataset, 'enumerate',
                    {'descriptorNames': ['.tonal.chords_progression']})
            # BUGFIX: was a bare 'except:' which also swallowed SystemExit
            # and KeyboardInterrupt; narrow it to Exception.
            except Exception:
                logger.info(
                    'WARNING: enumerate transformation to .tonal.chords_progression could not be performed.'
                )

        # If when adding a new point we reach the minimum points for similarity,
        # prepare the dataset, save it, and create views and distance metrics.
        #   This will almost never happen: only the first time the similarity
        #   server starts, when there is no index and ~2000 points accumulate.
        if self.original_dataset.size(
        ) == settings.SIMILARITY_MINIMUM_POINTS and not self.indexing_only_mode:
            self.transformations_history = self.original_dataset.history(
            ).toPython()
            self.save_index(msg="(reaching 2000 points)")

            # Build metrics for the different similarity presets.
            self.__build_metrics()
            # Create the search view over the full dataset.
            self.view = View(self.original_dataset)
            # Compute PCA and create the corresponding view and metric.
            self.pca_dataset = transform(
                self.original_dataset, 'pca', {
                    'descriptorNames': settings.PCA_DESCRIPTORS,
                    'dimension': settings.PCA_DIMENSIONS,
                    'resultName': 'pca'
                })
            self.pca_dataset.setReferenceDataSet(self.original_dataset)
            self.view_pca = View(self.pca_dataset)
            self.__build_pca_metric()

        return {'error': False, 'result': msg}