Esempio n. 1
0
    def _search(self, params):
        """Query the CMU similarity service and return the matching Solr documents.

        The value of ``params['url']`` is POSTed as the raw request body to
        ``self.search_url``.  The JSON response is iterated as
        ``(image, score)`` pairs; in every image path
        ``IMAGE_SPACE_CMU_PREFIX`` is replaced with ``IMAGE_SPACE_SOLR_PREFIX``
        and the resulting paths are passed to ``solr_documents_from_paths``.

        :param params: request parameters.  ``url`` is required.
            ``classifications`` is optional; when present it is JSON-decoded
            and forwarded to ``solr_documents_from_paths``, otherwise an
            empty list is forwarded.
        :returns: dict with ``numFound`` (the number of documents) and
            ``docs`` (the documents, each carrying the service's score for
            its id under ``im_score``).
        :raises requests.HTTPError: if the service answers with an error
            status.
        :raises KeyError: if ``url`` is missing from ``params``, one of the
            prefix environment variables is unset, or a returned document id
            has no score in the service response.
        """
        assert hasattr(self, 'search_url'), 'search_url must be set before searching'

        if 'classifications' in params:
            classifications = json.loads(params['classifications'])
        else:
            classifications = []

        # No Content-Length header is supplied: requests derives it from the
        # body and overrides a caller-provided value, so the former
        # character-count based header had no effect.
        # NOTE(review): verify=False disables TLS certificate verification
        # for this call; kept unchanged, confirm that it is still required.
        response = requests.post(self.search_url,
                                 data=params['url'],
                                 headers={'Content-type': 'text'},
                                 verify=False)
        # Surface backend failures as an HTTP error instead of a confusing
        # JSON or unpacking error further down.
        response.raise_for_status()
        results = response.json()

        # Read the prefixes once instead of once per result.
        cmu_prefix = os.environ['IMAGE_SPACE_CMU_PREFIX']
        solr_prefix = os.environ['IMAGE_SPACE_SOLR_PREFIX']

        paths = []
        scores_by_path = {}
        for image, score in results:
            path = image.replace(cmu_prefix, solr_prefix)
            paths.append(path)
            # Scores are matched to document ids case-insensitively.
            scores_by_path[path.lower()] = score

        documents = solr_documents_from_paths(paths, classifications)

        # Augment the Solr documents with the scores from the service response
        for document in documents:
            document['im_score'] = scores_by_path[document['id'].lower()]

        return {
            'numFound': len(documents),
            'docs': documents
        }
Esempio n. 2
0
    def runImageSimilaritySearch(self, params):
        """Run a nearest-neighbour image search and return the Solr documents.

        Sends a GET request to ``<search_url>/n=<n>/<url>`` and reads the
        ``neighbors`` and ``distances`` lists from the JSON response.  The
        neighbors are looked up as ``sha`` values in the collection named by
        ``IMAGE_SPACE_SMQTK_MAP_COLLECTION``; each matching entry's ``path``
        is turned into a Solr id by prefixing ``IMAGE_SPACE_SOLR_PREFIX`` and
        ``/``.  The documents for those ids are fetched with
        ``solr_documents_from_paths``.

        :param params: request parameters.  ``url`` is required.  ``n``
            defaults to ``DEFAULT_PAGE_SIZE``; the default is written back
            into ``params``.  ``classifications`` is optional; when present
            it is JSON-decoded and forwarded to ``solr_documents_from_paths``.
            When ``near_duplicates`` is present and equal to 1, only
            documents whose distance is at most ``NEAR_DUPLICATES_THRESHOLD``
            are returned.
        :returns: dict with ``numFound`` (the number of documents) and
            ``docs`` (the documents, each carrying its distance under
            ``im_distance``).
        :raises requests.HTTPError: if the service answers with an error
            status.
        """
        assert hasattr(self, 'search_url'), 'search_url must be set before searching'

        if 'classifications' in params:
            classifications = json.loads(params['classifications'])
        else:
            classifications = []

        # The default page size is stored in ``params`` exactly as before, in
        # case a caller inspects the dict afterwards.
        if 'n' not in params:
            params['n'] = str(DEFAULT_PAGE_SIZE)

        # %-formatting also accepts ``n`` given as an int.
        # NOTE(review): the image URL is appended unescaped, so characters
        # such as '?' or '#' in it become part of the request URL syntax.
        # Left unchanged because the encoding the service expects is not
        # visible from here.
        query_url = '%s/n=%s/%s' % (self.search_url, params['n'], params['url'])
        response = requests.get(query_url)
        # Surface backend failures as an HTTP error instead of a KeyError on
        # the missing 'neighbors' key or a JSON decoding error.
        response.raise_for_status()
        smqtk_r = response.json()

        neighbors = smqtk_r['neighbors']
        distance_by_sha = dict(zip(neighbors, smqtk_r['distances']))

        db = getDbConnection().get_default_database()
        mapped_paths = db[os.environ['IMAGE_SPACE_SMQTK_MAP_COLLECTION']].find({
            'sha': {
                '$in': neighbors
            }
        })

        # Read the prefix once instead of once per mapped path.
        solr_prefix = os.environ['IMAGE_SPACE_SOLR_PREFIX']
        sha_by_solr_id = {solr_prefix + '/' + entry['path']: entry['sha']
                          for entry in mapped_paths}

        # Pass a real list: on Python 3 ``dict.keys()`` is only a view.
        documents = solr_documents_from_paths(list(sha_by_solr_id), classifications)

        for document in documents:
            document['im_distance'] = distance_by_sha[sha_by_solr_id[document['id']]]

        if 'near_duplicates' in params and int(params['near_duplicates']) == 1:
            documents = [doc for doc in documents
                         if doc['im_distance'] <= NEAR_DUPLICATES_THRESHOLD]

        return {
            'numFound': len(documents),
            'docs': documents
        }