def _search(self, params):
    """Run a similarity search against ``self.search_url`` and return Solr documents.

    ``params['url']`` is POSTed as the raw request body. The JSON response is
    consumed as a sequence of ``(image, score)`` pairs. In each image path the
    value of the ``IMAGE_SPACE_CMU_PREFIX`` environment variable is replaced by
    the value of ``IMAGE_SPACE_SOLR_PREFIX``, and the resulting paths are handed
    to ``solr_documents_from_paths`` together with the classifications.

    :param params: mapping that must contain ``'url'`` and may contain
        ``'classifications'`` (a JSON-encoded string; defaults to ``[]``).
    :returns: ``{'numFound': <number of documents>, 'docs': <documents>}``
        where every document has gained an ``'im_score'`` entry.
    :raises KeyError: if a returned document's lowercased ``'id'`` has no
        matching score in the service response.
    """
    assert hasattr(self, 'search_url')

    if 'classifications' in params:
        classifications = json.loads(params['classifications'])
    else:
        classifications = []

    query_url = params['url']
    request_headers = {
        'Content-type': 'text',
        'Content-length': str(len(query_url))
    }
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request -- confirm that this is intentional for the deployment.
    response = requests.post(self.search_url,
                             data=query_url,
                             headers=request_headers,
                             verify=False)

    solr_paths = []
    score_by_path = {}
    for image, score in response.json():
        solr_path = image.replace(os.environ['IMAGE_SPACE_CMU_PREFIX'],
                                  os.environ['IMAGE_SPACE_SOLR_PREFIX'])
        solr_paths.append(solr_path)
        # Scores are keyed by lowercased path so the lookup below is
        # case-insensitive with respect to the document id.
        score_by_path[solr_path.lower()] = score

    documents = solr_documents_from_paths(solr_paths, classifications)

    # Carry each score from the service response over to its Solr document.
    for document in documents:
        document['im_score'] = score_by_path[document['id'].lower()]

    return {
        'numFound': len(documents),
        'docs': documents
    }
def runImageSimilaritySearch(self, params):
    """Find images similar to ``params['url']`` and return their Solr documents.

    Issues a GET to ``<self.search_url>/n=<n>/<params['url']>`` and reads the
    ``'neighbors'`` and ``'distances'`` lists from the JSON response. The
    neighbors are looked up by their ``'sha'`` field in the database collection
    named by the ``IMAGE_SPACE_SMQTK_MAP_COLLECTION`` environment variable.
    Each matching record's ``'path'`` is turned into a Solr id by prefixing it
    with ``IMAGE_SPACE_SOLR_PREFIX`` and ``'/'``.

    :param params: mapping that must contain ``'url'`` and may contain:

        * ``'classifications'`` -- JSON-encoded string, defaults to ``[]``;
        * ``'n'`` -- number of neighbors to request, defaults to
          ``DEFAULT_PAGE_SIZE``; may be a string or an int;
        * ``'near_duplicates'`` -- when it parses to the integer ``1``, only
          documents with ``im_distance <= NEAR_DUPLICATES_THRESHOLD`` are kept.

        The mapping is not modified.
    :returns: ``{'numFound': <number of documents>, 'docs': <documents>}``
        where every document has gained an ``'im_distance'`` entry.
    """
    assert hasattr(self, 'search_url')

    classifications = json.loads(params['classifications']) if 'classifications' in params else []

    # Resolve the page size locally instead of writing the default back into
    # the caller's dict, and coerce to str so an integer ``n`` does not break
    # the URL concatenation below.
    page_size = str(params['n'] if 'n' in params else DEFAULT_PAGE_SIZE)

    smqtk_r = requests.get(self.search_url + '/n=' + page_size + '/' + params['url']).json()
    neighbors_to_distances = dict(zip(smqtk_r['neighbors'], smqtk_r['distances']))

    # NOTE(review): presumably a pymongo connection -- confirm against
    # getDbConnection's definition.
    db = getDbConnection().get_default_database()
    mapped_paths = db[os.environ['IMAGE_SPACE_SMQTK_MAP_COLLECTION']].find({
        'sha': {
            '$in': smqtk_r['neighbors']
        }
    })

    solr_id_to_shas = {os.environ['IMAGE_SPACE_SOLR_PREFIX'] + '/' + x['path']: x['sha']
                       for x in mapped_paths}

    # Pass a concrete list rather than a dict view, which on Python 3 supports
    # neither indexing nor slicing.
    documents = solr_documents_from_paths(list(solr_id_to_shas), classifications)

    # Solr id -> sha -> distance reported by the similarity service.
    for document in documents:
        document['im_distance'] = neighbors_to_distances[solr_id_to_shas[document['id']]]

    if 'near_duplicates' in params and int(params['near_duplicates']) == 1:
        documents = [x for x in documents if x['im_distance'] <= NEAR_DUPLICATES_THRESHOLD]

    return {
        'numFound': len(documents),
        'docs': documents
    }