# ImageSimilarity, Loopy and validate_job are assumed to be provided by the
# surrounding worker module.
def process_message(key, job):
    # validate the incoming job before doing any similarity work
    print 'FINDING SIMILARITY'
    error = validate_job(job)
    if error is not None:
        print "Error in Job : {}".format(error)
        job['data'] = []
        job['error'] = error
        job['state'] = 'error'
        return

    image_similarity = ImageSimilarity(float(job['similarity_threshold']),
                                       job['start_time_ms'],
                                       job['end_time_ms'],
                                       job['similarity_method'])

    # restrict the query to the job's time window
    query_params = [{
        "query_type": "between",
        "property_name": "timestamp_ms",
        "query_value": [job['start_time_ms'], job['end_time_ms']]
    }]

    # optionally filter by language
    if 'lang' in job:
        query_params.append({
            "query_type": "where",
            "property_name": "lang",
            "query_value": job['lang']
        })

    loopy = Loopy(job['query_url'], query_params)

    if loopy.result_count == 0:
        print "No data to process"
        job['data'] = []
        job['error'] = "No data found to process."
        job['state'] = 'error'
        return

    # page through the query results and feed each document's feature
    # vector into the similarity model
    while True:
        print "Scrolling...{}".format(loopy.current_page)
        page = loopy.get_next_page()
        if page is None:
            break
        for doc in page:
            if job['data_type'] == "text" and 'text_features' in doc and 'id' in doc and \
                    len(doc['text_features']) > 0:
                image_similarity.process_vector(doc['id'], doc['text_features'])
                continue
            if job['data_type'] == "image" and 'image_features' in doc and 'id' in doc and \
                    len(doc['image_features']) > 0:
                image_similarity.process_vector(doc['id'], doc['image_features'])

    # post each cluster back to the result endpoint and mark the job as processed
    clusters = image_similarity.get_clusters()
    print 'FINISHED SIMILARITY PROCESSING: found {} clusters'.format(len(clusters))
    for cluster in clusters:
        cluster['job_monitor_id'] = job['job_id']
        loopy.post_result(job['result_url'], cluster)
    job['data'] = image_similarity.to_json()
    job['state'] = 'processed'
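
# Usage sketch (illustrative only): a job payload built from the keys that
# process_message() reads above. The URLs, timestamps, threshold and method
# values are placeholders and are not taken from the source.
if __name__ == '__main__':
    example_job = {
        'job_id': 'job-123',
        'data_type': 'image',                # or 'text'
        'similarity_threshold': '0.85',      # cast to float inside process_message
        'similarity_method': 'placeholder-method',
        'start_time_ms': 1500000000000,
        'end_time_ms': 1500003600000,
        'lang': 'en',                        # optional; adds a "where" clause to the query
        'query_url': 'http://example.com/api/query',
        'result_url': 'http://example.com/api/results',
    }
    process_message('similarity-job', example_job)
    print example_job['state']  # 'processed' on success, 'error' otherwise
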
def image_similarity_object():
    """fixture for an image similarity object

    Returns:
        ImageSimilarity -- an instantiated ImageSimilarity object
    """
    from image_similarity import ImageSimilarity
    return ImageSimilarity()
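
# Minimal pytest-style sketch of how the fixture above might be consumed. It
# assumes the fixture is registered with pytest (e.g. decorated with
# @pytest.fixture in the real test module) and relies only on the
# process_vector()/get_clusters() calls already exercised by process_message().
# The document IDs and feature vectors are illustrative placeholders.
def test_get_clusters_returns_cluster_collection(image_similarity_object):
    image_similarity_object.process_vector('doc-1', [0.1, 0.2, 0.3])
    image_similarity_object.process_vector('doc-2', [0.1, 0.2, 0.3])
    clusters = image_similarity_object.get_clusters()
    assert hasattr(clusters, '__len__')  # clusters should be a sized, iterable collection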