def init():
    """Lazily initialize the sentence-similarity search stack (idempotent).

    On first call this loads the Show and Tell captioning model, the
    Universal Sentence Encoder TF Hub module, a TF session, the FAISS
    index and the indexed file list; subsequent calls are no-ops.
    All loaded handles are published via module-level globals.
    """
    print("sent_sim_search :: init :: Start")
    global is_initialized, embed, session, index, file_lst

    # Setting logging level.
    tf.logging.set_verbosity(tf.logging.ERROR)

    if not is_initialized:
        # Loading the Show and Tell Model.
        caption_generator.init()

        # Import the Universal Sentence Encoder's TF Hub module.
        embed = hub.Module(constants.SENT_SIM_MODULE_URL)
        session = tf.Session()
        session.run(
            [tf.global_variables_initializer(), tf.tables_initializer()])
        is_initialized = True

        # Loading the FAISS Index and the file list.
        # NOTE(review): loaded only on first init (load-once globals) —
        # confirm the index is not expected to be re-read on every call.
        index = faiss.read_index(constants.FAISS_INDEX_PATH)
        # `with` guarantees the pickle file handle is closed (the original
        # left it to the garbage collector).
        with open(constants.FAISS_FILE_LST_PATH, "rb") as file_lst_file:
            file_lst = pickle.load(file_lst_file)

    print("sent_sim_search :: init :: End")
def explain(query_image_path):
    """Caption *query_image_path* and fetch the query explain plan for it.

    The generated caption is sent to the backend explain endpoint; the
    returned parse tree is rendered to a jpg via ps -> pdf -> cropped
    pdf -> jpg (external tools: ps2pdf, pdfcrop, gs).

    :param query_image_path: path of the image to explain.
    :returns: 10-tuple ``(img_caption, parse_tree, parse_tree_img,
        xml_representation, min_xml_representation, xpath,
        optimized_xpath, query_exec_time, similar_exec_time,
        similar_xpath)``.
    """
    # Initializing the caption generator model.
    caption_generator.init()

    # Obtaining the caption for the image.
    img_caption = caption_generator.get_caption(query_image_path, True)
    print("explain.py :: explain :: image caption :: ", img_caption)

    # Handling the fullstops in captions.  endswith() is safe on an empty
    # caption, where the original indexing ([-1]) would raise IndexError.
    query_caption = img_caption
    if query_caption.endswith('.'):
        query_caption = query_caption[:-1].strip()
    print("explain.py :: explain :: caption after cleaning :: ", query_caption)

    # Obtaining the explain plan contents from the backend.
    explain_req = constants.EXPLAIN_SEARCH_URL + query_caption
    explain_res_text = requests.get(explain_req).text
    explain_json = json.loads(explain_res_text)
    print("explain.py :: explain :: response from the backend :: ", explain_json)

    # Constructing the parse tree ps file.
    parse_tree = explain_json['Parse_Tree']
    print("explain.py :: explain :: parse tree :: ", parse_tree)
    create_tree_ps(parse_tree)

    # Converting the ps file to pdf and pdf to png.
    subprocess.call(
        shlex.split("ps2pdf -dEPSCrop " + constants.QIK_DATA_DIR +
                    "query_parse_tree.ps " + constants.QIK_DATA_DIR +
                    "query_parse_tree.pdf"))
    subprocess.call(
        shlex.split("pdfcrop --margins '5 10 20 30' " + constants.QIK_DATA_DIR +
                    "query_parse_tree.pdf" + " " + constants.QIK_DATA_DIR +
                    "query_parse_tree_cropped.pdf"))
    subprocess.call(
        shlex.split("gs -o " + constants.QIK_WEBAPP_PATH +
                    "query_parse_tree.jpg -sDEVICE=jpeg -r500 " +
                    constants.QIK_DATA_DIR + "query_parse_tree_cropped.pdf"))

    # Return values.  (parse_tree was already extracted above; the original
    # re-read it from explain_json here redundantly.)
    parse_tree_img = constants.QIK_WEBAPP_PATH + "query_parse_tree.jpg"
    xml_representation = explain_json['XML_Representation']
    min_xml_representation = explain_json['Minimum_XML_Representation']
    xpath = explain_json['XPath']
    optimized_xpath = explain_json['Optimized_XPath']
    query_exec_time = explain_json['Query_Exec_Time']
    similar_exec_time = explain_json['Similar_Exec_Time']
    similar_xpath = explain_json['Similar_XPath']
    return (img_caption, parse_tree, parse_tree_img, xml_representation,
            min_xml_representation, xpath, optimized_xpath, query_exec_time,
            similar_exec_time, similar_xpath)
def init():
    """Lazily bootstrap the sentence-similarity indexing models.

    Idempotent: the heavy loading happens only on the first call, after
    which the TF Hub embedder and session are kept in module globals.
    """
    print("sentence_similarity_index :: init :: Start")
    global is_initialized, embed, session

    # Keep TF quiet apart from errors.
    tf.logging.set_verbosity(tf.logging.ERROR)

    if not is_initialized:
        # The Show and Tell captioning model must be ready first.
        caption_generator.init()

        # Universal Sentence Encoder module from TF Hub, plus a session
        # with its variables and lookup tables initialized.
        embed = hub.Module(sent_sim_constants.SENT_SIM_MODULE_URL)
        session = tf.Session()
        session.run([tf.global_variables_initializer(),
                     tf.tables_initializer()])
        is_initialized = True

    print("sentence_similarity_index :: init :: End")
cap2 = re.search(r'\S (.*?) \(', line).group(0).replace(")", "").replace("(", "").strip() p2 = line.split("(")[-1].split("p=")[-1].split(")")[0] if "." in cap2: json_data['cap3_cap'] = cap2[:-2] else: json_data['cap3_cap'] = cap2 json_data['cap3_p'] = p2 # Adding the dependency tree. json_data['cap3_dep_tree'] = get_dep_tree(json_data['cap3_cap']) print(json_data) # Posting the captions to the index engine. req = constants.INDEX_ENGINE_URL + urllib.parse.quote(str(json_data)) requests.get(req) print("Finished :: ", self.getName()) if __name__ == "__main__": # Loading the object detection model. detect_objects.init() # Loading the Show and Tell Model. caption_generator.init() # Starting the producer process Producer().start() # Starting the consumer process. Consumer().start()
def _result_key(res_map):
    """Build the '<imageURL>:: <caption>' result key for one backend hit.

    Temp Fix: the IndexEngine still returns the old Tomcat IP, so it is
    rewritten here.  Needs to be handled in the IndexEngine.
    """
    image_path = res_map['fileURL'].replace(constants.TOMCAT_OLD_IP_ADDR,
                                            constants.TOMCAT_IP_ADDR)
    return image_path + ":: " + res_map['caption']


def _rank_by_ted(res, query_tree, tree_key):
    """Rank results by tree edit distance (APTED, unit edit costs) between
    *query_tree* and each result's ``res_map[tree_key]`` tree.

    :returns: list of (key, distance) pairs, smallest distance first.
    """
    caption_ranks = {}
    for res_map in res:
        ted = APTED(apth.Tree.from_text(query_tree),
                    apth.Tree.from_text(res_map[tree_key]),
                    PerEditOperationConfig(1, 1, 1)).compute_edit_distance()
        caption_ranks[_result_key(res_map)] = ted
    return sorted(caption_ranks.items(), key=lambda kv: kv[1])


def _unranked(res):
    """Form the return image set without ranking (every entry scored 1)."""
    caption_ranks = {_result_key(res_map): 1 for res_map in res}
    return sorted(caption_ranks.items(), key=lambda kv: kv[1], reverse=True)


def qik_search(query_image, ranking_func=None, obj_det_enabled=False,
               pure_objects_search=False, fetch_count=None):
    """Search the index for images matching *query_image*.

    :param query_image: path of the query image.
    :param ranking_func: 'Parse Tree', 'Dependency Tree', or None for
        unranked results.
    :param obj_det_enabled: also query the backend with detected objects.
    :param pure_objects_search: return only the object-detection results
        (only meaningful with ``obj_det_enabled``).
    :param fetch_count: if given, truncate the result list to this length.
    :returns: ``(query_caption, sorted_results, similar_images)`` where
        sorted_results is a list of ('<imageURL>:: <caption>', score) pairs.
    """
    obj_res = None
    cap_res = None
    similar_images = None
    sortedCaptionRanksDict = {}

    # Noting the time taken for further auditing.
    start_time = datetime.datetime.now()

    if obj_det_enabled:
        # Initial Loading of the object detection model.
        detect_objects.init()

        # Detecting objects.
        json_data = {'objects': detect_objects.get_detected_objects(
            query_image, constants.OBJECT_DETECTED_THRESHOLD)}
        print("qik_search :: qik_search :: objects :: ", json_data['objects'])

        # Querying the backend to fetch the list of images and captions
        # based on the objects detected.
        obj_req = constants.DETECT_OBJECTS_URL + urllib.parse.quote(
            str(json_data))
        obj_res = json.loads(requests.get(obj_req).text)
        print("qik_search :: qik_search :: obj_res :: ", obj_res)

        if pure_objects_search:
            if obj_res is not None:
                sortedCaptionRanksDict = _unranked(obj_res)

                # Auditing the QIK execution time.
                print("QIK Execution time :: ",
                      (datetime.datetime.now() - start_time))
                if sortedCaptionRanksDict and fetch_count is not None:
                    sortedCaptionRanksDict = sortedCaptionRanksDict[:fetch_count]
                print("sortedCaptionRanksDict :: ", sortedCaptionRanksDict)
            return "Query Image", sortedCaptionRanksDict, None

    # Initial Loading of the caption generator model, then captioning.
    caption_generator.init()
    query = caption_generator.get_caption(query_image, True)

    # Handling the fullstops in captions.  endswith() is safe on an empty
    # caption, where the original indexing ([-1]) would raise IndexError.
    if query.endswith('.'):
        query = query[:-1].strip()
    print("Caption Generated :: ", query)

    # Querying the backend to fetch the list of images and captions.
    cap_req = constants.SOLR_QUERY_URL + query
    cap_res = json.loads(requests.get(cap_req).text)
    print("QIK Captions Response :: ", cap_res)
    print("QIK Fetch Execution time :: ",
          (datetime.datetime.now() - start_time))

    # Merging the two responses.
    if obj_res is None:
        res = cap_res
    elif cap_res is None:
        res = obj_res
    else:
        res = obj_res + cap_res
    print("QIK Combined Response :: ", res)

    # Forming the return image set.  The query's parse/dependency tree is
    # generated only in the branch that actually ranks with it.
    if res is not None:
        if ranking_func == 'Parse Tree':
            # Performing TED based Ranking on the parse tree.
            queryParseTree = parse_show_tree.parseSentence(query)
            sortedCaptionRanksDict = _rank_by_ted(res, queryParseTree,
                                                  'parseTree')
        elif ranking_func == 'Dependency Tree':
            # Performing TED based Ranking on the dependency tree.
            queryDepTree = parse_show_tree.dependencyParser(query)
            sortedCaptionRanksDict = _rank_by_ted(res, queryDepTree,
                                                  'depTree')
        else:
            # Forming the return image set (Without ranking).
            sortedCaptionRanksDict = _unranked(res)

    similar_images = get_similar_images(query)
    print("qik_search :: qik_search :: similar_images :: ", similar_images)

    # Auditing the QIK execution time.
    print("QIK Execution time :: ", (datetime.datetime.now() - start_time))

    if sortedCaptionRanksDict and fetch_count is not None:
        sortedCaptionRanksDict = sortedCaptionRanksDict[:fetch_count]
    print("sortedCaptionRanksDict :: ", sortedCaptionRanksDict)
    return query, sortedCaptionRanksDict, similar_images