def init():
    """Initialise the sentence-similarity search pipeline.

    Loads the Show and Tell caption generator, the Universal Sentence
    Encoder TF Hub module, a TF1 session, the FAISS index and the indexed
    file list into module-level globals.  Safe to call repeatedly: the
    heavy loading runs once, guarded by ``is_initialized``.
    """
    print("sent_sim_search :: init :: Start")

    global is_initialized, embed, session, index, file_lst

    # Setting logging level (TF1-style logging API).
    tf.logging.set_verbosity(tf.logging.ERROR)

    if not is_initialized:
        # Loading the Show and Tell Model.
        caption_generator.init()

        # Import the Universal Sentence Encoder's TF Hub module.
        embed = hub.Module(constants.SENT_SIM_MODULE_URL)

        # The initializers must run after the hub module is added to the
        # graph, otherwise its variables/tables are not initialised.
        session = tf.Session()
        session.run(
            [tf.global_variables_initializer(),
             tf.tables_initializer()])

        # Loading the FAISS Index.
        index = faiss.read_index(constants.FAISS_INDEX_PATH)

        # Loading the file list.  The context manager closes the handle
        # (the original leaked it).  NOTE(review): pickle.load is only
        # safe on trusted local files.
        with open(constants.FAISS_FILE_LST_PATH, "rb") as f:
            file_lst = pickle.load(f)

        # Flag success only once *everything* above has loaded; the
        # original set this before reading the FAISS index and file list,
        # so a failure there left the module half-initialised but
        # reported as ready.
        is_initialized = True

    print("sent_sim_search :: init :: End")
# --- Exemple #2 (snippet separator; score: 0) ---
def explain(query_image_path):
    """Caption the image at *query_image_path* and explain the backend query.

    Generates a caption, asks the explain backend for the query plan built
    from that caption, renders the parse tree to a JPEG for the webapp
    (ps -> pdf -> cropped pdf -> jpg), and returns the plan details.

    Returns a 10-tuple: (img_caption, parse_tree, parse_tree_img,
    xml_representation, min_xml_representation, xpath, optimized_xpath,
    query_exec_time, similar_exec_time, similar_xpath).
    """
    # Initializing the caption generator model.
    caption_generator.init()

    # Obtaining the caption for the image.
    img_caption = caption_generator.get_caption(query_image_path, True)
    print("explain.py :: explain :: image caption :: ", img_caption)

    # Handling the fullstops in captions.  endswith() also guards against
    # an empty caption (the original indexed img_caption[-1], which raises
    # IndexError on "").
    query_caption = img_caption
    if query_caption.endswith('.'):
        query_caption = query_caption[:-1].strip()
    print("explain.py :: explain :: caption after cleaning :: ", query_caption)

    # Obtaining the explain plan contents from the backend.  The caption
    # is percent-encoded, consistent with the other backend requests in
    # this project (the original appended it raw, which breaks on special
    # characters).
    explain_req = constants.EXPLAIN_SEARCH_URL + urllib.parse.quote(
        query_caption)
    explain_res_text = requests.get(explain_req).text
    explain_json = json.loads(explain_res_text)
    print("explain.py :: explain :: response from the backend :: ",
          explain_json)

    # Constructing the parse tree ps file.
    parse_tree = explain_json['Parse_Tree']
    print("explain.py :: explain :: parse tree :: ", parse_tree)
    create_tree_ps(parse_tree)

    # Converting the ps file to pdf and pdf to png.
    subprocess.call(
        shlex.split("ps2pdf -dEPSCrop " + constants.QIK_DATA_DIR +
                    "query_parse_tree.ps " + constants.QIK_DATA_DIR +
                    "query_parse_tree.pdf"))
    subprocess.call(
        shlex.split("pdfcrop --margins '5 10 20 30' " +
                    constants.QIK_DATA_DIR + "query_parse_tree.pdf" + " " +
                    constants.QIK_DATA_DIR + "query_parse_tree_cropped.pdf"))
    subprocess.call(
        shlex.split("gs -o " + constants.QIK_WEBAPP_PATH +
                    "query_parse_tree.jpg -sDEVICE=jpeg -r500 " +
                    constants.QIK_DATA_DIR + "query_parse_tree_cropped.pdf"))

    # Return values (parse_tree was already extracted above; the original
    # looked it up a second time here).
    parse_tree_img = constants.QIK_WEBAPP_PATH + "query_parse_tree.jpg"
    xml_representation = explain_json['XML_Representation']
    min_xml_representation = explain_json['Minimum_XML_Representation']
    xpath = explain_json['XPath']
    optimized_xpath = explain_json['Optimized_XPath']
    query_exec_time = explain_json['Query_Exec_Time']
    similar_exec_time = explain_json['Similar_Exec_Time']
    similar_xpath = explain_json['Similar_XPath']

    return img_caption, parse_tree, parse_tree_img, xml_representation, min_xml_representation, xpath, optimized_xpath, query_exec_time, similar_exec_time, similar_xpath
def init():
    """Initialise the sentence-similarity indexing pipeline.

    Loads the Show and Tell caption generator, the Universal Sentence
    Encoder TF Hub module and a TF1 session into module-level globals.
    Idempotent: the heavy loading is guarded by ``is_initialized``.
    """
    print("sentence_similarity_index :: init :: Start")

    global is_initialized, embed, session

    # Setting logging level (TF1-style logging API).
    tf.logging.set_verbosity(tf.logging.ERROR)

    if not is_initialized:
        # Loading the Show and Tell Model.
        caption_generator.init()

        # Import the Universal Sentence Encoder's TF Hub module.
        # NOTE: the hub module must be added to the graph before the
        # variable/table initializers below are run.
        embed = hub.Module(sent_sim_constants.SENT_SIM_MODULE_URL)

        session = tf.Session()
        session.run(
            [tf.global_variables_initializer(),
             tf.tables_initializer()])
        is_initialized = True

    print("sentence_similarity_index :: init :: End")
                    # NOTE(review): truncated fragment -- the enclosing method
                    # (presumably a Thread subclass's run(), given the
                    # self.getName() call below) starts outside this view.
                    # Extract the caption text between the first non-space
                    # token and the opening parenthesis, dropping any
                    # parentheses that slipped into the match.
                    cap2 = re.search(r'\S (.*?) \(', line).group(0).replace(")", "").replace("(", "").strip()
                    # Probability value from the trailing "(p=...)" segment.
                    p2 = line.split("(")[-1].split("p=")[-1].split(")")[0]
                    if "." in cap2:
                        # Strip the trailing ". " (full stop + space) --
                        # TODO confirm the two-character slice is intended.
                        json_data['cap3_cap'] = cap2[:-2]
                    else:
                        json_data['cap3_cap'] = cap2
                    json_data['cap3_p'] = p2
                    
                    # Adding the dependency tree.
                    json_data['cap3_dep_tree'] = get_dep_tree(json_data['cap3_cap'])
                    
        print(json_data)

        # Posting the captions to the index engine.
        req = constants.INDEX_ENGINE_URL + urllib.parse.quote(str(json_data))
        requests.get(req)

        print("Finished :: ", self.getName())

if __name__ == "__main__":
    # Warm up both models up front so the worker threads below do not
    # each pay the loading cost themselves.
    detect_objects.init()       # object detection model
    caption_generator.init()    # Show and Tell captioning model

    # Launch the producer first, then the consumer.
    for worker in (Producer(), Consumer()):
        worker.start()
# --- Exemple #5 (snippet separator; score: 0) ---
def _normalize_image_url(image):
    """Swap the old Tomcat IP for the current one in an image URL.

    Temp fix; should really be handled in the IndexEngine.
    """
    return image.replace(constants.TOMCAT_OLD_IP_ADDR, constants.TOMCAT_IP_ADDR)


def _result_key(res_map):
    """Build the '<image_url>:: <caption>' key used in the ranking dicts."""
    return _normalize_image_url(res_map['fileURL']) + ":: " + res_map['caption']


def _rank_by_ted(res, query_tree, tree_key):
    """Rank backend results by APTED tree edit distance to *query_tree*.

    ``tree_key`` selects which tree of each result to compare
    ('parseTree' or 'depTree').  Returns (key, distance) pairs sorted
    ascending -- a smaller edit distance means a closer match.
    """
    ranks = {}
    for res_map in res:
        ted = APTED(apth.Tree.from_text(query_tree),
                    apth.Tree.from_text(res_map[tree_key]),
                    PerEditOperationConfig(1, 1, 1)).compute_edit_distance()
        ranks[_result_key(res_map)] = ted
    return sorted(ranks.items(), key=lambda kv: kv[1])


def _unranked(res):
    """Return (key, 1) pairs for every result, with no ranking applied."""
    ranks = {_result_key(res_map): 1 for res_map in res}
    print(ranks)
    return sorted(ranks.items(), key=lambda kv: kv[1], reverse=True)


def qik_search(query_image,
               ranking_func=None,
               obj_det_enabled=False,
               pure_objects_search=False,
               fetch_count=None):
    """Search the index for images similar to *query_image*.

    Args:
        query_image: path of the query image.
        ranking_func: 'Parse Tree', 'Dependency Tree', or None for the
            unranked result set.
        obj_det_enabled: also query the backend with detected objects.
        pure_objects_search: return the object-detection results alone
            (skips caption generation entirely).
        fetch_count: if given, truncate the ranked results to this many.

    Returns:
        (query, ranked_results, similar_images) where ranked_results is a
        list of ('<image_url>:: <caption>', rank) pairs (an empty dict when
        nothing was found, matching the original behaviour).
    """
    obj_res = None
    similar_images = None
    sorted_ranks = {}  # stays an empty dict when no results are found

    # Noting the start time for auditing.  (Renamed from ``time``, which
    # shadowed the stdlib module name.)
    start_time = datetime.datetime.now()

    if obj_det_enabled:
        # Initial loading of the object detection model.
        detect_objects.init()

        # Detecting objects.
        json_data = {
            'objects': detect_objects.get_detected_objects(
                query_image, constants.OBJECT_DETECTED_THRESHOLD)
        }
        print("qik_search :: qik_search :: objects :: ", json_data['objects'])

        # Querying the backend to fetch the list of images and captions
        # based on the objects detected.
        obj_req = constants.DETECT_OBJECTS_URL + urllib.parse.quote(
            str(json_data))
        obj_res = json.loads(requests.get(obj_req).text)
        print("qik_search :: qik_search :: obj_res :: ", obj_res)

    if pure_objects_search:
        if obj_res is not None:
            sorted_ranks = _unranked(obj_res)

            # Auditing the QIK execution time.
            print("QIK Execution time :: ", (datetime.datetime.now() - start_time))

        if sorted_ranks and fetch_count is not None:
            print("sortedCaptionRanksDict :: ", sorted_ranks[:fetch_count])
            return "Query Image", sorted_ranks[:fetch_count], None
        print("sortedCaptionRanksDict :: ", sorted_ranks)
        return "Query Image", sorted_ranks, None

    # Initial loading of the caption generator model.
    caption_generator.init()

    # Generating the caption for the query image.
    query = caption_generator.get_caption(query_image, True)

    # Handling the fullstops in captions.  endswith() also guards against
    # an empty caption (the original indexed query[-1]).
    if query.endswith('.'):
        query = query[:-1].strip()
    print("Caption Generated :: ", query)

    # Querying the backend to fetch the list of images and captions.
    cap_req = constants.SOLR_QUERY_URL + query
    cap_res = json.loads(requests.get(cap_req).text)
    print("QIK Captions Response :: ", cap_res)
    print("QIK Fetch Execution time :: ", (datetime.datetime.now() - start_time))

    # Merging the two responses.
    if obj_res is None:
        res = cap_res
    elif cap_res is None:
        res = obj_res
    else:
        res = obj_res + cap_res
    print("QIK Combined Response :: ", res)

    # Forming the return image set.  Only the tree actually needed by the
    # selected ranking function is computed (the original always built
    # both the parse and the dependency tree).
    if res is not None:
        if ranking_func == 'Parse Tree':
            query_parse_tree = parse_show_tree.parseSentence(query)
            sorted_ranks = _rank_by_ted(res, query_parse_tree, 'parseTree')
        elif ranking_func == 'Dependency Tree':
            query_dep_tree = parse_show_tree.dependencyParser(query)
            sorted_ranks = _rank_by_ted(res, query_dep_tree, 'depTree')
        else:
            # No ranking requested.
            sorted_ranks = _unranked(res)

        similar_images = get_similar_images(query)
        print("qik_search :: qik_search :: similar_images :: ", similar_images)

    # Auditing the QIK execution time.
    print("QIK Execution time :: ", (datetime.datetime.now() - start_time))

    if sorted_ranks and fetch_count is not None:
        print("sortedCaptionRanksDict :: ", sorted_ranks[:fetch_count])
        return query, sorted_ranks[:fetch_count], similar_images
    print("sortedCaptionRanksDict :: ", sorted_ranks)
    return query, sorted_ranks, similar_images