Example #1
0
 def __init__(self, brand_path, cat_path, store_path):
     """Create the tagger and the matcher from three data directories.

     Each directory is expected to hold a CSV file (used by ``QueryTagger``)
     and a ``db/`` sub-directory with a ``.db`` file (used by
     ``QueryMatcher``). Paths are built by plain string concatenation, so the
     arguments should not end with a slash.

     Args:
         brand_path: Directory containing ``brands.csv`` and ``db/brands.db``.
         cat_path: Directory containing ``categories.csv`` and
             ``db/categories.db``.
         store_path: Directory containing ``stores.csv`` and ``db/stores.db``.
     """
     csv_sources = (
         brand_path + "/brands.csv",
         cat_path + "/categories.csv",
         store_path + "/stores.csv",
     )
     self.tagger = QueryTagger(*csv_sources)

     db_sources = (
         brand_path + "/db/brands.db",
         cat_path + "/db/categories.db",
         store_path + "/db/stores.db",
     )
     self.matcher = QueryMatcher(*db_sources)
Example #2
0
 def __init__(self, k1=1.2, k2=100, b=0.75):
     """Store the tuning constants and create the per-query state.

     Args:
         k1: Stored as ``self.k1``.
         k2: Stored as ``self.k2``.
         b: Stored as ``self.b``.
     """
     # Tuning constants are plain attributes; nothing below depends on the
     # order in which they are stored.
     self.k1, self.k2, self.b = k1, k2, b

     # The helper has to exist before calcAVDL() runs.
     # NOTE(review): presumably calcAVDL reads corpus statistics from it -
     # confirm against the method's definition.
     self.helper = Helper()
     self.calcAVDL()

     self.queryMatcher = QueryMatcher()

     # Per-query accumulators; missing keys read as 0.
     self.queryFrequencyDict = defaultdict(int)
     self.docScoreDict = defaultdict(int)
Example #3
0
    def __init__(self, parent=None, show=True):
        """Set up the main query window: state, menus, load button, geometry.

        Reads ``config.json`` from the current working directory and builds a
        ``QueryMatcher`` over the configured ``FEATURE_DATA_FILE``.

        Args:
            parent: Parent widget forwarded to ``Qt.QMainWindow.__init__``.
            show: When truthy, the window is shown at the end of construction.
        """
        Qt.QMainWindow.__init__(self, parent)
        with open('config.json') as f:
            self.config_data = json.load(f)
        self.query_matcher = QueryMatcher(self.config_data["FEATURE_DATA_FILE"])
        self.supported_file_types = [".ply", ".off"]
        self.buttons = {}
        self.ds = reader.DataSet("")
        self.meshes = []
        self.normalizer = Normalizer()
        self.smlw = None
        self.setWindowTitle('Source Mesh Window')
        self.frame = Qt.QFrame()
        self.QTIplotter = None
        self.vlayout = Qt.QVBoxLayout()
        self.frame.setLayout(self.vlayout)
        self.setCentralWidget(self.frame)
        self.hist_dict = {}
        self.setAcceptDrops(True)

        # Create main menu
        mainMenu = self.menuBar()
        fileMenu = mainMenu.addMenu('File')
        exitAction = Qt.QAction('Exit', self)
        exitAction.setShortcut('Ctrl+Q')
        exitAction.triggered.connect(self.close)
        fileMenu.addAction(exitAction)

        viewMenu = mainMenu.addMenu('View')
        # Separate name: this action plots t-SNE, it does not exit.
        plotTsneAction = Qt.QAction('Plot tSNE', self)
        plotTsneAction.triggered.connect(self.plot_tsne)
        viewMenu.addAction(plotTsneAction)

        # Create load button and init action
        self.load_button = QPushButton("Load or drop mesh to query")
        self.load_button.clicked.connect(lambda: self.load_and_prep_query_mesh(self.open_file_name_dialog()))
        self.load_button.setFont(QtGui.QFont("arial", 30))
        # Create Plots widget
        self.graphWidget = pg.PlotWidget()
        self.graphWidget.setBackground('w')

        # Only the label mapping is needed here; the two size values returned
        # alongside it are ignored.
        _, _, mapping_of_labels = get_sizes_features(features_file=self.config_data["FEATURE_DATA_FILE"],with_labels=True)

        # self.hist_labels = list({**FeatureExtractor.get_pipeline_functions()[1]}.values())
        self.hist_labels = [val for key, val in mapping_of_labels.items() if "hist_" in key]
        self.tableWidget = TableWidget({}, self, {})
        self.tableWidget.hide()
        self.vlayout.addWidget(self.load_button)

        # Position MainWindow: top-left of the available screen area, 40% of
        # its width, and 50 px short of its full height.
        available = QDesktopWidget().availableGeometry()
        self.move(available.topLeft())
        # Qt expects integer pixel sizes; passing a float raises TypeError
        # with PyQt5 on Python >= 3.10, so truncate explicitly.
        self.resize(int(available.width() * 0.4), available.height() - 50)

        if show:
            self.show()
Example #4
0
 def __init__(self, feature_data_file=FEATURE_DATA_FILE, label_coarse=False):
     """Load the feature database and count how many meshes each class has.

     Args:
         feature_data_file: Feature file handed to ``QueryMatcher``.
         label_coarse: When truthy, class counts are taken from the
             ``label_coarse`` column (renamed to ``"label"``) instead of the
             ``label`` column. Also forwarded to ``QueryMatcher``.
     """
     self.label_coarse = label_coarse
     self.reader = DataSet("")

     matcher = QueryMatcher(feature_data_file, label_coarse=label_coarse)
     self.query_matcher = matcher
     self.feature_db_flattened = matcher.features_flattened
     self.feature_db_raw = matcher.features_raw

     # Both feature tables are indexed by mesh name.
     flat_frame = pd.DataFrame(self.feature_db_flattened).set_index('name')
     self.features_df_flat = flat_frame
     self.features_df_raw = pd.DataFrame(self.feature_db_raw).set_index('name')

     # The fine-grained counts are always computed first; the coarse counts
     # replace them only when requested.
     class_counts = flat_frame['label'].value_counts()
     if label_coarse:
         class_counts = flat_frame['label_coarse'].value_counts().rename("label")
     self.mesh_classes_count = class_counts
    if workers == 1:
        for params in tqdm(set(combinations), total=len(combinations)):
            if os.path.exists(params[-1]):
                continue
            save_all_in_one(params)

    if workers > 1:
        pool = mp.Pool(workers)
        results = pool.imap(save_all_in_one,
                            tqdm(set(combinations), total=len(combinations)))
        _ = list(results)


if __name__ == "__main__":
    # Command-line flags, without the script name.
    arguments = sys.argv[1:]
    # The matcher is built whether or not any flag was given.
    query_matcher = QueryMatcher(FEATURE_DATA_FILE)

    # "-p" runs the all-in-one experiment with 6 as its second argument.
    run_experiment = "-p" in arguments
    if run_experiment:
        run_all_in_one_experiment(query_matcher, 6)

        # if "-p" not in arguments:
        #     show_all_in_one(query_matcher, 0, 20, int(10e7), 1, show=True, top_n=10, reverse=False, strange_scaling=False, is_coarse=True)

        # if "-p" not in arguments:
        #     show_all_in_one(query_matcher, 0, 19, int(10e7), 1, show=True, top_n=15, reverse=False, strange_scaling=False, is_coarse=False)
        # tsne_result_fine_data = compute_tsne_for_figure(class_to_id=class_to_id,
        #                                                 flat_data=fine_data,
        #                                                 perplexity=19,
        #                                                 n_iter=n_iter,
        #                                                 lr=1,
Example #6
0
__author__ = 'ASUS-PC'
from query_matcher import QueryMatcher
from spacy.en import English

# This script generates the MySQL query that corresponds to a user's question.
# Knowledge should be added to the Data file.
# Note: spaCy is a bit slow, and its entity recognition is not good enough.
nlp = English()
import time
var = QueryMatcher(nlp)

# The same question is translated ten times; each round prints the result and
# the wall-clock seconds the round took.
stmnt = "is there any pizza less than 400 rupees?"
for _ in range(10):
    started = time.time()
    query, score = var.getQuery(stmnt)
    print("user query: " + stmnt)
    print("mysql query: " + query)
    print("score: " + str(score))
    elapsed = time.time() - started
    print(elapsed)
import nltk
Example #7
0
    def __init__(self, feature_dict):
        """Build the widget used to match a query mesh against the database.

        Reads ``config.json`` from the current working directory, creates a
        ``QueryMatcher`` over the configured ``FEATURE_DATA_FILE`` and lays
        out the distance-function selectors, the K slider, the three weight
        sliders, the action buttons and the result list.

        Args:
            feature_dict: Features of the query mesh; stored as
                ``self.query_mesh_features`` and later passed to
                ``QueryMatcher.match_with_db``.
        """
        super().__init__()
        with open('config.json') as f:
            self.config_data = json.load(f)
        self.smw_list = []
        self.query_matcher = QueryMatcher(self.config_data["FEATURE_DATA_FILE"])
        self.query_mesh_features = feature_dict
        self.layout = Qt.QVBoxLayout()
        self.setLayout(self.layout)
        self.setWindowTitle('Similar Meshes Widget')
        self.smw = None

        # Display name -> distance callable. Key order is the order shown in
        # the combo boxes.
        self.scalarDistancesDict = {
            "Cosine": cosine,
            "Manhattan": cityblock,
            "K-Nearest Neighbors": QueryMatcher.perform_knn,
            "Squared Euclidian": sqeuclidean,
            "Euclidean": euclidean
        }
        # Histogram and skeleton features offer the same choices with EMD
        # listed first; each gets its own dict object.
        self.histDistancesDict = {"EMD": wasserstein_distance, **self.scalarDistancesDict}
        self.skeletonDistancesDict = dict(self.histDistancesDict)

        self.scalarDistanceMethodList = Qt.QComboBox()
        self.scalarDistanceMethodList.addItems(self.scalarDistancesDict.keys())

        self.histDistanceMethodList = Qt.QComboBox()
        self.histDistanceMethodList.addItems(self.histDistancesDict.keys())

        self.skeletonDistancesMethodList = Qt.QComboBox()
        self.skeletonDistancesMethodList.addItems(self.skeletonDistancesDict.keys())

        self.sliderK = QSlider(QtCore.Horizontal)
        self.sliderK.setRange(5, 20)
        self.sliderK.valueChanged.connect(self.update_K_label)
        self.Klabel = Qt.QLabel("K: 5", self)

        def make_weight_slider(initial):
            # QSlider only accepts ints (a float raises TypeError with PyQt5
            # on Python >= 3.10), so the default is truncated explicitly. Qt
            # then clamps it to the 0..100 range.
            slider = QSlider(QtCore.Horizontal)
            slider.setRange(0, 100)
            slider.setValue(int(initial))
            return slider

        # Signals are connected only after the initial value is set, so the
        # label-update slots never run before their labels exist.
        self.scalarSliderWeights = make_weight_slider(3.84)
        self.scalarSliderWeights.valueChanged.connect(self.update_scalar_label)
        self.scalarLabelWeights = Qt.QLabel(f"Scalar weight: {self.scalarSliderWeights.value()}", self)

        self.histSliderWeights = make_weight_slider(284.26)
        self.histSliderWeights.valueChanged.connect(self.update_hist_label)
        self.histLabelWeights = Qt.QLabel(f"Histogram weight: {self.histSliderWeights.value()}", self)

        self.skeletonSliderWeights = make_weight_slider(2.89)
        self.skeletonSliderWeights.valueChanged.connect(self.update_skel_label)
        self.skeletonLabelWeights = Qt.QLabel(f"Skeleton weight: {self.skeletonSliderWeights.value()}", self)

        self.list = QListWidget()
        self.list.setViewMode(Qt.QListView.ListMode)
        self.list.setIconSize(Qt.QSize(150, 150))

        self.matchButton = QPushButton('Match with Database', self)
        self.matchButton.clicked.connect(self.update_similar_meshes_list)

        # The plot button stays disabled until the list's current item changes.
        self.plotButton = QPushButton('Plot selected mesh', self)
        self.plotButton.clicked.connect(self.plot_selected_mesh)
        self.plotButton.setEnabled(False)
        self.list.currentItemChanged.connect(lambda: self.plotButton.setEnabled(True))

        self.layout.addWidget(Qt.QLabel("Scalar Distance Function", self))
        self.layout.addWidget(self.scalarDistanceMethodList)
        self.layout.addWidget(Qt.QLabel("Histogram Distance Function", self))
        self.layout.addWidget(self.histDistanceMethodList)
        self.layout.addWidget(Qt.QLabel("Skeleton Distance Function", self))
        self.layout.addWidget(self.skeletonDistancesMethodList)
        self.layout.addWidget(self.Klabel)
        self.layout.addWidget(self.sliderK)
        self.layout.addWidget(self.scalarLabelWeights)
        self.layout.addWidget(self.scalarSliderWeights)
        self.layout.addWidget(self.histLabelWeights)
        self.layout.addWidget(self.histSliderWeights)
        self.layout.addWidget(self.skeletonLabelWeights)
        self.layout.addWidget(self.skeletonSliderWeights)
        self.layout.addWidget(self.matchButton)
        self.layout.addWidget(self.plotButton)
        self.layout.addWidget(self.list)

        # Position MainWindow: 20% of the available width, placed 40% of the
        # way across the screen. Qt geometry calls need ints, hence int().
        available = QDesktopWidget().availableGeometry()
        self.move(int(available.width() * 0.4), 0)
        self.resize(int(available.width() * 0.2), available.height() - 50)
Example #8
0
class SimilarMeshesListWindow(Qt.QWidget):
    """Widget that matches a query mesh against the database and lists hits.

    The user picks one distance function per feature group (scalar,
    histogram, skeleton), a K value and a weight per group. "Match with
    Database" fills the list with the returned meshes, and "Plot selected
    mesh" opens the chosen one in a ``SimilarMeshWindow``.
    """

    def __init__(self, feature_dict):
        """Build the widget used to match a query mesh against the database.

        Reads ``config.json`` from the current working directory and creates
        a ``QueryMatcher`` over the configured ``FEATURE_DATA_FILE``.

        Args:
            feature_dict: Features of the query mesh; stored as
                ``self.query_mesh_features`` and later passed to
                ``QueryMatcher.match_with_db``.
        """
        super().__init__()
        with open('config.json') as f:
            self.config_data = json.load(f)
        self.smw_list = []
        self.query_matcher = QueryMatcher(self.config_data["FEATURE_DATA_FILE"])
        self.query_mesh_features = feature_dict
        self.layout = Qt.QVBoxLayout()
        self.setLayout(self.layout)
        self.setWindowTitle('Similar Meshes Widget')
        self.smw = None

        # Display name -> distance callable. Key order is the order shown in
        # the combo boxes.
        self.scalarDistancesDict = {
            "Cosine": cosine,
            "Manhattan": cityblock,
            "K-Nearest Neighbors": QueryMatcher.perform_knn,
            "Squared Euclidian": sqeuclidean,
            "Euclidean": euclidean
        }
        # Histogram and skeleton features offer the same choices with EMD
        # listed first; each gets its own dict object.
        self.histDistancesDict = {"EMD": wasserstein_distance, **self.scalarDistancesDict}
        self.skeletonDistancesDict = dict(self.histDistancesDict)

        self.scalarDistanceMethodList = Qt.QComboBox()
        self.scalarDistanceMethodList.addItems(self.scalarDistancesDict.keys())

        self.histDistanceMethodList = Qt.QComboBox()
        self.histDistanceMethodList.addItems(self.histDistancesDict.keys())

        self.skeletonDistancesMethodList = Qt.QComboBox()
        self.skeletonDistancesMethodList.addItems(self.skeletonDistancesDict.keys())

        self.sliderK = QSlider(QtCore.Horizontal)
        self.sliderK.setRange(5, 20)
        self.sliderK.valueChanged.connect(self.update_K_label)
        self.Klabel = Qt.QLabel("K: 5", self)

        def make_weight_slider(initial):
            # QSlider only accepts ints (a float raises TypeError with PyQt5
            # on Python >= 3.10), so the default is truncated explicitly. Qt
            # then clamps it to the 0..100 range.
            slider = QSlider(QtCore.Horizontal)
            slider.setRange(0, 100)
            slider.setValue(int(initial))
            return slider

        # Signals are connected only after the initial value is set, so the
        # label-update slots never run before their labels exist.
        self.scalarSliderWeights = make_weight_slider(3.84)
        self.scalarSliderWeights.valueChanged.connect(self.update_scalar_label)
        self.scalarLabelWeights = Qt.QLabel(f"Scalar weight: {self.scalarSliderWeights.value()}", self)

        self.histSliderWeights = make_weight_slider(284.26)
        self.histSliderWeights.valueChanged.connect(self.update_hist_label)
        self.histLabelWeights = Qt.QLabel(f"Histogram weight: {self.histSliderWeights.value()}", self)

        self.skeletonSliderWeights = make_weight_slider(2.89)
        self.skeletonSliderWeights.valueChanged.connect(self.update_skel_label)
        self.skeletonLabelWeights = Qt.QLabel(f"Skeleton weight: {self.skeletonSliderWeights.value()}", self)

        self.list = QListWidget()
        self.list.setViewMode(Qt.QListView.ListMode)
        self.list.setIconSize(Qt.QSize(150, 150))

        self.matchButton = QPushButton('Match with Database', self)
        self.matchButton.clicked.connect(self.update_similar_meshes_list)

        # The plot button stays disabled until the list's current item changes.
        self.plotButton = QPushButton('Plot selected mesh', self)
        self.plotButton.clicked.connect(self.plot_selected_mesh)
        self.plotButton.setEnabled(False)
        self.list.currentItemChanged.connect(lambda: self.plotButton.setEnabled(True))

        self.layout.addWidget(Qt.QLabel("Scalar Distance Function", self))
        self.layout.addWidget(self.scalarDistanceMethodList)
        self.layout.addWidget(Qt.QLabel("Histogram Distance Function", self))
        self.layout.addWidget(self.histDistanceMethodList)
        self.layout.addWidget(Qt.QLabel("Skeleton Distance Function", self))
        self.layout.addWidget(self.skeletonDistancesMethodList)
        self.layout.addWidget(self.Klabel)
        self.layout.addWidget(self.sliderK)
        self.layout.addWidget(self.scalarLabelWeights)
        self.layout.addWidget(self.scalarSliderWeights)
        self.layout.addWidget(self.histLabelWeights)
        self.layout.addWidget(self.histSliderWeights)
        self.layout.addWidget(self.skeletonLabelWeights)
        self.layout.addWidget(self.skeletonSliderWeights)
        self.layout.addWidget(self.matchButton)
        self.layout.addWidget(self.plotButton)
        self.layout.addWidget(self.list)

        # Position MainWindow: 20% of the available width, placed 40% of the
        # way across the screen. Qt geometry calls need ints, hence int().
        available = QDesktopWidget().availableGeometry()
        self.move(int(available.width() * 0.4), 0)
        self.resize(int(available.width() * 0.2), available.height() - 50)

    def closeEvent(self, event):
        """Schedule the currently open mesh window, if any, for deletion."""
        if self.smw:
            self.smw.deleteLater()

    def update_similar_meshes_list(self):
        """Run the match with the current UI settings and refill the list.

        If KNN is selected for the scalar or histogram features, all three
        selectors are switched to KNN and KNN is used for the whole pipeline,
        so the query that runs matches what the selectors display.
        """
        scalarDistFunction = self.scalarDistancesDict[self.scalarDistanceMethodList.currentText()]
        histDistFunction = self.histDistancesDict[self.histDistanceMethodList.currentText()]
        skelDistFunction = self.skeletonDistancesDict[self.skeletonDistancesMethodList.currentText()]

        # `(a or b) == knn` would only ever compare `a`, because a function
        # object is always truthy; test both selections explicitly.
        # NOTE(review): the skeleton selector alone does not trigger the sync,
        # matching the original condition - confirm that is intended.
        knn = QueryMatcher.perform_knn
        if knn in (scalarDistFunction, histDistFunction):
            self.scalarDistanceMethodList.setCurrentText("K-Nearest Neighbors")
            self.histDistanceMethodList.setCurrentText("K-Nearest Neighbors")
            self.skeletonDistancesMethodList.setCurrentText("K-Nearest Neighbors")
            # Keep the pipeline in step with the selectors just updated.
            scalarDistFunction = histDistFunction = skelDistFunction = knn

        _, _, mapping_of_labels = get_sizes_features(features_file=self.config_data["FEATURE_DATA_FILE"],with_labels=True)

        n_hist = sum(1 for key in mapping_of_labels if "hist_" in key)
        n_skeleton = sum(1 for key in mapping_of_labels if "skeleton_" in key)

        # One entry for the scalar features, then one per histogram feature
        # and one per skeleton feature; weights and functions line up by
        # position.
        weights = ([self.scalarSliderWeights.value()]) + \
                  ([self.histSliderWeights.value()] * n_hist) + \
                  ([self.skeletonSliderWeights.value()] * n_skeleton)

        function_pipeline = [scalarDistFunction] + \
                            ([histDistFunction] * n_hist) + \
                            ([skelDistFunction] * n_skeleton)

        indices, distance_values, labels = self.query_matcher.match_with_db(self.query_mesh_features, k=self.sliderK.value(), distance_functions=function_pipeline, weights=weights)

        print(f"Distance values and indices are {list(zip(indices, distance_values))}")

        self.list.clear()
        for ind, distance in zip(indices, distance_values):
            item = Qt.QListWidgetItem()
            icon = Qt.QIcon()
            filename = str(ind) + "_thumb.jpg"
            path_to_thumb = glob.glob(self.config_data["DATA_PATH_PSB"] + "\\**\\" + filename, recursive=True)
            # A missing thumbnail leaves the icon empty instead of raising
            # IndexError and aborting the whole refresh.
            if path_to_thumb:
                icon.addPixmap(Qt.QPixmap(path_to_thumb[0]), QtGui.QIcon.Normal, QtGui.QIcon.Off)
            item.setIcon(icon)
            item.setText("ID: " + str(ind) + "\nDistance: " + f"{distance:.2f}")
            # The tooltip carries the mesh name read by plot_selected_mesh.
            item.setToolTip(str(ind))
            self.list.addItem(item)

    def plot_selected_mesh(self):
        """Open the selected result in a new ``SimilarMeshWindow``.

        Does nothing when no list item is selected. Any previously opened
        window tracked in ``self.smw_list`` is scheduled for deletion first.
        """
        selected = self.list.selectedItems()
        if not selected:
            # The button can be enabled while nothing is selected.
            return
        mesh_name = selected[0].toolTip()
        path_to_mesh = glob.glob(self.config_data["DATA_PATH_NORMED"] + "\\**\\" + mesh_name + ".*", recursive=True)
        data = DataSet._load_ply(path_to_mesh[0])
        mesh = pv.PolyData(data["vertices"], data["faces"])
        mesh_features = [d for d in self.query_matcher.features_raw_init if d["name"] == mesh_name][0]
        if self.smw_list:
            self.smw_list.pop(0).deleteLater()
        self.smw = SimilarMeshWindow(mesh, mesh_features)
        self.smw_list.append(self.smw)
        self.smw.show()

    def update_K_label(self, value):
        """Show the K slider's new value in its label."""
        self.Klabel.setText("K: " + str(value))

    def update_scalar_label(self, value):
        """Show the scalar-weight slider's new value in its label."""
        self.scalarLabelWeights.setText("Scalar weight: " + str(value))

    def update_hist_label(self, value):
        """Show the histogram-weight slider's new value in its label."""
        self.histLabelWeights.setText("Histogram weight: " + str(value))

    def update_skel_label(self, value):
        """Show the skeleton-weight slider's new value in its label."""
        self.skeletonLabelWeights.setText("Skeleton weight: " + str(value))
Example #9
0
class BM25WithFeedback:
    """BM25 scorer over the documents returned by a ``QueryMatcher``.

    Corpus statistics come from ``self.helper``: ``number_of_terms_doc`` maps
    a document id to its length, and ``unigram_inverted_index`` maps a term
    to a ``{doc_id: term frequency}`` mapping. The relevance counts ``r`` and
    ``R`` are fixed at 0 in this implementation.
    """

    def __init__(self, k1=1.2, k2=100, b=0.75):
        """Store the BM25 constants and create the per-query state.

        Args:
            k1: Term-frequency saturation constant.
            k2: Query-term-frequency saturation constant.
            b: Document-length normalisation constant.
        """
        self.helper = Helper()
        self.k1 = k1
        self.k2 = k2
        self.b = b
        self.calcAVDL()
        self.queryMatcher = QueryMatcher()
        self.queryFrequencyDict = defaultdict(int)
        self.docScoreDict = defaultdict(int)

    def initializeDocScoreDict(self):
        """Reset the score of every known document to 0."""
        for key in self.helper.number_of_terms_doc:
            self.docScoreDict[key] = 0

    def calculateK(self, doc_id):
        """Return the length-normalisation factor K for ``doc_id``.

        Raises ZeroDivisionError when the average document length is 0,
        which only happens for an empty corpus.
        """
        length_ratio = self.helper.number_of_terms_doc[doc_id] / self.avdl
        return self.k1 * ((1 - self.b) + self.b * length_ratio)

    def calcAVDL(self):
        """Compute the average document length and store it in ``self.avdl``.

        An empty corpus yields 0.0, so the attribute always exists.
        """
        doc_lengths = self.helper.number_of_terms_doc
        if doc_lengths:
            self.avdl = sum(doc_lengths.values()) / len(doc_lengths)
        else:
            self.avdl = 0.0

    def calculateDocumentScore(self, term, doc_id, qId):
        """Add the BM25 contribution of ``term`` to the score of ``doc_id``.

        A document that does not contain the term (or a term that is absent
        from the index) contributes 0, but the document still receives an
        entry in ``self.docScoreDict``.

        Args:
            term: Query term being scored.
            doc_id: Document to score.
            qId: Query identifier; unused, kept for interface compatibility.
        """
        r = 0
        R = 0
        # Documents containing the term; an unknown term has none.
        postings = self.helper.unigram_inverted_index.get(term, {})
        if doc_id not in postings:
            self.docScoreDict[doc_id] += 0
            return

        n = len(postings)
        # N total number of docs
        N = len(self.helper.number_of_terms_doc)
        K = self.calculateK(doc_id)
        f = postings[doc_id]
        qf = self.queryFrequencyDict[term]

        # BM25: the logarithm applies to the relevance/IDF ratio only, and
        # the result is then scaled by the two frequency factors.
        relevance_ratio = ((r + 0.5) / (R - r + 0.5)) / \
                          ((n - r + 0.5) / (N - n - R + r + 0.5))
        tf_factor = ((self.k1 + 1) * f) / (K + f)
        qf_factor = ((self.k2 + 1) * qf) / (self.k2 + qf)
        self.docScoreDict[doc_id] += math.log(relevance_ratio) * tf_factor * qf_factor

    def createQueryFrequencyDict(self, queryAr):
        """Count how often each term occurs in the query term list."""
        for term in queryAr:
            self.queryFrequencyDict[term] = self.queryFrequencyDict.get(term, 0) + 1

    def calculateTermScore(self, term, qId, find_type, k):
        """Score ``term`` against every document the matcher returns.

        Args:
            term: Query term being scored.
            qId: Query identifier, forwarded to ``calculateDocumentScore``.
            find_type: 1 = exact match, 2 = best match, anything else =
                ordered best match with proximity ``k``.
            k: Proximity window, used only by the ordered best match.
        """
        # get doc from query matcher
        if find_type == 1:
            doc_ids = self.queryMatcher.get_exact_match_docs(self.query)
        elif find_type == 2:
            doc_ids = self.queryMatcher.get_best_match_docs(self.query)
        else:
            doc_ids = self.queryMatcher.get_ordered_best_match_docs(
                self.query, k)

        for doc_id in doc_ids:
            self.calculateDocumentScore(term, doc_id, qId)

    def score(self, qId, queryAr, find_type, k):
        """Accumulate document scores for every term of the query."""
        for term in queryAr:
            self.calculateTermScore(term, qId, find_type, k)

    def printScores(self, qId, find_type, k):
        """Print the top 100 documents, one result line per document.

        Nothing is printed when ``find_type`` is not 1, 2 or 3.
        """
        suffix_by_type = {
            1: " ExactMatch\n",
            2: " BestMatch\n",
            3: " OrderedBestWithProximity" + str(k) + "\n",
        }
        suffix = suffix_by_type.get(find_type)
        if suffix is None:
            return

        ranked = sorted(self.docScoreDict.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
        for rank, (doc_id, doc_score) in enumerate(ranked[:100], start=1):
            print(
                str(qId) + " Q0 " + str(doc_id) + " " + str(rank) + " " +
                str(doc_score) + suffix)

    def main(self, q, find_type, k=0):
        """Parse query ``q``, score the matching documents and print them.

        Args:
            q: Raw query string; also used as the query id in the output.
            find_type: Matching mode, see ``calculateTermScore``.
            k: Proximity window for the ordered best match.
        """
        self.query = self.helper.parse_query(q)
        queryAr = self.query.split()
        # Fresh accumulators for every query.
        self.queryFrequencyDict = defaultdict(int)
        self.docScoreDict = defaultdict(int)
        self.createQueryFrequencyDict(queryAr)
        self.score(q, queryAr, find_type, k)
        self.printScores(q, find_type, k)
Example #10
0
class Tagger(object):
    """Tag a product title with the brands, categories and stores it names.

    Lookups are delegated to a ``QueryMatcher`` and the resulting matches are
    turned into ids by a ``QueryTagger``.
    """

    def __init__(self, brand_path, cat_path, store_path):
        """Create the tagger and the matcher from three data directories.

        Args:
            brand_path: Directory containing ``brands.csv`` and ``db/brands.db``.
            cat_path: Directory containing ``categories.csv`` and
                ``db/categories.db``.
            store_path: Directory containing ``stores.csv`` and ``db/stores.db``.
        """
        self.tagger = QueryTagger(brand_path + "/brands.csv",
                                  cat_path + "/categories.csv",
                                  store_path + "/stores.csv")
        self.matcher = QueryMatcher(brand_path + "/db/brands.db",
                                    cat_path + "/db/categories.db",
                                    store_path + "/db/stores.db")

    def tokenize(self, title):
        """Split ``title`` on spaces and drop prepositions that occur once.

        Tokens are stripped of surrounding whitespace. A preposition that
        occurs more than once is left in place.
        """
        tokens = [
            token.strip(" \t\n\r") for token in title.split(" ") if token
        ]
        for preposition in PREPOSITIONS:
            if tokens.count(preposition) == 1:
                tokens.remove(preposition)
        return tokens

    def ngrams(self, title):
        """Return every contiguous token sequence of ``title``, shortest first.

        Sequences are joined with single spaces and ordered by character
        length; sequences of equal length keep their generation order.
        """
        title_tokens = self.tokenize(title)
        num_tokens = len(title_tokens)
        # Only start <= end gives a non-empty slice, so the other pairs are
        # never generated.
        tokens = [
            " ".join(title_tokens[start:end + 1])
            for end in range(num_tokens)
            for start in range(end + 1)
        ]
        tokens = [token for token in tokens if token]
        return sorted(tokens, key=len)

    def tagToken(self, token):
        """Return the tagger's result for the matches found for ``token``.

        The result is indexed by ``tag`` as (brand ids, category ids,
        store ids).
        """
        brand_matches = self.matcher.brandMatches(token)
        cat_matches = self.matcher.catMatches(token)
        store_matches = self.matcher.storeMatches(token)
        return self.tagger.tag(brand_matches, cat_matches, store_matches)

    def cut_token(self, token, title):
        """Return ``title`` with the first occurrence of ``token`` removed.

        The title is returned unchanged when the token does not occur.
        """
        idx = title.find(token)
        if idx == -1:
            return title
        return title[:idx] + title[idx + len(token):]

    def tagDimension(self, token, dimensionMatches, dimensionTag):
        """Look up ``token`` with ``dimensionMatches`` and tag the matches."""
        return dimensionTag(dimensionMatches(token))

    def fullMatches(self, title, dimensionMatches, dimensionTag):
        """Find the n-grams of ``title`` that match one dimension.

        Every matching n-gram is removed from the title.

        Returns:
            A tuple ``(token_to_ids, remaining_title)`` where ``token_to_ids``
            maps each matching n-gram to its de-duplicated ids and
            ``remaining_title`` is the stripped title without the matched
            n-grams.
        """
        mod_title = title
        token_to_ids = {}
        for token in self.ngrams(title):
            values = self.tagDimension(token, dimensionMatches, dimensionTag)
            if values:
                token_to_ids[token] = list(set(values))
                mod_title = self.cut_token(token, mod_title)
        return (token_to_ids, mod_title.strip())

    def getMatches(self, word_to_ids):
        """Convert ``{token: ids}`` into a list of ``{'matches', 'token'}`` dicts."""
        # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return [{'matches': ids, 'token': token}
                for token, ids in word_to_ids.items()]

    def tag(self, title):
        """Tag ``title`` and return the matches plus the untagged remainder.

        Exact brand matches are removed from the lower-cased title first,
        then exact category matches; the n-grams of what is left are tagged
        for all three dimensions.

        Returns:
            A tuple ``(result_dict, suggestion)``. ``result_dict`` has the
            keys ``'brands'``, ``'categories'`` and ``'stores'``, each a list
            as produced by ``getMatches``. ``suggestion`` is the tokenized
            remainder of the title joined with single spaces.
        """
        title = title.lower()
        (brand_to_ids,
         mod_title) = self.fullMatches(title, self.matcher.exactBrandMatches,
                                       self.tagger.tagBrand)
        (cat_to_ids,
         mod_title) = self.fullMatches(mod_title,
                                       self.matcher.exactCategoryMatches,
                                       self.tagger.tagCategory)
        words_to_category = {}
        words_to_brand = {}
        words_to_store = {}
        for token in self.ngrams(mod_title):
            if not token:
                continue
            values = self.tagToken(token)
            if values[0]:
                words_to_brand[token] = list(set(values[0]))
            if values[1]:
                words_to_category[token] = list(set(values[1]))
            if values[2]:
                # Store ids belong in the store mapping; writing them to the
                # brand mapping left 'stores' permanently empty.
                words_to_store[token] = list(set(values[2]))

        result_dict = {
            'brands': self.getMatches(brand_to_ids) +
                      self.getMatches(words_to_brand),
            'categories': self.getMatches(cat_to_ids) +
                          self.getMatches(words_to_category),
            'stores': self.getMatches(words_to_store),
        }
        suggestion = " ".join(self.tokenize(mod_title))
        return (result_dict, suggestion)
Example #11
0
def plot_comparison(sample_labels, distance):
    """Render a grid comparing query meshes with their retrieved matches.

    For every label in ``sample_labels`` the first mesh carrying that label
    is used as the query. It is normalised, its features are extracted and
    matched against the database, and the query plus a slice of the results
    are drawn in one row of a 6x5 off-screen plot, which is saved as a
    screenshot.

    Args:
        sample_labels: Class labels to sample one query mesh from each.
        distance: ``"knn"`` selects the KNN pipeline; any other value selects
            the custom weighted pipeline (cosine for the scalar features,
            Wasserstein distance for the histogram and skeleton features).
    """
    qm = QueryMatcher(FEATURE_DATA_FILE)

    # Count the labels once instead of rebuilding the frame for every label.
    label_counts = Counter(pd.DataFrame(qm.features_flattened)["label"])
    labelled_occurences = tuple(
        zip(sample_labels, [label_counts.get(lbl) for lbl in sample_labels]))
    # Name of the first mesh carrying each label.
    names = [[f for f in qm.features_raw if f["label"] == lbl][0]["name"]
             for lbl in sample_labels]
    # (label, instance count) -> name of the query mesh.
    sampled_labelled = dict(zip(labelled_occurences, names))
    paths = []

    # `("off" or "ply") in name` only ever tested "off", because `or` returns
    # its first truthy operand; check the file extension for both formats.
    mesh_extensions = (".off", ".ply")
    for path, subdirs, files in os.walk(DATA_PATH_PSB):
        for name in files:
            if os.path.splitext(name)[1].lower() in mesh_extensions:
                paths.append(os.path.join(path, name))

    n_singletons, n_distributionals, mapping_of_labels = get_sizes_features(
        with_labels=True)

    n_hist = len(
        [key for key, val in mapping_of_labels.items() if "hist_" in key])
    n_skeleton = len(
        [key for key, val in mapping_of_labels.items() if "skeleton_" in key])

    if distance != "knn":
        # Custom
        weights = ([3]) + \
                  ([100] * n_hist) + \
                  ([1] * n_skeleton)

        function_pipeline = [cosine] + \
                            ([wasserstein_distance] * n_hist) + \
                            ([wasserstein_distance] * n_skeleton)
    else:
        # KNN
        weights = ([1]) + \
                  ([1] * n_hist) + \
                  ([1] * n_skeleton)

        # NOTE(review): the pipeline length uses n_distributionals while the
        # weights use n_hist + n_skeleton - confirm the two always agree.
        function_pipeline = [QueryMatcher.perform_knn] + (
            [QueryMatcher.perform_knn] * n_distributionals)

    normalizer = Normalizer()
    out_dict = defaultdict(list)
    for info_tuple, mesh_idx in sampled_labelled.items():
        full_path = [p for p in paths if mesh_idx in p][0]
        print(f"Processing: {full_path}")
        mesh = DataSet._read(Path(full_path))
        normed_data = normalizer.mono_run_pipeline(mesh)
        normed_mesh = pv.PolyData(
            normed_data["history"][-1]["data"]["vertices"],
            normed_data["history"][-1]["data"]["faces"])
        normed_data['poly_data'] = normed_mesh

        features_dict = FeatureExtractor.mono_run_pipeline_old(normed_data)

        indices, distance_values, _ = qm.match_with_db(
            features_dict,
            k=10,
            distance_functions=function_pipeline,
            weights=weights)
        # If the query itself was retrieved, drop it from the results and
        # put a distance of 0 at the front in its place.
        if mesh_idx in indices:
            idx_of_idx = indices.index(mesh_idx)
            indices.remove(mesh_idx)
            del distance_values[idx_of_idx]
            distance_values.insert(0, 0)

        # Keep the tail of the results and put the query mesh first.
        # NOTE(review): indices are cut at 4 and distances at 5 - confirm the
        # resulting index/distance pairing is the intended one.
        indices = indices[4:]
        indices.insert(0, mesh_idx)
        distance_values = distance_values[5:]
        out_dict[info_tuple].append({mesh_idx: (indices, distance_values)})
        print(out_dict)

    class_idx = 0
    plotter = pv.Plotter(off_screen=True, shape=(6, 5))
    for key, val in out_dict.items():
        print(class_idx)
        for v in val:
            el_idx = 0
            result_names, result_distances = list(v.values())[0]
            distances = list(result_distances)
            for name, dist in zip(result_names, distances):
                print(el_idx)
                plotter.subplot(class_idx, el_idx)

                full_path = [p for p in paths if name in p][0]

                mesh = DataSet._read(Path(full_path))
                curr_mesh = pv.PolyData(mesh["data"]["vertices"],
                                        mesh["data"]["faces"])
                plotter.add_mesh(curr_mesh, color='r')
                plotter.reset_camera()
                plotter.view_isometric()
                if el_idx != 0:
                    # Retrieved mesh: show its position in the row and distance.
                    plotter.add_text(f"{el_idx} - Dist: {round(dist,4)}",
                                     font_size=20)
                elif el_idx == 0 and class_idx == 0:
                    # First query cell also carries the "Query" column header.
                    plotter.add_text(
                        f"             Query\nClass: {key[0].replace('_', ' ').title()}"
                        + f"\nInstances: {key[1]}",
                        font_size=20)
                else:
                    plotter.add_text(
                        f"Class: {key[0].replace('_', ' ').title()}" +
                        f"\nInstances: {key[1]}",
                        font_size=20)

                el_idx += 1
        class_idx += 1

    # NOTE(review): the KNN figure goes to "fig\\" while the custom one goes
    # to "figs\\" - confirm the differing directories are intentional.
    if distance == "knn":
        plotter.screenshot("fig\\comparison_knn.jpg", window_size=(1920, 2160))
    else:
        plotter.screenshot("figs\\comparison_custom_distance.jpg",
                           window_size=(1920, 2160))