Beispiel #1
19
 def delete_documents(self, doc_set, paths):
     """Remove documents of one document set from the search index.

     A document is deleted when its 'set' field equals ``doc_set`` and
     its 'path' field equals any entry of ``paths``. The deletion is
     issued through an ``AsyncWriter`` on the index stored at
     ``self.index_path`` and committed before returning.
     """
     index_writer = AsyncWriter(open_dir(self.index_path))

     # Restrict to the requested set, then match any of the given paths.
     in_set = Term('set', doc_set)
     at_any_path = Or([Term('path', p) for p in paths])

     index_writer.delete_by_query(And([in_set, at_any_path]))
     index_writer.commit()
Beispiel #2
0
 def delete_documents(self, doc_set, paths):
     """Delete the listed paths of a document set from the index.

     Opens the index at ``self.index_path``, deletes every document whose
     'set' field is ``doc_set`` and whose 'path' field is one of
     ``paths``, then commits the change.
     """
     writer = AsyncWriter(open_dir(self.index_path))

     # First clause pins the set; second clause is "any of these paths".
     clauses = [Term('set', doc_set)]
     path_alternatives = []
     for path in paths:
         path_alternatives.append(Term('path', path))
     clauses.append(Or(path_alternatives))

     writer.delete_by_query(And(clauses))
     writer.commit()
Beispiel #3
0
    def delPage(self, item):
        """Delete a page item together with everything that belongs to it.

        Recursively deletes the child items first, then removes the page's
        attachment folder, its entry in the search index, its file under
        ``self.notePath`` and finally the tree item itself.
        """
        # Children are handled from last to first so the remaining indices
        # stay valid while each recursive call detaches its own item.
        for childPos in reversed(range(item.childCount())):
            child = item.child(childPos)
            self.dirname = child.text(0)
            self.delPage(child)

        # Empty the attachment folder, then remove the folder itself.
        attachmentDir = self.itemToAttachmentDir(item)
        for entry in QDir(attachmentDir).entryInfoList():
            QDir().remove(entry.absoluteFilePath())
        QDir().rmdir(attachmentDir)

        # Drop the page from the search index.
        page = self.itemToPage(item)
        self.ix = open_dir(self.settings.indexdir)
        pageQuery = QueryParser('path', self.ix.schema).parse(page)
        indexWriter = AsyncWriter(self.ix)
        indexWriter.delete_by_query(pageQuery)
        indexWriter.commit()

        # Remove the page file from the notebook folder.
        QDir(self.notePath).remove(self.pageToFile(page))

        # Detach the item from the tree.
        parentItem = item.parent()
        parentPage = self.itemToPage(parentItem)
        if parentItem is None:
            self.takeTopLevelItem(self.indexOfTopLevelItem(item))
        else:
            parentItem.takeChild(parentItem.indexOfChild(item))
            if parentItem.childCount() == 0:
                # Parent has no sub-pages left, so its folder is not needed.
                QDir(self.notePath).rmdir(parentPage)
        QDir(self.notePath).rmdir(page)
Beispiel #4
0
    def delPage(self, item):
        """Remove a page and all of its sub-pages from the notebook.

        For each page this removes, in order: the child pages (recursively),
        the attachment folder, the search-index entry, the page file below
        ``self.notePath`` and the tree item. Folders that are no longer
        needed are removed as well.
        """
        # Walk the children backwards; every recursive call takes its item
        # out of the tree, so counting down keeps the positions valid.
        for pos in range(item.childCount() - 1, -1, -1):
            subItem = item.child(pos)
            self.dirname = subItem.text(0)
            self.delPage(subItem)

        # Attachment folder: delete its entries first, then the folder.
        attachments = self.itemToAttachmentDir(item)
        for fileInfo in QtCore.QDir(attachments).entryInfoList():
            QtCore.QDir().remove(fileInfo.absoluteFilePath())
        QtCore.QDir().rmdir(attachments)

        # Search index: delete whatever the page path matches.
        pagePath = self.itemToPage(item)
        self.ix = open_dir(self.settings.indexdir)
        parser = QueryParser("path", self.ix.schema)
        asyncWriter = AsyncWriter(self.ix)
        asyncWriter.delete_by_query(parser.parse(pagePath))
        asyncWriter.commit()

        # Page file on disk.
        pageFile = self.pageToFile(pagePath)
        QtCore.QDir(self.notePath).remove(pageFile)

        # Tree item: detach from its parent, or from the top level.
        owner = item.parent()
        ownerPage = self.itemToPage(owner)
        if owner is None:
            topLevelPos = self.indexOfTopLevelItem(item)
            self.takeTopLevelItem(topLevelPos)
        else:
            owner.takeChild(owner.indexOfChild(item))
            # A parent without children no longer needs its folder.
            if owner.childCount() == 0:
                QtCore.QDir(self.notePath).rmdir(ownerPage)
        QtCore.QDir(self.notePath).rmdir(pagePath)
Beispiel #5
0
    def load_all_dset_metadata(self, dsetname, create_index=False):
        """
            Loads into memory the metadata of a dataset. The metadata is read from the first CSV file
            found in the dataset's metadata folder. The file should have at least two columns:
             - filename: Paths to the images in the dataset, relative to the image data folder. For backward
                         compatibility '#filename' is also accepted
             - file_attributes: JSON string containing information about the file. The most important file
                                attributes are 'caption' and 'keywords'. The 'caption' field should be a short
                                string which will be used as the caption of the image in result lists. The
                                'keywords' field must contain a comma-separated list of keywords. Each keyword
                                can be used as the source for a search.
            If create_index is True, it builds a search index with the 'keywords' in the file_attributes.

            The parsed attributes are stored in self.fname2meta[dsetname][filename] and every keyword is
            linked to its files in self.keyword2fname[dsetname][keyword]. Rows whose file_attributes cannot
            be parsed as JSON are stored with None as metadata. Metadata that is valid JSON but not a JSON
            object is stored as-is and contributes no keywords.

            Any other error (including missing columns) is caught and printed, and stops the loading. In
            that case self.is_all_metadata_loaded is not set by this method.
            Arguments:
                dsetname: String corresponding to the dataset within the list of supported
                          datasets.
                create_index: Boolean indicating whether or not to build a search index
                              with the metadata
        """
        metaindex = None
        key_parser = None
        t = time.time()
        try:
            dset_dir = os.path.join(self.metadata_dir, dsetname)
            for afile in os.listdir(dset_dir):
                if not afile.endswith(".csv"):
                    continue
                metadata_file = os.path.join(dset_dir, afile)
                print('Found metadata file at', metadata_file)
                if create_index:
                    metaindex = open_dir(self.index_dir)
                    # The parser does not depend on the row, so build it once.
                    key_parser = QueryParser('key', metaindex.schema)
                # newline='' is what the csv module requires so that it can
                # handle line endings inside quoted fields by itself.
                with open(metadata_file, 'r', newline='') as fin:
                    for row in csv.DictReader(fin):
                        if 'filename' in row:
                            id_field = 'filename'
                        elif '#filename' in row:
                            id_field = '#filename'
                        else:
                            id_field = None
                        if id_field is None or 'file_attributes' not in row:
                            raise Exception(
                                '"filename" and/or "file_attributes" columns not found in '
                                + afile +
                                ' (are you missing the column names?). Metadata will not be available!.'
                            )

                        filename = row[id_field]
                        # Only parsing failures are tolerated here; anything
                        # else (e.g. KeyboardInterrupt) must propagate.
                        try:
                            metadata = json.loads(row['file_attributes'])
                        except (ValueError, TypeError, RecursionError):
                            metadata = None
                        self.fname2meta[dsetname][filename] = metadata

                        # Only JSON objects can carry a 'keywords' entry.
                        if isinstance(metadata, dict):
                            keyword_list = metadata.get('keywords')
                        else:
                            keyword_list = None
                        if not keyword_list:
                            continue
                        keywords = [key.strip()
                                    for key in keyword_list.split(',')]

                        if create_index:
                            # One writer (and commit) per row, so that the
                            # deletions issued for later rows can see the
                            # keywords added by earlier ones.
                            writer = AsyncWriter(metaindex)
                            for key in keywords:
                                # delete previous entry if found
                                # NOTE(review): this searcher is never closed;
                                # closing it here looks unsafe if AsyncWriter
                                # has queued the call - confirm before changing.
                                writer.delete_by_query(
                                    key_parser.parse(key),
                                    metaindex.searcher())
                                # add document
                                writer.add_document(
                                    key=str(key),
                                    dataset=str(dsetname))
                            writer.commit()

                        # register link keyword-file; we would like to do
                        # this, even if the index is not created
                        files_by_keyword = self.keyword2fname[dsetname]
                        for key in keywords:
                            files_by_keyword.setdefault(key, []).append(
                                filename)

                    print('Finished loading metadata for %s in %s' %
                          (dsetname, str(time.time() - t)))
                    self.is_all_metadata_loaded = True
                break  # only the first CSV file found is used
        except Exception as e:
            print("load_all_dset_metadata Exception:" + str(e) + '\n')