Esempio n. 1
0
	def update(self, force=False):

		"""
		Checks if the local copy of the zotero database is up to date. If not
		(or if an update is forced), the zotero database is copied to the
		gnotero copy and all items are indexed again.

		Arguments:
		force		--	Indicates that the data should also be indexed, even
						if the local copy is up to date. (default=False)

		Returns:
		False if the zotero database could not be inspected with os.stat(),
		True otherwise (also when no re-indexing was necessary).
		"""

		try:
			stats = os.stat(self.zotero_database)
		except Exception as e:
			print(u"libzotero.update(): %s" % e)
			return False

		# Only update if necessary. stats[8] is the modification time of the
		# zotero database. A last_update of None is treated as 'never
		# updated', so that the comparison cannot fail on it.
		outdated = self.last_update is None or stats[8] > self.last_update
		if not (force or outdated):
			return True

		t = time.time()
		self.last_update = stats[8]
		self.index = {}
		self.collection_index = []
		# Reset the tag index as well, otherwise tags that no longer exist
		# would survive a re-index
		self.tag_index = []
		self.search_cache = {}
		# Copy the zotero database to the gnotero copy
		shutil.copyfile(self.zotero_database, self.gnotero_database)
		self.conn = sqlite3.connect(self.gnotero_database)
		self.cur = self.conn.cursor()
		try:
			# First create a set of deleted items, so we can ignore those
			# later. A set keeps the many membership tests below cheap.
			self.cur.execute(self.deleted_query)
			deleted = set(row[0] for row in self.cur.fetchall())
			# Retrieve information about date, publication, volume, issue and
			# title. Rows are used as (item id, field name, value, item key).
			self.cur.execute(self.info_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_name = item[1]
				key = item[3]
				# Parse date fields, because we only want a year or a
				# 'special' date
				if item_name == u"date":
					raw_date = item[2]
					item_value = None
					for sd in self.special_dates:
						if sd in raw_date.lower():
							item_value = sd
							break
					# Dates can have months, days, and years, or just a
					# year, and can be split by '-' and '/' characters.
					if item_value is None:
						# Detect whether the date should be split. Both tests
						# must look at the date itself, not at the item key.
						if u'/' in raw_date:
							split = u'/'
						elif u'-' in raw_date:
							split = u'-'
						else:
							split = None
						# If not, just use the last four characters
						if split is None:
							item_value = raw_date[-4:]
						# Else take the first slice that is four characters
						else:
							for part in raw_date.split(split):
								if len(part) == 4:
									item_value = part
									break
				else:
					item_value = item[2]
				if item_id not in self.index:
					self.index[item_id] = zotero_item(
						item_id, noteProvider=self.noteProvider)
					self.index[item_id].key = key
				if item_name == u"publicationTitle":
					self.index[item_id].publication = unicode(item_value)
				elif item_name == u"date":
					self.index[item_id].date = item_value
				elif item_name == u"volume":
					self.index[item_id].volume = item_value
				elif item_name == u"issue":
					self.index[item_id].issue = item_value
				elif item_name == u"title":
					self.index[item_id].title = unicode(item_value)
			# Retrieve author information
			self.cur.execute(self.author_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_author = item[1].capitalize()
				if item_id not in self.index:
					self.index[item_id] = zotero_item(item_id)
				self.index[item_id].authors.append(item_author)
			# Retrieve collection information
			self.cur.execute(self.collection_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_collection = item[1]
				if item_id not in self.index:
					self.index[item_id] = zotero_item(item_id)
				self.index[item_id].collections.append(item_collection)
				if item_collection not in self.collection_index:
					self.collection_index.append(item_collection)
			# Retrieve tag information
			self.cur.execute(self.tag_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_tag = item[1]
				if item_id not in self.index:
					self.index[item_id] = zotero_item(item_id)
				self.index[item_id].tags.append(item_tag)
				if item_tag not in self.tag_index:
					self.tag_index.append(item_tag)
			# Retrieve attachments. Rows are used as (item id, attachment
			# path, attachment item id).
			self.cur.execute(self.attachment_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				att = item[1]
				if item_id in deleted or att is None:
					continue
				# If the attachment is stored in the Zotero folder, it is
				# preceded by "storage:"
				if att[:8] == u"storage:":
					item_attachment = att[8:]
					attachment_id = item[2]
					if item_attachment[-4:].lower() not in \
						self.attachment_ext:
						continue
					# Let sqlite bind the id instead of formatting it into
					# the query string
					self.cur.execute(
						u"select items.key from items where itemID = ?",
						(attachment_id,))
					row = self.cur.fetchone()
					# Without a key the storage folder cannot be determined
					if row is None:
						continue
					if item_id not in self.index:
						self.index[item_id] = zotero_item(item_id)
					self.index[item_id].fulltext = os.path.join(
						self.storage_path, row[0], item_attachment)
				# If the attachment is linked, it is simply the full
				# path to the attachment
				else:
					# The item may have no other indexed information yet
					if item_id not in self.index:
						self.index[item_id] = zotero_item(item_id)
					self.index[item_id].fulltext = att
		finally:
			# Release the cursor even if indexing fails half-way
			self.cur.close()
		print(u"libzotero.update(): indexing completed in %.3fs"
			% (time.time() - t))
		return True
Esempio n. 2
0
	def update(self, force = False):

		"""
		This function checks if the local copy of the zotero
		database is up to date. If not (or if an update is forced), the
		zotero database is copied to the gnotero copy and indexed.

		Arguments:
		force		--	Indicates that the data should be indexed even if the
						local copy is up to date. (default=False)

		Returns:
		False if the zotero database could not be inspected with os.stat(),
		True otherwise (also when no re-indexing was necessary).
		"""

		try:
			stats = os.stat(self.zotero_database)
		except Exception as e:
			# The parenthesised form prints the same text on Python 2 and 3
			print("libzotero.update(): %s" % e)
			return False

		# Only update if necessary. stats[8] is the modification time of the
		# zotero database. A last_update of None is treated as 'never
		# updated', so that the comparison cannot fail on it.
		outdated = self.last_update is None or stats[8] > self.last_update
		if not (force or outdated):
			return True

		t = time.time()

		self.last_update = stats[8]
		self.index = {}
		self.collection_index = []
		self.search_cache = {}

		# Copy the zotero database to the gnotero copy
		shutil.copyfile(self.zotero_database, self.gnotero_database)
		self.conn = sqlite3.connect(self.gnotero_database)
		self.cur = self.conn.cursor()

		try:

			# First create a set of deleted items, so we can ignore those
			# later. A set keeps the many membership tests below cheap.
			self.cur.execute(self.deleted_query)
			deleted = set(row[0] for row in self.cur.fetchall())

			# Retrieve information about date, publication, volume, issue and
			# title. Rows are used as (item id, field name, value, item key).
			self.cur.execute(self.info_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_name = item[1]
				key = item[3]

				# Parse date fields, because we only want a year or a
				# 'special' date
				if item_name == "date":
					item_value = None
					for sd in self.special_dates:
						if sd in item[2].lower():
							item_value = sd
							break
					# Otherwise the last four characters are taken as the year
					if item_value is None:
						item_value = item[2][-4:]
				else:
					item_value = item[2]

				if item_id not in self.index:
					self.index[item_id] = zotero_item(
						item_id, noteProvider=self.noteProvider)
					self.index[item_id].key = key

				if item_name == "publicationTitle":
					self.index[item_id].publication = unicode(item_value)
				elif item_name == "date":
					self.index[item_id].date = item_value
				elif item_name == "volume":
					self.index[item_id].volume = item_value
				elif item_name == "issue":
					self.index[item_id].issue = item_value
				elif item_name == "title":
					self.index[item_id].title = unicode(item_value)

			# Retrieve author information
			self.cur.execute(self.author_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_author = item[1].capitalize()
				if item_id not in self.index:
					self.index[item_id] = zotero_item(item_id)
				self.index[item_id].authors.append(item_author)

			# Retrieve collection information
			self.cur.execute(self.collection_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				if item_id in deleted:
					continue
				item_collection = item[1]
				if item_id not in self.index:
					self.index[item_id] = zotero_item(item_id)
				self.index[item_id].collections.append(item_collection)
				if item_collection not in self.collection_index:
					self.collection_index.append(item_collection)

			# Retrieve attachments. Rows are used as (item id, attachment
			# path, attachment item id).
			self.cur.execute(self.attachment_query)
			for item in self.cur.fetchall():
				item_id = item[0]
				att = item[1]
				if item_id in deleted or att is None:
					continue

				# If the attachment is stored in the Zotero folder, it is
				# preceded by "storage:"
				if att[:8] == "storage:":
					item_attachment = att[8:]
					attachment_id = item[2]
					if item_attachment[-4:].lower() not in self.attachment_ext:
						continue
					# Let sqlite bind the id instead of formatting it into
					# the query string
					self.cur.execute(
						"select items.key from items where itemID = ?",
						(attachment_id,))
					row = self.cur.fetchone()
					# Without a key the storage folder cannot be determined
					if row is None:
						continue
					if item_id not in self.index:
						self.index[item_id] = zotero_item(item_id)
					self.index[item_id].fulltext = os.path.join(
						self.storage_path, row[0], item_attachment)

				# If the attachment is linked, it is simply the full path to
				# the attachment
				else:
					# The item may have no other indexed information yet
					if item_id not in self.index:
						self.index[item_id] = zotero_item(item_id)
					self.index[item_id].fulltext = att

		finally:
			# Release the cursor even if indexing fails half-way
			self.cur.close()

		print("libzotero.update(): indexing completed in %.3fs" % (time.time() - t))
		return True
Esempio n. 3
0
    def update(self, force=False):

        """
        Checks if the local copy of the zotero database is up to date. If not
        (or if an update is forced), the zotero database is copied to the
        gnotero copy and all items are indexed again.

        Arguments:
        force		--	Indicates that the data should also be indexed, even
                        if the local copy is up to date. (default=False)

        Returns:
        False if the zotero database could not be inspected with os.stat(),
        True otherwise (also when no re-indexing was necessary).
        """

        try:
            stats = os.stat(self.zotero_database)
        except Exception as e:
            self.debug.print_debug(self, u"zotero.update(): %s" % e)
            return False

        # Only update if necessary. stats[8] is the modification time of the
        # zotero database. A last_update of None is treated as 'never
        # updated', so that the comparison cannot fail on it.
        outdated = self.last_update is None or stats[8] > self.last_update
        if not (force or outdated):
            return True

        t = time.time()
        self.last_update = stats[8]
        self.index = {}
        self.collection_index = []
        # Reset the tag index as well, otherwise tags that no longer exist
        # would survive a re-index
        self.tag_index = []
        self.search_cache = {}

        # Copy the zotero database to the gnotero copy
        shutil.copyfile(self.zotero_database, self.gnotero_database)
        self.conn = sqlite3.connect(self.gnotero_database)
        self.cur = self.conn.cursor()

        try:
            # First create a set of deleted items, so we can ignore those
            # later. A set keeps the many membership tests below cheap.
            self.cur.execute(self.deleted_query)
            deleted = set(row[0] for row in self.cur.fetchall())

            # Retrieve information about date, publication, volume, issue and
            # title. Rows are used as (item id, field name, value, item key,
            # item type).
            self.cur.execute(self.info_query)
            for item in self.cur.fetchall():
                item_id = item[0]
                if item_id in deleted:
                    continue
                item_name = item[1]
                key = item[3]

                # Parse date fields, because we only want a year or a
                # 'special' date
                if item_name == u"date":
                    raw_date = item[2]
                    item_value = None
                    for sd in self.special_dates:
                        if sd in raw_date.lower():
                            item_value = sd
                            break

                    # Dates can have months, days, and years, or just a
                    # year, and can be split by '-' and '/' characters.
                    if item_value is None:
                        # Detect whether the date should be split. Both tests
                        # must look at the date itself, not at the item key.
                        if u'/' in raw_date:
                            split = u'/'
                        elif u'-' in raw_date:
                            split = u'-'
                        else:
                            split = None
                        # If not, just use the last four characters
                        if split is None:
                            item_value = raw_date[-4:]
                        # Else take the first slice that is four characters
                        else:
                            for part in raw_date.split(split):
                                if len(part) == 4:
                                    item_value = part
                                    break
                else:
                    item_value = item[2]

                if item_id not in self.index:
                    self.index[item_id] = zotero_item(
                        item_id, noteProvider=self.noteProvider)
                    self.index[item_id].key = key
                    self.index[item_id].item_type = item[4]

                entry = self.index[item_id]
                if item_name in (u"publicationTitle", u"bookTitle",
                                 u"websiteTitle"):
                    entry.publication = unicode(item_value)
                elif item_name == u"date":
                    entry.date = item_value
                elif item_name == u"volume":
                    entry.volume = item_value
                elif item_name == u"issue":
                    entry.issue = item_value
                elif item_name == u"title":
                    entry.title = unicode(item_value)
                elif item_name == u"DOI":
                    entry.doi = unicode(item_value)
                elif item_name == u"pages":
                    entry.pages = unicode(item_value)
                elif item_name == u"place":
                    entry.place = unicode(item_value)
                elif item_name == u"publisher":
                    entry.publisher = unicode(item_value)
                elif item_name == u"url":
                    entry.url = unicode(item_value)
                else:
                    self.debug.print_debug(
                        self, u'Unindexed field: {0}'.format(item_name))

            # Retrieve creator information. Each creator type that is passed
            # to creator_query() is collected in its own list on the item.
            creator_targets = (
                ('author', 'authors'),
                ('editor', 'editors'),
                ('translator', 'translators'),
                ('bookAuthor', 'book_authors'),
            )
            for creator_type, attribute in creator_targets:
                self.cur.execute(self.creator_query(creator_type))
                for item in self.cur.fetchall():
                    item_id = item[0]
                    if item_id in deleted:
                        continue
                    # The item may have no other indexed information yet
                    if item_id not in self.index:
                        self.index[item_id] = zotero_item(item_id)
                    # slice tuple as first column is an integer index
                    # next two columns represent lastname and firstname
                    getattr(self.index[item_id], attribute).append(item[1:])

            # Retrieve collection information
            self.cur.execute(self.collection_query)
            for item in self.cur.fetchall():
                item_id = item[0]
                if item_id in deleted:
                    continue
                item_collection = item[1]
                if item_id not in self.index:
                    self.index[item_id] = zotero_item(item_id)
                self.index[item_id].collections.append(item_collection)
                if item_collection not in self.collection_index:
                    self.collection_index.append(item_collection)

            # Retrieve tag information
            self.cur.execute(self.tag_query)
            for item in self.cur.fetchall():
                item_id = item[0]
                if item_id in deleted:
                    continue
                item_tag = item[1]
                if item_id not in self.index:
                    self.index[item_id] = zotero_item(item_id)
                self.index[item_id].tags.append(item_tag)
                if item_tag not in self.tag_index:
                    self.tag_index.append(item_tag)

            # Retrieve attachments. Rows are used as (item id, attachment
            # path, attachment item id).
            self.cur.execute(self.attachment_query)
            for item in self.cur.fetchall():
                item_id = item[0]
                att = item[1]
                if item_id in deleted or att is None:
                    continue
                # If the attachment is stored in the Zotero folder, it is
                # preceded by "storage:"
                if att[:8] == u"storage:":
                    item_attachment = att[8:]
                    attachment_id = item[2]
                    if item_attachment[-4:].lower() not in \
                            self.attachment_ext:
                        continue
                    # Let sqlite bind the id instead of formatting it into
                    # the query string
                    self.cur.execute(
                        u"select items.key from items where itemID = ?",
                        (attachment_id,))
                    row = self.cur.fetchone()
                    # Without a key the storage folder cannot be determined
                    if row is None:
                        continue
                    if item_id not in self.index:
                        self.index[item_id] = zotero_item(item_id)
                    self.index[item_id].fulltext = os.path.join(
                        self.storage_path, row[0], item_attachment)
                # If the attachment is linked, it is simply the full
                # path to the attachment
                else:
                    # The item may have no other indexed information yet
                    if item_id not in self.index:
                        self.index[item_id] = zotero_item(item_id)
                    self.index[item_id].fulltext = att
        finally:
            # Release the cursor even if indexing fails half-way
            self.cur.close()

        self.debug.print_debug(
            self,
            u"zotero.update(): indexing completed in %.3fs" % (time.time() - t))

        return True