Esempio n. 1
0
class Wows_database:
	"""
	Scrapes and registers data into database.
	"""
	__slots__ = ('database', 'logger')

	def __init__(self, db_path):
		self.database = Database(db_path)
		self.logger = Logger(__name__)
		# if db file not found or empty, create file
		try:
			with open(db_path, 'rb') as f:
				if f.read() == b'':
					self._create_table()
		except Exception as e:
			self.logger.info('Database not found.')
			self._create_table()


	async def update(self):
		"""
		Update database.
		"""
		self.logger.info('Update started.')
		try:
			# await self._update_medium()
			# await self._update_facebook()
			await self._update_hp()
		except Exception:
			self.logger.critical(f'Exception while updating: {traceback.format_exc()}')
		self.logger.info('Update finished.')


	def _create_table(self):
		self.logger.debug('Creating wows database table.')
		self.database.executescript("""
		CREATE TABLE wowsnews(
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			source TEXT,
			title TEXT,
			description TEXT,
			url TEXT,
			img TEXT
		);
		""")
		self.logger.debug('Created wows database table.')


	def _get_latest(self, source:str):
		"""
		Get latest news stored in database.

		Returns
		-------
		res : tuple, None
		"""
		self.logger.debug(f'Starting _get_latest source: {source}')
		res = self.database.fetchone('SELECT * FROM wowsnews WHERE source==? ORDER BY id DESC', (source,))
		self.logger.debug(f'_get_latest result: {res}')
		
		return res


	async def _update_hp(self):
		"""
		Check for new articles, if found update db.
		"""
		self.logger.info('Updating wows hp.')
		# get data and data from db.
		try:
			data = get_hp_articles()
		except ScrapingException:
			self.logger.critical('Scraping failed.')
			return
		data_db = self._get_latest('wowshomepage')
		
		# if database is up to date return
		if _is_same_data(data_db, data[0]):
			self.logger.info('Wows hp is up to date.')
			return
		# if same url already exists in database, return
		elif self._url_exists(data[0][3]):
			self.logger.info('Article with same url exists.')
			return
		# update db
		self.logger.info('Saving data.')
		try:
			self.database.execute('INSERT INTO wowsnews(source, title, description, url, img) VALUES(?, ?, ?, ?, ?)', data[0])
		except Exception as e:
			self.logger.critical(f'Inserting into database failed: {e}')
			return
		self.logger.info('Updated wows hp.')


	async def _update_facebook(self):
		"""
		Check for new articles, if found update db.
		"""
		self.logger.info('Updating facebook.')
		try:
			data = get_facebook_articles()
		except ScrapingException:
			self.logger.critical('Scraping failed.')
			return
		data_db = self._get_latest('facebook')

		# if up to date, return
		if _has_same_url(data_db, data[0]):
			self.logger.info('Facebook is up to date.')
			return
		# counter shows how many articles to update
		counter = 0
		for d in data:
			# if data is most recent in db
			if d == data_db:
				break
			counter += 1
		# data.reverse() not working, so using temp.reverse()
		temp = data
		temp.reverse()
		news = temp[:counter]
		# update db
		try:
			for new in news:
				# continue if url already exists in database
				if self._url_exists(new[3]):
					self.logger.info('Article with same url exists.')
					continue
				self.database.execute('INSERT INTO wowsnews(source, title, description, url, img) VALUES(?, ?, ?, ?, ?)', new)
		except Exception as e:
			self.logger.critical(f'Inserting into database failed: {e}')
			return
		self.logger.info('Updated facebook.')


	async def _update_medium(self):
		"""
		Check for new articles, if found update db.
		"""
		self.logger.info('Updating medium.')
		try:
			data = get_medium_articles()
		except ScrapingException:
			self.logger.critical('Scraping failed.')
			return

		# get latest data in database
		data_db = self._get_latest('medium')
		# if up to date, return
		if _is_same_data(data_db, data[0]):
			self.logger.info('Medium is up to date.')
			return
		# counter shows how many articles to update
		counter = 0
		for d in data:
			# if url is most recent in db
			if d == data_db:
				break
			counter += 1
		# data.reverse() not working, so using temp reverse()
		temp = data
		temp.reverse()
		news = temp[:counter]
		try:
			for new in news:
				self.database.execute('INSERT INTO wowsnews(source, title, description, url, img) VALUES(?, ?, ?, ?, ?)', new)
		except Exception as e:
			self.logger.critical(f'Inserting into database failed: {e}')
			return
		self.logger.info('Updated medium.')


	def _url_exists(self, url:str):
		self.logger.debug('Checking if url already exists in database.')
		self.logger.debug(f'url is {url[10:]}')
		try:
			res = self.database.fetchone(f'SELECT * FROM wowsnews WHERE url==?', (url,))
		except Exception as e:
			self.logger.critical(f'Fetching datbase failed: {e}')
			res = None
		self.logger.debug(f'Result: {res}')
		if not res:
			return False
		return True