async def _create_publication_from_img(
        self,
        img: Tag,
        url: Optional[str] = None,
        title: Optional[str] = None,
        check_cache: Optional = True,
) -> Optional[Publication]:
    """Build a publication whose only image is the one ``img`` points to.

    The file name derived from the tag's ``src`` attribute is used as the
    publication id.

    :param img: tag whose ``src`` attribute holds the image URL.
    :param url: URL stored on the publication; when falsy the image ``src``
        is used instead.
    :param title: publication title; when falsy ``self._TITLE`` is used.
    :param check_cache: when truthy, an image whose file name is already in
        ``self._cache`` is skipped.
    :return: the new publication, or ``None`` when the image was skipped.
    """
    image_src = img.attrs['src']
    file_name = await self._get_filename_from_url(url=image_src)

    # Skip only when the cache check is requested AND the name is cached.
    if file_name in self._cache and check_cache:
        return None

    if not title:
        title = self._TITLE
    if not url:
        url = image_src

    image_file = await self._get_file_value_object(
        url=image_src,
        pretty_name=title,
        filename_unique=self._FILENAME_UNIQUE,
        public_url=self._PUBLIC_URL,
    )
    return Publication(
        publication_id=file_name,
        title=title,
        url=url,
        timestamp=datetime.utcnow(),
        color=self._colour,
        images=[image_file],
        author=self._author,
    )
async def _send_main_message(self, publication: Publication) -> None:
    """Type the formatted publication text into the page and send it in chunks.

    The text is rendered with ``self._FORMAT_DATA`` and split by
    ``self._get_text_chunks`` using ``self._MAX_TEXT_LENGTH``. Each chunk is
    typed into the message box. The first image of the publication, if any,
    is attached to and sent with the first chunk; every later non-empty
    chunk is sent as plain text.

    :param publication: publication providing the text and optional images.
    """
    formatted = publication.to_format(format_data=self._FORMAT_DATA)
    chunks = await self._get_text_chunks(
        formatted, max_length=self._MAX_TEXT_LENGTH)

    image_pending = True
    for chunk in chunks:
        # The message box must be looked up again on every iteration:
        # once an image has been sent it may no longer be available.
        box = await self._page.waitForSelector(
            '[data-tab="1"]',
            options={'timeout': self._LOAD_PAGE_TIMEOUT_MS},
        )
        await box.click()

        # The capturing group keeps each '\n' as its own list item, so text
        # pieces and line breaks are handled in their original order.
        for piece in re.split(r'(\n)', chunk):
            if piece != '\n':
                await box.type(piece)
                continue
            # Line breaks are entered as Shift+Enter (presumably so that the
            # break does not submit the message -- confirm against the page).
            keyboard = self._page.keyboard
            await keyboard.down('Shift')
            await keyboard.down('Enter')
            await keyboard.up('Shift')
            await keyboard.up('Enter')

        if image_pending and publication.images:
            # Short pauses around the attach steps give the page's
            # JavaScript time to react.
            await asyncio.sleep(1)
            await self._attach_click()
            await self._attach_image(publication.images[0])
            await asyncio.sleep(1)
            image_pending = False
            await self._send_image()
        elif chunk:
            await self._send_text()
async def _get_new_cards(self, card: Tag) -> Optional[Publication]:
    """Turn an image card into a publication with an HTML-formatted title.

    The base name of the card's ``src`` (query string removed) identifies the
    card. For names containing ``'ws_today_'`` the file is fetched first and
    the name reported by the fetched file object is used as the identifier.

    :param card: tag whose ``src`` attribute points to the card image.
    :return: the new publication, or ``None`` when the identifier is already
        in ``self._cache``.
    """
    src = card.attrs['src']
    file_name: str = os.path.basename(src).split('?')[0]

    card_file = None
    if 'ws_today_' in file_name:
        # Fetched before the cache check because the cache key for these
        # cards is the file name reported by the fetched file object.
        card_file = await self._get_file_value_object(
            url=src,
            pretty_name=self._title,
            filename_unique=False,
            public_url=False,
        )
        file_name = card_file.filename

    if file_name in self._cache:
        return None

    if card_file is None:
        absolute_src = urllib.parse.urljoin(self._domain, src)
        card_file = await self._get_file_value_object(
            url=absolute_src,
            pretty_name=self._title,
            public_url=True,
        )

    html_title = self._add_html_tag(self._title, self._TITLE_HTML_TAG)
    rich_title = RichText(data=html_title, format_data=FormatData.HTML)
    return Publication(
        publication_id=file_name,
        title=rich_title,
        url=self._url,
        timestamp=datetime.utcnow(),
        images=[card_file],
    )
async def _get_new_cards(self, card: Tag) -> Optional[Publication]:
    """Turn an image card into a publication carrying colour and author.

    The base name of the card's ``src`` (query string removed) identifies the
    card. For names containing ``'ws_today_'`` the file is fetched first and
    the name reported by the fetched file object is used as the identifier.

    :param card: tag whose ``src`` attribute points to the card image.
    :return: the new publication, or ``None`` when the identifier is already
        in ``self._cache``.
    """
    src = card.attrs['src']
    file_name: str = os.path.basename(src).split('?')[0]

    card_file = None
    if 'ws_today_' in file_name:
        # Fetched before the cache check because the cache key for these
        # cards is the file name reported by the fetched file object.
        card_file = await self._get_file_value_object(
            url=src,
            pretty_name=self._title,
            filename_unique=False,
            public_url=False,
        )
        file_name = card_file.filename

    if file_name in self._cache:
        return None

    if card_file is None:
        absolute_src = urllib.parse.urljoin(self._domain, src)
        card_file = await self._get_file_value_object(
            url=absolute_src,
            pretty_name=self._title,
            public_url=True,
        )

    return Publication(
        publication_id=file_name,
        title=self._title,
        url=self._url,
        timestamp=datetime.utcnow(),
        color=self._colour,
        images=[card_file],
        author=self._AUTHOR,
    )
async def _get_new_new(self, new: element.Tag) -> Optional[Publication]:
    """Build a publication from a single news entry tag.

    The entry's first ``<a>`` supplies the link; a link without a network
    location is resolved against ``self._DOMAIN``. The resolved link is both
    the cache key and the publication id.

    Content is collected depending on where the link points:

    * other host than ``self._NETLOC``: the entry's ``<img>`` ``src`` (query
      string removed) is attached as the only image;
    * same host, content type ``text/html``: the page is parsed and its
      ``entry-content`` element provides the description and up to 5 images;
    * same host, any other content type: the linked resource itself is
      attached as a file.

    :param new: tag of the news entry.
    :return: the new publication, or ``None`` when the link is already in
        ``self._cache``.
    """
    link: str = new.find('a').attrs['href']
    link_parts = urllib.parse.urlparse(link)
    if not link_parts.netloc:
        link = urllib.parse.urljoin(self._DOMAIN, link)
        link_parts = urllib.parse.urlparse(link)

    if link in self._cache:
        return None

    title_str = new.find(class_='title').text.strip()
    title_html = self._add_html_tag(
        string=str(title_str), tag=self._TITLE_HTML_TAG)
    title_rich = RichText(data=title_html, format_data=FormatData.HTML)

    description = None
    images = []
    files = []

    if self._NETLOC != link_parts.netloc:
        thumbnail_src = new.find('img').attrs['src'].split('?')[0]
        thumbnail = await self._get_file_value_object(
            url=thumbnail_src,
            pretty_name=title_str,
            filename_unique=self._FILENAME_UNIQUE,
            public_url=self._PUBLIC_URL,
        )
        images.append(thumbnail)
    else:
        head = await self._get_site_head(url=link)
        if head.content_type != 'text/html':
            linked_file = await self._get_file_value_object(
                url=link,
                pretty_name=title_str,
                filename_unique=self._FILENAME_UNIQUE,
                public_url=self._PUBLIC_URL,
            )
            files.append(linked_file)
        else:
            page_markup = await self._get_site_content(url=link)
            soup = BeautifulSoup(page_markup, 'html5lib')
            entry = soup.find(class_='entry-content')
            description = await self._get_description(data=entry)
            images = await self._get_images(
                data=entry, title=title_str, max_images=5)

    return Publication(
        publication_id=link,
        title=title_rich,
        description=description,
        url=link,
        files=files,
        timestamp=datetime.utcnow(),
        images=images,
    )
async def put(self, publication: Publication):
    """Enqueue the publication once for every entry of ``queue_context_list``.

    For each queue context a copy of the publication is built in which the
    fields listed in that context's ``publication_data`` replace the
    publication's own values. The copy is wrapped in a ``QueueData`` together
    with the context's channel and put on the context's publication queue.

    See QueueContext.Channel

    :param publication: publication to distribute.
    """
    for context in self.queue_context_list:
        # Start from the publication's own fields, then let the per-context
        # values override them.
        merged_fields = dict(publication.__dict__)
        merged_fields.update(context.publication_data)

        queue_data = QueueData(
            channel=context.channel,
            publication=Publication.from_dict(merged_fields),
        )
        await context.publication_queue.put(queue_data)