class VkPostParser:
    """Turn a raw VK wall post dict into text, media and markup for reposting.

    After ``generate_post()`` the results are available on the instance:
    ``text`` (HTML-formatted string), ``media`` (photos/videos), ``docs``,
    ``tracks``, ``poll``, ``reply_markup`` and, after ``generate_repost()``,
    ``repost`` (a nested ``VkPostParser`` for the reposted entry).
    """

    # Size threshold (bytes) above which a video is linked instead of attached;
    # the accompanying log message describes it as "more than 2 GB".
    _MAX_VIDEO_SIZE = 2097152000

    def __init__(self, post, domain, session, sign_posts=False, what_to_parse=None):
        """Store the raw post and prepare empty result containers.

        :param post: raw post dict; must contain ``owner_id`` and ``id``.
        :param domain: community domain; after ``DOMAIN_REGEX`` is stripped it
            forms the ``@mention`` pattern removed from the post text.
        :param session: VK session object used for API and HTTP calls.
        :param sign_posts: when true, the author/original links are added.
        :param what_to_parse: set of content kinds to process
            (e.g. ``{"text", "photo"}``); defaults to ``{"all"}``.
        """
        self.session = session
        try:
            self.audio_session = VkAudio(session)
        except IndexError:
            # Audio access could not be set up; generate_music() will log the
            # resulting error instead of crashing.
            self.audio_session = None
        self.sign_posts = sign_posts
        self.pattern = "@" + sub(DOMAIN_REGEX, "", domain)
        self.raw_post = post
        self.post_url = "https://vk.com/wall{owner_id}_{id}".format(**self.raw_post)
        self.text = ""
        self.user = None
        self.repost = None
        self.repost_source = None
        self.reply_markup = None
        self.media = []
        self.docs = []
        self.tracks = []
        self.poll = None
        self.attachments_types = []
        self.what_to_parse = what_to_parse if what_to_parse else {"all"}

    def _wants(self, kind):
        """Return True when *kind* (or ``"all"``) was requested in ``what_to_parse``."""
        return bool(self.what_to_parse.intersection({kind, "all"}))

    def generate_post(self):
        """Run every enabled parsing step and fill the result attributes."""
        log.info("[AP] Парсинг поста.")
        if self._wants("text"):
            self.generate_text()
        if "attachments" in self.raw_post:
            attachments = self.raw_post["attachments"]
            self.attachments_types = tuple(x["type"] for x in attachments)
            for attachment in attachments:
                kind = attachment["type"]
                if kind in ["link", "page", "album"] and self._wants("link"):
                    self.generate_link(attachment)
                if kind == "photo" and self._wants("photo"):
                    self.generate_photo(attachment)
                if kind == "video" and self._wants("video"):
                    self.generate_video(attachment)
                if kind == "doc" and self._wants("doc"):
                    self.generate_doc(attachment)
                # NOTE: the option name for polls is plural ("polls").
                if kind == "poll" and self._wants("polls"):
                    self.generate_poll(attachment)
            if self._wants("music"):
                self.generate_music()
        if self.sign_posts:
            self.generate_user()
            self.sign_post()

    def generate_text(self):
        """Append the post text, HTML-escaped, with VK ``[id|name]`` links converted."""
        if self.raw_post["text"]:
            log.info("[AP] Обнаружен текст. Извлечение.")
            self.text += self.raw_post["text"] + "\n"
            if self.pattern != "@":
                self.text = sub(self.pattern, "", self.text, flags=IGNORECASE)
            # Escape HTML special characters before any markup is added.
            # "&" must be handled first so the entities produced for "<" and
            # ">" are not escaped a second time.
            self.text = (
                self.text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
            )
            # VK inline mention/link syntax: [target|label] -> HTML anchor.
            self.text = sub(
                r"\[(.*?)\|(.*?)\]",
                r'<a href="https://vk.com/\1">\2</a>',
                self.text,
                flags=MULTILINE,
            )

    def generate_link(self, attachment):
        """Append a text line describing a link, wiki page or album attachment."""
        log.info("[AP] Парсинг ссылки...")
        if attachment["type"] == "link" and attachment["link"]["title"]:
            log.debug("Detected link. Adding to message")
            self.text += '\n🔗 <a href="{url}">{title}</a>'.format(**attachment["link"])
        elif attachment["type"] == "page":
            log.debug("Detected wiki page. Adding to message")
            self.text += '\n🔗 <a href="{view_url}">{title}</a>\n👁 {views} раз(а)'.format(
                **attachment["page"]
            )
        elif attachment["type"] == "album":
            log.debug("Detected album. Adding to message")
            self.text += (
                '\n🖼 <a href="https://vk.com/album{owner_id}_{id}">'
                "Альбом с фотографиями: {title}</a>\n"
                "Описание: {description}".format(**attachment["album"])
            )

    def generate_photo(self, attachment):
        """Download the last-listed size of a photo and add it to ``media``."""
        sizes = attachment["photo"]["sizes"]
        if not sizes:
            # Nothing to download; previously this passed None to download().
            return
        # The last entry of "sizes" is the one used (presumably the largest —
        # TODO confirm against the VK API ordering).
        photo = download(sizes[-1]["url"], bar=None)
        if photo:
            self.media.append(InputMediaPhoto(photo))

    def generate_doc(self, attachment):
        """Download a document attachment; on failure, link to it in the text."""
        doc_info = attachment["doc"]
        try:
            # Strip characters that are not allowed in file names.
            doc_info["title"] = sub(r"[/\\:*?\"><|]", "", doc_info["title"])
            if doc_info["title"].endswith(doc_info["ext"]):
                out_name = "{title}".format(**doc_info)
            else:
                out_name = "{title}.{ext}".format(**doc_info)
            doc = download(doc_info["url"], out=out_name)
            self.docs.append(InputMediaDocument(doc))
        except urllib.error.URLError as error:
            log.exception("[AP] Невозможно скачать вложенный файл: {0}.", error)
            self.text += '\n📃 <a href="{url}">{title}</a>'.format(**doc_info)

    @staticmethod
    def _video_caption(video_link, video):
        """Build the text line that links to a video instead of attaching it."""
        return '\n🎥 <a href="{0}">{1[title]}</a>\n👁 {1[views]} раз(а) ⏳ {1[duration]} сек'.format(
            video_link.replace("m.", ""), video
        )

    def generate_video(self, attachment):
        """Attach a VK-hosted video, or link to it when external or too large."""
        log.info("[AP] Извлечение видео...")
        video = attachment["video"]
        video_link = "https://m.vk.com/video{owner_id}_{id}".format(**video)
        if video.get("platform"):
            # A "platform" value marks a video hosted outside VK: link only.
            self.text += self._video_caption(video_link, video)
            return None
        soup = BeautifulSoup(self.session.http.get(video_link).text, "html.parser")
        sources = soup.find_all("source")
        if len(sources) >= 2:
            # The second <source> element of the mobile page is the one used.
            video_link = sources[1].get("src")
            file = download_video(self.session.http, video_link)
            if getsize(file) >= self._MAX_VIDEO_SIZE:
                log.info(
                    "[AP] Видео весит более 2 ГБ. Добавляем ссылку на видео в текст."
                )
                self.text += self._video_caption(video_link, video)
                return None
            self.media.append(InputMediaVideo(file))
        return None

    def generate_music(self):
        """Download, tag and collect the post's audio tracks into ``tracks``."""
        if "audio" not in self.attachments_types:
            return
        log.info("[AP] Извлечение аудио...")
        try:
            tracks = self.audio_session.get_post_audio(
                self.raw_post["owner_id"], self.raw_post["id"]
            )
        except Exception as error:
            # Also covers audio_session being None (see __init__).
            log.error("Ошибка получения аудиозаписей: {0}", error)
            return
        for track in tracks:
            # Keep only filename-safe characters and leave room for a suffix.
            name = (
                sub(
                    r"[^a-zA-Z '#0-9.а-яА-Я()-]",
                    "",
                    track["artist"] + " - " + track["title"],
                )[: MAX_FILENAME_LENGTH - 16]
                + ".mp3"
            )
            if ".m3u8" in track["url"]:
                log.warning("Файлом аудиозаписи является m3u8 плейлист.")
                file = name
                stream_file = name.replace(".mp3", ".ts")
                # Two-step conversion: dump the stream, then re-encode to mp3.
                streamlink_args = [
                    "streamlink", "--output", stream_file, track["url"], "best"
                ]
                ffmpeg_args = ["ffmpeg", "-i", stream_file, "-b:a", "320k", name]
                result = start_process(streamlink_args)
                if result > 0:
                    log.critical(
                        "При запуске команды {} произошла ошибка.",
                        " ".join(streamlink_args),
                    )
                    continue
                result = start_process(ffmpeg_args)
                if result > 0:
                    log.critical(
                        "При запуске команды {} произошла ошибка",
                        " ".join(ffmpeg_args),
                    )
                    continue
            else:
                try:
                    file = download(track["url"], out=name)
                except (urllib.error.URLError, IndexError):
                    log.exception(
                        "[AP] Не удалось скачать аудиозапись. Пропускаем ее..."
                    )
                    continue
            track_cover = (
                download(track["track_covers"][-1]) if track["track_covers"] else None
            )
            log.debug("Adding tags in track")
            result = add_audio_tags(
                file,
                title=track["title"],
                artist=track["artist"],
                track_cover=track_cover,
            )
            if result:
                log.debug("Track {} ready for sending", name)
                self.tracks.append(
                    InputMediaAudio(
                        name,
                        track_cover,
                        duration=track["duration"],
                        performer=track["artist"],
                        title=track["title"],
                    )
                )

    def generate_poll(self, attachment):
        """Store the poll attachment as a dict of poll parameters in ``poll``."""
        poll = attachment["poll"]
        self.poll = {
            "question": poll["question"],
            "options": [answer["text"] for answer in poll["answers"]],
            "allows_multiple_answers": poll["multiple"],
            "is_anonymous": poll["anonymous"],
        }
        # Pad single-option polls with a placeholder second option.
        if len(self.poll["options"]) == 1:
            self.poll["options"].append("...")

    def sign_post(self):
        """Add author and original-post links as buttons or as text lines.

        When the post has more than one photo the links are appended to the
        text; otherwise they are placed in ``reply_markup`` as inline buttons.
        """
        button_list = []
        log.info("[AP] Подписывание поста и добавление ссылки на его оригинал.")
        if self.user:
            # generate_user() requests the "domain" field; fall back to the
            # numeric id form if it is missing from the response.
            user = "https://vk.com/{}".format(
                self.user.get("domain") or "id{}".format(self.user["id"])
            )
            button_list.append(
                InlineKeyboardButton(
                    "Автор поста: {first_name} {last_name}".format(**self.user),
                    url=user,
                )
            )
        if self.attachments_types.count("photo") > 1:
            if self.user:
                self.text += '\nАвтор поста: <a href="{}">{first_name} {last_name}</a>'.format(
                    user, **self.user
                )
            self.text += '\n<a href="{}">Оригинал поста</a>'.format(self.post_url)
        else:
            button_list.append(InlineKeyboardButton("Оригинал поста", url=self.post_url))
        self.reply_markup = (
            InlineKeyboardMarkup(build_menu(button_list, n_cols=1))
            if button_list
            else None
        )

    def generate_user(self):
        """Look up the post's signer (or non-owner author) and store it in ``user``."""
        if "signer_id" in self.raw_post:
            log.debug("Retrieving signer_id")
            user_id = self.raw_post["signer_id"]
        elif self.raw_post["owner_id"] != self.raw_post["from_id"]:
            user_id = self.raw_post["from_id"]
        else:
            return
        self.user = self.session.method(
            method="users.get",
            values={"user_ids": user_id, "fields": "domain"},
        )[0]

    def generate_repost(self):
        """Parse the first ``copy_history`` entry into ``self.repost``.

        The source is first looked up as a community; if the API rejects that,
        it is looked up as a user.
        """
        log.info("Включена отправка репоста. Начинаем парсинг репоста.")
        original = self.raw_post["copy_history"][0]
        source_id = int(original["from_id"])
        try:
            # The id is negated before the community lookup.
            source_info = self.session.method(
                method="groups.getById", values={"group_id": -source_id}
            )[0]
            repost_source = 'Репост из <a href="https://vk.com/{screen_name}">{name}</a>:\n\n'.format(
                **source_info
            )
        except exceptions.ApiError:
            source_info = self.session.method(
                method="users.get", values={"user_ids": source_id}
            )[0]
            repost_source = 'Репост от <a href="https://vk.com/id{id}">{first_name} {last_name}</a>:\n\n'.format(
                **source_info
            )
        self.repost = VkPostParser(
            original,
            source_info.get("screen_name", ""),
            self.session,
            self.sign_posts,
            self.what_to_parse,
        )
        self.repost.generate_post()
        self.repost.text = repost_source + self.repost.text
class GetAudioListThread(QThread):
    """Background thread that authorizes in VK and fetches an audio list.

    The result of ``run()`` is delivered through ``signal``: either a
    ``(tracks, title, albums)`` tuple or an error message string.
    ``str_signal`` asks the UI for a two-factor code and ``image_signal``
    hands a captcha image to the UI; in both cases the answer is read from
    ``window.key``.
    """

    signal = pyqtSignal("PyQt_PyObject")
    str_signal = pyqtSignal(str)
    image_signal = pyqtSignal("QImage")

    def __init__(self, cookie, window):
        """Create the thread.

        :param cookie: passed to ``VkApi`` as ``config_filename``.
        :param window: object whose ``key`` attribute receives the code typed
            by the user for 2FA / captcha prompts.
        """
        QThread.__init__(self)
        self.login = ""
        self.password = ""
        self.user_link = ""
        self.statusBar = None
        self.save_password = False
        self.authorized = False
        self.cookie = cookie
        self.window = window

    def __del__(self):
        self.wait()

    def _user_auth(self):
        """Create the VK session (login/password, or password used as a token)."""
        if self.login:
            self.session = VkApi(
                login=self.login,
                password=self.password,
                auth_handler=self.auth_handler,
                captcha_handler=self.captcha_handler,
                config_filename=self.cookie,
            )
            self.statusBar.showMessage("Авторизация.")
            self.session.auth()
        else:
            self.statusBar.showMessage(
                "Логин не указан, использование пароля в качестве токена"
            )
            self.session = VkApi(
                token=self.password, captcha_handler=self.captcha_handler
            )
        self.vk_audio = VkAudio(self.session)
        self.authorized = True

    def _get_audio(self):
        """Fetch audio for ``self.user_link``.

        The link is interpreted, in order, as a wall post, a playlist, or a
        user/community page.

        :return: ``(tracks, title, albums)``.
        """
        tracks = []
        albums = []
        string = str()
        # Try to get post audio list
        post = self.get_group_and_post_id(self.user_link)
        album = self.get_album_id(self.user_link)
        if isinstance(post, tuple):
            owner_id, post_id = post
            self.statusBar.showMessage("Получение списка аудиозаписей поста.")
            string = "Аудиозаписи поста"
            tracks = self.vk_audio.get_post_audio(owner_id, post_id)
            audios = ",".join(["{owner_id}_{id}".format(**i) for i in tracks])
            tracks = self.session.method(
                method="audio.getById", values={"audios": audios}
            )
        elif isinstance(album, tuple):
            owner_id, album_id, *_ = album
            self.statusBar.showMessage("Получение списка аудиозаписей альбома.")
            string = "Аудиозаписи альбома"
            tracks = self._get_tracks(owner_id, album_id)
        else:
            user_id = self.get_user_id(self.user_link)
            # Try to get user or group audio list
            # noinspection PyBroadException
            try:
                owner_id = self.session.method(
                    "users.get", dict(user_ids=user_id)
                )[0]
                self.statusBar.showMessage(
                    "Получение списка аудиозаписей пользователя: {first_name} {last_name}"
                    .format(**owner_id)
                )
                string = "Музыка пользователя: {first_name} {last_name}".format(
                    **owner_id
                )
            except Exception:
                # Not a user: retry the same identifier as a community.
                group_id = self.session.method(
                    "groups.getById", dict(group_id=user_id)
                )[0]
                self.statusBar.showMessage(
                    "Получение списка аудиозаписей сообщества: {name}".format(
                        **group_id
                    )
                )
                string = "Музыка сообщества: {}".format(group_id["name"])
                # The community id is negated to form the owner id.
                albums = self._get_albums(-group_id["id"])
                tracks = self._get_tracks(-group_id["id"])
            else:
                albums = self._get_albums(owner_id["id"])
                tracks = self._get_tracks(owner_id["id"])
        for album in albums:
            try:
                album["tracks"] = self.vk_audio.get(
                    owner_id=album["owner_id"],
                    album_id=album["id"],
                    access_hash=album["access_hash"],
                )
            except Exception:
                # Use the album's own owner: the local ``owner_id`` is not
                # bound in the community branch above.
                album["tracks"] = self._get_tracks(album["owner_id"], album["id"])
        return tracks, string, albums

    def _get_tracks(self, owner_id, album_id=None, access_hash=None):
        """Return the tracks of *owner_id* (optionally of one album).

        ``VkAudio.get`` is tried first; if it fails, the list is paged through
        the ``audio.get`` / ``audio.getById`` API methods.
        """
        try:
            tracks = self.vk_audio.get(owner_id, album_id, access_hash)
        except Exception:
            values = {"owner_id": owner_id}
            if album_id:
                values["album_id"] = album_id
            res = self.session.method(method="audio.get", values=values)
            remaining = res["count"]
            offset = 0
            tracks = []
            while remaining > 0:
                if not res["items"]:
                    # Empty page although items are still reported: stop
                    # rather than keep requesting.
                    break
                audios = ",".join(
                    ["{owner_id}_{id}".format(**i) for i in res["items"]]
                )
                tracks.extend(
                    self.session.method(
                        method="audio.getById", values={"audios": audios}
                    )
                )
                step = min(remaining, 200)
                offset += step
                remaining -= step
                if remaining:
                    # Fetch the next page only when something is left.
                    values["offset"] = offset
                    res = self.session.method(method="audio.get", values=values)
        return tracks

    def _get_albums(self, owner_id):
        """Return the playlists of *owner_id* (a numeric owner id).

        ``VkAudio.get_albums`` is tried first; if it fails, the list is paged
        through the ``audio.getPlaylists`` API method.
        """
        try:
            albums = self.vk_audio.get_albums(owner_id)
        except Exception:
            res = self.session.method(
                method="audio.getPlaylists",
                values={"owner_id": owner_id},
            )
            remaining = res["count"]
            offset = 0
            albums = []
            while remaining > 0:
                if not res["items"]:
                    break
                albums.extend(res["items"])
                step = min(remaining, 10)
                offset += step
                remaining -= step
                if remaining:
                    res = self.session.method(
                        method="audio.getPlaylists",
                        values={"owner_id": owner_id, "offset": offset},
                    )
        return albums

    def auth_handler(self):
        """Handle two-factor authentication.

        Asks the UI for the code via ``str_signal`` and waits until
        ``window.key`` is set.

        :return: key, remember_device
        """
        self.str_signal.emit("Введите код авторизации:")
        while not self.window.key:
            # Poll with a short sleep instead of spinning at 100% CPU.
            self.msleep(100)
        return self.window.key, self.save_password

    def captcha_handler(self, captcha):
        """Show the captcha image to the user and retry with the entered key."""
        url = captcha.get_url()
        res = self.session.http.get(url, stream=True)
        res.raw.decode_content = True
        # The temporary file is closed (and removed) as soon as it is read.
        with TemporaryFile() as file:
            shutil.copyfileobj(res.raw, file)
            file.seek(0)
            data = file.read()
        image = QImage()
        image.loadFromData(data)
        self.image_signal.emit(image)
        while not self.window.key:
            # Poll with a short sleep instead of spinning at 100% CPU.
            self.msleep(100)
        return captcha.try_again(self.window.key)

    def run(self):
        """Thread entry point: authorize if needed, fetch audio, emit the result.

        Any failure is converted to a message string and emitted on ``signal``.
        """
        try:
            if not self.authorized:
                self._user_auth()
            result = self._get_audio()
            self.signal.emit(result)
        except exceptions.BadPassword:
            self.signal.emit("Неверный логин или пароль.")
        except exceptions.LoginRequired:
            self.signal.emit("Требуется логин.")
        except exceptions.PasswordRequired:
            self.signal.emit("Требуется пароль.")
        except (IndexError, AttributeError):
            self.signal.emit(
                "Невозможно получить список аудиозаписей. Проверьте, открыты ли они у пользователя."
            )
        except exceptions.ApiError as e:
            # The message is matched against API error codes as substrings.
            if "113" in str(e):
                self.signal.emit(
                    "Неверная ссылка на профиль пользователя (неверный ID пользователя)."
                )
            elif "100" in str(e):
                self.signal.emit(
                    "Неверная ссылка на профиль пользователя (сообщества)."
                )
            else:
                self.signal.emit(str(e))
        except Exception as e:
            self.signal.emit(str(type(e)) + str(e))

    @staticmethod
    def get_user_id(link):
        """Return the part of *link* after the optional vk.com prefix, or None."""
        result = findall(r"(https?://m?\.?vk\.com/)?(.*)$", link)[0][1]
        return result if result else None

    @staticmethod
    def get_group_and_post_id(link):
        """Return ``(owner_id, post_id)`` strings from a ``wall<owner>_<post>`` link, or None."""
        result = findall(r"wall(.*?)_(.*?)$", link)
        return result[0] if result else None

    @staticmethod
    def get_album_id(link):
        """Return the playlist id parts found in *link* as a tuple, or None.

        Several link shapes are tried in order; the tuple holds owner id,
        album id and, where the shape includes it, a third component.
        """
        link = link.replace("%2F", "/")
        patterns = (
            r"playlist/(.*)_(.*)_(.*)\?",
            r"playlist/(.*)_(.*)_(.*)",
            r"audio_playlist(.*)_(.*)&access_hash=(.*)",
            r"audio_playlist(.*)_(.*)/(.*)",
            r"audio_playlist(.*)_(.*)",
        )
        for pattern in patterns:
            result = findall(pattern, link)
            if result:
                return result[0]
        return None