def populate_backlog(self):
    """Append the fallback media files to ``self.backlog`` as songs.

    The directory comes from the ``queue_manager.fallback_media_dir``
    config option (default ``"media_fallback"``).  Files for which
    ``get_mp3_info`` raises ``HeaderNotFoundError`` are skipped with a
    warning.  Songs whose path is in ``self.backlog_played_media`` are
    collected separately, shuffled on their own and placed after the
    (shuffled) rest of the backlog.
    """
    media_root = os.path.abspath(
        self.config.get("queue_manager", "fallback_media_dir",
                        fallback="media_fallback"))
    candidates = get_files_in_dir(media_root)
    already_played = []

    for name in candidates:
        song_path = os.path.join(media_root, name)
        try:
            title, artist, duration = get_mp3_info(song_path)
        except HeaderNotFoundError:
            self.logger.warning(
                f"Not loading {name} because it does not look like mp3")
            continue

        title = remove_links(title)
        artist = remove_links(artist)

        # Previously played tracks go to a separate list so they end up
        # at the tail of the backlog.
        if song_path in self.backlog_played_media:
            target = already_played
        else:
            target = self.backlog
        target.append(Song(song_path, title, artist, duration, -1))

    # Shuffle each group independently, then put the played group last.
    random.shuffle(self.backlog)
    random.shuffle(already_played)
    self.backlog += already_played

    self.logger.info("Fallback playlist length: %d " % len(self.backlog))
def download(self, query, user_message=lambda text: True):
    """Download an audio file described by ``query`` via the Telegram file API.

    Args:
        query: Mapping with the keys ``"id"``, ``"duration"``, ``"size"``,
            ``"info"`` (an object exposing ``file_path``), ``"artist"``
            and ``"title"``.
        user_message: Callable that receives progress text for the user.

    Returns:
        Tuple ``(file_path, title, artist, duration)``.

    Raises:
        MediaIsTooLong: ``duration`` exceeds ``downloader.max_duration``.
        MediaIsTooBig: ``size`` exceeds ``downloader.max_file_size``
            multiplied by 1,000,000.
    """
    file_id = query["id"]
    duration = query["duration"]
    file_size = query["size"]
    file_info = query["info"]

    # Convert once: the id is used both in log messages and in the file
    # name, and must not break string concatenation if it is not a str.
    song_tag = str(file_id)

    self.logger.debug("Downloading song #" + song_tag)

    artist = remove_links(query["artist"]).strip()
    title = remove_links(query["title"]).strip()
    self.logger.debug("Title for song #" + song_tag + ": " + title)

    if duration > self.config.getint("downloader", "max_duration",
                                     fallback=self._default_max_duration):
        raise MediaIsTooLong(duration)
    if file_size > 1000000 * self.config.getint(
            "downloader", "max_file_size",
            fallback=self._default_max_size):
        raise MediaIsTooBig(file_size)

    file_dir = self.config.get("downloader", "media_dir", fallback="media")
    file_name = song_tag + ".mp3"
    file_path = os.path.join(file_dir, file_name)

    # Cached files are returned without contacting Telegram.
    if self.is_in_cache(file_path):
        return file_path, title, artist, duration

    user_message("Скачиваем...\n%s" % title)
    self.logger.debug("Querying Telegram API")

    tg_api_url = self.config.get("telegram", "api_url",
                                 fallback="https://api.telegram.org/")
    bot_token = self.config.get("telegram", "token")

    self.get_file(
        url=tg_api_url + 'file/bot{0}/{1}'.format(bot_token,
                                                  file_info.file_path),
        file_path=file_path,
        file_size=file_size,
        percent_callback=lambda p: user_message(
            "Скачиваем [%d%%]...\n%s" % (int(p), title)),
    )

    self.logger.debug("Download complete #" + song_tag)
    self.touch_without_creation(file_path)
    self.logger.debug("File stored in path: " + file_path)

    return file_path, title, artist, duration
def test_remove_links(self):
    """remove_links drops a trailing URL and leaves only the tweet text."""
    cleaned = remove_links(
        'SCALABLE SCRAPING USING MACHINE LEARNING https://t.co/WZFCMy7tNz')
    self.assertEqual('SCALABLE SCRAPING USING MACHINE LEARNING', cleaned)
def test_no_links(self):
    """After ut.remove_links only the plain words "some links" remain."""
    text = ("some links http://t.co/rlqo5xfbul www.google.com"
            " bplaced.homepage.net/article/2221 g.com g.co")
    stripped = ut.remove_links(text)
    self.assertEqual(stripped, "some links")
def clean(self, content):
    """Run the clean-up helpers over ``content`` and return it.

    The helpers are applied in a fixed order: ``link_to_text``,
    ``remove_links``, ``remove_iframes``, ``remove_scripts``.  Their
    return values are ignored, so they are presumably expected to modify
    ``content`` in place -- TODO confirm against the helper definitions.
    """
    steps = (link_to_text, remove_links, remove_iframes, remove_scripts)
    for step in steps:
        step(content)
    return content
def download(self, query, user_message=lambda text: True):
    """Download an mp3 from a direct URL found in ``query``.

    Args:
        query: Text searched with ``self.mp3_dns_regex`` and
            ``self.mp3_ip4_regex``; when both match, the latter wins.
        user_message: Callable that receives progress text for the user.

    Returns:
        Tuple ``(file_path, title, artist, duration)``.

    Raises:
        UnappropriateArgument: neither regex matches ``query``.
        UrlOrNetworkProblem: the HEAD request raises a connection error.
        BadReturnStatus: the HEAD response status is not 200.
        MediaSizeUnspecified: the response has no content-length header.
        MediaIsTooBig: the reported size exceeds
            ``downloader.max_file_size`` multiplied by 1,000,000.
        MediaIsTooLong: the downloaded file's duration exceeds
            ``downloader.max_duration`` (the file is removed first).
    """

    def read_tags(path):
        # Read the mp3 metadata and strip links from the text fields.
        raw_title, raw_artist, length = get_mp3_info(path)
        return remove_links(raw_title), remove_links(raw_artist), length

    def report_progress(p):
        return user_message("Скачиваем [%d%%]...\n" % int(p))

    url = None
    for pattern in (self.mp3_dns_regex, self.mp3_ip4_regex):
        found = pattern.search(query)
        if found:
            url = found.group(0)
    if url is None:
        raise UnappropriateArgument()

    self.logger.debug("Sending HEAD to url: " + url)

    media_dir = self.config.get("downloader", "media_dir", fallback="media")
    file_dir = os.path.join(os.getcwd(), media_dir)
    last_segment = parse.unquote(url).split("/")[-1]
    file_name = sanitize_file_name(last_segment + ".mp3")
    file_path = os.path.join(file_dir, file_name)

    # A non-empty file already on disk is reused without any request.
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        title, artist, duration = read_tags(file_path)
        return file_path, title, artist, duration

    user_message("Скачиваем...")
    self.logger.debug("Querying URL")

    try:
        response_head = requests.head(url, allow_redirects=True)
    except requests.exceptions.ConnectionError as e:
        raise UrlOrNetworkProblem(e)

    status = response_head.status_code
    if status != 200:
        raise BadReturnStatus(status)

    try:
        file_size = int(response_head.headers['content-length'])
    except KeyError:
        self.logger.error("No content-length header. Headers: %s",
                          str(response_head.headers))
        raise MediaSizeUnspecified()

    size_limit = 1000000 * self.config.getint(
        "downloader", "max_file_size", fallback=self._default_max_size)
    if file_size > size_limit:
        raise MediaIsTooBig()

    self.get_file(
        url=url,
        file_path=file_path,
        file_size=file_size,
        percent_callback=report_progress,
    )

    title, artist, duration = read_tags(file_path)

    # Duration is only known after download, so an over-long file has
    # to be deleted again.
    max_duration = self.config.getint(
        "downloader", "max_duration", fallback=self._default_max_duration)
    if duration > max_duration:
        os.unlink(file_path)
        raise MediaIsTooLong()

    self.touch_without_creation(file_path)
    return file_path, title, artist, duration
def download(self, query, user_message=lambda text: True):
    """Download the audio track of a YouTube video referenced in ``query``.

    Args:
        query: Text searched with ``self.yt_regex`` for a video URL.
        user_message: Callable that receives progress text for the user.

    Returns:
        Tuple ``(file_path, video_title, "", seconds)``; the artist slot
        is always an empty string.

    Raises:
        UnappropriateArgument: ``query`` contains no matching URL.
        ApiError: creating the ``YouTube`` object or listing its streams
            fails, or the video has no audio-only stream.
        UrlProblem: the video id is ``None``.
        BadReturnStatus: an ``HTTPError`` occurs while reading the stream
            size or downloading.
        MediaIsTooBig: the stream size exceeds
            ``downloader.max_file_size`` multiplied by 1,000,000.
        MediaIsTooLong: the video length exceeds
            ``downloader.max_duration``.
    """
    match = self.yt_regex.search(query)
    if not match:
        raise UnappropriateArgument()
    url = match.group(0)

    self.logger.info("Getting url: " + url)
    user_message("Загружаем информацию о видео...")

    media_dir = self.config.get("downloader", "media_dir", fallback="media")

    try:
        video = YouTube(url,
                        on_progress_callback=self.video_download_progress)
        stream = video.streams.filter(only_audio=True).first()
    except Exception:
        traceback.print_exc()
        raise ApiError()

    video_id = video.video_id
    video_details = video.player_config_args.get(
        'player_response', {}).get('videoDetails', {})
    if video_id is None:
        raise UrlProblem()

    try:
        video_title = html.unescape(video.title)
        self.logger.debug(
            "Video title [using primary method]: " + video_title)
    except KeyError:
        video_title = html.unescape(
            video_details.get('title', 'Unknown YT video'))
        self.logger.debug(
            "Video title [using fallback method]: " + video_title)
    video_title = remove_links(video_title)

    # ``first()`` yields None when the filter matches nothing; report it
    # as an API failure rather than crashing on ``stream.filesize``.
    if stream is None:
        self.logger.error(
            "No audio-only stream available for video: %s", video_id)
        raise ApiError()

    try:
        file_size = int(stream.filesize)
    except HTTPError as e:
        traceback.print_exc()
        raise BadReturnStatus(e.code)
    if file_size > 1000000 * self.config.getint(
            "downloader", "max_file_size",
            fallback=self._default_max_size):
        raise MediaIsTooBig()

    file_dir = media_dir
    file_name = sanitize_file_name("youtube-" + str(video_id))

    seconds = video.length
    if seconds > self.config.getint("downloader", "max_duration",
                                    fallback=self._default_max_duration):
        raise MediaIsTooLong()

    # State stored per video id; it is registered before the cache check,
    # exactly as before.
    self.download_status[str(video_id)] = {
        "start_time": time.time(),
        "last_update": time.time(),
        "file_size": file_size,
        "stream": stream,
        "title": video_title,
        "user_message": user_message,
    }

    file_path = os.path.join(file_dir, file_name) + ".mp4"
    if self.is_in_cache(file_path):
        self.logger.debug("Loading from cache: " + file_path)
        return file_path, video_title, "", seconds

    if not os.path.exists(file_dir):
        os.makedirs(file_dir)
        self.logger.debug("Media dir have been created: " + file_dir)

    self.logger.info("Downloading audio from video: " + video_id)
    user_message("Скачиваем...\n%s" % video_title)

    try:
        stream.download(output_path=file_dir, filename=file_name)
    except HTTPError as e:
        traceback.print_exc()
        raise BadReturnStatus(e.code)

    self.touch_without_creation(file_path)
    self.logger.debug("File stored in path: " + file_path)

    return file_path, video_title, "", seconds