Python remove_links Examples, utils.remove_links Python Examples

Example #1

0

Show file

    def populate_backlog(self):
        """Load the fallback media directory into ``self.backlog``.

        The directory comes from the ``queue_manager.fallback_media_dir``
        config option (fallback ``"media_fallback"``).  Files for which
        ``get_mp3_info`` raises ``HeaderNotFoundError`` are skipped with a
        warning.  Songs whose path is in ``self.backlog_played_media`` are
        collected separately, shuffled on their own, and appended after the
        shuffled ``self.backlog``.
        """
        media_root = os.path.abspath(
            self.config.get("queue_manager",
                            "fallback_media_dir",
                            fallback="media_fallback"))
        played_before = []

        for file in get_files_in_dir(media_root):
            file_path = os.path.join(media_root, file)
            try:
                title, artist, duration = get_mp3_info(file_path)
            except HeaderNotFoundError:
                self.logger.warning(
                    f"Not loading {file} because it does not look like mp3")
                continue

            title = remove_links(title)
            artist = remove_links(artist)

            # Already-played tracks go to a separate list so they end up
            # behind everything else once both lists are shuffled.
            destination = (played_before
                           if file_path in self.backlog_played_media
                           else self.backlog)
            destination.append(Song(file_path, title, artist, duration, -1))

        # Shuffle order matters for the RNG sequence: main list first.
        random.shuffle(self.backlog)
        random.shuffle(played_before)
        self.backlog += played_before

        backlog_size = len(self.backlog)
        self.logger.info("Fallback playlist length: %d " % backlog_size)

Example #2

0

Show file

    def download(self, query, user_message=lambda text: True):
        """Download the Telegram-hosted audio file described by ``query``.

        Args:
            query: Mapping with the keys ``"id"``, ``"duration"``, ``"size"``,
                ``"info"`` (an object exposing ``file_path``), ``"artist"``
                and ``"title"``.
            user_message: Callable taking one text argument; it receives the
                progress messages meant for the user.  The default ignores
                the text.

        Returns:
            Tuple ``(file_path, title, artist, duration)``.

        Raises:
            MediaIsTooLong: ``duration`` exceeds the ``downloader.max_duration``
                option (fallback ``self._default_max_duration``).
            MediaIsTooBig: ``size`` exceeds ``1000000`` times the
                ``downloader.max_file_size`` option (fallback
                ``self._default_max_size``).
        """
        # Stringify the id once.  The log calls already tolerated a non-string
        # id via str(), but the file name was built with a bare ``+`` and would
        # have raised TypeError for e.g. an integer id.
        file_id = str(query["id"])
        duration = query["duration"]
        file_size = query["size"]
        file_info = query["info"]

        self.logger.debug("Downloading song #%s", file_id)

        artist = remove_links(query["artist"]).strip()
        title = remove_links(query["title"]).strip()

        self.logger.debug("Title for song #%s: %s", file_id, title)

        if duration > self.config.getint("downloader",
                                         "max_duration",
                                         fallback=self._default_max_duration):
            raise MediaIsTooLong(duration)

        # The configured limit is scaled by 1000000 before the comparison
        # (presumably megabytes -> bytes; confirm against the config docs).
        if file_size > 1000000 * self.config.getint(
                "downloader", "max_file_size",
                fallback=self._default_max_size):
            raise MediaIsTooBig(file_size)

        file_dir = self.config.get("downloader", "media_dir", fallback="media")
        file_name = file_id + ".mp3"
        file_path = os.path.join(file_dir, file_name)

        # A cache hit skips the network round-trip entirely.
        if self.is_in_cache(file_path):
            return file_path, title, artist, duration

        user_message("Скачиваем...\n%s" % title)
        self.logger.debug("Querying Telegram API")
        tg_api_url = self.config.get("telegram",
                                     "api_url",
                                     fallback="https://api.telegram.org/")
        bot_token = self.config.get("telegram", "token")

        self.get_file(
            url=tg_api_url +
            'file/bot{0}/{1}'.format(bot_token, file_info.file_path),
            file_path=file_path,
            file_size=file_size,
            percent_callback=lambda p: user_message("Скачиваем [%d%%]...\n%s" %
                                                    (int(p), title)),
        )

        self.logger.debug("Download complete #%s", file_id)

        self.touch_without_creation(file_path)

        self.logger.debug("File stored in path: %s", file_path)

        return file_path, title, artist, duration

Example #3

0

Show file

    def test_remove_links(self):
        # A trailing t.co URL is expected to vanish together with the space
        # that separated it from the preceding text.
        text_with_url = 'SCALABLE SCRAPING USING MACHINE LEARNING https://t.co/WZFCMy7tNz'
        cleaned = remove_links(text_with_url)

        self.assertEqual('SCALABLE SCRAPING USING MACHINE LEARNING', cleaned)

Example #4

0

Show file

 def test_no_links(self):
     # Scheme-prefixed, www-prefixed and bare-domain tokens (with or without
     # a path) are all expected to be stripped, leaving only the plain words.
     text = ("some links http://t.co/rlqo5xfbul www.google.com"
             " bplaced.homepage.net/article/2221 g.com g.co")
     cleaned = ut.remove_links(text)
     self.assertEqual(cleaned, "some links")

Example #5

0

Show file

 def clean(self, content):
     """Run the clean-up helpers over ``content`` and return ``content``.

     The helpers' return values are discarded, so they are presumably
     expected to modify ``content`` in place (confirm against their
     definitions).
     """
     # Order is significant: links are converted to text before being removed.
     for scrub in (link_to_text, remove_links, remove_iframes, remove_scripts):
         scrub(content)
     return content

Example #6

0

Show file

    def download(self, query, user_message=lambda text: True):
        """Download the mp3 behind a URL found in ``query``.

        The URL is extracted with ``self.mp3_dns_regex`` and
        ``self.mp3_ip4_regex``; when both match, the latter wins.  A
        non-empty file already present at the target path is reused without
        any network request.

        Args:
            query: Text searched for an mp3 URL.
            user_message: Callable taking one text argument; receives the
                progress messages meant for the user.

        Returns:
            Tuple ``(file_path, title, artist, duration)``.

        Raises:
            UnappropriateArgument: neither pattern matches ``query``.
            UrlOrNetworkProblem: the HEAD request raised a connection error.
            BadReturnStatus: the HEAD response status is not 200.
            MediaSizeUnspecified: the response has no ``content-length``.
            MediaIsTooBig: the size exceeds ``1000000`` times the
                ``downloader.max_file_size`` option.
            MediaIsTooLong: the downloaded track exceeds the
                ``downloader.max_duration`` option (the file is deleted first).
        """
        url = None
        # Patterns are tried in order; a later match overrides an earlier one.
        for pattern in (self.mp3_dns_regex, self.mp3_ip4_regex):
            found = pattern.search(query)
            if found:
                url = found.group(0)
        if url is None:
            raise UnappropriateArgument()

        self.logger.debug("Sending HEAD to url: " + url)

        media_dir = self.config.get("downloader",
                                    "media_dir",
                                    fallback="media")

        # The file name is the last path segment of the decoded URL.
        last_segment = parse.unquote(url).split("/")[-1]
        file_path = os.path.join(os.getcwd(), media_dir,
                                 sanitize_file_name(last_segment + ".mp3"))

        already_downloaded = (os.path.exists(file_path)
                              and os.path.getsize(file_path) > 0)
        if already_downloaded:
            title, artist, duration = get_mp3_info(file_path)
            return file_path, remove_links(title), remove_links(artist), duration

        user_message("Скачиваем...")
        self.logger.debug("Querying URL")

        try:
            head = requests.head(url, allow_redirects=True)
        except requests.exceptions.ConnectionError as err:
            raise UrlOrNetworkProblem(err)
        if head.status_code != 200:
            raise BadReturnStatus(head.status_code)

        try:
            file_size = int(head.headers['content-length'])
        except KeyError:
            self.logger.error("No content-length header. Headers: %s",
                              str(head.headers))
            raise MediaSizeUnspecified()

        size_limit = 1000000 * self.config.getint(
            "downloader", "max_file_size",
            fallback=self._default_max_size)
        if file_size > size_limit:
            raise MediaIsTooBig()

        def report_progress(p):
            # Forwards the download percentage to the user as a whole number.
            return user_message("Скачиваем [%d%%]...\n" % int(p))

        self.get_file(
            url=url,
            file_path=file_path,
            file_size=file_size,
            percent_callback=report_progress,
        )

        title, artist, duration = get_mp3_info(file_path)
        title = remove_links(title)
        artist = remove_links(artist)

        # Duration is only known after the download, so an over-long track
        # has to be removed from disk before it is rejected.
        duration_limit = self.config.getint(
            "downloader", "max_duration",
            fallback=self._default_max_duration)
        if duration > duration_limit:
            os.unlink(file_path)
            raise MediaIsTooLong()

        self.touch_without_creation(file_path)

        return file_path, title, artist, duration

Example #7

0

Show file

File: YoutubeDownloader.py Project: vladertel/tg_dj

    def download(self, query, user_message=lambda text: True):
        """Download the audio track of the YouTube video referenced in ``query``.

        Args:
            query: Text searched with ``self.yt_regex`` for a video URL.
            user_message: Callable taking one text argument; receives the
                status messages meant for the user.

        Returns:
            Tuple ``(file_path, video_title, "", seconds)``; the third
            element is always an empty string.

        Raises:
            UnappropriateArgument: ``self.yt_regex`` does not match ``query``.
            ApiError: building the ``YouTube`` object or selecting the audio
                stream raised any exception.
            UrlProblem: the video object has no ``video_id``.
            BadReturnStatus: an ``HTTPError`` occurred while reading the
                stream size or while downloading.
            MediaIsTooBig: the stream size exceeds ``1000000`` times the
                ``downloader.max_file_size`` option.
            MediaIsTooLong: the video length exceeds the
                ``downloader.max_duration`` option.
        """
        match = self.yt_regex.search(query)
        if match:
            url = match.group(0)
        else:
            raise UnappropriateArgument()

        self.logger.info("Getting url: " + url)
        user_message("Загружаем информацию о видео...")

        media_dir = self.config.get("downloader", "media_dir", fallback="media")

        # Any failure while creating the video object or picking the first
        # audio-only stream is printed and reported as a generic ApiError.
        try:
            video = YouTube(url, on_progress_callback=self.video_download_progress)
            stream = video.streams.filter(only_audio=True).first()
        except Exception:
            traceback.print_exc()
            raise ApiError()
        video_id = video.video_id
        # Fetched up front so it can serve as the title fallback below.
        video_details = video.player_config_args.get('player_response', {}).get('videoDetails', {})
        if video_id is None:
            raise UrlProblem()
        # Accessing video.title may raise KeyError; in that case the title is
        # taken from video_details, defaulting to 'Unknown YT video'.
        try:
            video_title = html.unescape(video.title)
            self.logger.debug("Video title [using primary method]: " + video_title)
        except KeyError:
            video_title = html.unescape(video_details.get('title', 'Unknown YT video'))
            self.logger.debug("Video title [using fallback method]: " + video_title)

        video_title = remove_links(video_title)

        # Reading stream.filesize can raise HTTPError, hence the guard.
        try:
            file_size = int(stream.filesize)
        except HTTPError as e:
            traceback.print_exc()
            raise BadReturnStatus(e.code)
        # The configured limit is scaled by 1000000 before the comparison
        # (presumably megabytes -> bytes; confirm against the config docs).
        if file_size > 1000000 * self.config.getint("downloader", "max_file_size", fallback=self._default_max_size):
            raise MediaIsTooBig()

        file_dir = media_dir
        file_name = sanitize_file_name("youtube-" + str(video_id))

        seconds = video.length

        if seconds > self.config.getint("downloader", "max_duration", fallback=self._default_max_duration):
            raise MediaIsTooLong()

        # Registered before the cache check, so the entry is created even
        # when the file is served from cache.  Presumably read by
        # self.video_download_progress -- verify against that method.
        self.download_status[str(video_id)] = {
            "start_time": time.time(),
            "last_update": time.time(),
            "file_size": file_size,
            "stream": stream,
            "title": video_title,
            "user_message": user_message,
        }

        # NOTE(review): stream.download() below receives file_name without an
        # extension while file_path assumes ".mp4"; this presumably relies on
        # the library appending the extension -- confirm.
        file_path = os.path.join(file_dir, file_name) + ".mp4"
        if self.is_in_cache(file_path):
            self.logger.debug("Loading from cache: " + file_path)
            return file_path, video_title, "", seconds

        if not os.path.exists(file_dir):
            os.makedirs(file_dir)
            self.logger.debug("Media dir have been created: " + file_dir)

        self.logger.info("Downloading audio from video: " + video_id)
        user_message("Скачиваем...\n%s" % video_title)

        try:
            stream.download(output_path=file_dir, filename=file_name)
        except HTTPError as e:
            traceback.print_exc()
            raise BadReturnStatus(e.code)
        self.touch_without_creation(file_path)

        self.logger.debug("File stored in path: " + file_path)

        return file_path, video_title, "", seconds