def process_entry(self, e_hash: str, entry: Any) -> Tuple[str, Optional[Video]]:
    """Resolve a single playlist entry into a ``Video``.

    :param e_hash: Extractor hash identifying the entry. It is always
        returned unchanged as the first element of the result.
    :param entry: Unprocessed entry handed to ``ydl.process_ie_result``.
    :return: ``(e_hash, video)``. ``video`` is ``None`` when the recorded
        extractor fail count has reached ``self.max_fail``, when youtube-dl
        raises ``DownloadError``, or when the video's age limit exceeds
        ``config.ytcc.age_limit``.
    """
    # Skip entries that already failed too often; the database is only
    # needed for this check.
    with Database(self.db_path) as database:
        if database.get_extractor_fail_count(e_hash) >= self.max_fail:
            return e_hash, None

    with youtube_dl.YoutubeDL(self.ydl_opts) as ydl:
        try:
            processed = ydl.process_ie_result(entry, False)
        except youtube_dl.DownloadError as download_error:
            # Use the module logger like the rest of this method instead of
            # the root ``logging`` module.
            logger.warning("Failed to get a video. Youtube-dl said: '%s'", download_error)
            return e_hash, None

        publish_date = 0.0
        date_str = processed.get("upload_date")
        if date_str:
            publish_date = datetime.datetime.strptime(date_str, "%Y%m%d").timestamp()

        # A key that is present but set to None bypasses the ``.get`` default,
        # so normalise explicitly; ``None > int`` would raise TypeError.
        age_limit = processed.get("age_limit")
        if age_limit is None:
            age_limit = 0
        if age_limit > config.ytcc.age_limit:
            logger.warning("Ignoring video '%s' due to age limit", processed.get("title"))
            return e_hash, None

        description = processed.get("description")
        if description is None:
            description = ""

        duration = processed.get("duration")
        if duration is None:
            duration = -1

        logger.info("Processed video '%s'", processed.get("title"))
        return e_hash, Video(
            url=processed["webpage_url"],
            title=processed["title"],
            description=description,
            publish_date=publish_date,
            watch_date=None,
            duration=duration,
            extractor_hash=e_hash,
        )
def init_db():
    """Create an in-memory database with three channels and the sample videos.

    The videos come from ``insert_list``, which is not defined in this
    function and must be provided by the enclosing scope.

    :return: The populated ``Database`` instance.
    """
    channel_rows = (
        ("publisher1", "id_publisher1"),
        ("publisher2", "id_publisher2"),
        ("publisher3", "id_publisher3"),
    )

    database = Database(":memory:")
    for display_name, channel_id in channel_rows:
        database.add_channel(display_name, channel_id)
    database.add_videos(insert_list)
    return database
def get_new_entries(self, playlist: Playlist) -> List[Tuple[Any, str, Playlist]]:
    """Collect playlist entries that are not yet stored in the database.

    :param playlist: Playlist to check; its ``name`` selects the known
        videos and its ``url`` is passed to youtube-dl.
    :return: A list of ``(entry, extractor_hash, playlist)`` tuples for every
        entry whose hash is not already known. The list is empty when
        youtube-dl raises ``DownloadError`` or yields no entries.
    """
    with Database(self.db_path) as database:
        known_hashes = frozenset(
            video.extractor_hash
            for video in database.list_videos(playlists=[playlist.name])
        )

    result = []
    with youtube_dl.YoutubeDL(self.ydl_opts) as ydl:
        logger.info("Checking playlist '%s'...", playlist.name)
        try:
            info = ydl.extract_info(playlist.url, download=False, process=False)
        except youtube_dl.DownloadError as download_error:
            # Use the module logger like the rest of this method instead of
            # the root ``logging`` module.
            logger.error(
                "Failed to get playlist %s. Youtube-dl said: '%s'",
                playlist.name,
                download_error,
            )
        else:
            # NOTE(review): youtube-dl may return None instead of raising
            # (e.g. with ``ignoreerrors``), and "entries" may be present but
            # None; treat both as "no entries" rather than crashing.
            entries = (info or {}).get("entries") or []
            for entry in take(self.max_items, entries):
                e_hash = ydl._make_archive_id(entry)  # pylint: disable=protected-access
                if e_hash is None:
                    logger.warning(
                        "Ignoring malformed playlist entry from %s", playlist.name
                    )
                elif e_hash not in known_hashes:
                    result.append((entry, e_hash, playlist))

    return result
def __call__(self, *args, **kwargs):
    """Invoke the ytcc CLI against this runner's config file.

    Two keyword arguments are consumed here and never forwarded:

    * ``subscribe`` -- when truthy, add ``WEBDRIVER_PLAYLIST`` to the
      database at ``self.db_file`` before invoking the CLI.
    * ``update`` -- when truthy, add ``WEBDRIVER_VIDEOS`` for
      ``WEBDRIVER_PLAYLIST`` to that database before invoking the CLI.

    :param args: CLI arguments appended after ``--conf <conf_file>``.
    :param kwargs: Remaining keyword arguments passed to ``self.invoke``.
    :return: Whatever ``self.invoke`` returns.
    """
    from ytcc.cli import cli

    # ``pop`` removes the helper flags even when they are passed as False;
    # previously a falsy flag stayed in ``kwargs`` and leaked into ``invoke``.
    if kwargs.pop("subscribe", False):
        from ytcc.database import Database

        with Database(self.db_file) as db:
            db.add_playlist(WEBDRIVER_PLAYLIST.name, WEBDRIVER_PLAYLIST.url)

    if kwargs.pop("update", False):
        from ytcc.database import Database

        with Database(self.db_file) as db:
            db.add_videos(WEBDRIVER_VIDEOS, WEBDRIVER_PLAYLIST)

    return self.invoke(cli, ["--conf", self.conf_file, *args], **kwargs)
def __init__(self, override_cfg_file: Optional[str] = None) -> None:
    """Load the configuration, open the database and reset all filters.

    :param override_cfg_file: Optional path passed straight to ``Config``.
    """
    configuration = Config(override_cfg_file)
    self.config = configuration
    self.db = Database(configuration.db_path)

    # Default filters.
    self.channel_filter: List[str] = []
    self.date_begin_filter = 0.0
    # NOTE(review): ``time.mktime`` interprets its argument as local time
    # while ``time.gmtime`` yields UTC -- confirm this offset is intended.
    current_utc_struct = time.gmtime()
    self.date_end_filter = time.mktime(current_utc_struct) + 20
    self.include_watched_filter = False
    self.search_filter = ""
def __init__(self, override_cfg_file: Optional[str] = None) -> None:
    """Load the configuration, open the database and reset all filters.

    :param override_cfg_file: Optional path passed straight to ``Config``.
    """
    configuration = Config(override_cfg_file)
    self.config = configuration
    self.database = Database(configuration.db_path)

    # Default filters.
    self.video_id_filter: List[int] = []
    self.channel_filter: List[str] = []
    self.date_begin_filter = 0.0
    # A (float, bool) pair; the meaning of the flag is defined by the code
    # that reads this attribute, which is outside this block.
    self.date_end_filter = (0.0, False)
    self.include_watched_filter = False
def test_add_and_get_channels(self):
    """Channels added to a fresh database are returned with their data, in order."""
    expected = (
        ("Webdriver Torso", "UCsLiV4WJfkTEHH0b9PmRklw"),
        ("Webdriver YPP", "UCxexYYtOetqikZqriLuTS-g"),
    )

    db = Database(":memory:")
    for display_name, channel_id in expected:
        db.add_channel(display_name, channel_id)

    channels = db.get_channels()
    self.assertEqual(len(channels), 2)
    for index, (display_name, channel_id) in enumerate(expected):
        self.assertEqual(channels[index].displayname, display_name)
        self.assertEqual(channels[index].yt_channelid, channel_id)
def init_db():
    """Create an in-memory database with three channels and five sample videos.

    :return: The populated ``Database`` instance.
    """
    # (yt_videoid, title, description, publisher, publish_date)
    video_rows = (
        ("0", "title1", "description1", "id_publisher1", 1488286166),
        ("0", "title1", "description1", "id_publisher1", 1488286167),
        ("1", "title2", "description1", "id_publisher1", 1488286168),
        ("1", "title2", "description2", "id_publisher2", 1488286170),
        ("2", "title3", "description3", "id_publisher2", 1488286171),
    )
    insert_list = [
        Video(
            yt_videoid=video_id,
            title=title,
            description=description,
            publisher=publisher,
            publish_date=publish_date,
            watched=False,
        )
        for video_id, title, description, publisher, publish_date in video_rows
    ]

    # (displayname, yt_channelid)
    channel_rows = (
        ("publisher1", "id_publisher1"),
        ("publisher2", "id_publisher2"),
        ("publisher3", "id_publisher3"),
    )

    db = Database(":memory:")
    for display_name, channel_id in channel_rows:
        db.add_channel(Channel(displayname=display_name, yt_channelid=channel_id))
    db.add_videos(insert_list)
    return db
def update(self):
    """Fetch new entries for every playlist and store the results.

    New entries of all playlists are gathered with ``get_new_entries``,
    de-duplicated by extractor hash, and resolved with ``process_entry``,
    both through the worker pool. For each playlist an entry appeared in,
    the resolved video is added to the database; if resolution yielded
    ``None`` the extractor fail count of that hash is increased instead.
    """
    # Four workers per CPU; ``unpack_optional`` presumably supplies 1 when
    # ``os.cpu_count()`` returns None -- confirm against its definition.
    cpu_total = unpack_optional(os.cpu_count(), lambda: 1)
    num_workers = cpu_total * 4

    with Pool(num_workers) as pool, Database(self.db_path) as database:
        playlists = database.list_playlists()

        raw_entries = {}
        playlists_mapping = defaultdict(list)
        per_playlist_entries = pool.map(self.get_new_entries, playlists)
        for entry, e_hash, playlist in itertools.chain.from_iterable(per_playlist_entries):
            # The same video may be listed in several playlists: keep one
            # entry per hash and remember every playlist it belongs to.
            raw_entries[e_hash] = entry
            playlists_mapping[e_hash].append(playlist)

        # Transpose the (hash, entry) pairs into parallel argument sequences.
        argument_columns = zip(*raw_entries.items())
        results = dict(pool.map(self.process_entry, *argument_columns))

        for e_hash in raw_entries:
            video = results[e_hash]
            for playlist in playlists_mapping[e_hash]:
                if video is None:
                    database.increase_extractor_fail_count(e_hash, max_fail=self.max_fail)
                else:
                    database.add_videos([video], playlist)
def database(self) -> Database:
    """Return the database handle, creating it on first access.

    The handle is opened at ``config.ytcc.db_path`` and stored in
    ``self._database`` so later calls return the same instance.
    """
    if self._database is not None:
        return self._database

    self._database = Database(config.ytcc.db_path)
    return self._database
def test_add_channel_duplicate(self):
    """Add two channels that share one channel id to a fresh database.

    No assertion is made here; the expected failure is presumably declared
    outside this block (e.g. by a decorator) -- confirm.
    """
    shared_channel_id = "UCsLiV4WJfkTEHH0b9PmRklw"
    db = Database(":memory:")
    for display_name in ("Webdriver Torso", "Webdriver Torso2"):
        db.add_channel(display_name, shared_channel_id)
def test_add_channel_duplicate(self):
    """Add two ``Channel`` objects that share one channel id to a fresh database.

    No assertion is made here; the expected failure is presumably declared
    outside this block (e.g. by a decorator) -- confirm.
    """
    shared_channel_id = "UCsLiV4WJfkTEHH0b9PmRklw"
    db = Database(":memory:")
    for display_name in ("Webdriver Torso", "Webdriver Torso2"):
        db.add_channel(Channel(displayname=display_name, yt_channelid=shared_channel_id))