예제 #1
0
 def test_private_vid_marked_as_private(self):
     """A private video ends up stored as checked with status PRIVATE.

     Runs ``record_download_video`` for ``self.private_vid`` and then reads
     the matching ``Videos`` row back to verify its flags.
     """
     record_download_video(self.private_vid, self.temp_dir.name)
     vid: Videos = Videos.objects.get(url=self.private_vid)
     # CheckStatus[...] looks the enum member up by the stored name.
     self.assertEqual(CheckStatus[vid.check_status], CheckStatus.PRIVATE)
     self.assertEqual(vid.checked, True)
     # Saving the fetched row again must succeed; an exception here fails
     # the test.
     vid.save()
예제 #2
0
    def test_present_vid_is_downloaded(self):
        """An available video is downloaded to disk and stored as FOUND.

        After ``record_download_video`` runs for ``self.present_vid`` the
        stored row must reference the expected ad file path, that file must
        exist below the temporary download directory, and the row must be
        flagged as checked.
        """
        record_download_video(self.present_vid, self.temp_dir.name)
        stored: Videos = Videos.objects.get(url=self.present_vid)

        # Path is relative to the download directory passed above.
        ad_relpath = stored.AdFile_ID.ad_filepath
        self.assertEqual(ad_relpath, "CyAds/kPBtDHiHJuM.mkv")
        downloaded_file = Path(self.temp_dir.name) / ad_relpath
        self.assertTrue(downloaded_file.exists())

        # CheckStatus[...] looks the enum member up by the stored name.
        actual_status = CheckStatus[stored.check_status]
        self.assertEqual(actual_status, CheckStatus.FOUND)
        self.assertEqual(stored.checked, True)
예제 #3
0
    def handle(self, *args, **options):  # Only download youtube videos
        """Download every unchecked video that has been watched as an ad.

        The destination directory is read from the
        ``AD_ARCHIVE_FILESTORE_DIR`` environment variable. A failure while
        processing one video is reported on ``self.stderr`` and does not stop
        the remaining downloads.

        Raises:
            KeyError: If ``AD_ARCHIVE_FILESTORE_DIR`` is not set.
        """
        # Only download ads
        # Subscript access raises KeyError when the variable is unset, so no
        # separate None check is needed.
        download_dir = os.environ["AD_ARCHIVE_FILESTORE_DIR"]

        # NOTE(review): url_len=11 presumably selects YouTube video ids
        # (see the "Only download youtube videos" remark) -- confirm.
        youtube_urls = Videos.objects.annotate(url_len=Length("url")).filter(
            url_len=11, watched_as_ad__gte=1, checked=False)

        vid: Videos
        for vid in youtube_urls:
            try:
                video_with_adfile = record_download_video(
                    vid.url, download_dir)
                video_with_adfile.save()
                if video_with_adfile.check_status == CheckStatus.FOUND.value:
                    # Write through the command's stream so output can be
                    # captured by callers and tests.
                    self.stdout.write(
                        f"downloaded ad to: {video_with_adfile.AdFile_ID.ad_filepath} for video: {video_with_adfile.url}"
                    )
            except Exception as e:
                # Best effort: report the failure and continue with the next
                # video.
                self.stderr.write(
                    f"Got error while downloading video {vid.url}: {e}")
        self.stdout.write(self.style.SUCCESS("Downloaded ads"))
예제 #4
0
 def test_missing_vid_gets_marked_as_missing_downloaded(self):
     """A missing video ends up stored as checked with status MISSING."""
     target_url = self.missing_vid
     record_download_video(target_url, self.temp_dir.name)

     stored: Videos = Videos.objects.get(url=target_url)
     # CheckStatus[...] looks the enum member up by the stored name.
     recorded_status = CheckStatus[stored.check_status]
     self.assertEqual(recorded_status, CheckStatus.MISSING)
     self.assertEqual(stored.checked, True)
     # Saving the fetched row again must succeed.
     stored.save()
예제 #5
0
 def test_duplicate_videos_raises_errors(self):
     """Recording the same video a second time raises DuplicateVideoError."""
     # First call is expected to go through without raising.
     record_download_video(self.double_vid, self.temp_dir.name)
     # Second call for the same url must be rejected.
     self.assertRaises(
         DuplicateVideoError,
         record_download_video,
         self.double_vid,
         self.temp_dir.name,
     )
예제 #6
0
 def test_video_terminated_account_marked(self):
     """A video from a terminated account is stored as ACCOUNT_TERMINATED."""
     target_url = self.terminated_account_vid
     returned = record_download_video(target_url, self.temp_dir.name)
     returned.save()

     # Read the row back so the assertion checks what is actually stored.
     stored = Videos.objects.get(url=target_url)
     recorded_status = CheckStatus[stored.check_status]
     self.assertEqual(recorded_status, CheckStatus.ACCOUNT_TERMINATED)
예제 #7
0
 def test_video_removed_by_user_marked(self):
     """A video removed by its uploader is stored as USER_REMOVED."""
     target_url = self.user_removed_vid
     record_download_video(target_url, self.temp_dir.name)

     stored = Videos.objects.get(url=target_url)
     # CheckStatus[...] looks the enum member up by the stored name.
     recorded_status = CheckStatus[stored.check_status]
     self.assertEqual(recorded_status, CheckStatus.USER_REMOVED)
예제 #8
0
    def save_video_metadata(self, video_list: List[str], is_ad: bool):
        """Save the video metadata of the videos the bot requested.

        Every id in ``video_list`` is counted first. Videos that are already
        stored get their watch counter increased by the number of times they
        appear. The remaining ids are looked up through ``VideoMetadata`` in
        chunks of 50, stored, and -- when they have been watched as an ad and
        are not already marked FOUND -- downloaded with
        ``record_download_video``.

        Args:
            video_list: Ids of the watched videos; an id may occur repeatedly.
            is_ad: True when the ids were watched as ads (``watched_as_ad`` is
                updated), False when watched as regular videos
                (``watched_as_video`` is updated).
        """
        self.logger.info("Enter save_video_metadata")
        # Only need to lookup video once. Increase by overall views by bot
        # Reduces queries to YouTube data API

        counter_field = "watched_as_ad" if is_ad else "watched_as_video"

        def add_views(video, times: int) -> None:
            """Add ``times`` to the relevant watch counter and save the row."""
            # Treat an unset counter as zero so the addition cannot fail.
            seen_so_far = getattr(video, counter_field) or 0
            setattr(video, counter_field, seen_so_far + times)
            video.save()

        # all videos + ads watched, mapped to the times seen in this batch
        viewed_videos: DefaultDict[str, int] = defaultdict(int)
        for video in video_list:
            viewed_videos[video] += 1
        self.logger.info("videos_watched", counts=viewed_videos)

        # videos/ads no info on yet
        not_viewed: DefaultDict[str, int] = defaultdict(int)

        self.logger.info("Starting to check if videos already saved")
        vid_id: str
        times_seen: int
        for vid_id, times_seen in viewed_videos.items():
            # Do we already have the video info?
            try:
                vid: Videos = Videos.objects.get(url=vid_id)
            except Videos.DoesNotExist:
                # We don't have the video info yet; look it up later.
                not_viewed[vid_id] = times_seen
                continue
            except Videos.MultipleObjectsReturned:
                # Workaround for multiple entries for the same url. There
                # should only be one! Use the first of the duplicates.
                vid = Videos.objects.filter(url=vid_id)[0]
            # Already in the db: add this batch's views to the stored count.
            add_views(vid, times_seen)

        self.logger.info("Finished checking if videos already saved")

        self.logger.info("need to lookup videos",
                         number=len(not_viewed),
                         videos=list(not_viewed))
        # Benchmark
        max_queries = len(not_viewed)
        actual_queries = 0

        self.logger.info("Starting to grab YT metadata for videos not saved")
        # Get and save info on videos we don't have info on yet.
        # Can only get info 50 videos at a time from YouTube data API
        for chunk in chunked(not_viewed.keys(), n=50):
            all_metadata = VideoMetadata(list(chunk), self.api_key)
            # Made X queries
            actual_queries += len(all_metadata)

            metadata: VideoMetadata
            for idx, metadata in enumerate(all_metadata):
                self.logger.info(
                    f"idx: {idx}, vid_id: {metadata.id}, available: {metadata.available()}"
                )
                # Create the video entry since it doesn't exist
                if not metadata.available():
                    # Video is removed from YouTube
                    vid = Videos.objects.missing(metadata.id)
                else:
                    cat = Categories.objects.from_valid_category_and_name(
                        metadata.category_id, metadata.category_name)
                    channel = Channels.objects.from_valid_channel_and_name(
                        metadata.channel_id, metadata.channel_title)
                    vid, _ = Videos.objects.get_or_create(
                        url=metadata.id, category=cat, channel=channel)

                    vid.keywords = json.dumps(metadata.keywords)
                    vid.description = metadata.description
                    vid.title = metadata.title

                # Use youtube video id as key to lookup total times seen in
                # batch. .get() avoids inserting a key into the dict whose
                # key view is still being chunked by the outer loop.
                add_views(vid, not_viewed.get(metadata.id, 0))

                # Download ads only
                should_download = (vid.check_status != CheckStatus.FOUND.value
                                   and vid.watched_as_ad >= 1)
                if should_download:
                    self.logger.info(f"Downloading video: {vid.url}")
                    # The returned row carries the status resulting from the
                    # download attempt; it is saved as returned.
                    vid_with_adfile = record_download_video(
                        vid.url, self.download_path)
                    vid_with_adfile.save()
                    self.logger.info(
                        f"Downloaded video: {vid_with_adfile.url}, status={vid_with_adfile.check_status}"
                    )

        self.logger.info("Finished grabbing YT metadata for videos not saved")

        self.logger.info(
            f"Made {actual_queries} youtube queries. Max should be: {max_queries}"
        )
        self.logger.info("exit save_video_metadata")