Example #1
# Assumed imports for this snippet: ``os`` (and ``random`` for the commented
# alternative); ``settings`` and ``ID2SLUG_MAP`` come from the surrounding project.
def create_random_video_file(self):
    """
    Helper function for testing video files.
    """
    # Pick a known video ID. Dict views aren't indexable in Python 3, so go
    # through an iterator; random.choice(list(ID2SLUG_MAP)) would match the name.
    video_id = next(iter(ID2SLUG_MAP))
    fake_video_file = os.path.join(settings.CONTENT_ROOT, "%s.mp4" % video_id)
    # Touch an empty .mp4 file in the content root to stand in for a real video.
    with open(fake_video_file, "w") as fh:
        fh.write("")
    self.assertTrue(os.path.exists(fake_video_file),
                    "Make sure the video file was created, video_id='%s'." % video_id)
    return (fake_video_file, video_id)
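A minimal usage sketch, assuming the helper is defined on (or mixed into) a
unittest/Django-style TestCase in the same project; the class and test names
here are hypothetical:

# Hypothetical test class using the helper above.
class VideoFileTests(TestCase):
    def test_fake_video_is_created(self):
        fake_video_file, video_id = self.create_random_video_file()
        # Clean up the stub file so it doesn't leak into other tests.
        self.addCleanup(os.remove, fake_video_file)
        self.assertTrue(fake_video_file.endswith("%s.mp4" % video_id))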
Example #2
# Assumed imports for this snippet: the stdlib modules below plus ``requests``;
# CommandError, ID2SLUG_MAP, and DUBBED_VIDEOS_SPREADSHEET_CSV_URL come from
# the surrounding project.
import csv
import logging
from io import StringIO

import requests


def generate_dubbed_video_mappings(
        download_url=DUBBED_VIDEOS_SPREADSHEET_CSV_URL, csv_data=None):
    """
    Do the heavy lifting of building the dubbed-videos map.

    Could be moved into utils.
    """
    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url)
        if response.status_code != 200:
            raise CommandError(
                "Failed to download dubbed video CSV data: status=%s" %
                response.status_code)
        # Use the decoded text (not raw bytes) so csv/StringIO get a str.
        csv_data = response.text

    # This CSV file is in standard format: delimited by ",", quoted by '"'.
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key: language name
    #   Second key: English YouTube ID
    #   Value: corresponding YouTube ID in the new language
    video_map = {}

    # Loop through each row in the spreadsheet.
    for row_num, row in enumerate(reader):

        if row_num < 5:
            # The first five rows (0-4) are boilerplate; skip them.
            continue

        elif row_num == 5:
            # Row 5 is the header row; lowercase all of its values
            # (including the language names).
            header_row = [v.lower() for v in row]
            # .index() raises ValueError if either column header is missing.
            slug_idx = header_row.index("titled id")
            english_idx = header_row.index("english")

        else:
            # Rows 6 and beyond are data.
            assert len(row) == len(header_row), \
                "Each data row should have the same number of columns as the header row."

            # Grab the slug and English video ID.
            video_slug = row[slug_idx]
            english_video_id = row[english_idx]
            assert english_video_id, "English video ID should not be empty."
            assert video_slug, "Slug should not be empty."

            # The English video is the first video-ID column, and the
            # following columns (through the end of the row) are other
            # languages. Loop through those columns and, where a dubbed
            # video exists, add it to the dictionary.
            for idx in range(english_idx, len(row)):
                if row[idx]:  # make sure there's a dubbed video
                    lang = header_row[idx]
                    # Record this language's ID for the English video,
                    # creating the per-language dict on first use.
                    video_map.setdefault(lang, {})[english_video_id] = row[idx]

    # Now, validate the mappings against our topic data.
    missing_videos = set(ID2SLUG_MAP.keys()) - set(video_map["english"].keys())
    extra_videos = set(video_map["english"].keys()) - set(ID2SLUG_MAP.keys())
    if missing_videos:
        logging.warning(
            "There are %d known videos not in the list of dubbed videos" %
            len(missing_videos))
    if extra_videos:
        logging.warning(
            "There are %d videos in the list of dubbed videos that we have never heard of."
            % len(extra_videos))

    return (video_map, csv_data)
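A minimal sketch exercising the parser with inline csv_data instead of a
download. The column layout is illustrative only: five junk rows, then a
header row containing the "titled id" and "english" columns the function
looks for. ID2SLUG_MAP must still exist for the validation step, which with
toy data will just log a warning about known videos that aren't covered:

csv_rows = [",,,"] * 5 + [
    "serial,titled id,english,spanish",
    "1,intro-to-division,abc123,xyz789",
]
video_map, _ = generate_dubbed_video_mappings(csv_data="\n".join(csv_rows))
print(video_map["spanish"]["abc123"])  # -> 'xyz789'
print(video_map["english"]["abc123"])  # -> 'abc123'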