Esempio n. 1
0
def download(video_id, max_workers, format='mkv', **kwargs):
    """
    Download a video: fetch its VOD files into a temp directory, join them
    into a single target file, then delete the downloaded VOD files.

    :param video_id: video identifier; normalised through parse_video_id
    :param max_workers: worker count handed to _download_files
    :param format: passed to _video_target_filename to build the target name
    :param kwargs: accepted but not used by this function
    :return: None
    """
    video_id = parse_video_id(video_id)

    print("Looking up video...")
    video_data = twitch.get_video(video_id)

    print("Fetching access token...")
    token = twitch.get_access_token(video_id)

    print("Fetching playlists...")
    all_playlists = twitch.get_playlists(video_id, token)
    quality, playlist_url = _select_quality(all_playlists)

    print("\nFetching playlist...")
    base_url, filenames = twitch.get_playlist_urls(playlist_url)

    # Downloads land in <tempdir>/twitch-dl/<video_id>/<quality>; the
    # directory is created on demand and reused if it already exists.
    path_parts = (tempfile.gettempdir(), video_id, quality)
    directory = '{}/twitch-dl/{}/{}'.format(*path_parts)
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
    print("Download dir: {}".format(directory))

    print("Downloading VODs with {} workers...".format(max_workers))
    downloaded = _download_files(base_url, directory, filenames, max_workers)

    print("\n\nJoining files...")
    target = _video_target_filename(video_data, format)
    _join_vods(directory, downloaded, target)

    print("\nDeleting vods...")
    for vod_path in downloaded:
        os.unlink(vod_path)

    print("\nDownloaded: {}".format(target))
Esempio n. 2
0
def get_vods(video_id, access_token=None):
    """

    builds the list of vods for a video

    :param video_id: ID of the video
    :param access_token: access token for the video; when None a token is
        requested through twitch.get_access_token
    :return: whatever _get_vod_paths yields for the downloaded playlist
    """

    token = access_token
    if token is None:
        token = twitch.get_access_token(video_id)

    # pick the playlist matching the quality derived from constants.dimensions
    master_m3u8 = twitch.get_playlists(video_id, token)
    available = list(_parse_playlists(master_m3u8))
    wanted_quality = constants.quality(constants.dimensions)
    playlist_uri = _get_playlist_by_name(available, wanted_quality)

    reply = requests.get(playlist_uri)
    reply.raise_for_status()
    parsed_playlist = m3u8.loads(reply.text)

    # start and end are both passed as None (no bounds supplied)
    return _get_vod_paths(parsed_playlist, None, None)
Esempio n. 3
0
    def __init__(self,
                 video_id,
                 step=2,
                 end_transition_step=constants.end_transition_step,
                 verbose=True,
                 batch_size=32):
        """

        sets up a DataCollector for the given video ID

        :param video_id: ID of the video to seek through
        :param step: step between images (every step-th vod is kept)
        :param end_transition_step: stored on the instance as
            end_transition_step
        :param verbose: stored on the instance as verbose
        :param batch_size: size of the batches that the video is processed in
        """

        # caller-supplied configuration
        self.video_id = video_id
        self.verbose = verbose
        self.batch_size = batch_size
        self.step = step
        self.end_transition_step = end_transition_step

        # the two models are loaded before any request to twitch is made
        self.classifier = models.load_model(constants.game_classifier)
        self.crewmate_identifier = models.load_model(
            constants.crewmate_identifier)

        # access token, base url and the complete vod list for the video
        self.access_token = twitch.get_access_token(video_id)
        self.url = web_scrapper.get_base_url(self.video_id, self.access_token)
        self.full_vods = web_scrapper.get_vods(self.video_id,
                                               self.access_token)

        # keep every [step]-th vod, each paired with 0
        self.vods = [(vod, 0) for vod in self.full_vods[::self.step]]

        # results that are filled in later; all of them start out as None
        self.batches = None
        self.transitions = None
        self.game_classifier_predictions = None
        self.games = None
        self.transition_tensor = None
        self.transition_predictions = None
Esempio n. 4
0
def _download_video(video_id,
                    max_workers,
                    format='mp4',
                    start=None,
                    end=None,
                    keep=False,
                    **kwargs):
    """
    Download one video: fetch its playlist, download the listed files into a
    temp directory and join them into "videos/Download/<target>".

    :param video_id: ID of the video; also passed to _log and download_files
    :param max_workers: worker count handed to download_files
    :param format: passed to _video_target_filename to build the target name
    :param start: start bound passed to _get_files (may be None)
    :param end: end bound passed to _get_files (may be None)
    :param keep: when true the temp directory is left in place
    :param kwargs: accepted but not used by this function
    :raises ConsoleError: when both bounds are set and end <= start
    """
    if start and end:
        if end <= start:
            raise ConsoleError("End time must be greater than start time")

    _log(video_id, "Recherche la video ")
    video = twitch.get_video(video_id)

    title = video['title']
    _log(video_id, "Informations sur {}".format(title))
    channel_name = video['channel']['display_name']
    print_out("Trouvé: <blue>{}</blue> by <yellow>{}</yellow>".format(
        title, channel_name))

    access_token = twitch.get_access_token(video_id)

    _log(video_id, "Obtention de la liste des fichiers...")
    playlists_text = twitch.get_playlists(video_id, access_token)
    selected = _select_quality(m3u8.loads(playlists_text).playlists)

    playlist_uri = selected.uri
    response = requests.get(playlist_uri)
    response.raise_for_status()
    playlist_text = response.text
    playlist = m3u8.loads(playlist_text)

    # base_uri is the playlist URI with its last path segment removed
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    filenames = list(_get_files(playlist, start, end))

    # Keep both raw playlists in the temp dir for debugging purposes.
    # NOTE(review): plain concatenation assumes target_dir ends with a
    # path separator - confirm against _crete_temp_dir.
    debug_dumps = (
        ("playlists.m3u8", playlists_text),
        ("playlist.m3u8", playlist_text),
    )
    for dump_name, dump_content in debug_dumps:
        with open(target_dir + dump_name, "w") as f:
            f.write(dump_content)

    file_paths = download_files(video_id, base_uri, target_dir, filenames,
                                max_workers)

    target = _video_target_filename(video, format)
    print_out("\nCible: {}".format(target))
    _join_vods(target_dir, file_paths, "videos/Download/{}".format(target))

    if not keep:
        shutil.rmtree(target_dir)
    else:
        print_out("\nTemporary files not deleted: {}".format(target_dir))

    print_out("Fichier téléchargé: {}".format(target))
    _log(video_id, "Terminé {}".format(target))
Esempio n. 5
0
def download(video_id,
             max_workers,
             format='mkv',
             start=None,
             end=None,
             keep=False,
             **kwargs):
    """
    Download a video: fetch its playlist, download the listed files into a
    temp directory, join them into the target file and clean up.

    :param video_id: video identifier; normalised through _parse_video_id
    :param max_workers: worker count handed to _download_files
    :param format: passed to _video_target_filename to build the target name
    :param start: start bound passed to _get_files (may be None)
    :param end: end bound passed to _get_files (may be None)
    :param keep: when true the temp directory is left in place
    :param kwargs: accepted but not used by this function
    :raises ConsoleError: when both bounds are set and end <= start
    """
    video_id = _parse_video_id(video_id)

    if start and end:
        if end <= start:
            raise ConsoleError("End time must be greater than start time")

    print_out("Looking up video...")
    video = twitch.get_video(video_id)

    title = video['title']
    channel_name = video['channel']['display_name']
    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        title, channel_name))

    print_out("Fetching access token...")
    access_token = twitch.get_access_token(video_id)

    print_out("Fetching playlists...")
    playlists_text = twitch.get_playlists(video_id, access_token)
    selected = _select_quality(m3u8.loads(playlists_text).playlists)

    print_out("\nFetching playlist...")
    playlist_uri = selected.uri
    response = requests.get(playlist_uri)
    response.raise_for_status()
    playlist_text = response.text
    playlist = m3u8.loads(playlist_text)

    # base_uri is the playlist URI with its last path segment removed
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    filenames = list(_get_files(playlist, start, end))

    # Keep both raw playlists in the temp dir for debugging purposes.
    # NOTE(review): plain concatenation assumes target_dir ends with a
    # path separator - confirm against _crete_temp_dir.
    debug_dumps = (
        ("playlists.m3u8", playlists_text),
        ("playlist.m3u8", playlist_text),
    )
    for dump_name, dump_content in debug_dumps:
        with open(target_dir + dump_name, "w") as f:
            f.write(dump_content)

    print_out("\nDownloading {} VODs using {} workers to {}".format(
        len(filenames), max_workers, target_dir))
    _download_files(base_uri, target_dir, filenames, max_workers)

    print_out("\n\nJoining files...")
    target = _video_target_filename(video, format)
    _join_vods(target_dir, filenames, target)

    if not keep:
        print_out("\nDeleting temporary files...")
        shutil.rmtree(target_dir)
    else:
        print_out("\nTemporary files not deleted: {}".format(target_dir))

    print_out("Downloaded: {}".format(target))
def update_resolution(input_dir, output_dir, new_resolution):
    """

    re-fetch the frame named by each file in the input directory at a new
    resolution and write it to the output directory

    :param input_dir: input directory; each file name is decoded with
        get_timestamp into (video ID, vod, frame)
    :param output_dir: output directory
    :param new_resolution: resolution passed to constants.quality to select
        the quality of the new images
    :return: None
    """

    # sort the files by video ID so that files of one video are adjacent
    files = sorted(os.listdir(input_dir), key=lambda f: get_timestamp(f)[0])

    previous_video = ""
    vods = None
    url = ""

    for file in files:

        # collect the data from the file
        video_id, vod, frame = get_timestamp(file)

        if video_id != previous_video or vods is None:
            try:
                # update the access token
                access_token = twitch.get_access_token(video_id)
                new_vods = web_scrapper.get_vods(video_id, access_token)

                # get the url
                new_url = web_scrapper.get_base_url(
                    video_id, access_token, constants.quality(new_resolution))
            except (TypeError, ConsoleError):
                # The refresh failed: skip this file and forget the cached
                # vods so the next file triggers a new attempt. Without this
                # the vods/url of a previously processed video would be
                # reused for the files of the video that failed.
                vods = None
                continue

            # commit the cached state only once every lookup succeeded, so
            # vods and url always belong to the same video
            previous_video = video_id
            vods = new_vods
            url = new_url

        image = web_scrapper.get_still_frame(url + vods[vod], index=frame)

        # save the image as Meeting-<video_id>-<vod>-<frame>.jpg
        cv2.imwrite(
            os.path.join(
                output_dir,
                "-".join(["Meeting", video_id,
                          str(vod), str(frame)]) + ".jpg"), image)
Esempio n. 7
0
def download(video_id,
             max_workers,
             format='mkv',
             start=None,
             end=None,
             **kwargs):
    """
    Download a video: fetch its VOD files into a temp directory, join them
    into a single target file, then delete the downloaded VOD files.

    :param video_id: video identifier; normalised through parse_video_id
    :param max_workers: worker count handed to _download_files
    :param format: passed to _video_target_filename to build the target name
    :param start: start bound passed to twitch.get_playlist_urls (may be None)
    :param end: end bound passed to twitch.get_playlist_urls (may be None)
    :param kwargs: accepted but not used by this function
    :raises ConsoleError: when both bounds are set and end <= start, or when
        the playlist lookup returns no file names
    """
    video_id = parse_video_id(video_id)

    if start and end:
        if end <= start:
            raise ConsoleError("End time must be greater than start time")

    print_out("Looking up video...")
    video_data = twitch.get_video(video_id)

    title = video_data['title']
    channel_name = video_data['channel']['display_name']
    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        title, channel_name))

    print_out("Fetching access token...")
    token = twitch.get_access_token(video_id)

    print_out("Fetching playlists...")
    all_playlists = twitch.get_playlists(video_id, token)
    quality, playlist_url = _select_quality(all_playlists)

    print_out("\nFetching playlist...")
    base_url, filenames = twitch.get_playlist_urls(playlist_url, start, end)

    if not filenames:
        raise ConsoleError("No vods matched, check your start and end times")

    # Downloads land in <tempdir>/twitch-dl/<video_id>/<quality>; the
    # directory is created on demand and reused if it already exists.
    path_parts = (tempfile.gettempdir(), video_id, quality)
    directory = '{}/twitch-dl/{}/{}'.format(*path_parts)
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
    print_out("Download dir: {}".format(directory))

    print_out("Downloading {} VODs using {} workers...".format(
        len(filenames), max_workers))
    downloaded = _download_files(base_url, directory, filenames, max_workers)

    print_out("\n\nJoining files...")
    target = _video_target_filename(video_data, format)
    _join_vods(directory, downloaded, target)

    print_out("\nDeleting vods...")
    for vod_path in downloaded:
        os.unlink(vod_path)

    print_out("\nDownloaded: {}".format(target))
Esempio n. 8
0
    def __init__(self, video_id, starting_index=0):
        """

        sets up the image generator

        :param video_id: video ID to generate images for
        :param starting_index: stored on the instance as start_index
        """

        self.video_id = video_id
        self.start_index = starting_index

        # one access token is shared by the base-url and the vod lookups
        token = twitch.get_access_token(video_id)
        self.base_url = web_scrapper.get_base_url(video_id, token)
        self.vods = web_scrapper.get_vods(video_id, token)

        # the end index is the number of vods
        self.end_index = len(self.vods)

        # no image yet
        self.image = None
Esempio n. 9
0
def get_base_url(video_id,
                 access_token=None,
                 quality=constants.quality(constants.res_360p)):
    """

    builds the base URL of a video with a given ID

    :param video_id: ID of the video
    :param access_token: access token for the video; when None a token is
        requested through twitch.get_access_token
    :param quality: quality of the video, used to pick the playlist by name
    :return: URL of the selected playlist with its last path segment removed
    """

    token = access_token
    if token is None:
        token = twitch.get_access_token(video_id)

    master_m3u8 = twitch.get_playlists(video_id, token)
    available = list(_parse_playlists(master_m3u8))
    chosen_url = _get_playlist_by_name(available, quality)

    # replace the trailing "/<name>" of the playlist URL with a bare "/"
    return re.sub("/[^/]+$", "/", chosen_url)
Esempio n. 10
0
def get_training_data(video_id, sampling_rate=constants.sampling_rate):
    """

    saves a set of still images taken from the vods of a video

    :param video_id: ID of the video
    :param sampling_rate: only vods whose index is a multiple of this value
        are turned into images
    :return: None
    """

    token = twitch.get_access_token(video_id)

    base_url = get_base_url(video_id, token)
    vods = get_vods(video_id, token)

    for index, vod in enumerate(vods):

        # skip every vod that is not on the sampling grid
        if index % sampling_rate != 0:
            continue

        frame = get_still_frame(base_url + vod)
        image_path = "Data/images/" + video_id + "-" + str(index) + ".jpg"
        cv2.imwrite(image_path, frame)
Esempio n. 11
0
def download(video_id, max_workers, format='mkv', **kwargs):
    """
    Download a video: fetch its VOD files into a temporary directory and
    join them into a single target file.

    :param video_id: ID of the video
    :param max_workers: worker count handed to _download_files
    :param format: passed to _video_target_filename to build the target name
    :param kwargs: accepted but not used by this function
    :return: None
    """
    print("Looking up video...")
    video_data = twitch.get_video(video_id)

    print("Fetching access token...")
    token = twitch.get_access_token(video_id)

    print("Fetching playlists...")
    all_playlists = twitch.get_playlists(video_id, token)
    playlist_url = _select_playlist_by_quality(all_playlists)

    print("\nFetching playlist...")
    base_url, filenames = twitch.get_playlist_urls(playlist_url)

    target = _video_target_filename(video_data, format)

    # The temporary directory is removed when the with-block exits, so the
    # join has to happen inside it.
    with tempfile.TemporaryDirectory() as work_dir:
        print("Downloading...")
        _download_files(base_url, work_dir, filenames, max_workers)

        print("\n\nJoining files...")
        _join_vods(work_dir, filenames, target)

    print("\nDownloaded: {}".format(target))
Esempio n. 12
0
def info(args):
    """
    Print information about the video or clip named by args.identifier.

    The identifier is first tried as a video identifier and then as a clip
    identifier; the first one that parses decides what is fetched.

    :param args: object providing the attributes identifier and json; when
        json is truthy the JSON printers are used instead of the info
        printers
    :raises ConsoleError: when the video or clip cannot be found, or when
        the identifier parses as neither a video nor a clip
    """
    video_id = utils.parse_video_identifier(args.identifier)
    if video_id:
        print_log("Fetching video...")
        video = twitch.get_video(video_id)

        if not video:
            raise ConsoleError("Video {} not found".format(video_id))

        print_log("Fetching access token...")
        access_token = twitch.get_access_token(video_id)

        print_log("Fetching playlists...")
        playlists = twitch.get_playlists(video_id, access_token)

        # video is known to be truthy here, no second check is needed
        if args.json:
            video_json(video, playlists)
        else:
            video_info(video, playlists)
        return

    clip_slug = utils.parse_clip_identifier(args.identifier)
    if clip_slug:
        print_log("Fetching clip...")
        clip = twitch.get_clip(clip_slug)
        if not clip:
            raise ConsoleError("Clip {} not found".format(clip_slug))

        if args.json:
            print_json(clip)
        else:
            clip_info(clip)
        return

    # Report the value that was actually parsed above; the function never
    # reads any other attribute holding the user input.
    raise ConsoleError("Invalid input: {}".format(args.identifier))
Esempio n. 13
0
def _download_video(video_id, args):
    """
    Download one video and join its VODs into a single target file.

    Returns early when a file named after the video ID exists in
    ~/.twitchdownloads/.

    :param video_id: ID of the video
    :param args: object providing the attributes start, end, quality,
        max_workers, format and keep
    :raises ConsoleError: when both bounds are set and end <= start
    """
    if args.start and args.end:
        if args.end <= args.start:
            raise ConsoleError("End time must be greater than start time")

    marker_file = str(Path.home()) + "/.twitchdownloads/" + video_id
    if os.path.isfile(marker_file):
        print("File already downloaded")
        return

    print_out("<dim>Looking up video...</dim>")
    video = twitch.get_video(video_id)

    save_json_video(video_id, video, _video_target_filename(video, "json"))

    title = video['title']
    channel_name = video['channel']['display_name']
    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        title, channel_name))

    print_out("<dim>Fetching access token...</dim>")
    access_token = twitch.get_access_token(video_id)

    print_out("<dim>Fetching playlists...</dim>")
    playlists_m3u8 = twitch.get_playlists(video_id, access_token)
    playlists = list(_parse_playlists(playlists_m3u8))

    # an explicit quality wins; otherwise the user is asked to choose
    if args.quality:
        playlist_uri = _get_playlist_by_name(playlists, args.quality)
    else:
        playlist_uri = _select_playlist_interactive(playlists)

    print_out("<dim>Fetching playlist...</dim>")
    response = requests.get(playlist_uri)
    response.raise_for_status()
    playlist_text = response.text
    playlist = m3u8.loads(playlist_text)

    # base_uri is the playlist URI with its last path segment removed
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    vod_paths = _get_vod_paths(playlist, args.start, args.end)

    # Keep both raw playlists in the temp dir for debugging purposes
    debug_dumps = (
        ("playlists.m3u8", playlists_m3u8),
        ("playlist.m3u8", playlist_text),
    )
    for dump_name, dump_content in debug_dumps:
        with open(path.join(target_dir, dump_name), "w") as f:
            f.write(dump_content)

    print_out("\nDownloading {} VODs using {} workers to {}".format(
        len(vod_paths), args.max_workers, target_dir))
    path_map = download_files(base_uri, target_dir, vod_paths,
                              args.max_workers)

    # Rewrite the playlist so it references the downloaded VODs: segments
    # found in path_map get their uri replaced, all others are dropped.
    original_segments = playlist.segments.copy()
    playlist.segments.clear()
    for segment in original_segments:
        if segment.uri not in path_map:
            continue
        segment.uri = path_map[segment.uri]
        playlist.segments.append(segment)

    playlist_path = path.join(target_dir, "playlist_downloaded.m3u8")
    playlist.dump(playlist_path)

    print_out("\n\nJoining files...")
    target = _video_target_filename(video, args.format)
    _join_vods(playlist_path, target)

    if not args.keep:
        print_out("\n<dim>Deleting temporary files...</dim>")
        shutil.rmtree(target_dir)
    else:
        print_out(
            "\n<dim>Temporary files not deleted: {}</dim>".format(target_dir))

    print_out("\nDownloaded: <green>{}</green>".format(target))
Esempio n. 14
0
def _download_video(video_id, args):
    """
    Download one video and, unless args.no_join is set, join its VODs into
    a single target file.

    If the target exists and args.overwrite is not set, the user is asked
    for confirmation; on confirmation args.overwrite is set to True.

    :param video_id: ID of the video
    :param args: object providing the attributes start, end, overwrite,
        quality, max_workers, no_join and keep; it is also passed to
        _video_target_filename
    :raises ConsoleError: when both bounds are set and end <= start, when
        the video is not found, or when the user declines the overwrite
    """
    if args.start and args.end:
        if args.end <= args.start:
            raise ConsoleError("End time must be greater than start time")

    print_out("<dim>Looking up video...</dim>")
    video = twitch.get_video(video_id)

    if not video:
        raise ConsoleError("Video {} not found".format(video_id))

    title = video['title']
    creator_name = video['creator']['displayName']
    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        title, creator_name))

    target = _video_target_filename(video, args)
    print_out("Output: <blue>{}</blue>".format(target))

    if not args.overwrite and path.exists(target):
        answer = input("File exists. Overwrite? [Y/n]: ")
        confirmed = answer.lower().strip() in ["", "y"]
        if not confirmed:
            raise ConsoleError("Aborted")
        args.overwrite = True

    print_out("<dim>Fetching access token...</dim>")
    access_token = twitch.get_access_token(video_id)

    print_out("<dim>Fetching playlists...</dim>")
    playlists_m3u8 = twitch.get_playlists(video_id, access_token)
    playlists = list(_parse_playlists(playlists_m3u8))

    # an explicit quality wins; otherwise the user is asked to choose
    if args.quality:
        playlist_uri = _get_playlist_by_name(playlists, args.quality)
    else:
        playlist_uri = _select_playlist_interactive(playlists)

    print_out("<dim>Fetching playlist...</dim>")
    response = requests.get(playlist_uri)
    response.raise_for_status()
    playlist_text = response.text
    playlist = m3u8.loads(playlist_text)

    # base_uri is the playlist URI with its last path segment removed
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    vod_paths = _get_vod_paths(playlist, args.start, args.end)

    # Keep both raw playlists in the temp dir for debugging purposes
    debug_dumps = (
        ("playlists.m3u8", playlists_m3u8),
        ("playlist.m3u8", playlist_text),
    )
    for dump_name, dump_content in debug_dumps:
        with open(path.join(target_dir, dump_name), "w") as f:
            f.write(dump_content)

    print_out("\nDownloading {} VODs using {} workers to {}".format(
        len(vod_paths), args.max_workers, target_dir))
    path_map = download_files(base_uri, target_dir, vod_paths,
                              args.max_workers)

    # Rewrite the playlist so it references the downloaded VODs: segments
    # found in path_map get their uri replaced, all others are dropped.
    original_segments = playlist.segments.copy()
    playlist.segments.clear()
    for segment in original_segments:
        if segment.uri not in path_map:
            continue
        segment.uri = path_map[segment.uri]
        playlist.segments.append(segment)

    playlist_path = path.join(target_dir, "playlist_downloaded.m3u8")
    playlist.dump(playlist_path)

    if args.no_join:
        print_out("\n\n<dim>Skipping joining files...</dim>")
        print_out("VODs downloaded to:\n<blue>{}</blue>".format(target_dir))
        return

    print_out("\n\nJoining files...")
    _join_vods(playlist_path, target, args.overwrite, video)

    if not args.keep:
        print_out("\n<dim>Deleting temporary files...</dim>")
        shutil.rmtree(target_dir)
    else:
        print_out(
            "\n<dim>Temporary files not deleted: {}</dim>".format(target_dir))

    print_out("\nDownloaded: <green>{}</green>".format(target))