コード例 #1
0
ファイル: delete_dne.py プロジェクト: jtara1/misc_scripts
def delete_dne(path, recursive=False, verbose=False):
    """Remove every image under ``path`` that ``is_imgur_dne_image`` flags.

    :param path: forwarded to ``GetMediaFiles.get_info``
    :param recursive: forwarded to ``GetMediaFiles.get_info``
    :param verbose: if True, also print the full listing and each path
        as it is inspected
    :returns: number of files that were deleted
    """
    image_entries = GetMediaFiles().get_info(path=path,
                                             recursive=recursive,
                                             track_types=['Image'],
                                             sort=False)
    started_at = time.time()

    if verbose:
        print(image_entries)
    print('%s files found' % len(image_entries))  # debug
    print('-------------------------')  # debug

    removed = 0
    for entry in image_entries:
        # the first element of every entry is the path of the file
        file_path = entry[0]
        if verbose:
            print(file_path)  # debug

        if not is_imgur_dne_image(file_path):
            continue

        removed += 1
        print('%s' % (os.path.split(file_path)[1]))
        os.remove(file_path)

    elapsed = int(time.time() - started_at)
    print("delete_dne func took %d seconds\n" % elapsed)
    return removed
コード例 #2
0
ファイル: delete_dne.py プロジェクト: jtara1/misc_scripts
def delete_dne_hash_cmp(path, recursive=False, verbose=False):
    """Delete every image under ``path`` whose SHA-256 matches the reference.

    The reference digest is computed from ``imgur-dne.png``, which is opened
    relative to the current working directory. All candidate files are
    hashed first; only afterwards are the matching ones removed.

    :param path: forwarded to ``GetMediaFiles.get_info``
    :param recursive: forwarded to ``GetMediaFiles.get_info``
    :param verbose: if True, also print the full listing
    :returns: number of files that were deleted
    """

    def sha256_of(file_path):
        # Close the handle deterministically; the previous bare open()
        # calls left every file open until garbage collection.
        with open(file_path, 'rb') as handle:
            return hashfile(handle, hashlib.sha256())

    media = GetMediaFiles()
    files = media.get_info(path=path,
                           recursive=recursive,
                           track_types=['Image'],
                           sort=False)

    if verbose:
        print(files)
    print('%s files found' % len(files))  # debug
    print('-------------------------')  # debug

    init_t = time.time()

    # digest of the imgur "does not exist" placeholder image
    dne_hash = sha256_of('imgur-dne.png')

    # hash everything before deleting anything, so an unreadable file
    # aborts the run without a partial clean-up
    hashes = [sha256_of(entry[0]) for entry in files]

    amount_deleted = 0
    for entry, digest in zip(files, hashes):
        if digest == dne_hash:
            amount_deleted += 1
            os.remove(entry[0])

    print("delete_dne_hash_cmp func took %d seconds\n" %
          (int(time.time() - init_t)))

    return amount_deleted
コード例 #3
0
def test_data_of_individual_file():
    """Check ``get_info()`` of a ``GetMediaFiles`` built from a single JPEG."""
    no_size = (None, None)
    tracks = {
        'General': {
            'size': no_size,
            'format': 'JPEG',
            'duration': None
        },
        'Image': {
            'size': (1024, 772),
            'format': 'JPEG',
            'duration': None
        }
    }
    expected_entry = [
        abspath('tests/media/044837513-two-women-walking-hill-1930s-v.jpg'),
        tracks,
        # NOTE(review): hard-coded number, presumably a file timestamp of
        # the fixture -- confirm it is stable across checkouts
        1513722595.3275266
    ]

    jpeg_path = join(media_dir,
                     '044837513-two-women-walking-hill-1930s-v.jpg')
    actual = GetMediaFiles(jpeg_path).get_info()
    assert actual == [expected_entry]
コード例 #4
0
ファイル: __main__.py プロジェクト: jtara1/media_to_video
    def __init__(self,
                 src_path,
                 sort='st_ctime',
                 sort_reverse=False,
                 interval_duration=8,
                 audio_index=0,
                 audio_folder=None,
                 renders_heap_file_path=None,
                 dont_load_renders_heap=False,
                 output_width=1920,
                 output_height=1080):
        """
        Given a directory (path), get media files in path, convert &
        concatenate into clips where the duration of each is
        interval_duration or the duration of the src vid,
        until max_duration is reached.

        :param src_path: path containing sources of media files to use in video
        :param sort: value from os.stat(...) func, viable values:
            https://docs.python.org/3/library/os.html#os.stat_result
        :param sort_reverse: Reverse after sorting; Default sorts from least to
            greatest (oldest to newest)
        :param interval_duration: duration of each image shown in the video
        :param audio_index: The index used to choose the audio file from the
            sorted list of audio_files in the src_path for the final
            render
        :param audio_folder: only search for songs to use in the video in this
            folder, otherwise, search for songs in src_path
        :param renders_heap_file_path: file path of the renders heap that keeps
            track of the information of each rendered video, defaults to a bin
            file stored in the src_path
        :param dont_load_renders_heap: if True, does not attempt to deserialize
            the renders heap which also means it won't attempt to skip media
            that has already been used in a render
        :param output_width: width of the rendered video, stored as
            ``self.owidth``
        :param output_height: height of the rendered video, stored as
            ``self.oheight``
        """
        # setup logging
        self.log = logging.getLogger(__class__.__name__)
        # getLogger() hands back the same logger object for the same name, so
        # attach the stdout handler only once; otherwise every additional
        # instance adds another handler and each record is printed repeatedly
        if not self.log.handlers:
            handler = logging.StreamHandler(stream=sys.stdout)
            handler.setLevel(logging.INFO)
            handler.setFormatter(
                logging.Formatter("[%(name)s] %(levelname)s "
                                  "%(asctime)s %(message)s"))
            self.log.addHandler(handler)

        # source media to be used in final video is in this path
        self.src_path = os.path.abspath(src_path)

        # output files stored here
        self.out_path = os.path.join(self.src_path,
                                     self.relative_output_directory)
        Serialization.make_paths_for_file(self.out_path, is_file=False)

        # duration of each media file in video
        self.interval_duration = interval_duration
        # maximum duration allowed - determined by length of audio file
        self.max_duration = None

        self.owidth = output_width  # output width
        self.oheight = output_height  # output height

        # Get list of media files with certain extension from path (sorted)
        self.src_files = GetMediaFiles(self.src_path)
        # list of files paths for each diff media type
        self.image_files = self.src_files.get_info(sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Image'])
        self.video_files = self.src_files.get_info(sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Video'])
        # songs come from audio_folder when given, else from src_path
        audio_path = os.path.abspath(audio_folder) \
            if audio_folder else self.src_path
        self.audio_files = self.src_files.get_info(path=audio_path,
                                                   sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Audio'])
        print('number of songs found: {}'.format(len(self.audio_files)))

        # files that can be used in the final rendered video
        self.media_files = self.image_files + self.video_files
        print('number of non-audio-only media files found: {}'.format(
            len(self.media_files)))

        self.vid_time = 0  # time a clip is placed in the timeline of final vid
        self.audio_index = audio_index

        # os.path.join is used throughout instead of relying on a separately
        # imported bare `join`
        heap_fp = renders_heap_file_path
        if heap_fp is None:
            heap_fp = os.path.join(self.src_path, self.renders_heap_file_name)
        self.renders_heap = Heap(file_path=heap_fp)

        if not dont_load_renders_heap:
            self.renders_heap.deserialize()  # try to load from file
        if self.renders_heap.peek() is not None:
            self.log.debug(pformat(dict(self.renders_heap.peek())))

        # [start, end) index pairs into image_files / video_files describing
        # what the latest render consumed
        self.image_files_range = [0, 0]
        self.video_files_range = [0, 0]

        self._render_queue = Queue()
コード例 #5
0
ファイル: __main__.py プロジェクト: jtara1/media_to_video
class MediaToVideo:
    relative_output_directory = '_output'  # like '/home/user/src_path/_output'
    renders_heap_file_name = '_renders_heap.bin'  # stores metadata of renders

    def __init__(self,
                 src_path,
                 sort='st_ctime',
                 sort_reverse=False,
                 interval_duration=8,
                 audio_index=0,
                 audio_folder=None,
                 renders_heap_file_path=None,
                 dont_load_renders_heap=False,
                 output_width=1920,
                 output_height=1080):
        """
        Given a directory (path), get media files in path, convert &
        concatenate into clips where the duration of each is
        interval_duration or the duration of the src vid,
        until max_duration is reached.

        :param src_path: path containing sources of media files to use in video
        :param sort: value from os.stat(...) func, viable values:
            https://docs.python.org/3/library/os.html#os.stat_result
        :param sort_reverse: Reverse after sorting; Default sorts from least to
            greatest (oldest to newest)
        :param interval_duration: duration of each image shown in the video
        :param audio_index: The index used to choose the audio file from the
            sorted list of audio_files in the src_path for the final
            render
        :param audio_folder: only search for songs to use in the video in this
            folder, otherwise, search for songs in src_path
        :param renders_heap_file_path: file path of the renders heap that keeps
            track of the information of each rendered video, defaults to a bin
            file stored in the src_path
        :param dont_load_renders_heap: if True, does not attempt to deserialize
            the renders heap which also means it won't attempt to skip media
            that has already been used in a render
        :param output_width: width of the rendered video, stored as
            ``self.owidth``
        :param output_height: height of the rendered video, stored as
            ``self.oheight``
        """
        # setup logging
        self.log = logging.getLogger(__class__.__name__)
        # getLogger() hands back the same logger object for the same name, so
        # attach the stdout handler only once; otherwise every additional
        # instance adds another handler and each record is printed repeatedly
        if not self.log.handlers:
            handler = logging.StreamHandler(stream=sys.stdout)
            handler.setLevel(logging.INFO)
            handler.setFormatter(
                logging.Formatter("[%(name)s] %(levelname)s "
                                  "%(asctime)s %(message)s"))
            self.log.addHandler(handler)

        # source media to be used in final video is in this path
        self.src_path = os.path.abspath(src_path)

        # output files stored here
        self.out_path = os.path.join(self.src_path,
                                     self.relative_output_directory)
        Serialization.make_paths_for_file(self.out_path, is_file=False)

        # duration of each media file in video
        self.interval_duration = interval_duration
        # maximum duration allowed - determined by length of audio file
        self.max_duration = None

        self.owidth = output_width  # output width
        self.oheight = output_height  # output height

        # Get list of media files with certain extension from path (sorted)
        self.src_files = GetMediaFiles(self.src_path)
        # list of files paths for each diff media type
        self.image_files = self.src_files.get_info(sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Image'])
        self.video_files = self.src_files.get_info(sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Video'])
        # songs come from audio_folder when given, else from src_path
        audio_path = os.path.abspath(audio_folder) \
            if audio_folder else self.src_path
        self.audio_files = self.src_files.get_info(path=audio_path,
                                                   sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Audio'])
        print('number of songs found: {}'.format(len(self.audio_files)))

        # files that can be used in the final rendered video
        self.media_files = self.image_files + self.video_files
        print('number of non-audio-only media files found: {}'.format(
            len(self.media_files)))

        self.vid_time = 0  # time a clip is placed in the timeline of final vid
        self.audio_index = audio_index

        # os.path.join is used throughout instead of relying on a separately
        # imported bare `join`
        heap_fp = renders_heap_file_path
        if heap_fp is None:
            heap_fp = os.path.join(self.src_path, self.renders_heap_file_name)
        self.renders_heap = Heap(file_path=heap_fp)

        if not dont_load_renders_heap:
            self.renders_heap.deserialize()  # try to load from file
        if self.renders_heap.peek() is not None:
            self.log.debug(pformat(dict(self.renders_heap.peek())))

        # [start, end) index pairs into image_files / video_files describing
        # what the latest render consumed
        self.image_files_range = [0, 0]
        self.video_files_range = [0, 0]

        self._render_queue = Queue()

    @property
    def render_queue(self):
        """When a render completes, the file_path to the rendered file will
        be put in this `multiprocessing.Queue` object
        """
        return self._render_queue

    def render(self, limit=1):
        """ The user using the API should call this method to render the images
        and videos from the provided path as a video based on the length of
        the audio file used in self._get_audio_file().
        :param limit: maximum number of video to render; -1 implies endless
        """
        def continuation_generator(length):
            if length == -1:
                while True:
                    yield True
            for _ in range(length):
                yield True

        limit_generator = continuation_generator(limit)
        try:
            while next(limit_generator):
                try:
                    self._render()
                except (KeyboardInterrupt, M2VException) as e:
                    print("{}: {}".format(type(e).__name__, e.args))
                    break
                except IndexError:
                    traceback.print_exc(file=sys.stdout)
                    break

                # file path of rendered video
                fp = self.renders_heap.peek().main_key
                # put file_path to successfully rendered video into the queue
                self._render_queue.put(
                    (fp, dict(self.renders_heap.peek().data[fp])))
        except StopIteration:
            print("Rendered {} videos".format(limit if limit != -1 else "all"))

    def _render(self):
        """Render one video and record it in the renders heap.

        When the heap already holds a datum, the audio index and the
        image/video ranges are taken from its ``get_next()`` before anything
        is rendered.

        :raises M2VException: if the datum on top of the heap shows that no
            media is left, or that not enough media remains
        """
        previous = self.renders_heap.peek()

        if previous is not None:
            if self._out_of_media(previous):
                raise M2VException("No more media available")
            if self._get_number_of_extra_images(previous) <= 0:
                raise M2VException("Not enough images or videos.")
            next_state = previous.get_next()
            (self.audio_index,
             self.image_files_range,
             self.video_files_range) = next_state

        # the chosen song dictates how long this video is allowed to be
        soundtrack = self._get_audio_clip()
        self.max_duration = soundtrack.duration

        # build the clips, then write the composed video to disk
        clips = self._get_clips()
        output_file = self._composite_clips(clips, audio_clip=soundtrack)

        # describe the finished render so that it can be stored
        datum_fields = dict(
            data_file=os.path.join(os.path.dirname(output_file),
                                   'datum.json'),
            main_key=output_file,
            date_created=os.stat(output_file).st_ctime,
            images=self._image_files_used(),
            videos=self._video_files_used(),
            audio=self.audio_files[self.audio_index],
            audio_index=self.audio_index + 1,
            images_range=self.image_files_range,
            videos_range=self.video_files_range,
            finished_render=True,
            uploaded_to=[])
        record = RenderDatum(**datum_fields)
        self.log.debug(pformat(dict(record), width=150))  # debug

        self.renders_heap.push(record)  # store datum in heap
        self.renders_heap.serialize()  # save heap to file
        self.vid_time = 0  # reset in case we're doing another render

    def _get_clips(self):
        """ Get list of Clip objects of videos & images """
        return self._get_image_clips(self.image_files_range[1]) + \
            self._get_video_clips(self.video_files_range[1])

    def _get_image_clips(self, image_index=0):
        """ Creates moviepy clips for images & returns a list of them """
        transition_t = 0.3
        clips = []
        last_index = image_index
        for i, clip_data in enumerate(self.image_files[image_index:],
                                      start=image_index):
            last_index = i
            if self.vid_time < self.max_duration:
                clips.append(
                    ImageClip(clip_data[0],
                              duration=self.interval_duration).set_start(
                                  self.vid_time).set_pos('center').
                    crossfadein(transition_t).resize(
                        self._fit_img(clip_data[1]['Image']['size'][0],
                                      clip_data[1]['Image']['size'][1])))
                self.vid_time += self.interval_duration
            else:
                break

        self.image_files_range = [image_index, last_index]
        return clips

    def _get_video_clips(self, video_index=0):
        """ Creates moviepy clips for video & returns a list of them """
        transition_t = 0.3
        clips = []
        # i = 0
        last_index = video_index
        for i, clip_data in enumerate(self.video_files[video_index:],
                                      start=video_index):
            last_index = i
            if self.vid_time < self.max_duration:
                src_clip_duration = float(
                    clip_data[1]['Video']['duration']) / 1000
                clips.append(
                    VideoFileClip(clip_data[0],
                                  audio=True).set_start(self.vid_time).
                    set_pos('center').crossfadein(transition_t).set_duration(
                        src_clip_duration).volumex(1).set_fps(30).resize(
                            self._fit_img(clip_data[1]['Video']['size'][0],
                                          clip_data[1]['Video']['size'][1])))
                self.vid_time += src_clip_duration
            else:
                break

        self.video_files_range = [video_index, last_index]
        return clips

    def _get_audio_clip(self):
        """Make the audio clip for ``self.audio_files[self.audio_index]``.

        :returns: an AudioFileClip starting at 0 with its volume scaled by 1
        :raises M2VException: if ``self.audio_index`` is past the end of
            ``self.audio_files``
        """
        try:
            audio_path = self.audio_files[self.audio_index][0]
        except IndexError as err:
            # The list lookup raises IndexError, not M2VException, so the
            # old handler never fired; translate it into the intended error.
            raise M2VException("No more audio files available") from err

        return AudioFileClip(audio_path).set_start(0).volumex(1)

    def _composite_clips(self, clips, ofname='output', audio_clip=None):
        """Render and save the video made of ``clips``.

        :param clips: clips to layer into one video of size
            ``(self.owidth, self.oheight)``
        :param ofname: unused; kept so existing callers keep working
        :param audio_clip: audio to attach; when the composed video already
            carries audio, both are mixed. ``None`` leaves the video's own
            audio untouched
        :returns opath: output_path of video file rendered
        :raises M2VException: if ``clips`` is empty
        """
        if not clips:
            raise M2VException("No more images or videos available")

        video = CompositeVideoClip(clips, size=(self.owidth, self.oheight))

        # Only touch the audio when there is something to add: mixing in the
        # default audio_clip=None used to hand None to CompositeAudioClip.
        if audio_clip is not None:
            if video.audio is not None:
                # combine audio if audio was already found in video
                audio_clip = CompositeAudioClip([video.audio, audio_clip])
            video.audio = audio_clip

        opath = os.path.join(self.out_path,
                             get_slugified_datetime_now() + '.mp4')
        video.write_videofile(opath, fps=30, codec="libx264")
        return opath

    def _concatenate_clips(self, clips, ofname='output', audio_clip=None):
        """Deprecated.

        Takes list of VideoFileClip objects & concatenates them to make
        one video, written to a timestamp-named .mp4 in ``self.out_path``.

        :param clips: clips to join back to back
        :param ofname: unused; kept so existing callers keep working
        :param audio_clip: audio to attach to the result; ``None`` keeps the
            audio of the concatenated clips
        :returns: path of the written video file
        """
        video = concatenate_videoclips(clips, transition=None, bg_color=None)
        if audio_clip is not None:
            # set_audio returns a modified copy; the result used to be
            # discarded, so the audio was never attached
            video = video.set_audio(audio_clip)

        opath = os.path.join(self.out_path, str(int(time.time())) + '.mp4')
        video.write_videofile(opath, fps=30)
        return opath

    def _write_clips(self, clips, ofnames=[]):
        """ Create an .mp4 of each clip individually """
        opath = os.path.join(self.out_path, str(int(time.time())) + '.mp4')
        temp = [
            clip.write_videofile(opath[:-4] + str(i) + opath[-4:], fps=30)
            for i, clip in enumerate(clips)
        ]
        return temp  # probably returns [None] * len(clips)

    def _fit_img(self, w, h):
        """ Get width & height to scale image to to fit self.owidth & 
        self.oheight 
        """
        w2, h2, = None, None
        # assumes self.owidth > self.oheight (most aspect ratios work that way)
        if w > h:
            w2 = self.owidth
            ratio = float(w2) / w
            h2 = ratio * h
        elif h >= w:
            h2 = self.oheight
            ratio = float(h2) / h
            w2 = ratio * w
        return w2, h2

    def _image_files_used(self):
        """Should only be called after self._get_image_files() is called"""
        return self.image_files[self.image_files_range[0]:self.
                                image_files_range[1]]

    def _video_files_used(self):
        """Should only be called after self._get_video_files() is called"""
        return self.video_files[self.video_files_range[0]:self.
                                video_files_range[1]]

    def _out_of_media(self, datum):
        """
        Checks if there's at least one
        media to play for the duration of the audio
        :param datum: The datum that's about to be used to help choose the
            next media for the video render
        :type datum: serialization.RenderDatum
        :return: True if there's not enough media, False otherwise
        """
        imgs_range = datum['images_range']
        vids_range = datum['videos_range']
        if imgs_range[1] - imgs_range[0] == 0 and \
                vids_range[1] - vids_range[0] == 0:
            return True
        return False

    def _get_number_of_extra_images(self, datum):
        """Returns the number of images remaining after the next render
        uses the currently available images with the next available song
        :param datum: The datum that's about to be used to help choose the
            next media for the video render
        :type datum: serialization.RenderDatum
        :return: integer of the number of images after the next render (can be
            negative, zero, or positive)
        """
        audio_index = datum['audio_index']
        try:
            media_file = self.audio_files[audio_index]
        except IndexError:
            raise M2VException("Not enough audio_files")
        audio_duration = media_file[1]['Audio']['duration'] / 1000  # seconds

        imgs_range = datum['images_range']
        vids_range = datum['videos_range']

        total_non_audio_media = len(self.image_files) + len(self.video_files)
        remaining_images = total_non_audio_media - \
            imgs_range[1] + vids_range[1]

        self.log.info("checking for number of extra images: audio_index = {}; "
                      "images_range = [{}, {}); number of audio files = {}; "
                      "number of non-audio-only files = {}".format(
                          audio_index, imgs_range[0], imgs_range[1],
                          len(self.audio_files), total_non_audio_media))

        min_images_needed = audio_duration // self.interval_duration
        return remaining_images - min_images_needed
コード例 #6
0
    def download(self,
                 save_folder,
                 tags='classical',
                 sort='date',
                 limit=1,
                 reverse=False,
                 license='by',
                 skip_previous_songs=True):
        """Downloads songs from ccMixter and saves them. All arguments
        except save_folder and skip_previous_songs are used for
        building the query.

        :param save_folder: location of saved music files
        :param tags: <str> in url, tags of songs used as a filter
        :param sort: <str> in url, sort type used to filter songs
        :param limit: <int> amount of songs to download before stopping
        :param reverse: <bool> reverses the order in which the
            list of songs are returned from ccmixter
        :param license: <str> the type of matching license of songs
            for query building
        :param skip_previous_songs: <bool> if true, checks for previous
            queries made and skips the amount downloaded (as offset in url
            query filter).
        :returns: <dict> metadata read back from the metadata file after
            the download, following JSON format in the
            schema of: {"artist_-_song_name.mp3": {"artist": "Johnny", ... }}
            where each key is the song file name and its value is the JSON
            formatted SongMetadata; an empty dict if the metadata file
            cannot be found
        """
        # location of music files downloaded
        save_folder = os.path.abspath(save_folder)
        self.log.info('### CCMixterSongDownloader.download begin ###')

        # work out how many results of this query to skip
        if not skip_previous_songs:
            history_data = {}
            offset = 0
        else:
            history_data, offset = History.get_previous_download_amount(
                tags, sort, save_folder)
            # an empty string is treated as "nothing downloaded so far"
            if offset == '':
                offset = 0

        self.log.debug('history_data = {}'.format(history_data))
        self.log.debug('Offset for this query: {}'.format(offset))

        # build the query URL and fetch the result page
        query_url = self.URL_TEMPLATE.format(
            tags=tags,
            sort=sort,
            limit=limit,
            offset=offset,
            reverse='ASC' if reverse else 'DESC',
            license=license)
        self.log.debug("Query created: {}".format(query_url))
        response = requests.get(query_url)
        self.log.debug("Response to query: {}".format(response))
        soup = BeautifulSoup(response.text, 'lxml')

        downloaded = 0  # amount of songs downloaded
        song_tags = soup.find_all('div', attrs={'class': 'upload_info'})
        self.log.debug('HTML song tags found: {}'.format(len(song_tags)))

        # iterate over the HTML <div> tag that contains the direct link to .mp3
        for count, tag in enumerate(song_tags, start=0):
            # we've downloaded enough songs to reach the limit
            if downloaded >= limit:
                self.log.debug(
                    'Dl limit reached, downloaded = {}, limit = {}'.format(
                        downloaded, limit))
                break

            # the `about` attribute of the tag holds the direct link
            direct_link = tag['about']
            # avoid downloading zip files
            if direct_link.endswith(('.zip', '.zip ')):
                self.log.debug(
                    'Zip file encountered, skipping {}'.format(direct_link))
                continue

            # convert URL text elements (%2D -> '-')
            # and make it valid file name
            file_name = slugify(basename(unquote(direct_link)))
            save_path = os.path.join(save_folder, file_name)
            self.log.info('Saving: {} as {}'.format(tag['about'], save_path))

            # download the song (the link is stripped of surrounding
            # whitespace only for the download itself)
            CCMixterSongDownloader._direct_link_download(
                tag['about'].strip(), save_path)

            # get length of song
            files = GetMediaFiles(save_path).get_info()
            length = files[0][1]['Audio']['duration']
            if length:  # length is occasionally None
                # presumably milliseconds -> seconds; TODO confirm the unit
                length /= 1000
            else:
                # NOTE(review): an empty-string length is replaced by the
                # literal '""' for the log line below, and that replacement
                # value is also what gets stored in the metadata
                if length == '':
                    length = '""'
                self.log.critical('{} HAS LENGTH OF {}'.format(
                    save_path, length))

            # keep info of the song
            artist, song, link, lic, lic_url = self._parse_info_from_tag(tag)
            metadata = SongMetadata(length=length,
                                    artist=artist,
                                    name=song,
                                    link=link,
                                    license_url=lic_url,
                                    license=lic,
                                    direct_link=direct_link)

            # update metadata in file with new song downloaded
            History.history_log(
                wdir=save_folder,
                log_file=self.METADATA_FILE,
                mode='update',
                write_data=self._create_metadata_serialization_data(
                    file_name, metadata))

            downloaded += 1

        # report queries that produced fewer songs than requested
        if downloaded <= 0:
            self.log.error('No songs found with {} query'.format(query_url))
        elif downloaded < limit:
            self.log.warning('Downloaded {} songs when limit = {}'.format(
                downloaded, limit))

        # record the new offset for this tags/sort combination
        History.history_log(wdir=save_folder,
                            log_file=History.log_file,
                            mode='write',
                            write_data=self._create_history_log_info(
                                history_data, tags, sort, offset + downloaded))

        # read the metadata file back in
        try:
            new_metadata = History.history_log(wdir=save_folder,
                                               log_file=self.METADATA_FILE,
                                               mode='read')
        except (FileExistsError, FileNotFoundError):
            # no songs found with query can cause this
            new_metadata = {}

        self.songs_metadata.update(new_metadata)
        return new_metadata
コード例 #7
0
import sys
from os.path import join, dirname, abspath
# directory that contains this file
__file_path = dirname(__file__)
# put the parent directory on sys.path before importing get_media_files
sys.path.append(join(__file_path, '..'))

from get_media_files import GetMediaFiles

# the `media` directory next to this file
media_dir = join(__file_path, 'media')
# module-level GetMediaFiles object built from that directory
media = GetMediaFiles(media_dir)  # GetMediaFiles object


def test_data():
    print(media)

    expected_data = [
        [
            abspath('tests/media/about.txt'), {
                'General': {
                    'duration': None,
                    'format': None,
                    'size': (None, None)
                }
            }, 1513722595.3195267
        ],
        [
            abspath(
                'tests/media/044838940-mountain-range-and-lake-2006-a.jpg'), {
                    'General': {
                        'duration': None,
                        'format': 'JPEG',
                        'size': (None, None)