def delete_dne(path, recursive=False, verbose=False):
    """Delete duplicate file if its byte array matches that of the reference

    :param path: directory to scan for Image files
    :param recursive: also scan sub-directories of `path`
    :param verbose: print debug information while scanning
    :return: the number of files removed
    """
    finder = GetMediaFiles()
    image_files = finder.get_info(path=path, recursive=recursive,
                                  track_types=['Image'], sort=False)
    started_at = time.time()
    if verbose:
        print(image_files)
        print('%s files found' % len(image_files))  # debug
        print('-------------------------')  # debug
    removed = 0
    # Walk every image found and remove the ones matching the reference.
    for entry in image_files:
        file_path = entry[0]
        if verbose:
            print(file_path)  # debug
        if not is_imgur_dne_image(file_path):
            continue
        removed += 1
        print('%s' % (os.path.split(file_path)[1]))
        os.remove(file_path)
    print("delete_dne func took %d seconds\n" % (int(time.time() - started_at)))
    return removed
def delete_dne_hash_cmp(path, recursive=False, verbose=False):
    """Delete file if its hash matches that of the reference file.

    Compares the SHA-256 digest of every Image file under `path` against
    the digest of the reference file 'imgur-dne.png' (resolved relative to
    the current working directory) and deletes matches.

    :param path: directory to scan for Image files
    :param recursive: also scan sub-directories of `path`
    :param verbose: print debug information about the files found
    :return: the number of files removed
    """
    media = GetMediaFiles()
    files = media.get_info(path=path, recursive=recursive,
                           track_types=['Image'], sort=False)
    if verbose:
        print(files)
        print('%s files found' % len(files))  # debug
        print('-------------------------')  # debug
    init_t = time.time()
    # Hash of the reference image. Use `with` so the handle is closed
    # deterministically — the original left it (and one handle per scanned
    # file below) open until garbage collection.
    with open('imgur-dne.png', 'rb') as ref_file:
        dne_hash = hashfile(ref_file, hashlib.sha256())
    amount_deleted = 0
    # Hash and compare one file at a time instead of pre-building a list of
    # hashes indexed by range(len(...)): same results, bounded open handles.
    for file_info in files:
        file_path = file_info[0]
        with open(file_path, 'rb') as fobj:
            file_hash = hashfile(fobj, hashlib.sha256())
        if file_hash == dne_hash:
            amount_deleted += 1
            os.remove(file_path)
    print("delete_dne_hash_cmp func took %d seconds\n" %
          (int(time.time() - init_t)))
    return amount_deleted
def test_data_of_individual_file():
    """GetMediaFiles pointed at a single file reports only that file."""
    file_name = '044837513-two-women-walking-hill-1930s-v.jpg'
    general_track = {'size': (None, None), 'format': 'JPEG', 'duration': None}
    image_track = {'size': (1024, 772), 'format': 'JPEG', 'duration': None}
    expected = [[
        abspath('tests/media/' + file_name),
        {'General': general_track, 'Image': image_track},
        1513722595.3275266,
    ]]
    single_file_media = GetMediaFiles(join(media_dir, file_name))
    assert single_file_media.get_info() == expected
def __init__(self, src_path, sort='st_ctime', sort_reverse=False,
             interval_duration=8, audio_index=0, audio_folder=None,
             renders_heap_file_path=None, dont_load_renders_heap=False,
             output_width=1920, output_height=1080):
    """
    Given a directory (path), get media files in path, convert &
    concatenate into clips where the duration of each is
    interval_duration or the duration of the src vid, until max_duration
    is reached.

    :param src_path: path containing sources of media files to use in
        video
    :param sort: value from os.stat(...) func, viable values:
        https://docs.python.org/3/library/os.html#os.stat_result
    :param sort_reverse: Reverse after sorting; Default sorts from least
        to greatest (oldest to newest)
    :param interval_duration: duration of each image shown in the video
    :param audio_index: The index used to choose the audio file from the
        sorted list of audio_files in the src_path for the final render
    :param audio_folder: only search for songs to use in the video in
        this folder, otherwise, search for songs in src_path
    :param renders_heap_file_path: file path of the renders heap that
        keeps track of the information of each rendered video, defaults
        to a bin file stored in the src_path
    :param dont_load_renders_heap: if True, does not attempt to
        deserialize the renders heap which also means it won't attempt
        to skip media that has already been used in a render
    :param output_width: pixel width of the rendered video
    :param output_height: pixel height of the rendered video
    """
    # setup logging: per-class logger writing INFO+ to stdout.
    # NOTE(review): addHandler on every construction means a second
    # instance adds a second handler (duplicate log lines) — confirm
    # whether multiple instances are ever created.
    self.log = logging.getLogger(__class__.__name__)
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter("[%(name)s] %(levelname)s "
                          "%(asctime)s %(message)s"))
    self.log.addHandler(handler)
    # source media to be used in final video is in this path
    self.src_path = os.path.abspath(src_path)
    # output files stored here; create the directory if missing
    self.out_path = os.path.join(self.src_path,
                                 self.relative_output_directory)
    Serialization.make_paths_for_file(self.out_path, is_file=False)
    # duration of each media file in video (seconds per still image)
    self.interval_duration = interval_duration
    # maximum duration allowed - determined later by length of audio file
    self.max_duration = None
    self.owidth = output_width  # output width
    self.oheight = output_height  # output height
    # Get list of media files with certain extension from path (sorted)
    self.src_files = GetMediaFiles(self.src_path)
    # list of file paths (plus track metadata) for each media type
    self.image_files = self.src_files.get_info(sort=sort,
                                               sort_reverse=sort_reverse,
                                               track_types=['Image'])
    self.video_files = self.src_files.get_info(sort=sort,
                                               sort_reverse=sort_reverse,
                                               track_types=['Video'])
    # audio is searched in audio_folder when given, else in src_path
    self.audio_files = self.src_files.get_info(
        path=os.path.abspath(audio_folder)
        if audio_folder else self.src_path,
        sort=sort,
        sort_reverse=sort_reverse,
        track_types=['Audio'])
    print('number of songs found: {}'.format(len(self.audio_files)))
    # files that can be used in the final rendered video
    self.media_files = self.image_files + self.video_files
    print('number of non-audio-only media files found: {}'.format(
        len(self.media_files)))
    self.vid_time = 0  # time a clip is placed in the timeline of final vid
    self.audio_index = audio_index
    # persisted heap of past renders; default location is inside src_path
    heap_fp = renders_heap_file_path if renders_heap_file_path is not None\
        else join(self.src_path, self.renders_heap_file_name)
    self.renders_heap = Heap(file_path=heap_fp)
    if not dont_load_renders_heap:
        self.renders_heap.deserialize()  # try to load from file
    if self.renders_heap.peek() is not None:
        self.log.debug(pformat(dict(self.renders_heap.peek())))
    # [start, end) indices into image_files/video_files used by a render
    self.image_files_range = [0, 0]
    self.video_files_range = [0, 0]
    # completed-render file paths are published through this queue
    self._render_queue = Queue()
class MediaToVideo:
    # Rendered videos are written under this sub-directory of src_path,
    # e.g. '/home/user/src_path/_output'.
    relative_output_directory = '_output'
    # File (in src_path by default) persisting metadata of past renders.
    renders_heap_file_name = '_renders_heap.bin'  # stores metadata of renders

    def __init__(self, src_path, sort='st_ctime', sort_reverse=False,
                 interval_duration=8, audio_index=0, audio_folder=None,
                 renders_heap_file_path=None, dont_load_renders_heap=False,
                 output_width=1920, output_height=1080):
        """
        Given a directory (path), get media files in path, convert &
        concatenate into clips where the duration of each is
        interval_duration or the duration of the src vid, until
        max_duration is reached.

        :param src_path: path containing sources of media files to use in
            video
        :param sort: value from os.stat(...) func, viable values:
            https://docs.python.org/3/library/os.html#os.stat_result
        :param sort_reverse: Reverse after sorting; Default sorts from
            least to greatest (oldest to newest)
        :param interval_duration: duration of each image shown in the video
        :param audio_index: The index used to choose the audio file from
            the sorted list of audio_files in the src_path for the final
            render
        :param audio_folder: only search for songs to use in the video in
            this folder, otherwise, search for songs in src_path
        :param renders_heap_file_path: file path of the renders heap that
            keeps track of the information of each rendered video,
            defaults to a bin file stored in the src_path
        :param dont_load_renders_heap: if True, does not attempt to
            deserialize the renders heap which also means it won't attempt
            to skip media that has already been used in a render
        :param output_width: pixel width of the rendered video
        :param output_height: pixel height of the rendered video
        """
        # setup logging: per-class logger writing INFO+ to stdout
        self.log = logging.getLogger(__class__.__name__)
        handler = logging.StreamHandler(stream=sys.stdout)
        handler.setLevel(logging.INFO)
        handler.setFormatter(
            logging.Formatter("[%(name)s] %(levelname)s "
                              "%(asctime)s %(message)s"))
        self.log.addHandler(handler)
        # source media to be used in final video is in this path
        self.src_path = os.path.abspath(src_path)
        # output files stored here; create the directory if missing
        self.out_path = os.path.join(self.src_path,
                                     self.relative_output_directory)
        Serialization.make_paths_for_file(self.out_path, is_file=False)
        # duration of each media file in video (seconds per still image)
        self.interval_duration = interval_duration
        # maximum duration allowed - determined later by length of audio
        self.max_duration = None
        self.owidth = output_width  # output width
        self.oheight = output_height  # output height
        # Get list of media files with certain extension from path (sorted)
        self.src_files = GetMediaFiles(self.src_path)
        # list of file paths (plus track metadata) for each media type
        self.image_files = self.src_files.get_info(sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Image'])
        self.video_files = self.src_files.get_info(sort=sort,
                                                   sort_reverse=sort_reverse,
                                                   track_types=['Video'])
        # audio is searched in audio_folder when given, else in src_path
        self.audio_files = self.src_files.get_info(
            path=os.path.abspath(audio_folder)
            if audio_folder else self.src_path,
            sort=sort,
            sort_reverse=sort_reverse,
            track_types=['Audio'])
        print('number of songs found: {}'.format(len(self.audio_files)))
        # files that can be used in the final rendered video
        self.media_files = self.image_files + self.video_files
        print('number of non-audio-only media files found: {}'.format(
            len(self.media_files)))
        self.vid_time = 0  # time a clip is placed in timeline of final vid
        self.audio_index = audio_index
        # persisted heap of past renders; default location inside src_path
        heap_fp = renders_heap_file_path if renders_heap_file_path is not None\
            else join(self.src_path, self.renders_heap_file_name)
        self.renders_heap = Heap(file_path=heap_fp)
        if not dont_load_renders_heap:
            self.renders_heap.deserialize()  # try to load from file
        if self.renders_heap.peek() is not None:
            self.log.debug(pformat(dict(self.renders_heap.peek())))
        # [start, end) indices into image_files/video_files for a render
        self.image_files_range = [0, 0]
        self.video_files_range = [0, 0]
        # completed-render file paths are published through this queue
        self._render_queue = Queue()

    @property
    def render_queue(self):
        """When a render completes, the file_path to the rendered file
        will be put in this `multiprocessing.Queue` object
        """
        return self._render_queue

    def render(self, limit=1):
        """
        The user using the API should call this method to render the
        images and videos from the provided path as a video based on the
        length of the audio file used in self._get_audio_clip().

        :param limit: maximum number of videos to render; -1 implies
            endless
        """
        def continuation_generator(length):
            # Yields True forever when length == -1, otherwise `length`
            # times; exhaustion raises StopIteration in the loop below.
            if length == -1:
                while True:
                    yield True
            for _ in range(length):
                yield True

        limit_generator = continuation_generator(limit)
        try:
            while next(limit_generator):
                try:
                    self._render()
                except (KeyboardInterrupt, M2VException) as e:
                    print("{}: {}".format(type(e).__name__, e.args))
                    break
                except IndexError:
                    traceback.print_exc(file=sys.stdout)
                    break
                # file path of rendered video (just pushed by _render)
                fp = self.renders_heap.peek().main_key
                # put file_path to successfully rendered video into queue
                self._render_queue.put(
                    (fp, dict(self.renders_heap.peek().data[fp])))
        except StopIteration:
            # continuation_generator exhausted: the limit was reached
            print("Rendered {} videos".format(limit if limit != -1
                                              else "all"))

    def _render(self):
        """Render a single video

        Uses the top of the renders heap (when present) to resume from the
        media/audio not yet used, renders one composite video, then pushes
        a new RenderDatum describing it onto the heap and serializes the
        heap to disk.
        """
        datum = self.renders_heap.peek()
        if datum is not None:
            if self._out_of_media(datum):
                raise M2VException("No more media available")
            if self._get_number_of_extra_images(datum) <= 0:
                raise M2VException("Not enough images or videos.")
            # advance to the next audio track and media ranges
            self.audio_index, \
                self.image_files_range, \
                self.video_files_range = datum.get_next()
        # find the audio clip we're using to determine how long this
        # rendered video will be
        audio_clip = self._get_audio_clip()
        self.max_duration = audio_clip.duration
        # render the thing with all the media
        render_file_path = \
            self._composite_clips(self._get_clips(), audio_clip=audio_clip)
        # create datum object that holds info on completed render
        data_file = os.path.join(os.path.dirname(render_file_path),
                                 'datum.json')
        datum = RenderDatum(data_file=data_file, main_key=render_file_path,
                            date_created=os.stat(render_file_path).st_ctime,
                            images=self._image_files_used(),
                            videos=self._video_files_used(),
                            audio=self.audio_files[self.audio_index],
                            audio_index=self.audio_index + 1,
                            images_range=self.image_files_range,
                            videos_range=self.video_files_range,
                            finished_render=True, uploaded_to=[])
        self.log.debug(pformat(dict(datum), width=150))  # debug
        self.renders_heap.push(datum)  # store datum in heap
        self.renders_heap.serialize()  # save heap to file
        self.vid_time = 0  # reset in case we're doing another render

    def _get_clips(self):
        """ Get list of Clip objects of videos & images

        Starts from the end indices of the previous ranges so already-used
        media is skipped.
        """
        return self._get_image_clips(self.image_files_range[1]) + \
            self._get_video_clips(self.video_files_range[1])

    def _get_image_clips(self, image_index=0):
        """ Creates moviepy clips for images & returns a list of them

        Side effects: advances self.vid_time and rewrites
        self.image_files_range to the [start, last-seen-index] span.
        """
        transition_t = 0.3  # crossfade-in time, seconds
        clips = []
        last_index = image_index
        for i, clip_data in enumerate(self.image_files[image_index:],
                                      start=image_index):
            last_index = i
            if self.vid_time < self.max_duration:
                clips.append(
                    ImageClip(clip_data[0],
                              duration=self.interval_duration).set_start(
                                  self.vid_time).set_pos('center').
                    crossfadein(transition_t).resize(
                        self._fit_img(clip_data[1]['Image']['size'][0],
                                      clip_data[1]['Image']['size'][1])))
                self.vid_time += self.interval_duration
            else:
                break
        self.image_files_range = [image_index, last_index]
        return clips

    def _get_video_clips(self, video_index=0):
        """ Creates moviepy clips for video & returns a list of them

        Side effects: advances self.vid_time by each source video's
        duration and rewrites self.video_files_range.
        """
        transition_t = 0.3  # crossfade-in time, seconds
        clips = []
        last_index = video_index
        for i, clip_data in enumerate(self.video_files[video_index:],
                                      start=video_index):
            last_index = i
            if self.vid_time < self.max_duration:
                # metadata duration is in milliseconds; convert to seconds
                src_clip_duration = float(
                    clip_data[1]['Video']['duration']) / 1000
                clips.append(
                    VideoFileClip(clip_data[0],
                                  audio=True).set_start(self.vid_time).
                    set_pos('center').crossfadein(transition_t).set_duration(
                        src_clip_duration).volumex(1).set_fps(30).resize(
                            self._fit_img(clip_data[1]['Video']['size'][0],
                                          clip_data[1]['Video']['size'][1])))
                self.vid_time += src_clip_duration
            else:
                break
        self.video_files_range = [video_index, last_index]
        return clips

    def _get_audio_clip(self):
        """ Make audio clip from one of the files found in the main
        directory given

        :raises M2VException: when no more audio files are available
        """
        try:
            return AudioFileClip(self.audio_files[self.audio_index][0])\
                .set_start(0)\
                .volumex(1)
        except M2VException:
            raise M2VException("No more audio files available")

    def _composite_clips(self, clips, ofname='output', audio_clip=None):
        """ Renders and saves video made of clips from self._get_clips(...)

        :param clips: list of moviepy clips to composite
        :param ofname: unused; kept for interface compatibility
        :param audio_clip: soundtrack for the rendered video
        :raises M2VException: when `clips` is empty
        :returns opath: output_path of video file rendered
        """
        if len(clips) == 0:
            raise M2VException("No more images or videos available")
        video = CompositeVideoClip(clips, size=(self.owidth, self.oheight))
        # combine audio if audio was already found in video
        if video.audio is not None:
            audio_clip = CompositeAudioClip([video.audio, audio_clip])
        video.audio = audio_clip
        opath = os.path.join(self.out_path,
                             get_slugified_datetime_now() + '.mp4')
        # alternative audio codecs considered: pcm_s16le, libvorbis
        video.write_videofile(opath, fps=30, codec="libx264")
        return opath

    def _concatenate_clips(self, clips, ofname='output', audio_clip=None):
        """ Deprecated

        Takes list of VideoFileClip objects & concatenates them to make
        one video.
        """
        video = concatenate_videoclips(clips, transition=None, bg_color=None)
        # NOTE(review): set_audio returns a copy; its result is discarded
        # here, so the written file keeps the original audio — confirm
        # intent (method is deprecated).
        video.set_audio(audio_clip)
        opath = os.path.join(self.out_path, str(int(time.time())) + '.mp4')
        video.write_videofile(opath, fps=30)

    def _write_clips(self, clips, ofnames=[]):
        """ Create an .mp4 of each clip individually

        :param clips: list of moviepy clips to write out
        :param ofnames: unused; kept for interface compatibility
            (NOTE(review): mutable default argument)
        """
        opath = os.path.join(self.out_path, str(int(time.time())) + '.mp4')
        temp = [
            clip.write_videofile(opath[:-4] + str(i) + opath[-4:], fps=30)
            for i, clip in enumerate(clips)
        ]
        return temp  # probably returns [None] * len(clips)

    def _fit_img(self, w, h):
        """ Get width & height to scale image to to fit self.owidth &
        self.oheight, preserving aspect ratio
        """
        w2, h2, = None, None
        # assumes self.owidth > self.oheight (most aspect ratios work
        # that way)
        if w > h:
            # landscape: fill the output width
            w2 = self.owidth
            ratio = float(w2) / w
            h2 = ratio * h
        elif h >= w:
            # portrait or square: fill the output height
            h2 = self.oheight
            ratio = float(h2) / h
            w2 = ratio * w
        return w2, h2

    def _image_files_used(self):
        """Should only be called after self._get_image_clips() is called"""
        return self.image_files[self.image_files_range[0]:self.
                                image_files_range[1]]

    def _video_files_used(self):
        """Should only be called after self._get_video_clips() is called"""
        return self.video_files[self.video_files_range[0]:self.
                                video_files_range[1]]

    def _out_of_media(self, datum):
        """ Checks if there's at least one media to play for the duration
        of the audio

        :param datum: The datum that's about to be used to help choose the
            next media for the video render
        :type datum: serialization.RenderDatum
        :return: True if there's not enough media, False otherwise
        """
        imgs_range = datum['images_range']
        vids_range = datum['videos_range']
        # both ranges empty -> the previous render consumed nothing new
        if imgs_range[1] - imgs_range[0] == 0 and \
                vids_range[1] - vids_range[0] == 0:
            return True
        return False

    def _get_number_of_extra_images(self, datum):
        """Returns the number of images remaining after the next render
        uses the currently available images with the next available song

        :param datum: The datum that's about to be used to help choose the
            next media for the video render
        :type datum: serialization.RenderDatum
        :raises M2VException: when the datum's audio index is past the end
            of self.audio_files
        :return: integer of the number of images after the next render
            (can be negative, zero, or positive)
        """
        audio_index = datum['audio_index']
        try:
            media_file = self.audio_files[audio_index]
        except IndexError:
            raise M2VException("Not enough audio_files")
        # metadata duration is in milliseconds
        audio_duration = media_file[1]['Audio']['duration'] / 1000  # seconds
        imgs_range = datum['images_range']
        vids_range = datum['videos_range']
        total_non_audio_media = len(self.image_files) + len(self.video_files)
        # NOTE(review): `+ vids_range[1]` looks suspicious — subtracting
        # both consumed ranges would be `- imgs_range[1] - vids_range[1]`;
        # confirm intended accounting before changing.
        remaining_images = total_non_audio_media - \
            imgs_range[1] + vids_range[1]
        self.log.info("checking for number of extra images: audio_index = {}; "
                      "images_range = [{}, {}); number of audio files = {}; "
                      "number of non-audio-only files = {}".format(
                          audio_index, imgs_range[0], imgs_range[1],
                          len(self.audio_files), total_non_audio_media))
        # floor division: number of full interval_duration slots in the song
        min_images_needed = audio_duration // self.interval_duration
        return remaining_images - min_images_needed
def download(self, save_folder, tags='classical', sort='date', limit=1,
             reverse=False, license='by', skip_previous_songs=True):
    """Downloads songs from ccMixter and saves them. All arguments except
    save_folder and skip_previous_songs are used for building the query

    :param save_folder: location of saved music files
    :param tags: <str> in url, tags of songs used as a filter
    :param sort: <str> in url, sort type used to filter songs
    :param limit: <int> amount of songs to download before stopping
    :param reverse: <bool> reverses the order in which the \n
        list of songs are returned from ccmixter
    :param license: <str> the type of matching license of songs \n
        for query building
    :param skip_previous_songs: <bool> if true, checks for previous \n
        queries made and skips the amount downloaded (as offset in url \n
        query filter).
    :returns: <dict> metadata of the songs just downloaded \n
        following JSON format in the \n
        schema of: {"artist_-_song_name.mp3": {"artist": "Johnny", ...}}\n
        where each key is the song file name and it's value is the JSON \n
        formatted SongMetadata
    """
    # location of music files downloaded
    save_folder = os.path.abspath(save_folder)
    self.log.info('### CCMixterSongDownloader.download begin ###')
    if not skip_previous_songs:
        history_data = {}
        offset = 0
    else:
        # resume where previous runs left off: offset skips songs already
        # downloaded with the same tags/sort into this folder
        history_data, offset = History.get_previous_download_amount(
            tags, sort, save_folder)
        if offset == '':
            # empty history entry: treat as no previous downloads
            offset = 0
    self.log.debug('history_data = {}'.format(history_data))
    self.log.debug('Offset for this query: {}'.format(offset))
    query_url = self.URL_TEMPLATE.format(
        tags=tags, sort=sort, limit=limit, offset=offset,
        reverse='ASC' if reverse else 'DESC', license=license)
    self.log.debug("Query created: {}".format(query_url))
    response = requests.get(query_url)
    self.log.debug("Response to query: {}".format(response))
    soup = BeautifulSoup(response.text, 'lxml')
    downloaded = 0  # amount of songs downloaded
    song_tags = soup.find_all('div', attrs={'class': 'upload_info'})
    self.log.debug('HTML song tags found: {}'.format(len(song_tags)))
    # iterate over the HTML <div> tag that contains the direct link to .mp3
    for count, tag in enumerate(song_tags, start=0):
        # we've downloaded enough songs to reach the limit
        if downloaded >= limit:
            self.log.debug(
                'Dl limit reached, downloaded = {}, limit = {}'.format(
                    downloaded, limit))
            break
        # the 'about' attribute holds the direct URL to the media file
        direct_link = tag['about']
        # avoid downloading zip files
        if direct_link.endswith(('.zip', '.zip ')):
            self.log.debug(
                'Zip file encountered, skipping {}'.format(direct_link))
            continue
        # convert URL text elements (%2D -> '-')
        # and make it valid file name
        file_name = slugify(basename(unquote(direct_link)))
        save_path = os.path.join(save_folder, file_name)
        self.log.info('Saving: {} as {}'.format(tag['about'], save_path))
        # download the song
        CCMixterSongDownloader._direct_link_download(
            tag['about'].strip(), save_path)
        # get length of song (metadata duration is in milliseconds)
        files = GetMediaFiles(save_path).get_info()
        length = files[0][1]['Audio']['duration']
        if length:  # length is occasionally None
            length /= 1000
        else:
            # NOTE(review): `length == ''` can only match when the
            # metadata yields an empty string; '""' is substituted purely
            # so the log line below is readable
            if length == '':
                length = '""'
            self.log.critical('{} HAS LENGTH OF {}'.format(
                save_path, length))
        # keep info of the song
        artist, song, link, lic, lic_url = self._parse_info_from_tag(tag)
        metadata = SongMetadata(length=length, artist=artist, name=song,
                                link=link, license_url=lic_url,
                                license=lic, direct_link=direct_link)
        # update metadata in file with new song downloaded
        History.history_log(
            wdir=save_folder, log_file=self.METADATA_FILE, mode='update',
            write_data=self._create_metadata_serialization_data(
                file_name, metadata))
        downloaded += 1
    if downloaded <= 0:
        self.log.error('No songs found with {} query'.format(query_url))
    elif downloaded < limit:
        self.log.warning('Downloaded {} songs when limit = {}'.format(
            downloaded, limit))
    # record the new cumulative offset so future runs can skip these songs
    History.history_log(wdir=save_folder, log_file=History.log_file,
                        mode='write',
                        write_data=self._create_history_log_info(
                            history_data, tags, sort, offset + downloaded))
    try:
        new_metadata = History.history_log(wdir=save_folder,
                                           log_file=self.METADATA_FILE,
                                           mode='read')
    except (FileExistsError, FileNotFoundError):
        # no songs found with query can cause this
        new_metadata = {}
    self.songs_metadata.update(new_metadata)
    return new_metadata
import sys from os.path import join, dirname, abspath __file_path = dirname(__file__) sys.path.append(join(__file_path, '..')) from get_media_files import GetMediaFiles media_dir = join(__file_path, 'media') media = GetMediaFiles(media_dir) # GetMediaFiles object def test_data(): print(media) expected_data = [ [ abspath('tests/media/about.txt'), { 'General': { 'duration': None, 'format': None, 'size': (None, None) } }, 1513722595.3195267 ], [ abspath( 'tests/media/044838940-mountain-range-and-lake-2006-a.jpg'), { 'General': { 'duration': None, 'format': 'JPEG', 'size': (None, None)