def encode_media(process_order_function=PROCESS_ORDER_FUNCS[DEFAULT_ORDER_FUNC], **kwargs):
    meta_manager = MetaManagerExtended(**kwargs)  # path_meta=kwargs['path_meta'], path_source=kwargs['path_source']
    meta_manager.load_all()

    encoder = Encoder(meta_manager, **kwargs)

    # In the full system, encode will probably be driven from a rabbitmq endpoint.
    # For testing locally we are monitoring the 'pending_actions' list.
    for name in progress_bar(process_order_function(
        m.name
        for m in meta_manager.meta.values()
        if PENDING_ACTION['encode'] in m.pending_actions or not m.source_hashs
        # (
        #     'AKB0048 Next Stage - ED1 - Kono Namida wo Kimi ni Sasagu',
        #     'Cuticle Tantei Inaba - OP - Haruka Nichijou no Naka de',
        #     'Gosick - ED2 - Unity (full length)',
        #     'Ikimonogakari - Sakura',  # Takes 2 hours to encode
        #     'Frozen Japanise (find real name)',  # Took too long to process
        #     'Parasite Eve - Somnia Memorias',  # Non-unicode character set
        #     'Akira Yamaoka - Día de los Muertos',  # Non-unicode character set
        #     'Higurashi no Naku koro ni - ED - why or why not (full length)',  # Subs imported from SSA still have styling information attached
        #     'Gatekeepers - OP - For the Smiles of Tomorrow.avi',  # Broken: looks like it tries to containerize subs in a txt file
        #     'Get Backers - ED2 - Namida no Hurricane',  # Broken source
        #     'Nana (anime) - OP - Rose',  # SSAs have malformed unicode characters
        #     'Lunar Silver Star Story - OP - Wings (Japanese Version)',
        #     'Evangleion ED - Fly Me To The Moon',  # Odd dimensions; needs to be normalised
        #     'Ranma Half OP1 - Jajauma ni Sasenaide',
        #     'Tamako Market - OP - Dramatic Market Ride',
        #     'Fullmetal Alchemist - OP1 - Melissa',  # Exhibits high-bitrate pausing at end
        #     'Samurai Champloo - OP - Battlecry',  # Missing title sub with newline
        #     'KAT-TUN Your side [Instrumental]',
        # )
    )):
        encoder.encode(name)
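
# --- Illustrative sketch (not part of the original module) -----------------------------
# `process_order_function` above receives the generator of pending track names and returns
# them in the order they should be encoded. PROCESS_ORDER_FUNCS is assumed to be a mapping
# of name -> such a callable; the keys and implementations below are hypothetical examples
# of that shape, not the project's actual definitions.
import random


def _example_random_order(names):
    # Materialise the generator, then shuffle so long-running encodes are spread out.
    names = list(names)
    random.shuffle(names)
    return names


EXAMPLE_PROCESS_ORDER_FUNCS = {
    'sorted': sorted,                 # deterministic alphabetical order
    'random': _example_random_order,  # shuffled order
}
# Usage sketch: encode_media(process_order_function=EXAMPLE_PROCESS_ORDER_FUNCS['random'], **kwargs)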
def import_media(**kwargs):
    """
    Import each processed meta item into the track database and remove database tracks
    that are no longer backed by processed meta.
    """
    stats = dict(
        meta_set=set(),
        meta_imported=set(),
        meta_unprocessed=set(),
        db_removed=list(),
        missing_processed_deleted=set(),
        missing_processed_aborted=set(),
        db_start=set(),
        meta_hash_matched_db_hash=set(),
    )

    def get_db_track_names():
        return set(t.source_filename for t in DBSession.query(Track.source_filename))

    meta_manager = MetaManagerExtended(**kwargs)
    importer = TrackImporter(meta_manager=meta_manager)
    stats['db_start'] = get_db_track_names()

    meta_manager.load_all()  # mtime=epoc(last_update())
    meta_processed_track_ids = set(meta_manager.source_hashs)
    stats['meta_set'] = set(m.name for m in meta_manager.meta_items if m.source_hash)

    for name in progress_bar(meta_manager.meta.keys()):
        try:
            if importer.import_track(name):
                stats['meta_imported'].add(name)
            else:
                stats['meta_hash_matched_db_hash'].add(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
        except TrackMissingProcessedFiles as ex:
            if ex.id:
                log.warning('Missing (processed files) delete existing: %s', name)
                delete_track(ex.id)
                commit()
                stats['missing_processed_deleted'].add(name)
            else:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)

    for unneeded_track_id in importer.exisiting_track_ids - meta_processed_track_ids:
        log.warning('Remove: %s', unneeded_track_id)
        stats['db_removed'].append(DBSession.query(Track).get(unneeded_track_id).source_filename or unneeded_track_id)
        delete_track(unneeded_track_id)
        commit()

    stats['db_end'] = get_db_track_names()
    #assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']  # TODO: Reinstate this
    return stats
def import_media(**kwargs):
    """
    Import each processed meta item into the track API and remove tracks that are no
    longer backed by processed meta.
    """
    stats = dict(
        meta_set=set(),
        meta_imported=set(),
        meta_unprocessed=set(),
        db_removed=list(),
        missing_processed_deleted=set(),
        missing_processed_aborted=set(),
        db_start=set(),
        meta_hash_matched_db_hash=set(),
    )

    track_api = partial(_track_api, kwargs['api_host'])

    meta_manager = MetaManagerExtended(**kwargs)
    meta_manager.load_all()  # mtime=epoc(last_update())

    processed_track_ids = set(meta_manager.source_hashs)
    processed_files_lookup = set(f.relative for f in fast_scan(meta_manager.processed_files_manager.path))
    existing_tracks = track_api()['data']['tracks']
    existing_track_ids = existing_tracks.keys()
    generate_track_dict = partial(
        _generate_track_dict,
        meta_manager=meta_manager,
        existing_track_ids=existing_track_ids,
        processed_files_lookup=processed_files_lookup,
    )

    stats['db_start'] = set(existing_tracks.values())
    stats['meta_set'] = set(m.name for m in meta_manager.meta_items if m.source_hash)

    tracks_to_add = []
    track_ids_to_delete = []
    log.info(f'Importing tracks - Existing:{len(existing_track_ids)} Processed:{len(processed_track_ids)}')

    for name in progress_bar(meta_manager.meta.keys()):
        try:
            track = generate_track_dict(name)
            if track:
                stats['meta_imported'].add(name)
                #tracks_to_add.append(track)
                track_api([track], method='POST')
            else:
                stats['meta_hash_matched_db_hash'].add(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
        except TrackMissingProcessedFiles as ex:
            if ex.id:
                log.warning('Missing (processed files) delete existing: %s', name)
                track_ids_to_delete.append(ex.id)
                stats['missing_processed_deleted'].add(name)
            else:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)

    for unneeded_track_id in existing_track_ids - processed_track_ids:
        log.warning('Remove: %s', unneeded_track_id)
        stats['db_removed'].append(existing_tracks[unneeded_track_id])
        track_ids_to_delete.append(unneeded_track_id)

    log.info(f"""{kwargs['api_host']} -> Add:{len(tracks_to_add)} Delete:{len(track_ids_to_delete)}""")
    #track_api(tracks_to_add, method='POST')
    track_api(track_ids_to_delete, method='DELETE')

    stats['db_end'] = track_api()['data']['tracks'].values()
    #assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']  # TODO: Reinstate this
    return stats
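
# --- Hedged sketch (assumption, not the project's actual helper) -----------------------
# import_media above only relies on the following calling pattern for `_track_api`:
#   track_api()                                  -> {'data': {'tracks': {track_id: source_filename}}}
#   track_api([track_dict, ...], method='POST')     to add tracks
#   track_api([track_id, ...], method='DELETE')     to remove tracks
# The stdlib-only implementation below is a minimal sketch consistent with that pattern;
# the '/tracks' endpoint path and the JSON envelope are assumptions, not confirmed API details.
import json
from urllib import request as urllib_request


def _example_track_api(api_host, data=None, method='GET', endpoint='/tracks'):
    # Send `data` (if any) as a JSON body and decode the JSON response.
    url = f'http://{api_host}{endpoint}'
    body = json.dumps(data).encode('utf-8') if data is not None else None
    req = urllib_request.Request(
        url,
        data=body,
        method=method,
        headers={'Content-Type': 'application/json'},
    )
    with urllib_request.urlopen(req) as response:
        return json.loads(response.read().decode('utf-8'))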
def scan_media(**kwargs):
    """
    Scan the source folder and associate the files found with meta entries.
    """
    meta = MetaManager(kwargs['path_meta'])
    meta.load_all()

    log.info('1.) Read file structure into memory')
    folder_structure = FolderStructure.factory(
        path=kwargs['path_source'],
        search_filter=fast_scan_regex_filter(
            file_regex=file_extension_regex(ALL_EXTS),
            ignore_regex=DEFAULT_IGNORE_FILE_REGEX,
        )
    )

    log.info('2.) Locate primary files')
    # Note: Duplicate media is completely ignored/removed in this list
    primary_files = locate_primary_files(folder_structure, file_regex=file_extension_regex(PRIMARY_FILE_RANKED_EXTS))

    log.info("3.) Find associated files as a 'file collection' (based on the name of the primary file)")
    file_collections = {
        f.file_no_ext: get_file_collection(folder_structure, f)
        for f in primary_files
    }

    log.info('4.) Associate file_collections with existing metadata objects')
    for name, file_collection in progress_bar(file_collections.items()):
        meta.load(name)
        m = meta.get(name)
        for f in file_collection:
            m.associate_file(f)
        meta.save(name)

    log.info('5.) Attempt to associate unassociated files by finding them in the in-memory folder_structure')
    # TODO: The mtime and hash check in step 5b is not sufficient.
    # We need to identify all unmatched files, generate a hash lookup and match them that way
    # (see the sketch after this function). It may take longer, but it is the only reliable way
    # of matching a file, and in some cases it may even be quicker because we would not need to
    # crawl the entire file structure in memory.
    # These are meta items that have a file collection matched,
    # but that file collection is incomplete, so some child files are missing.
    has_unassociated_files = operator.attrgetter('unassociated_files')
    for m in filter(has_unassociated_files, meta.meta.values()):
        for filename, scan_data in m.unassociated_files.items():
            # 5a.) The unassociated file may not have been found in the initial collection scan;
            # check its original location and associate it if it exists.
            f = folder_structure.get(scan_data.get('relative')) if scan_data.get('relative') else None
            if f:
                m.associate_file(f)
                log.warning('Associating found missing file %s to %s - this should not be a regular occurrence, move/rename this so it is grouped effectively', f.relative, m.name)
                continue
            # 5b.) Search the whole folder_structure in memory for a matching hash.
            mtime = scan_data['mtime']
            for f in folder_structure.scan(
                lambda f:
                    not IGNORE_SEARCH_EXTS_REGEX.search(f.file)
                    and (f.file == filename or f.stats.st_mtime == mtime)  # TODO: Deprecate this!
                    and str(f.hash) == scan_data['hash']
            ):
                log.warning('Associating found missing file %s to %s - this should not be a regular occurrence, move/rename this so it is grouped effectively', f.relative, m.name)
                m.associate_file(f)
                break
        # 5c.) We have done our best at locating missing files;
        # remove them from the tracked list of files.
        m.unlink_unassociated_files()

    log.info('6.) Remove unmatched meta entries')
    for m in [m for m in meta.meta.values() if not m.file_collection]:
        log.info('Removing meta %s', m.name)
        meta.delete(m.name)
        # (If processed data already exists, it will be relinked at the encode level)

    meta.save_all()
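
# --- Hedged sketch (assumption, not part of the original module) -----------------------
# The TODO in step 5b above suggests matching unassociated files purely by content hash:
# build a one-off hash -> file index over the in-memory folder_structure and look each
# missing file up in it. This sketch assumes `folder_structure.scan()` with an always-true
# predicate yields every scanned file, and that `f.hash` / scan_data['hash'] compare as
# strings, as they do in scan_media above.
def _example_associate_by_hash(meta, folder_structure):
    # One pass over every scanned file; each missing-file lookup is then O(1).
    hash_index = {str(f.hash): f for f in folder_structure.scan(lambda f: True)}
    for m in meta.meta.values():
        for filename, scan_data in list(m.unassociated_files.items()):
            f = hash_index.get(scan_data.get('hash'))
            if f:
                m.associate_file(f)
        m.unlink_unassociated_files()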