def __init__(self, pipe_conf):
    self.pipe_conf = pipe_conf
    self.name = pipe_conf.name
    self.tasks = pipe_conf.tasks  # list of dictionaries
    self.options = pipe_conf.options
    self.tasks_dict = dict()
    self.workers_dict = dict()
    self.workers_list = list()
    self.queues = list()
    self.queue_names = list()
    self.dbio = DatabaseIO()

    # ### create an entry in database
    # get maximum model id in database right now
    data, fields = self.dbio.get_max_model_number()
    last_num = data[0][0] if data[0][0] is not None else 0
    self.model_number = last_num + 1
    setup(f"pipeline_{self.model_number}")

    # set model id and upload to database
    self.dbio.insert_into_table(
        "results", "models",
        ['"model_number"', '"pipeline_config"'],
        [f"{self.model_number}",
         "'" + json.dumps(self.pipe_conf, sort_keys=True, default=str,
                          separators=(",", ":")).replace("'", "''") + "'"])

    self.out_path = os.path.join(conf.dirs.output, f"{self.model_number}")
    if not os.path.isdir(self.out_path):
        os.makedirs(self.out_path)
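# --- Illustrative sketch (not part of the original code) ---
# A minimal, hypothetical example of the config object this constructor
# expects; SimpleNamespace and every attribute value below are assumptions
# inferred from the attribute accesses above, not the project's real config.
from types import SimpleNamespace

example_pipe_conf = SimpleNamespace(
    name="example_pipeline",
    tasks=[{"task": "detect"}, {"task": "track"}],  # list of dictionaries
    options={"n_workers": 2},
)
# Pipeline(example_pipe_conf)  # hypothetical class name for this __init__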
def main():
    vid_characteristics = pd.read_csv(
        os.path.join(conf.dirs.output, 'online_samples.csv'))
    camera_list = vid_characteristics.camera.unique()
    n_cameras = len(camera_list)
    camera_to_idx = {camera_list[i]: i for i in range(n_cameras)}
    analysis_group_cameras = [Analysis() for _ in range(n_cameras)]
    analysis_group = Analysis()

    for filename in os.listdir(conf.dirs.annotations):
        if filename[-4:] != '.csv':
            continue
        vid_ref = filename[:-4]
        logger.info(f'Starting to process the video {vid_ref}.')
        analysis_group.pr_curves_onevid(vid_ref)
        camera = vid_characteristics[
            vid_characteristics['video_segment_name'] == vid_ref
        ]['camera'].squeeze()
        analysis_group_cameras[camera_to_idx[camera]].pr_curves_onevid(vid_ref)
        logger.info(f'Done processing {vid_ref}.')

    analysis_group.pr_curves_agg()
    for i in range(n_cameras):
        try:
            analysis_group_cameras[i].pr_curves_agg(camera_list[i])
        except ValueError:
            logger.info(f'No objects for camera {camera_list[i]}!')


if __name__ == "__main__":
    script_name = os.path.basename(__file__).split(".")[0]
    setup(script_name)
    run_and_catch_exceptions(logger, main)
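# --- Illustrative sketch (not part of the original script) ---
# `run_and_catch_exceptions` is a project helper not shown in this excerpt; a
# minimal sketch of such a wrapper, assuming only the (logger, func) call
# signature used above:
def run_and_catch_exceptions_sketch(logger, func):
    try:
        func()
    except Exception:
        # log the full traceback before re-raising so failures are recorded
        logger.exception(f"Uncaught exception in {func.__name__}")
        raise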
import glob
import os

# ####### Logging and config file loading ########
import logging
from src.modules.utils.setup import setup, IndentLogger
logger = IndentLogger(logging.getLogger(''), {})

# ########### Config File Loading ################
from src.modules.utils.config_loader import get_config
conf, confp = get_config()
##################################################

setup("process_subtitles")

# input / output files
subtitle_dir = confp.dirs.subtitles
output_file_name = "video_metadata_full.csv"

# create metadata file
f_out = open(os.path.join(subtitle_dir, output_file_name), 'w')
f_out.write("file,time_stamp,date_and_time\n")
for i, f_path in enumerate(glob.glob(os.path.join(subtitle_dir, "*.srt"))):
    with open(f_path) as f:
        f_name = os.path.basename(f_path).split(".")[0]
        logger.info("Collecting metadata from file {}: {}".format(i, f_name))
        for line in f.readlines():
            if "AM" in line or "PM" in line:
                f_out.write(line.replace(",", "").replace("\n", "") + '\n')
            elif '-->' in line:
                f_out.write(f_name + ',' + line[:8] + ',')
f_out.close()
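# --- Illustrative sketch (not part of the original script) ---
# A hypothetical .srt fragment showing the two line shapes the loop above
# keys on: a '-->' timing line (first 8 chars = video timestamp) followed by
# an on-screen date line containing AM/PM.
sample_srt = """\
1
00:00:01,000 --> 00:00:02,000
JAN 01 2020 10:15:01 AM
"""
# For a file named cam1.srt this would append the CSV row:
#   cam1,00:00:01,JAN 01 2020 10:15:01 AM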
                    logger.info('Fixed in third batch.')
                    wehave += 1
                else:
                    logger.info(
                        '%s is the hash in the third batch: still no match!'
                        % hashes3[file_name])
            else:
                wehave += 1
        elif file_name in videos3:
            if not check_hash(hashes3[file_name], etag):
                logger.info(
                    '%s does not match in the third batch:\n'
                    '%s is our hash, %s is theirs.'
                    % (file_name, hashes3[file_name], etag))
            else:
                wehave += 1
        else:
            logger.info('We are missing file %s' % file_name)
        line = f.readline()

    logger.info('We have successfully downloaded %i files.' % wehave)
    f_hashes.close()
    f_hashes2.close()
    f_hashes3.close()
    f.close()


if __name__ == "__main__":
    setup('hash_check_final')
    check_files()
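# --- Illustrative sketch (not part of the original script) ---
# `check_hash` is defined elsewhere in this script; a minimal sketch, assuming
# it compares a recorded MD5 digest against an S3 ETag (ETags come quoted, and
# only equal the plain MD5 for single-part uploads):
def check_hash_sketch(local_md5, etag):
    return local_md5.strip().lower() == etag.strip().strip('"').lower()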
        2:] + '_' + split_url[4][2:]
    file_name = partial_name + '_' + part + '.mkv'
    return file_name


def download():
    f = open('files_to_dnld')
    url = f.readline().strip()
    while url:
        # get just a few bits for file naming
        file_name = url_to_filename(url)
        # check if file already exists
        full_path = os.path.join(vid_dir, file_name)
        if os.path.isfile(full_path):
            logger.info("skipping " + file_name)
            url = f.readline().strip()
            continue
        logger.info(file_name)
        urllib.request.urlretrieve(url, full_path)
        url = f.readline().strip()
    f.close()
    return True


if __name__ == "__main__":
    setup('download')
    download()
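# --- Illustrative sketch (not part of the original script) ---
# Only the tail of `url_to_filename` is visible above; this hypothetical URL
# and the index choices below are assumptions meant to show the idea of
# deriving a stable local name from URL path components plus a part suffix:
hypothetical_url = "https://example.com/cams/ab12cd/xy34ef/clip_001"
split_url = hypothetical_url.split('/')
partial_name = split_url[4][2:] + '_' + split_url[5][2:]  # '12cd_34ef'
part = split_url[6].split('_')[-1]                        # '001'
file_name = partial_name + '_' + part + '.mkv'            # '12cd_34ef_001.mkv'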
    vid = VideoFile(f_path)
    logger.info("Extracting subtitles")
    vid.extract_subtitles()
    logger.info("Extracting frame statistics")
    vid.extract_frame_stats()
    logger.info("Extracting packet statistics")
    vid.extract_packet_stats()

    upload = False
    if aws.s3_vid_exists(vid.basename):
        logger.info("Video already exists on AWS")
        if aws.s3_get_vid_size(vid.basename) == os.path.getsize(
                os.path.join(vid_dir, vid.basename)):
            logger.info("Sizes match")
        else:
            logger.info("Sizes don't match")
            upload = True
    else:
        logger.info("Video not already on AWS")
        upload = True

    if upload:
        logger.info("Uploading")
        aws.vid_copy_file_to_s3(vid.path)
        logger.info("Done")
    else:
        logger.info("Not uploading")


if __name__ == "__main__":
    setup("process_videos")
    main()
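# --- Illustrative sketch (not part of the original script) ---
# The `aws` helpers above are project code not shown here; a minimal sketch of
# the size check they appear to perform, assuming boto3 and a hypothetical
# bucket name:
import boto3

def s3_vid_size_sketch(key, bucket="hypothetical-video-bucket"):
    s3 = boto3.client("s3")
    head = s3.head_object(Bucket=bucket, Key=key)
    return head["ContentLength"]  # bytes, comparable to os.path.getsize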
        # packet stats
        # prepend uuid to file
        logger.debug("creating uuid column in packet stats file")
        new_packet_stats_file = f'{conf.dirs.packet_stats}{self.name}_uuid.csv'
        sp(['sed', f's/^/{uuid[1:-1]},/', self.packet_stats_path],
           stdout=open(new_packet_stats_file, 'w'), stderr=None)

        # subtitles
        # prepend uuid to file
        logger.debug("creating uuid column in subtitles file")
        new_subtitles_file = f'{conf.dirs.subtitles}{self.name}_uuid.csv'
        sp(['sed', f's/^/{uuid[1:-1]},/', self.subtitles_path],
           stdout=open(new_subtitles_file, 'w'), stderr=None)

        # copy to db
        self.dbio.insert_into_table(
            "raw", "video_metadata",
            ("id", "file_md5_chunk_7mb", "file_name", "camera_id",
             "time_start_subtitles", "time_end_subtitles",
             "file_location", "file_path"),
            (uuid, file_md5_chunk_7mb, file_name, camera_id,
             time_start_subtitles, time_end_subtitles,
             file_location, file_path))
        self.dbio.copy_file_to_table("raw", "frame_stats", new_frame_stats_file)
        self.dbio.copy_file_to_table("raw", "subtitles", new_subtitles_file)
        self.dbio.copy_file_to_table("raw", "packet_stats", new_packet_stats_file)


if __name__ == "__main__":
    setup("video_file")
    vid_file = glob(os.path.join(conf.dirs.videos, "*.mkv"))[0]
    vid = VideoFile(path=vid_file, dbio=DBIO(testing=False))
    vid.upload_vid_metadata_to_db()
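# --- Illustrative sketch (not part of the original code) ---
# The sed calls above prepend the bare uuid (quotes stripped via uuid[1:-1])
# as a new first column on every line; assuming `sp` wraps subprocess, an
# equivalent pure-Python version would be:
def prepend_uuid_sketch(uuid, src_path, dst_path):
    bare = uuid[1:-1]  # drop the surrounding quotes, as the sed pattern does
    with open(src_path) as src, open(dst_path, "w") as dst:
        for line in src:
            dst.write(f"{bare},{line}")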
import glob

# ============== Logging ========================
import logging
from src.modules.utils.setup import setup, IndentLogger
logger = IndentLogger(logging.getLogger(''), {})

# =========== Config File Loading ================
from src.modules.utils.config_loader import get_config
conf, confp = get_config()

# ======== Load Configuration Parameters =========
subs_path = confp.dirs.subtitles
vid_run_path = confp.paths.video_runs_old
subtitles_dirs = confp.dirs.subtitles
# ================================================

setup("meta_data_contiguous")

# threshold for run length
run_len = 15
# how close do the video times have to be between subtitles?
# ideally they should be 1 second apart
frame_skip_tolerance = 0.01
run_len += 0  # add buffer to run length to shave off later

all_sub_files = [
    fn for fn in glob.glob(subs_path + "/*.csv")
    if fn[-8:-4] != 'uuid' and fn.split('/')[-1][:4] != 'test'
]

# loop through files in the subtitles directory
runs = []
fmt = '%H:%M:%S.%f'
fmt2 = '%b %d %Y %I:%M:%S %p'
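# --- Illustrative sketch (not part of the original script) ---
# A minimal sketch of the run detection this header sets up, assuming a
# non-empty list of video timestamps parsed with `fmt`: rows ~1 second apart
# (within `tolerance`) extend the current run; short runs are discarded.
from datetime import datetime

def contiguous_runs_sketch(time_strings, fmt='%H:%M:%S.%f',
                           tolerance=0.01, min_len=15):
    times = [datetime.strptime(t, fmt) for t in time_strings]
    runs, current = [], [times[0]]
    for prev, cur in zip(times, times[1:]):
        if abs((cur - prev).total_seconds() - 1.0) <= tolerance:
            current.append(cur)
        else:
            runs.append(current)
            current = [cur]
    runs.append(current)
    return [r for r in runs if len(r) >= min_len]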
        elif file_name in videos2:
            if check_hash(hashes2[file_name], etag):
                wehaveit = True
            else:
                corrupted2 += 1
        else:
            new += 1

        if not wehaveit:
            # logger.info('We don\'t have this file')
            fw.write(url + '\n')
        # else:
        #     logger.info('We have this file!')
        line = f.readline()

    print('We found %i corrupted files in the first batch, '
          '%i in the second batch, and %i new files'
          % (corrupted1, corrupted2, new))
    f_hashes.close()
    f_hashes2.close()
    f.close()
    fw.close()


if __name__ == "__main__":
    setup('hash_check')
    check_files()
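# --- Illustrative sketch (not part of the original script) ---
# The `videos*`/`hashes*` lookups are built earlier in this script from the
# f_hashes* files; a minimal sketch, assuming a hypothetical "file_name,md5"
# line format (the real manifest format is not shown in this excerpt):
def load_manifest_sketch(path):
    hashes = {}
    with open(path) as f:
        for line in f:
            file_name, md5 = line.strip().split(',')
            hashes[file_name] = md5
    return hashes  # `file_name in hashes` doubles as the membership test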
import os

import pandas as pd

# ====== Internal package imports ================

# ============== Logging ========================
import logging
from src.modules.utils.setup import setup, IndentLogger
logger = IndentLogger(logging.getLogger(''), {})

# =========== Config File Loading ================
from src.modules.utils.config_loader import get_config
conf, confp = get_config()

# ======== Load Configuration Parameters =========
path = confp.dirs.subtitles
out_dir = confp.dirs.output
vid_run_path = confp.paths.video_runs
# ================================================

setup("video_sampling")

outpath = os.path.join(out_dir, "video_sample.csv")

logger.info("Loading Data")
vids = pd.read_csv(vid_run_path)

logger.info("Adding Columns")
all_cams = vids["camera"].unique()
logger.info(f"Found {len(all_cams)} cameras")
n_missing = 0
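# --- Illustrative sketch (not part of the original script) ---
# The sampling step itself falls below this excerpt; a minimal sketch of a
# per-camera (stratified) sample with pandas, assuming a hypothetical fixed
# count per camera rather than the script's real sampling rule:
def sample_per_camera_sketch(vids, n_per_camera=5, seed=0):
    return (vids.groupby("camera", group_keys=False)
                .apply(lambda g: g.sample(min(n_per_camera, len(g)),
                                          random_state=seed)))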