def __init__(self, pipe_conf):
    self.pipe_conf = pipe_conf
    self.name = pipe_conf.name
    self.tasks = pipe_conf.tasks  # list of dictionaries
    self.options = pipe_conf.options
    self.tasks_dict = dict()
    self.workers_dict = dict()
    self.workers_list = list()
    self.queues = list()
    self.queue_names = list()
    self.dbio = DatabaseIO()
    # create an entry in the database:
    # get the maximum model id currently in the database
    data, fields = self.dbio.get_max_model_number()
    last_num = data[0][0] if data[0][0] is not None else 0
    self.model_number = last_num + 1
    setup(f"pipeline_{self.model_number}")
    # set the model id and upload the serialized pipeline config
    pipe_conf_json = json.dumps(self.pipe_conf, sort_keys=True, default=str,
                                separators=(",", ":")).replace("'", "''")
    self.dbio.insert_into_table("results", "models",
                                ['"model_number"', '"pipeline_config"'],
                                [f"{self.model_number}", f"'{pipe_conf_json}'"])
    self.out_path = os.path.join(conf.dirs.output, f"{self.model_number}")
    if not os.path.isdir(self.out_path):
        os.makedirs(self.out_path)
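# A minimal usage sketch for the constructor above (hypothetical values; the
# enclosing Pipeline class, DatabaseIO, setup and conf are assumed from this
# module):
#
#     from types import SimpleNamespace
#     demo_conf = SimpleNamespace(
#         name="demo",
#         tasks=[{"task": "decode"}],   # list of task dictionaries
#         options={"batch_size": 8},
#     )
#     pipeline = Pipeline(demo_conf)   # allocates the next model_number and
#                                      # registers the config in results.models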
    vid_characteristics = pd.read_csv(os.path.join(conf.dirs.output, 'online_samples.csv'))
    camera_list = vid_characteristics.camera.unique()
    n_cameras = len(camera_list)
    camera_to_idx = {camera: i for i, camera in enumerate(camera_list)}

    analysis_group_cameras = [Analysis() for _ in range(n_cameras)]

    analysis_group = Analysis()
    
    for filename in os.listdir(conf.dirs.annotations):
        if not filename.endswith('.csv'):
            continue
        vid_ref = filename[:-4]
        logger.info(f'Starting to process the video {vid_ref}.')
        analysis_group.pr_curves_onevid(vid_ref)
        camera = vid_characteristics[vid_characteristics['video_segment_name'] == vid_ref]['camera'].squeeze()
        analysis_group_cameras[camera_to_idx[camera]].pr_curves_onevid(vid_ref)
        logger.info(f'Done processing {vid_ref}.')

    analysis_group.pr_curves_agg()
    for i in range(n_cameras):
        try:
            analysis_group_cameras[i].pr_curves_agg(camera_list[i])
        except ValueError:
            logger.info(f'No objects for camera {camera_list[i]}!')

if __name__ == "__main__":
    script_name = os.path.basename(__file__).split(".")[0]
    setup(script_name)
    run_and_catch_exceptions(logger, main)
Example #3
import glob
import os
# ####### Logging and config file loading ########
import logging
from src.modules.utils.setup import setup, IndentLogger
logger = IndentLogger(logging.getLogger(''), {})
# ########### Config File Loading ################
from src.modules.utils.config_loader import get_config
conf, confp = get_config()
##################################################

setup("process_subtitles")

# import / output files
subtitle_dir = confp.dirs.subtitles
output_file_name = "video_metadata_full.csv"

# create metadata file
with open(os.path.join(subtitle_dir, output_file_name), 'w') as f_out:
    f_out.write("file,time_stamp,date_and_time\n")
    for i, f_path in enumerate(glob.glob(os.path.join(subtitle_dir, "*.srt"))):
        with open(f_path) as f:
            f_name = os.path.basename(f_path).split(".")[0]
            logger.info("Collecting metadata from file {}: {}".format(i, f_name))
            for line in f:
                if "AM" in line or "PM" in line:
                    # date/time line: strip commas and the newline, close the row
                    f_out.write(line.replace(",", "").replace("\n", "") + '\n')
                elif '-->' in line:
                    # timing line: open a row with the file name and start time
                    f_out.write(f_name + ',' + line[:8] + ',')
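# A sketch of the .srt block the loop above assumes (hypothetical contents):
#
#     1
#     00:00:01,000 --> 00:00:02,000
#     Jan 01 2020 12:00:00 AM
#
# which would yield the CSV row:
#
#     <f_name>,00:00:01,Jan 01 2020 12:00:00 AM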
                    logger.info('Fixed in third batch.')
                    wehave += 1
                else:
                    logger.info(
                        '%s is the hash in the third batch: still no match!' %
                        hashes3[file_name])
            else:
                wehave += 1
        elif file_name in videos3:
            if not check_hash(hashes3[file_name], etag):
                logger.info(
                    '%s does not match in the third batch:\n%s is our hash, %s is theirs.'
                    % (file_name, hashes3[file_name], etag))
            else:
                wehave += 1
        else:
            logger.info('We are missing file %s' % file_name)

        line = f.readline()

    logger.info('We have successfully downloaded %i files.' % wehave)
    f_hashes.close()
    f_hashes2.close()
    f_hashes3.close()
    f.close()


if __name__ == "__main__":
    setup('hash_check_final')
    check_files()
Example #5
        2:] + '_' + split_url[4][2:]
    file_name = partial_name + '_' + part + '.mkv'
    return file_name


def download():
    f = open('files_to_dnld')
    url = f.readline().strip()
    while url:
        # get just a few bits for file naming
        file_name = url_to_filename(url)
        # check if file already exists
        full_path = os.path.join(vid_dir, file_name)
        if os.path.isfile(full_path):
            logger.info("skipping" + file_name)
            url = f.readline().strip()
            continue

        logger.info(file_name)
        urllib.request.urlretrieve(url, full_path)

        url = f.readline().strip()

    f.close()
    return True


if __name__ == "__main__":
    setup('download')
    download()
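# A more defensive variant of the fetch inside download() (sketch, not the
# original code): remove a partially written file if the transfer fails, so a
# truncated download is not skipped by the os.path.isfile check on the next run.
#
#     try:
#         urllib.request.urlretrieve(url, full_path)
#     except Exception:
#         if os.path.isfile(full_path):
#             os.remove(full_path)
#         raise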
        vid = VideoFile(f_path)
        logger.info("Extracting subtitles")
        vid.extract_subtitles()
        logger.info("Extracting Frame statistics")
        vid.extract_frame_stats()
        logger.info("Extracting Packet statistics")
        vid.extract_packet_stats()
        upload = False
        if aws.s3_vid_exists(vid.basename):
            logger.info("Video already exists on aws")
            if aws.s3_get_vid_size(vid.basename) == os.path.getsize(
                    os.path.join(vid_dir, vid.basename)):
                logger.info("Sizes match")
            else:
                logger.info("Sizes don't match")
                upload = True
        else:
            logger.info("Video not already on aws")
            upload = True
        if upload:
            logger.info("Uploading")
            aws.vid_copy_file_to_s3(vid.path)
            logger.info("Done")
        else:
            logger.info("Not uploading")


if __name__ == "__main__":
    setup("process_videos")
    main()
Example #7
        # packet stats
        # prepend uuid to every row of the file
        logger.debug("creating uuid column in packet stats file")
        new_packet_stats_file = f'{conf.dirs.packet_stats}{self.name}_uuid.csv'
        with open(new_packet_stats_file, 'w') as out:
            sp(['sed', f's/^/{uuid[1:-1]},/', self.packet_stats_path], stdout=out, stderr=None)

        # subtitles
        # prepend uuid to every row of the file
        logger.debug("creating uuid column in subtitles file")
        new_subtitles_file = f'{conf.dirs.subtitles}{self.name}_uuid.csv'
        with open(new_subtitles_file, 'w') as out:
            sp(['sed', f's/^/{uuid[1:-1]},/', self.subtitles_path], stdout=out, stderr=None)
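        # Pure-Python equivalent of the sed prepend above (sketch; same inputs,
        # no external process):
        #
        #     with open(self.packet_stats_path) as src, \
        #          open(new_packet_stats_file, 'w') as dst:
        #         for row in src:
        #             dst.write(f"{uuid[1:-1]},{row}")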


        # copy to db
        self.dbio.insert_into_table("raw", "video_metadata", ("id", "file_md5_chunk_7mb", "file_name", "camera_id",
                                                               "time_start_subtitles", "time_end_subtitles",
                                                               "file_location", "file_path"),
                                    (uuid, file_md5_chunk_7mb, file_name, camera_id,
                                     time_start_subtitles, time_end_subtitles,
                                     file_location, file_path))
        self.dbio.copy_file_to_table("raw", "frame_stats", new_frame_stats_file)
        self.dbio.copy_file_to_table("raw", "subtitles", new_subtitles_file)
        self.dbio.copy_file_to_table("raw", "packet_stats", new_packet_stats_file)


if __name__ == "__main__":
    setup("video_file")
    vid_file = glob(os.path.join(conf.dirs.videos, "*.mkv"))[0]
    vid = VideoFile(path=vid_file, dbio=DBIO(testing=False))
    vid.upload_vid_metadata_to_db()
Example #8
# ============== Logging  ========================
import logging
from src.modules.utils.setup import setup, IndentLogger

logger = IndentLogger(logging.getLogger(''), {})
# =========== Config File Loading ================
from src.modules.utils.config_loader import get_config

conf, confp = get_config()
# ======== Load Configuration Parameters =========
subs_path = confp.dirs.subtitles
vid_run_path = confp.paths.video_runs_old
subtitles_dirs = confp.dirs.subtitles
# ================================================

setup("meta_data_contiguous")

# threshold for run length
run_len = 15
# tolerance (seconds) on the gap between consecutive subtitle timestamps;
# ideally consecutive subtitles are exactly 1 second apart
frame_skip_tolerance = 0.01

run_len += 0  # buffer added to the run length so it can be shaved off later (currently zero)
# skip the *_uuid.csv derivatives and test files
all_sub_files = [
    fn for fn in glob.glob(subs_path + "/*.csv")
    if (fn[-8:-4] != 'uuid' and fn.split('/')[-1][:4] != 'test')
]

# loop through files in the subtitles directory
runs = []
fmt = '%H:%M:%S.%f'
fmt2 = '%b %d %Y %I:%M:%S %p'
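# A minimal sketch of the contiguity test implied by the parameters above
# (hypothetical helper, not part of the original script): consecutive subtitle
# timestamps, parsed with fmt/fmt2 via e.g.
# datetime.strptime('Jan 01 2020 12:00:00 AM', fmt2), should be ~1 second apart.
def is_contiguous(t_prev, t_curr, tolerance=frame_skip_tolerance):
    gap = (t_curr - t_prev).total_seconds()
    return abs(gap - 1.0) <= tolerance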
        elif file_name in videos2:
            if check_hash(hashes2[file_name], etag):
                wehaveit = True
            else:
                corrupted2 += 1
        else:
            new += 1

        if not wehaveit:
            # logger.info("We don't have this file")
            fw.write(url + '\n')
        # else:
        #     logger.info('We have this file!')

        line = f.readline()

    logger.info(
        'We found %i corrupted files in the first batch, %i in the second batch, and %i new files'
        % (corrupted1, corrupted2, new))

    f_hashes.close()
    f_hashes2.close()
    f.close()
    fw.close()

if __name__ == "__main__":
    setup('hash_check')
    check_files()
Example #10
import pandas as pd
# ====== Internal package imports ================
# ============== Logging  ========================
import logging
from src.modules.utils.setup import setup, IndentLogger
logger = IndentLogger(logging.getLogger(''), {})
# =========== Config File Loading ================
from src.modules.utils.config_loader import get_config
conf, confp = get_config()
# ======== Load Configuration Parameters =========
path = confp.dirs.subtitles
out_dir = confp.dirs.output
vid_run_path = confp.paths.video_runs
# ================================================

setup("video_sampling")

outpath = os.path.join(out_dir, "video_sample.csv")

logger.info("Loading Data")
vids = pd.read_csv(vid_run_path)

logger.info("Adding Columns")


all_cams = vids["camera"].unique()
logger.info(f"Found {len(all_cams)} cameras")

n_missing = 0