def check_frame_counts(C):
    """ Gather stats on input and output frame counts
    """
    dirs = C['dirs']['test_set_dirs'] + C['dirs']['val_set_dirs']
    #dirs = [os.path.join(C['dirs']['outdir'], 'train')]    #{'max': 193, 'min': 106, 'mean': 136.8842105263158, 'count': 9405}
    stats_dir = {}
    for inp_dir in dirs:
        stats_dir[inp_dir] = {'max': 0, 'min': 9999999999, 'mean' : 0.0, 'count': 0} 
        curr_input_dir = os.path.join(C['dirs']['dict_pkls'], inp_dir)
        feature_files = cs760.list_files_pattern(curr_input_dir, '*.pkl')
        for feat_file in feature_files:
            sample = cs760.loadas_pickle(os.path.join(curr_input_dir, feat_file))
            (frame_count,feat_count) = sample.shape
            assert feat_count == 2560, f"ERROR: Invalid Feature Count: {feat_count}. Must be 2560."
            stats_dir[inp_dir]['count'] += 1
            stats_dir[inp_dir]['mean'] += frame_count
            if frame_count > stats_dir[inp_dir]['max']:  
                stats_dir[inp_dir]['max'] = frame_count
            if frame_count < stats_dir[inp_dir]['min']:  
                stats_dir[inp_dir]['min'] = frame_count
        stats_dir[inp_dir]['mean'] /= stats_dir[inp_dir]['count']
        print("****************************************************")
        print(f'Video Frame Stats for subdir: {inp_dir}')
        print(stats_dir[inp_dir])
    return stats_dir            
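# Illustrative usage (a sketch, not part of the pipeline), assuming C has been loaded from
# the config JSONs as in main() further below. check_frame_counts returns one stats dict per
# input subdir, keyed by the subdir names taken from the configured test/val lists:
#   C = cs760.loadas_json("config760.json")
#   C['dirs'] = cs760.loadas_json("config_dirs.json")
#   stats = check_frame_counts(C)
#   longest = max(d['max'] for d in stats.values())   # longest clip (in frames) across subdirs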
def check_files(C, inputdirs_key='test_set_dirs'):
    """ Check output file names and stats
    """
    sign_rejects = set()
    out_files = []
    in_files = []
    sign_counts = {}
    for inp_dir in C['dirs'][inputdirs_key]:    
        curr_input_dir = os.path.join(C['dirs']['dict_pkls'], inp_dir)
        feat_files = cs760.list_files_pattern(curr_input_dir, '*.pkl')
        feat_files.sort()
        for feat_file in feat_files:
            in_files.append(feat_file)
            end_idx = feat_file.find('_')  #'ADVISE.INFLUENCE_10__BOT.pkl' 16
            sign = feat_file[:end_idx]      # 'ADVISE.INFLUENCE'
            rest_of_file = feat_file[end_idx:]
            #end_idx = rest_of_file.find('__')
            #if end_idx == -1:  #__TOP wasn't added
            #    end_idx = end_idx = rest_of_file.find('.pkl')
            #    rest_of_file = rest_of_file[:end_idx] + '__TOP.pkl'
            rest_of_file = '_' + rest_of_file  #add extra underscore after sign
            if sign == 'STAND.UP':
                sign = 'STAND-UP'
            if sign == 'CANCEL.CRITISIZE':
                sign = 'CANCEL.CRITICIZE'
            if sign == 'ADVISE':
                sign = 'ADVISE.INFLUENCE'
            if sign == 'GOLD':
                sign = 'GOLD.ns-CALIFORNIA'
            if sign not in C["sign_classes"]:
                sign_rejects.add(sign)
            new_file = sign + rest_of_file    
            out_files.append(new_file)
            if sign_counts.get(sign) is None:
                sign_counts[sign] = 1
            else:
                sign_counts[sign] += 1
    print('STATS FOR ', inputdirs_key)        
    if len(sign_rejects) > 0:
        print(f"Number of Reject Signs: {len(sign_rejects)}")   # WAS {'STAND.UP', 'CANCEL.CRITISIZE'}
        print(f"Reject Signs:", sign_rejects)
    else:
        print("All signs OK")
    print('EXAMPLE OUTPUT FILE NAMES:')    
    print(out_files[:12])                        
    print(f'Number of signs: {len(sign_counts)}')
    print('Sign Counts:', [ (s, sign_counts[s]//10) for s in sign_counts])      #there are 1+9 augmentations = 10 files per sign vid
    return sign_counts, out_files, in_files, sign_rejects
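# Illustrative usage (a sketch): anything left in sign_rejects is a filename prefix that could
# not be mapped onto C["sign_classes"] even after the renames above, and sign_counts can be
# cross-checked against the configured class list:
#   sign_counts, out_files, in_files, sign_rejects = check_files(C, 'val_set_dirs')
#   missing = set(C["sign_classes"]) - set(sign_counts)   # classes with no feature files at all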
print("Running with parameters:", C)

traindir = os.path.join(feature_directory, "train")
valdir = os.path.join(feature_directory, "val")
testdir = os.path.join(feature_directory, "test")

print(f'Creating TRAIN Dir: {traindir}')
os.makedirs(traindir, exist_ok=True)        #if dir already exists will continue and WILL NOT delete existing files in that directory

print(f'Creating VAL Dir: {valdir}')
os.makedirs(valdir, exist_ok=True)        #if dir already exists will continue and WILL NOT delete existing files in that directory

print(f'Creating TEST Dir: {testdir}')
os.makedirs(testdir, exist_ok=True)        #if dir already exists will continue and WILL NOT delete existing files in that directory

feat_files = cs760.list_files_pattern(feature_directory, '*.pkl')
#print(f"Input Feature files: {feat_files}")

feat_files.sort()
random.seed(42)                     #this should make it reproducible


# build dict of [files] for each sign
curr_sign_dict = {}                 # dict['sign'] = [filename1, filename2,....] 

for feat_file in feat_files:
    end_idx = feat_file.find('__')  #'ADVISE.INFLUENCE__10__BOT.pkl' 16
    sign = feat_file[:end_idx]      # 'ADVISE.INFLUENCE'
    if curr_sign_dict.get(sign) is None:
        curr_sign_dict[sign] = []       # create new sign entry in dict
    curr_sign_dict[sign].append(feat_file)
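# The resulting dict groups every feature file (original plus augmented copies, TOP and BOT
# crops) under its sign name, e.g. (illustrative entries only):
#   curr_sign_dict['ADVISE.INFLUENCE'] -> ['ADVISE.INFLUENCE__10__TOP.pkl',
#                                          'ADVISE.INFLUENCE__10__BOT.pkl', ...]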

def main():
    """ Move one randomly chosen video (plus all of its augmented copies) per sign
        from the train split into the val split
    """
    # Config loading is assumed to mirror the other scripts here (same defaults as main() below)
    C = cs760.loadas_json("config760.json")
    Cdirs = cs760.loadas_json("config_dirs.json")
    C['dirs'] = Cdirs
    
    feature_directory = C['dirs']['dict_pkls']
    print(f"Base dir: {feature_directory}")


    traindir = os.path.join(feature_directory, "train")
    valdir = os.path.join(feature_directory, "val")
    testdir = os.path.join(feature_directory, "test")

    os.makedirs(valdir, exist_ok=True)

    random.seed(42)
    for sign in C["sign_classes"]:
        print(f"Processing sign {sign} ...")
        files = cs760.list_files_pattern(traindir, sign + "__*")
        valchoice = random.randint(0, len(files)-1)
        file = files[valchoice]
        idx1 = file.find('__') + 2   
        rest = file[idx1:]
        idx2 = rest.find('__') + 2
        idx3 = idx1 + idx2
        valfilebase = file[:idx3]
        valfiles = [f for f in files if (f.find(valfilebase) != -1)]
        for valfile in valfiles:
            print(f"Moving {os.path.join(traindir, valfile)} to {os.path.join(valdir, valfile)}")
            shutil.move(os.path.join(traindir, valfile), os.path.join(valdir, valfile))
    print('Finished!')        
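# Note on the parsing above: for a file such as 'ADVISE.INFLUENCE__10__TOP.pkl' (illustrative
# name), valfilebase becomes 'ADVISE.INFLUENCE__10__', so the shutil.move loop transfers the
# TOP/BOT crops and every augmented copy of that one chosen video together.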


def check_vid_sizes(C):
    """ check video dimensions and number of frames
    """
    crops_for_resolutions = {}
    example_vids = {}
    for subdir in ['NZ', 'US']:
        video_directory = "/home/tim/OneDrive/Documents/uni/760 Data Mining and Machine Learning/GroupProj/all signs/" + subdir  #C['dirs']['indir']
        vids = cs760.list_files_pattern(video_directory, C["vid_type"])
        frames_max = 0
        frames_min = 99999999
        frames_mean = 0
        vids_count = 0
        
        for i, vid in enumerate(vids):
            vid_np = cs760.get_vid_frames(vid, 
                            video_directory, 
                            writejpgs=False,
                            writenpy=False,
                            returnnp=True)
            (framecount, frameheight, framewidth, channels) = vid_np.shape    
            vids_count += 1
            if framecount > frames_max:
                frames_max = framecount
            if framecount < frames_min:
                frames_min = framecount
            frames_mean += framecount    
            if crops_for_resolutions.get( str(frameheight) + "-" + str(framewidth) ) is None:
                crops_for_resolutions[str(frameheight) + "-" + str(framewidth)] = [0,0,0,0]
                #print("NEW RESOLUTION:", str(frameheight) + "-" + str(framewidth))
                example_vids[str(frameheight) + "-" + str(framewidth)] = vid_np[4]#os.path.join(video_directory, vid)
                #plt.imshow(vid_np[4])
                #inp = input("Hit any key to continue..")
        frames_mean /= vids_count
        print('Stats for Video directory: ', video_directory)
        print(f"Vid Count:{vids_count}  Frames Max:{frames_max}  Min:{frames_min}  Mean:{frames_mean}")
        print("Resolutions found:", crops_for_resolutions)

    # Visual check of candidate crop boxes for each resolution found above
    plt.imshow(example_vids['368-480'])
    tst = cs760.crop_image(example_vids['368-480'], [60, 5, 420, 365])
    plt.imshow(tst)
    tst = cs760.image_resize(tst, height=600, width=600)
    plt.imshow(tst[0])

    plt.imshow(example_vids['360-640'])
    tst = cs760.crop_image(example_vids['360-640'], [145, 10, 495, 360])
    plt.imshow(tst)
    tst = cs760.image_resize(tst, height=600, width=600)
    plt.imshow(tst[0])

    plt.imshow(example_vids['240-320'])
    tst = cs760.crop_image(example_vids['240-320'], [0, 0, 320, 240])
    plt.imshow(tst)
    tst = cs760.image_resize(tst, height=600, width=600)
    plt.imshow(tst[0])

    plt.imshow(example_vids['480-640'])
    tst = cs760.crop_image(example_vids['480-640'], [0, 0, 640, 480])
    plt.imshow(tst)
    tst = cs760.image_resize(tst, height=600, width=600)
    plt.imshow(tst[0])
    return
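# A minimal sketch (illustrative values only, not the project's actual settings) of how the
# resolutions found above might map onto the "crop_by_res" entries consumed in main() below;
# the crop boxes appear to follow the [x1, y1, x2, y2] form used in the full-frame crop
# [0, 0, 320, 240] above:
#   C["crop_by_res"] = {
#       "368-480": [60, 5, 420, 365],
#       "360-640": [145, 10, 495, 360],
#   }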
def main():

    try:
        config_dirs_file = sys.argv[1] # directories file
        config_file = sys.argv[2]      # main params file
    except IndexError:
        print("Config file names not specified, setting them to default namess")
        config_dirs_file = "config_dirs.json"
        config_file = "config760.json"
    print(f'USING CONFIG FILES: config dirs:{config_dirs_file}  main config:{config_file}')    
    
    C = cs760.loadas_json(config_file)
    print("Running with parameters:", C)
    
    Cdirs = cs760.loadas_json(config_dirs_file)
    print("Directories:", Cdirs)
    
    C['dirs'] = Cdirs
    video_directory = C['dirs']['indir']
    feature_directory = C['dirs']['outdir']
    
    print(f'Creating feature file Dir: {feature_directory}')
    os.makedirs(feature_directory, exist_ok=True)        #if dir already exists will continue and WILL NOT delete existing files in that directory


    sometimes = lambda aug: iaa.Sometimes(C["augmentation_chance"][0], aug)
    sequential_list = [
        iaa.Sequential([sometimes(iaa.Fliplr(1.0))]),                     # horizontal flip
        iaa.Sequential([sometimes(iaa.Rotate(-5, 5))]),                   # rotate +/- 5 degrees
        iaa.Sequential([sometimes(iaa.CenterCropToAspectRatio(1.15))]),   # centre crop to 1.15 aspect ratio
        iaa.Sequential([sometimes(iaa.MultiplyBrightness((2.0, 2.0)))]),  # increase brightness
        iaa.Sequential([sometimes(iaa.MultiplyHue((0.5, 1.5)))]),         # randomly change hue
        iaa.Sequential([sometimes(iaa.RemoveSaturation(1.0))]),           # effectively greyscale
        iaa.Sequential([sometimes(iaa.pillike.FilterContour())]),         # edge detection
        iaa.Sequential([sometimes(iaa.AdditiveLaplaceNoise(scale=0.05*255, per_channel=True))]),  # add colourful noise
        iaa.Sequential([sometimes(iaa.Invert(1))]),                       # invert colours
    ]
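    # Each entry above yields one extra feature file per video in the loop further below.
    # iaa.Sometimes applies the wrapped augmenter to each frame with probability
    # C["augmentation_chance"][0]; a quick standalone check might look like this
    # (a sketch only - vid_np_top is defined inside the per-video loop below):
    #   aug_frames = sequential_list[0](images=vid_np_top)   # horizontally flipped frames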


    print("Reading videos from " + video_directory)
    print("Outputting features to " + feature_directory)

    print("Loading pretrained CNN...")
    model = hub.KerasLayer(C["module_url"])  # can be used like any other Keras layer, including inside other layers
    print("Pretrained CNN Loaded OK")

    vids = cs760.list_files_pattern(video_directory, C["vid_type"])
    print(f'Processing {len(vids)} videos...')

    for i, vid in enumerate(vids):
        print(f'{i} Processing: {vid}')    
        vid_np = cs760.get_vid_frames(vid, 
                        video_directory, 
                        writejpgs=False,
                        writenpy=False,
                        returnnp=True)
        (framecount, frameheight, framewidth, channels) = vid_np.shape
        res_key = str(frameheight) + "-" + str(framewidth)
        #print(vid, vid_np.shape)
        outfile = os.path.splitext(vid)[0]
        
        print(f"Vid frames, h, w, c = {(framecount, frameheight, framewidth, channels)}")
        
        if C["crop_by_res"].get(res_key) is not None:
            vid_np_top = cs760.crop_image(vid_np, C["crop_by_res"][res_key])
            print(f"Cropped by resolution to {C['crop_by_res'][res_key]}")
        else:    
            vid_np_top = cs760.crop_image(vid_np, C["crop_top"])
            print(f"Cropped by default to {C['crop_top']}")

        outfile_top = outfile + "__TOP.pkl"

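        # n == 0 writes features for the unaugmented clip; n >= 1 applies sequential_list[n - 1]
        # first and appends the matching C["augmentation_type"] suffix to the output file name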
        for n in range((len(sequential_list) + 1)):
            if n != 0:
                vid_aug = sequential_list[n - 1](images=vid_np_top) # augments frames
                if type(vid_aug) is list:
                    vid_aug = np.asarray(vid_aug)
                batch = cs760.resize_batch(vid_aug, width=C["expect_img_size"], height=C["expect_img_size"], pad_type='L',
                            inter=cv2.INTER_CUBIC, BGRtoRGB=False, 
                            simplenormalize=True,
                            imagenetmeansubtract=False)
                temp_outfile = outfile_top[:-4] + C["augmentation_type"][n - 1] + ".pkl"
                features = extract(C, model, batch)
                cs760.saveas_pickle(features, os.path.join(feature_directory, temp_outfile))
            else:
                batch = cs760.resize_batch(vid_np_top, width=C["expect_img_size"], height=C["expect_img_size"], pad_type='L',
                                inter=cv2.INTER_CUBIC, BGRtoRGB=False, 
                                simplenormalize=True,
                                imagenetmeansubtract=False)
                features = extract(C, model, batch)
                cs760.saveas_pickle(features, os.path.join(feature_directory, outfile_top))
                print(f'Features output shape: {features.shape}')
                
        if C["crop_type"] == 'B':  # only for boston vids
            vid_np_bot = cs760.crop_image(vid_np, C["crop_bottom"])
            outfile_bot = outfile + "__BOT.pkl"  
            batch = cs760.resize_batch(vid_np_bot, width=C["expect_img_size"], height=C["expect_img_size"], pad_type='L',
                        inter=cv2.INTER_CUBIC, BGRtoRGB=False, 
                        simplenormalize=True,
                        imagenetmeansubtract=False)
            features = extract(C, model, batch)
            cs760.saveas_pickle(features, os.path.join(feature_directory, outfile_bot))

    print('Finished outputting features!!')
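# Output naming convention from the loop above, summarised for reference: for a video
# SIGN__N.mov the unaugmented top crop is written as SIGN__N__TOP.pkl, each augmented copy as
# SIGN__N__TOP<augmentation_type>.pkl, and (Boston videos only, crop_type 'B') the bottom crop
# as SIGN__N__BOT.pkl. Each pickle holds a per-frame feature array whose second dimension is
# expected to be 2560 (see the assert in check_frame_counts above).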
def main():

    excel_file = pd.read_excel(
        "../dataset/dai-asllvd-BU_glossing_with_variations_HS_information-extended-urls-RU.xlsx",
        sheet_name="Sheet1")

    signs = find_class_occurance_and_download_video(
        excel_file
    )  # Outputs a csv listing each distinct sign and how many times it is signed in the dataset's videos. Downloads the videos, sanitises their names and stores them in a video folder.
    outputmatches = calc_filename_and_signer(excel_file, signs)

    df = pd.DataFrame(outputmatches)

    signers = df['signer'].unique().tolist()
    print('DataFrame shape:', df.shape)
    df.to_csv('signers_vids.csv', index=False)

    gb = df.groupby('signer')['signer'].count()
    print(gb)               # number of videos per signer

    df['sign'] = df.apply(lambda row: get_sign(row['video_name']), axis=1)

    signs = df.groupby('sign')['sign'].count()
    print(signs)            # number of videos per sign
    signslist = signs.keys().tolist()

    signers_signs = df.groupby(['signer', 'sign'])['sign'].count()
    print(signers_signs)    # number of videos per (signer, sign) pair

    sign_counts = signers_signs.to_dict()

    sign_counts_tuple = {}
    signs_set = set()
    for k in sign_counts:
        newk = (k[0], k[1], sign_counts[k])
        sign_counts_tuple[newk] = sign_counts[k]  # ('Tyler', 'TOUGH', 2)
        signs_set.add(k[1])
    print('Number of distinct signs:', len(signs_set))

    random.seed(42)
    selected_signers = {}
    for sign in signs_set:
        random.shuffle(signers)
        got_signer = False
        # prefer a signer who signed this sign as few times as possible (1, then 2, 3, 4 videos)
        for target_count in (1, 2, 3, 4):
            for signer in signers:
                checkkey = (signer, sign, target_count)
                if sign_counts_tuple.get(checkkey):
                    got_signer = True
                    selected_signers[sign] = signer
                    print(checkkey)
                    break
            if got_signer:
                break
        if not got_signer:
            print('No Signer selected for ', sign)

    val_list = []
    reject_list = []
    for sign in selected_signers:
        signer = selected_signers[sign]
        tmpdf = df[(df['sign'] == sign) & (df['signer'] == signer)]
        first = True
        for i, row in enumerate(tmpdf.iterrows()):
            name = row[1]['video_name']
            if first:
                first = False
                val_list.append(name)
            else:
                reject_list.append(name)

    print(
        val_list
    )  #['CUTE__5.mov', 'DISAPPOINT__9.mov', 'FACE__3.mov', 'INCLUDE.INVOLVE__11.mov', 'COPY__7.mov', 'GUITAR__4.mov', 'APPOINTMENT__8.mov', 'ADVISE.INFLUENCE__9.mov', 'GROUND__3.mov', 'MACHINE__7.mov', 'WALK__8.mov', 'LOOK__14.mov', 'WEEKEND__9.mov', 'CITY.COMMUNITY__5.mov', 'EXCUSE__16.mov', 'COME__2.mov', 'DISCUSS__8.mov', 'GO__9.mov', 'GOVERNMENT__12.mov', 'ART.DESIGN__6.mov', 'DEVIL.MISCHIEVOUS__2.mov', 'ISLAND.INTEREST__10.mov', 'BOWTIE__4.mov', 'SILLY__14.mov', 'SHELF.FLOOR__5.mov', 'VACATION__4.mov', 'DEVELOP__4.mov', 'MAD__8.mov', 'CANCEL.CRITICIZE__7.mov', 'FIRE.BURN__2.mov', 'DATE.DESSERT__4.mov', 'EMPHASIZE__10.mov', 'COP__11.mov', 'GOLD.ns-CALIFORNIA__7.mov', 'SAME__6.mov', 'HAPPY__6.mov', 'AFRAID__11.mov', 'INFORM__8.mov', 'LIVE__7.mov', 'SHOW__4.mov', 'PAST__12.mov', 'COLLECT__3.mov', 'DRESS.CLOTHES__18.mov', 'REPLACE__9.mov', 'RUN__2.mov', 'FIFTH__6.mov', 'EXPERT__4.mov', 'INJECT__1.mov', 'FED-UP.FULL__4.mov', 'FINGERSPELL__9.mov', 'NICE.CLEAN__11.mov', 'BOSS__9.mov', 'ANSWER__11.mov', 'BIG__3.mov', 'STAND-UP__4.mov', 'TOUGH__4.mov', 'WORK-OUT__11.mov', 'CHAT__10.mov', 'DRIP__2.mov', 'AGAIN__11.mov', 'EAT__2.mov', 'MARRY__9.mov', 'BLAME__3.mov', 'DECREASE__5.mov', 'CAMP__10.mov', 'IN__9.mov', 'GET-TICKET__4.mov', 'DEPRESS__6.mov', 'DOCTOR__7.mov', 'DRINK__6.mov']
    print(
        reject_list
    )  #['FACE__9.mov', 'GUITAR__5.mov', 'GUITAR__6.mov', 'GROUND__4.mov', 'WEEKEND__10.mov', 'COME__6.mov', 'ART.DESIGN__7.mov', 'ART.DESIGN__10.mov', 'ART.DESIGN__16.mov', 'SHELF.FLOOR__6.mov', 'DEVELOP__5.mov', 'CANCEL.CRITICIZE__8.mov', 'DATE.DESSERT__5.mov', 'COLLECT__4.mov', 'EXPERT__5.mov', 'INJECT__2.mov', 'FED-UP.FULL__5.mov', 'TOUGH__5.mov', 'DRIP__6.mov', 'EAT__11.mov', 'BLAME__4.mov']

    for file in val_list:
        base = os.path.splitext(file)[0] + '__*'
        move_files = cs760.list_files_pattern(srcdir, base)
        if not move_files:
            print(f'NO files for {base}')
        for currfile in move_files:
            print(f"Moving {os.path.join(srcdir, currfile)} to {valdir}")
            shutil.move(os.path.join(srcdir, currfile), valdir)

    for file in reject_list:
        base = os.path.splitext(file)[0] + '__*'
        move_files = cs760.list_files_pattern(srcdir, base)
        if not move_files:
            print(f'NO files for {base}')
        for currfile in move_files:
            print(f"Moving {os.path.join(srcdir, currfile)} to {rejdir}")
            shutil.move(os.path.join(srcdir, currfile), rejdir)