import os
import sys

import h5py as h5
import imageio as iio
import numpy as np
import torch as T
from PIL import Image

# Project-local helpers assumed to be importable from the repo's own modules:
# DenseNet, ResNet, preprocess, cclean, time2sec and Bunch are not standard
# library names.


def batch_imagenet(model, raws, vid_id, vlen, fps, stride, log):
    ## raws : T.Tensor(vlen, pic_d)
    # Number of frames pushed through the network per forward pass.
    PART_SIZE = 20
    # Earlier time-based sampling, kept for reference:
    # stride_t = stride / FPS_O
    # dur = vlen / fps
    # stride_f = fps * stride_t
    # nsteps = int(np.floor(dur / stride_t))

    # Sample one frame every `stride` frames.
    nsteps = vlen // stride
    steps = np.arange(nsteps)
    steps = T.LongTensor(np.int32(np.floor(steps * stride)))[:-1]
    try:
        raws = raws[steps]
    except Exception:
        line = f"RAWS: {raws.shape}\n"
        line += f"STEPS: {steps.shape}\n"
        line += f"Steps: {steps[-5:]}\n"
        line += f"ID: {vid_id}\n"
        sys.exit(line)

    vid_feats = np.empty((raws.size(0), 2048))
    # Round up so the trailing, smaller part is processed too; otherwise the
    # last rows of vid_feats would stay uninitialised.
    n_parts = int(np.ceil(raws.size(0) / PART_SIZE))
    for j in range(n_parts):
        log2 = f'\r{log} [ImageNet {j/n_parts*100:3.0f}%]'
        print(f'{log2} >gpu ', flush=True, end='')
        part_range = np.s_[j * PART_SIZE:(j + 1) * PART_SIZE]
        v = raws[part_range]
        # Sanity check: abort if any NaNs made it through preprocessing.
        if (v != v).sum() > 0:
            print("ERROR! NaN before FWD")
            sys.exit()
        v = T.autograd.Variable(v).cuda()
        print(f'{log2} >fwd ', flush=True, end='')
        part_feats = model(v)
        print(f'{log2} >cpu ', flush=True, end='')
        vid_feats[part_range] = part_feats.data.cpu().numpy().reshape(
            part_feats.shape[:2])
        print(f'{log2} .... ', flush=True, end='')
        del v
        cclean()
    del raws
    cclean()
    return vid_feats.reshape(-1)
def batch_c3d(model, raws, vid_id, vlen, width, stride, log):
    # Number of clips pushed through the network per forward pass.
    PART_SIZE = 2
    # Build one `width`-frame clip every `stride` frames. Only full-width
    # windows are kept so that T.cat does not fail on a shorter trailing clip.
    nsteps = int(np.ceil(vlen / stride))
    batch = []
    try:
        for i in range(nsteps):
            clip = raws[i * stride:i * stride + width]
            if clip.size(0) < width:
                break
            batch += [clip.unsqueeze(0)]
        batch = T.cat(batch, 0)
    except Exception:
        line = f"RAWS: {raws.shape}\n"
        line += f"NSTEPS: {nsteps}\n"
        line += f"BATCH: {len(batch)}\n"
        line += f"ID: {vid_id}\n"
        sys.exit(line)

    vid_feats = np.empty((batch.size(0), 4096))
    # Round up so the trailing, smaller part is processed too.
    n_parts = int(np.ceil(batch.size(0) / PART_SIZE))
    for j in range(n_parts):
        log2 = f'\r{log} [C3D {j/n_parts*100:3.0f}%]'
        print(f'{log2} >fwd ', flush=True, end='')
        part_range = np.s_[j * PART_SIZE:(j + 1) * PART_SIZE]
        v = batch[part_range]
        v = T.autograd.Variable(v).cuda()
        part_feats = model(v)
        vid_feats[part_range] = part_feats.data.cpu().numpy().reshape(
            part_feats.shape[:2])
        print(f'{log2} .... ', flush=True, end='')
        del v
        cclean()
    del raws
    cclean()
    return vid_feats.reshape(-1)
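# Minimal, self-contained sketch (not part of the original pipeline) of the
# clip windowing that batch_c3d performs: one `width`-frame clip every
# `stride` frames. The frame count and frame shape below are made up purely
# for illustration.
def _c3d_window_demo(width=16, stride=8):
    frames = T.randn(48, 3, 112, 112)  # 48 fake preprocessed frames
    nsteps = (frames.size(0) - width) // stride + 1  # full-width clips only
    clips = T.stack([frames[i * stride:i * stride + width]
                     for i in range(nsteps)])
    # clips.shape == (5, 16, 3, 112, 112): five overlapping 16-frame clips
    return clips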
def imagenet(args, movies, flag=False):
    vids_path = args.data
    out_path = args.out
    error_log = open('errors_lsmdc_resnet.log', 'a+')
    dataset_write_interval = 10

    model_db = h5.File(out_path, 'a')
    if '/features' in model_db:
        model_feats = model_db['features']
    else:
        model_feats = model_db.create_group('features')

    imagenet_m = DenseNet(args.densenet) if flag else ResNet(args.resnet)
    print("Using {}".format("DenseNet" if flag else "ResNet"))

    for movie in movies:
        vid_ids = []
        vid_files = []
        movie_path = os.path.join(vids_path, movie)
        vids_list = os.listdir(movie_path)
        num_vids = len(vids_list)

        # Collect the clips whose features have not been extracted yet.
        for v in vids_list:
            v_id = v[:v.rfind('.')]
            movie_id = v_id[:4]
            time_range = v_id[v_id.rfind('_') + 1:]
            start = time_range[:time_range.find('-')]
            end = time_range[time_range.find('-') + 1:]
            start = time2sec(start)
            end = time2sec(end)
            v_id = f"{movie_id}_{start}-{end}"
            if v_id in model_feats:
                continue
            vid_ids += [v_id]
            vid_files += [v]

        n_remaining = len(vid_files)
        start_vid = num_vids - n_remaining
        print(f'Extracting features for {movie}: '
              f'{n_remaining}/{num_vids} videos')

        log_line = ''
        errors = []
        for vi, vid_id in enumerate(vid_ids):
            vidx = vi + start_vid
            vid_file = vid_files[vi]
            metadata = None
            try:
                reader = iio.get_reader(os.path.join(movie_path, vid_file),
                                        '.avi')
                metadata = Bunch(reader.get_meta_data())
                vlen = int(reader.get_length())
            except Exception:
                print("Error!!")
                d = str((vidx, vid_id, vid_file))
                m = str(metadata)
                error_log.write(f"{d}\n{m}\n")
                error_log.flush()
                ## We're skipping this bad file (probably a .webm)
                continue

            # Decode and preprocess every frame; stop early on decode errors.
            raw = []
            for i in range(vlen):
                try:
                    r = reader.get_data(i)
                    r = Image.fromarray(r)
                    r = preprocess(r)
                    r = r.unsqueeze_(0)
                    raw.append(r)
                except Exception:
                    errors.append(vid_id)
                    vlen = i
                    break
            raw = T.cat(raw, 0)

            clear_line = '\r' + (' ' * 100) + '\r'
            log_line = (f'{clear_line} ({vidx+1:4}) {vid_file:18} | '
                        f'{vidx/num_vids*100:3.1f}% {vi+1}/{len(vid_ids)} videos')
            print(f'\r{log_line}', end='')

            vid_feats = batch_imagenet(imagenet_m, raw, vid_id, vlen,
                                       metadata.fps, args.stride, log_line)
            model_feats[vid_id] = vid_feats
            # Flush to disk every few videos, and for every video near the end.
            if (vidx % dataset_write_interval == 0
                    or num_vids - vidx <= dataset_write_interval):
                model_db.flush()
            cclean()

    model_db.close()
    error_log.close()
def dense_net(args):
    vids_path = args.data
    out_path = args.out
    error_log = open('errors.log', 'a+')
    dataset_write_interval = 10

    densnet_db = h5.File(out_path, 'a')
    if '/features' in densnet_db:
        densenet_feats = densnet_db['features']
    else:
        densenet_feats = densnet_db.create_group('features')

    # Collect the videos whose features have not been extracted yet.
    vid_ids = []
    vid_files = []
    vids_list = sorted([l.rstrip() for l in open(args.list)])
    num_vids = len(vids_list)
    for v in vids_list:
        v_id = v[:v.rindex('.')]
        if v_id in densenet_feats:
            continue
        vid_ids += [v_id]
        vid_files += [v]

    n_remaining = len(vid_files)
    start_vid = num_vids - n_remaining
    print(f'Extracting features for {n_remaining}/{num_vids} videos')

    densenet_m = DenseNet(args.densenet)
    log_line = ''
    errors = []
    for vi, vid_id in enumerate(vid_ids):
        vidx = vi + start_vid
        vid_file = vid_files[vi]
        clear_line = '\r' + (' ' * 100) + '\r'
        log_line = (f'{clear_line} ({vidx+1:4}) {vid_file:18} | '
                    f'{vidx/num_vids*100:3.1f}% {vi+1}/{len(vid_ids)} videos')

        metadata = None
        try:
            reader = iio.get_reader(os.path.join(vids_path, vid_file), '.mp4')
            metadata = Bunch(reader.get_meta_data())
            vlen = int(reader.get_length())
        except Exception:
            d = str((vidx, vid_id, vid_file))
            m = str(metadata)
            error_log.write(f"{d}\n{m}\n")
            error_log.flush()
            ## We're skipping this bad file (probably a .webm)
            continue

        print(f'\r{log_line} [{vlen}] >loading ', end='')
        # Decode and preprocess every frame; stop early on decode errors.
        raw = []
        for i in range(vlen):
            try:
                r = reader.get_data(i)
                r = Image.fromarray(r)
                r = preprocess(r)
                r = r.unsqueeze_(0)
                raw.append(r)
            except Exception:
                errors.append(vid_id)
                vlen = i
                break
        raw = T.cat(raw, 0)

        # batch_densenet is assumed to be defined elsewhere in the project;
        # it is called with the same arguments as batch_imagenet above.
        vid_feats = batch_densenet(densenet_m, raw, vid_id, vlen,
                                   metadata.fps, args.stride, log_line)
        print(f'\r{log_line} >saving ', end='')
        densenet_feats[vid_id] = vid_feats
        # Flush to disk every few videos, and for every video near the end.
        if (vidx % dataset_write_interval == 0
                or num_vids - vidx <= dataset_write_interval):
            densnet_db.flush()
        cclean()

    densnet_db.close()
    error_log.close()
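# Hypothetical entry point (not present in the original file) sketching the
# argument interface these functions appear to expect: `imagenet` reads
# args.data, args.out, args.stride and args.resnet / args.densenet, while
# `dense_net` additionally reads args.list. Flag names and defaults below are
# assumptions, not the original CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Video feature extraction (sketch of the assumed CLI)')
    parser.add_argument('--data', help='root directory containing the videos')
    parser.add_argument('--out', help='output HDF5 file for the features')
    parser.add_argument('--list', help='text file listing video files (dense_net)')
    parser.add_argument('--stride', type=int, default=8,
                        help='frame sampling stride')
    parser.add_argument('--resnet', help='path to ResNet weights')
    parser.add_argument('--densenet', help='path to DenseNet weights')
    args = parser.parse_args()

    # e.g. imagenet(args, movies=sorted(os.listdir(args.data)))
    dense_net(args)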