Example #1
def Extract_features(mode='train'):
    extractor = models.vgg16(pretrained=True).features.cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if mode == 'train':
        csv = getVideoList(gt_train)
    else:  # validation
        csv = getVideoList(gt_valid)

    vdo_features = None  # stacked per-clip features, filled in the loop below
    for clip_id in range(len(csv['Video_index'])):
        print(mode, clip_id)
        if mode == 'train':
            frames = readShortVideo(train_vdo_dir,
                                    csv['Video_category'][clip_id],
                                    csv['Video_name'][clip_id], 5)
        else:  #validation
            frames = readShortVideo(valid_vdo_dir,
                                    csv['Video_category'][clip_id],
                                    csv['Video_name'][clip_id], 5)
        skip_num = frames.shape[0] // 4  # 4 is frame number after sampling
        frame_sample = np.expand_dims(frames[0], axis=0)
        for i in range(1, 4):
            frame_sample = np.vstack(
                (frame_sample, np.expand_dims(frames[i * skip_num], axis=0)))
        ### now frame_sample is in shape of (4,240,320,3)
        frame_sample = np.transpose(frame_sample, (0, 3, 1, 2))
        frame_sample = torch.tensor(frame_sample, dtype=torch.float).cuda()
        ### now frame_sample is transformed to tensor in shape of (4,3,240,320)
        ###normalize
        frame_sample = (frame_sample / 255)
        for i in range(4):
            frame_sample[i] = normalize(frame_sample[i])

        frame_features = extractor(frame_sample)  ### in shape of (4,512,7,10)
        frame_features = frame_features.view(
            1, -1, 7, 10
        )  ### in shape of (1,2048,7,10) --> input for ActionPredictor for 1 clip

        if clip_id == 0:
            vdo_features = frame_features.cpu().detach().numpy()
        else:
            vdo_features = np.vstack(
                (vdo_features, frame_features.cpu().detach().numpy()))

    np.save(mode + '_feature.npy', vdo_features)
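A minimal usage sketch for the file saved above: the name follows the mode + '_feature.npy' convention from this snippet, and the ActionPredictor classifier mentioned in the comments is only assumed, not defined here.

# Hedged sketch, assuming Extract_features('train') has produced train_feature.npy
import numpy as np
import torch

train_features = np.load('train_feature.npy')               # (num_clips, 2048, 7, 10)
train_features = torch.tensor(train_features, dtype=torch.float)
# each train_features[i] is one clip's feature map, ready to be fed to a
# classifier such as the ActionPredictor referenced in the comments above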
Example #2
def extract(folder, csvpath, load, num_class, batch_size, name):
    print("extract frames...")

    frames = []
    labels = []
    video_list = getVideoList(csvpath)

    if (load == 0):
        for i in range(len(video_list["Video_name"])):
            frame = readShortVideo(folder, video_list["Video_category"][i],
                                   video_list["Video_name"][i])
            frame = np.mean(frame, axis=0, keepdims=True)
            #print(frame.shape)
            for j in range(len(frame)):
                frames.append(np.moveaxis(frame[j], -1, 0))
                label = np.zeros(num_class)
                label[int(video_list["Action_labels"][i])] = 1
                labels.append(label)
        frames = np.array(frames, dtype=np.uint8)
        labels = np.array(labels, dtype=np.uint8)
        #np.save("./"+name+"_frames.npy",frames)
        #np.save("./"+name+"_labels.npy",labels)
    elif load == 1:
        frames = np.load("./" + name + "_frames.npy")
        labels = np.load("./" + name + "_labels.npy")
    print(frames.shape, labels.shape)
    data = [(frames[i], labels[i]) for i in range(len(frames))]
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    return dataloader
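A short, hedged sketch of how the DataLoader returned by extract() might be consumed; the folder/CSV paths and the class count are placeholders rather than values from the original repository, and the uint8 frames are cast to float before use.

# Hedged sketch; paths and num_class are placeholders, not from the original repo.
loader = extract(folder='video/train/', csvpath='gt_train.csv',
                 load=0, num_class=11, batch_size=32, name='train')
for batch_frames, batch_labels in loader:
    batch_frames = batch_frames.float() / 255.0   # (B, 3, H, W), scaled to [0, 1]
    batch_labels = batch_labels.float()           # one-hot labels, shape (B, num_class)
    # ... forward pass through a CNN would go here ...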
Example #3
def get_features():
    extractor = models.vgg16(pretrained= True).features.cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    csv = getVideoList(csv_file)
    
    vdo_num = len(csv['Video_index'])
    vdo_features = torch.zeros((vdo_num,2048,7,10),dtype= torch.float)
    for clip_id in range(vdo_num): #
        frames = readShortVideo(vdo_dir,csv['Video_category'][clip_id],csv['Video_name'][clip_id],5)
        
        skip_num = frames.shape[0]//4     # 4 is frame number after sampling
        frame_sample = np.expand_dims(frames[0],axis=0)
        for i in range(1,4):
            frame_sample = np.vstack((frame_sample,np.expand_dims(frames[i*skip_num],axis=0)))
        ### now frame_sample is in shape of (4,240,320,3)
        frame_sample = np.transpose(frame_sample,(0,3,1,2))
        frame_sample = torch.tensor(frame_sample,dtype = torch.float).cuda()
        ### now frame_sample is transformed to tensor in shape of (4,3,240,320)
        ###normalize
        frame_sample = (frame_sample / 255)
        for i in range(4):
            frame_sample[i] = normalize(frame_sample[i])
        
        frame_features = extractor(frame_sample)        ### in shape of (4,512,7,10)
        frame_features = frame_features.view(1,-1,7,10) ### in shape of (1,2048,7,10) --> input for ActionPredictor for 1 clip
        
        vdo_features[clip_id] = frame_features.detach().cpu()
    
    print('finish extracting features')
    return vdo_features
def extract_frames(opt):

    label_dir = opt.val_label_dir
    video_dir = opt.val_video_dir

    # Read CSV label file
    video_dict = reader.getVideoList(label_dir)

    # Initialize return lists
    all_frames = [
    ]  # length equal to number of videos. Elements are sublists. Those lists contain numpy arrays of frames (240, 320, 3)

    # For length of the csv file:
    for i in range(len(video_dict["Video_index"])):

        print("Extracting frames from video %d..." % (i + 1))
        frame_list = []

        # Take video category and video name from current dict entry
        folder_name = video_dict["Video_category"][i]
        file_name = video_dict["Video_name"][i]

        # Present to helper function
        current_frames = reader.readShortVideo(video_dir, folder_name,
                                               file_name)

        # Separate each frame in returned array and put into a list
        for j in range(current_frames.shape[0]):
            frame_list.append(current_frames[j, :, :, :])

        # Append the list of individual frames, and the corresponding label, onto the lists
        all_frames.append(frame_list)

    return all_frames
def convert_videos_to_np(mode, labels_fp, videos_fp, save_fp, limit) :
    batch_max = 1000
    l = getVideoList(labels_fp)
    videos_output, labels_output = [], []

    data_num = limit if limit is not None else len(l["Video_category"])

    for i in range(data_num):
        print ("Convert videos into numpy: {}/{} \r".format(i + 1, data_num), end="")

        cat = l["Video_category"][i]
        name = l["Video_name"][i]
        label = l["Action_labels"][i]
        data = readShortVideo(videos_fp, cat, name, downsample_factor=12).astype(np.uint8)  # uint8 keeps the 0-255 pixel range (int8 would overflow)

        videos_output.append(data)
        labels_output.append(int(label))

        if (i+1) % batch_max == 0 :
            videos_output, labels_output = np.array(videos_output), np.array(labels_output)
            np.save(os.path.join(save_fp, "videos_{}_{}.npy".format(mode, i//batch_max)), videos_output)
            np.save(os.path.join(save_fp, "labels_{}_{}.npy".format(mode, i//batch_max)), labels_output)
            videos_output = []
            labels_output = []

    if (i+1) % batch_max != 0 :
        videos_output, labels_output = np.array(videos_output), np.array(labels_output)
        np.save(os.path.join(save_fp, "videos_{}_{}.npy".format(mode, (i // batch_max))), videos_output)
        np.save(os.path.join(save_fp, "labels_{}_{}.npy".format(mode, (i // batch_max))), labels_output)

    print ("\nDone !")
def import_test_trimmed():
    path = sys.argv[2]
    print('path =', path)
    od = reader.getVideoList(path)
    print('len(od) =', len(od))

    path = sys.argv[1]
    num = len(od['Video_name'])
    print('num of videos =', num)

    df = 12
    count = 0
    leng_idx = np.zeros([
        num,
    ], np.uint32)
    for i in range(num):
        if i % 100 == 0 and i > 0: print(i)

        video = reader.readShortVideo(path,
                                      od['Video_category'][i],
                                      od['Video_name'][i],
                                      downsample_factor=df,
                                      rescale_factor=1)
        if i == 0: videos = video
        else: videos = np.concatenate([videos, video])

        count += video.shape[0]
        leng_idx[i] = video.shape[0]

    print("count =", count, np.sum(leng_idx))
    print('videos.shape =', videos.shape)

    return videos, leng_idx
Example #7
def load_extract_video(video_path, df, model, filename):
    print("===== read video =====")
    codes = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(df.shape[0]):
            print(i, end="\r")
            video = readShortVideo(video_path=video_path,
                                   video_category=df.iloc[i]['Video_category'],
                                   video_name=df.iloc[i]['Video_name'],
                                   downsample_factor=12,
                                   rescale_factor=1)

            # extract features batch-wise
            if video.shape[0] < 50:
                tmp = sess.run(model.output, feed_dict={model.x: video})
            else:
                tmp = list()
                for j in range(int(video.shape[0] / 50) + 1):  # avoid shadowing the outer loop index
                    st = 50 * j
                    ed = min(50 * j + 50, video.shape[0])
                    tmp_video = video[st:ed, :]
                    tmp.append(
                        sess.run(model.output, feed_dict={model.x: tmp_video}))
                tmp = np.concatenate(tmp, axis=0)
            codes.append(tmp)
    print('Done')

    print("===== save into %s =====" % filename)
    with open(filename, 'wb') as f:
        pickle.dump(codes, f)
Example #8
def get_data(video_path, tag_path, model):
    if torch.cuda.is_available():
        model.cuda()
    file_dict = getVideoList(tag_path)
    x, y = [], []
    print(len(file_dict['Video_index']))
    with torch.no_grad():
        for i in range(len(file_dict['Video_index'])):
            frames = readShortVideo(video_path, file_dict['Video_category'][i],
                                    file_dict['Video_name'][i])
            if frames.shape[0] > 120:
                output_1 = model(
                    torch.from_numpy(
                        frames[0:120, :, :, :]).cuda()).detach().cpu().reshape(
                            -1, 512 * 7 * 7)
                output_2 = model(
                    torch.from_numpy(
                        frames[120:, :, :, :]).cuda()).detach().cpu().reshape(
                            -1, 512 * 7 * 7)
                output = torch.cat((output_1, output_2), 0)
            else:
                output = model(
                    torch.from_numpy(frames).cuda()).detach().cpu().reshape(
                        -1, 512 * 7 * 7)
            output = torch.mean(output, 0).numpy()

            x.append(output)
            y.append(int(file_dict['Action_labels'][i]))
            print('\rreading image from {}...{}'.format(video_path, i), end='')

    print('\rreading image from {}...finished'.format(video_path))

    return np.array(x).astype(np.float32), np.array(y).astype(np.uint8)
Example #9
def Video2Seq(video_path, video_category, video_name):
    features = torch.Tensor()
    seq_length = []
    for i in range(len(video_name)):
        frames = readShortVideo(video_path, video_category[i], video_name[i])
        ts_frames = torch.from_numpy(frames.transpose(
            (0, 3, 1, 2))).float() / 255.
        sys.stdout.write('\rReading the Video... : {:}'.format(i))
        sys.stdout.flush()

        dataset = Data.TensorDataset(ts_frames)  # avoid shadowing the built-in 'set'
        dataloader = Data.DataLoader(dataset=dataset, batch_size=3)

        seq_length.append(0)
        for batch_idx, b_frame in enumerate(dataloader):
            features = torch.cat(
                [features,
                 resnet50(b_frame[0].cuda()).detach().cpu()])
            seq_length[i] += len(b_frame[0])

    max_length = max(seq_length)
    seq = torch.zeros(len(seq_length), max_length, features.shape[1])
    start = 0

    for i in range(len(seq_length)):
        seq[i, 0:seq_length[i], :] = features[start:start + seq_length[i], :]
        start += seq_length[i]

    sys.stdout.write('... Done\n')
    sys.stdout.flush()
    return seq, seq_length
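Since Video2Seq returns zero-padded sequences together with their true lengths, a hedged sketch of feeding them to an RNN with pack_padded_sequence may help; the CSV/video paths and the LSTM size are assumptions, not part of the original code.

# Hedged sketch; 'gt_valid.csv', 'video/valid/' and hidden_size=256 are assumptions.
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

csv = getVideoList('gt_valid.csv')
seq, seq_length = Video2Seq('video/valid/', csv['Video_category'], csv['Video_name'])

lstm = nn.LSTM(input_size=seq.shape[2], hidden_size=256, batch_first=True)
packed = pack_padded_sequence(seq, seq_length, batch_first=True, enforce_sorted=False)
_, (h_n, _) = lstm(packed)   # h_n[-1] holds one hidden state per video, shape (num_videos, 256)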
def load_test_pred(video_path, gt_path, model_path):
    feature_size = 512 * 7 * 7

    CNN_pre_model = torchvision.models.vgg16(pretrained=True).features
    model = RNN_model(feature_size)
    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        CNN_pre_model = CNN_pre_model.to(device)
        model = model.to(device)
    load_checkpoint(model_path, model)
    CNN_pre_model.eval()

    # -> label loading
    # test_label = pd.read_csv(gt_path)["Action_labels"]

    test_features = []
    category_path = sorted(os.listdir(video_path))
    with torch.no_grad():
        for category in category_path:
            mask = pd.read_csv(gt_path)["Video_category"] == category
            test_name = pd.read_csv(gt_path)[mask]["Video_name"]
            for i, video_name in enumerate(test_name):
                print("\r%d/%d" % (i, len(test_name)), end="")
                frames = readShortVideo(video_path,
                                        category,
                                        video_name,
                                        downsample_factor=12,
                                        rescale_factor=1)
                frames = Variable(torch.from_numpy(frames)).to(device)
                tmp = CNN_pre_model(frames).cpu().view(-1, feature_size)
                test_features.append(tmp)
            print("")
            print("Processing [%s] finished!" % (category))
            print("Pre-train finished!")

    with torch.no_grad():
        RNN_feature = []
        preds = []
        model.eval()
        for i in range(0, len(test_features)):
            padded_feature, lengths = test_features[i], [
                test_features[i].shape[0]
            ]  # padded_label, test_label[i]
            padded_feature = Variable(padded_feature).to(device).unsqueeze(1)
            lengths = torch.LongTensor(lengths)
            #print(padded_feature.shape)
            #print(padded_label)
            lengths = Variable(lengths).to(device)
            output, hidden = model(padded_feature, lengths)
            pred = torch.argmax(output, 1).cpu()
            preds.append(pred)
            RNN_feature.append(hidden.cpu().data.numpy().reshape(-1))

    RNN_feature = np.array(RNN_feature)
    preds = np.array(preds)
    print(pred.shape)
    return RNN_feature, preds  # ,test_label
    def __getitem__(self, idx):
        label = torch.LongTensor(np.array([float(self.vid_label_list[idx])
                                           ]))  # (2, 3, 240, 320)
        frames = readShortVideo(self.vid_dir, self.vid_categ_list[idx],
                                self.vid_name_list[idx])
        t, h, w, c = frames.shape

        if self.model_type == 'cnn':
            frames_tensor = torch.zeros([self.num_sample, c, h, w],
                                        dtype=torch.float)
            rand_frame_idx = torch.randint(0, t, (self.num_sample, ))
            for i in range(self.num_sample):
                frames_tensor[i] = self.transform(
                    Image.fromarray(frames[rand_frame_idx[i]]))

        elif self.model_type == 'rnn':
            frames_tensor = []
            if t > 10:
                scale = round(t / self.num_sample)
                for i in range(t):
                    if i % scale == 0:
                        frames_tensor.append(
                            self.transform(Image.fromarray(frames[i])))
                if len(frames_tensor) > self.num_sample:
                    frames_tensor = frames_tensor[:10]
                frames_tensor = torch.stack(frames_tensor)
            else:
                frames_tensor = torch.zeros([t, c, h, w], dtype=torch.float)
                for i in range(t):
                    frames_tensor[i] = self.transform(
                        Image.fromarray(frames[i]))

        # label = torch.LongTensor( np.array( [ float( self.vid_label_list[idx] ) ] ) )  # (2, 3, 240, 320)
        return frames_tensor, label
Example #12
    def __getitem__(self, idx):

        # read video
        video = reader.readShortVideo(self.dir, self.vid_cat[idx],
                                      self.vid_name[idx])

        return video
def Get_data(video_path, tag_path):
    model = torchvision.models.vgg16(pretrained=True).features
    if torch.cuda.is_available():
        model.cuda()
    file_dict = getVideoList(tag_path)
    feature_size = 512 * 7 * 7
    x, y = [], []
    print(len(file_dict['Video_index']))

    with torch.no_grad():
        for i in range(len(file_dict['Video_index'])):
            frames = readShortVideo(video_path, file_dict['Video_category'][i],
                                    file_dict['Video_name'][i])
            if frames.shape[0] > 120:
                output_1 = model(
                    torch.from_numpy(
                        frames[0:120, :, :, :]).cuda()).detach().cpu().reshape(
                            -1, feature_size)
                output_2 = model(
                    torch.from_numpy(
                        frames[120:, :, :, :]).cuda()).detach().cpu().reshape(
                            -1, feature_size)
                output = torch.cat((output_1, output_2), 0)
            else:
                output = model(
                    torch.from_numpy(frames).cuda()).detach().cpu().reshape(
                        -1, feature_size)

            x.append(output)
            y.append(int(file_dict['Action_labels'][i]))
            print('\rreading image from {}...{}'.format(video_path, i), end='')

    print('\rreading image from {}...finished'.format(video_path))

    return x, y
Example #14
    def __getitem__(self, idx):
        ''' get data '''
        video_category = self.video_category_list[idx]
        video_name = self.video_name_list[idx]
        label = self.label_list[idx]
        frames = reader.readShortVideo(self.video_path,
                                       video_category,
                                       video_name,
                                       downsample_factor=12,
                                       rescale_factor=self.rescale_factor)
        #print("frames.shape: ",frames.shape)

        frames_list = []
        for f in range(frames.shape[0]):
            frame = frames[f, :, :, :]
            frame = self.transform(frame)
            frames_list.append(frame)
        """
        #handle every sample, then concat 
        imgs = []
        for frame in frames:
            imgs.append(self.transform(frame))
        imgs = np.array(imgs)
        print("imgs shape ",imgs.shape)
        """

        return torch.stack(frames_list), torch.tensor(int(label))
    def __getitem__(self, index):
        current_video_imgs = reader.readShortVideo(
            self.root_videos, self.csv_dict['Video_category'][index],
            self.csv_dict['Video_name'][index],
            downsample_factor=downsample_factor, rescale_factor=(224, 224))
        videos = torch.zeros([current_video_imgs.shape[0], current_video_imgs.shape[3],
                              current_video_imgs.shape[1], current_video_imgs.shape[2]],
                             dtype=torch.float32)
        for i in range(len(current_video_imgs)):
            current_img = Image.fromarray(current_video_imgs[i])
            if self.transform is not None:
                current_img = self.transform(current_img)
            videos[i] = current_img
        return videos, int(self.labels[index])
Example #16
    def __getitem__(self, idx):

        # read video
        video = reader.readShortVideo(self.dir, self.vid_cat[idx],
                                      self.vid_name[idx])

        # get action label
        act = self.act_label[idx]

        return video, act
Example #17
def prediction(model_fp, data_fp, label_fp, output_fp, limit):
    model = tor.load(model_fp)
    model.cuda()

    ### Load data
    l = getVideoList(label_fp)
    videos_output, labels_output = [], []

    total = len(l["Video_category"]) if not limit else limit

    for i in range(total):
        print("Convert videos into numpy: {}/{} \r".format(
            i + 1, len(l["Video_category"])),
              end="")

        cat = l["Video_category"][i]
        name = l["Video_name"][i]
        label = l["Action_labels"][i]
        data = readShortVideo(data_fp, cat, name,
                              downsample_factor=12).astype(np.uint8)  # int8 would overflow 0-255 pixels
        videos_output.append(data.astype(np.int16))
        labels_output.append(int(label))

    videos, labels = np.array(videos_output), np.array(labels_output).astype(
        np.uint8)

    ### Prediction
    correct, total = 0, len(labels)
    preds = []

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, len(videos)))
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        out = model(x)
        out = out.mean(dim=0).unsqueeze(0)
        pred = model.pred(out)
        y = tor.max(pred, 1)[1]
        pred = int(y[0].data)
        if pred == label:
            correct += 1

        preds.append(pred)

    acc = correct / total
    print(acc)

    with open(os.path.join(output_fp, "p1_valid.txt"), "w") as f:
        for i, item in enumerate(preds):
            if i != len(preds) - 1:
                f.write(str(item) + "\n")
            else:
                f.write(str(item))
Example #18
def Extract_features(mode='train'):
    extractor = models.vgg16(pretrained= True).features.cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if mode == 'train':
        csv = getVideoList(gt_train)
    else:  # validation
        csv = getVideoList(gt_valid)

    vdo_num = len(csv['Video_index'])
    frames_num = 10  # number of frames sampled per clip
    vdo_features = np.zeros((vdo_num, frames_num, 512 * 3 * 5),
                            dtype=float)  # np.float is deprecated; plain float keeps float64

    for clip_id in range(vdo_num): 
        print(mode,clip_id)
        if mode == 'train':
            frames = readShortVideo(train_vdo_dir,csv['Video_category'][clip_id],csv['Video_name'][clip_id],2,0.5)
        else : #validation
            frames = readShortVideo(valid_vdo_dir,csv['Video_category'][clip_id],csv['Video_name'][clip_id],2,0.5)
        # frames is in shape of (num, 120,160,3)
        vdo_size = frames.shape[0]  # length of the clip; frames_num frames will be sampled from it
        skip = vdo_size/frames_num
        frame_sample = np.zeros((frames_num,120,160,3))
        for i in range(frames_num):
            frame_sample[i] = frames[int(i*skip)]
        frame_sample = np.transpose(frame_sample,(0,3,1,2))
        frame_sample = torch.tensor(frame_sample,dtype= torch.float).cuda()
        # sample completed --> (frames_num,3,120,160)per clip
        # now normalize it 
        frame_sample = (frame_sample/255)
        for i in range(frames_num):
            frame_sample[i] = normalize(frame_sample[i])
        
        frame_features = extractor(frame_sample)

        frame_features = frame_features.view(frames_num, -1)  ## in shape of (frames_num, 512*3*5)
        
        vdo_features[clip_id] = frame_features.cpu().detach().numpy()
    
    np.save('rnn_'+mode+'_feature.npy',vdo_features) #shape = (vdo_num, seq = frames_num, 512*h*w)
Example #19
def evaluate(feature_stractor, rnn, data_loader, batch_size):
    ''' set model to evaluate mode '''
    rnn.eval()
    feature_stractor.eval()
    iters = 0
    gts = []
    preds = []
    with torch.no_grad():
        for idx, (video, video_path) in enumerate(data_loader):
            #print(iters)
            iters += 1
            batch_img = []
            batch_gt = []
            for i in range(len(video_path)):
                frames = readShortVideo(video_path[i],
                                        video.get('Video_category')[i],
                                        video.get('Video_name')[i])

                vid = []
                for j in range(frames.shape[0]):
                    im = transforms_array(frames[j])
                    vid.append(im)
                vid = torch.stack(vid).cuda()

                with torch.no_grad():
                    feature = feature_stractor(vid)

                batch_img.append(feature)

                gt = (int(video.get('Action_labels')[i]))
                batch_gt.append(gt)

            sequence, label, n_frames = batch_padding(batch_img, batch_gt)
            #print(sequence.shape)

            _, pred = rnn(sequence, n_frames)

            _, pred = torch.max(pred, dim=1)

            batch_gt = torch.from_numpy(np.asarray(batch_gt))
            # print(batch_gt.shape)

            pred = pred.cpu().numpy().squeeze()
            batch_gt = batch_gt.numpy().squeeze()

            preds.append(pred)
            gts.append(batch_gt)

        if batch_size != 1:
            gts = np.concatenate(gts)
            preds = np.concatenate(preds)
    print(preds)
    return accuracy_score(gts, preds)
Example #20
    def __getitem__(self, index):
        video_name = self.video_list['Video_name'][index]
        video_category = self.video_list['Video_category'][index]

        video_label = None
        if 'Action_labels' in self.video_list:
            video_label = torch.LongTensor(
                [self.video_list['Action_labels'][index]])

        # ---------------------------------------------------------------
        # Sample for HW4.1, pick the fixed number of frames
        # Downsample for HW4.2, pick the frames with the downsampling rate
        # ----------------------------------------------------------------
        if self.feature_path is not None:
            video = reader.readShortFeature(self.feature_path,
                                            video_category,
                                            video_name,
                                            downsample_factor=self.downsample)
        elif self.video_path is not None:
            video = reader.readShortVideo(self.video_path,
                                          video_category,
                                          video_name,
                                          downsample_factor=self.downsample,
                                          rescale_factor=self.rescale)

        if self.sample:
            step = video.shape[0] / self.sample
            frame = np.around(np.arange(0, video.shape[0], step),
                              decimals=0).astype(int)
            video = video[frame]

        # ---------------------------------------------------
        # Features Output dimension:   (frames, 2048)
        # Full video Output dimension: (frames, channel, height, width)
        # ---------------------------------------------------
        if self.transform:
            if self.feature_path is not None:
                tensor = self.transform(video)

                return tensor.squeeze(0), video_label

            if self.video_path is not None:
                tensor = torch.zeros(video.shape[0], 3, 240,
                                     320).type(torch.float32)

                for i in range(video.shape[0]):
                    tensor[i] = self.transform(video[i])

                return tensor, video_label

        return video, video_label
Example #21
def output_features(rnn, feature_stractor, data_loader, json_dir):
    ''' set model to evaluate mode '''
    rnn.eval()
    feature_stractor.eval()
    iters  = 0
    with torch.no_grad():  # no need to calculate gradients during eval
        data = []
        for idx, (video, video_path) in enumerate(data_loader):

            print(iters)
            iters += 1
            batch_img = []
            batch_gt = []
            for i in range(len(video_path)):
                frames = readShortVideo(video_path[i], video.get('Video_category')[i], video.get('Video_name')[i])

                vid = []
                for j in range(frames.shape[0]):
                    im = transforms_array(frames[j])
                    vid.append(im)
                vid = torch.stack(vid).cuda()

                with torch.no_grad():
                    feature = feature_stractor(vid)

                batch_img.append(feature)

                gt = (int(video.get('Action_labels')[i]))
                batch_gt.append(gt)

            sequence, label, n_frames = batch_padding(batch_img, batch_gt)
            # print(sequence.shape)

            feat, _ = rnn(sequence, n_frames)

            features_flt = []
            for imgs in feat:
                imgs_feature = []
                for fea in imgs:
                    imgs_feature.append(float(fea))
                features_flt.append(list(imgs_feature))

            ## store the values of the pred.

            for i in range(0, len(features_flt)):
                data.append([list(features_flt[i]), batch_gt[i]])

        data = list(data)
        with open(json_dir, 'w') as outfile:
            json.dump(data, outfile)
def test():
    # test Reader
    # input = video_path, video_category, video_name, downsample_factor=12, rescale_factor=1
    frames = readShortVideo(
        "hw4_data/TrimmedVideos/video/train/",
        "OP01-R01-PastaSalad",
        "OP01-R01-PastaSalad-66680-68130-F001597-F001639.mp4",
        downsample_factor=12,
        rescale_factor=1)

    cc = frames[0]
    cc = cc.transpose(1, 2, 0)
    print(cc.shape)
    print(cc)
Example #23
def store(feature_stractor, rnn, data_loader, batch_size):
    ''' set model to evaluate mode '''
    rnn.eval()
    feature_stractor.eval()
    iters = 0
    gts = []
    preds = []
    with torch.no_grad():
        for idx, (video, video_path) in enumerate(data_loader):
            #print(iters)
            iters += 1
            batch_img = []
            for i in range(len(video_path)):
                frames = readShortVideo(video_path[i],
                                        video.get('Video_category')[i],
                                        video.get('Video_name')[i])
                vid = []
                for j in range(frames.shape[0]):
                    im = transforms_array(frames[j])
                    vid.append(im)
                vid = torch.stack(vid).cuda()
                print('working in video ',
                      video.get('Video_index')[i], ' with size ', vid.shape)
                feature = feature_stractor(vid)
                batch_img.append(feature)

            #print(batch_img[0].shape)
            #print(batch_img[1].shape)

            sequence, n_frames = batch_padding(batch_fea=batch_img)
            #print(sequence.shape)
            #print(n_frames)
            _, pred = rnn(sequence, n_frames)
            #print(pred.shape)

            _, pred = torch.max(pred, dim=1)

            pred = pred.cpu().numpy().squeeze()

            preds.append(pred)

        if batch_size != 1:
            preds = np.concatenate(preds)
    #print(preds.shape)
    print(preds)
    f = open("p2_result.txt", "w+")
    for pred in preds:
        f.write("%d\n" % pred)
    f.close()
Example #24
    def __getitem__(self, index):
        """ Get a sample from the dataset """
        frames = readShortVideo(self.video_root,
                                self.all_video_frames[index][0],
                                self.all_video_frames[index][1],
                                downsample_factor=12,
                                rescale_factor=1)

        # image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        # label = self.landmarks_frame['label'][index]
        # label = torch.FloatTensor([label])
        frames = torch.from_numpy(frames)  # readShortVideo returns a numpy array, not a list of tensors
        # output = torch.mean(frames,0)
        # if self.transform:
        # frames = self.transform(frames)
        return frames, self.label[index]
    def __getitem__(self, index):
        frames = reader.readShortVideo(
            video_path=self.video_path,
            video_category=self.gt["Video_category"][index],
            video_name=self.gt["Video_name"][index])

        images = []
        for i in range(len(frames)):
            images.append(self.transform(frames[i]).unsqueeze(0))
        images = torch.cat(images)

        if self.is_train:
            label = int(self.gt["Action_labels"][index])

            return images, label
        return images
def import_four_trimmed(data='valid'):
    print('\ndata type =', data)

    path = '../HW5_data/TrimmedVideos/label/gt_' + data + '.csv'
    od = reader.getVideoList(path)
    print('len(od) =', len(od))
    print("len(od['Video_name']) =", len(od['Video_name']))
    print("len(od['Action_labels']) =", len(od['Action_labels']))
    print("len(od['Action_labels'] =", len(od['Action_labels']))

    path = '../HW5_data/TrimmedVideos/video/' + data + '/'
    num = len(od['Video_name'])

    print('num of videos =', num)
    videos = np.empty([num, 4, 240, 320, 3], np.uint8)
    labels = np.zeros([num], np.uint8)
    leng_idx = np.zeros([num], np.uint32)  # frames extracted per video
    count = 0                              # running total of extracted frames

    for i in range(num):
        #'Action_labels', 'Nouns', 'End_times', 'Start_times', 'Video_category', 'Video_index', 'Video_name'
        #readShortVideo(path, video_category, video_name, downsample_factor=12, rescale_factor=1)
        if i % 100 == 0 and i > 0: print(i)
        df = int((
            (int(od['End_times'][i]) - int(od['Start_times'][i])) / 35.0 - 1) /
                 3.0)
        video = reader.readShortVideo(path,
                                      od['Video_category'][i],
                                      od['Video_name'][i],
                                      downsample_factor=df,
                                      rescale_factor=1)
        print('video =', video.shape)
        count += video.shape[0]

        assert video.shape[0] >= 4
        videos[i] = video[:4]

        labels[i] = od['Action_labels'][i]
        leng_idx[i] = video.shape[0]

    print('videos.shape =', videos.shape)
    print('labels.shape =', labels.shape)
    print('labels =', labels[:13])

    return videos, labels, leng_idx
def import_trimmed(data='valid'):
    print('\ndata type =', data)

    path = '../HW5_data/TrimmedVideos/label/gt_' + data + '.csv'
    od = reader.getVideoList(path)
    print('len(od) =', len(od))
    print("len(od['Video_name']) =", len(od['Video_name']))
    print("len(od['Action_labels']) =", len(od['Action_labels']))
    print("len(od['Action_labels'] =", len(od['Action_labels']))

    path = '../HW5_data/TrimmedVideos/video/' + data + '/'
    num = len(od['Video_name'])

    print('num of videos =', num)
    videos = np.empty([4, 240, 320, 3], np.uint8)
    labels = np.zeros([
        num,
    ], np.uint8)
    df = 12
    count = 0
    leng_idx = np.zeros([
        num,
    ], np.uint32)
    for i in range(num):
        if i % 100 == 0 and i > 0: print(i)

        video = reader.readShortVideo(path,
                                      od['Video_category'][i],
                                      od['Video_name'][i],
                                      downsample_factor=df,
                                      rescale_factor=1)
        #print('video =', video.shape)
        if i == 0: videos = video
        else: videos = np.concatenate([videos, video])

        count += video.shape[0]
        labels[i] = od['Action_labels'][i]
        leng_idx[i] = video.shape[0]

    print("count =", count, np.sum(leng_idx))
    print('videos.shape =', videos.shape)
    print('labels.shape =', labels.shape)
    print('labels =', labels[:13])

    return videos, labels, leng_idx
Example #28
def output_features(classi, feaStract, data_loader, json_dir):
    ''' set model to evaluate mode '''
    classi.eval()
    feaStract.eval()
    with torch.no_grad():  # no need to calculate gradients during eval
        data = []

        for idx, (video, video_path) in enumerate(data_loader):


            features = []
            clss = []
            print('Preprocessing the data')
            for i in range(len(video_path)):
                print('working ', i)
                frames = readShortVideo(video_path[i], video.get('Video_category')[i], video.get('Video_name')[i])
                frames_res = torch.from_numpy(frames)
                frames_res.resize_(len(frames), 3, 240, 240)
                frames_res = frames_res.float().cuda()
                print(feaStract(frames_res).shape)  # , end="\r")
                features.append(torch.mean(feaStract(frames_res), 0).cpu().detach().numpy())
                clss.append(int(video.get('Action_labels')[i]))
            features = torch.from_numpy(np.asarray(features))
            clss = torch.from_numpy(np.asarray(clss))

            # FC
            print('Classifier')
            features = features.cuda()

            feat, _ = classi(features)
            features_flt = []
            for imgs in feat:
                imgs_feature = []
                for fea in imgs:
                    imgs_feature.append(float(fea))
                features_flt.append(list(imgs_feature))

            ## store the values of the pred.

            for i in range(0, len(features_flt)):
                data.append([list(features_flt[i]), clss[i]])

        data = list(data)
        with open(json_dir, 'w') as outfile:
            json.dump(data, outfile)
def extract_frames(opt, mode):

    if mode == "train":
        label_dir = opt.train_label_dir
        video_dir = opt.train_video_dir
        save_dir = opt.save_train_frames_dir
        end_video_index = opt.num_videos_train
    elif mode == "val":
        label_dir = opt.val_label_dir
        video_dir = opt.val_video_dir
        save_dir = opt.save_val_frames_dir
        end_video_index = opt.num_videos_val
    else:
        raise ValueError("ERROR: invalid mode in frame generator")

    # Read CSV label file
    video_dict = reader.getVideoList(label_dir)

    # For length of the csv file:
    for i in range(end_video_index):

        # Clear dict and list for each new video
        data_dict = {}
        frame_list = []

        # Take video category and video name from current dict entry
        folder_name = video_dict["Video_category"][i]
        file_name = video_dict["Video_name"][i]

        # Present to helper function
        frames = reader.readShortVideo(video_dir, folder_name, file_name)

        # Separate each frame in returned array and put into a list
        for j in range(frames.shape[0]):
            frame_list.append(frames[j, :, :, :])

        # Populate a dictionary with the list of individual frames, and the corresponding label
        data_dict["frame_list"] = frame_list
        data_dict["label"] = video_dict["Action_labels"][i]

        # Save dict of frames/label for current video
        print("Saving frames from video %d..." % (i + 1))

        with open(os.path.join(save_dir, "{}.pk".format(i + 1)), "wb") as f:
            pk.dump(data_dict, f)
Example #30
def Video2Tensor(video_path, video_category, video_name):
    features = torch.Tensor()
    for i in range(len(video_name)):
        frames = readShortVideo(video_path, video_category[i], video_name[i])
        ts_frames = torch.from_numpy(frames.transpose(
            (0, 3, 1, 2))).float() / 255.
        sys.stdout.write('\rReading the Video... Frame: {:}'.format(i))
        sys.stdout.flush()
        dataset = Data.TensorDataset(ts_frames)  # avoid shadowing the built-in 'set'

        dataloader = Data.DataLoader(dataset=dataset, batch_size=1)
        feature = torch.zeros(1, 1000).cuda()
        for batch_idx, b_frame in enumerate(dataloader):
            feature += resnet50(b_frame[0].cuda()).detach()
        features = torch.cat([features, (feature / len(dataset)).cpu()])
    sys.stdout.write('... Done\n')
    sys.stdout.flush()
    return features
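For reference, a hedged sketch of calling Video2Tensor with the output of getVideoList; the label CSV and video directory paths are placeholders, and resnet50 is assumed to be a pretrained torchvision model defined globally, as the snippet implies.

# Hedged sketch; paths are placeholders and resnet50 must already be defined globally.
csv = getVideoList('gt_valid.csv')
features = Video2Tensor('video/valid/',
                        csv['Video_category'],
                        csv['Video_name'])
print(features.shape)   # (num_videos, 1000): mean ResNet-50 output per video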