def get(self, record, random_select = False):
    """
    Load one clip of ``self.new_length`` consecutive frames from a video.

    The encoded file at ``self.root + record.path`` is read into memory and
    the selected frame indexes are decoded by
    ``_load_action_frame_nums_to_4darray``. When fewer frames than
    ``self.new_length`` come back, the clip is padded by looping over the
    decoded frames from the beginning.

    :param record: entry providing ``path`` (relative to ``self.root``) and ``label``
    :param random_select: train: True (clip starts at a random frame);
        test: False (clip starts at ``min(random index, 10)``)
    :return: ``(frames, record.label)`` where ``frames`` is a float32 array
        whose first axis has length ``self.new_length``
    :raises RuntimeError: if ``utils.video_frame_count`` reports -1 frames
    :raises TypeError: if no frame could be decoded, or more frames than
        ``self.new_length`` were returned
    """
    video_path = self.root + record.path
    # `with` guarantees the handle is closed even when read() raises.
    with open(video_path, 'rb') as video_file:
        video = video_file.read()

    video_frames_num, width, height = utils.video_frame_count(video_path)
    if video_frames_num == -1:
        raise RuntimeError("No video stream avilable")

    # Drawn unconditionally so the RNG is consumed identically for every
    # video. The extra -1 keeps a safety margin at the end of the stream
    # (NOTE(review): presumably because the reported frame count can exceed
    # what is actually decodable -- confirm against utils.video_frame_count).
    rand_index = randint(0, max(1, video_frames_num - self.new_length - 1))

    if video_frames_num > self.new_length:
        if random_select:
            first = rand_index
            last = first + self.new_length
        else:
            first = min(rand_index, 10)
            last = min(first + self.new_length, video_frames_num)
    else:
        # Short video: request every frame but the last one.
        first, last = 0, video_frames_num - 1
    decoded_images_indexs = np.arange(first, last)

    decoded_images = np.asarray(
        _load_action_frame_nums_to_4darray(video, decoded_images_indexs, width, height))
    decoded_count = np.shape(decoded_images)[0]

    if decoded_count == 0:
        # Nothing to loop over; previously this surfaced as a
        # ZeroDivisionError from the padding loop.
        raise TypeError("imgs is short than need.!")
    if decoded_count < self.new_length:
        # Pad by cycling through the decoded frames: output frame k is
        # decoded frame k % decoded_count, the same order the former
        # one-frame-at-a-time concatenation produced.
        decoded_images = decoded_images[np.arange(self.new_length) % decoded_count]
    if np.shape(decoded_images)[0] != self.new_length:
        raise TypeError("imgs is short than need.!")

    process_data = np.asarray(decoded_images, dtype=np.float32)
    return process_data, record.label
def get_moco_cv(self, record, random_select = True):
    """
    Sample an anchor clip and a negative clip from one video via skvideo.

    The video is decoded with ``skvideo.io.vread`` up to the highest frame
    index needed, then both clips are gathered from the decoded frames
    (much slower than decoding selected frames, but accurate).

    :param record: entry providing ``path`` (relative to ``self.root``) and ``label``
    :param random_select: train:true test:false; accepted for interface
        parity with the other loaders but not used by this method
    :return: ``(anchor, negative, record.label)``; each clip holds
        ``self.new_length`` frames taken ``self.stride`` apart
    :raises RuntimeError: if ``utils.video_frame_count`` reports -1 frames
    """
    video_path = self.root + record.path
    video_frames_num, _, _ = utils.video_frame_count(video_path)
    if video_frames_num == -1:
        raise RuntimeError("No video stream avilable")

    stride = self.stride
    max_start = max(1, video_frames_num - self.new_length * stride - 1)
    rand_index = randint(0, max_start)
    rand_index2 = randint(0, max_start)

    # Push the negative clip away from the anchor when the two starts are
    # closer than 4 strides.
    if abs(rand_index - rand_index2) < 4 * stride:
        if rand_index > rand_index2:
            # Keep high strictly above low: numpy's randint has an exclusive
            # upper bound and raises ValueError when low >= high, which
            # happened here for videos of at most 9 * stride frames.
            rand_index2 += randint(6 * stride,
                                   max(6 * stride + 1, video_frames_num - 3 * stride))
        else:
            rand_index2 += randint(3 * stride,
                                   max(6 * stride, video_frames_num - 9 * stride))

    # Frame indexes wrap around the end of the video.
    offsets = [i * stride for i in range(self.new_length)]
    anchor_indexs = [(rand_index + offset) % video_frames_num for offset in offsets]
    negative_indexs = [(rand_index2 + offset) % video_frames_num for offset in offsets]

    # Decode only as many frames as the largest requested index requires.
    frames_needed = max(max(anchor_indexs), max(negative_indexs)) + 1
    videodata = skvideo.io.vread(video_path, num_frames=frames_needed)
    anchor = videodata[anchor_indexs, :, :, :]
    negative = videodata[negative_indexs, :, :, :]
    return anchor, negative, record.label
def get_moco(self, record, random_select = True):
    """
    Sample two clips (anchor and a second clip) from the same video.

    The encoded file at ``self.root + record.path`` is read into memory
    once; each clip's frame indexes are then decoded by
    ``_load_action_frame_nums_to_4darray``.

    :param record: entry providing ``path`` (relative to ``self.root``) and ``label``
    :param random_select: train: True (clips start at random frames);
        test: False (clips start at ``min(random index, 10)`` and stop at
        the end of the video)
    :return: ``(process_data, process_data2, record.label)``; both clips
        are float32 arrays
    :raises RuntimeError: if ``utils.video_frame_count`` reports -1 frames,
        or fewer than one usable frame remains
    """
    video_path = self.root + record.path
    # `with` guarantees the handle is closed even when read() raises.
    with open(video_path, 'rb') as video_file:
        video = video_file.read()

    video_frames_num, width, height = utils.video_frame_count(video_path)
    if video_frames_num == -1:
        raise RuntimeError("No video stream avilable")
    video_frames_num -= 1  # cv2 often more 1 than ffmpeg
    if video_frames_num < 1:
        # The modulo arithmetic below needs at least one usable frame;
        # previously this case failed later with an unrelated error.
        raise RuntimeError("Video has too few frames to sample a clip")

    stride = self.stride
    span = self.new_length * stride
    max_start = max(1, video_frames_num - span - 1)

    def clip_indexes(start):
        # Build the strictly increasing frame indexes of one clip.
        if random_select:
            indexes = np.arange(start, span + start, stride)
        else:
            first = min(start, 10)
            indexes = np.arange(first, min(span + first, video_frames_num), stride)
        # Wrap around the end of the video, then restore ascending order.
        indexes %= video_frames_num
        indexes = np.sort(indexes)
        # must be in grow seq: bump repeats so 0,0,0,1,2 becomes 0,1,2,3,4.
        # NOTE(review): bumped values are not wrapped again and may exceed
        # video_frames_num - 1 for very short videos.
        for j in range(len(indexes) - 1):
            if indexes[j] >= indexes[j + 1]:
                indexes[j + 1] = indexes[j] + 1
        return indexes

    rand_index = randint(0, max_start)
    decoded_images = _load_action_frame_nums_to_4darray(
        video, clip_indexes(rand_index), width, height)
    process_data = np.asarray(decoded_images, dtype=np.float32)

    # Draw the second start until it is at least `thresh` frames away from
    # the first; after four failed draws, shift it by a random offset.
    rand_index2 = rand_index
    count = 0
    # important hyperparameter
    thresh = 4
    while abs(rand_index - rand_index2) < thresh:
        rand_index2 = randint(0, max_start)
        count += 1
        if count > 3:
            # max(1, ...) keeps the bound positive: for fewer than 3 frames
            # both quotients are 0 and randint(0) raises ValueError.
            shift_bound = max(1, video_frames_num // 8, video_frames_num // 3 * 2)
            rand_index2 = (rand_index2 + randint(shift_bound)) % video_frames_num
            break

    # Uses the same helper as the first clip, so the non-random branch now
    # also spans new_length * stride frames (it used new_length alone,
    # giving a shorter second clip whenever stride > 1).
    decoded_images2 = _load_action_frame_nums_to_4darray(
        video, clip_indexes(rand_index2), width, height)
    process_data2 = np.asarray(decoded_images2, dtype=np.float32)
    return process_data, process_data2, record.label