def read_data(self, list_of_videos):
    """Build (observation, target) training examples from annotation files.

    For each video, slides a window of ``T_o`` observed frame labels over the
    frame-level annotation and pairs it with the label found ``T_a`` frames
    after the window.  Each example is ``[p_seq, p_tar]`` where ``p_seq`` is a
    padded matrix of per-action rows (1-hot class + normalized length in the
    last column) and ``p_tar`` is the 1-hot target action.  Examples are
    appended to ``self.list_of_examples`` and shuffled in place.

    Args:
        list_of_videos: paths of per-video annotation files, one frame label
            per whitespace-separated token (the trailing token is dropped).

    Returns:
        None.  Side effect: extends and shuffles ``self.list_of_examples``.
    """
    T_o = self.T_o
    T_a = self.T_a
    max_sq_len = 0
    totLabels = []
    for vid in list_of_videos:
        start_To = 0
        # 'with' guarantees the annotation file is closed (the original
        # opened it and never closed it, leaking a handle per video).
        with open(vid, 'r') as file_ptr:
            content = file_ptr.read().split()[:-1]
        # Scalar used to normalize action lengths; the paper specifies it
        # should equal the mean number of actions per video.
        T = (1.0 / self.alpha) * len(content)
        while start_To + T_o + T_a < len(content):
            obs = content[start_To:start_To + T_o]
            target = content[start_To + T_o + T_a]
            label_seq, length_seq = get_label_length_seq(obs)
            # p_seq: one row per observed action; nClasses 1-hot columns
            # plus a final column holding the normalized action length.
            p_seq = []
            seq_len = 0
            # BUGFIX: iterate labels and lengths in lockstep.  The original
            # used length_seq[label_seq.index(i)], which returns the FIRST
            # occurrence and therefore assigns the wrong length whenever a
            # label repeats within the observed window.
            for lbl, length in zip(label_seq, length_seq):
                row = np.zeros((self.nClasses + 1))
                row[-1] = length / T              # normalized action length
                row[self.actions_dict[lbl]] = 1   # 1-hot class encoding
                p_seq.append(row)
                seq_len += 1
                if lbl not in totLabels:
                    totLabels.append(lbl)
            if seq_len > max_sq_len:
                max_sq_len = seq_len
            # Zero-pad remaining rows up to the fixed sequence size.
            for _ in range(self.max_seq_sz - seq_len):
                p_seq.append(np.zeros((self.nClasses + 1)))
            # The target is the next action, 1-hot encoded.
            p_tar = np.zeros((self.nClasses))
            p_tar[self.actions_dict[target]] = 1
            self.list_of_examples.append([p_seq, p_tar])
            start_To = start_To + T_a
    random.shuffle(self.list_of_examples)
    return
def predict(self, sess, model_save_path, input_x, sigma, actions_dict):
    """Restore the model checkpoint and predict a per-row action labeling.

    Args:
        sess: active TensorFlow session.
        model_save_path: checkpoint path handed to ``self.saver.restore``.
        input_x: network input fed to the ``self.input_vid`` placeholder.
        sigma: smoothing parameter forwarded to ``__post_process``.
        actions_dict: mapping from action name -> class index.

    Returns:
        (label_seq, length_seq): collapsed label sequence and per-label run
        lengths of the post-processed prediction.
    """
    self.saver.restore(sess, model_save_path)
    result = sess.run([self.prediction], feed_dict={self.input_vid: input_x})[0]
    result = np.reshape(result, [self.nRows, 48])
    result = self.__post_process(result, sigma)
    # BUGFIX: under Python 3, dict.keys()/dict.values() return views that are
    # neither subscriptable nor have .index(), so the original
    # actions_dict.keys()[actions_dict.values().index(...)] crashes.
    # Build the inverse mapping (index -> name) once instead of scanning the
    # values for every row.
    index_to_action = {idx: name for name, idx in actions_dict.items()}
    output = [index_to_action[np.argmax(row)] for row in result]
    label_seq, length_seq = get_label_length_seq(output)
    return label_seq, length_seq
recog_sq[str(init)] = ["start"] for init in range(8): target_sq[str(init)] = ["start"] T = (1.0 / args.alpha) * vid_len while start_To + S_enc_frame + S_ant_frame + beta_frame < len( content): k = 0 for stride in range( start_To + S_enc_frame + beta_frame, start_To + S_enc_frame + S_ant_frame + beta_frame, beta_frame): obs = content[start_To:start_To + stride] target = content[start_To + S_enc_frame + S_ant_frame + beta_frame] label_seq, length_seq = get_label_length_seq(obs) #What i'm actually obeserving. p_seq is a matrix of 48 columns like classes + 1 where it stores the lenght of the action #Morover it has 25 rows beacuse the max number in a video is 25 for breakfast p_seq = [] seq_len = 0 for i in label_seq: p_seq.append(np.zeros((nClasses + 1))) #Lenght of the action obeserved p_seq[-1][-1] = length_seq[label_seq.index(i)] / T #1-hot encoding p_seq[-1][actions_dict[i]] = 1 seq_len += 1 #padding to zero every other free slot if (seq_len > max_sq_len): max_sq_len = seq_len for j in range(args.max_seq_sz - seq_len):
for obs_p in obs_percentages: if args.input_type == "decoded": file_ptr = open(args.decoded_path+"/obs"+str(obs_p)+"/"+f_name+'.txt', 'r') observed_content = file_ptr.read().split('\n')[:-1] vid_len = int(len(observed_content)/obs_p) elif args.input_type == "gt": observed_content = content[:int(obs_p*vid_len)] T = (1.0/args.alpha)*vid_len for pred_p in pred_percentages: pred_len = int(pred_p*vid_len) output_len = pred_len + len(observed_content) label_seq, length_seq = get_label_length_seq(observed_content) with tf.Session() as sess: label_seq, length_seq = model.predict(sess, model_restore_path, pred_len, label_seq, length_seq, actions_dict, T) recognition = [] for i in range(len(label_seq)): recognition = np.concatenate((recognition, [label_seq[i]]*int(length_seq[i]))) recognition = recognition[:output_len] #write results to file f_name = vid.split('/')[-1].split('.')[0] path=args.results_save_path+"/obs"+str(obs_p)+"-pred"+str(pred_p) write_predictions(path, f_name, recognition) elif args.model == "cnn": model = ModelCNN(args.nRows, nClasses)
n_T = np.zeros(len(classes)) n_F = np.zeros(len(classes)) totalGT = [] totalRecog = [] IoU = 0 index_act = int(args.nactions) div = 0 for filename in filelist: gt, recog = read_sequences( filename, args.ground_truth_path, obs_percentage) # gt e recog hano la stessa lunghezza if (recog != 0): label_seq_gt, length_seq_gt = get_label_length_seq(gt) label_seq_recog, length_seq_recog = get_label_length_seq(recog) gt_dict = get_label_dict(gt) recog_dict = get_label_dict(recog) if (len(label_seq_gt) >= index_act): act = label_seq_gt[index_act - 1] start_gt = gt_dict[index_act - 1][1] stop_gt = gt_dict[index_act - 1][2] # if(start_gt!=0): # print("ciao") # gt_dict = get_label_dict(recog_dict) act_recog = recog[start_gt:stop_gt]
observed_content = content[:int( obs_p * vid_len)] # observed_actions_per_frame unobserved_content = content[int(obs_p * vid_len):] T = (1.0 / args.alpha) * vid_len pred_percentages = [.1, .2, .3, .5, .7, .8] pred_percentages = [ pred_p for pred_p in pred_percentages if obs_p + pred_p <= 1.0 ] for pred_p in pred_percentages: pred_len = int(pred_p * vid_len) # num_frames_to_predict output_len = pred_len + len( observed_content) # write out observed + predictions label_seq, length_seq = get_label_length_seq( observed_content) obs_label_seq, obs_length_seq = list(label_seq), list( length_seq) num_obs_actions = len(obs_label_seq) with tf.Session() as sess: label_seq, length_seq = model.predict( sess, model_restore_path, pred_len, label_seq, length_seq, actions_dict, T) recognition = [] for i in range(len(label_seq)): recognition = np.concatenate( (recognition, [label_seq[i]] * int(length_seq[i]))) recognition = recognition[:output_len] # write results to file f_name = vid.split('/')[-1].split('.')[0]