def __init__(self, data_dir, data_split, device='cpu', scale=1, coll_th=0.2):
    """
    Build the trajectory dataset for one split and cache it as a pickle.

    Observation length (8), prediction length (12), window stride (1),
    file delimiter (',') and time step (0.4) are fixed inside this method.

    Args:
    - data_dir: Root directory. It must contain ``<data_split>.txt``
      (one trajectory file path per line, relative to data_dir) and, in
      every leaf sub-directory, a map image whose name contains 'png'
      plus a homography file named like the image with 'png' replaced
      by 'hom'.
    - data_split: Split name. 'train' keeps at most 40 trajectory files
      per directory, any other value at most 10.
    - device: Stored as ``self.device``.
    - scale: Stored as ``self.scale``.
    - coll_th: Distance threshold used to filter agents. Within a window,
      an agent whose distance to a lower-indexed agent is strictly between
      0 and coll_th at any time step is removed from that window.

    Side effects:
    - Prints progress information.
    - Writes ``<data_dir>/<data_split>_threshold<coll_th>.pkl``.
    """
    super(TrajectoryDataset, self).__init__()
    self.obs_len = 8
    self.pred_len = 12
    skip = 1
    self.scale = scale
    self.seq_len = self.obs_len + self.pred_len
    self.device = device
    delim = ','
    dt = 0.4
    min_ped = 0
    data_dir = data_dir.replace('\\', '/')
    if data_split == 'train':
        max_num_file = 40
    else:
        max_num_file = 10
    n_state = 6

    # Select at most max_num_file consecutive entries per parent directory
    # from the split's file list (entries use '\\' as path separator).
    with open(os.path.join(data_dir, data_split + '.txt')) as list_file:
        listed = list_file.readlines()
    all_files = []
    prev_file = ''
    num_file = 0
    for entry in listed:
        parent = entry.split('\\')[:-1]
        if parent == prev_file and num_file == max_num_file:
            continue
        elif parent != prev_file:
            num_file = 0
            prev_file = parent
        num_file += 1
        all_files.append(entry.rstrip().replace('\\', '/'))

    # Load one map image and one homography per leaf directory, keyed by
    # the directory path relative to data_dir.
    self.inv_h_t = {}
    self.maps = {}
    for root, subdirs, file_names in os.walk(data_dir):
        if len(subdirs) > 0:
            continue
        map_file_name = [
            file_name for file_name in file_names if 'png' in file_name
        ][0]
        key = root[len(data_dir):].replace('\\', '/')[1:]
        m = imageio.imread(
            os.path.join(root, map_file_name)).astype(float) / 255
        self.maps.update({key: m})
        h = read_file(
            os.path.join(root, map_file_name.replace('png', 'hom')), ',')
        inv_h_t = np.linalg.inv(np.transpose(h))
        self.inv_h_t.update({key: inv_h_t})

    num_peds_in_seq = []
    seq_list = []
    obs_frame_num = []
    fut_frame_num = []
    data_files = []
    for path in all_files:
        loaded_data = read_file(os.path.join(data_dir, path), delim)
        data = pd.DataFrame(loaded_data)
        data.columns = ['f', 'a', 'pos_x', 'pos_y']
        data.sort_values(by=['f', 'a'], inplace=True)
        frames = data['f'].unique().tolist()
        frame_data = [data[data['f'] == frame].values for frame in frames]
        num_sequences = int(
            math.ceil((len(frames) - self.seq_len + 1) / skip))

        # Slide a window of seq_len frames over all frames; skip is the
        # stride. Exactly num_sequences complete windows exist, so the
        # range stops there (one more would always be incomplete).
        for idx in range(0, num_sequences * skip, skip):
            # Rows of every frame in the window. The number of agents per
            # frame may differ; each row is (frame, agent id, pos_x, pos_y).
            curr_seq_data = np.concatenate(
                frame_data[idx:idx + self.seq_len], axis=0)
            peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
            curr_seq = np.zeros(
                (len(peds_in_curr_seq), n_state, self.seq_len))
            num_peds_considered = 0
            for ped_id in peds_in_curr_seq:
                curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == ped_id, :]
                curr_ped_seq = np.around(curr_ped_seq, decimals=4)
                # Offsets relative to the window start: pad_front == 0
                # means the agent is present from the first window frame.
                pad_front = frames.index(curr_ped_seq[0, 0]) - idx
                # +1 because pad_end is used as an exclusive slice end.
                pad_end = frames.index(curr_ped_seq[-1, 0]) - idx + 1
                if pad_end - pad_front != self.seq_len:
                    # Agent does not span the whole window.
                    continue
                # State rows: x, y, x', y', x'', y''
                x = curr_ped_seq[:, 2]
                y = curr_ped_seq[:, 3]
                vx = derivative_of(x, dt)
                vy = derivative_of(y, dt)
                ax = derivative_of(vx, dt)
                ay = derivative_of(vy, dt)
                curr_seq[num_peds_considered, :, pad_front:pad_end] = \
                    np.stack([x, y, vx, vy, ax, ay])
                num_peds_considered += 1

            # Keep the window only if enough agents span it completely.
            if num_peds_considered > min_ped:
                seq_traj = curr_seq[:num_peds_considered][:, :2]
                exclude_idx = []
                # Check every time step of the window (was hard-coded 20).
                for t in range(self.seq_len):
                    # Pairwise distances: rows AAABBBCCC vs ABCABCABC.
                    curr1 = seq_traj[:, :, t].repeat(num_peds_considered, 0)
                    curr2 = np.stack(
                        [seq_traj[:, :, t]] * num_peds_considered
                    ).reshape(-1, 2)
                    dist = np.linalg.norm(curr1 - curr2, axis=1)
                    dist = dist.reshape(num_peds_considered,
                                        num_peds_considered)
                    # Zero the upper triangle so each pair is seen once and
                    # only the higher-indexed agent of a pair is flagged.
                    diff_agent_idx = np.triu_indices(num_peds_considered, k=1)
                    dist[diff_agent_idx] = 0
                    under_rows, _ = np.where((dist > 0) & (dist < coll_th))
                    exclude_idx.extend(under_rows.tolist())
                exclude_idx = np.unique(exclude_idx)
                if len(exclude_idx) == num_peds_considered:
                    continue
                if len(exclude_idx) > 0:
                    print(len(exclude_idx), '/', num_peds_considered)
                    valid_idx = [
                        i for i in range(num_peds_considered)
                        if i not in exclude_idx
                    ]
                    num_peds_considered = len(valid_idx)
                    seq_list.append(curr_seq[valid_idx])
                else:
                    seq_list.append(curr_seq[:num_peds_considered])
                num_peds_in_seq.append(num_peds_considered)
                obs_frame_num.append(
                    np.ones((num_peds_considered, self.obs_len)) *
                    frames[idx:idx + self.obs_len])
                fut_frame_num.append(
                    np.ones((num_peds_considered, self.pred_len)) *
                    frames[idx + self.obs_len:idx + self.seq_len])
                data_files.append(path)

        # Running statistics after each file: total agents so far, mean and
        # max agents per window. Skipped while no window has been accepted,
        # where indexing the empty array used to raise IndexError.
        cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
        aa = np.array([
            (start, end)
            for start, end in zip(cum_start_idx, cum_start_idx[1:])
        ])
        if len(aa) > 0:
            print(path, aa[-1][1],
                  np.round((aa[:, 1] - aa[:, 0]).mean(), 2),
                  np.round((aa[:, 1] - aa[:, 0]).max(), 2))

    seq_list = np.concatenate(seq_list, axis=0)
    self.obs_frame_num = np.concatenate(obs_frame_num, axis=0)
    self.fut_frame_num = np.concatenate(fut_frame_num, axis=0)

    self.obs_traj = seq_list[:, :, :self.obs_len]
    self.fut_traj = seq_list[:, :, self.obs_len:]
    # Cumulative agent counts give the (start, end) row range of every
    # window inside the concatenated arrays.
    cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
    self.seq_start_end = [
        (start, end)
        for start, end in zip(cum_start_idx, cum_start_idx[1:])
    ]
    # NOTE(review): num_seq counts agent rows here, not windows.
    self.num_seq = len(self.obs_traj)
    self.data_files = np.stack(data_files)

    self.local_map = []
    self.local_homo = []
    self.local_ic = []
    print(self.seq_start_end[-1])
    ss = np.array(self.seq_start_end)
    for i in range(len(self.obs_traj)):
        if i % 500 == 0:
            print(i)
        # Window that contains agent row i; its file's directory is the
        # key into the map / homography dictionaries.
        seq_idx = np.where((i >= ss[:, 0]) & (i < ss[:, 1]))[0][0]
        key = '/'.join(self.data_files[seq_idx].split('/')[:-1])
        global_map = self.maps[key]
        inv_h_t = self.inv_h_t[key]
        all_traj = np.concatenate(
            [self.obs_traj[i, :2], self.fut_traj[i, :2]],
            axis=1).transpose(1, 0)
        # Project positions with the homography (homogeneous coordinates).
        map_traj = np.matmul(
            np.concatenate([all_traj, np.ones((len(all_traj), 1))], 1),
            inv_h_t)
        map_traj /= np.expand_dims(map_traj[:, 2], 1)
        map_traj = map_traj[:, :2]
        # Local map radius: 20x the mean per-step displacement on the map.
        radius = np.sqrt(
            ((map_traj[1:] - map_traj[:-1])**2).sum(1)).mean() * 20
        radius = np.round(radius).astype(int)
        local_map, local_ic, local_homo = self.get_local_map_ic(
            global_map, inv_h_t, map_traj, all_traj,
            zoom=10, radius=radius, compute_local_homo=True)
        self.local_map.append(local_map)
        self.local_ic.append(local_ic)
        self.local_homo.append(local_homo)
    self.local_ic = np.stack(self.local_ic)
    self.local_homo = np.stack(self.local_homo)

    all_data = {
        'seq_start_end': self.seq_start_end,
        'obs_traj': self.obs_traj,
        'fut_traj': self.fut_traj,
        'obs_frame_num': self.obs_frame_num,
        'fut_frame_num': self.fut_frame_num,
        'data_file_name': self.data_files,
        'inv_h_t': self.inv_h_t,
        'local_map': self.local_map,
        'local_ic': self.local_ic,
        'local_homo': self.local_homo,
    }
    save_path = os.path.join(
        data_dir, data_split + '_threshold' + str(coll_th) + '.pkl')
    with open(save_path, 'wb') as handle:
        pickle5.dump(all_data, handle, protocol=pickle5.HIGHEST_PROTOCOL)
def __init__(self, data_dir, data_name, data_split, obs_len=8, pred_len=12,
             skip=1, min_ped=0, delim='\t', dt=0.4):
    """
    Build the dataset with per-agent local maps and cache it as a pickle.

    Args:
    - data_dir: Root directory. Trajectory files are read from
      ``<data_dir>/<data_name>/<data_split>``; homographies
      (``<name>_H.txt``) and maps (``<name>_map.png``) from
      ``<data_dir>/nmap/map/``.
    - data_name: Dataset name. For the 'test' split of every dataset except
      'eth' the frame list is truncated (hotel: 550, names containing
      'zara': 200, otherwise 40 frames).
    - data_split: Split sub-directory name.
    - obs_len: Number of time-steps in input trajectories
    - pred_len: Number of time-steps in output trajectories
    - skip: NOT used as the window stride (the stride is fixed to 1). It
      only decides what happens to files whose name matches no known
      scene: skip > 0 ignores the file, otherwise the file is processed
      without a map.
    - min_ped: A window is kept only if more than min_ped agents span it.
    - delim: Delimiter in the dataset files
    - dt: Time step passed to ``derivative_of``.

    Side effects:
    - Prints progress information.
    - Writes ``<data_dir>/<data_name>/<data_split>.pickle``.
    """
    super(TrajectoryDataset, self).__init__()

    self.data_dir = os.path.join(data_dir, data_name, data_split)
    self.obs_len = obs_len
    self.pred_len = pred_len
    # Stride is deliberately fixed; `skip` may legitimately be <= 0 (flag).
    self.skip = 1
    self.seq_len = self.obs_len + self.pred_len
    self.delim = delim
    self.map_dir = os.path.join(data_dir, 'nmap/map/')
    n_state = 6

    all_files = os.listdir(self.data_dir)
    all_files = [os.path.join(self.data_dir, _path) for _path in all_files]
    num_peds_in_seq = []
    seq_list = []
    obs_frame_num = []
    fut_frame_num = []
    map_file_names = []
    inv_h_ts = []
    local_map_size = []
    deli = '/'
    for path in all_files:
        print('data path:', path)
        file_name = path.split(deli)[-1]
        if 'zara01' in file_name:
            map_file_name = 'zara01'
        elif 'zara02' in file_name:
            map_file_name = 'zara02'
        elif 'eth' in file_name:
            map_file_name = 'eth'
        elif 'hotel' in file_name:
            map_file_name = 'hotel'
        elif 'students003' in file_name:
            map_file_name = 'univ'
        else:
            if skip > 0:
                map_file_name = 'skip'
                print('map path: ', map_file_name)
                continue
            else:
                map_file_name = ''
        print('map path: ', map_file_name)

        if map_file_name != '':
            h = np.loadtxt(
                os.path.join(self.map_dir, map_file_name + '_H.txt'))
            inv_h_t = np.linalg.pinv(np.transpose(h))
            map_file_name = os.path.join(self.map_dir,
                                         map_file_name + '_map.png')
        else:
            inv_h_t = np.zeros((3, 3))

        data = read_file(path, self.delim)
        if 'zara01' in map_file_name:
            # NOTE(review): zara01 frame ids are shifted by 10; the reason
            # is not visible in this block.
            frames = (np.unique(data[:, 0]) + 10).tolist()
        else:
            frames = np.unique(data[:, 0]).tolist()

        if data_split == 'test' and data_name != 'eth':
            print(len(frames))
            if data_name == 'hotel':
                idx = 550
            elif 'zara' in data_name:
                idx = 200
            else:
                idx = 40
            frames = frames[:idx]

        # All rows of each frame.
        frame_data = [data[frame == data[:, 0], :] for frame in frames]
        num_sequences = int(
            math.ceil((len(frames) - self.seq_len + 1) / self.skip))
        this_seq_obs = []

        # Slide a window of seq_len frames over all frames; self.skip is
        # the stride. Exactly num_sequences complete windows exist, so the
        # range stops there (one more would always be incomplete).
        for idx in range(0, num_sequences * self.skip, self.skip):
            # Rows of every frame in the window. The number of agents per
            # frame may differ; each row is (frame, agent id, pos_x, pos_y).
            curr_seq_data = np.concatenate(
                frame_data[idx:idx + self.seq_len], axis=0)
            peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
            curr_seq = np.zeros(
                (len(peds_in_curr_seq), n_state, self.seq_len))
            num_peds_considered = 0
            for ped_id in peds_in_curr_seq:
                curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == ped_id, :]
                curr_ped_seq = np.around(curr_ped_seq, decimals=4)
                # Offsets relative to the window start: pad_front == 0
                # means the agent is present from the first window frame.
                pad_front = frames.index(curr_ped_seq[0, 0]) - idx
                # +1 because pad_end is used as an exclusive slice end.
                pad_end = frames.index(curr_ped_seq[-1, 0]) - idx + 1
                if pad_end - pad_front != self.seq_len:
                    # Agent does not span the whole window.
                    continue
                # State rows: x, y, x', y', x'', y''
                x = curr_ped_seq[:, 2]
                y = curr_ped_seq[:, 3]
                vx = derivative_of(x, dt)
                vy = derivative_of(y, dt)
                ax = derivative_of(vx, dt)
                ay = derivative_of(vy, dt)
                curr_seq[num_peds_considered, :, pad_front:pad_end] = \
                    np.stack([x, y, vx, vy, ax, ay])
                num_peds_considered += 1

            # Keep the window only if enough agents span it completely.
            if num_peds_considered > min_ped:
                num_peds_in_seq.append(num_peds_considered)
                # curr_seq was allocated for every agent in the window but
                # only the first num_peds_considered rows were filled.
                seq_list.append(curr_seq[:num_peds_considered])
                # Observed positions only (was hard-coded to 8 steps).
                this_seq_obs.append(
                    curr_seq[:num_peds_considered, :2, :self.obs_len])
                obs_frame_num.append(
                    np.ones((num_peds_considered, self.obs_len)) *
                    frames[idx:idx + self.obs_len])
                fut_frame_num.append(
                    np.ones((num_peds_considered, self.pred_len)) *
                    frames[idx + self.obs_len:idx + self.seq_len])
                map_file_names.append(map_file_name)
                inv_h_ts.append(inv_h_t)

        if not this_seq_obs:
            # No valid window in this file: nothing to size, and
            # np.concatenate / np.stack would raise on an empty list.
            continue

        # Per-agent local map size from the mean per-step displacement of
        # the observed trajectory.
        if map_file_name == '':
            per_step_dist = []
            for obs_traj in np.concatenate(this_seq_obs):
                obs_traj = obs_traj.transpose(1, 0)
                per_step_dist.append(
                    np.sqrt(
                        ((obs_traj[1:] - obs_traj[:-1])**2).sum(1)).mean())
            mean_pixel_dist = 0.7
        else:
            all_pixels = []
            for obs_traj in np.concatenate(this_seq_obs):
                obs_traj = obs_traj.transpose(1, 0)
                # Project positions with the homography.
                all_pixel = np.matmul(
                    np.concatenate(
                        [obs_traj, np.ones((len(obs_traj), 1))], axis=1),
                    inv_h_t)
                all_pixel /= np.expand_dims(all_pixel[:, 2], 1)
                all_pixels.append(all_pixel[:, :2])
            all_pixels = np.stack(all_pixels)
            per_step_dist = []
            for all_pixel in all_pixels:
                per_step_dist.append(
                    np.sqrt(
                        ((all_pixel[1:] - all_pixel[:-1])**2).sum(1)).mean())
            # Lower bound: projected length of a 0.7 step between the
            # points (0, 0) and (0, 0.7).
            two_wc_pts = np.array([[0, 0], [0, 0.7]])
            two_ic_pts = np.matmul(
                np.concatenate(
                    [two_wc_pts, np.ones((len(two_wc_pts), 1))], axis=1),
                inv_h_t)
            two_ic_pts /= np.expand_dims(two_ic_pts[:, 2], 1)
            mean_pixel_dist = np.linalg.norm(two_ic_pts[1, :2] -
                                             two_ic_pts[0, :2])

        per_step_dist = np.array(per_step_dist)
        local_map_size.extend(
            list((np.clip(per_step_dist, mean_pixel_dist, None) *
                  16).astype(int)))

    self.num_seq = len(seq_list)  # number of accepted windows
    # Cumulative agent counts give the (start, end) row range of every
    # window inside the concatenated arrays.
    cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
    self.seq_start_end = [
        (start, end)
        for start, end in zip(cum_start_idx, cum_start_idx[1:])
    ]
    print(self.seq_start_end[-1])

    seq_list = np.concatenate(seq_list, axis=0)
    self.obs_frame_num = np.concatenate(obs_frame_num, axis=0)
    self.fut_frame_num = np.concatenate(fut_frame_num, axis=0)

    self.obs_traj = seq_list[:, :, :self.obs_len]
    self.fut_traj = seq_list[:, :, self.obs_len:]

    self.map_file_name = map_file_names
    self.local_map_size = local_map_size
    self.inv_h_t = inv_h_ts
    self.local_map = []
    self.local_homo = []
    self.local_ic = []
    self.undated_local_map_size = []

    for seq_i in range(len(self.seq_start_end)):
        start, end = self.seq_start_end[seq_i]
        local_maps = []
        local_ics = []
        local_homos = []
        if self.map_file_name[seq_i] == '':
            zoom = 40
            for idx in range(start, end):
                all_traj = np.concatenate(
                    [self.obs_traj[idx, :2], self.fut_traj[idx, :2]],
                    axis=1).transpose(1, 0)
                local_map, local_ic, local_h = get_local_map_ic_no_map(
                    all_traj, zoom=zoom, radius=self.local_map_size[idx])
                local_maps.append(local_map)
                local_ics.append(local_ic)
                local_homos.append(local_h)
            window_map_size = np.array(
                self.local_map_size[start:end]) * 2 * zoom
        else:
            global_map = 255 - imageio.imread(self.map_file_name[seq_i])
            inv_h_t = self.inv_h_t[seq_i]
            for idx in range(start, end):
                all_traj = np.concatenate(
                    [self.obs_traj[idx, :2], self.fut_traj[idx, :2]],
                    axis=1).transpose(1, 0)
                local_map, local_ic, local_h = get_local_map_ic(
                    global_map, all_traj, inv_h_t,
                    zoom=1, radius=self.local_map_size[idx])
                local_maps.append(local_map)
                local_ics.append(local_ic)
                local_homos.append(local_h)
            window_map_size = np.array(self.local_map_size[start:end]) * 2
        self.local_map.append(np.stack(local_maps))
        self.local_ic.append(np.stack(local_ics))
        self.local_homo.append(np.stack(local_homos))
        self.undated_local_map_size.append(window_map_size)

    self.local_map = np.concatenate(self.local_map)
    self.local_ic = np.concatenate(self.local_ic)
    self.local_homo = np.concatenate(self.local_homo)
    self.undated_local_map_size = np.concatenate(
        self.undated_local_map_size)

    all_data = {
        'seq_start_end': self.seq_start_end,
        'obs_traj': self.obs_traj,
        'fut_traj': self.fut_traj,
        'obs_frame_num': self.obs_frame_num,
        'fut_frame_num': self.fut_frame_num,
        'map_file_name': self.map_file_name,
        'inv_h_t': self.inv_h_t,
        'local_map': self.local_map,
        'local_ic': self.local_ic,
        'local_homo': self.local_homo,
        'local_map_size': self.undated_local_map_size,
    }
    save_path = os.path.join(data_dir, data_name, data_split + '.pickle')
    with open(save_path, 'wb') as handle:
        pickle.dump(all_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
def __init__(
        self, data_dir, obs_len=8, pred_len=12, skip=1, context_size=198,
        min_ped=0, delim='\t', device='cpu', dt=0.4, map_ae=False
):
    """
    Build per-agent trajectory windows from the CSV files of a fixed folder.

    Args:
    - data_dir: Stored as ``self.data_dir``. NOTE: the files are actually
      read from a hard-coded ``root_dir`` below, not from data_dir.
    - obs_len: Number of time-steps in input trajectories
    - pred_len: Number of time-steps in output trajectories
    - skip: Stride of the sliding window, in (sub-sampled) frames
    - context_size: Stored as ``self.context_size``.
    - min_ped: A window is kept only if more than min_ped agents span it.
    - delim: Delimiter in the dataset files
    - device: Stored as ``self.device``.
    - dt: Time step passed to ``derivative_of``.
    - map_ae: Unused in this method.

    Only the first 30 matching CSV files are processed; every agent is
    handled separately and sub-sampled to every 5th row.
    """
    super(TrajectoryDataset, self).__init__()

    self.data_dir = data_dir
    self.obs_len = obs_len
    self.pred_len = pred_len
    self.skip = skip
    self.seq_len = self.obs_len + self.pred_len
    self.delim = delim
    self.device = device
    self.context_size = context_size
    n_pred_state = 2
    n_state = 6

    # Raw string: same value as before, without invalid escape sequences.
    root_dir = r'D:\crowd\datasets\Trajectories\Trajectories'
    # NOTE(review): the literal 'h**o' is kept byte-for-byte; it looks like
    # a mangled file-name filter - confirm the intended substring.
    all_files = [e for e in os.listdir(root_dir)
                 if ('.csv' in e) and ('h**o' not in e)]
    all_files = all_files[:30]

    num_peds_in_seq = []
    seq_list = []
    seq_list_rel = []
    seq_past_obst_list = []
    seq_fut_obst_list = []
    obs_frame_num = []
    fut_frame_num = []
    map_file_names = []
    for path in all_files:
        path = os.path.join(root_dir, path.rstrip().replace('\\', '/'))
        print('data path:', path)
        map_file_name = path.replace('.csv', '.png')
        print('map path: ', map_file_name)

        loaded_data = read_file(path, delim)
        data1 = pd.DataFrame(loaded_data)
        data1.columns = ['f', 'a', 'pos_x', 'pos_y']
        data1.sort_values(by=['f', 'a'], inplace=True)

        for agent_idx in data1['a'].unique():
            # One agent at a time, keeping every 5th row.
            data = data1[data1['a'] == agent_idx].iloc[::5]
            frames = data['f'].unique().tolist()
            frame_data = [data[data['f'] == frame].values
                          for frame in frames]
            num_sequences = int(
                math.ceil((len(frames) - self.seq_len + 1) / self.skip))

            # Slide a window of seq_len frames over the frames; self.skip
            # is the stride. Exactly num_sequences complete windows exist;
            # the previous "+ 1" added an incomplete window that could be
            # empty (and crash np.concatenate) for large strides.
            for idx in range(0, num_sequences * self.skip, self.skip):
                # Rows of every frame in the window; each row is
                # (frame, agent id, pos_x, pos_y).
                curr_seq_data = np.concatenate(
                    frame_data[idx:idx + self.seq_len], axis=0)
                peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
                curr_seq_rel = np.zeros(
                    (len(peds_in_curr_seq), n_pred_state, self.seq_len))
                curr_seq = np.zeros(
                    (len(peds_in_curr_seq), n_state, self.seq_len))
                num_peds_considered = 0
                for ped_id in peds_in_curr_seq:
                    curr_ped_seq = curr_seq_data[
                        curr_seq_data[:, 1] == ped_id, :]
                    curr_ped_seq = np.around(curr_ped_seq, decimals=4)
                    # Offsets relative to the window start: pad_front == 0
                    # means the agent is present from the first frame.
                    pad_front = frames.index(curr_ped_seq[0, 0]) - idx
                    # +1 because pad_end is an exclusive slice end.
                    pad_end = frames.index(curr_ped_seq[-1, 0]) - idx + 1
                    if pad_end - pad_front != self.seq_len:
                        # Agent does not span the whole window.
                        continue
                    # State rows: x, y, x', y', x'', y''
                    x = curr_ped_seq[:, 2]
                    y = curr_ped_seq[:, 3]
                    vx = derivative_of(x, dt)
                    vy = derivative_of(y, dt)
                    ax = derivative_of(vx, dt)
                    ay = derivative_of(vy, dt)
                    _idx = num_peds_considered
                    curr_seq[_idx, :, pad_front:pad_end] = np.stack(
                        [x, y, vx, vy, ax, ay])
                    curr_seq_rel[_idx, :, pad_front:pad_end] = np.stack(
                        [vx, vy])
                    num_peds_considered += 1

                    # Positions of the other agents, per frame, split into
                    # observed and future parts.
                    per_frame_past_obst = []
                    per_frame_fut_obst = []
                    # Equality, not identity: `is ''` is unreliable and a
                    # SyntaxWarning.
                    if map_file_name == '':
                        per_frame_past_obst = [
                            [] for _ in range(self.obs_len)]
                        per_frame_fut_obst = [
                            [] for _ in range(self.pred_len)]
                    else:
                        curr_obst_seq = curr_seq_data[
                            curr_seq_data[:, 1] != ped_id, :]
                        # curr_ped_seq passed the check above, so it spans
                        # the full window.
                        for step, frame in enumerate(
                                np.unique(curr_ped_seq[:, 0])):
                            neighbor_ped = curr_obst_seq[
                                curr_obst_seq[:, 0] == frame][:, 2:]
                            if len(neighbor_ped) == 0:
                                obst = []
                            else:
                                obst = np.around(neighbor_ped, decimals=4)
                            if step < self.obs_len:
                                per_frame_past_obst.append(obst)
                            else:
                                per_frame_fut_obst.append(obst)
                    seq_past_obst_list.append(per_frame_past_obst)
                    seq_fut_obst_list.append(per_frame_fut_obst)

                # Keep the window only if enough agents span it.
                if num_peds_considered > min_ped:
                    num_peds_in_seq.append(num_peds_considered)
                    # curr_seq was allocated for every agent in the window
                    # but only the first num_peds_considered rows are used.
                    seq_list.append(curr_seq[:num_peds_considered])
                    seq_list_rel.append(curr_seq_rel[:num_peds_considered])
                    obs_frame_num.append(
                        np.ones((num_peds_considered, self.obs_len)) *
                        frames[idx:idx + self.obs_len])
                    fut_frame_num.append(
                        np.ones((num_peds_considered, self.pred_len)) *
                        frames[idx + self.obs_len:idx + self.seq_len])
                    map_file_names.append(map_file_name)

    self.num_seq = len(seq_list)  # number of accepted windows
    seq_list = np.concatenate(seq_list, axis=0)
    seq_list_rel = np.concatenate(seq_list_rel, axis=0)
    self.obs_frame_num = np.concatenate(obs_frame_num, axis=0)
    self.fut_frame_num = np.concatenate(fut_frame_num, axis=0)
    self.past_obst = seq_past_obst_list
    self.fut_obst = seq_fut_obst_list

    # Convert numpy -> Torch Tensor
    self.obs_traj = torch.from_numpy(
        seq_list[:, :, :self.obs_len]).type(torch.float)
    self.pred_traj = torch.from_numpy(
        seq_list[:, :, self.obs_len:]).type(torch.float)
    self.obs_traj_rel = torch.from_numpy(
        seq_list_rel[:, :, :self.obs_len]).type(torch.float)
    self.pred_traj_rel = torch.from_numpy(
        seq_list_rel[:, :, self.obs_len:]).type(torch.float)

    # Cumulative agent counts give the (start, end) row range of every
    # window inside the concatenated tensors.
    cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
    self.seq_start_end = [
        (start, end)
        for start, end in zip(cum_start_idx, cum_start_idx[1:])
    ]
    self.map_file_name = map_file_names
    print(self.seq_start_end[-1])
    print(len(self.seq_start_end))
def __init__(
        self, data_dir, obs_len=8, pred_len=12, skip=1, context_size=198,
        resize=64, min_ped=0, delim='\t', device='cpu', dt=0.4, map_ae=False
):
    """
    Build trajectory windows plus per-agent local map sizes.

    Args:
    - data_dir: Directory containing the trajectory files.
    - obs_len: Number of time-steps in input trajectories
    - pred_len: Number of time-steps in output trajectories
    - skip: NOT used as the window stride (the stride is fixed to 1). It
      only decides what happens to files whose name matches no known
      scene: skip > 0 ignores the file, otherwise a fallback map name is
      used.
    - context_size: Stored as ``self.context_size``.
    - resize: Stored as ``self.resize``.
    - min_ped: A window is kept only if more than min_ped agents span it.
    - delim: Ignored; files are always read with a tab delimiter.
    - device: Stored as ``self.device``.
    - dt: Time step passed to ``derivative_of``.
    - map_ae: Unused in this method.

    NOTE: only files whose path contains 'zara03' are processed, and maps /
    homographies are read from the relative path '../datasets/nmap/map/'.
    """
    super(TrajectoryDataset, self).__init__()

    self.data_dir = data_dir
    self.obs_len = obs_len
    self.pred_len = pred_len
    # Stride is deliberately fixed; `skip` may legitimately be <= 0 (flag).
    self.skip = 1
    self.seq_len = self.obs_len + self.pred_len
    # NOTE(review): the `delim` argument is not used here - confirm whether
    # that is intended before wiring it in.
    self.delim = '\t'
    self.device = device
    self.map_dir = '../datasets/nmap/map/'
    n_state = 6
    self.context_size = context_size
    self.resize = resize

    all_files = os.listdir(self.data_dir)
    all_files = [os.path.join(self.data_dir, _path) for _path in all_files]
    num_peds_in_seq = []
    seq_list = []
    seq_past_obst_list = []
    seq_fut_obst_list = []
    obs_frame_num = []
    fut_frame_num = []
    map_file_names = []
    inv_h_ts = []
    local_map_size = []
    deli = '\\'
    for path in all_files:
        print('data path:', path)
        if 'zara03' not in path:
            continue
        file_name = path.split(deli)[-1]
        if 'zara01' in file_name:
            map_file_name = 'zara01'
        elif 'zara02' in file_name or 'zara03' in file_name:
            map_file_name = 'zara02'
        elif 'eth' in file_name:
            map_file_name = 'eth'
        elif 'hotel' in file_name:
            map_file_name = 'hotel'
        elif 'students003' in file_name:
            map_file_name = 'univ'
        else:
            if skip > 0:
                map_file_name = 'skip'
                print('map path: ', map_file_name)
                continue
            else:
                if 'students001' in file_name:
                    map_file_name = 's001'
                else:
                    map_file_name = 'zara03'
        print('map path: ', map_file_name)

        if map_file_name != '':
            h = np.loadtxt(
                os.path.join(self.map_dir, map_file_name + '_H.txt'))
            inv_h_t = np.linalg.pinv(np.transpose(h))
            map_file_name = os.path.join(self.map_dir,
                                         map_file_name + '_map.png')
        else:
            inv_h_t = np.zeros((3, 3))

        data = read_file(path, self.delim)
        if 'zara01' in map_file_name:
            # NOTE(review): zara01 frame ids are shifted by 10; the reason
            # is not visible in this block.
            frames = (np.unique(data[:, 0]) + 10).tolist()
        else:
            frames = np.unique(data[:, 0]).tolist()

        # All rows of each frame.
        frame_data = [data[frame == data[:, 0], :] for frame in frames]
        num_sequences = int(
            math.ceil((len(frames) - self.seq_len + 1) / self.skip))
        this_seq_obs = []

        # Slide a window of seq_len frames over all frames; self.skip is
        # the stride. Exactly num_sequences complete windows exist, so the
        # range stops there (one more would always be incomplete).
        for idx in range(0, num_sequences * self.skip, self.skip):
            # Rows of every frame in the window. The number of agents per
            # frame may differ; each row is (frame, agent id, pos_x, pos_y).
            curr_seq_data = np.concatenate(
                frame_data[idx:idx + self.seq_len], axis=0)
            peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
            curr_seq = np.zeros(
                (len(peds_in_curr_seq), n_state, self.seq_len))
            num_peds_considered = 0
            for ped_id in peds_in_curr_seq:
                curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == ped_id, :]
                curr_ped_seq = np.around(curr_ped_seq, decimals=4)
                # Offsets relative to the window start: pad_front == 0
                # means the agent is present from the first window frame.
                pad_front = frames.index(curr_ped_seq[0, 0]) - idx
                # +1 because pad_end is used as an exclusive slice end.
                pad_end = frames.index(curr_ped_seq[-1, 0]) - idx + 1
                if pad_end - pad_front != self.seq_len:
                    # Agent does not span the whole window.
                    continue
                # State rows: x, y, x', y', x'', y''
                x = curr_ped_seq[:, 2]
                y = curr_ped_seq[:, 3]
                vx = derivative_of(x, dt)
                vy = derivative_of(y, dt)
                ax = derivative_of(vx, dt)
                ay = derivative_of(vy, dt)
                curr_seq[num_peds_considered, :, pad_front:pad_end] = \
                    np.stack([x, y, vx, vy, ax, ay])
                num_peds_considered += 1

            # Keep the window only if enough agents span it completely.
            if num_peds_considered > min_ped:
                num_peds_in_seq.append(num_peds_considered)
                # curr_seq was allocated for every agent in the window but
                # only the first num_peds_considered rows were filled.
                seq_list.append(curr_seq[:num_peds_considered])
                # Observed positions only (was hard-coded to 8 steps).
                this_seq_obs.append(
                    curr_seq[:num_peds_considered, :2, :self.obs_len])
                obs_frame_num.append(
                    np.ones((num_peds_considered, self.obs_len)) *
                    frames[idx:idx + self.obs_len])
                fut_frame_num.append(
                    np.ones((num_peds_considered, self.pred_len)) *
                    frames[idx + self.obs_len:idx + self.seq_len])
                map_file_names.append(map_file_name)
                inv_h_ts.append(inv_h_t)

        if not this_seq_obs:
            # No valid window in this file: nothing to size, and
            # np.concatenate / np.stack would raise on an empty list.
            continue

        # Per-agent local map size from the mean per-step displacement of
        # the observed trajectory.
        if map_file_name == '':
            per_step_dist = []
            for obs_traj in np.concatenate(this_seq_obs):
                obs_traj = obs_traj.transpose(1, 0)
                per_step_dist.append(
                    np.sqrt(
                        ((obs_traj[1:] - obs_traj[:-1]) ** 2).sum(1)).mean())
            mean_pixel_dist = 0.7
        else:
            all_pixels = []
            for obs_traj in np.concatenate(this_seq_obs):
                obs_traj = obs_traj.transpose(1, 0)
                # Project positions with the homography.
                all_pixel = np.matmul(
                    np.concatenate(
                        [obs_traj, np.ones((len(obs_traj), 1))], axis=1),
                    inv_h_t)
                all_pixel /= np.expand_dims(all_pixel[:, 2], 1)
                all_pixels.append(all_pixel[:, :2])
            all_pixels = np.stack(all_pixels)
            per_step_dist = []
            for all_pixel in all_pixels:
                per_step_dist.append(
                    np.sqrt(
                        ((all_pixel[1:] - all_pixel[:-1]) ** 2).sum(1)
                    ).mean())
            # Lower bound: projected length of a 0.7 step between the
            # points (0, 0) and (0, 0.7).
            two_wc_pts = np.array([[0, 0], [0, 0.7]])
            two_ic_pts = np.matmul(
                np.concatenate(
                    [two_wc_pts, np.ones((len(two_wc_pts), 1))], axis=1),
                inv_h_t)
            two_ic_pts /= np.expand_dims(two_ic_pts[:, 2], 1)
            mean_pixel_dist = np.linalg.norm(two_ic_pts[1, :2] -
                                             two_ic_pts[0, :2])

        per_step_dist = np.array(per_step_dist)
        local_map_size.extend(
            list((np.clip(per_step_dist, mean_pixel_dist, None) *
                  18).astype(int)))

    self.num_seq = len(seq_list)  # number of accepted windows
    seq_list = np.concatenate(seq_list, axis=0)
    self.obs_frame_num = np.concatenate(obs_frame_num, axis=0)
    self.fut_frame_num = np.concatenate(fut_frame_num, axis=0)
    # These two lists are never filled in this method.
    self.past_obst = seq_past_obst_list
    self.fut_obst = seq_fut_obst_list

    # Convert numpy -> Torch Tensor
    self.obs_traj = torch.from_numpy(
        seq_list[:, :, :self.obs_len]).type(torch.float)
    self.pred_traj = torch.from_numpy(
        seq_list[:, :, self.obs_len:]).type(torch.float)

    # Cumulative agent counts give the (start, end) row range of every
    # window inside the concatenated tensors.
    cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
    self.seq_start_end = [
        (start, end)
        for start, end in zip(cum_start_idx, cum_start_idx[1:])
    ]
    self.map_file_name = map_file_names
    self.local_map_size = local_map_size
    self.inv_h_t = inv_h_ts
    print(self.seq_start_end[-1])
def __init__(self, data_dir, data_split, device='cpu', scale=100):
    """Build the SDD trajectory dataset from a pickled table and scene masks.

    Each scene is cut into sliding windows of ``obs_len + pred_len`` frames
    (stride ``skip``). Only agents that have one row in every frame of a
    window are kept; for each of them the six states
    ``x, y, vx, vy, ax, ay`` are stored.

    Args:
        data_dir: Directory holding ``sdd_<data_split>.pkl`` and
            ``SDD_semantic_maps/<data_split>_masks``.
        data_split: Split name used to build the two paths above.
        device: Stored on ``self.device``; not used by this constructor.
        scale: Stored on ``self.scale``; not used by this constructor.

    Raises:
        ValueError: If no window of any scene contains a complete track.
    """
    super(TrajectoryDataset, self).__init__()
    self.obs_len = 8
    self.pred_len = 12
    self.skip = 1
    self.scale = scale
    self.seq_len = self.obs_len + self.pred_len
    self.delim = ' '
    self.device = device
    self.map_dir = os.path.join(data_dir, 'SDD_semantic_maps',
                                data_split + '_masks')
    self.data_path = os.path.join(data_dir, 'sdd_' + data_split + '.pkl')
    dt = 0.4  # time step handed to derivative_of
    min_ped = 0  # a window is kept when it has more than this many agents
    n_state = 6  # x, y, vx, vy, ax, ay

    num_peds_in_seq = []
    seq_list = []
    obs_frame_num = []
    fut_frame_num = []
    scene_names = []
    local_map_size = []
    self.stats = {}

    # One mask per scene, keyed by the file name up to its first dot.
    self.maps = {}
    for file in os.listdir(self.map_dir):
        mask = imageio.imread(os.path.join(self.map_dir, file))
        self.maps[file.split('.')[0]] = mask.astype(float)

    # NOTE(review): unpickling can execute arbitrary code; only load
    # dataset files that come from a trusted source.
    with open(self.data_path, 'rb') as f:
        data = pickle5.load(f)
    data = pd.DataFrame(data)

    for s in data['sceneId'].unique():
        if ('nexus_2' in s) or ('hyang_4' in s):
            continue
        print(s)
        scene_data = data[data['sceneId'] == s]
        scene_data = scene_data.sort_values(by=['frame', 'trackId'],
                                            inplace=False)
        frames = scene_data['frame'].unique().tolist()
        scene_data = np.array(scene_data)

        # Cap coordinates at the last valid index of the scene mask.
        # Columns 2 and 3 are read as x and y further down.
        map_size = self.maps[s + '_mask'].shape
        scene_data[:, 2] = np.clip(scene_data[:, 2], a_min=None,
                                   a_max=map_size[1] - 1)
        scene_data[:, 3] = np.clip(scene_data[:, 3], a_min=None,
                                   a_max=map_size[0] - 1)

        # Position of every frame id, so the padding offsets below are a
        # dictionary lookup instead of a linear list.index scan.
        frame_pos = {frame: pos for pos, frame in enumerate(frames)}
        frame_data = [scene_data[scene_data[:, 0] == frame]
                      for frame in frames]

        num_sequences = int(
            math.ceil((len(frames) - self.seq_len + 1) / self.skip))
        this_scene_seq = []
        # Stop before num_sequences * skip: a window starting there has
        # fewer than seq_len frames and can never hold a complete track.
        for idx in range(0, num_sequences * self.skip, self.skip):
            curr_seq_data = np.concatenate(
                frame_data[idx:idx + self.seq_len], axis=0)
            peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
            curr_seq = np.zeros(
                (len(peds_in_curr_seq), n_state, self.seq_len))
            num_peds_considered = 0
            for ped_id in peds_in_curr_seq:
                curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == ped_id, :]
                # Offsets of the agent's first/last frame inside the window.
                pad_front = frame_pos[curr_ped_seq[0, 0]] - idx
                pad_end = frame_pos[curr_ped_seq[-1, 0]] - idx + 1
                # Drop agents that do not cover every frame of the window.
                if (pad_end - pad_front != self.seq_len) or (
                        curr_ped_seq.shape[0] != self.seq_len):
                    continue
                x = curr_ped_seq[:, 2].astype(float)
                y = curr_ped_seq[:, 3].astype(float)
                vx = derivative_of(x, dt)
                vy = derivative_of(y, dt)
                ax = derivative_of(vx, dt)
                ay = derivative_of(vy, dt)
                curr_seq[num_peds_considered, :, pad_front:pad_end] = \
                    np.stack([x, y, vx, vy, ax, ay])
                num_peds_considered += 1

            if num_peds_considered > min_ped:
                num_peds_in_seq.append(num_peds_considered)
                # curr_seq was allocated for every agent seen in the
                # window; only the first num_peds_considered rows are set.
                seq_list.append(curr_seq[:num_peds_considered])
                this_scene_seq.append(curr_seq[:num_peds_considered, :2])
                obs_frame_num.append(
                    np.ones((num_peds_considered, self.obs_len)) *
                    frames[idx:idx + self.obs_len])
                fut_frame_num.append(
                    np.ones((num_peds_considered, self.pred_len)) *
                    frames[idx + self.obs_len:idx + self.seq_len])
                scene_names.append([s] * num_peds_considered)

        if not this_scene_seq:
            # No complete track in this scene; np.concatenate would raise
            # on an empty list, so move on to the next scene.
            continue
        this_scene_seq = np.concatenate(this_scene_seq)

        # Local map size per agent: summed step-to-step displacement of
        # the whole window, floored at 240.
        per_step_dist = np.array([
            np.sqrt(((traj.transpose(1, 0)[1:] -
                      traj.transpose(1, 0)[:-1])**2).sum(1)).sum()
            for traj in this_scene_seq
        ])
        per_step_dist = np.clip(per_step_dist, a_min=240, a_max=None)
        local_map_size.extend(np.round(per_step_dist).astype(int))
        print(self.maps[s + '_mask'].shape, ': ', (per_step_dist).max())

    if not seq_list:
        raise ValueError(
            'no complete trajectories found in ' + self.data_path)

    seq_list = np.concatenate(seq_list, axis=0)
    self.obs_frame_num = np.concatenate(obs_frame_num, axis=0)
    self.fut_frame_num = np.concatenate(fut_frame_num, axis=0)

    # Split every track along the time axis and convert to float tensors.
    self.obs_traj = torch.from_numpy(seq_list[:, :, :self.obs_len]).type(
        torch.float)
    self.pred_traj = torch.from_numpy(seq_list[:, :, self.obs_len:]).type(
        torch.float)

    # (start, end) row range of every kept window inside the stacked
    # per-agent arrays above.
    cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
    self.seq_start_end = list(zip(cum_start_idx, cum_start_idx[1:]))

    self.map_file_name = np.concatenate(scene_names)
    self.num_seq = len(self.obs_traj)  # number of stacked agent tracks
    self.local_map_size = np.stack(local_map_size)
    # Independent placeholders per entry ([[]] * n would alias a single
    # list object across all positions).
    self.local_ic = [[] for _ in range(self.num_seq)]
    self.local_homo = [[] for _ in range(self.num_seq)]
    self.local_map = [[] for _ in range(self.num_seq)]
    print(self.seq_start_end[-1])
def __init__(self, data_dir, data_split, device='cpu', scale=1, k_fold=0):
    """Preprocess numbered CSV trajectory files and pickle the result.

    CSV files named ``<int>.csv`` are selected by split and fold, cut into
    sliding windows of ``obs_len + pred_len`` frames, reduced to one group
    of nearby agents per window, paired with local map crops from
    ``get_local_map_ic`` and written to
    ``<data_dir>/<k_fold>/<data_split>.pkl``.

    Args:
        data_dir: Directory with the ``<int>.csv`` files; the map of each
            file is looked up under the same name with a ``.png`` suffix.
        data_split: ``'train'``, ``'val'`` or anything else (treated as
            the test split); selects the files whose number is congruent
            to 0, 1 or 2 modulo 10 respectively.
        device: Stored on ``self.device``; not used by this constructor.
        scale: Stored on ``self.scale``; not used by this constructor.
        k_fold: Fold index; the test split keeps files
            ``[3 * k_fold, 3 * k_fold + 3)`` of the selection, the other
            splits keep the rest.

    Raises:
        ValueError: If no window of any file yields a usable agent group.
    """
    super(TrajectoryDataset, self).__init__()
    self.obs_len = 8
    self.pred_len = 12
    skip = 1
    self.scale = scale
    self.seq_len = self.obs_len + self.pred_len
    self.device = device
    delim = ','
    dt = 0.4  # time step handed to derivative_of
    min_ped = 0  # a window is kept when it has more than this many agents
    n_state = 6  # x, y, vx, vy, ax, ay
    neighbor_radius = 5  # distance threshold for "neighbour"
    max_group_size = 60  # largest agent group kept per window
    data_dir = data_dir.replace('\\', '/')

    if data_split == 'train':
        n, n_sample = 0, 3000
    elif data_split == 'val':
        n, n_sample = 1, 500
    else:
        n, n_sample = 2, 3000

    all_files = [
        e for e in os.listdir(data_dir)
        if ('.csv' in e) and ((int(e.split('.csv')[0]) - n) % 10 == 0)
    ]
    all_files = sorted(all_files, key=lambda x: int(x.split('.')[0]))
    if data_split == 'test':
        all_files = all_files[3 * k_fold:3 * k_fold + 3]
    else:
        all_files = all_files[:3 * k_fold] + all_files[3 * k_fold + 3:]
    all_files = np.array(all_files)
    print(', '.join(all_files))

    num_peds_in_seq = []
    seq_list = []
    obs_frame_num = []
    fut_frame_num = []
    map_file_names = []
    inv_h_ts = []
    max_dist = 0

    for path in all_files:
        num_data_from_one_file = 0
        path = os.path.join(data_dir, path.rstrip().replace('\\', '/'))
        print('data path:', path)
        map_file_name = path.replace('.csv', '.png')
        print('map path: ', map_file_name)
        # Fixed transform stored with every window: diag(2, 2, 1).
        inv_h_t = np.eye(3) * 2
        inv_h_t[-1, -1] = 1

        loaded_data = read_file(path, delim)
        data = pd.DataFrame(loaded_data[:, :4])
        data.columns = ['f', 'a', 'pos_x', 'pos_y']
        data.sort_values(by=['f', 'a'], inplace=True)
        frames = data['f'].unique().tolist()
        # Position of every frame id, so the padding offsets below are a
        # dictionary lookup instead of a linear list.index scan.
        frame_pos = {frame: pos for pos, frame in enumerate(frames)}
        frame_data = [data[data['f'] == frame].values for frame in frames]

        num_sequences = int(
            math.ceil((len(frames) - self.seq_len + 1) / skip))
        # Stop before num_sequences * skip: a window starting there has
        # fewer than seq_len frames and can never hold a complete track.
        for idx in range(0, num_sequences * skip, skip):
            curr_seq_data = np.concatenate(
                frame_data[idx:idx + self.seq_len], axis=0)
            peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
            curr_seq = np.zeros(
                (len(peds_in_curr_seq), n_state, self.seq_len))
            num_peds_considered = 0
            for ped_id in peds_in_curr_seq:
                curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == ped_id, :]
                curr_ped_seq = np.around(curr_ped_seq, decimals=4)
                # Offsets of the agent's first/last frame inside the window.
                pad_front = frame_pos[curr_ped_seq[0, 0]] - idx
                pad_end = frame_pos[curr_ped_seq[-1, 0]] - idx + 1
                # Drop agents that do not cover every frame of the window.
                # The row-count test also rejects tracks with gaps, which
                # would otherwise fail to broadcast into curr_seq.
                if (pad_end - pad_front != self.seq_len) or (
                        curr_ped_seq.shape[0] != self.seq_len):
                    continue
                x = curr_ped_seq[:, 2]
                y = curr_ped_seq[:, 3]
                vx = derivative_of(x, dt)
                vy = derivative_of(y, dt)
                ax = derivative_of(vx, dt)
                ay = derivative_of(vy, dt)
                curr_seq[num_peds_considered, :, pad_front:pad_end] = \
                    np.stack([x, y, vx, vy, ax, ay])
                num_peds_considered += 1

            if num_peds_considered <= min_ped:
                continue

            seq_traj = curr_seq[:num_peds_considered]
            # Pairwise distances at the first future step.
            pos = seq_traj[:, :2, self.obs_len]
            dist = np.linalg.norm(pos[:, None, :] - pos[None, :, :], axis=-1)

            # Without at least one agent whose neighbourhood fits the size
            # limit, the random search below would never terminate and the
            # exhaustive search would return an empty group: skip the window.
            neighbor_counts = (dist < neighbor_radius).sum(axis=1)
            if not (neighbor_counts <= max_group_size).any():
                continue

            if random.random() < 0.5:
                # Random anchor: redraw until its neighbourhood is small
                # enough.
                while True:
                    d = random.randint(0, len(dist) - 1)
                    target_agent_idx = np.where(
                        dist[d] < neighbor_radius)[0]
                    if len(target_agent_idx) <= max_group_size:
                        break
            else:
                # Anchor with the largest neighbourhood within the limit.
                target_agent_idx = []
                for d in range(len(dist)):
                    neighbor_idx = np.where(dist[d] < neighbor_radius)[0]
                    if len(target_agent_idx) < len(
                            neighbor_idx) <= max_group_size:
                        target_agent_idx = neighbor_idx

            seq_traj = seq_traj[target_agent_idx]
            # Displacement between the last observed and the last step.
            travelled = np.sqrt(
                ((seq_traj[:, :2, self.obs_len - 1] -
                  seq_traj[:, :2, -1])**2).sum(-1))
            max_dist = max(max_dist, travelled.max())
            num_peds_considered = len(target_agent_idx)

            seq_list.append(seq_traj)
            num_data_from_one_file += num_peds_considered
            num_peds_in_seq.append(num_peds_considered)
            obs_frame_num.append(frames[idx:idx + self.obs_len])
            fut_frame_num.append(
                frames[idx + self.obs_len:idx + self.seq_len])
            map_file_names.append(map_file_name)
            inv_h_ts.append(inv_h_t)
            # Cap the number of agent tracks taken from one file.
            if num_data_from_one_file > n_sample:
                break

        if num_peds_in_seq:
            # Group sizes over everything collected so far.
            group_sizes = np.asarray(num_peds_in_seq)
            print('num data, min/avg/max #agent')
            print(num_data_from_one_file, np.round(group_sizes.min(), 2),
                  np.round(group_sizes.mean(), 2),
                  np.round(group_sizes.max(), 2))

    if not seq_list:
        raise ValueError('no usable trajectories found in ' + data_dir)

    seq_list = np.concatenate(seq_list, axis=0)
    self.obs_frame_num = np.stack(obs_frame_num)
    self.fut_frame_num = np.stack(fut_frame_num)

    # Trajectories stay NumPy arrays here because they are pickled below.
    self.obs_traj = seq_list[:, :, :self.obs_len]
    self.fut_traj = seq_list[:, :, self.obs_len:]

    # (start, end) row range of every kept window inside the stacked
    # per-agent arrays above.
    cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
    self.seq_start_end = list(zip(cum_start_idx, cum_start_idx[1:]))
    self.num_seq = len(self.obs_traj)  # number of stacked agent tracks
    self.map_file_name = map_file_names
    self.inv_h_t = inv_h_ts
    self.local_map = []
    self.local_homo = []
    self.local_ic = []
    print(self.seq_start_end[-1])
    print(max_dist)

    for seq_i, (start, end) in enumerate(self.seq_start_end):
        if (seq_i + 1) % 100 == 0:
            print(seq_i + 1)  # progress
        global_map = imageio.imread(self.map_file_name[seq_i])
        local_maps = []
        local_ics = []
        local_homos = []
        for idx in range(start, end):
            # (seq_len, 2) positions of one agent, observed then future.
            all_traj = np.concatenate(
                [self.obs_traj[idx, :2], self.fut_traj[idx, :2]],
                axis=1).transpose(1, 0)
            local_map, local_ic, local_h = get_local_map_ic(
                global_map, all_traj, zoom=5, radius=19.2)
            local_maps.append(local_map)
            local_ics.append(local_ic)
            local_homos.append(local_h)
        self.local_map.append(np.stack(local_maps))
        self.local_ic.append(np.stack(local_ics))
        self.local_homo.append(np.stack(local_homos))

    self.local_map = np.concatenate(self.local_map)
    self.local_ic = np.concatenate(self.local_ic)
    self.local_homo = np.concatenate(self.local_homo)

    all_data = {
        'seq_start_end': self.seq_start_end,
        'obs_traj': self.obs_traj,
        'fut_traj': self.fut_traj,
        'obs_frame_num': self.obs_frame_num,
        'fut_frame_num': self.fut_frame_num,
        'map_file_name': self.map_file_name,
        'inv_h_t': self.inv_h_t,
        'local_map': self.local_map,
        'local_ic': self.local_ic,
        'local_homo': self.local_homo,
    }
    save_path = os.path.join(data_dir, str(k_fold), data_split + '.pkl')
    # The fold directory may not exist yet; open() does not create it.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'wb') as handle:
        pickle5.dump(all_data, handle, protocol=pickle5.HIGHEST_PROTOCOL)
def __init__(self, data_dir, data_split, device):
    """Load KITTI tracks and their rotated scene crops.

    Reads ``dataset_kitti_<data_split>.json`` and the images under
    ``<data_dir>/maps``. For every track it stores the past/future
    positions, a rotated crop of the scene map, the six states
    ``x, y, vx, vy, ax, ay`` split at ``obs_len``, and a local map size.

    Args:
        data_dir: Directory with the JSON file and the ``maps`` folder.
        data_split: Split name inserted into the JSON file name.
        device: Stored on ``self.device``; not used by this constructor.
    """
    json_dataset = os.path.join(data_dir,
                                'dataset_kitti_' + data_split + '.json')
    with open(json_dataset) as f:
        tracks = json.load(f)

    self.device = device
    self.index = []
    self.pasts = []  # [len_past, 2]
    self.futures = []  # [len_future, 2]
    self.rotation_angles = []  # trajectory angle in complete scene
    self.scenes = []  # [360, 360, 1]
    self.videos = []  # '0001'
    self.classes = []  # 'Car'
    self.num_vehicles = []  # 0 is ego-vehicle, >0 other agents
    self.step_sequences = []
    self.obs_traj = []
    self.pred_traj = []
    self.local_map_size = []
    self.obs_len = 20
    self.scale = 1
    self.zoom = zoom = 4
    # Scale by 2 * zoom and shift by 180 * zoom on both axes.
    self.homo = np.array([[2 * zoom, 0, 180 * zoom],
                          [0, 2 * zoom, 180 * zoom],
                          [0, 0, 1]])
    dt = 0.1  # time step handed to derivative_of

    # One grayscale map per video id, with labels remapped: 3 -> 0, 4 -> 3.
    scene_tracks = {}
    for map_file in os.listdir(os.path.join(data_dir, 'maps')):
        video = map_file.split('drive_')[1].split('_sync')[0]
        scene_track = cv2.imread(os.path.join(data_dir, 'maps', map_file), 0)
        scene_track[scene_track == 3] = 0
        scene_track[scene_track == 4] -= 1
        scene_tracks[video] = scene_track

    # Preload data
    for t, track in tracks.items():
        past = np.asarray(track['past'])
        future = np.asarray(track['future'])
        position_in_map = np.asarray(track['position_in_map'])
        rotation_angle = track['angle_rotation']
        video = track['video']

        # 360 x 360 crop centred on twice the map position, then rotated
        # about the crop centre.
        row = int(position_in_map[1]) * 2
        col = int(position_in_map[0]) * 2
        scene_track = scene_tracks[video][row - 180:row + 180,
                                          col - 180:col + 180]
        matRot_scene = cv2.getRotationMatrix2D((180, 180), rotation_angle, 1)
        # dsize is (width, height), i.e. (columns, rows).
        scene_track = cv2.warpAffine(
            scene_track,
            matRot_scene, (scene_track.shape[1], scene_track.shape[0]),
            borderValue=0,
            flags=cv2.INTER_NEAREST)

        self.index.append(t)
        self.pasts.append(past)
        self.futures.append(future)

        curr_ped_seq = np.concatenate([past, future])
        x = curr_ped_seq[:, 0].astype(float)
        y = curr_ped_seq[:, 1].astype(float)
        vx = derivative_of(x, dt)
        vy = derivative_of(y, dt)
        ax = derivative_of(vx, dt)
        ay = derivative_of(vy, dt)
        states = np.stack([x, y, vx, vy, ax, ay])
        self.obs_traj.append(states[:, :self.obs_len])
        self.pred_traj.append(states[:, self.obs_len:])

        self.rotation_angles.append(rotation_angle)
        self.videos.append(video)
        self.classes.append(track['class'])
        self.num_vehicles.append(track['num_vehicle'])
        self.step_sequences.append(track['step_sequence'])
        self.scenes.append(scene_track)

        # Local map size: mean step length (positions doubled) times 60,
        # floored at 32.
        local_map_size = np.sqrt(
            (((curr_ped_seq[1:] - curr_ped_seq[:-1]) * 2)**2).sum(1)
        ).mean() * 60
        local_map_size = np.clip(local_map_size, a_min=32, a_max=None)
        self.local_map_size.append(np.round(local_map_size).astype(int))

    self.obs_traj = torch.from_numpy(np.stack(self.obs_traj)).type(
        torch.float)
    self.pred_traj = torch.from_numpy(np.stack(self.pred_traj)).type(
        torch.float)
    # Independent placeholders per entry ([[]] * n would alias a single
    # list object across all positions).
    self.local_ic = [[] for _ in range(len(self.obs_traj))]
    self.local_homo = [[] for _ in range(len(self.obs_traj))]
def next_sample(self):
    """Assemble the next batch of trajectories with their local maps.

    Consumes ``self.idx_list[self.index]`` (and advances ``self.index``),
    walks the samples in that range and skips those whose sequence yields
    no data.

    Returns:
        ``None`` when no sample in the range produced data, otherwise a
        list ``[obs_traj, fut_traj, obs_traj_st, fut_vel_st,
        seq_start_end, scene_maps, local_maps, local_ics, local_homos]``.
        The trajectory tensors are laid out as (time, agent, state) with
        the states ``x, y, vx, vy, ax, ay``.
    """
    batch_span = self.idx_list[self.index]
    self.index += 1

    past_chunks, future_chunks = [], []
    local_maps, ic_chunks, homo_chunks, scene_maps = [], [], [], []

    for sample_index in self.sample_list[batch_span[0]:batch_span[1]]:
        seq_index, frame = self.get_seq_and_frame(sample_index)
        data = self.sequence[seq_index](frame)
        if data is None:
            # No valid data for this frame.
            continue

        past_chunks.append(torch.stack(data['pre_motion_3D']))
        future_chunks.append(torch.stack(data['fut_motion_3D']))
        window = torch.cat([past_chunks[-1], future_chunks[-1]], dim=1)

        scene_map = data['scene_map']
        scene_points = window * data['traj_scale']

        # Crop radius per agent: 20 x mean step length in map points,
        # never below 128.
        radius = []
        for agent in range(len(window)):
            map_traj = scene_map.to_map_points(scene_points[agent])
            steps = np.sqrt(((map_traj[1:] - map_traj[:-1])**2).sum(1))
            clipped = np.clip(steps.mean() * 20, a_min=128, a_max=None)
            radius.append(np.round(clipped).astype(int))

        # Image coordinates and homographies are computed once per sample
        # and cached on the instance afterwards.
        needs_homo = len(self.local_ic[sample_index]) == 0
        local_map, local_ic, local_homo = scene_map.get_cropped_maps(
            scene_points, radius, compute_local_homo=needs_homo)
        if needs_homo:
            self.local_ic[sample_index] = np.stack(local_ic)
            self.local_homo[sample_index] = np.stack(local_homo)

        local_maps.extend(local_map)
        ic_chunks.append(self.local_ic[sample_index])
        homo_chunks.append(self.local_homo[sample_index])
        scene_maps.append(scene_map)

    if not past_chunks:
        return None

    local_ics = np.concatenate(ic_chunks)
    local_homos = np.concatenate(homo_chunks)

    # Row range of every sample inside the concatenated agent axis.
    bounds = [0] + np.cumsum([len(chunk) for chunk in past_chunks]).tolist()
    seq_start_end = [[lo, hi] for lo, hi in zip(bounds, bounds[1:])]

    full_traj = torch.cat(
        [torch.cat(past_chunks), torch.cat(future_chunks)], dim=1)

    # Six states per agent: position, velocity, acceleration.
    dt = 0.5
    states = []
    for track in full_traj.detach().cpu().numpy():
        x = track[:, 0].astype(float)
        y = track[:, 1].astype(float)
        vx = derivative_of(x, dt)
        vy = derivative_of(y, dt)
        ax = derivative_of(vx, dt)
        ay = derivative_of(vy, dt)
        states.append(np.stack([x, y, vx, vy, ax, ay]))
    states = torch.tensor(np.stack(states)).float().to(self.device)
    states = states.permute(2, 0, 1)  # -> (time, agent, state)

    obs_traj = states[:self.past_frames]
    fut_traj = states[self.past_frames:]

    # Standardised observation: positions relative to the last observed
    # step, everything divided by self.scale.
    obs_traj_st = obs_traj.clone()
    obs_traj_st[:, :, :2] = (obs_traj_st[:, :, :2] -
                             obs_traj_st[-1, :, :2]) / self.scale
    obs_traj_st[:, :, 2:] /= self.scale

    return [
        obs_traj,
        fut_traj,
        obs_traj_st,
        fut_traj[:, :, 2:4] / self.scale,
        seq_start_end,
        scene_maps,
        local_maps,
        local_ics,
        torch.tensor(local_homos).float().to(self.device),
    ]
def __init__(self, data_dir, data_split, device='cpu', scale=1, coll_th=0.2): """ Args: - data_dir: Directory containing dataset files in the format <frame_id> <ped_id> <x> <y> - obs_len: Number of time-steps in input trajectories - pred_len: Number of time-steps in output trajectories - skip: Number of frames to skip while making the dataset - threshold: Minimum error to be considered for non linear traj when using a linear predictor - min_ped: Minimum number of pedestrians that should be in a seqeunce - delim: Delimiter in the dataset files """ super(TrajectoryDataset, self).__init__() self.obs_len = 8 self.pred_len = 12 skip = 1 self.scale = scale self.seq_len = self.obs_len + self.pred_len self.device = device delim = ',' dt = 0.4 min_ped = 0 data_dir = data_dir.replace('\\', '/') n_state = 6 # root_dir = '/dresden/users/ml1323/crowd/datasets/Trajectories' # root_dir = os.path.join(data_dir, data_name) # root_dir = 'D:\crowd\datasets\Trajectories\Trajectories' all_files = [ e for e in os.listdir(data_dir) if ('.csv' in e) and ('h**o' not in e) ] all_files = np.array( sorted(all_files, key=lambda x: int(x.split('.')[0]))) if data_split == 'train': all_files = all_files[:30] elif data_split == 'val': all_files = all_files[[42, 44]] else: all_files = all_files[[43, 47, 48, 49]] # with open(os.path.join(root_dir, 'exit_wc.json')) as data_file: # all_exit_wc = json.load(data_file) num_peds_in_seq = [] seq_list = [] obs_frame_num = [] fut_frame_num = [] map_file_names = [] inv_h_ts = [] for path in all_files: # exit_wc = np.array(all_exit_wc[path]) num_data_from_one_file = 0 path = os.path.join(data_dir, path.rstrip().replace('\\', '/')) print('data path:', path) # if 'Pathfinding' not in path: # continue map_file_name = path.replace('.csv', '.png') print('map path: ', map_file_name) h = np.loadtxt(path.replace('.csv', '_homography.csv'), delimiter=',') inv_h_t = np.linalg.pinv(np.transpose(h)) loaded_data = read_file(path, delim) data = pd.DataFrame(loaded_data) 
data.columns = ['f', 'a', 'pos_x', 'pos_y'] # data.sort_values(by=['f', 'a'], inplace=True) data.sort_values(by=['f', 'a'], inplace=True) frames = data['f'].unique().tolist() frame_data = [] # data.sort_values(by=['f']) for frame in frames: frame_data.append(data[data['f'] == frame].values) num_sequences = int( math.ceil((len(frames) - self.seq_len + 1) / skip)) # print('num_sequences: ', num_sequences) # all frames를 seq_len(kernel size)만큼씩 sliding해가며 볼것. 이때 skip = stride. for idx in range(0, num_sequences * skip + 1, skip): curr_seq_data = np.concatenate( frame_data[idx:idx + self.seq_len], axis=0 ) # frame을 seq_len만큼씩 잘라서 볼것 = curr_seq_data. 각 frame이 가진 데이터(agent)수는 다를수 잇음. 하지만 각 데이터의 길이는 4(frame #, agent id, pos_x, pos_y) peds_in_curr_seq = np.unique( curr_seq_data[:, 1]) # unique agent id curr_seq = np.zeros( (len(peds_in_curr_seq), n_state, self.seq_len)) num_peds_considered = 0 ped_ids = [] for _, ped_id in enumerate( peds_in_curr_seq ): # current frame sliding에 들어온 각 agent에 대해 curr_ped_seq = curr_seq_data[ curr_seq_data[:, 1] == ped_id, :] # frame#, agent id, pos_x, pos_y curr_ped_seq = np.around(curr_ped_seq, decimals=4) pad_front = frames.index( curr_ped_seq[0, 0] ) - idx # sliding idx를 빼주는 이유?. sliding이 움직여온 step인 idx를 빼줘야 pad_front=0 이됨. 0보다 큰 pad_front라는 것은 현ped_id가 처음 나타난 frame이 desired first frame보다 더 늦은 경우. pad_end = frames.index(curr_ped_seq[ -1, 0]) - idx + 1 # pad_end까지선택하는 index로 쓰일거라 1더함 if pad_end - pad_front != self.seq_len: # seq_len만큼의 sliding동안 매 프레임마다 agent가 존재하지 않은 데이터였던것. 
continue ped_ids.append(ped_id) # x,y,x',y',x'',y'' x = curr_ped_seq[:, 2] y = curr_ped_seq[:, 3] vx = derivative_of(x, dt) vy = derivative_of(y, dt) ax = derivative_of(vx, dt) ay = derivative_of(vy, dt) # Make coordinates relative _idx = num_peds_considered curr_seq[_idx, :, pad_front:pad_end] = np.stack( [x, y, vx, vy, ax, ay]) # (1,6,20) num_peds_considered += 1 if num_peds_considered > min_ped: # 주어진 하나의 sliding(16초)동안 등장한 agent수가 min_ped보다 큼을 만족하는 경우에만 이 slide데이터를 채택 seq_traj = curr_seq[:num_peds_considered][:, :2] exclude_idx = [] for i in range(20): curr1 = seq_traj[:, :, i].repeat(num_peds_considered, 0) # AAABBBCCC curr2 = np.stack( [seq_traj[:, :, i]] * num_peds_considered).reshape( -1, 2) # ABCABC dist = np.linalg.norm(curr1 - curr2, axis=1) dist = dist.reshape(num_peds_considered, num_peds_considered) diff_agent_idx = np.triu_indices(num_peds_considered, k=1) dist[diff_agent_idx] = 0 under_th_idx = np.array( np.where((dist > 0) & (dist < coll_th))) # under_th_idx = np.where((dist > 0) & (dist < coll_th)) # print(len(np.array(np.where((dist > 0.5) & (dist < 1))[0]))) # for elt in np.unique(under_th_idx): # np.count_nonzero(under_th_idx == elt) for j in range(len(under_th_idx[0])): idx_pair = under_th_idx[:, j] exclude_idx.append(idx_pair[0]) exclude_idx = np.unique(exclude_idx) if len(exclude_idx) == num_peds_considered: continue if len(exclude_idx) > 0: # print(len(exclude_idx), '/', num_peds_considered) valid_idx = [ i for i in range(num_peds_considered) if i not in exclude_idx ] seq_traj = curr_seq[valid_idx] num_peds_considered = len(valid_idx) #====================== # for i in range(20): # curr1 = seq_traj[:, :2, i].repeat(num_peds_considered, 0) # AAABBBCCC # curr2 = np.stack([seq_traj[:, :2, i]] * num_peds_considered).reshape(-1, 2) # ABCABC # dist = np.linalg.norm(curr1 - curr2, axis=1) # dist = dist.reshape(num_peds_considered, num_peds_considered) # # diff_agent_idx = np.triu_indices(num_peds_considered, k=1) # dist[diff_agent_idx] = 0 # 
print(len(np.array(np.where((dist > 0) & (dist < 2))[0]))) #====================== else: seq_traj = curr_seq[:num_peds_considered] # find the agent with max num of neighbors at the beginning of future steps curr1 = seq_traj[:, :2, self.obs_len].repeat( num_peds_considered, 0) # AAABBBCCC curr2 = np.stack([seq_traj[:, :2, self.obs_len]] * num_peds_considered).reshape(-1, 2) # ABCABC dist = np.linalg.norm(curr1 - curr2, axis=1) dist = dist.reshape(num_peds_considered, num_peds_considered) target_agent_idx = [] for d in range(len(dist)): neighbor_idx = np.where((dist[d] < 5))[0] if len(neighbor_idx) > len(target_agent_idx): target_agent_idx = neighbor_idx seq_traj = seq_traj[target_agent_idx] num_peds_considered = len(target_agent_idx) print(num_peds_considered) seq_list.append(seq_traj) num_data_from_one_file += num_peds_considered num_peds_in_seq.append(num_peds_considered) obs_frame_num.append( np.ones((num_peds_considered, self.obs_len)) * frames[idx:idx + self.obs_len]) fut_frame_num.append( np.ones((num_peds_considered, self.pred_len)) * frames[idx + self.obs_len:idx + self.seq_len]) # map_file_names.append(num_peds_considered*[map_file_name]) map_file_names.append(map_file_name) inv_h_ts.append(inv_h_t) if num_data_from_one_file > 1000: break cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist() aa = np.array([ (start, end) for start, end in zip(cum_start_idx, cum_start_idx[1:]) ]) print('num data, min/avg/max #agent') print(num_data_from_one_file, np.round((aa[:, 1] - aa[:, 0]).min(), 2), np.round((aa[:, 1] - aa[:, 0]).mean(), 2), np.round((aa[:, 1] - aa[:, 0]).max(), 2)) seq_list = np.concatenate(seq_list, axis=0) # (32686, 2, 16) self.obs_frame_num = np.concatenate(obs_frame_num, axis=0) self.fut_frame_num = np.concatenate(fut_frame_num, axis=0) # Convert numpy -> Torch Tensor self.obs_traj = seq_list[:, :, :self.obs_len] self.fut_traj = seq_list[:, :, self.obs_len:] # frame seq순, 그리고 agent id순으로 쌓아온 데이터에 대한 index를 부여하기 위해 cumsum으로 index생성 ==> 한 슬라이드(16 
seq. of frames)에서 고려된 agent의 data를 start, end로 끊어내서 index로 골래내기 위해 cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist( ) # num_peds_in_seq = 각 slide(16개 frames)별로 고려된 agent수.따라서 len(num_peds_in_seq) = slide 수 = 2692 = self.num_seq self.seq_start_end = [ (start, end) for start, end in zip(cum_start_idx, cum_start_idx[1:]) ] # [(0, 2), (2, 4), (4, 7), (7, 10), ... (32682, 32684), (32684, 32686)] # self.num_seq = len(self.seq_start_end) self.num_seq = len(self.obs_traj) self.map_file_name = map_file_names self.inv_h_t = inv_h_ts self.local_map = [] self.local_homo = [] self.local_ic = [] print(self.seq_start_end[-1]) for seq_i in range(len(self.seq_start_end)): start, end = self.seq_start_end[seq_i] global_map = imageio.imread(self.map_file_name[seq_i]) local_maps = [] local_ics = [] local_homos = [] for idx in range(start, end): all_traj = np.concatenate( [self.obs_traj[idx, :2], self.fut_traj[idx, :2]], axis=1).transpose(1, 0) # plt.imshow(global_map) # plt.scatter(all_traj[:8,0], all_traj[:8,1], s=1, c='b') # plt.scatter(all_traj[8:,0], all_traj[8:,1], s=1, c='r') # plt.show() local_map, local_ic, local_h = get_local_map_ic(global_map, all_traj, zoom=10, radius=8) local_maps.append(local_map) local_ics.append(local_ic) local_homos.append(local_h) # plt.imshow(local_map[0]) # plt.scatter(local_ic[:,1], local_ic[:,0], s=1, c='r') # plt.show() self.local_map.append(np.stack(local_maps)) self.local_ic.append(np.stack(local_ics)) self.local_homo.append(np.stack(local_homos)) self.local_map = np.concatenate(self.local_map) self.local_ic = np.concatenate(self.local_ic) self.local_homo = np.concatenate(self.local_homo) all_data = \ {'seq_start_end': self.seq_start_end, 'obs_traj': self.obs_traj, 'fut_traj': self.fut_traj, 'obs_frame_num': self.obs_frame_num, 'fut_frame_num': self.fut_frame_num, 'map_file_name': self.map_file_name, 'inv_h_t': self.inv_h_t, 'local_map': self.local_map, 'local_ic': self.local_ic, 'local_homo': self.local_homo, } save_path = os.path.join( 
data_dir, data_split + '_threshold' + str(coll_th) + '.pkl') with open(save_path, 'wb') as handle: pickle5.dump(all_data, handle, protocol=pickle5.HIGHEST_PROTOCOL)