def get_trans_matrix(self, lag):
    """
    Build the sparse transition-count matrix of the 2d (velocity, angle)
    classes for the given lag.

    Every realization file ``real_<j>.pkl`` in ``self.input_folder`` is
    loaded and each trajectory is converted with
    ``get_time_dx_dy_array_with_freq`` using ``self.time_step``. When
    ``self.filter_length`` / ``self.filter_time`` are truthy, a trajectory is
    cut at the first point of *that trajectory* whose x value / time is not
    below the filter; a trajectory that never reaches the filter is kept in
    full (up to its ``last_idx_array`` entry).

    :param lag: the lag used to derive the transition matrix
    :return: csc_matrix of shape (n_2d_classes, n_2d_classes)
    """
    filter_length = self.filter_length
    if filter_length:
        print('using only points with x values less than ' + str(filter_length))
    filter_time = self.filter_time
    if filter_time:
        print('using only points with time less than ' + str(filter_time))
    n_2d_class = self.mapping.n_2d_classes
    i_list = []
    j_list = []
    ij_list = set()
    val_list = []
    time_step = self.time_step
    print('extracting trans matrix...')
    for j in range(self.n_total_realz):
        print('%s %s' % ('realization number: ', j))
        file_name = "real_" + str(j) + ".pkl"
        input_file = os.path.join(self.input_folder, file_name)
        # NOTE: pickle.load can execute arbitrary code; only read trusted files
        with open(input_file, 'rb') as pkl_file:
            dataHolder = pickle.load(pkl_file)
        x_array = dataHolder.x_array
        y_array = dataHolder.y_array
        t_array = dataHolder.t_array
        dx = np.diff(x_array)
        dy = np.diff(y_array)
        # the tiny offset keeps the divisions below away from an exact zero
        dt = np.diff(t_array) + 1e-15
        if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
            print('some array was empty, skipping this file...')
            continue
        lastIdx = dataHolder.last_idx_array
        vxMatrix = np.divide(dx, dt)
        vyMatrix = np.divide(dy, dt)
        m = dx.shape[0]
        for i in range(m):
            x_start = x_array[i, 0]
            y_start = y_array[i, 0]
            # get the time process for each velocity
            cutOff = lastIdx[i]
            if filter_length:
                # search only this trajectory's row; argmin of a boolean row
                # is the first False, and is meaningless (0) when all True
                below = x_array[i, :] < filter_length
                if not below.all():
                    cutOff = min(cutOff, np.argmin(below))
            if filter_time:
                below = t_array[i, :] < filter_time
                if not below.all():
                    cutOff = min(cutOff, np.argmin(below))
            dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(
                dt[i, :cutOff], vxMatrix[i, :cutOff], vyMatrix[i, :cutOff],
                x_start, y_start, time_step)
            v_temp = np.sqrt(np.power(dxTime, 2) + np.power(dyTime, 2)) / time_step
            theta_temp = np.arctan2(dyTime, dxTime)
            # trajectories not longer than the lag are skipped
            if len(v_temp) > lag:
                new_v, new_theta, new_f = remove_duplicate_xy(
                    v_temp, theta_temp, freq)
                class_2d = self.mapping.class_index_2d_vtheta(
                    new_v, new_theta)
                new_f = np.array(new_f, dtype=np.dtype("i"))
                fill_one_trajectory_sparse_with_freq_cython(
                    lag, class_2d, new_f, i_list, j_list, ij_list, val_list)
    print('done')
    return csc_matrix((val_list, (i_list, j_list)),
                      shape=(n_2d_class, n_2d_class))
def get_trans_matrix_from_scratch(self, lag):
    """
    Assemble the sparse transition-count matrices of the velocity classes and
    of the angle classes for one lag, reading every realization file
    ``real_<j>.pkl`` found in ``self.input_folder``.

    :param lag: the lag used to derive the transition matrices
    :return: (v_trans_matrix, theta_trans_matrix), two csc_matrix objects of
        shape (n_abs_v_classes, n_abs_v_classes) and
        (n_theta_classes, n_theta_classes)
    """
    # sizes of the two transition matrices
    n_v_class = self.mapping.n_abs_v_classes
    n_theta_class = self.mapping.n_theta_classes
    # row / column / value buffers and the index set handed to the fill
    # routine, one group per attribute
    rows_v, cols_v, vals_v = [], [], []
    rows_theta, cols_theta, vals_theta = [], [], []
    seen_v, seen_theta = set(), set()
    delta_t = self.time_step
    print('extracting trans matrix for the velocity and angle process...')
    for realz in range(self.n_total_realz):
        if realz % 5 == 0:
            print('%s %s' % ('reading realization number: ', realz))
        pkl_path = os.path.join(self.input_folder,
                                "real_" + str(realz) + ".pkl")
        with open(pkl_path, 'rb') as pkl_file:
            holder = pickle.load(pkl_file)
        dx = np.diff(holder.x_array)
        dy = np.diff(holder.y_array)
        dt = np.diff(holder.t_array) + 1e-15
        if dx.shape[0] == 0 or dy.shape[0] == 0 or dt.shape[0] == 0:
            print('some array was empty, skipping this file...')
            continue
        last_idx = holder.last_idx_array
        vx = np.divide(dx, dt)
        vy = np.divide(dy, dt)
        for traj in range(dx.shape[0]):
            x0 = holder.x_array[traj, 0]
            y0 = holder.y_array[traj, 0]
            # only the first last_idx[traj] steps of the row are used
            end = last_idx[traj]
            dx_time, dy_time, freq = get_time_dx_dy_array_with_freq(
                dt[traj, :end], vx[traj, :end], vy[traj, :end],
                x0, y0, delta_t)
            speed = np.sqrt(np.power(dx_time, 2) + np.power(dy_time, 2)) / delta_t
            angle = np.arctan2(dy_time, dx_time)
            if len(speed) <= lag:
                # too short to contribute a transition at this lag
                continue
            new_v, new_theta, new_f = remove_duplicate_xy(speed, angle, freq)
            # velocity series: classes of log(v) on the log-velocity edges
            v_classes = self.mapping.find_1d_class_idx(
                np.log(new_v), self.mapping.v_log_edges)
            fill_one_trajectory_sparse_with_freq_cython(
                lag, v_classes, new_f, rows_v, cols_v, seen_v, vals_v)
            # angle series
            theta_classes = self.mapping.find_1d_class_idx(
                new_theta, self.mapping.theta_edges)
            fill_one_trajectory_sparse_with_freq_cython(
                lag, theta_classes, new_f, rows_theta, cols_theta,
                seen_theta, vals_theta)
    print('done')
    v_matrix = csc_matrix((vals_v, (rows_v, cols_v)),
                          shape=(n_v_class, n_v_class))
    theta_matrix = csc_matrix((vals_theta, (rows_theta, cols_theta)),
                              shape=(n_theta_class, n_theta_class))
    return v_matrix, theta_matrix
def test_mapping_theta_2():
    """
    Round-trip check of the class-index conversions of mapping_v_theta_repeat:
    2d class -> (v, theta) 1d classes, and 3d class -> (v, theta, freq).
    """
    repo_root = os.path.dirname(os.path.dirname(__file__))
    input_folder = os.path.join(repo_root, 'test_related_files',
                                'particle_tracking_results')
    dt = 50.0
    n_realz = 1
    (v_array, theta_array, freq_array, _pointer_list,
     _initial_v, _initial_f, _initial_theta) = make_input_for_binning_v_theta_freq(
        input_folder, n_realz, dt)
    new_v, new_theta, new_f = remove_duplicate_xy(v_array, theta_array,
                                                  freq_array)

    # build the bins and the mapping under test
    n_abs_log_class = 100
    n_theta_classes = 60
    abs_log_v_edges = abs_vel_log_bins_low_high(new_v, n_abs_log_class,
                                                n_low=5, max_allowed=0.03)
    theta_bin_edges = make_theta_bins_linear(n_theta_classes)
    mapping = mapping_v_theta_repeat(abs_log_v_edges, theta_bin_edges,
                                     new_v, new_theta, new_f)

    # reference 1d class indices computed directly from the data
    v_ref = mapping.find_1d_class_idx(np.log(new_v), mapping.v_log_edges)
    theta_ref = mapping.find_1d_class_idx(new_theta, mapping.theta_edges)

    # 2d and 3d class numbers of the same data
    class_2d = mapping.class_index_2d_vtheta(new_v, new_theta)
    class_3d = mapping.find_3d_class_number(class_2d, new_f)

    # going back from the 2d class must reproduce the 1d indices
    v_from_2d, theta_from_2d = mapping.class_index_1d_v_theta_from_2d(class_2d)
    assert (np.all(v_from_2d == v_ref))
    assert (np.all(theta_from_2d == theta_ref))

    # going back from the 3d class must land in the same 1d classes and
    # return the same frequencies
    n_samples = len(class_3d)
    v_back = np.zeros(n_samples)
    theta_back = np.zeros(n_samples)
    freq_back = np.zeros(n_samples)
    for k, cls in enumerate(class_3d):
        v_back[k], theta_back[k], freq_back[k] = mapping.find_v_theta_freq(cls)
    v_back_idx = mapping.find_1d_class_idx(np.log(v_back), mapping.v_log_edges)
    theta_back_idx = mapping.find_1d_class_idx(theta_back, mapping.theta_edges)
    assert (np.all(v_back_idx == v_ref))
    assert (np.all(theta_back_idx == theta_ref))
    assert (np.all(freq_back == new_f))
def get_init_class_count(self):
    """
    Count the initial entries falling in each 2d (velocity, angle) class.

    :return: array of size (n_2d_classes,) holding the number of initial
        entries per 2d class
    """
    v0, theta0, _ = remove_duplicate_xy(self.initial_v, self.initial_theta,
                                        self.initial_f)
    initial_classes = self.mapping.class_index_2d_vtheta(v0, theta0)
    counts = np.zeros(self.mapping.n_2d_classes)
    # one increment per entry, so repeated classes accumulate
    for class_idx in initial_classes:
        counts[class_idx] += 1
    return counts
def get_init_class_count(self, map_input):
    """
    Count the initial entries falling in each 3d class.

    :param map_input: object providing initial_v, initial_theta, initial_f
    :return: array of size (n_3d_classes,) holding the number of initial
        entries per 3d class
    """
    v0, theta0, f0 = remove_duplicate_xy(map_input.initial_v,
                                         map_input.initial_theta,
                                         map_input.initial_f)
    # 2d (v, theta) class first, then combined with the frequency
    classes_2d = self.mapping.class_index_2d_vtheta(v0, theta0)
    classes_3d = self.mapping.find_3d_class_number(classes_2d, f0)
    counts = np.zeros(self.mapping.n_3d_classes)
    # one increment per entry, so repeated classes accumulate
    for class_idx in classes_3d:
        counts[class_idx] += 1
    return counts
def get_trans_matrix_single_attrib(lag_array, n_realz, input_folder, mapping,
                                   time_step, prefix='real_', numbered=True,
                                   verbose=False):
    """
    Count velocity-class and angle-class transitions separately, producing one
    dense count matrix per attribute for every lag in ``lag_array``.

    :param lag_array: iterable of lags
    :param n_realz: number of realization files to read
    :param input_folder: folder holding the pickled realization files
    :param mapping: object providing v_log_edges, theta_edges,
        n_abs_v_classes, n_theta_classes and find_1d_class_idx
    :param time_step: time step passed to get_time_dx_dy_array_with_freq
    :param prefix: file name prefix; files are ``<prefix><j>.pkl`` when
        numbered, ``<prefix>.pkl`` otherwise
    :param numbered: whether the file names carry the realization number
    :param verbose: print progress every 20 realizations
    :return: (v_output_list, theta_output_list), each holding one count
        matrix per lag, in the order of lag_array
    :raises ValueError: if numbered is False and n_realz > 1
    """
    if (not numbered) and n_realz > 1:
        # a string cannot be raised; use a real exception carrying the message
        raise ValueError(
            'Expecting only one file when no numbers are used for the input data')
    v_log_edges = mapping.v_log_edges
    n_v_class = mapping.n_abs_v_classes
    n_theta_class = mapping.n_theta_classes
    theta_edges = mapping.theta_edges
    v_output_list = []
    theta_output_list = []
    for lag in lag_array:
        print('%s %s' % (" extracting matrices for lag = ", lag))
        v_count_matrix = np.zeros((n_v_class, n_v_class))
        t_count_matrix = np.zeros((n_theta_class, n_theta_class))
        for j in range(n_realz):
            if verbose and not j % 20:
                print('%s %s' % ('realziation ', j))
            if numbered:
                file_name = prefix + str(j) + ".pkl"
            else:
                file_name = prefix + ".pkl"
            input_file = os.path.join(input_folder, file_name)
            # NOTE: pickle.load can execute arbitrary code; only read trusted files
            with open(input_file, 'rb') as pkl_file:
                dataHolder = pickle.load(pkl_file)
            dx = np.diff(dataHolder.x_array)
            dy = np.diff(dataHolder.y_array)
            # NOTE(review): no small offset is added to dt here, so a zero
            # time increment divides by zero below - confirm this is intended
            dt = np.diff(dataHolder.t_array)
            if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
                print('some array was empty, skipping this file...')
                continue
            lastIdx = dataHolder.last_idx_array
            vxMatrix = np.divide(dx, dt)
            vyMatrix = np.divide(dy, dt)
            m = dx.shape[0]
            for i in range(m):
                x_start = dataHolder.x_array[i, 0]
                y_start = dataHolder.y_array[i, 0]
                # get the time process for each velocity
                cutOff = lastIdx[i]
                dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(
                    dt[i, :cutOff], vxMatrix[i, :cutOff], vyMatrix[i, :cutOff],
                    x_start, y_start, time_step)
                v_temp = np.sqrt(np.power(dxTime, 2) + np.power(dyTime, 2)) / time_step
                theta_temp = np.arctan2(dyTime, dxTime)
                # trajectories not longer than the lag are skipped
                if len(v_temp) > lag:
                    new_v, new_theta, new_f = remove_duplicate_xy(
                        v_temp, theta_temp, freq)
                    class_v = np.array(
                        mapping.find_1d_class_idx(np.log(new_v), v_log_edges),
                        dtype=int)
                    class_theta = np.array(
                        mapping.find_1d_class_idx(new_theta, theta_edges),
                        dtype=int)
                    # return values are not used; presumably the count
                    # matrices are updated in place
                    count_matrix_with_freq_one_trajectory(
                        v_count_matrix, lag, class_v, new_f)
                    count_matrix_with_freq_one_trajectory(
                        t_count_matrix, lag, class_theta, new_f)
        v_output_list.append(v_count_matrix)
        theta_output_list.append(t_count_matrix)
    return v_output_list, theta_output_list
def get_trans_matrix_from_scratch(self, lag, print_every=50, verbose=True):
    """
    Assemble the sparse transition-count matrix of the 3d classes for one
    lag, reading every realization file ``real_<j>.pkl`` found in
    ``self.input_folder``.

    :param lag: the lag used to derive the transition matrix
    :param print_every: progress is printed for every print_every-th file
    :param verbose: whether to print the per-file progress
    :return: csc_matrix of shape (n_3d_classes, n_3d_classes)
    """
    n_3d_class = self.mapping.n_3d_classes
    # row / column / value buffers and the index set handed to the fill routine
    rows, cols, vals = [], [], []
    seen_pairs = set()
    delta_t = self.time_step
    print('extracting trans matrix...')
    for realz in range(self.n_total_realz):
        if verbose and realz % print_every == 0:
            print('%s %s' % ('reading realization number: ', realz))
        pkl_path = os.path.join(self.input_folder,
                                "real_" + str(realz) + ".pkl")
        with open(pkl_path, 'rb') as pkl_file:
            holder = pickle.load(pkl_file)
        dx = np.diff(holder.x_array)
        dy = np.diff(holder.y_array)
        dt = np.diff(holder.t_array) + 1e-15
        if dx.shape[0] == 0 or dy.shape[0] == 0 or dt.shape[0] == 0:
            print('some array was empty, skipping this file...')
            continue
        last_idx = holder.last_idx_array
        vx = np.divide(dx, dt)
        vy = np.divide(dy, dt)
        for traj in range(dx.shape[0]):
            x0 = holder.x_array[traj, 0]
            y0 = holder.y_array[traj, 0]
            # only the first last_idx[traj] steps of the row are used
            end = last_idx[traj]
            dx_time, dy_time, freq = get_time_dx_dy_array_with_freq(
                dt[traj, :end], vx[traj, :end], vy[traj, :end],
                x0, y0, delta_t)
            speed = np.sqrt(np.power(dx_time, 2) + np.power(dy_time, 2)) / delta_t
            angle = np.arctan2(dy_time, dx_time)
            if len(speed) <= lag:
                # too short to contribute a transition at this lag
                continue
            new_v, new_theta, new_f = remove_duplicate_xy(speed, angle, freq)
            classes_2d = self.mapping.class_index_2d_vtheta(new_v, new_theta)
            classes_3d = self.mapping.find_3d_class_number(classes_2d, new_f)
            fill_one_trajectory_sparse_cython(lag, classes_3d, rows, cols,
                                              seen_pairs, vals)
    print('done')
    return csc_matrix((vals, (rows, cols)), shape=(n_3d_class, n_3d_class))
def test_convert_to_time_process_xyf_1():
    """
    Check get_time_dx_dy_array_with_freq followed by remove_duplicate_xy on a
    four-segment trajectory starting at the origin with a unit time step.
    """
    x_start = 0.0
    y_start = 0.0
    # builtin float is the dtype the removed np.float alias used to name
    dt_array = np.array([10.0, 0.5, 1.6, 0.9], dtype=float)
    vx_array = np.array([1.0, 2.0, 3.0, 4.0], dtype=float)
    vy_array = np.array([4.0, 3.0, 2.0, 1.0], dtype=float)
    deltaT = 1.0
    dx_array, dy_array, freq_array = get_time_dx_dy_array_with_freq(
        dt_array, vx_array, vy_array, x_start, y_start, deltaT)
    dx2, dy2, freq2 = remove_duplicate_xy(dx_array, dy_array, freq_array)
    expected_dx = np.array([1.0, 2.5, 3.0, 3.9])
    expected_dy = np.array([4.0, 2.5, 2.0, 1.1])
    expected_freq = np.array([10., 1., 1., 1.])
    diff_dx_norm = np.linalg.norm(dx2 - expected_dx)
    diff_dy_norm = np.linalg.norm(dy2 - expected_dy)
    print('%s %s' % ("norm(diff_dx): ", diff_dx_norm))
    tol = 1e-12
    assert (diff_dx_norm < tol)
    assert (diff_dy_norm < tol)
    assert (np.all(freq2 == expected_freq))
def get_init_class_count(self, map_input):
    """
    Count the initial entries per velocity class and per 2d (theta, y) class.

    :param map_input: object providing initial_v, initial_theta, initial_f
        and v_log_edges
    :return:
        init_v_class_count: initial count of the velocity class.
            size (n_abs_v_classes,)
        init_thetaY_class_count: initial count of the (theta, y) class.
            size (n_2d_theta_y_classes,)
    """
    v0, theta0, _ = remove_duplicate_xy(map_input.initial_v,
                                        map_input.initial_theta,
                                        map_input.initial_f)
    v_classes = self.mapping.find_1d_class_idx(np.log(v0),
                                               map_input.v_log_edges)
    # every initial entry is given a y value of zero
    y0 = np.zeros(len(theta0))
    theta_y_classes = self.mapping.class_index_2d_theta_y(theta0, y0)
    # one counter per class, filled entry by entry
    v_counts = np.zeros(self.mapping.n_abs_v_classes)
    theta_y_counts = np.zeros(self.mapping.n_2d_theta_y_classes)
    for v_class, theta_y_class in zip(v_classes, theta_y_classes):
        v_counts[v_class] += 1
        theta_y_counts[theta_y_class] += 1
    return v_counts, theta_y_counts
def get_init_class_count(self):
    """
    Count the initial entries per velocity class and per angle class.

    Both class series are derived from the output of remove_duplicate_xy so
    that the velocity and angle entries paired in the counting loop belong
    to the same initial entry.

    :return:
        init_v_class_count: initial count of the velocity class.
            size (n_abs_v_classes,)
        init_theta_class_count: initial count of the angle class.
            size (n_theta_classes,)
    """
    map_input = self.map_input
    mapping = self.mapping
    new_v, new_theta, new_f = remove_duplicate_xy(map_input.initial_v,
                                                  map_input.initial_theta,
                                                  map_input.initial_f)
    init_v_idx = mapping.find_1d_class_idx(np.log(new_v), mapping.v_log_edges)
    # use the de-duplicated angles (not map_input.initial_theta) so this
    # series lines up with init_v_idx
    init_theta_idx = mapping.find_1d_class_idx(new_theta,
                                               map_input.theta_edges)
    init_v_class_count = np.zeros(mapping.n_abs_v_classes)
    init_theta_class_count = np.zeros(mapping.n_theta_classes)
    for v_idx, theta_idx in zip(init_v_idx, init_theta_idx):
        init_v_class_count[v_idx] += 1
        init_theta_class_count[theta_idx] += 1
    return init_v_class_count, init_theta_class_count
def get_trans_matrix_single_attrib_both_methods_from_scratch(
        lag_array, n_realz, input_folder, mapping, time_step, prefix='real_',
        numbered=True, verbose=False):
    """
    Get the aggregate transition matrix both considering the frequency and not
    considering the frequency, corresponding to the stencil method and the
    extended stencil method.

    Each realization file is read once and every lag is processed on the
    loaded data.

    :param lag_array: sequence of lags
    :param n_realz: number of realization files to read
    :param input_folder: folder holding the pickled realization files
    :param mapping: object providing v_log_edges, theta_edges,
        n_abs_v_classes, n_theta_classes and find_1d_class_idx
    :param time_step: time step passed to get_time_dx_dy_array_with_freq
    :param prefix: file name prefix; files are ``<prefix><j>.pkl`` when
        numbered, ``<prefix>.pkl`` otherwise
    :param numbered: whether the file names carry the realization number
    :param verbose: print progress every 20 realizations
    :return: v_output_list, theta_output_list, v_output_list_nofreq,
        theta_output_list_nofreq; entry k of each list belongs to lag k.
        Each list has at least two entries (the historical fixed size) and
        grows with the number of lags.
    :raises ValueError: if numbered is False and n_realz > 1
    """
    if (not numbered) and n_realz > 1:
        # a string cannot be raised; use a real exception carrying the message
        raise ValueError(
            'Expecting only one file when no numbers are used for the input data')
    v_log_edges = mapping.v_log_edges
    n_v_class = mapping.n_abs_v_classes
    n_theta_class = mapping.n_theta_classes
    theta_edges = mapping.theta_edges
    lag_list = list(lag_array)
    # the lists used to be fixed at two entries, which raised IndexError for
    # a third lag; keep two as the minimum so existing callers see no change
    n_out = max(len(lag_list), 2)
    v_output_list = [np.zeros((n_v_class, n_v_class)) for _ in range(n_out)]
    theta_output_list = [np.zeros((n_theta_class, n_theta_class))
                         for _ in range(n_out)]
    v_output_list_nofreq = [np.zeros((n_v_class, n_v_class))
                            for _ in range(n_out)]
    theta_output_list_nofreq = [np.zeros((n_theta_class, n_theta_class))
                                for _ in range(n_out)]
    for j in range(n_realz):
        if verbose and not j % 20:
            print('%s %s' % ('realziation ', j))
        if numbered:
            file_name = prefix + str(j) + ".pkl"
        else:
            file_name = prefix + ".pkl"
        input_file = os.path.join(input_folder, file_name)
        # NOTE: pickle.load can execute arbitrary code; only read trusted files
        with open(input_file, 'rb') as pkl_file:
            dataHolder = pickle.load(pkl_file)
        dx = np.diff(dataHolder.x_array)
        dy = np.diff(dataHolder.y_array)
        # the tiny offset keeps the divisions below away from an exact zero
        dt = np.diff(dataHolder.t_array) + 1e-12
        if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
            print('some array was empty, skipping this file...')
            continue
        lastIdx = dataHolder.last_idx_array
        vxMatrix = np.divide(dx, dt)
        vyMatrix = np.divide(dy, dt)
        m = dx.shape[0]
        for i in range(m):
            x_start = dataHolder.x_array[i, 0]
            y_start = dataHolder.y_array[i, 0]
            # get the time process for each velocity
            cutOff = lastIdx[i]
            dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(
                dt[i, :cutOff], vxMatrix[i, :cutOff], vyMatrix[i, :cutOff],
                x_start, y_start, time_step)
            v_temp = np.sqrt(np.power(dxTime, 2) + np.power(dyTime, 2)) / time_step
            theta_temp = np.arctan2(dyTime, dxTime)
            new_v, new_theta, new_f = remove_duplicate_xy(v_temp, theta_temp, freq)
            # the class series do not depend on the lag: compute them once,
            # and only if at least one lag uses this trajectory
            class_v = None
            class_theta = None
            for idx_lag, lag in enumerate(lag_list):
                if len(new_v) <= lag:
                    continue
                if class_v is None:
                    class_v = np.array(
                        mapping.find_1d_class_idx(np.log(new_v), v_log_edges),
                        dtype=int)
                    class_theta = np.array(
                        mapping.find_1d_class_idx(new_theta, theta_edges),
                        dtype=int)
                count_matrix_with_freq_one_trajectory(
                    v_output_list[idx_lag], lag, class_v, new_f)
                count_matrix_with_freq_one_trajectory(
                    theta_output_list[idx_lag], lag, class_theta, new_f)
                # get the transition matrices for the extended method
                # (v, theta, f) -> input (v, theta)
                count_matrix_one_trajectory(
                    v_output_list_nofreq[idx_lag], lag, class_v)
                count_matrix_one_trajectory(
                    theta_output_list_nofreq[idx_lag], lag, class_theta)
    return (v_output_list, theta_output_list, v_output_list_nofreq,
            theta_output_list_nofreq)