def get_trans_matrix(self, lag):
     """
     Build the sparse transition matrix over the 2d (v, theta) classes.

     Every realization file ``real_<j>.pkl`` in ``self.input_folder`` is
     unpickled; each trajectory is converted with
     ``get_time_dx_dy_array_with_freq`` using ``self.time_step`` and, when the
     result is longer than ``lag``, passed to
     ``fill_one_trajectory_sparse_with_freq_cython`` together with the shared
     (i, j, value) accumulators.

     When ``self.filter_length`` / ``self.filter_time`` are truthy, each
     trajectory is additionally cut at its own first point whose x / t value
     is not below the threshold. A trajectory that never reaches the
     threshold keeps its ``last_idx_array`` cutoff.

     :param lag: the lag used to derive the transition matrix; trajectories
                 whose converted length is not greater than it are skipped
     :return: csc_matrix of shape (n_2d_classes, n_2d_classes)
     """
     filter_length = self.filter_length
     if filter_length:
         print('using only points with x values less than ' + str(
             filter_length))
     filter_time = self.filter_time
     if filter_time:
         print('using only points with time less than ' + str(filter_time))
     n_2d_class = self.mapping.n_2d_classes
     # triplet accumulators handed to the cython fill routine
     i_list = []
     j_list = []
     ij_list = set()
     val_list = []
     time_step = self.time_step
     print('extracting trans matrix...')
     for j in range(self.n_total_realz):
         # '%s %s' reproduces the space the print statement put between items
         print('%s %s' % ('realization number: ', j))
         file_name = "real_" + str(j) + ".pkl"
         input_file = os.path.join(self.input_folder, file_name)
         # NOTE: pickle.load can execute arbitrary code; the realization
         # files are assumed to come from a trusted source.
         with open(input_file, 'rb') as pkl_file:
             dataHolder = pickle.load(pkl_file)
         dx = np.diff(dataHolder.x_array)
         dy = np.diff(dataHolder.y_array)
         # small offset keeps the divisions below away from zero
         dt = np.diff(dataHolder.t_array) + 1e-15
         if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
             print('some array was empty, skipping this file...')
             continue
         lastIdx = dataHolder.last_idx_array
         vxMatrix = np.divide(dx, dt)
         vyMatrix = np.divide(dy, dt)
         m = dx.shape[0]
         for i in range(m):
             x_start = dataHolder.x_array[i, 0]
             y_start = dataHolder.y_array[i, 0]
             # get the time process for each velocity
             cutOff = lastIdx[i]
             if filter_length:
                 # Look only at trajectory i: argmin over the full 2d array
                 # would return an index into the flattened array.
                 below_length = dataHolder.x_array[i, :] < filter_length
                 # argmin of an all-True mask is 0, which would wrongly
                 # drop a trajectory that never reaches the threshold.
                 if not below_length.all():
                     cutOff = min(cutOff, np.argmin(below_length))
             if filter_time:
                 below_time = dataHolder.t_array[i, :] < filter_time
                 if not below_time.all():
                     cutOff = min(cutOff, np.argmin(below_time))
             dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(
                 dt[i, :cutOff], vxMatrix[i, :cutOff], vyMatrix[i, :cutOff],
                 x_start, y_start, time_step)
             v_temp = np.sqrt(np.power(dxTime, 2) +
                              np.power(dyTime, 2)) / time_step
             theta_temp = np.arctan2(dyTime, dxTime)
             if len(v_temp) > lag:
                 new_v, new_theta, new_f = remove_duplicate_xy(
                     v_temp, theta_temp, freq)
                 class_2d = self.mapping.class_index_2d_vtheta(
                     new_v, new_theta)
                 # the frequencies are passed on as a C-int array
                 new_f = np.array(new_f, dtype=np.dtype("i"))
                 fill_one_trajectory_sparse_with_freq_cython(
                     lag, class_2d, new_f, i_list, j_list, ij_list,
                     val_list)
     print('done')
     return csc_matrix((val_list, (i_list, j_list)),
                       shape=(n_2d_class, n_2d_class))
 def get_trans_matrix_from_scratch(self, lag):
     """
     extract the transition matrix for velocity and angle for the given lag
     :param lag: the lag used to derive the transition matrix
     :return: v_trans_matrix, theta_trans_matrix for the given lag
     """
     # get the size of the transition matrices
     n_v_class, n_theta_class = self.mapping.n_abs_v_classes, self.mapping.n_theta_classes
     # initialize the sparse transition matrices
     i_list_v, j_list_v, val_list_v = [[] for _ in range(3)]
     i_list_theta, j_list_theta, val_list_theta = [[] for _ in range(3)]
     ij_set_v, ij_set_theta = [set([]) for _ in range(2)]
     time_step = self.time_step
     print 'extracting trans matrix for the velocity and angle process...'
     for j in range(self.n_total_realz):
         if not j % 5:
             print 'reading realization number: ', j
         file_name = "real_" + str(j) + ".pkl"
         input_file = os.path.join(self.input_folder, file_name)
         with open(input_file, 'rb') as input:
             dataHolder = pickle.load(input)
         dx = np.diff(dataHolder.x_array)
         dy = np.diff(dataHolder.y_array)
         dt = np.diff(dataHolder.t_array) + 1e-15
         if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
             print 'some array was empty, skipping this file...'
             continue
         lastIdx = dataHolder.last_idx_array
         vxMatrix = np.divide(dx, dt)
         vyMatrix = np.divide(dy, dt)
         m = dx.shape[0]
         for i in range(m):
             x_start = dataHolder.x_array[i, 0]
             y_start = dataHolder.y_array[i, 0]
             # get the time process for each velocity
             cutOff = lastIdx[i]
             dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(
                 dt[i, :cutOff], vxMatrix[i, :cutOff], vyMatrix[i, :cutOff],
                 x_start, y_start, time_step)
             v_temp = np.sqrt(np.power(dxTime, 2) +
                              np.power(dyTime, 2)) / time_step
             theta_temp = np.arctan2(dyTime, dxTime)
             if len(v_temp) > lag:
                 new_v, new_theta, new_f = remove_duplicate_xy(
                     v_temp, theta_temp, freq)
                 v_process_idx = self.mapping.find_1d_class_idx(
                     np.log(new_v), self.mapping.v_log_edges)
                 # fill the transition matrix for this velocity series
                 fill_one_trajectory_sparse_with_freq_cython(
                     lag, v_process_idx, new_f, i_list_v, j_list_v,
                     ij_set_v, val_list_v)
                 # fill the transition matrix for this angle series
                 theta_process_idx = self.mapping.find_1d_class_idx(
                     new_theta, self.mapping.theta_edges)
                 fill_one_trajectory_sparse_with_freq_cython(
                     lag, theta_process_idx, new_f, i_list_theta,
                     j_list_theta, ij_set_theta, val_list_theta)
     print 'done'
     return csc_matrix((val_list_v, (i_list_v, j_list_v)), shape = (n_v_class, n_v_class)), \
            csc_matrix((val_list_theta, (i_list_theta, j_list_theta)), shape = (n_theta_class, n_theta_class))
def test_mapping_theta_2():
    """
    Round-trip check of the class numbering in ``mapping_v_theta_repeat``:
    1d velocity/angle indices must survive the conversion to 2d classes and
    back, and (v, theta, f) must survive the conversion to 3d classes and back.
    """
    repo_root = os.path.dirname(os.path.dirname(__file__))
    input_folder = os.path.join(repo_root, 'test_related_files',
                                'particle_tracking_results')
    dt = 50.0
    n_realz = 1
    (v_array, theta_array, freq_array, _pointers,
     _init_v, _init_f, _init_theta) = make_input_for_binning_v_theta_freq(
        input_folder, n_realz, dt)
    new_v, new_theta, new_f = remove_duplicate_xy(v_array, theta_array,
                                                  freq_array)
    # bin edges for log-velocity and for the angle
    n_abs_log_class = 100
    n_theta_classes = 60
    abs_log_v_edges = abs_vel_log_bins_low_high(new_v, n_abs_log_class,
                                                n_low=5, max_allowed=0.03)
    theta_bin_edges = make_theta_bins_linear(n_theta_classes)
    mapping = mapping_v_theta_repeat(abs_log_v_edges, theta_bin_edges,
                                     new_v, new_theta, new_f)
    # reference 1d indices computed directly from the data
    ref_v_idx = mapping.find_1d_class_idx(np.log(new_v), mapping.v_log_edges)
    ref_theta_idx = mapping.find_1d_class_idx(new_theta, mapping.theta_edges)
    # forward mapping to 2d and 3d class numbers
    class_2d = mapping.class_index_2d_vtheta(new_v, new_theta)
    class_3d = mapping.find_3d_class_number(class_2d, new_f)
    # 2d -> 1d must reproduce the reference indices
    v_from_2d, theta_from_2d = mapping.class_index_1d_v_theta_from_2d(class_2d)
    assert (np.all(v_from_2d == ref_v_idx))
    assert (np.all(theta_from_2d == ref_theta_idx))
    # 3d -> (v, theta, f), one class number at a time
    n_samples = len(class_3d)
    v_back = np.zeros(n_samples)
    theta_back = np.zeros(n_samples)
    f_back = np.zeros(n_samples)
    for k, class_number in enumerate(class_3d):
        v_back[k], theta_back[k], f_back[k] = mapping.find_v_theta_freq(
            class_number)
    v_back_idx = mapping.find_1d_class_idx(np.log(v_back), mapping.v_log_edges)
    theta_back_idx = mapping.find_1d_class_idx(theta_back, mapping.theta_edges)
    assert (np.all(v_back_idx == ref_v_idx))
    assert (np.all(theta_back_idx == ref_theta_idx))
    assert (np.all(f_back == new_f))
 def get_init_class_count(self):
     """
     Count the initial (v, theta) samples per 2d class.

     The initial arrays stored on ``self`` are first passed through
     ``remove_duplicate_xy``.

     :return: array of size (n_2d_classes,) holding the count of each class
     """
     v0, theta0, _ = remove_duplicate_xy(self.initial_v,
                                         self.initial_theta,
                                         self.initial_f)
     classes = self.mapping.class_index_2d_vtheta(v0, theta0)
     counts = np.zeros(self.mapping.n_2d_classes)
     # explicit loop so that repeated class indices are each counted
     for class_idx in classes:
         counts[class_idx] += 1
     return counts
 def get_init_class_count(self, map_input):
     """
     Count the initial (v, theta, f) samples per 3d class.

     :param map_input: object providing initial_v, initial_theta and
                       initial_f; they are passed through
                       ``remove_duplicate_xy`` before classification
     :return: array of size (n_3d_classes,) holding the count of each class
     """
     v0, theta0, f0 = remove_duplicate_xy(map_input.initial_v,
                                          map_input.initial_theta,
                                          map_input.initial_f)
     classes_2d = self.mapping.class_index_2d_vtheta(v0, theta0)
     classes_3d = self.mapping.find_3d_class_number(classes_2d, f0)
     counts = np.zeros(self.mapping.n_3d_classes)
     # explicit loop so that repeated class indices are each counted
     for class_idx in classes_3d:
         counts[class_idx] += 1
     return counts
def get_trans_matrix_single_attrib(lag_array, n_realz, input_folder, mapping, time_step, prefix='real_',
                                   numbered=True, verbose=False):
    """
    Build one dense velocity matrix and one dense angle matrix per lag.

    For every lag, all realization files are read again and each trajectory
    longer than the lag is passed to ``count_matrix_with_freq_one_trajectory``
    for the velocity classes and for the angle classes.

    :param lag_array: iterable of lags
    :param n_realz: number of realization files to read
    :param input_folder: folder containing the pickled realizations
    :param mapping: object providing v_log_edges, theta_edges,
                    n_abs_v_classes, n_theta_classes and find_1d_class_idx
    :param time_step: time step passed to get_time_dx_dy_array_with_freq
    :param prefix: file name prefix; files are ``<prefix><j>.pkl`` when
                   numbered, otherwise the single file ``<prefix>.pkl``
    :param numbered: whether the file names carry the realization number
    :param verbose: print progress every 20 realizations
    :return: v_output_list, theta_output_list -- one
             (n_abs_v_classes, n_abs_v_classes) array and one
             (n_theta_classes, n_theta_classes) array per lag
    :raises ValueError: if numbered is False and n_realz > 1
    """
    if (not numbered) and n_realz > 1:
        # a bare string is not a valid exception object
        raise ValueError('Expecting only one file when no numbers are used for the input data')
    v_log_edges = mapping.v_log_edges
    n_v_class = mapping.n_abs_v_classes
    n_theta_class = mapping.n_theta_classes
    theta_edges = mapping.theta_edges
    v_output_list = []
    theta_output_list = []
    for lag in lag_array:
        # '%s %s' reproduces the space the print statement put between items
        print('%s %s' % (" extracting matrices for lag = ", lag))
        v_count_matrix = np.zeros((n_v_class, n_v_class))
        t_count_matrix = np.zeros((n_theta_class, n_theta_class))
        for j in range(n_realz):
            if verbose and not j % 20:
                print('%s %s' % ('realziation ', j))
            if numbered:
                file_name = prefix + str(j) + ".pkl"
            else:
                file_name = prefix + ".pkl"
            input_file = os.path.join(input_folder, file_name)
            # NOTE: pickle.load can execute arbitrary code; the realization
            # files are assumed to come from a trusted source.
            with open(input_file, 'rb') as pkl_file:
                dataHolder = pickle.load(pkl_file)
            dx = np.diff(dataHolder.x_array)
            dy = np.diff(dataHolder.y_array)
            # NOTE(review): sibling routines add a small epsilon to dt before
            # dividing; here a zero dt yields inf/nan velocities -- confirm
            # whether that is intended.
            dt = np.diff(dataHolder.t_array)
            if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
                print('some array was empty, skipping this file...')
                continue
            lastIdx = dataHolder.last_idx_array
            vxMatrix = np.divide(dx, dt)
            vyMatrix = np.divide(dy, dt)
            m = dx.shape[0]
            for i in range(m):
                x_start = dataHolder.x_array[i, 0]
                y_start = dataHolder.y_array[i, 0]
                # get the time process for each velocity
                cutOff = lastIdx[i]
                dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(dt[i, :cutOff], vxMatrix[i, :cutOff],
                                                                      vyMatrix[i, :cutOff], x_start, y_start,
                                                                      time_step)
                v_temp = np.sqrt(np.power(dxTime, 2) + np.power(dyTime, 2)) / time_step
                theta_temp = np.arctan2(dyTime, dxTime)
                if len(v_temp) > lag:
                    new_v, new_theta, new_f = remove_duplicate_xy(v_temp, theta_temp, freq)
                    class_v = np.array(mapping.find_1d_class_idx(np.log(new_v), v_log_edges), dtype=int)
                    class_theta = np.array(mapping.find_1d_class_idx(new_theta, theta_edges), dtype=int)
                    count_matrix_with_freq_one_trajectory(v_count_matrix, lag, class_v, new_f)
                    count_matrix_with_freq_one_trajectory(t_count_matrix, lag, class_theta, new_f)
        v_output_list.append(v_count_matrix)
        theta_output_list.append(t_count_matrix)
    return v_output_list, theta_output_list
 def get_trans_matrix_from_scratch(self, lag, print_every=50, verbose=True):
     n_3d_class = self.mapping.n_3d_classes
     i_list = []
     j_list = []
     ij_list = set([])
     val_list = []
     time_step = self.time_step
     print 'extracting trans matrix...'
     for j in range(self.n_total_realz):
         if verbose and not j % print_every:
             print 'reading realization number: ', j
         file_name = "real_" + str(j) + ".pkl"
         input_file = os.path.join(self.input_folder, file_name)
         with open(input_file, 'rb') as input:
             dataHolder = pickle.load(input)
         dx = np.diff(dataHolder.x_array)
         dy = np.diff(dataHolder.y_array)
         dt = np.diff(dataHolder.t_array) + 1e-15
         if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
             print 'some array was empty, skipping this file...'
             continue
         lastIdx = dataHolder.last_idx_array
         vxMatrix = np.divide(dx, dt)
         vyMatrix = np.divide(dy, dt)
         m = dx.shape[0]
         for i in range(m):
             x_start = dataHolder.x_array[i, 0]
             y_start = dataHolder.y_array[i, 0]
             # get the time process for each velocity
             cutOff = lastIdx[i]
             dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(
                 dt[i, :cutOff], vxMatrix[i, :cutOff], vyMatrix[i, :cutOff],
                 x_start, y_start, time_step)
             v_temp = np.sqrt(np.power(dxTime, 2) +
                              np.power(dyTime, 2)) / time_step
             theta_temp = np.arctan2(dyTime, dxTime)
             if len(v_temp) > lag:
                 new_v, new_theta, new_f = remove_duplicate_xy(
                     v_temp, theta_temp, freq)
                 class_2d = self.mapping.class_index_2d_vtheta(
                     new_v, new_theta)
                 class_3d_array = self.mapping.find_3d_class_number(
                     class_2d, new_f)
                 fill_one_trajectory_sparse_cython(lag, class_3d_array,
                                                   i_list, j_list, ij_list,
                                                   val_list)
     print 'done'
     return csc_matrix((val_list, (i_list, j_list)),
                       shape=(n_3d_class, n_3d_class))
def test_convert_to_time_process_xyf_1():
    """
    Run ``get_time_dx_dy_array_with_freq`` followed by ``remove_duplicate_xy``
    on a four-segment trajectory with unit time step and compare dx, dy and
    the frequencies against fixed expected arrays.
    """
    x_start = 0.0
    y_start = 0.0
    # builtin float is used because the np.float alias was removed from NumPy
    dt_array = np.array([10.0, 0.5, 1.6, 0.9], dtype=float)
    vx_array = np.array([1.0, 2.0, 3.0, 4.0], dtype=float)
    vy_array = np.array([4.0, 3.0, 2.0, 1.0], dtype=float)
    deltaT = 1.0
    dx_array, dy_array, freq_array = get_time_dx_dy_array_with_freq(
        dt_array, vx_array, vy_array, x_start, y_start, deltaT)
    dx2, dy2, freq2 = remove_duplicate_xy(dx_array, dy_array, freq_array)
    expected_dx = np.array([1.0, 2.5, 3.0, 3.9])
    expected_dy = np.array([4.0, 2.5, 2.0, 1.1])
    expected_freq = np.array([10., 1., 1., 1.])
    diff_dx_norm = np.linalg.norm(dx2 - expected_dx)
    diff_dy_norm = np.linalg.norm(dy2 - expected_dy)
    # '%s %s' reproduces the space the print statement put between items
    print('%s %s' % ("norm(diff_dx): ", diff_dx_norm))
    tol = 1e-12
    assert (diff_dx_norm < tol)
    assert (diff_dy_norm < tol)
    assert (np.all(freq2 == expected_freq))
 def get_init_class_count(self, map_input):
     """
     Count the initial samples per velocity class and per 2d (theta, y) class.

     :param map_input: object providing initial_v, initial_theta, initial_f
                       and v_log_edges
     :return:
      init_v_class_count: initial count of the velocity class. size (n_abs_v_classes,)
      init_thetaY_class_count: initial count of the (theta, y) class. size (n_2d_theta_y_classes,)
     """
     mapping = self.mapping
     v0, theta0, _ = remove_duplicate_xy(map_input.initial_v,
                                         map_input.initial_theta,
                                         map_input.initial_f)
     v_classes = mapping.find_1d_class_idx(np.log(v0),
                                           map_input.v_log_edges)
     # every initial path sits at zero distance from injection, hence the
     # all-zero y argument
     zero_y = np.zeros(len(theta0))
     theta_y_classes = mapping.class_index_2d_theta_y(theta0, zero_y)
     # one counter array per attribute
     v_counts = np.zeros(mapping.n_abs_v_classes)
     theta_y_counts = np.zeros(mapping.n_2d_theta_y_classes)
     for v_cls, theta_y_cls in zip(v_classes, theta_y_classes):
         v_counts[v_cls] += 1
         theta_y_counts[theta_y_cls] += 1
     return v_counts, theta_y_counts
 def get_init_class_count(self):
     """
     Count the initial samples per velocity class and per angle class.

     The initial arrays of ``self.map_input`` are passed through
     ``remove_duplicate_xy`` and both the velocity and the angle classes are
     computed from the returned arrays, so the two index series stay
     aligned.

     :return:
      init_v_class_count: initial count of the velocity class. size (n_abs_v_classes,)
      init_theta_class_count: initial count of the angle class. size (n_theta_classes,)
     """
     map_input = self.map_input
     mapping = self.mapping
     new_v, new_theta, new_f = remove_duplicate_xy(map_input.initial_v,
                                                   map_input.initial_theta,
                                                   map_input.initial_f)
     init_v_idx = mapping.find_1d_class_idx(np.log(new_v),
                                            mapping.v_log_edges)
     # classify the de-duplicated angles (not the raw initial_theta) so that
     # they correspond to the same samples as init_v_idx
     init_theta_idx = mapping.find_1d_class_idx(new_theta,
                                                map_input.theta_edges)
     init_v_class_count = np.zeros(mapping.n_abs_v_classes)
     init_theta_class_count = np.zeros(mapping.n_theta_classes)
     for v_idx, theta_idx in zip(init_v_idx, init_theta_idx):
         init_v_class_count[v_idx] += 1
         init_theta_class_count[theta_idx] += 1
     return init_v_class_count, init_theta_class_count
def get_trans_matrix_single_attrib_both_methods_from_scratch(lag_array, n_realz, input_folder, mapping, time_step,
                                                             prefix='real_', numbered=True, verbose=False):
    """
    Get the aggregate transition matrix both considering the frequency and not considering the frequency
    corresponding to the stencil method and the extended stencil method

    Each realization file is read once and every trajectory is accumulated
    into the matrices of all lags it is long enough for.

    :param lag_array: iterable of lags; entry k fills index k of every output list
    :param n_realz: number of realization files to read
    :param input_folder: folder containing the pickled realizations
    :param mapping: object providing v_log_edges, theta_edges,
                    n_abs_v_classes, n_theta_classes and find_1d_class_idx
    :param time_step: time step passed to get_time_dx_dy_array_with_freq
    :param prefix: file name prefix; files are ``<prefix><j>.pkl`` when
                   numbered, otherwise the single file ``<prefix>.pkl``
    :param numbered: whether the file names carry the realization number
    :param verbose: print progress every 20 realizations
    :return: v_output_list, theta_output_list, v_output_list_nofreq,
             theta_output_list_nofreq -- lists of dense arrays with one entry
             per lag (never fewer than two entries, as before)
    :raises ValueError: if numbered is False and n_realz > 1
    """
    if (not numbered) and n_realz > 1:
        # a bare string is not a valid exception object
        raise ValueError('Expecting only one file when no numbers are used for the input data')
    v_log_edges = mapping.v_log_edges
    n_v_class = mapping.n_abs_v_classes
    n_theta_class = mapping.n_theta_classes
    theta_edges = mapping.theta_edges
    lag_list = list(lag_array)
    # The lists used to be hard-coded to two entries, which raised IndexError
    # for more than two lags; keep at least two for existing callers.
    n_out = max(2, len(lag_list))
    v_output_list = [np.zeros((n_v_class, n_v_class)) for _ in range(n_out)]
    theta_output_list = [np.zeros((n_theta_class, n_theta_class)) for _ in range(n_out)]
    v_output_list_nofreq = [np.zeros((n_v_class, n_v_class)) for _ in range(n_out)]
    theta_output_list_nofreq = [np.zeros((n_theta_class, n_theta_class)) for _ in range(n_out)]
    for j in range(n_realz):
        if verbose and not j % 20:
            # '%s %s' reproduces the space the print statement put between items
            print('%s %s' % ('realziation ', j))
        if numbered:
            file_name = prefix + str(j) + ".pkl"
        else:
            file_name = prefix + ".pkl"
        input_file = os.path.join(input_folder, file_name)
        # NOTE: pickle.load can execute arbitrary code; the realization
        # files are assumed to come from a trusted source.
        with open(input_file, 'rb') as pkl_file:
            dataHolder = pickle.load(pkl_file)
        dx = np.diff(dataHolder.x_array)
        dy = np.diff(dataHolder.y_array)
        # small offset keeps the divisions below away from zero
        dt = np.diff(dataHolder.t_array) + 1e-12
        if not (dx.shape[0] and dy.shape[0] and dt.shape[0]):
            print('some array was empty, skipping this file...')
            continue
        lastIdx = dataHolder.last_idx_array
        vxMatrix = np.divide(dx, dt)
        vyMatrix = np.divide(dy, dt)
        m = dx.shape[0]
        for i in range(m):
            x_start = dataHolder.x_array[i, 0]
            y_start = dataHolder.y_array[i, 0]
            # get the time process for each velocity
            cutOff = lastIdx[i]
            dxTime, dyTime, freq = get_time_dx_dy_array_with_freq(dt[i, :cutOff], vxMatrix[i, :cutOff],
                                                                  vyMatrix[i, :cutOff], x_start, y_start,
                                                                  time_step)
            v_temp = np.sqrt(np.power(dxTime, 2) + np.power(dyTime, 2)) / time_step
            theta_temp = np.arctan2(dyTime, dxTime)
            new_v, new_theta, new_f = remove_duplicate_xy(v_temp, theta_temp, freq)
            for idx_lag, lag in enumerate(lag_list):
                if len(new_v) > lag:
                    class_v = np.array(mapping.find_1d_class_idx(np.log(new_v), v_log_edges), dtype=int)
                    class_theta = np.array(mapping.find_1d_class_idx(new_theta, theta_edges), dtype=int)
                    count_matrix_with_freq_one_trajectory(v_output_list[idx_lag], lag, class_v, new_f)
                    count_matrix_with_freq_one_trajectory(theta_output_list[idx_lag], lag, class_theta, new_f)
                    # get the transition matrices for the extended method (v, theta, f) ->
                    # input (v,theta)
                    count_matrix_one_trajectory(v_output_list_nofreq[idx_lag], lag, class_v)
                    count_matrix_one_trajectory(theta_output_list_nofreq[idx_lag], lag, class_theta)
    return v_output_list, theta_output_list, v_output_list_nofreq, theta_output_list_nofreq