def binning_input_v_theta_freq_y(input_folder,
                                 n_realizations,
                                 time_step,
                                 prefix='real',
                                 verbose=True):
    """
    Build long concatenated v, theta, y and freq arrays used for creating classes.

    For every j in ``range(n_realizations)`` the file ``<prefix>_<j>.pkl`` is
    unpickled from ``input_folder``. Each trajectory (row) of the loaded object
    is passed through ``get_time_dx_dy_array_with_freq`` and
    ``remove_duplicate_xy``; trajectories with fewer than two remaining entries
    are skipped. The kept trajectories are appended one after another.

    :param input_folder: folder containing the input realizations
    :param n_realizations: number of realizations to consider
    :param time_step: time step size
    :param prefix: prefix for input files
    :param verbose: whether to write output messages or not
    :return big_v_array: sqrt(dx^2 + dy^2) / time_step for all kept trajectories
    :return big_theta_array: arctan2(dy, dx) for all kept trajectories
    :return big_y_array: 0.0 followed by the cumulative sum of dy, per trajectory
    :return big_freq_array: freq values for all kept trajectories
    :return pointer_list: cumulative length of big_v_array after each trajectory
    :return initial_v: first velocity of each kept trajectory
    :return initial_f: first freq value of each kept trajectory
    :return initial_theta: first angle of each kept trajectory
    """

    def _join_chunks(chunks):
        # np.concatenate rejects an empty list, so handle "nothing kept" here.
        if not chunks:
            return np.array([], dtype=float)
        return np.asarray(np.concatenate(chunks), dtype=float)

    # Parenthesised single-argument print is valid in Python 2 and Python 3.
    if verbose:
        print("making long array for generating v, theta, frequency bins...")
    total_length = 0
    pointer_list = []
    initial_v = []
    initial_f = []
    initial_theta = []
    # Collect per-trajectory chunks and concatenate once at the end; growing
    # an array with np.hstack inside the loop copies it on every trajectory.
    v_chunks, theta_chunks, freq_chunks, y_chunks = [], [], [], []
    for j in range(n_realizations):
        if verbose:
            # two spaces reproduce the output of the former `print a, b`
            print("reading realization nr  {0}".format(j))
        case_name = prefix + "_" + str(j) + ".pkl"
        input_file = os.path.join(input_folder, case_name)
        # NOTE: unpickling can execute arbitrary code; only read trusted files.
        with open(input_file, 'rb') as pkl_file:
            data_holder = pickle.load(pkl_file)
        dx = np.diff(data_holder.x_array)
        dy = np.diff(data_holder.y_array)
        # presumably the tiny offset guards against zero time increments
        dt = np.diff(data_holder.t_array) + 1e-15
        last_idx = data_holder.last_idx_array
        vx_matrix = np.divide(dx, dt)
        vy_matrix = np.divide(dy, dt)
        for i in range(dx.shape[0]):
            x_start = data_holder.x_array[i, 0]
            y_start = data_holder.y_array[i, 0]
            # get the time process for each velocity
            # (averaging/integrating arrays in time)
            cut_off = last_idx[i]
            dx_time, dy_time, freq = get_time_dx_dy_array_with_freq(
                dt[i, :cut_off], vx_matrix[i, :cut_off],
                vy_matrix[i, :cut_off], x_start, y_start, time_step)
            if len(dx_time) < 1:
                continue
            dx_time, dy_time, freq = remove_duplicate_xy(
                dx_time, dy_time, freq)
            current_length = len(dx_time)
            if current_length <= 1:
                continue
            total_length += current_length
            current_v = np.sqrt(np.power(dx_time, 2) + np.power(dy_time, 2))
            current_theta = np.arctan2(dy_time, dx_time)
            # NOTE(review): the leading 0.0 makes this chunk one entry longer
            # than the v/theta chunk, so pointer_list offsets do not index
            # big_y_array directly -- confirm this is what callers expect.
            y_time = np.hstack((0.0, np.cumsum(dy_time)))
            v_chunks.append(current_v)
            theta_chunks.append(current_theta)
            freq_chunks.append(freq)
            y_chunks.append(y_time)
            pointer_list.append(total_length)
            # save the first velocity for initialization
            initial_v.append(current_v[0] / time_step)
            initial_theta.append(current_theta[0])
            initial_f.append(freq[0])
    big_v_array = _join_chunks(v_chunks)
    big_theta_array = _join_chunks(theta_chunks)
    big_freq_array = _join_chunks(freq_chunks)
    big_y_array = _join_chunks(y_chunks)
    assert (len(big_v_array) == len(big_freq_array))
    initial_v = np.array(initial_v)
    initial_f = np.array(initial_f)
    initial_theta = np.array(initial_theta)
    # divide the displacement magnitude by the time step to get velocity
    big_v_array /= time_step
    return big_v_array, big_theta_array, big_y_array, big_freq_array, \
           pointer_list, initial_v, initial_f, initial_theta
def average_all_realizations(input_folder,
                             n_realizations,
                             time_step,
                             save_folder,
                             n_combine=None,
                             prefix='real',
                             verbose=True,
                             print_every=20):
    """
    Save time-averaged trajectories and the arrays needed for creating bins.

    For every j in ``range(n_realizations)`` the file ``<prefix>_<j>.pkl`` is
    unpickled from ``input_folder``; each trajectory is passed through
    ``get_time_dx_dy_array_with_freq`` and ``remove_duplicate_xy`` and kept if
    more than one entry remains. Every ``n_combine`` realizations (and after
    the last one) a batch is written to ``save_folder``:

    * ``avg_cartesian_<k>.npz`` with keys DX, DY, F, Y, ptr, dt, n_realz
    * ``avg_polar_<k>.npz`` with keys V, Theta, F, ptr, dt, n_realz

    ``ptr`` holds the cumulative length after each kept trajectory and restarts
    from zero in every batch. After all realizations are processed,
    ``initial_arrays.npz`` (v, theta, f, dt; first value of every kept
    trajectory over all batches) and ``case_info.npz`` (n_out, n_input, dt)
    are written.

    :param input_folder: folder containing the input realizations
    :param n_realizations: total number of realizations
    :param time_step: time step size
    :param save_folder: full path to folder to save the averaged realizations;
        created if it does not exist
    :param n_combine: number of realizations to combine in each output file;
        None (or 0) combines all realizations into one file
    :param prefix: prefix for input files
    :param verbose: whether to write output messages or not
    :param print_every: output messages print frequency
    """

    def _flatten(chunks, dtype):
        # np.concatenate rejects an empty list, so handle empty batches here.
        if not chunks:
            return np.array([], dtype=dtype)
        return np.asarray(np.concatenate(chunks), dtype=dtype)

    # Parenthesised single-argument print is valid in Python 2 and Python 3.
    if verbose:
        print("averaging realizations...")
    if not n_combine:
        if verbose:
            print("n_combine == None --> saving all trajectories in one file...")
        n_combine = n_realizations
    # make folder for saving averaged realizations
    if not os.path.isdir(save_folder):
        os.makedirs(save_folder)
    total_length = 0
    # count number of output files
    counter = 0
    # count realizations per output file
    realz_count = 0
    pointer_list = []
    initial_v = []
    initial_f = []
    initial_theta = []
    big_dx_list, big_dy_list, big_freq_list = [], [], []
    big_v_list, big_theta_list, big_y_list = [], [], []
    # for each realization
    for j in range(n_realizations):
        if verbose and not j % print_every:
            # two spaces reproduce the output of the former `print a, b`
            print("reading realization nr  {0}".format(j))
        case_name = prefix + "_" + str(j) + ".pkl"
        input_file = os.path.join(input_folder, case_name)
        # NOTE: unpickling can execute arbitrary code; only read trusted files.
        with open(input_file, 'rb') as pkl_file:
            data_holder = pickle.load(pkl_file)
        dx = np.diff(data_holder.x_array)
        dy = np.diff(data_holder.y_array)
        # presumably the tiny offset guards against zero time increments
        dt = np.diff(data_holder.t_array) + 1e-15
        last_idx = data_holder.last_idx_array
        vx_matrix = np.divide(dx, dt)
        vy_matrix = np.divide(dy, dt)
        # read all the trajectories in this realization
        for i in range(dx.shape[0]):
            x_start = data_holder.x_array[i, 0]
            y_start = data_holder.y_array[i, 0]
            # get the time process for each velocity
            cut_off = last_idx[i]
            dx_time, dy_time, freq = get_time_dx_dy_array_with_freq(
                dt[i, :cut_off], vx_matrix[i, :cut_off],
                vy_matrix[i, :cut_off], x_start, y_start, time_step)
            if len(dx_time) < 1:
                continue
            dx_time, dy_time, freq = remove_duplicate_xy(
                dx_time, dy_time, freq)
            current_length = len(dx_time)
            if current_length <= 1:
                continue
            current_v = np.sqrt(np.power(dx_time, 2) + np.power(dy_time, 2))
            current_theta = np.arctan2(dy_time, dx_time)
            total_length += current_length
            big_dx_list.append(dx_time)
            big_dy_list.append(dy_time)
            big_v_list.append(current_v)
            big_theta_list.append(current_theta)
            big_y_list.append(np.cumsum(dy_time))
            big_freq_list.append(freq)
            pointer_list.append(total_length)
            # save the first velocity for initialization
            initial_v.append(current_v[0] / time_step)
            initial_theta.append(current_theta[0])
            initial_f.append(freq[0])
        realz_count += 1
        # A batch is complete after every n_combine realizations and after the
        # very last realization (which may close a partial batch).
        if (j + 1) % n_combine == 0 or j + 1 == n_realizations:
            if verbose:
                print('     -saving combined realizations')
            # save this batch and initialize the arrays for next batch
            big_dx_array = _flatten(big_dx_list, float)
            big_dy_array = _flatten(big_dy_list, float)
            big_freq_array = _flatten(big_freq_list, int)
            big_y_array = _flatten(big_y_list, float)
            # save these n_combine averaged realizations in cartesian frame
            save_path = os.path.join(save_folder,
                                     'avg_cartesian_' + str(counter) + '.npz')
            np.savez(save_path,
                     DX=big_dx_array,
                     DY=big_dy_array,
                     F=big_freq_array,
                     Y=big_y_array,
                     ptr=pointer_list,
                     dt=time_step,
                     n_realz=realz_count)
            # release the cartesian data before building the polar arrays
            big_dx_list, big_dy_list = [], []
            del big_dx_array, big_dy_array
            # save these n_combine averaged realizations in polar coordinates
            big_v_array = _flatten(big_v_list, float) / time_step
            big_theta_array = _flatten(big_theta_list, float)
            save_path = os.path.join(save_folder,
                                     'avg_polar_' + str(counter) + '.npz')
            np.savez(save_path,
                     V=big_v_array,
                     Theta=big_theta_array,
                     F=big_freq_array,
                     ptr=pointer_list,
                     dt=time_step,
                     n_realz=realz_count)
            big_v_list, big_theta_list = [], []
            big_freq_list, big_y_list = [], []
            # reset pointer array
            pointer_list = []
            total_length = 0
            del big_v_array, big_theta_array, big_freq_array, big_y_array
            counter += 1
            realz_count = 0
    initial_v = np.array(initial_v)
    initial_f = np.array(initial_f, dtype=int)
    initial_theta = np.array(initial_theta)
    # save the initial values for v, theta, f
    save_path = os.path.join(save_folder, 'initial_arrays.npz')
    np.savez(save_path,
             v=initial_v,
             theta=initial_theta,
             f=initial_f,
             dt=time_step)
    # save number of averaged realization files
    save_path = os.path.join(save_folder, 'case_info.npz')
    np.savez(save_path, n_out=counter, n_input=n_realizations, dt=time_step)
def make_input_for_binning_v_theta_freq(input_folder,
                                        n_realizations,
                                        time_step,
                                        prefix='real',
                                        verbose=True,
                                        print_every=20):
    """
    Build long concatenated v, theta and freq arrays for generating bins.

    For every j in ``range(n_realizations)`` the file ``<prefix>_<j>.pkl`` is
    unpickled from ``input_folder``. Each trajectory (row) of the loaded object
    is passed through ``get_time_dx_dy_array_with_freq`` and
    ``remove_duplicate_xy``; trajectories with fewer than two remaining entries
    are skipped. The kept trajectories are appended one after another.

    :param input_folder: folder containing the input realizations
    :param n_realizations: number of realizations to consider
    :param time_step: time step size
    :param prefix: prefix for input files
    :param verbose: whether to write output messages or not
    :param print_every: output messages print frequency
    :return big_v_array: sqrt(dx^2 + dy^2) / time_step for all kept trajectories
    :return big_theta_array: arctan2(dy, dx) for all kept trajectories
    :return big_freq_array: freq values for all kept trajectories
    :return pointer_list: cumulative length of big_v_array after each trajectory
    :return initial_v: first velocity of each kept trajectory
    :return initial_f: first freq value of each kept trajectory
    :return initial_theta: first angle of each kept trajectory
    """

    def _flatten(chunks):
        # np.concatenate rejects an empty list, so handle "nothing kept" here.
        if not chunks:
            return np.array([], dtype=float)
        return np.asarray(np.concatenate(chunks), dtype=float)

    # Parenthesised single-argument print is valid in Python 2 and Python 3.
    if verbose:
        print("making long array for generating v, theta, frequency bins...")
    total_length = 0
    pointer_list = []
    initial_v = []
    initial_f = []
    initial_theta = []
    big_v_list, big_theta_list, big_freq_list = [], [], []
    for j in range(n_realizations):
        if verbose and not j % print_every:
            # two spaces reproduce the output of the former `print a, b`
            print("reading realization nr  {0}".format(j))
        case_name = prefix + "_" + str(j) + ".pkl"
        input_file = os.path.join(input_folder, case_name)
        # NOTE: unpickling can execute arbitrary code; only read trusted files.
        with open(input_file, 'rb') as pkl_file:
            data_holder = pickle.load(pkl_file)
        dx = np.diff(data_holder.x_array)
        dy = np.diff(data_holder.y_array)
        # presumably the tiny offset guards against zero time increments
        dt = np.diff(data_holder.t_array) + 1e-15
        last_idx = data_holder.last_idx_array
        vx_matrix = np.divide(dx, dt)
        vy_matrix = np.divide(dy, dt)
        for i in range(dx.shape[0]):
            x_start = data_holder.x_array[i, 0]
            y_start = data_holder.y_array[i, 0]
            # get the time process for each velocity
            cut_off = last_idx[i]
            dx_time, dy_time, freq = get_time_dx_dy_array_with_freq(
                dt[i, :cut_off], vx_matrix[i, :cut_off],
                vy_matrix[i, :cut_off], x_start, y_start, time_step)
            if len(dx_time) < 1:
                continue
            dx_time, dy_time, freq = remove_duplicate_xy(
                dx_time, dy_time, freq)
            current_length = len(dx_time)
            if current_length <= 1:
                continue
            total_length += current_length
            current_v = np.sqrt(np.power(dx_time, 2) + np.power(dy_time, 2))
            current_theta = np.arctan2(dy_time, dx_time)
            big_v_list.append(current_v)
            big_theta_list.append(current_theta)
            big_freq_list.append(freq)
            pointer_list.append(total_length)
            # save the first velocity for initialization
            initial_v.append(current_v[0] / time_step)
            initial_theta.append(current_theta[0])
            initial_f.append(freq[0])
    # flatten the big lists
    big_v_array = _flatten(big_v_list)
    big_theta_array = _flatten(big_theta_list)
    big_freq_array = _flatten(big_freq_list)
    assert (len(big_v_array) == len(big_freq_array))
    initial_v = np.array(initial_v)
    initial_f = np.array(initial_f)
    initial_theta = np.array(initial_theta)
    # divide the displacement magnitude by the time step to get velocity
    big_v_array /= time_step
    return (big_v_array, big_theta_array, big_freq_array, pointer_list,
            initial_v, initial_f, initial_theta)