Exemple #1
0
def main():
    """Plot how the incrementally fitted PCA plane approaches the final PCA plane.

    The common command-line arguments are parsed (``n_comp_to_use`` and
    ``pc1_chunk_size`` are required). ``do_pca`` supplies the final principal
    components (``result["first_n_pcs"]``). The parameter trajectory is then
    streamed chunk by chunk through an ``IncrementalPCA``; after each chunk the
    value returned by ``cal_angle_between_nd_planes`` for the components fitted
    so far versus the final components is recorded, and the series is plotted
    with ``plot_2d``.
    """
    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, _ = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=False,
                    origin="mean_param",
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=True)
    logger.debug("after pca")

    final_pcs = result["first_n_pcs"]

    n_comp = cma_args.n_comp_to_use
    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    plane_angles_vs_final_plane_along_the_way = []
    ipca = IncrementalPCA(n_components=n_comp)
    for chunk in all_param_iterator:
        logger.log(f"currently at {all_param_iterator._currow}")

        # partial_fit rejects a batch with fewer rows than n_components, which
        # can happen for a short trailing chunk; skip it like the sibling
        # scripts in this file do.
        if chunk.shape[0] < n_comp:
            logger.log(f"skipping chunk with too few rows: {chunk.shape[0]}")
            continue

        ipca.partial_fit(chunk.values)

        first_n_pcs = ipca.components_[:n_comp]
        assert final_pcs.shape[0] == first_n_pcs.shape[0]

        plane_angle = cal_angle_between_nd_planes(first_n_pcs, final_pcs)
        plane_angles_vs_final_plane_along_the_way.append(plane_angle)

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)
    plane_angles_vs_final_plane_plot_dir = \
        get_plane_angles_vs_final_plane_along_the_way_plot_dir(
            plot_dir, cma_args.n_comp_to_use)
    os.makedirs(plane_angles_vs_final_plane_plot_dir, exist_ok=True)

    # The trailing space is part of the original plot name; kept so output
    # file names do not change.
    angles_plot_name = "plane_angles_vs_final_plane_plot_dir "
    plot_2d(plane_angles_vs_final_plane_plot_dir,
            angles_plot_name,
            np.arange(len(plane_angles_vs_final_plane_along_the_way)),
            plane_angles_vs_final_plane_along_the_way,
            "num of chunks",
            "angle with diff in degrees",
            False)
def main():
    """Plot the angle between each principal component and (final - start) parameters.

    The common command-line arguments are parsed, ``do_pca`` is run with the
    mean parameter as origin, and the start parameters are read from the
    trajectory directory. For every row of ``result["pcs_components"]`` the
    value of ``cal_angle(pc, final_params - start_params)`` is collected, and
    the series is plotted against the component index with ``plot_2d``.
    """
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, _ = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    from stable_baselines.low_dim_analysis.common import do_pca, plot_2d

    origin = "mean_param"
    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=False,
                    origin=origin,
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size)

    final_params = result["final_concat_params"]
    all_pcs = result["pcs_components"]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    # The displacement does not depend on the component, so compute it once
    # instead of once per principal component.
    displacement = final_params - start_params
    angles = [cal_angle(pc, displacement) for pc in all_pcs]

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    angles_plot_name = f"angles with final - start start n_comp:{all_pcs.shape[0]} dim space of mean pca plane, "
    plot_2d(plot_dir, angles_plot_name, np.arange(all_pcs.shape[0]), angles,
            "num of pcs", "angle with diff", False)
Exemple #3
0
def main():
    """Track two angle series along the trajectory and plot both.

    For every chunk of the parameter trajectory:
      * the first component of a running one-component ``IncrementalPCA`` is
        compared (via ``cal_angle``) with the vector from the chunk's last
        row to the final parameters;
      * every row of the matching gradient chunk is compared (via
        ``cal_angle``) with the vector from the corresponding parameter row
        to the final parameters.

    Both series are written out with ``plot_2d``.
    """
    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    parser = get_common_parser()
    cma_args, cma_unknown_args = parser.parse_known_args()

    run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(run_dir)
    intermediate_data_dir = get_intermediate_data_dir(run_dir)
    save_dir = get_save_dir(run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "final"),
        header=None).values[0]

    param_chunks = get_allinone_concat_df(dir_name=traj_params_dir_name,
                                          use_IPCA=True,
                                          chunk_size=cma_args.pc1_chunk_size)
    grad_chunks = get_allinone_concat_df(dir_name=traj_params_dir_name,
                                         use_IPCA=True,
                                         chunk_size=cma_args.pc1_chunk_size,
                                         index="grads")

    pc1_angles = []
    grad_angles = []
    running_pca = IncrementalPCA(1)
    for param_chunk in param_chunks:
        logger.log(f"currently at {param_chunks._currow}")

        param_rows = param_chunk.values
        # Direction from the newest parameters in this chunk to the final ones.
        to_final = final_params - param_rows[-1]

        running_pca.partial_fit(param_rows)
        pc1_angles.append(cal_angle(to_final, running_pca.components_[0]))

        # Gradient chunks are consumed in lockstep with the parameter chunks.
        grad_rows = next(grad_chunks).values
        grad_angles.extend(
            cal_angle(grad, final_params - param_rows[row])
            for row, grad in enumerate(grad_rows))

    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    pc1_plot_name = (f"final - current VS so far pc1"
                     f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size}")
    plot_2d(plot_dir,
            pc1_plot_name,
            np.arange(len(pc1_angles)),
            pc1_angles,
            "num of chunks",
            "angle with diff in degrees",
            False)

    grad_plot_name = (f"##final - current param VS current grad"
                      f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size}")
    plot_2d(plot_dir,
            grad_plot_name,
            np.arange(len(grad_angles)),
            grad_angles,
            "num of chunks",
            "angle with diff in degrees",
            False)
Exemple #4
0
def main():
    """Run CMA-ES in the plane of the first two incremental-PCA components and plot the results.

    Steps, in order:
      1. Parse the common CLI arguments and derive the run's directories.
      2. Fit a 2-component ``IncrementalPCA`` over the parameter trajectory
         CSV, read in chunks of 10000 rows.
      3. Project the trajectory onto those two components, using the PCA
         mean as the origin.
      4. Run ``cma.CMAEvolutionStrategy`` starting from the projection of
         the final parameters, scoring candidates with ``eval_return`` until
         ``cma_args.cma_num_timesteps`` timesteps are spent or ``es.stop()``
         becomes truthy.
      5. Dump the path of CMA means, plot the return statistics, evaluate a
         grid of points for a contour plot, and plot the distance of each
         CMA mean to the final parameters.
    """

    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    # Only used below to name the plot directory and the contour plot.
    origin = "mean_param"

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)

    cma_run_num, cma_intermediate_data_dir = generate_run_dir(
        get_cma_returns_dirname,
        intermediate_dir=intermediate_data_dir,
        n_comp=cma_args.n_comp_to_use)
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_param = pd.read_csv(final_file, header=None).values[0]

    # Incremental PCA so the trajectory can be fitted chunk by chunk.
    final_pca = IncrementalPCA(n_components=2)  # for sparse PCA to speed up

    theta_file = get_full_param_traj_file_path(traj_params_dir_name, 0)
    concat_df = pd.read_csv(theta_file, header=None, chunksize=10000)

    tic = time.time()
    for chunk in concat_df:
        logger.log(f"currnet at : {concat_df._currow}")

        # A chunk with fewer than 2 rows cannot be fed to a 2-component
        # partial_fit, so it is dropped.
        if chunk.shape[0] < 2:
            logger.log(f"last column too few: {chunk.shape[0]}")
            continue
        final_pca.partial_fit(chunk.values)

    toc = time.time()
    logger.log(
        '\nElapsed time computing the chunked PCA {:.2f} s\n'.format(toc -
                                                                     tic))

    logger.log(final_pca.explained_variance_ratio_)

    pcs_components = final_pca.components_

    first_2_pcs = pcs_components[:2]
    mean_param = final_pca.mean_

    # The PCA mean is the origin of the 2-D coordinate system used below.
    origin_param = mean_param

    # Re-open the CSV: the previous chunk reader was consumed by the fit loop.
    theta_file = get_full_param_traj_file_path(traj_params_dir_name, 0)
    concat_df = pd.read_csv(theta_file, header=None, chunksize=10000)

    proj_coords = do_proj_on_first_n_IPCA(concat_df, first_2_pcs, origin_param)
    '''
    ==========================================================================================
    eval all xy coords
    ==========================================================================================
    '''


    # Function-local import: these names are local to main() from here on.
    from stable_baselines.low_dim_analysis.common import plot_contour_trajectory, gen_subspace_coords,do_eval_returns, \
        get_allinone_concat_df, do_proj_on_first_n

    from stable_baselines.ppo2.run_mujoco import eval_return

    # CMA starts from the projection of the final parameters.
    last_proj_coord = do_proj_on_first_n(final_param, first_2_pcs,
                                         origin_param)
    starting_coord = last_proj_coord

    tic = time.time()

    #TODO better starting locations, record how many samples,

    logger.log(f"CMAES STARTING :{starting_coord}")
    # Second argument is the initial step size (sigma0) in the pycma API.
    es = cma.CMAEvolutionStrategy(starting_coord, 5)
    total_num_of_evals = 0
    total_num_timesteps = 0

    # Per-generation statistics of the evaluated returns.
    mean_rets = []
    min_rets = []
    max_rets = []
    eval_returns = None

    optimization_path = []
    while total_num_timesteps < cma_args.cma_num_timesteps and not es.stop():
        solutions = es.ask()
        optimization_path.extend(solutions)
        # Map each 2-D candidate back to the full parameter space.
        thetas = [
            np.matmul(coord, first_2_pcs) + origin_param for coord in solutions
        ]
        logger.log(
            f"current time steps num: {total_num_timesteps} total time steps: {cma_args.cma_num_timesteps}"
        )
        eval_returns = Parallel(n_jobs=cma_args.cores_to_use) \
            (delayed(eval_return)(cma_args, save_dir, theta, cma_args.eval_num_timesteps, i) for
             (i, theta) in enumerate(thetas))

        mean_rets.append(np.mean(eval_returns))
        min_rets.append(np.min(eval_returns))
        max_rets.append(np.max(eval_returns))

        total_num_of_evals += len(eval_returns)
        total_num_timesteps += cma_args.eval_num_timesteps * len(eval_returns)

        logger.log(f"current eval returns: {str(eval_returns)}")
        logger.log(f"total timesteps so far: {total_num_timesteps}")
        # CMA-ES minimizes its objective, so returns are negated before tell().
        negative_eval_returns = [-r for r in eval_returns]

        es.tell(solutions, negative_eval_returns)
        es.logger.add()  # write data to disc to be plotted
        es.disp()

    toc = time.time()
    logger.log(
        f"####################################CMA took {toc-tic} seconds")

    es_logger = es.logger

    # Load the logged data from disk if the logger does not hold it yet.
    if not hasattr(es_logger, 'xmean'):
        es_logger.load()

    n_comp_used = first_2_pcs.shape[0]
    # NOTE(review): presumably the first five columns of xmean are bookkeeping
    # and the mean vector starts at column 5 -- confirm against the pycma
    # CMADataLogger documentation.
    optimization_path_mean = np.vstack(
        (starting_coord, es_logger.xmean[:, 5:5 + n_comp_used]))

    dump_rows_write_csv(cma_intermediate_data_dir, optimization_path_mean,
                        "opt_mean_path")

    plot_dir = get_plot_dir(cma_args)
    cma_plot_dir = get_cma_plot_dir(plot_dir,
                                    cma_args.n_comp_to_use,
                                    cma_run_num,
                                    origin=origin)
    if not os.path.exists(cma_plot_dir):
        os.makedirs(cma_plot_dir)

    ret_plot_name = f"cma return on {cma_args.n_comp_to_use} dim space of real pca plane, " \
                    f"explained {np.sum(final_pca.explained_variance_ratio_[:2])}"
    plot_cma_returns(cma_plot_dir,
                     ret_plot_name,
                     mean_rets,
                     min_rets,
                     max_rets,
                     show=False)

    assert proj_coords.shape[1] == 2

    # The grid is generated from both the projected trajectory and the CMA
    # mean path, stacked row-wise and transposed.
    xcoordinates_to_eval, ycoordinates_to_eval = gen_subspace_coords(
        cma_args,
        np.vstack((proj_coords, optimization_path_mean)).T)

    # Repeats the import made earlier in this function.
    from stable_baselines.ppo2.run_mujoco import eval_return
    # One full-space parameter vector per grid point, y-major ordering.
    thetas_to_eval = [
        origin_param + x * first_2_pcs[0] + y * first_2_pcs[1]
        for y in ycoordinates_to_eval for x in xcoordinates_to_eval
    ]

    tic = time.time()

    eval_returns = Parallel(n_jobs=-1, max_nbytes='100M') \
        (delayed(eval_return)(cma_args, save_dir, theta, cma_args.eval_num_timesteps, i) for (i, theta) in
         enumerate(thetas_to_eval))
    toc = time.time()
    logger.log(
        f"####################################1st version took {toc-tic} seconds"
    )

    plot_contour_trajectory(
        cma_plot_dir,
        f"cma redo___{origin}_origin_eval_return_contour_plot",
        xcoordinates_to_eval,
        ycoordinates_to_eval,
        eval_returns,
        proj_coords[:, 0],
        proj_coords[:, 1],
        final_pca.explained_variance_ratio_,
        num_levels=25,
        show=False,
        sub_alg_path=optimization_path_mean.T)

    # Map every CMA mean back to the full parameter space (mean_param is the
    # same object as origin_param in this function).
    opt_mean_path_in_old_basis = [
        mean_projected_param.dot(first_2_pcs) + mean_param
        for mean_projected_param in optimization_path_mean
    ]
    # Euclidean distance of each CMA mean to the final parameters.
    distance_to_final = [
        LA.norm(opt_mean - final_param, ord=2)
        for opt_mean in opt_mean_path_in_old_basis
    ]
    distance_to_final_plot_name = f"cma redo distance_to_final over generations "
    plot_2d(cma_plot_dir, distance_to_final_plot_name,
            np.arange(len(distance_to_final)), distance_to_final,
            "num generation", "distance_to_final", False)
def main():
    """Run CMA in the subspace of the first ``n_comp_to_use`` PCs and plot the outcome.

    Steps, in order:
      1. Parse the common CLI arguments; ``cma_args.origin`` selects whether
         the subspace origin is the final parameters (``"final_param"``) or
         the mean parameters (any other value).
      2. Run ``do_pca`` (projecting the trajectory only when two components
         are used) and project the final parameters onto the subspace to get
         the CMA starting coordinate.
      3. Run ``do_cma``, dump the path of CMA means and plot the return
         statistics.
      4. When two components are used, evaluate a grid with
         ``do_eval_returns`` and draw the contour/trajectory plot.
      5. Plot the Euclidean distance of each CMA mean (mapped back to the
         full parameter space) to the final parameters.
    """
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    # origin = "final_param"
    origin = cma_args.origin

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    cma_run_num, cma_intermediate_data_dir = generate_run_dir(
        get_cma_returns_dirname,
        intermediate_dir=intermediate_data_dir,
        n_comp=cma_args.n_comp_to_use)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    # The projected trajectory is only needed for the 2-D contour plot.
    proj_or_not = (cma_args.n_comp_to_use == 2)
    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=proj_or_not,
                    origin=origin,
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=False)

    # ------------------------------------------------------------------
    # eval all xy coords
    # ------------------------------------------------------------------
    from stable_baselines.low_dim_analysis.common import plot_contour_trajectory, gen_subspace_coords,do_eval_returns\
        , do_proj_on_first_n

    if origin == "final_param":
        origin_param = result["final_concat_params"]
    else:
        origin_param = result["mean_param"]

    first_n_pcs = result["first_n_pcs"]
    final_param = result["final_concat_params"]
    last_proj_coord = do_proj_on_first_n(final_param, first_n_pcs,
                                         origin_param)
    starting_coord = last_proj_coord
    logger.log(f"CMA STASRTING CORRD: {starting_coord}")

    # starting_coord = (1/2*np.max(xcoordinates_to_eval), 1/2*np.max(ycoordinates_to_eval)) # use mean
    assert first_n_pcs.shape[0] == cma_args.n_comp_to_use
    mean_rets, min_rets, max_rets, opt_path, opt_path_mean = do_cma(
        cma_args, first_n_pcs, origin_param, save_dir,
        starting_coord, cma_args.cma_var)
    dump_rows_write_csv(cma_intermediate_data_dir, opt_path_mean,
                        "opt_mean_path")

    plot_dir = get_plot_dir(cma_args)
    cma_plot_dir = get_cma_plot_dir(plot_dir, cma_args.n_comp_to_use,
                                    cma_run_num, origin)
    os.makedirs(cma_plot_dir, exist_ok=True)

    ret_plot_name = f"cma return on {cma_args.n_comp_to_use} dim space of real pca plane, " \
                    f"explained {np.sum(result['explained_variance_ratio'][:cma_args.n_comp_to_use])}"
    plot_cma_returns(cma_plot_dir,
                     ret_plot_name,
                     mean_rets,
                     min_rets,
                     max_rets,
                     show=False)

    if cma_args.n_comp_to_use == 2:
        proj_coords = result["proj_coords"]
        assert proj_coords.shape[1] == 2

        xcoordinates_to_eval, ycoordinates_to_eval = gen_subspace_coords(
            cma_args,
            np.vstack((proj_coords, opt_path_mean)).T)

        eval_returns = do_eval_returns(cma_args,
                                       intermediate_data_dir,
                                       first_n_pcs,
                                       origin_param,
                                       xcoordinates_to_eval,
                                       ycoordinates_to_eval,
                                       save_dir,
                                       pca_center=origin,
                                       reuse=False)

        # NOTE(review): the sibling "cma redo" script passes the transposed
        # mean path as sub_alg_path; confirm which orientation
        # plot_contour_trajectory expects.
        plot_contour_trajectory(cma_plot_dir,
                                f"{origin}_origin_eval_return_contour_plot",
                                xcoordinates_to_eval,
                                ycoordinates_to_eval,
                                eval_returns,
                                proj_coords[:, 0],
                                proj_coords[:, 1],
                                result["explained_variance_ratio"][:2],
                                num_levels=25,
                                show=False,
                                sub_alg_path=opt_path_mean)

    # The CMA coordinates are relative to origin_param (the origin handed to
    # do_proj_on_first_n and do_cma), so that same origin must be added back.
    # Adding the mean parameters instead is only correct when the origin is
    # the mean.
    opt_mean_path_in_old_basis = [
        mean_projected_param.dot(first_n_pcs) + origin_param
        for mean_projected_param in opt_path_mean
    ]
    distance_to_final = [
        LA.norm(opt_mean - final_param, ord=2)
        for opt_mean in opt_mean_path_in_old_basis
    ]
    distance_to_final_plot_name = f"distance_to_final over generations "
    plot_2d(cma_plot_dir, distance_to_final_plot_name,
            np.arange(len(distance_to_final)), distance_to_final,
            "num generation", "distance_to_final", False)
def main():
    """Compare unweighted incremental PCA with weighted PCA (WPCA) along the trajectory.

    For each processed chunk of the parameter trajectory this:
      * updates an unweighted ``IncrementalPCA`` and records its angle to the
        final PCA plane and the angle of its first component to
        ``V = final_params - start_params``;
      * refits a ``WPCA`` on all rows seen so far, weighted by
        ``gen_weights``, and records the same two angles;
      * records the differences between the unweighted and weighted angles.

    The six resulting series are plotted in the ``finally`` clause, so the
    plots are produced even when the loop raises.
    """

    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    # Net displacement of the parameters over the whole run.
    V = final_params - start_params
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''

    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=False,
                    origin="mean_param",
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=True)
    logger.debug("after pca")

    final_plane = result["first_n_pcs"]

    count_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "total_num_dumped")
    # NOTE(review): total_num is read but never used in this function.
    total_num = pd.read_csv(count_file, header=None).values[0]

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    # Angle-to-final-plane series: unweighted IPCA, WPCA, and their difference.
    unduped_angles_along_the_way = []
    duped_angles_along_the_way = []
    diff_along = []

    # PC1-vs-V angle series: unweighted IPCA, WPCA, and their difference.
    unweighted_pc1_vs_V_angles = []
    duped_pc1_vs_V_angles = []
    pc1_vs_V_diffs = []

    unweighted_ipca = IncrementalPCA(
        n_components=cma_args.n_comp_to_use)  # for sparse PCA to speed up

    # Every parameter row seen so far; WPCA is refitted on all of it per chunk.
    all_matrix_buffer = []

    try:
        i = -1
        for chunk in all_param_iterator:
            i += 1
            # NOTE(review): only the first two chunks are processed; this
            # looks like a leftover debugging cap -- confirm it is intended.
            if i >= 2:
                break
            chunk = chunk.values
            unweighted_ipca.partial_fit(chunk)
            unweighted_angle = cal_angle_between_nd_planes(
                final_plane,
                unweighted_ipca.components_[:cma_args.n_comp_to_use])
            # postize_angle is a project helper; presumably it normalizes the
            # angle's sign/range -- verify against its definition.
            unweighted_pc1_vs_V_angle = postize_angle(
                cal_angle_between_nd_planes(V, unweighted_ipca.components_[0]))

            unweighted_pc1_vs_V_angles.append(unweighted_pc1_vs_V_angle)

            #TODO ignore 90 or 180 for now
            # Fold angles above 90 back into the range at or below 90.
            if unweighted_angle > 90:
                unweighted_angle = 180 - unweighted_angle
            unduped_angles_along_the_way.append(unweighted_angle)

            # Sanity check: for single vectors the plane-angle helper must
            # agree with the plain vector-angle helper.
            np.testing.assert_almost_equal(
                cal_angle_between_nd_planes(
                    unweighted_ipca.components_[:cma_args.n_comp_to_use][0],
                    final_plane[0]),
                cal_angle(
                    unweighted_ipca.components_[:cma_args.n_comp_to_use][0],
                    final_plane[0]))

            # chunk is a 2-D array here, so this appends one entry per row.
            all_matrix_buffer.extend(chunk)

            # Weights come from the function selected by func_index_to_use.
            weights = gen_weights(all_matrix_buffer,
                                  Funcs[cma_args.func_index_to_use])
            logger.log(f"currently at {all_param_iterator._currow}")
            # ipca = PCA(n_components=1)  # for sparse PCA to speed up
            # ipca.fit(duped_in_so_far)
            wpca = WPCA(n_components=cma_args.n_comp_to_use
                        )  # for sparse PCA to speed up
            tic = time.time()
            wpca.fit(all_matrix_buffer, weights=weights)
            toc = time.time()

            logger.debug(
                f"WPCA of {len(all_matrix_buffer)} data took {toc - tic} secs "
            )
            duped_angle = cal_angle_between_nd_planes(
                final_plane, wpca.components_[:cma_args.n_comp_to_use])

            duped_pc1_vs_V_angle = postize_angle(
                cal_angle_between_nd_planes(V, wpca.components_[0]))
            duped_pc1_vs_V_angles.append(duped_pc1_vs_V_angle)
            pc1_vs_V_diffs.append(duped_pc1_vs_V_angle -
                                  unweighted_pc1_vs_V_angle)

            #TODO ignore 90 or 180 for now
            # Same folding as for the unweighted angle above.
            if duped_angle > 90:
                duped_angle = 180 - duped_angle
            duped_angles_along_the_way.append(duped_angle)
            diff_along.append(unweighted_angle - duped_angle)
    finally:
        # Plot whatever was collected, even if the loop above raised.
        plot_dir = get_plot_dir(cma_args)
        if not os.path.exists(plot_dir):
            os.makedirs(plot_dir)

        angles_plot_name = f"WPCA" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(duped_angles_along_the_way)),
                duped_angles_along_the_way, "num of chunks",
                "angle with diff in degrees", False)

        angles_plot_name = f"Not WPCA exponential 2" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(unduped_angles_along_the_way)),
                unduped_angles_along_the_way, "num of chunks",
                "angle with diff in degrees", False)


        angles_plot_name = f"Not WPCA - WPCA diff_along exponential 2," \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name, np.arange(len(diff_along)),
                diff_along, "num of chunks", "angle with diff in degrees",
                False)




        angles_plot_name = f"PC1 VS VWPCA PC1 VS V" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(duped_pc1_vs_V_angles)), duped_pc1_vs_V_angles,
                "num of chunks", "angle with diff in degrees", False)

        angles_plot_name = f"PC1 VS VNot WPCA PC1 VS V" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(unweighted_pc1_vs_V_angles)),
                unweighted_pc1_vs_V_angles, "num of chunks",
                "angle with diff in degrees", False)


        angles_plot_name = f"PC1 VS VNot WPCA - WPCA diff PC1 VS V" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name, np.arange(len(pc1_vs_V_diffs)),
                pc1_vs_V_diffs, "num of chunks", "angle with diff in degrees",
                False)

        # Drop the accumulated rows and force a collection to release memory.
        del all_matrix_buffer
        import gc
        gc.collect()
def main():
    """Plot the angle between V and PC1 of a PCA fitted on the most recent parameters.

    ``V`` is ``final_params - start_params``. Parameter rows are streamed in
    chunks into a bounded ``deque`` (``maxlen=cma_args.deque_len``); after
    each accepted chunk a fresh ``PCA`` is fitted on the deque contents and
    ``cal_angle(V, first component)`` is recorded. Chunks with fewer rows
    than ``cma_args.n_comp_to_use`` are skipped. The series is plotted with
    ``plot_2d``.
    """
    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    parser = get_common_parser()
    cma_args, cma_unknown_args = parser.parse_known_args()

    run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(run_dir)
    intermediate_data_dir = get_intermediate_data_dir(run_dir)
    save_dir = get_save_dir(run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "final"),
        header=None).values[0]

    logger.log("grab start params")
    start_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "start"),
        header=None).values[0]

    V = final_params - start_params

    param_chunks = get_allinone_concat_df(dir_name=traj_params_dir_name,
                                          use_IPCA=True,
                                          chunk_size=cma_args.pc1_chunk_size)
    recorded_angles = []

    # Sliding window over the most recent parameter rows.
    recent_rows = deque(maxlen=cma_args.deque_len)

    for param_chunk in param_chunks:
        if param_chunk.shape[0] < cma_args.n_comp_to_use:
            logger.log("skipping too few data")
            continue

        recent_rows.extend(param_chunk.values)

        logger.log(f"currently at {param_chunks._currow}")

        window_pca = PCA(n_components=cma_args.n_comp_to_use)
        window_pca.fit(recent_rows)
        leading_pc = window_pca.components_[:cma_args.n_comp_to_use][0]
        recorded_angles.append(cal_angle(V, leading_pc))

    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    plot_name = (
        f"lastest angles algone the way start start n_comp_used :{cma_args.n_comp_to_use} dim space of mean pca plane, "
        f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} ")
    plot_2d(plot_dir,
            plot_name,
            np.arange(len(recorded_angles)),
            recorded_angles,
            "num of chunks",
            "angle with diff in degrees",
            False)
def main():
    """Plot the angle between V and the running PC1 after skipping the earliest chunks.

    ``V`` is ``final_params - start_params``. Chunks are ignored while the
    iterator's current row is at or below
    ``pc1_chunk_size * skipped_chunks``; every later chunk updates a
    one-component ``IncrementalPCA``, and ``cal_angle(V, PC1)`` (folded so
    that values above 90 become ``180 - angle``) is recorded. The series is
    plotted with ``plot_2d``.
    """
    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    parser = get_common_parser()
    cma_args, cma_unknown_args = parser.parse_known_args()

    run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(run_dir)
    intermediate_data_dir = get_intermediate_data_dir(run_dir)
    save_dir = get_save_dir(run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "final"),
        header=None).values[0]

    logger.log("grab start params")
    start_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "start"),
        header=None).values[0]

    V = final_params - start_params

    param_chunks = get_allinone_concat_df(dir_name=traj_params_dir_name,
                                          use_IPCA=True,
                                          chunk_size=cma_args.pc1_chunk_size)
    recorded_angles = []

    running_pca = IncrementalPCA(n_components=1)

    for param_chunk in param_chunks:
        skip_threshold = cma_args.pc1_chunk_size * cma_args.skipped_chunks
        if param_chunks._currow <= skip_threshold:
            logger.log(
                f"skipping: currow: {param_chunks._currow} skip threshold {skip_threshold}"
            )
            continue

        logger.log(f"currently at {param_chunks._currow}")
        running_pca.partial_fit(param_chunk.values)
        raw_angle = cal_angle(V, running_pca.components_[0])
        #TODO ignore 90 or 180 for now
        folded_angle = 180 - raw_angle if raw_angle > 90 else raw_angle
        recorded_angles.append(folded_angle)

    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    plot_name = (f"skipped angles algone the way skipped {cma_args.skipped_chunks}"
                 f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} ")
    plot_2d(plot_dir,
            plot_name,
            np.arange(len(recorded_angles)),
            recorded_angles,
            "num of chunks",
            "angle with diff in degrees",
            False)
def main(n_comp_start=2, do_eval=True):
    """
    Project the final parameters onto PCA subspaces of growing size and plot the outcome.

    For every subspace size from ``n_comp_start`` up to the number of components
    returned by ``do_pca`` (inclusive), the final parameter vector is passed through
    ``get_projected_data_in_old_basis`` and ``calculate_projection_errors`` with the
    mean parameter as origin. The projection errors are always plotted; when
    ``do_eval`` is set, every projected parameter vector is also evaluated in
    parallel with ``eval_return``, and the returns are saved to disk and plotted.

    :param n_comp_start: (int) smallest number of principal components to project on
    :param do_eval: (bool) whether to evaluate the projected parameters
    """
    import sys
    logger.log(sys.argv)
    args, _unknown_args = get_common_parser().parse_known_args()

    run_dir = get_dir_path_for_this_run(args)
    params_dir = get_full_params_dir(run_dir)
    data_dir = get_intermediate_data_dir(run_dir, params_scope="pi")
    save_dir = get_save_dir(run_dir)

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    # ==========================================================================================
    # get the pc vectors
    # ==========================================================================================
    from stable_baselines.low_dim_analysis.common import do_pca, get_projected_data_in_old_basis, \
        calculate_projection_errors, plot_2d

    origin = "mean_param"
    pca_result = do_pca(args.n_components, args.n_comp_to_use, params_dir, data_dir,
                        proj=False, origin=origin, use_IPCA=args.use_IPCA,
                        chunk_size=args.chunk_size)

    final_theta = pca_result["final_concat_params"]
    components = pca_result["pcs_components"]
    center = pca_result["mean_param"]
    n_comp_end = components.shape[0]

    projected_thetas = []
    errors = []
    for k in range(n_comp_start, n_comp_end + 1):
        projected_thetas.append(
            get_projected_data_in_old_basis(center, components, final_theta, k))
        error_for_k = calculate_projection_errors(center, components, final_theta, k)
        # exactly one error is expected because a single parameter vector is projected
        assert len(error_for_k) == 1
        errors.extend(error_for_k)

    plot_dir = get_plot_dir(args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    if do_eval:
        from stable_baselines.ppo2.run_mujoco import eval_return

        tic = time.time()
        jobs = (delayed(eval_return)(args, save_dir, theta, args.eval_num_timesteps, idx)
                for idx, theta in enumerate(projected_thetas))
        eval_returns = Parallel(n_jobs=args.cores_to_use, max_nbytes='100M')(jobs)
        elapsed = time.time() - tic
        logger.log(
            f"####################################1st version took {elapsed} seconds"
        )

        returns_file = get_projected_finals_eval_returns_filename(
            intermediate_dir=data_dir,
            n_comp_start=n_comp_start,
            np_comp_end=n_comp_end,
            pca_center=origin)
        np.savetxt(returns_file, eval_returns, delimiter=',')

        ret_plot_name = f"final project performances on start: {n_comp_start} end:{n_comp_end} dim space of mean pca plane, "
        plot_final_project_returns_returns(plot_dir, ret_plot_name, eval_returns,
                                           n_comp_start, n_comp_end, show=False)

    error_plot_name = f"final project errors on start: {n_comp_start} end:{n_comp_end} dim space of mean pca plane, "
    plot_2d(plot_dir, error_plot_name, np.arange(n_comp_start, n_comp_end + 1),
            errors, "num of pcs", "projection error", False)
def main():
    """
    Track how the running first principal component relates to the overall update direction.

    Reads the start and final parameter vectors, sets ``V = final - start`` and then
    walks the parameter trajectory and the gradient trajectory chunk by chunk:

    * an ``IncrementalPCA`` with one component is partially fitted on every parameter
      chunk and the angle between ``V`` and the current first component is recorded;
    * the angle between the last gradient of the matching gradient chunk and the
      "pull" direction ``V - unit_vector(current_param - start_params)`` is recorded.

    Both angle series are plotted. A second PCA is then fitted on the collected
    first components; its explained variance ratios, the angle between ``V`` and the
    plane spanned by its first two components, and the number of angles above
    90 degrees are written to text files in the plot directory.

    Requires the ``n_comp_to_use`` and ``pc1_chunk_size`` command line arguments.
    """
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok avoids the check-then-create race of `if not exists: makedirs`
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ==========================================================================================
    # get the pc vectors
    # ==========================================================================================

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params
    all_grads_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size,
        index="grads")
    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    angles_along_the_way = []
    grad_vs_pull = []
    pc1s = []
    ipca = IncrementalPCA(n_components=1)

    for chunk_number, chunk in enumerate(all_param_iterator, start=1):

        logger.log(f"currently at {all_param_iterator._currow}")
        ipca.partial_fit(chunk.values)
        pc1 = ipca.components_[0]
        # NOTE(review): the sign of pc1 is flipped on every even chunk, as in the
        # original code; the reason is not visible here -- confirm it is intended.
        if chunk_number % 2 == 0:
            pc1 = -pc1
        angles_along_the_way.append(cal_angle(V, pc1))
        pc1s.append(pc1)

        # Advance the gradient iterator in lock-step with the parameter iterator.
        # A shorter gradient trajectory raises StopIteration here rather than
        # silently truncating the results.
        current_grad = next(all_grads_iterator).values[-1]
        current_param = chunk.values[-1]
        delta = unit_vector(current_param - start_params)
        # NOTE(review): delta is normalised while V is not -- confirm this mix is intended.
        pull_dir = V - delta
        grad_vs_pull.append(cal_angle(pull_dir, current_grad))

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)
    first_n_pc1_vs_V_plot_dir = get_first_n_pc1_vs_V_plot_dir(
        plot_dir, cma_args.pc1_chunk_size)
    os.makedirs(first_n_pc1_vs_V_plot_dir, exist_ok=True)

    angles_plot_name = "angles algone the way dim space of mean pca plane "
    plot_2d(first_n_pc1_vs_V_plot_dir, angles_plot_name,
            np.arange(len(angles_along_the_way)), angles_along_the_way,
            "num of chunks", "angle with diff in degrees", False)

    grad_vs_pull_plot_name = "grad vs V - delta_theta"
    plot_2d(first_n_pc1_vs_V_plot_dir, grad_vs_pull_plot_name,
            np.arange(len(grad_vs_pull)), grad_vs_pull, "num of chunks",
            "angle in degrees", False)

    # PCA over the collected first components
    pcpca = PCA(n_components=min(len(pc1s), 100))
    pcpca.fit(pc1s)
    # computed once and reused for both the log line and the saved file
    v_vs_pc_plane_angle = cal_angle_plane(V, pcpca.components_[:2])
    logger.log(pcpca.explained_variance_ratio_)
    logger.log(v_vs_pc_plane_angle)
    np.savetxt(f"{first_n_pc1_vs_V_plot_dir}/pcs_pcs.txt",
               pcpca.explained_variance_ratio_,
               delimiter=',')
    np.savetxt(
        f"{first_n_pc1_vs_V_plot_dir}/pcs_V_vs_pcapca_first_2_comp_plane.txt",
        np.array([v_vs_pc_plane_angle]),
        delimiter=',')

    num_angles_above_90 = sum(1 for angle in angles_along_the_way if angle > 90)

    np.savetxt(f"{first_n_pc1_vs_V_plot_dir}/num of angles bigger than 90.txt",
               np.array([num_angles_above_90]),
               delimiter=',')
Exemple #11
0
def main(n_comp_start=2, do_eval=True):
    """
    Measure how long the trajectory stays close to the PCA plane of each chunk.

    The whole parameter trajectory is loaded and cut into consecutive chunks of
    ``cma_args.chunk_size`` rows. For every chunk that is followed by at least one
    more row, a PCA is fitted on the chunk and two values are recorded:

    * the number of axes reported by ``calculate_num_axis_to_explain`` for a
      ratio threshold of 0.99;
    * the number of subsequent rows visited before the projection error onto
      those axes has been at least ``error_threshold`` for ``consec_threshold``
      rows in a row.

    Both series are plotted against the chunk index.

    Bug fix: the consecutive-error counter is now reset whenever a row falls back
    below the error threshold. Previously it was never reset, so the loop stopped
    after ``consec_threshold`` violations in total rather than in a row.

    :param n_comp_start: unused, kept for interface compatibility
    :param do_eval: unused, kept for interface compatibility
    """
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok avoids the check-then-create race of `if not exists: makedirs`
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ==========================================================================================
    # get the pc vectors
    # ==========================================================================================
    from stable_baselines.low_dim_analysis.common import \
        calculate_projection_errors, plot_2d, get_allinone_concat_df, calculate_num_axis_to_explain

    ratio_threshold = 0.99
    consec_threshold = 5
    error_threshold = 0.05

    tic = time.time()
    all_param_matrix = get_allinone_concat_df(dir_name=traj_params_dir_name).values
    toc = time.time()
    print('\nElapsed time getting the chunk concat diff took {:.2f} s\n'
          .format(toc - tic))

    chunk_size = cma_args.chunk_size
    num_rows = len(all_param_matrix)
    n_comps = min(cma_args.n_comp_to_use, chunk_size)

    num_to_explains = []
    deviates = []
    for i in range(0, num_rows, chunk_size):
        # stop once no row is left after this chunk to test for deviation
        if i + chunk_size >= num_rows:
            break
        chunk = all_param_matrix[i:i + chunk_size]

        pca = PCA(n_components=n_comps)
        pca.fit(chunk)

        num, _ = calculate_num_axis_to_explain(pca, ratio_threshold)
        num_to_explains.append(num)

        pcs_components = pca.components_

        num_to_deviate = 0
        consec = 0
        for j in range(i + chunk_size, num_rows):
            errors = calculate_projection_errors(pca.mean_, pcs_components, all_param_matrix[j], num)
            if errors[0] >= error_threshold:
                consec += 1
                if consec >= consec_threshold:
                    break
            else:
                # the streak is broken: only consecutive violations count
                consec = 0

            num_to_deviate += 1

        deviates.append(num_to_deviate)

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    deviate_plot_name = f"num of steps to deviates from this plane chunk_size: {cma_args.chunk_size} ratio_threshold: {ratio_threshold} consec_threshold: {consec_threshold}error_threshold: {error_threshold}, "
    plot_2d(plot_dir, deviate_plot_name, np.arange(len(deviates)), deviates, "num of chunks", "num of steps to deviates from this plane", False)

    num_to_explain_plot_name = f"num to explain chunk_size: {cma_args.chunk_size} "
    plot_2d(plot_dir, num_to_explain_plot_name, np.arange(len(num_to_explains)), num_to_explains, "num of chunks", "num_to_explains", False)
Exemple #12
0
def main():
    """
    Run ``do_ppos`` inside the PCA subspace of a finished run and plot the results.

    Steps:

    1. Parse the common arguments and derive the run, parameter, intermediate
       data and save directories.
    2. Compute the principal components with ``do_pca`` around ``ppos_args.origin``.
    3. Project the final parameters onto the first components; when the origin is
       ``"final_param"``, check that mapping the projection back reproduces them.
    4. Call ``do_ppos`` and plot the moving average of the rewards.
    5. When exactly two components are used, evaluate returns on a coordinate grid
       and draw the contour plot together with the optimisation path.
    6. Plot the L2 distance between every point of the optimisation path (mapped
       back to the original basis) and the final parameters.

    Bug fix: the round-trip check used ``assert np.testing.assert_almost_equal(...)``.
    That function returns ``None`` on success, so the outer ``assert`` failed
    exactly when the check passed. It is now called directly and raises
    ``AssertionError`` itself on a mismatch.
    """
    import sys
    logger.log(sys.argv)
    ppos_arg_parser = get_common_parser()

    ppos_args, ppos_unknown_args = ppos_arg_parser.parse_known_args()
    # remembered so it can be restored after do_ppos (see below)
    full_space_alg = ppos_args.alg

    # origin = "final_param"
    origin = ppos_args.origin

    this_run_dir = get_dir_path_for_this_run(ppos_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    # exist_ok avoids the check-then-create race of `if not exists: makedirs`
    os.makedirs(intermediate_data_dir, exist_ok=True)

    ppos_run_num, ppos_intermediate_data_dir = generate_run_dir(
        get_ppos_returns_dirname,
        intermediate_dir=intermediate_data_dir,
        n_comp=ppos_args.n_comp_to_use)

    # ==========================================================================================
    # get the pc vectors
    # ==========================================================================================
    # projected coordinates are only requested for the 2D case, where they are plotted
    proj_or_not = (ppos_args.n_comp_to_use == 2)
    result = do_pca(ppos_args.n_components,
                    ppos_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=proj_or_not,
                    origin=origin,
                    use_IPCA=ppos_args.use_IPCA,
                    chunk_size=ppos_args.chunk_size)

    # ==========================================================================================
    # eval all xy coords
    # ==========================================================================================
    if origin == "final_param":
        origin_param = result["final_concat_params"]
    else:
        origin_param = result["mean_param"]

    final_param = result["final_concat_params"]
    last_proj_coord = do_proj_on_first_n(final_param, result["first_n_pcs"],
                                         origin_param)

    if origin == "final_param":
        back_final_param = low_dim_to_old_basis(last_proj_coord,
                                                result["first_n_pcs"],
                                                origin_param)
        # raises AssertionError on mismatch; returns None, so it must not be
        # wrapped in an `assert` statement
        np.testing.assert_almost_equal(back_final_param, final_param)

    starting_coord = last_proj_coord
    logger.log(f"PPOS STASRTING CORRD: {starting_coord}")

    # starting_coord = (1/2*np.max(xcoordinates_to_eval), 1/2*np.max(ycoordinates_to_eval)) # use mean
    assert result["first_n_pcs"].shape[0] == ppos_args.n_comp_to_use

    eprews, moving_ave_rewards, optimization_path = do_ppos(
        ppos_args, result, intermediate_data_dir, origin_param)

    # restore the algorithm name before deriving plot paths
    # (presumably do_ppos may overwrite ppos_args.alg -- confirm)
    ppos_args.alg = full_space_alg
    plot_dir = get_plot_dir(ppos_args)
    ppos_plot_dir = get_ppos_plot_dir(plot_dir, ppos_args.n_comp_to_use,
                                      ppos_run_num)
    os.makedirs(ppos_plot_dir, exist_ok=True)

    ret_plot_name = f"cma return on {ppos_args.n_comp_to_use} dim space of real pca plane, " \
                    f"explained {np.sum(result['explained_variance_ratio'][:ppos_args.n_comp_to_use])}"
    plot_ppos_returns(ppos_plot_dir,
                      ret_plot_name,
                      moving_ave_rewards,
                      show=False)

    if ppos_args.n_comp_to_use == 2:
        proj_coords = result["proj_coords"]
        assert proj_coords.shape[1] == 2

        xcoordinates_to_eval, ycoordinates_to_eval = gen_subspace_coords(
            ppos_args,
            np.vstack((proj_coords, optimization_path)).T)

        eval_returns = do_eval_returns(ppos_args,
                                       intermediate_data_dir,
                                       result["first_n_pcs"],
                                       origin_param,
                                       xcoordinates_to_eval,
                                       ycoordinates_to_eval,
                                       save_dir,
                                       pca_center=origin)

        plot_contour_trajectory(ppos_plot_dir,
                                "end_point_origin_eval_return_contour_plot",
                                xcoordinates_to_eval,
                                ycoordinates_to_eval,
                                eval_returns,
                                proj_coords[:, 0],
                                proj_coords[:, 1],
                                result["explained_variance_ratio"][:2],
                                num_levels=25,
                                show=False,
                                sub_alg_path=optimization_path)

    opt_mean_path_in_old_basis = [
        low_dim_to_old_basis(projected_opt_params, result["first_n_pcs"],
                             origin_param)
        for projected_opt_params in optimization_path
    ]
    distance_to_final = [
        LA.norm(opt_mean - final_param, ord=2)
        for opt_mean in opt_mean_path_in_old_basis
    ]
    distance_to_final_plot_name = "distance_to_final over generations "
    plot_2d(ppos_plot_dir, distance_to_final_plot_name,
            np.arange(len(distance_to_final)), distance_to_final,
            "num generation", "distance_to_final", False)
Exemple #13
0
def main():
    """
    Compare the direction travelled so far with the running and the stored first component.

    With ``V = final - start`` and the principal components loaded from the
    intermediate directory, the parameter trajectory is streamed chunk by chunk
    through a one-component ``IncrementalPCA``. After every chunk two differences
    are recorded, using the displacement of the chunk's last row from the start
    parameters:

    * (angle between the displacement and the running first component)
      minus (angle between ``V`` and the running first component), both passed
      through ``postize_angle``;
    * (angle between the displacement and the stored first component) minus
      (``postize_angle`` of the angle between ``V`` and the stored first component).

    Both series are plotted against the chunk index.

    Requires the ``n_comp_to_use`` and ``pc1_chunk_size`` command line arguments.
    """
    import sys
    logger.log(sys.argv)
    args, _unknown_args = get_common_parser().parse_known_args()

    run_dir = get_dir_path_for_this_run(args)
    params_dir = get_full_params_dir(run_dir)
    data_dir = get_intermediate_data_dir(run_dir)
    save_dir = get_save_dir(run_dir)

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    # ==========================================================================================
    # get the pc vectors
    # ==========================================================================================

    logger.log("grab final params")
    final_theta = pd.read_csv(
        get_full_param_traj_file_path(params_dir, "final"), header=None).values[0]

    logger.log("grab start params")
    start_theta = pd.read_csv(
        get_full_param_traj_file_path(params_dir, "start"), header=None).values[0]

    overall_direction = final_theta - start_theta

    stored_pcs_file = get_pcs_filename(intermediate_dir=data_dir,
                                       n_comp=args.num_comp_to_load)
    stored_pcs = np.loadtxt(stored_pcs_file, delimiter=',')
    stored_pc1 = stored_pcs[0]

    final_cone_angle = postize_angle(cal_angle(overall_direction, stored_pc1))
    logger.log(f"@@@@@@@@@@@@ {final_cone_angle}")

    param_chunks = get_allinone_concat_df(dir_name=params_dir,
                                          use_IPCA=True,
                                          chunk_size=args.pc1_chunk_size)
    running_pca = IncrementalPCA(n_components=1)

    running_pc_diffs = []
    final_cone_diffs = []
    for chunk in param_chunks:
        logger.log(f"currently at {param_chunks._currow}")
        rows = chunk.values
        running_pca.partial_fit(rows)
        running_pc1 = running_pca.components_[0]

        direction_angle = postize_angle(cal_angle(overall_direction, running_pc1))

        travelled = rows[-1] - start_theta
        travelled_angle = postize_angle(cal_angle(travelled, running_pc1))

        # NOTE(review): unlike the other angles this one is not passed through
        # postize_angle before the subtraction -- confirm that is intended.
        travelled_vs_stored = cal_angle(travelled, stored_pc1)

        final_cone_diffs.append(travelled_vs_stored - final_cone_angle)
        running_pc_diffs.append(travelled_angle - direction_angle)

    plot_dir = get_plot_dir(args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    plot_2d(plot_dir, "$$$curr_angles$$$", np.arange(len(running_pc_diffs)),
            running_pc_diffs, "num of chunks", "angle with diff in degrees", False)
    plot_2d(plot_dir, "inside final cone?", np.arange(len(final_cone_diffs)),
            final_cone_diffs, "num of chunks", "angle with diff in degrees",
            False)
def main():
    """
    Compare the angle between ``V = final - start`` and the first principal
    component with and without duplicating part of the trajectory.

    For every parameter chunk the function
      * partially fits ``undup_ipca`` on the chunk and records the angle between
        ``V`` and its first component (folded into [0, 90] degrees);
      * appends the chunk to a growing buffer, builds a "duplicated" data set from
        it with ``dup_so_far_buffer``, fits a fresh ``WPCA`` on that data set in
        slices of ``cma_args.chunk_size`` rows and records the folded angle again;
      * records the difference between the two angles.
    The three series are plotted against the chunk index.

    NOTE(review): as visible here the function cannot run -- ``all_param_iterator``
    and ``last_percentage`` are used but never assigned in this body; confirm they
    are not expected to come from module scope.
    """

    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    count_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "total_num_dumped")
    # .values[0] is the first row of the file, i.e. an array rather than a scalar
    total_num = pd.read_csv(count_file, header=None).values[0]

    # overall displacement from the start to the final parameters
    V = final_params - start_params

    # every second row of the full trajectory
    # NOTE(review): not used anywhere in the visible body
    all_thetas_downsampled = get_allinone_concat_df(
        dir_name=traj_params_dir_name).values[::2]

    unduped_angles_along_the_way = []
    duped_angles_along_the_way = []
    diff_along = []
    num = 2  #TODO hardcode!
    # NOTE(review): partial_fit is called on this object below; if PCA is
    # sklearn.decomposition.PCA it has no partial_fit (IncrementalPCA does) -- confirm
    undup_ipca = PCA(n_components=1)  # for sparse PCA to speed up

    all_matrix_buffer = []
    # NOTE(review): all_param_iterator is never assigned in this function
    for chunk in all_param_iterator:
        chunk = chunk.values
        undup_ipca.partial_fit(chunk)
        unduped_angle = cal_angle(V, undup_ipca.components_[0])

        #TODO ignore 90 or 180 for now
        # fold angles above 90 degrees back into [0, 90]
        if unduped_angle > 90:
            unduped_angle = 180 - unduped_angle
        unduped_angles_along_the_way.append(unduped_angle)

        # keep every row seen so far
        all_matrix_buffer.extend(chunk)

        # NOTE(review): weights is computed but not used in the visible body
        weights = gen_weights(all_param_iterator._currow, total_num)
        # NOTE(review): last_percentage is never assigned in this function
        duped_in_so_far = dup_so_far_buffer(all_matrix_buffer, last_percentage,
                                            num)

        logger.log(
            f"currently at {all_param_iterator._currow}, last_pecentage: {last_percentage}"
        )
        # ipca = PCA(n_components=1)  # for sparse PCA to speed up
        # ipca.fit(duped_in_so_far)
        # a fresh model per chunk, fitted on the whole duplicated data set
        # NOTE(review): confirm that WPCA provides partial_fit
        ipca = WPCA(
            n_components=cma_args.n_comp_to_use)  # for sparse PCA to speed up
        # feed the duplicated data in slices of chunk_size rows; the last slice may be shorter
        for i in range(0, len(duped_in_so_far), cma_args.chunk_size):
            logger.log(
                f"partial fitting: i : {i} len(duped_in_so_far): {len(duped_in_so_far)}"
            )
            if i + cma_args.chunk_size > len(duped_in_so_far):
                ipca.partial_fit(duped_in_so_far[i:])
            else:
                ipca.partial_fit(duped_in_so_far[i:i + cma_args.chunk_size])

        duped_angle = cal_angle(V, ipca.components_[0])

        #TODO ignore 90 or 180 for now
        # fold angles above 90 degrees back into [0, 90]
        if duped_angle > 90:
            duped_angle = 180 - duped_angle
        duped_angles_along_the_way.append(duped_angle)
        diff_along.append(unduped_angle - duped_angle)

    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # one plot per series, all against the chunk index
    angles_plot_name = f"duped exponential 2, num dup: {num}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name,
            np.arange(len(duped_angles_along_the_way)),
            duped_angles_along_the_way, "num of chunks",
            "angle with diff in degrees", False)

    angles_plot_name = f"unduped exponential 2, num dup: {num}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name,
            np.arange(len(unduped_angles_along_the_way)),
            unduped_angles_along_the_way, "num of chunks",
            "angle with diff in degrees", False)


    angles_plot_name = f"undup - dup diff_along exponential 2, num dup: {num}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name, np.arange(len(diff_along)), diff_along,
            "num of chunks", "angle with diff in degrees", False)

    # drop the accumulated rows and force a collection to release the memory
    del all_matrix_buffer
    import gc
    gc.collect()