Example #1
0
def main():
    """Track how the running PCA plane approaches the final PCA plane.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required.  The final principal components are
    taken from ``do_pca``.  The parameter trajectory is then streamed in
    chunks through an ``IncrementalPCA``; after each chunk the value returned
    by ``cal_angle_between_nd_planes`` for (plane so far, final plane) is
    recorded, and the series is plotted against the chunk index.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=False,
                    origin="mean_param",
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=True)
    logger.debug("after pca")
    final_pcs = result["first_n_pcs"]

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    plane_angles_vs_final_plane_along_the_way = []
    ipca = IncrementalPCA(n_components=cma_args.n_comp_to_use)
    for chunk in all_param_iterator:
        logger.log(f"currently at {all_param_iterator._currow}")
        ipca.partial_fit(chunk.values)

        first_n_pcs = ipca.components_[:cma_args.n_comp_to_use]
        # Sanity check: both planes must be spanned by the same number of PCs.
        assert final_pcs.shape[0] == first_n_pcs.shape[0]

        plane_angles_vs_final_plane_along_the_way.append(
            cal_angle_between_nd_planes(first_n_pcs, final_pcs))

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)
    plane_angles_vs_final_plane_plot_dir = \
        get_plane_angles_vs_final_plane_along_the_way_plot_dir(
            plot_dir, cma_args.n_comp_to_use)
    os.makedirs(plane_angles_vs_final_plane_plot_dir, exist_ok=True)

    # The trailing space is part of the existing plot name; keep it unchanged.
    angles_plot_name = "plane_angles_vs_final_plane_plot_dir "
    plot_2d(plane_angles_vs_final_plane_plot_dir, angles_plot_name,
            np.arange(len(plane_angles_vs_final_plane_along_the_way)),
            plane_angles_vs_final_plane_along_the_way, "num of chunks",
            "angle with diff in degrees", False)
Example #2
0
def main():
    """Plot angle series along the trajectory, then refit PCA on prefixes.

    First part: streams parameter chunks and gradient chunks in lockstep.
    After each parameter chunk it records ``cal_angle`` between
    (final params - last row of the chunk) and the first component of an
    ``IncrementalPCA`` fitted on all chunks so far; for every row it records
    ``cal_angle`` between that row's gradient and (final params - row).
    Both series are plotted.

    Second part (from ``plot_next_n_dir`` onwards): recreates a ``next_n``
    plot directory, loads the full concatenated matrix and fits a ``PCA`` on
    growing prefixes of it.

    NOTE(review): the second part reads ``args``, which is not defined in
    this function, and stops right after ``pca.fit`` without using the fit.
    It looks like a fragment of another function was appended here - confirm
    against the original file before relying on it.
    """

    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)
    # NOTE(review): save_dir is assigned but never read in this function.
    save_dir = get_save_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    # First row of the header-less CSV is taken as the final parameter vector.
    final_params = pd.read_csv(final_file, header=None).values[0]

    # Two chunked readers over the same directory: parameters and gradients.
    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    all_grads_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size,
        index="grads")

    angles_with_pc1_along_the_way = []
    grad_vs_final_min_current_param = []
    ipca = IncrementalPCA(1)  # for sparse PCA to speed up
    for chunk in all_param_iterator:

        logger.log(f"currently at {all_param_iterator._currow}")

        # Direction from the last parameter row of this chunk to the final params.
        target_direction = final_params - chunk.values[-1]

        ipca.partial_fit(chunk.values)
        angle_with_pc1 = cal_angle(target_direction, ipca.components_[0])

        angles_with_pc1_along_the_way.append(angle_with_pc1)

        # Advance the gradient reader by one chunk per parameter chunk.
        # presumably both readers yield chunks with matching row counts - verify.
        grads = all_grads_iterator.__next__().values
        for i, grad in enumerate(grads):

            grad_angle = cal_angle(grad, final_params - chunk.values[i])
            grad_vs_final_min_current_param.append(grad_angle)

    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # Adjacent f-strings are concatenated with no separator between them.
    angles_plot_name = f"final - current VS so far pc1" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size}"
    plot_2d(plot_dir, angles_plot_name,
            np.arange(len(angles_with_pc1_along_the_way)),
            angles_with_pc1_along_the_way, "num of chunks",
            "angle with diff in degrees", False)
    grad_vs_current_plot_name = f"##final - current param VS current grad" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size}"
    # This series has one entry per row, although the x label says "num of chunks".
    plot_2d(plot_dir, grad_vs_current_plot_name,
            np.arange(len(grad_vs_final_min_current_param)),
            grad_vs_final_min_current_param, "num of chunks",
            "angle with diff in degrees", False)
    plot_next_n_dir = f"{plot_dir}/next_n"
    # NOTE(review): `args` is not defined in this function (only `cma_args` is);
    # as written this line raises NameError.
    this_run_dir = get_dir_path_for_this_run(args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    # Start from an empty next_n directory: remove any previous one first.
    if os.path.exists(plot_next_n_dir):
        import shutil
        shutil.rmtree(plot_next_n_dir)
    os.makedirs(plot_next_n_dir)

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_concat_params = pd.read_csv(final_file, header=None).values[0]

    tic = time.time()
    concat_matrix_diff = get_allinone_concat_df(
        dir_name=traj_params_dir_name, final_concat_params=final_concat_params)
    toc = time.time()
    print('\nElapsed time getting the full concat diff took {:.2f} s\n'.format(
        toc - tic))

    # Evenly spaced prefix lengths at which PCA is refitted.
    check_interval = len(concat_matrix_diff) // args.even_check_point_num
    check_points = [
        i for i in range(check_interval,
                         len(concat_matrix_diff) - 1, check_interval)
    ]

    for check_index in check_points:
        pca = PCA(n_components=args.n_components)

        # NOTE(review): `tic` is set but the fit time is never reported, and the
        # fitted `pca` is not used afterwards in the visible code.
        tic = time.time()
        pca.fit(concat_matrix_diff[:check_index])
def main():
    """Compare unweighted incremental PCA with weighted PCA along a trajectory.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required.  The final plane comes from ``do_pca``
    and ``V`` is (final params - start params).  For each processed chunk:

    * an ``IncrementalPCA`` is updated with the chunk, and
    * a ``WPCA`` is refitted on every row seen so far, with weights from
      ``gen_weights``.

    For both estimators the plane-vs-final-plane value and the PC1-vs-``V``
    value are recorded, together with their differences.  All six series are
    plotted in a ``finally`` clause so partial results survive an exception.

    Only the first two chunks are processed (see ``max_chunks``).
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=False,
                    origin="mean_param",
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=True)
    logger.debug("after pca")
    final_plane = result["first_n_pcs"]

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)

    n_comp = cma_args.n_comp_to_use
    # The original code stopped after two chunks; the limit is kept as is.
    max_chunks = 2

    unduped_angles_along_the_way = []
    duped_angles_along_the_way = []
    diff_along = []

    unweighted_pc1_vs_V_angles = []
    duped_pc1_vs_V_angles = []
    pc1_vs_V_diffs = []

    unweighted_ipca = IncrementalPCA(n_components=n_comp)

    # Every row seen so far; the weighted PCA is refitted on all of it.
    all_matrix_buffer = []

    try:
        for chunk_index, chunk in enumerate(all_param_iterator):
            if chunk_index >= max_chunks:
                break
            chunk = chunk.values

            # --- unweighted, incremental estimate ---
            unweighted_ipca.partial_fit(chunk)
            unweighted_pcs = unweighted_ipca.components_[:n_comp]
            unweighted_angle = cal_angle_between_nd_planes(
                final_plane, unweighted_pcs)
            unweighted_pc1_vs_V_angle = postize_angle(
                cal_angle_between_nd_planes(V, unweighted_ipca.components_[0]))
            unweighted_pc1_vs_V_angles.append(unweighted_pc1_vs_V_angle)

            # TODO ignore 90 or 180 for now
            if unweighted_angle > 90:
                unweighted_angle = 180 - unweighted_angle
            unduped_angles_along_the_way.append(unweighted_angle)

            # Sanity check: for single vectors the plane helper must agree
            # with cal_angle.
            np.testing.assert_almost_equal(
                cal_angle_between_nd_planes(unweighted_pcs[0],
                                            final_plane[0]),
                cal_angle(unweighted_pcs[0], final_plane[0]))

            # --- weighted estimate over everything seen so far ---
            all_matrix_buffer.extend(chunk)
            weights = gen_weights(all_matrix_buffer,
                                  Funcs[cma_args.func_index_to_use])
            logger.log(f"currently at {all_param_iterator._currow}")

            wpca = WPCA(n_components=n_comp)
            tic = time.time()
            wpca.fit(all_matrix_buffer, weights=weights)
            toc = time.time()
            logger.debug(
                f"WPCA of {len(all_matrix_buffer)} data took {toc - tic} secs "
            )

            duped_angle = cal_angle_between_nd_planes(
                final_plane, wpca.components_[:n_comp])
            duped_pc1_vs_V_angle = postize_angle(
                cal_angle_between_nd_planes(V, wpca.components_[0]))
            duped_pc1_vs_V_angles.append(duped_pc1_vs_V_angle)
            pc1_vs_V_diffs.append(duped_pc1_vs_V_angle -
                                  unweighted_pc1_vs_V_angle)

            # TODO ignore 90 or 180 for now
            if duped_angle > 90:
                duped_angle = 180 - duped_angle
            duped_angles_along_the_way.append(duped_angle)
            diff_along.append(unweighted_angle - duped_angle)
    finally:
        plot_dir = get_plot_dir(cma_args)
        os.makedirs(plot_dir, exist_ok=True)

        # Plot names are prefix + suffix with no separator, exactly as the
        # previously concatenated f-strings produced them.
        name_suffix = f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plots = (
            ("WPCA", duped_angles_along_the_way),
            ("Not WPCA exponential 2", unduped_angles_along_the_way),
            ("Not WPCA - WPCA diff_along exponential 2,", diff_along),
            ("PC1 VS VWPCA PC1 VS V", duped_pc1_vs_V_angles),
            ("PC1 VS VNot WPCA PC1 VS V", unweighted_pc1_vs_V_angles),
            ("PC1 VS VNot WPCA - WPCA diff PC1 VS V", pc1_vs_V_diffs),
        )
        for name_prefix, series in plots:
            plot_2d(plot_dir, name_prefix + name_suffix,
                    np.arange(len(series)), series, "num of chunks",
                    "angle with diff in degrees", False)

        # Release the accumulated rows before returning.
        del all_matrix_buffer
        import gc
        gc.collect()
def main():
    """Plot the angle between V and PC1 of a sliding window of parameters.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required, and ``deque_len`` bounds the window.
    ``V`` is (final params - start params).  Parameter chunks are appended to
    a bounded deque; after each chunk a fresh ``PCA`` is fitted on the window
    and ``cal_angle(V, PC1)`` is recorded.  Chunks with fewer rows than
    ``n_comp_to_use`` are skipped.  The series is plotted against the number
    of processed chunks.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    angles_along_the_way = []

    # Bounded window: once full, appending rows drops the oldest ones.
    latest_thetas = deque(maxlen=cma_args.deque_len)

    for chunk in all_param_iterator:
        if chunk.shape[0] < cma_args.n_comp_to_use:
            logger.log("skipping too few data")
            continue

        latest_thetas.extend(chunk.values)
        logger.log(f"currently at {all_param_iterator._currow}")

        # A fresh estimator per chunk, created only once the chunk is known
        # to be usable.
        pca = PCA(n_components=cma_args.n_comp_to_use)
        pca.fit(latest_thetas)
        angles_along_the_way.append(cal_angle(V, pca.components_[0]))

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    # The name (typos included) is reproduced verbatim so output files keep
    # their existing names.
    angles_plot_name = f"lastest angles algone the way start start n_comp_used :{cma_args.n_comp_to_use} dim space of mean pca plane, " \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name, np.arange(len(angles_along_the_way)),
            angles_along_the_way, "num of chunks",
            "angle with diff in degrees", False)
def main():
    """Plot the angle between V and the running PC1, skipping early chunks.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required, and ``skipped_chunks`` sets how much of
    the start of the trajectory is ignored.  ``V`` is (final params - start
    params).  Chunks are skipped while the reader's ``_currow`` is at most
    ``pc1_chunk_size * skipped_chunks``.  Every later chunk updates an
    ``IncrementalPCA`` and ``cal_angle(V, PC1)`` is recorded, folded into
    [0, 90].  The series is plotted against the number of processed chunks.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    angles_along_the_way = []

    ipca = IncrementalPCA(n_components=1)

    # Loop-invariant: compute the row threshold once instead of per chunk.
    skip_threshold = cma_args.pc1_chunk_size * cma_args.skipped_chunks

    for chunk in all_param_iterator:
        if all_param_iterator._currow <= skip_threshold:
            logger.log(
                f"skipping: currow: {all_param_iterator._currow} skip threshold {skip_threshold}"
            )
            continue

        logger.log(f"currently at {all_param_iterator._currow}")
        ipca.partial_fit(chunk.values)
        angle = cal_angle(V, ipca.components_[0])
        # TODO ignore 90 or 180 for now
        if angle > 90:
            angle = 180 - angle
        angles_along_the_way.append(angle)

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    # The name is reproduced verbatim so output files keep their existing names.
    angles_plot_name = f"skipped angles algone the way skipped {cma_args.skipped_chunks}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name, np.arange(len(angles_along_the_way)),
            angles_along_the_way, "num of chunks",
            "angle with diff in degrees", False)
Example #7
0
def main():
    """Plot per-step update direction vs V alongside the running PC1 vs V.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required.  ``V`` is (final params - start params).
    For every parameter row the difference to the previous row is treated as
    the update direction and ``cal_angle(update, V)`` is recorded.  The first
    row of a chunk is compared with the last row of the previously processed
    chunk, or with the start params for the first chunk.  After each chunk an
    ``IncrementalPCA`` is updated and ``cal_angle(V, PC1)``, folded into
    [0, 90], is repeated once per row so both series have equal length.
    Chunks with fewer rows than ``n_comp_to_use`` are skipped.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    angles_along_the_way = []
    grad_vs_Vs = []

    ipca = IncrementalPCA(n_components=cma_args.n_comp_to_use)
    # Row preceding the next row to process; starts at the start params.
    previous_row = start_params
    for chunk in all_param_iterator:
        chunk = chunk.values
        if chunk.shape[0] < cma_args.n_comp_to_use:
            # A skipped chunk does not advance previous_row (as before).
            logger.log("skipping too few data")
            continue

        # Update direction = current row minus the row before it.
        for row in chunk:
            grad_vs_Vs.append(cal_angle(row - previous_row, V))
            previous_row = row

        logger.log(f"currently at {all_param_iterator._currow}")
        ipca.partial_fit(chunk)

        angle = cal_angle(V, ipca.components_[0])
        if angle > 90:
            angle = 180 - angle
        # One entry per row so the series aligns with grad_vs_Vs.
        angles_along_the_way.extend([angle] * chunk.shape[0])

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    # Internal consistency check: both series are plotted on one x axis.
    assert len(angles_along_the_way) == len(grad_vs_Vs)

    angles_plot_name = f"in so far update direction and pc1 vs final - start " \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d_2(plot_dir, angles_plot_name, np.arange(len(grad_vs_Vs)),
              grad_vs_v=grad_vs_Vs, pc1_vs_V=angles_along_the_way,
              xlabel="num of chunks", ylabel="angle with diff in degrees",
              show=False)
def main():
    """Analyse the running PC1 against V and the gradient against a pull direction.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required.  ``V`` is (final params - start params).
    Parameter and gradient chunks are read in lockstep.  After each parameter
    chunk:

    * an ``IncrementalPCA`` (one component) is updated; its PC1, negated on
      every second chunk, is stored and ``cal_angle(V, PC1)`` is recorded;
    * ``cal_angle`` between ``V - unit_vector(current - start)`` and the last
      gradient of the chunk is recorded.

    Both series are plotted.  A ``PCA`` is then fitted on the collected PC1
    vectors; its explained variance ratios, ``cal_angle_plane`` of ``V``
    against its first two components, and the number of recorded angles above
    90 are written to text files in the plot directory.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params
    all_grads_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size,
        index="grads")
    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    angles_along_the_way = []
    grad_vs_pull = []
    pc1s = []
    ipca = IncrementalPCA(n_components=1)

    for chunk_number, chunk in enumerate(all_param_iterator, start=1):
        logger.log(f"currently at {all_param_iterator._currow}")
        ipca.partial_fit(chunk.values)
        pc1 = ipca.components_[0]
        # The sign of PC1 is flipped on every even-numbered chunk.
        # NOTE(review): the reason for the alternating flip is not visible
        # here - confirm it is intended.
        if chunk_number % 2 == 0:
            pc1 = -pc1
        angles_along_the_way.append(cal_angle(V, pc1))
        pc1s.append(pc1)

        # One gradient chunk is consumed per parameter chunk; as before, an
        # exhausted gradient reader raises StopIteration.
        current_grad = next(all_grads_iterator).values[-1]
        current_param = chunk.values[-1]
        delta = unit_vector(current_param - start_params)
        # NOTE(review): delta is normalised while V is not - confirm the
        # mixed scales are intended.
        pull_dir = V - delta
        grad_vs_pull.append(cal_angle(pull_dir, current_grad))

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)
    first_n_pc1_vs_V_plot_dir = get_first_n_pc1_vs_V_plot_dir(
        plot_dir, cma_args.pc1_chunk_size)
    os.makedirs(first_n_pc1_vs_V_plot_dir, exist_ok=True)

    # Names are reproduced verbatim (trailing space and typos included).
    angles_plot_name = "angles algone the way dim space of mean pca plane "
    plot_2d(first_n_pc1_vs_V_plot_dir, angles_plot_name,
            np.arange(len(angles_along_the_way)), angles_along_the_way,
            "num of chunks", "angle with diff in degrees", False)

    grad_vs_pull_plot_name = "grad vs V - delta_theta"
    plot_2d(first_n_pc1_vs_V_plot_dir, grad_vs_pull_plot_name,
            np.arange(len(grad_vs_pull)), grad_vs_pull, "num of chunks",
            "angle in degrees", False)

    # PCA over the PC1 vectors themselves, capped at 100 components.
    pcpca = PCA(n_components=min(len(pc1s), 100))
    pcpca.fit(pc1s)
    logger.log(pcpca.explained_variance_ratio_)
    # Computed once and reused for both the log line and the saved file.
    v_vs_first_two = cal_angle_plane(V, pcpca.components_[:2])
    logger.log(v_vs_first_two)
    np.savetxt(f"{first_n_pc1_vs_V_plot_dir}/pcs_pcs.txt",
               pcpca.explained_variance_ratio_,
               delimiter=',')
    np.savetxt(
        f"{first_n_pc1_vs_V_plot_dir}/pcs_V_vs_pcapca_first_2_comp_plane.txt",
        np.array([v_vs_first_two]),
        delimiter=',')

    num_above_90 = sum(1 for angle in angles_along_the_way if angle > 90)
    np.savetxt(f"{first_n_pc1_vs_V_plot_dir}/num of angles bigger than 90.txt",
               np.array([num_above_90]),
               delimiter=',')
Example #9
0
def main(n_comp_start=2, do_eval=True):
    """Measure how long the trajectory stays near each chunk's PCA subspace.

    The whole parameter matrix is loaded and cut into consecutive windows of
    ``cma_args.chunk_size`` rows.  A window is only used if at least one row
    follows it.  For each window:

    * a ``PCA`` is fitted and ``calculate_num_axis_to_explain`` gives the
      number of axes for ``ratio_threshold``;
    * the following rows are checked one by one with
      ``calculate_projection_errors``.  Scanning stops once
      ``consec_threshold`` rows in a row have a first error value of at
      least ``error_threshold``.  The number of rows scanned before the
      stop is recorded.

    Both series are plotted against the window index.

    Args:
        n_comp_start: Not used by this function; kept so existing callers
            keep working.
        do_eval: Not used by this function; kept so existing callers keep
            working.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    from stable_baselines.low_dim_analysis.common import \
        calculate_projection_errors, plot_2d, get_allinone_concat_df, calculate_num_axis_to_explain

    ratio_threshold = 0.99
    consec_threshold = 5
    error_threshold = 0.05

    tic = time.time()
    all_param_matrix = get_allinone_concat_df(dir_name=traj_params_dir_name).values
    toc = time.time()
    print('\nElapsed time getting the chunk concat diff took {:.2f} s\n'
          .format(toc - tic))

    chunk_size = cma_args.chunk_size
    total_rows = len(all_param_matrix)
    n_comps = min(cma_args.n_comp_to_use, chunk_size)

    num_to_explains = []
    deviates = []
    for start in range(0, total_rows, chunk_size):
        end = start + chunk_size
        # Stop when no row follows the window; there is nothing to scan.
        if end >= total_rows:
            break

        pca = PCA(n_components=n_comps)
        pca.fit(all_param_matrix[start:end])

        num, _explained = calculate_num_axis_to_explain(pca, ratio_threshold)
        num_to_explains.append(num)

        num_to_deviate = 0
        consec = 0
        for j in range(end, total_rows):
            errors = calculate_projection_errors(
                pca.mean_, pca.components_, all_param_matrix[j], num)
            if errors[0] >= error_threshold:
                consec += 1
                if consec >= consec_threshold:
                    break
            else:
                # Fix: a row back under the threshold ends the run.  Before,
                # the counter was never reset, so it counted all violations
                # rather than consecutive ones.
                consec = 0
            num_to_deviate += 1

        deviates.append(num_to_deviate)

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    # Names are reproduced verbatim so output files keep their existing names.
    deviate_plot_name = f"num of steps to deviates from this plane chunk_size: {cma_args.chunk_size} ratio_threshold: {ratio_threshold} consec_threshold: {consec_threshold}error_threshold: {error_threshold}, "
    plot_2d(plot_dir, deviate_plot_name, np.arange(len(deviates)), deviates,
            "num of chunks", "num of steps to deviates from this plane", False)

    num_to_explain_plot_name = f"num to explain chunk_size: {cma_args.chunk_size} "
    plot_2d(plot_dir, num_to_explain_plot_name,
            np.arange(len(num_to_explains)), num_to_explains,
            "num of chunks", "num_to_explains", False)
Example #10
0
def main():
    """Plot how the current displacement relates to the running and final PC1.

    Command-line options come from the common parser; ``n_comp_to_use`` and
    ``pc1_chunk_size`` are required, and ``num_comp_to_load`` selects the
    saved principal components file.  ``V`` is (final params - start params).
    The reference value is ``postize_angle(cal_angle(V, saved PC1))``.  After
    each parameter chunk an ``IncrementalPCA`` is updated and, for the last
    row ``param`` of the chunk, two differences are recorded:

    * ``postize_angle(cal_angle(param - start, running PC1))`` minus
      ``postize_angle(cal_angle(V, running PC1))``;
    * ``cal_angle(param - start, saved PC1)`` minus the reference value.

    Both series are plotted against the chunk index.
    """
    import sys
    logger.log(sys.argv)
    cma_args, _ = get_common_parser().parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # get the pc vectors
    # ------------------------------------------------------------------
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    V = final_params - start_params

    pcs_components = np.loadtxt(
        get_pcs_filename(intermediate_dir=intermediate_data_dir,
                         n_comp=cma_args.num_comp_to_load),
        delimiter=',')
    final_pc1 = pcs_components[0]

    smallest_error_angle = postize_angle(cal_angle(V, final_pc1))
    logger.log(f"@@@@@@@@@@@@ {smallest_error_angle}")

    curr_angles = []
    inside_final_cone = []

    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    ipca = IncrementalPCA(n_components=1)

    for chunk in all_param_iterator:
        logger.log(f"currently at {all_param_iterator._currow}")
        rows = chunk.values
        ipca.partial_fit(rows)
        running_pc1 = ipca.components_[0]

        angle = postize_angle(cal_angle(V, running_pc1))

        # Displacement of the latest parameters from the start.
        displacement = rows[-1] - start_params
        curr_angle = postize_angle(cal_angle(displacement, running_pc1))

        # NOTE(review): unlike the reference value, this is not passed
        # through postize_angle - confirm the asymmetry is intended.
        curr_angle_final = cal_angle(displacement, final_pc1)

        inside_final_cone.append(curr_angle_final - smallest_error_angle)
        curr_angles.append(curr_angle - angle)

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    angles_plot_name = "$$$curr_angles$$$"
    plot_2d(plot_dir, angles_plot_name, np.arange(len(curr_angles)),
            curr_angles, "num of chunks", "angle with diff in degrees", False)
    angles_plot_name = "inside final cone?"
    plot_2d(plot_dir, angles_plot_name, np.arange(len(inside_final_cone)),
            inside_final_cone, "num of chunks", "angle with diff in degrees",
            False)
def main():
    """Compare PC1-vs-V angles for plain and duplicated/weighted PCA fits.

    Loads the final and start parameters (``V`` is their difference), then
    for each chunk of ``all_param_iterator`` records ``cal_angle(V, PC1)`` for
    an estimator fitted on the raw chunks and for one fitted on the output of
    ``dup_so_far_buffer``.  Both values are folded into [0, 90].  The two
    series and their difference are plotted.

    NOTE(review): as written this function cannot run to completion:
    ``all_param_iterator`` and ``last_percentage`` are never defined here, and
    ``partial_fit`` is called on a ``PCA`` instance.  See the inline notes.
    """

    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''

    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    # First row of the header-less CSV is taken as the parameter vector.
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    count_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "total_num_dumped")
    total_num = pd.read_csv(count_file, header=None).values[0]

    V = final_params - start_params

    # Every second row of the full matrix.
    # NOTE(review): assigned but never read in this function.
    all_thetas_downsampled = get_allinone_concat_df(
        dir_name=traj_params_dir_name).values[::2]

    unduped_angles_along_the_way = []
    duped_angles_along_the_way = []
    diff_along = []
    num = 2  #TODO hardcode!
    # NOTE(review): partial_fit is called on this object below, but
    # scikit-learn's PCA has no partial_fit (IncrementalPCA does) - confirm
    # which estimator was intended.
    undup_ipca = PCA(n_components=1)  # for sparse PCA to speed up

    all_matrix_buffer = []
    # NOTE(review): all_param_iterator is not defined anywhere in this
    # function; as written this line raises NameError.
    for chunk in all_param_iterator:
        chunk = chunk.values
        undup_ipca.partial_fit(chunk)
        unduped_angle = cal_angle(V, undup_ipca.components_[0])

        #TODO ignore 90 or 180 for now
        if unduped_angle > 90:
            unduped_angle = 180 - unduped_angle
        unduped_angles_along_the_way.append(unduped_angle)

        # Accumulate every row seen so far.
        all_matrix_buffer.extend(chunk)

        # NOTE(review): weights is computed but not used afterwards.
        weights = gen_weights(all_param_iterator._currow, total_num)
        # NOTE(review): last_percentage is not defined in this function.
        duped_in_so_far = dup_so_far_buffer(all_matrix_buffer, last_percentage,
                                            num)

        logger.log(
            f"currently at {all_param_iterator._currow}, last_pecentage: {last_percentage}"
        )
        # ipca = PCA(n_components=1)  # for sparse PCA to speed up
        # ipca.fit(duped_in_so_far)
        ipca = WPCA(
            n_components=cma_args.n_comp_to_use)  # for sparse PCA to speed up
        # Feed the duplicated buffer to the estimator in slices of
        # cma_args.chunk_size rows; the last slice may be shorter.
        # NOTE(review): assumes WPCA provides partial_fit - confirm.
        for i in range(0, len(duped_in_so_far), cma_args.chunk_size):
            logger.log(
                f"partial fitting: i : {i} len(duped_in_so_far): {len(duped_in_so_far)}"
            )
            if i + cma_args.chunk_size > len(duped_in_so_far):
                ipca.partial_fit(duped_in_so_far[i:])
            else:
                ipca.partial_fit(duped_in_so_far[i:i + cma_args.chunk_size])

        duped_angle = cal_angle(V, ipca.components_[0])

        #TODO ignore 90 or 180 for now
        if duped_angle > 90:
            duped_angle = 180 - duped_angle
        duped_angles_along_the_way.append(duped_angle)
        diff_along.append(unduped_angle - duped_angle)

    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # Adjacent f-strings are concatenated with no separator between them.
    angles_plot_name = f"duped exponential 2, num dup: {num}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name,
            np.arange(len(duped_angles_along_the_way)),
            duped_angles_along_the_way, "num of chunks",
            "angle with diff in degrees", False)

    angles_plot_name = f"unduped exponential 2, num dup: {num}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name,
            np.arange(len(unduped_angles_along_the_way)),
            unduped_angles_along_the_way, "num of chunks",
            "angle with diff in degrees", False)


    angles_plot_name = f"undup - dup diff_along exponential 2, num dup: {num}" \
                       f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    plot_2d(plot_dir, angles_plot_name, np.arange(len(diff_along)), diff_along,
            "num of chunks", "angle with diff in degrees", False)

    # Drop the accumulated rows and force a garbage-collection pass.
    del all_matrix_buffer
    import gc
    gc.collect()