Example #1
def get_tree_df_row(tree, config_obj):
    balanced_h5_file = get_balanced_h5_path(config_obj, True) + tree
    balanced_h5 = None
    row = {}
    try:
        balanced_h5 = h5py.File(balanced_h5_file, 'r')
        attrs = balanced_h5.attrs
        pendant_total = attrs['pendant_vertices_c0'] + attrs['pendant_vertices_c1']
        row = {
            "pv_c0_pct": 100 * (attrs['pendant_vertices_c0'] / pendant_total),
            "pv_c1_pct": 100 * (attrs['pendant_vertices_c1'] / pendant_total),
            "pv_pct": 100 * (pendant_total / attrs['total_vertex_size']),
            "tree_pendant_vertices": pendant_total,
            "c0_pct": (attrs['c0_vertex_size'] / (attrs['c0_vertex_size'] + attrs['c1_vertex_size'])) * 100,
            "++_edges": attrs['++_edges'],
            "-+_edges": attrs['-+_edges'],
            "+-_edges": attrs['+-_edges'],
            "--_edges": attrs['--_edges'],
            "num_changed_edges": attrs['num_changed_edges'],
            "mean_tree_vertex_deg": attrs['mean_tree_vertex_deg'],
            "stdev_tree_vertex_deg": attrs['stdev_tree_vertex_deg']
        }
        if 'tree_bias_score' in balanced_h5.attrs:
            row["tree_bias_score"] = balanced_h5.attrs['tree_bias_score']
    except Exception as error:
        print("Tree DF error. The following tree is bad: ", tree, " - path: ", balanced_h5_file)
        print(error)
        # os.remove(balanced_h5_file)
    finally:
        if balanced_h5 is not None:
            balanced_h5.close()
    return row
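A minimal usage sketch (not part of the original listing): get_tree_df_row returns one dict of per-tree statistics, so it can be combined with get_balanced_file_list from Example #5 to assemble a DataFrame with one row per tree. The helper name build_tree_df and the pandas import are assumptions.

# Hypothetical glue code: build a per-tree statistics DataFrame.
# Assumes get_balanced_file_list (Example #5) and get_tree_df_row (above) are available.
import pandas as pd

def build_tree_df(config_obj):
    tree_rows = {}
    for tree in get_balanced_file_list(config_obj, True):
        row = get_tree_df_row(tree, config_obj)
        if row:  # skip trees whose h5 file could not be read
            tree_rows[tree] = row
    # index = tree name, columns = the per-tree statistics
    return pd.DataFrame.from_dict(tree_rows, orient="index")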
Example #2
def get_per_tree_vertex_dict(tree, config_obj): 
    balanced_h5_file = get_balanced_h5_path(config_obj, True) + tree
    balanced_h5 = None
    row = {}
    try:
        balanced_h5 = h5py.File(balanced_h5_file, 'r')
        ### TODO: Make this a config option in the yaml file instead of comment/uncomment.
        ### This block can be uncommented to add the c0 and c1 size percentages to Data h5s that have already been made.
        total_nodes = balanced_h5.attrs['c0_vertex_size'] + balanced_h5.attrs['c1_vertex_size']
        c0_size_percent = balanced_h5.attrs['c0_vertex_size'] / total_nodes
        c1_size_percent = balanced_h5.attrs['c1_vertex_size'] / total_nodes
        component_list_dict = dict(enumerate(balanced_h5['component_list']))
        
        component_list_with_tuples = [
            (component_list_dict[entry], c0_size_percent)
            if component_list_dict[entry] == 0
            else (component_list_dict[entry], c1_size_percent)
            for entry in range(total_nodes)
        ]

        ### These next two lines should be switched depending on whether the c0 and c1 sizes were added when the trees were initially run. If they were not, use the uncommented line together with the block above; if they were, use the commented-out line instead.
        row['component_list_with_tuples'] = component_list_with_tuples
        # row['component_list_with_tuples'] = list(balanced_h5['weighted_component_list'])
        row['component_list'] = list(balanced_h5['component_list']) 
        row['vertex_neighbor_amt_list'] = list(balanced_h5['vertex_neighbor_amt_list'])
        row['mean_tree_vertex_deg'] = balanced_h5.attrs['mean_tree_vertex_deg']
        row['stdev_tree_vertex_deg'] = balanced_h5.attrs['stdev_tree_vertex_deg']
        row['pendant_vertices_c0'] = balanced_h5.attrs['pendant_vertices_c0']
        row['pendant_vertices_c1'] = balanced_h5.attrs['pendant_vertices_c1']
        row['pendant_vertices_total'] = balanced_h5.attrs['pendant_vertices_total']
    except Exception as error:
        print("Vertex DF error. The following tree is bad: ", tree, " - path: ", balanced_h5_file)
        print("error: ", error)
        ## Eric commented out this line so things can be tested in this function without deleting tree files.
        # os.remove(balanced_h5_file)
    finally:
        if balanced_h5 is not None:
            balanced_h5.close()
    return row
Example #3
def get_per_tree_vertex_dict(tree, config_obj):
    balanced_h5_file = get_balanced_h5_path(config_obj, True) + tree
    balanced_h5 = None
    row = {}
    try:
        balanced_h5 = h5py.File(balanced_h5_file, "r")
        row["component_list_with_tuples"] = list(
            balanced_h5["weighted_component_list"])

        row["component_list"] = list(balanced_h5["component_list"])
        row["vertex_neighbor_amt_list"] = list(
            balanced_h5["vertex_neighbor_amt_list"])
        row["mean_tree_vertex_deg"] = balanced_h5.attrs["mean_tree_vertex_deg"]
        row["stdev_tree_vertex_deg"] = balanced_h5.attrs[
            "stdev_tree_vertex_deg"]
        row["pendant_vertices_c0"] = balanced_h5.attrs["pendant_vertices_c0"]
        row["pendant_vertices_c1"] = balanced_h5.attrs["pendant_vertices_c1"]
        row["pendant_vertices_total"] = balanced_h5.attrs[
            "pendant_vertices_total"]
    except Exception as error:
        print(
            "Vertex DF error. The following tree is bad: ",
            tree,
            " - path: ",
            balanced_h5_file,
        )
        print("error: ", error)
    finally:
        if balanced_h5 is not None:
            balanced_h5.close()

    return row
Example #4
def create_vertex_df_from_vertex_dict(to_be_df_dict, config_obj, trees_list,
                                      FULL_DF_PATH):
    #print("Creating components dict.")
    try:
        components_dict = {
            tree: to_be_df_dict[tree]["component_list"]
            for tree in trees_list
        }
        component_df = pd.DataFrame(components_dict)
        component_dict_with_weights = {
            tree: to_be_df_dict[tree]["component_list_with_tuples"]
            for tree in trees_list
        }
        component_weights_df = pd.DataFrame(component_dict_with_weights)
    except Exception as error:
        print("Tree without component list found, moving to next.")
        print(error)

    balanced_h5_file = get_balanced_h5_path(config_obj, True) + trees_list[0]
    balanced_h5 = None
    try:
        balanced_h5 = h5py.File(balanced_h5_file, "r")
        #print("Adding c0 pct (and outcomes if applicable).")
        vertex_df = add_c0_pct_and_outcomes(
            trees_list[0],
            balanced_h5,
            component_weights_df,
            config_obj["tiebreak_node"],
            config_obj["weighted_status"],
            config_obj["machine"],
        )
        pd.to_pickle(vertex_df, FULL_DF_PATH)
        #print("Done. Made vertex df of shape: ", vertex_df.shape)
    finally:
        if balanced_h5 is not None:
            balanced_h5.close()
    return vertex_df
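A hedged sketch of how Examples #2/#3, #4, and #5 might fit together: collect one vertex dict per tree, then hand the combined dict to create_vertex_df_from_vertex_dict. The wrapper name build_vertex_df and the full_df_path argument (a pickle output path, as suggested by pd.to_pickle above) are assumptions.

# Hypothetical glue code; assumes the functions shown in Examples #2/#3, #4, and #5.
def build_vertex_df(config_obj, full_df_path):
    trees_list = get_balanced_file_list(config_obj, True)
    # one vertex dict per tree, keyed by tree name
    to_be_df_dict = {tree: get_per_tree_vertex_dict(tree, config_obj) for tree in trees_list}
    return create_vertex_df_from_vertex_dict(to_be_df_dict, config_obj, trees_list, full_df_path)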
Example #5
def get_balanced_file_list(config_obj, local_override=False):
    BALANCED_DIR = get_balanced_h5_path(config_obj, local_override)
    balanced_file_list = [
        f for f in listdir(BALANCED_DIR)
        if isfile(join(BALANCED_DIR, f)) and f != ".DS_Store"
    ]
    balanced_file_list.sort()
    #print("Tree list Length: ", len(balanced_file_list))
    return balanced_file_list
Example #6
def write_balanced_h5(
    config_obj,
    mapping_to_original,
    b_csr_unbal_adj_matrix,
    csr_st_adj_matrix,
    nx_st_graph,
    csr_bal_adj_matrix,
    changed_edge_list,
    tree_name,
):
    bal_adj_path = get_balanced_h5_path(config_obj) + tree_name + ".h5"
    try:
        output_type = config_obj["machine"]
        start_component_list = datetime.now()
        component_list = get_balance_components(nx_st_graph)
        print_timing_output(
            "COMPONENT_LIST_GEN_TIME: Component List Acquired, took: (hh:mm:ss.ms)",
            datetime.now() - start_component_list,
            output_type,
        )

        unique_components = np.unique(component_list)
        if len(unique_components) > 1:
            f = h5py.File(bal_adj_path, "w")
            g = f.create_group("csr_st_adj_matrix")
            create_matrix_h5_file(g, csr_st_adj_matrix)
            h = f.create_group("csr_bal_adj_matrix")
            create_matrix_h5_file(h, csr_bal_adj_matrix)
            f.create_dataset("changed_edge_list", data=changed_edge_list)
            f.create_dataset("mapping_to_original", data=mapping_to_original)
            component_stats_start = datetime.now()
            write_component_stats(f, csr_bal_adj_matrix, csr_st_adj_matrix,
                                  component_list, config_obj)
            print_timing_output(
                "COMPONENT_STATS_TIME: (hh:mm:ss:ms)",
                datetime.now() - component_stats_start,
                output_type,
            )
            write_vertex_degree_stats(f, csr_bal_adj_matrix, csr_st_adj_matrix,
                                      component_list)
            write_edge_stats(f, b_csr_unbal_adj_matrix, csr_bal_adj_matrix)
            write_balanced_attributes(f, config_obj, tree_name)
            if config_obj[
                    "has_labels"]:  ## writes c0 and c1 wins and losses to existing h5
                write_outcome_stats(f, config_obj, csr_bal_adj_matrix,
                                    component_list)
            f.close()
    except Exception as error:
        print("Error writing balanced h5: ", error)
Example #7
def write_balanced_h5(config_obj, mapping_to_original, b_csr_unbal_adj_matrix,
                      csr_st_adj_matrix, nx_st_graph, csr_bal_adj_matrix,
                      changed_edge_list, tree_name):
    bal_adj_path = get_balanced_h5_path(config_obj) + tree_name + ".h5"
    print("Writing balanced h5: ", "(", tree_name, ", ", config_obj['dataset'],
          ", ", config_obj['data_subset_type'], ", ",
          config_obj['matrix_name'], ", ", config_obj['component_no'], ") ")
    try:
        start_component_list = datetime.now()
        component_list = get_balance_components(nx_st_graph)
        print('Component List Acquired, took: (hh:mm:ss.ms) {}'.format(
            datetime.now() - start_component_list))
        unique_components = np.unique(component_list)
        if len(unique_components) > 1:
            f = h5py.File(bal_adj_path, 'w')
            g = f.create_group("csr_st_adj_matrix")
            create_matrix_h5_file(g, csr_st_adj_matrix)
            h = f.create_group("csr_bal_adj_matrix")
            create_matrix_h5_file(h, csr_bal_adj_matrix)
            f.create_dataset('changed_edge_list', data=changed_edge_list)
            f.create_dataset('mapping_to_original', data=mapping_to_original)
            write_component_stats(f, csr_bal_adj_matrix, csr_st_adj_matrix,
                                  component_list)
            write_vertex_degree_stats(f, csr_bal_adj_matrix, csr_st_adj_matrix,
                                      component_list)
            write_edge_stats(f, b_csr_unbal_adj_matrix, csr_bal_adj_matrix)
            write_balanced_attributes(f, config_obj, tree_name)
            if config_obj[
                    'has_labels']:  ## writes c0 and c1 wins and losses to existing h5
                write_outcome_stats(f, config_obj, csr_bal_adj_matrix,
                                    component_list)
            print("Wrote balanced h5: ", "(", tree_name, ", ",
                  config_obj['dataset'], ", ", config_obj['data_subset_type'],
                  ", ", config_obj['matrix_name'], ", ",
                  config_obj['component_no'], ")")
            f.close()
        else:
            print(
                "Component list had only one component. No Agreeable Minority exists for this tree. Tree not saved to h5."
            )
    finally:
        print()