Example no. 1
def shift_dir_baixo(matriz, n=1, valor=None):
    # prepend n rows (on top) and n columns (on the left), filled with `valor`
    for i in range(n):
        linhas = len(matriz)
        m_colunas = [valor for _ in range(linhas)]
        # add one column of `valor` on the left
        nova = np_vstack((np_array(m_colunas), np_array(matriz).T)).T
        # add one row of `valor` on top
        m_linhas = [valor for _ in range(len(nova[0]))]
        matriz = np_vstack((np_array(m_linhas), nova))
    return matriz
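A minimal usage sketch, assuming `np_vstack` and `np_array` are numpy functions imported under underscore aliases (the import style is not shown in the original snippet):

# Assumed alias imports (not part of the original snippet)
from numpy import vstack as np_vstack, array as np_array

m = [[1, 2],
     [3, 4]]
padded = shift_dir_baixo(m, n=1, valor=0)
# padded:
# [[0 0 0]
#  [0 1 2]
#  [0 3 4]]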
Example no. 2
def create_feat_mat_1(graph):
    # clustering coefficient of each node
    CCs = list(nx_clustering(graph).values())

    # average neighbour degree (degree correlation) of each node
    DCs = list(nx_average_neighbor_degree(graph).values())

    degrees = [tup[1] for tup in graph.degree()]

    edge_wts = [tup[2] for tup in graph.edges.data('weight')]

    # singular values of the adjacency matrix
    A_mat = nx_to_numpy_matrix(graph)
    svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)

    if len(svs) >= 3:
        sv1 = svs[0]
        sv2 = svs[1]
        sv3 = svs[2]
    elif len(svs) >= 2:
        sv1 = svs[0]
        sv2 = svs[1]
        sv3 = 0
    else:
        sv1 = svs[0]
        sv2 = sv3 = 0

    feat_mat = np_vstack(
        (nx_density(graph), nx_number_of_nodes(graph), max(degrees),
         np_mean(degrees), np_median(degrees), np_var(degrees), max(CCs),
         np_mean(CCs), np_var(CCs), np_mean(edge_wts), max(edge_wts),
         np_var(edge_wts), np_mean(DCs), np_var(DCs), max(DCs), sv1, sv2,
         sv3)).T

    return feat_mat
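A rough usage sketch for the function above, assuming the underscore-aliased names map onto networkx and numpy as shown below (the aliases are inferred rather than shown in the original, and `to_numpy_matrix` assumes a pre-3.0 networkx release):

# Assumed alias imports (inferred from the snippet; not part of the original)
import networkx as nx
from networkx import (clustering as nx_clustering,
                      average_neighbor_degree as nx_average_neighbor_degree,
                      to_numpy_matrix as nx_to_numpy_matrix,
                      density as nx_density,
                      number_of_nodes as nx_number_of_nodes)
from numpy import vstack as np_vstack, mean as np_mean, median as np_median, var as np_var
from numpy.linalg import svd as np_linalg_svd

# Small weighted graph (a triangle) as input
g = nx.Graph()
g.add_weighted_edges_from([(0, 1, 0.5), (1, 2, 0.8), (0, 2, 0.3)])

row = create_feat_mat_1(g)
print(row.shape)  # (1, 18): one row of graph-level features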
Example no. 3
def extract_features(out_comp_nm, split_type, max_size, inputs, G_nodes,
                     feat_list, X_pos, X_allpos, n_allpos, sizes):
    n_pos = len(X_pos)

    folNm = inputs['dir_nm'] + inputs['graph_files_dir'] + "/neig_dicts"
    dims = X_pos.shape
    n_feats = dims[1]
    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print("No. of " + split_type + " features = ", n_feats, file=fid)
        print("No. of " + split_type + " positive complexes = ",
              n_pos,
              file=fid)

    logging_info("Constructing " + split_type + " negative complexes...")
    if "neg_sample_method" not in inputs:
        inputs["neg_sample_method"] = "uniform"
    neg_comp_list = construct_neg_comps(max_size, n_pos,
                                        inputs['scale_factor'], G_nodes, sizes,
                                        inputs["neg_sample_method"], folNm)
    logging_info("Finished constructing " + split_type + " negative complexes")

    X_neg = create_feat_mat(neg_comp_list, n_feats)

    X_neg, neg_comp_list, n_neg = remove_same_rows(n_allpos, X_neg, X_allpos,
                                                   neg_comp_list)

    # print(n_neg)
    # TODO: handle the case when n_neg = 0
    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print("No. of " + split_type + " negative complexes = ",
              n_neg,
              file=fid)

    write_neg2out(out_comp_nm + '_neg_' + split_type + '.out',
                  out_comp_nm + '_neg_' + split_type + '_edges.out',
                  neg_comp_list)

    X = np_vstack((X_pos, X_neg))

    y_pos = [1] * n_pos
    y_neg = [0] * n_neg
    y = y_pos + y_neg
    y = np_array(y)
    y_pos = np_array(y_pos)
    y_neg = np_array(y_neg)

    # Writing raw training data to csv in tpot format
    write2csv_tpot(X, y, out_comp_nm + "_" + split_type + "_dat.csv",
                   feat_list)
    return y, X, X_pos, y_pos, X_neg, y_neg
Example no. 4
def feature_extract(inputs, complex_graphs, test_complex_graphs, G):
    G_nodes = G.nodes()
    n_feats = inputs['feats']
    out_comp_nm = inputs['dir_nm'] + inputs['out_comp_nm']
    mode = inputs['mode']
    # mode = "non_gen" # Change to gen if you want to generate matrices

    # n_pos = len(complex_graphs)
    sizes = [len(comp) for comp in complex_graphs]

    # get quartiles
    q1 = np_percentile(sizes, 25)
    q3 = np_percentile(sizes, 75)
    max_wo_outliers = math_ceil(q3 + 4.5 *
                                (q3 - q1))  # Maximum after removing outliers

    max_size_train = max(sizes)
    recommended_max_size = min(max_size_train, max_wo_outliers)

    max_sizeF = (inputs['dir_nm'] + inputs['train_test_files_dir'] +
                 "/res_max_size_search")
    with open(max_sizeF, 'wb') as f:
        pickle_dump(recommended_max_size, f)

    # n_pos_test = len(test_complex_graphs)
    sizes_test = [len(comp) for comp in test_complex_graphs]
    max_size_test = max(sizes_test)

    fig = plt.figure()
    # Plot box plot of sizes to know the outliers (for setting step size in sampling)
    sns_boxplot(sizes)
    plt.xlabel("Size")
    plt.title("Size distribution of training complexes")
    plt.savefig(out_comp_nm + "_known_train_size_dist_box_plot")
    plt.close(fig)

    fig = plt.figure()
    # Plot box plot of sizes to know the outliers (for setting step size in sampling)
    sns_boxplot(sizes + sizes_test)
    plt.xlabel("Size")
    plt.title("Size distribution of known complexes")
    plt.savefig(out_comp_nm + "_known_size_dist_box_plot")
    plt.close(fig)

    if (inputs['model_type'] == "tpot"
            and mode == "non_gen"):  # TODO: change X_pos, y_pos later
        logging_info("Reading labeled feature matrix from file...")
        # Read X,y from csv file

        y, X, X_pos, y_pos, X_neg, y_neg = read_from_csv(
            inputs['train_feat_mat'])

        y_test, X_test, X_pos_test, y_pos_test, X_neg_test, y_neg_test = read_from_csv(
            inputs['test_feat_mat'])

        logging_info("Finished reading feature matrix")
    else:

        logging_info("Feature extraction...")

        feat_list = [
            "dens", "nodes", "degree_max", "degree_mean", "degree_median",
            "degree_var", "CC_max", "CC_mean", "CC_var", "edge_wt_mean",
            "edge_wt_max", "edge_wt_var", "DC_mean", "DC_var", "DC_max", "sv1",
            "sv2", "sv3", "complex"
        ]

        X_pos = create_feat_mat(complex_graphs, n_feats)
        X_pos_test = create_feat_mat(test_complex_graphs, n_feats)

        X_allpos = np_vstack((X_pos, X_pos_test))
        n_allpos = len(X_allpos)
        y, X, X_pos, y_pos, X_neg, y_neg = extract_features(
            out_comp_nm, 'train', max_size_train, inputs, G_nodes, feat_list,
            X_pos, X_allpos, n_allpos, sizes)
        y_test, X_test, X_pos_test, y_pos_test, X_neg_test, y_neg_test = extract_features(
            out_comp_nm, 'test', max_size_test, inputs, G_nodes, feat_list,
            X_pos_test, X_allpos, n_allpos, sizes_test)

        logging_info("Finished Feature extraction")
    return max_size_train, max_size_test, X_pos_test, X_neg_test, X_test, y_test, X_pos, y_pos, X, y, X_neg, y_neg
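The size cap in `feature_extract` above comes from a modified IQR rule, `ceil(q3 + 4.5 * (q3 - q1))`, further capped by the largest training complex. A tiny worked example with made-up sizes (purely illustrative, not from the original data):

# Illustrative only: toy complex sizes
from math import ceil as math_ceil
from numpy import percentile as np_percentile

sizes = [3, 4, 4, 5, 6, 6, 20]
q1 = np_percentile(sizes, 25)                            # 4.0
q3 = np_percentile(sizes, 75)                            # 6.0
max_wo_outliers = math_ceil(q3 + 4.5 * (q3 - q1))        # ceil(6 + 9) = 15
recommended_max_size = min(max(sizes), max_wo_outliers)  # min(20, 15) = 15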
Example no. 5
    def collate_func(self, batch, split):

        necessary_num_img_captions = self.necessary_num_img_captions

        fc_batch = []
        att_batch = []
        label_batch = []

        wrapped = False

        infos = []
        gts = []

        for sample in batch:
            # fetch image
            tmp_fc, tmp_att, tmp_seq, ix, it_pos_now, tmp_wrapped = sample
            if tmp_wrapped:
                wrapped = True

            fc_batch.append(tmp_fc)
            att_batch.append(tmp_att)

            tmp_label = np_zeros(
                [necessary_num_img_captions, self.max_seq_length + 2],
                dtype="int")
            if hasattr(self, "h5_label_file"):
                # if there is ground truth
                tmp_label[:, 1:self.max_seq_length + 1] = tmp_seq
            label_batch.append(tmp_label)

            # Used for reward evaluation
            if hasattr(self, "h5_label_file"):
                # if there is ground truth
                gts.append(self.label[self.label_start_ix[ix] -
                                      1:self.label_end_ix[ix]])
            else:
                gts.append([])

            # record associated info as well
            info_dict = {}
            info_dict["ix"] = ix
            info_dict["id"] = self.info["images"][ix]["id"]
            info_dict["file_path"] = self.info["images"][ix].get(
                "file_path", "")
            infos.append(info_dict)

        # #sort by att_feat length
        # fc_batch, att_batch, label_batch, gts, infos = \
        #     zip(*sorted(zip(fc_batch, att_batch, np.vsplit(label_batch, batch_size), gts, infos), key=lambda x: len(x[1]), reverse=True))
        fc_batch, att_batch, label_batch, gts, infos = zip(*sorted(
            zip(fc_batch, att_batch, label_batch, gts, infos),
            key=lambda x: 0,
            reverse=True,
        ))
        data = {}
        data["fc_feats"] = np_stack(fc_batch)
        # merge att_feats
        max_att_len = max([_.shape[0] for _ in att_batch])
        data["att_feats"] = np_zeros(
            [len(att_batch), max_att_len, att_batch[0].shape[1]],
            dtype="float32")
        for i in range(len(att_batch)):
            data["att_feats"][i, :att_batch[i].shape[0]] = att_batch[i]
        data["att_masks"] = np_zeros(data["att_feats"].shape[:2],
                                     dtype="float32")
        for i in range(len(att_batch)):
            data["att_masks"][i, :att_batch[i].shape[0]] = 1
        # set att_masks to None if attention features have same length
        if data["att_masks"].sum() == data["att_masks"].size:
            data["att_masks"] = None

        data["labels"] = np_vstack(label_batch)
        #
        # generate mask
        nonzeros = np_array(
            list(map(lambda x: (x != 0).sum() + 2, data["labels"])))
        mask_batch = np_zeros(
            [data["labels"].shape[0], self.max_seq_length + 2],
            dtype="float32")
        for ix, row in enumerate(mask_batch):
            row[:nonzeros[ix]] = 1

        data["masks"] = mask_batch
        data["labels"] = data["labels"].reshape(len(batch),
                                                necessary_num_img_captions, -1)
        data["masks"] = data["masks"].reshape(len(batch),
                                              necessary_num_img_captions, -1)

        data["gts"] = gts  # all ground truth captions of each images
        data["bounds"] = {
            "it_pos_now": it_pos_now,  # the it_pos_now of the last sample
            "it_max": len(self.split_ix[split]),
            "wrapped": wrapped,
        }
        data["infos"] = infos

        data = {
            k: from_numpy(v) if type(v) is np_ndarray else v
            for k, v in data.items()
        }  # Turn all ndarray to torch tensor

        return data
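A minimal standalone sketch of the mask construction near the end of `collate_func`, using a toy label matrix and plain numpy instead of the module's `np_*` aliases (illustrative only):

import numpy as np

# Toy labels: 2 captions, max_seq_length = 4, hence 4 + 2 = 6 columns
labels = np.array([[0, 7, 3, 9, 0, 0],   # 3 real tokens
                   [0, 5, 2, 0, 0, 0]])  # 2 real tokens
nonzeros = np.array([(row != 0).sum() + 2 for row in labels])  # [5, 4]
masks = np.zeros(labels.shape, dtype="float32")
for ix, row in enumerate(masks):
    row[:nonzeros[ix]] = 1
# masks:
# [[1. 1. 1. 1. 1. 0.]
#  [1. 1. 1. 1. 0. 0.]]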
Example no. 6
def create_feat_mat(graph_list, n_feats):
    dens_pos = [nx_density(graph) for graph in graph_list]
    nodes_pos = [nx_number_of_nodes(graph) for graph in graph_list]

    # CC statistics - mean, max and variance (one big loop is mostly faster)
    CC_mean = []
    CC_mean_append = CC_mean.append
    CC_max = []
    CC_max_append = CC_max.append
    CC_var = []
    CC_var_append = CC_var.append
    # Degree correlation - avg degree of the neighborhood     
    DC_mean = []
    DC_mean_append = DC_mean.append
    DC_max = []
    DC_max_append = DC_max.append
    DC_var = []
    DC_var_append = DC_var.append
    # Degree statistics
    degree_mean = []
    degree_mean_append = degree_mean.append
    degree_max = []
    degree_max_append = degree_max.append
    degree_median = []
    degree_median_append = degree_median.append
    degree_var = []
    degree_var_append = degree_var.append
    # Edge weight statistics 
    edge_wt_mean = []
    edge_wt_mean_append = edge_wt_mean.append
    edge_wt_max = []
    edge_wt_max_append = edge_wt_max.append
    edge_wt_var = []
    edge_wt_var_append = edge_wt_var.append
    # First 3 singular values 
    sv1 = []
    sv1_append = sv1.append
    sv2 = []
    sv2_append = sv2.append
    sv3 = []
    sv3_append = sv3.append
    for graph in graph_list:

        CCs = list(nx_clustering(graph).values())
        CC_max_append(max(CCs))
        CC_mean_append(np_mean(CCs))
        CC_var_append(np_var(CCs))

        DCs = list(nx_average_neighbor_degree(graph).values())
        DC_max_append(max(DCs))
        DC_mean_append(np_mean(DCs))
        DC_var_append(np_var(DCs))

        degrees = [tup[1] for tup in graph.degree()]
        degree_mean_append(np_mean(degrees))
        degree_median_append(np_median(degrees))
        degree_max_append(max(degrees))
        degree_var_append(np_var(degrees))

        edge_wts = [tup[2] for tup in graph.edges.data('weight')]
        edge_wt_mean_append(np_mean(edge_wts))
        edge_wt_var_append(np_var(edge_wts))
        edge_wt_max_append(max(edge_wts))

        A_mat = nx_to_numpy_matrix(graph)
        svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)

        if len(svs) >= 3:
            sv1_append(svs[0])
            sv2_append(svs[1])
            sv3_append(svs[2])
        elif len(svs) >= 2:
            sv1_append(svs[0])
            sv2_append(svs[1])
            sv3_append(0)
        else:
            sv1_append(svs[0])
            sv2_append(0)
            sv3_append(0)

    feat_mat = np_vstack((dens_pos, nodes_pos, degree_max, degree_mean, degree_median, degree_var, CC_max, CC_mean,
                          CC_var, edge_wt_mean, edge_wt_max, edge_wt_var, DC_mean, DC_var, DC_max, sv1, sv2, sv3)).T

    if n_feats == 1:
        feat_mat = np_array(dens_pos).reshape(-1, 1)

    return feat_mat
Example no. 7
    def __init_alliances(self):
        # strip the 3-character prefix from each team key in every alliance
        alliances = [[team[3:] for team in alliance['picks']]
                     for alliance in self.raw_event['alliances']]
        alliances = np_array(alliances, np_int)
        # alliance numbers 1-8 as a column vector
        numbers = np_vstack(np_arange(1, 9, 1))
        self.alliances = np_concatenate((numbers, alliances), 1)