def shift_dir_baixo(matriz, n=1, valor=None):
    """Shift the matrix down and to the right by prepending ``n``
    filler rows (on top) and ``n`` filler columns (on the left),
    each filled with ``valor``.

    Returns a numpy array of shape (rows + n, cols + n).
    """
    for _ in range(n):
        # Prepend one filler column by stacking onto the transpose.
        fill_col = np_array([valor] * len(matriz))
        with_col = np_vstack((fill_col, np_array(matriz).T)).T
        # Prepend one filler row on top of the widened matrix.
        fill_row = np_array([valor] * len(with_col[0]))
        matriz = np_vstack((fill_row, with_col))
    return matriz
def create_feat_mat_1(graph):
    """Return a 1 x 18 feature row for a single graph, in the same
    feature order as create_feat_mat: density, node count, degree
    stats (max/mean/median/var), clustering stats (max/mean/var),
    edge-weight stats (mean/max/var), average-neighbor-degree stats
    (mean/var/max) and the top three singular values of the
    adjacency matrix.
    """
    cc_values = list(nx_clustering(graph).values())
    nd_values = list(nx_average_neighbor_degree(graph).values())
    deg_values = [deg for _, deg in graph.degree()]
    wt_values = [wt for _, _, wt in graph.edges.data('weight')]

    adj = nx_to_numpy_matrix(graph)
    svs = np_linalg_svd(adj, full_matrices=False, compute_uv=False)
    # Tiny graphs yield fewer than three singular values; pad with 0.
    # (An empty spectrum still raises IndexError, same as before.)
    sv1 = svs[0]
    sv2 = svs[1] if len(svs) >= 2 else 0
    sv3 = svs[2] if len(svs) >= 3 else 0

    row = (nx_density(graph), nx_number_of_nodes(graph),
           max(deg_values), np_mean(deg_values), np_median(deg_values),
           np_var(deg_values), max(cc_values), np_mean(cc_values),
           np_var(cc_values), np_mean(wt_values), max(wt_values),
           np_var(wt_values), np_mean(nd_values), np_var(nd_values),
           max(nd_values), sv1, sv2, sv3)
    return np_vstack(row).T
def extract_features(out_comp_nm, split_type, max_size, inputs, G_nodes,
                     feat_list, X_pos, X_allpos, n_allpos, sizes):
    """Construct negative complexes for one split, featurize them,
    merge with the positives, and write metrics / outputs to disk.

    Returns (y, X, X_pos, y_pos, X_neg, y_neg) with labels 1 for
    positives and 0 for negatives.
    """
    n_pos = len(X_pos)
    n_feats = X_pos.shape[1]
    neig_dicts_dir = inputs['dir_nm'] + inputs['graph_files_dir'] + "/neig_dicts"

    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print("No. of " + split_type + " features = ", n_feats, file=fid)
        print("No. of " + split_type + " positive complexes = ", n_pos, file=fid)

    logging_info("Constructing " + split_type + " negative complexes...")
    # Default to uniform sampling when the config does not specify one.
    inputs.setdefault("neg_sample_method", "uniform")
    neg_comp_list = construct_neg_comps(max_size, n_pos,
                                        inputs['scale_factor'], G_nodes,
                                        sizes, inputs["neg_sample_method"],
                                        neig_dicts_dir)
    logging_info("Finished constructing " + split_type + " negative complexes")

    X_neg = create_feat_mat(neg_comp_list, n_feats)
    # Drop negatives whose feature rows coincide with any positive row.
    X_neg, neg_comp_list, n_neg = remove_same_rows(n_allpos, X_neg,
                                                   X_allpos, neg_comp_list)
    # TODO(review): handle the case n_neg == 0.

    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print("No. of " + split_type + " negative complexes = ", n_neg, file=fid)

    write_neg2out(out_comp_nm + '_neg_' + split_type + '.out',
                  out_comp_nm + '_neg_' + split_type + '_edges.out',
                  neg_comp_list)

    X = np_vstack((X_pos, X_neg))
    y_pos = np_array([1] * n_pos)
    y_neg = np_array([0] * n_neg)
    y = np_array([1] * n_pos + [0] * n_neg)

    # Writing raw training data to csv in tpot format
    write2csv_tpot(X, y, out_comp_nm + "_" + split_type + "_dat.csv", feat_list)

    return y, X, X_pos, y_pos, X_neg, y_neg
def feature_extract(inputs, complex_graphs, test_complex_graphs, G):
    """Drive feature extraction for train/test complexes.

    Computes a recommended maximum complex size from the training size
    distribution (IQR-based outlier cut) and pickles it, plots size
    box plots, then either reads a precomputed labeled feature matrix
    from csv (tpot model in non-gen mode) or builds positive/negative
    feature matrices via create_feat_mat / extract_features.

    Returns (max_size_train, max_size_test, X_pos_test, X_neg_test,
    X_test, y_test, X_pos, y_pos, X, y, X_neg, y_neg).
    """
    G_nodes = G.nodes()
    n_feats = inputs['feats']
    out_comp_nm = inputs['dir_nm'] + inputs['out_comp_nm']
    mode = inputs['mode']
    # mode = "non_gen" # Change to gen if you want to generate matrices
    # n_pos = len(complex_graphs)

    # Sizes (node counts) of the known training complexes.
    sizes = [len(comp) for comp in complex_graphs]

    # get quartiles; the 4.5*IQR fence above Q3 trims extreme outliers
    q1 = np_percentile(sizes, 25)
    q3 = np_percentile(sizes, 75)
    max_wo_outliers = math_ceil(q3 + 4.5 * (q3 - q1))  # Maximum after removing outliers
    max_size_train = max(sizes)
    recommended_max_size = min(max_size_train, max_wo_outliers)

    # Persist the recommended max size for the downstream size search.
    max_sizeF = inputs['dir_nm'] + inputs[
        'train_test_files_dir'] + "/res_max_size_search"
    with open(max_sizeF, 'wb') as f:
        pickle_dump(recommended_max_size, f)

    # n_pos_test = len(test_complex_graphs)
    sizes_test = [len(comp) for comp in test_complex_graphs]
    max_size_test = max(sizes_test)

    fig = plt.figure()
    # Plot box plot of sizes to know the outliers (for setting step size in sampling)
    sns_boxplot(sizes)
    plt.xlabel("Size")
    plt.title("Size distribution of training complexes")
    plt.savefig(out_comp_nm + "_known_train_size_dist_box_plot")
    plt.close(fig)

    fig = plt.figure()
    # Same box plot over train + test sizes combined.
    sns_boxplot(sizes + sizes_test)
    plt.xlabel("Size")
    plt.title("Size distribution of known complexes")
    plt.savefig(out_comp_nm + "_known_size_dist_box_plot")
    plt.close(fig)

    if inputs[
            'model_type'] == "tpot" and mode == "non_gen":  # TODO: CHANGE X_POS, Y_POS later !!!!
        logging_info("Reading labeled feature matrix from file...")
        # Read X,y from csv file
        y, X, X_pos, y_pos, X_neg, y_neg = read_from_csv(
            inputs['train_feat_mat'])
        y_test, X_test, X_pos_test, y_pos_test, X_neg_test, y_neg_test = read_from_csv(
            inputs['test_feat_mat'])
        logging_info("Finished reading feature matrix")
    else:
        logging_info("Feature extraction...")
        # Column names written to the tpot csv (18 features + label).
        feat_list = [
            "dens", "nodes", "degree_max", "degree_mean", "degree_median",
            "degree_var", "CC_max", "CC_mean", "CC_var", "edge_wt_mean",
            "edge_wt_max", "edge_wt_var", "DC_mean", "DC_var", "DC_max",
            "sv1", "sv2", "sv3", "complex"
        ]
        X_pos = create_feat_mat(complex_graphs, n_feats)
        X_pos_test = create_feat_mat(test_complex_graphs, n_feats)
        # All positives (train + test) are used to filter out negatives
        # that collide with any positive row.
        X_allpos = np_vstack((X_pos, X_pos_test))
        n_allpos = len(X_allpos)
        y, X, X_pos, y_pos, X_neg, y_neg = extract_features(
            out_comp_nm, 'train', max_size_train, inputs, G_nodes, feat_list,
            X_pos, X_allpos, n_allpos, sizes)
        y_test, X_test, X_pos_test, y_pos_test, X_neg_test, y_neg_test = extract_features(
            out_comp_nm, 'test', max_size_test, inputs, G_nodes, feat_list,
            X_pos_test, X_allpos, n_allpos, sizes_test)
        logging_info("Finished Feature extraction")

    return max_size_train, max_size_test, X_pos_test, X_neg_test, X_test, y_test, X_pos, y_pos, X, y, X_neg, y_neg
def collate_func(self, batch, split):
    """Collate a list of dataset samples into one batch dict.

    Each sample is a tuple (fc_feat, att_feat, seq, ix, it_pos_now,
    wrapped). Attention features are zero-padded to the longest one in
    the batch; labels are padded with a leading and trailing 0 slot.
    Returns a dict of torch tensors (ndarrays are converted at the end).

    NOTE(review): assumes self.max_seq_length, self.info, self.split_ix
    and (optionally) self.h5_label_file / self.label / self.label_*_ix
    are set up by the owning loader — confirm against __init__.
    """
    necessary_num_img_captions = self.necessary_num_img_captions
    fc_batch = []
    att_batch = []
    label_batch = []
    wrapped = False
    infos = []
    gts = []
    for sample in batch:
        # fetch image
        tmp_fc, tmp_att, tmp_seq, ix, it_pos_now, tmp_wrapped = sample
        if tmp_wrapped:
            wrapped = True
        fc_batch.append(tmp_fc)
        att_batch.append(tmp_att)
        # Label rows are padded: column 0 and the last column stay 0
        # (BOS/EOS slots); the sequence fills columns 1..max_seq_length.
        tmp_label = np_zeros(
            [necessary_num_img_captions, self.max_seq_length + 2],
            dtype="int")
        if hasattr(self, "h5_label_file"):
            # if there is ground truth
            tmp_label[:, 1:self.max_seq_length + 1] = tmp_seq
        label_batch.append(tmp_label)
        # Used for reward evaluation
        if hasattr(self, "h5_label_file"):
            # if there is ground truth
            gts.append(self.label[self.label_start_ix[ix] -
                                  1:self.label_end_ix[ix]])
        else:
            gts.append([])
        # record associated info as well
        info_dict = {}
        info_dict["ix"] = ix
        info_dict["id"] = self.info["images"][ix]["id"]
        info_dict["file_path"] = self.info["images"][ix].get(
            "file_path", "")
        infos.append(info_dict)

    # #sort by att_feat length
    # fc_batch, att_batch, label_batch, gts, infos = \
    #     zip(*sorted(zip(fc_batch, att_batch, np.vsplit(label_batch, batch_size), gts, infos), key=lambda x: len(x[1]), reverse=True))
    # key=0 makes this a stable no-op sort: order is preserved, but the
    # lists are still converted to tuples (sorting by length is disabled).
    fc_batch, att_batch, label_batch, gts, infos = zip(*sorted(
        zip(fc_batch, att_batch, label_batch, gts, infos),
        key=lambda x: 0,
        reverse=True,
    ))

    data = {}
    data["fc_feats"] = np_stack(fc_batch)
    # merge att_feats: zero-pad every att feature to the batch maximum
    max_att_len = max([_.shape[0] for _ in att_batch])
    data["att_feats"] = np_zeros(
        [len(att_batch), max_att_len, att_batch[0].shape[1]],
        dtype="float32")
    for i in range(len(att_batch)):
        data["att_feats"][i, :att_batch[i].shape[0]] = att_batch[i]
    data["att_masks"] = np_zeros(data["att_feats"].shape[:2],
                                 dtype="float32")
    for i in range(len(att_batch)):
        data["att_masks"][i, :att_batch[i].shape[0]] = 1
    # set att_masks to None if attention features have same length
    if data["att_masks"].sum() == data["att_masks"].size:
        data["att_masks"] = None

    data["labels"] = np_vstack(label_batch)
    # generate mask: 1s cover the sequence plus the two pad slots
    nonzeros = np_array(
        list(map(lambda x: (x != 0).sum() + 2, data["labels"])))
    mask_batch = np_zeros(
        [data["labels"].shape[0], self.max_seq_length + 2],
        dtype="float32")
    for ix, row in enumerate(mask_batch):
        row[:nonzeros[ix]] = 1
    data["masks"] = mask_batch
    # Regroup flat label/mask rows back to (batch, captions_per_image, L).
    data["labels"] = data["labels"].reshape(len(batch),
                                            necessary_num_img_captions, -1)
    data["masks"] = data["masks"].reshape(len(batch),
                                          necessary_num_img_captions, -1)

    data["gts"] = gts  # all ground truth captions of each images
    data["bounds"] = {
        "it_pos_now": it_pos_now,  # the it_pos_now of the last sample
        "it_max": len(self.split_ix[split]),
        "wrapped": wrapped,
    }
    data["infos"] = infos

    data = {
        k: from_numpy(v) if type(v) is np_ndarray else v
        for k, v in data.items()
    }  # Turn all ndarray to torch tensor

    return data
def create_feat_mat(graph_list, n_feats):
    """Build a feature matrix for a list of graphs.

    Each row describes one graph with 18 topological features, in
    this order: density, node count, degree stats (max/mean/median/
    var), clustering-coefficient stats (max/mean/var), edge-weight
    stats (mean/max/var), average-neighbor-degree stats (mean/var/
    max) and the top three singular values of the adjacency matrix.

    Parameters
    ----------
    graph_list : iterable of (weighted) networkx graphs
    n_feats : int
        If 1, only the density column is returned with shape (n, 1);
        otherwise the full (n, 18) matrix is returned.
    """
    dens_pos = [nx_density(graph) for graph in graph_list]

    if n_feats == 1:
        # Fast path: only density is requested, so skip the expensive
        # per-graph statistics below (including one SVD per graph).
        # The previous code computed everything and then discarded it.
        return np_array(dens_pos).reshape(-1, 1)

    nodes_pos = [nx_number_of_nodes(graph) for graph in graph_list]

    # CC statistics - mean and max - faster to use a big loop mostly.
    # The bound-append locals below are a deliberate hot-loop
    # micro-optimization (avoid attribute lookups per iteration).
    CC_mean = []
    CC_mean_append = CC_mean.append
    CC_max = []
    CC_max_append = CC_max.append
    CC_var = []
    CC_var_append = CC_var.append
    # Degree correlation - avg degree of the neighborhood
    DC_mean = []
    DC_mean_append = DC_mean.append
    DC_max = []
    DC_max_append = DC_max.append
    DC_var = []
    DC_var_append = DC_var.append
    # Degree statistics
    degree_mean = []
    degree_mean_append = degree_mean.append
    degree_max = []
    degree_max_append = degree_max.append
    degree_median = []
    degree_median_append = degree_median.append
    degree_var = []
    degree_var_append = degree_var.append
    # Edge weight statistics
    edge_wt_mean = []
    edge_wt_mean_append = edge_wt_mean.append
    edge_wt_max = []
    edge_wt_max_append = edge_wt_max.append
    edge_wt_var = []
    edge_wt_var_append = edge_wt_var.append
    # First 3 singular values
    sv1 = []
    sv1_append = sv1.append
    sv2 = []
    sv2_append = sv2.append
    sv3 = []
    sv3_append = sv3.append

    for graph in graph_list:
        CCs = list(nx_clustering(graph).values())
        CC_max_append(max(CCs))
        CC_mean_append(np_mean(CCs))
        CC_var_append(np_var(CCs))

        DCs = list(nx_average_neighbor_degree(graph).values())
        DC_max_append(max(DCs))
        DC_mean_append(np_mean(DCs))
        DC_var_append(np_var(DCs))

        degrees = [tup[1] for tup in graph.degree()]
        degree_mean_append(np_mean(degrees))
        degree_median_append(np_median(degrees))
        degree_max_append(max(degrees))
        degree_var_append(np_var(degrees))

        edge_wts = [tup[2] for tup in graph.edges.data('weight')]
        edge_wt_mean_append(np_mean(edge_wts))
        edge_wt_var_append(np_var(edge_wts))
        edge_wt_max_append(max(edge_wts))

        A_mat = nx_to_numpy_matrix(graph)
        svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)
        # Graphs with fewer than 3 nodes yield fewer singular values;
        # pad the missing ones with 0 (previously an empty spectrum
        # would raise IndexError on svs[0]).
        sv1_append(svs[0] if len(svs) >= 1 else 0)
        sv2_append(svs[1] if len(svs) >= 2 else 0)
        sv3_append(svs[2] if len(svs) >= 3 else 0)

    feat_mat = np_vstack(
        (dens_pos, nodes_pos, degree_max, degree_mean, degree_median,
         degree_var, CC_max, CC_mean, CC_var, edge_wt_mean, edge_wt_max,
         edge_wt_var, DC_mean, DC_var, DC_max, sv1, sv2, sv3)).T

    return feat_mat
def __init_alliances(self):
    """Build the alliance table and store it on self.alliances.

    Column 0 holds the alliance numbers 1..8; the remaining columns
    hold the picked teams with the first three characters of each
    team key stripped (presumably the 'frc' prefix — TODO confirm).
    """
    picks = []
    for alliance in self.raw_event['alliances']:
        picks.append([team[3:] for team in alliance['picks']])
    team_mat = np_array(picks, np_int)
    alliance_nums = np_vstack(np_arange(1, 9, 1))
    self.alliances = np_concatenate((alliance_nums, team_mat), 1)