# Repeat for the GFT error if doGFT: reprErrorGFT[graph] = [None] * len(perturbationEpsilon) # The bound also depends on the specific value of epsilon so we also need # this if computeBound: bound[graph] = [None] * len(perturbationEpsilon) #%%################################################################## # # # GRAPH CREATION # # # ##################################################################### # Create graph G = graphTools.Graph(graphType, nNodes, graphOptions) #%%################################################################## # # # GRAPH SCATTERING MODELS # # # ##################################################################### modelsGST = {} # Store each model as a key in this dictionary, then we can # can compute the output for each model inside a for (iterating over # the key), since all models have a computeTransform() method. if doDiffusion: modelsGST[diffusionName] = GST.DiffusionScattering( numScales, numLayers, G.W)
# change later on when the graph is created and the options on whether to # make it connected, etc., come into effect) nNodes = data.selectedAuthor['all']['wordFreq'].shape[1] ######### # GRAPH # ######### # Create graph nodesToKeep = [] # here we store the list of nodes kept after all # modifications to the graph, so we can then update the data samples # accordingly; since lists are passed as pointers (mutable objects) # we can store the node list without necessary getting an output to the # function G = graphTools.Graph('fuseEdges', nNodes, data.selectedAuthor['train']['WAN'], 'sum', graphNormalizationType, keepIsolatedNodes, forceUndirected, forceConnected, nodesToKeep) G.computeGFT() # Compute the GFT of the stored GSO # And re-update the number of nodes for changes in the graph (due to # enforced connectedness, for instance) nNodes = G.N nodesToKeep = np.array(nodesToKeep) # And re-update the data (keep only the nodes that are kept after isolated # nodes or nodes to make the graph connected have been removed) data.samples['train']['signals'] = \ data.samples['train']['signals'][:, nodesToKeep] data.samples['valid']['signals'] = \ data.samples['valid']['signals'][:, nodesToKeep] data.samples['test']['signals'] = \ data.samples['test']['signals'][:, nodesToKeep]
# Number of classes nClasses = data.getNumberOfClasses() if doPrint: print("OK") ######### # GRAPH # ######### if doPrint: print("Setting up the graph...", end=' ', flush=True) # Create graph adjacencyMatrix = data.getGraph() G = graphTools.Graph('adjacency', adjacencyMatrix.shape[0], {'adjacencyMatrix': adjacencyMatrix}) G.computeGFT() # Compute the GFT of the stored GSO # And re-update the number of nodes for changes in the graph (due to # enforced connectedness, for instance) nNodes = G.N # Once data is completely formatted and in appropriate fashion, change its # type to torch and move it to the appropriate device data.astype(torch.float64) data.to(device) if doPrint: print("OK") #%%##################################################################
print("OK") # Now, to create the proper graph object, since we're going to use # 'fuseEdges' option in createGraph, we are going to add an extra dimension # to the adjacencyMatrix (to indicate there's only one matrix in the # collection that we should be fusing) adjacencyMatrix = adjacencyMatrix.reshape([1, N, N]) nodeList = [] extraComponents = [] if doPrint: print("Creating graph...", flush=True, end=' ') G = graphTools.Graph( 'fuseEdges', N, { 'adjacencyMatrices': adjacencyMatrix, 'nodeList': nodeList, 'extraComponents': extraComponents, 'aggregationType': 'sum', 'normalizationType': 'no', 'isolatedNodes': keepIsolatedNodes, 'forceUndirected': True, 'forceConnected': forceConnected }) G.computeGFT() # Compute the eigendecomposition of the stored GSO if doPrint: print("OK") ################ # SOURCE NODES # ################ if doPrint: print("Selecting source nodes...", end=' ', flush=True)
def train_net(data, h_parameters, phi=None):
    """Train the polynomial selection GNN for one hyper-parameter combination.

    Builds the graph from the training split, instantiates an
    ``archit.SelectionGNN`` on either the graph's own shift operator or on
    the supplied ``phi``, trains it with ``train.MultipleModels`` and
    returns the trained model wrapper.

    Parameters
    ----------
    data
        Authorship data set exposing ``selectedAuthor``, ``samples``,
        ``astype`` and ``to``. It is modified in place: when ``phi`` is
        ``None`` the signals of every split are restricted to the nodes
        kept by the graph, and in all cases the data is cast to
        ``torch.float64`` and moved to ``device``.
    h_parameters
        Indexable pair; element 0 overrides ``hParamsPolynomial['F']`` and
        element 1 overrides ``hParamsPolynomial['K']``.
    phi : optional
        Matrix used as graph shift operator instead of ``G.S``. It is
        passed to ``np.linalg.eig`` (so it must be square) and its first
        dimension is taken as the number of nodes.

    Returns
    -------
    The ``model.Model`` instance registered under
    ``hParamsPolynomial['name']`` after training.

    Raises
    ------
    ValueError
        If the module-level ``trainer`` is not one of ``'ADAM'``, ``'SGD'``
        or ``'RMSprop'``.

    Notes
    -----
    The module-level ``hParamsPolynomial`` dictionary is updated in place
    (keys ``'F'``, ``'K'`` and ``'N'``).
    """
    # Now, we are in position to know the number of nodes (for now; this
    # might change later on when the graph is created and the options on
    # whether to make it connected, etc., come into effect)
    nNodes = data.selectedAuthor['all']['wordFreq'].shape[1]

    #########
    # GRAPH #
    #########

    # Create graph
    nodesToKeep = []  # here we store the list of nodes kept after all
    # modifications to the graph, so we can then update the data samples
    # accordingly; since lists are mutable objects passed by reference, the
    # constructor can fill the node list in place without returning it.
    # NOTE(review): the graph (and its GFT) is also built when phi is given,
    # although G is only read in the phi-is-None path. Kept as is to avoid
    # changing behaviour; consider moving it under `if phi is None`.
    G = graphTools.Graph('fuseEdges', nNodes,
                         data.selectedAuthor['train']['WAN'],
                         'sum', graphNormalizationType, keepIsolatedNodes,
                         forceUndirected, forceConnected, nodesToKeep)
    G.computeGFT()  # Compute the GFT of the stored GSO

    # And re-update the number of nodes for changes in the graph (due to
    # enforced connectedness, for instance)
    if phi is None:
        nNodes = G.N
        nodesToKeep = np.array(nodesToKeep)
        # And re-update the data (keep only the nodes that are kept after
        # isolated nodes or nodes to make the graph connected have been
        # removed)
        for split in ('train', 'valid', 'test'):
            data.samples[split]['signals'] = \
                data.samples[split]['signals'][:, nodesToKeep]
    else:
        nNodes = phi.shape[0]

    # Once data is completely formatted and in appropriate fashion, change
    # its type to torch and move it to the appropriate device
    data.astype(torch.float64)
    data.to(device)

    #####################################################################
    #                                                                   #
    #                    MODELS INITIALIZATION                          #
    #                                                                   #
    #####################################################################

    # Override parameters with grid parameters.
    hParamsPolynomial['F'] = h_parameters[0]
    hParamsPolynomial['K'] = h_parameters[1]

    # This is the dictionary where we store the models (in a model.Model
    # class, that is then passed to training).
    modelsGNN = {}

    # If a new model is to be created, it should be called for here.

    # \\\\\\\\\\\\
    # \\\ MODEL 2: Polynomial GNN
    # \\\\\\\\\\\\

    thisName = hParamsPolynomial['name']

    ##############
    # PARAMETERS #
    ##############

    # \\\ Optimizer options
    #   (If different from the default ones, change here.)
    thisTrainer = trainer
    thisLearningRate = learningRate
    thisBeta1 = beta1
    thisBeta2 = beta2

    # \\\ Ordering
    if phi is None:
        S, order = graphTools.permIdentity(G.S / np.max(np.diag(G.E)))
        # order is an np.array with the ordering of the nodes with respect
        # to the original GSO (the original GSO is kept in G.S).
    else:
        # Compute the eigenvalues of phi (the eigenvectors are not needed).
        e, _ = np.linalg.eig(phi)
        # NOTE(review): np.linalg.eig already returns the eigenvalues as a
        # 1-D vector, so np.diag(e) *builds* a square matrix whose
        # off-diagonal zeros take part in the max. Left untouched on purpose
        # to preserve the current normalization; confirm whether
        # np.max(e.real) was intended.
        highest_eig_val = np.max(np.diag(e)).real

        if highest_eig_val == 0:
            # Nothing to normalize by; use phi as it is.
            S, order = graphTools.permIdentity(phi)
        else:
            S, order = graphTools.permIdentity(phi / highest_eig_val)
        # order is an np.array with the ordering of the nodes with respect
        # to phi.

    ################
    # ARCHITECTURE #
    ################

    hParamsPolynomial['N'] = [nNodes]

    if doPrint:
        print('')
        print('COMBINATION {0}, {1}'.format(str(hParamsPolynomial['F']),
                                            str(hParamsPolynomial['K'])))

    thisArchit = archit.SelectionGNN(  # Graph filtering
        hParamsPolynomial['F'],
        hParamsPolynomial['K'],
        hParamsPolynomial['bias'],
        # Nonlinearity
        hParamsPolynomial['sigma'],
        # Pooling
        hParamsPolynomial['N'],
        hParamsPolynomial['rho'],
        hParamsPolynomial['alpha'],
        # MLP
        hParamsPolynomial['dimLayersMLP'],
        # Structure
        S)
    # This is necessary to move all the learnable parameters to be
    # stored in the device (mostly, if it's a GPU)
    thisArchit.to(device)

    #############
    # OPTIMIZER #
    #############

    if thisTrainer == 'ADAM':
        thisOptim = optim.Adam(thisArchit.parameters(),
                               lr=thisLearningRate,
                               betas=(thisBeta1, thisBeta2))
    elif thisTrainer == 'SGD':
        thisOptim = optim.SGD(thisArchit.parameters(), lr=thisLearningRate)
    elif thisTrainer == 'RMSprop':
        thisOptim = optim.RMSprop(thisArchit.parameters(),
                                  lr=thisLearningRate, alpha=thisBeta1)
    else:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError(
            "Unknown trainer {!r}; expected 'ADAM', 'SGD' or "
            "'RMSprop'".format(thisTrainer))

    ########
    # LOSS #
    ########

    thisLossFunction = lossFunction

    #########
    # MODEL #
    #########

    Polynomial = model.Model(thisArchit, thisLossFunction, thisOptim,
                             thisName, saveDir, order)

    modelsGNN[thisName] = Polynomial

    #####################################################################
    #                                                                   #
    #                    TRAINING                                       #
    #                                                                   #
    #####################################################################

    ############
    # TRAINING #
    ############

    # On top of the rest of the training options, we pass the identification
    # of this specific data split realization.

    # This is the function that trains the models detailed in the dictionary
    # modelsGNN using the data data, with the specified training options.
    train.MultipleModels(modelsGNN, data,
                         nEpochs=nEpochs, batchSize=batchSize,
                         **trainingOptions)

    # Look the model up under the same key it was stored with, so a change
    # of hParamsPolynomial['name'] cannot cause a KeyError after training.
    return modelsGNN[thisName]
# 'fuseEdges' option in createGraph, we are going to add an extra dimension # to the adjacencyMatrix (to indicate there's only one matrix in the # collection that we should be fusing) adjacencyMatrix = adjacencyMatrix.reshape([1, nNodes, nNodes]) nodeList = [] extraComponents = [] if doPrint: print("Creating graph...", flush=True, end=' ') graphOptions['adjacencyMatrices'] = adjacencyMatrix graphOptions['nodeList'] = nodeList graphOptions['extraComponents'] = extraComponents graphOptions['aggregationType'] = 'sum' graphOptions['normalizationType'] = 'no' graphOptions['forceUndirected'] = True G = graphTools.Graph('fuseEdges', nNodes, graphOptions) G.computeGFT() # Compute the eigendecomposition of the stored GSO nNodes = G.N if doPrint: print("OK") ################ # SOURCE NODES # ################ if doPrint: print("Selecting source nodes...", end=' ', flush=True) # For the source localization problem, we have to select which ones, of all # the nodes, will act as source nodes. This is determined by a list of
dataPath = os.path.join('authorData', 'authorshipData.mat')
graphNormalizationType = 'rows'  # or 'cols' - Makes all rows add up to 1.
keepIsolatedNodes = False
force_undirected = True
force_connected = True
# Earlier call that also forwarded the graph options to the data set; kept
# for reference.
# data = Utils.dataTools.Authorship(name, 1, 0,
#                                   dataPath, graphNormalizationType,
#                                   keepIsolatedNodes, force_undirected,
#                                   force_connected)
data = Utils.dataTools.Authorship(name, 1, 0, dataPath)
nNodes = data.selectedAuthor['all']['wordFreq'].shape[1]
# Handed to the graph constructor below.
# NOTE(review): presumably filled in place with the indices of the nodes
# that are kept -- confirm against graphTools.Graph.
nodesToKeep = []
# NOTE(review): the graph is built from the hard-coded author 'abbott', not
# from the author selected through `name` -- confirm this is intended.
G = graphTools.Graph('fuseEdges', nNodes, data.authorData['abbott']['WAN'],
                     'sum', graphNormalizationType, keepIsolatedNodes,
                     force_undirected, force_connected, nodesToKeep)

# %%##################################################################


def get_degree_dist(ad):
    """Count the non-zero entries of every ``ad[i]`` along the first axis.

    Returns a list with ``ad.shape[0]`` elements, where element ``i`` is
    ``np.count_nonzero(ad[i])``.

    NOTE(review): presumably ``ad`` is a 2-D adjacency matrix, which would
    make each count the degree of node ``i`` -- confirm against callers.
    """
    result = []
    for i in range(ad.shape[0]):
        result.append(np.count_nonzero(ad[i]))
    # counter = collections.Counter(result)
    return result


# NOTE(review): the body of convert_to_ad lies outside this window.
def convert_to_ad(matrix):
def load_dataset_syn(adjtype, nNodes, nTrain, nValid, nTest, num_timestep, K,
                     batch_size, valid_batch_size=None, test_batch_size=None,
                     same_G=True, pooltype='avg'):
    """Generate a synthetic SBM data set and wrap it in batch loaders.

    Parameters
    ----------
    adjtype
        Forwarded to ``mod_adj`` together with each graph's ``G.W``.
    nNodes
        Number of nodes of every generated graph.
    nTrain, nValid, nTest
        Number of samples per split. With ``same_G=False`` one graph is
        generated per sample.
    num_timestep
        Forwarded to ``dataTools.MultiModalityPrediction``.
    K
        K-step prediction (also K step input). ``F_t`` is derived from it as
        ``K // 12``; ``K % F_t == 0`` is needed for a cleaner fMRI cut.
    batch_size, valid_batch_size, test_batch_size
        Batch sizes handed unchanged to the train / validation / test
        loaders.
    same_G
        Whether all samples have the same graph structure or not.
    pooltype
        Can be 'avg', 'selectOne', 'weighted'.

    Returns
    -------
    tuple
        ``(data, adj, F_t, G)``. ``data`` holds the ``x_*`` / ``y_*``
        arrays, the three loaders and the ``scaler``. With ``same_G=True``
        ``adj`` is ``mod_adj(G.W, adjtype)`` and ``G`` the single graph;
        otherwise ``adj`` is the list of adjacencies of all generated graphs
        (train, then validation, then test) and ``G`` a dict mapping
        ``'train'`` / ``'val'`` / ``'test'`` to the graphs of that split.
    """
    # graph config
    graphType = 'SBM'  # Type of graph
    graphOptions = {}
    # Number of communities (EEG node number); 64 was used at some point.
    graphOptions['nCommunities'] = 5
    graphOptions['probIntra'] = 0.8  # Intracommunity probability
    graphOptions['probInter'] = 0.2  # Intercommunity probability

    # sample config
    F_t = K // 12  # need K%F_t==0 for a cleaner fMRI cut

    # Keyword arguments shared by every data generator created below
    # (including the noise parameters).
    genOptions = {
        'F_t': F_t,
        'pooltype': pooltype,
        'sigmaSpatial': 0.1,
        'sigmaTemporal': 0.1,
        'rhoSpatial': 0,
        'rhoTemporal': 0,
    }

    categories = ('train', 'val', 'test')

    if same_G:
        # data generation: a single graph shared by every sample
        G = graphTools.Graph(graphType, nNodes, graphOptions)
        G.computeGFT()  # Compute the eigendecomposition of the stored GSO

        _data = dataTools.MultiModalityPrediction(G, K, nTrain, nValid,
                                                  nTest, num_timestep,
                                                  **genOptions)
        data = {}
        for category in categories:
            data['x_' + category], data['y_' + category] = \
                _data.getSamples(category)

        # Standardize the first input channel with training statistics only.
        scaler = StandardScaler(mean=data['x_train'][..., 0].mean(),
                                std=data['x_train'][..., 0].std())
        for category in categories:
            data['x_' + category][..., 0] = scaler.transform(
                data['x_' + category][..., 0])

        data['train_loader'] = DataLoader(data['x_train'], data['y_train'],
                                          batch_size)
        data['val_loader'] = DataLoader(data['x_val'], data['y_val'],
                                        valid_batch_size)
        data['test_loader'] = DataLoader(data['x_test'], data['y_test'],
                                         test_batch_size)
        data['scaler'] = scaler

        adj = mod_adj(G.W, adjtype)
        return data, adj, F_t, G

    # One freshly drawn graph (and one generated sample) per data point.
    nTotal = nTrain + nValid + nTest
    Gs = []
    adjs = []
    xs = []
    ys = []
    for _ in tqdm(range(nTotal)):
        G = graphTools.Graph(graphType, nNodes, graphOptions)
        G.computeGFT()
        _data = dataTools.MultiModalityPrediction(G, K, 1, 0, 0,
                                                  num_timestep,
                                                  **genOptions)
        x, y = _data.getSamples('train')
        xs.append(x)
        ys.append(y)
        Gs.append(G)
        adjs.append(mod_adj(G.W, adjtype))
    xs = np.stack(xs)
    ys = np.stack(ys)

    # Explicit split boundaries. Negative slices ([nTrain:-nTest] and
    # [-nTest:]) break for nTest == 0: the validation split comes out empty
    # and the test split receives every sample.
    validEnd = nTrain + nValid
    bounds = {
        'train': (0, nTrain),
        'val': (nTrain, validEnd),
        'test': (validEnd, nTotal),
    }

    G = {}
    data = {}
    for category in categories:
        start, stop = bounds[category]
        data['x_' + category] = xs[start:stop]
        data['y_' + category] = ys[start:stop]
        G[category] = Gs[start:stop]

    # One graph index per sample, repeated along the second axis of x.
    # NOTE(review): the indices restart at 0 in every split while the
    # returned adjacency list covers all splits -- confirm how
    # DataLoader_syn consumers map these indices to adjacencies.
    for category in categories:
        start, stop = bounds[category]
        data[category + '_adj_idx'] = np.arange(stop - start).reshape(
            -1, 1).repeat(data['x_' + category].shape[1], axis=1)

    # batching 1 : train model on one subject then finetune
    # (merge the first two axes of every array)
    for k, v in data.items():
        data[k] = v.reshape(-1, *v.shape[2:])
        # # batching 2 : each batch contains different subject
        # v = np.transpose(v, (1,0,2,3,4)).reshape(-1, *v.shape[2:])

    # Standardize the first input channel with training statistics only.
    scaler = StandardScaler(mean=data['x_train'][..., 0].mean(),
                            std=data['x_train'][..., 0].std())
    for category in categories:
        data['x_' + category][..., 0] = scaler.transform(
            data['x_' + category][..., 0])

    data['train_loader'] = DataLoader_syn(data['x_train'], data['y_train'],
                                          data['train_adj_idx'], batch_size)
    data['val_loader'] = DataLoader_syn(data['x_val'], data['y_val'],
                                        data['val_adj_idx'],
                                        valid_batch_size)
    data['test_loader'] = DataLoader_syn(data['x_test'], data['y_test'],
                                         data['test_adj_idx'],
                                         test_batch_size)
    data['scaler'] = scaler

    return data, adjs, F_t, G