Example #1
    # Repeat for the GFT error
    if doGFT:
        reprErrorGFT[graph] = [None] * len(perturbationEpsilon)
    # The bound also depends on the specific value of epsilon, so we also
    # need one entry per epsilon value
    if computeBound:
        bound[graph] = [None] * len(perturbationEpsilon)

    #%%##################################################################
    #                                                                   #
    #                    GRAPH CREATION                                 #
    #                                                                   #
    #####################################################################

    # Create graph
    G = graphTools.Graph(graphType, nNodes, graphOptions)

    #%%##################################################################
    #                                                                   #
    #                    GRAPH SCATTERING MODELS                        #
    #                                                                   #
    #####################################################################

    modelsGST = {}  # Store each model as a key in this dictionary; then we
    # can compute the output for each model inside a for loop (iterating over
    # the keys), since all models have a computeTransform() method.

    if doDiffusion:
        modelsGST[diffusionName] = GST.DiffusionScattering(
            numScales, numLayers, G.W)
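
    # Aside (illustrative sketch, not from the source): since every model in
    # modelsGST exposes a computeTransform() method, the representations can
    # later be computed uniformly, assuming the method takes the graph
    # signals x as its argument:
    #     for name, gst in modelsGST.items():
    #         Phi = gst.computeTransform(x)  # same call for every model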
Example #2
        # Now, we are in a position to know the number of nodes (for now; this
        # might change later on when the graph is created and the options on
        # whether to make it connected, etc., come into effect)
        nNodes = data.selectedAuthor['all']['wordFreq'].shape[1]

        #########
        # GRAPH #
        #########

        # Create graph
        nodesToKeep = []  # here we store the list of nodes kept after all
        # modifications to the graph, so we can then update the data samples
        # accordingly; since lists are mutable objects passed by reference,
        # the function can fill in this list without needing to return it
        G = graphTools.Graph('fuseEdges', nNodes,
                             data.selectedAuthor['train']['WAN'], 'sum',
                             graphNormalizationType, keepIsolatedNodes,
                             forceUndirected, forceConnected, nodesToKeep)
        G.computeGFT()  # Compute the GFT of the stored GSO

        # Update the number of nodes to account for changes in the graph (due
        # to enforced connectedness, for instance)
        nNodes = G.N
        nodesToKeep = np.array(nodesToKeep)
        # And re-update the data (keep only the nodes that are kept after isolated
        # nodes or nodes to make the graph connected have been removed)
        data.samples['train']['signals'] = \
            data.samples['train']['signals'][:, nodesToKeep]
        data.samples['valid']['signals'] = \
            data.samples['valid']['signals'][:, nodesToKeep]
        data.samples['test']['signals'] = \
            data.samples['test']['signals'][:, nodesToKeep]
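
        # Aside (illustrative, not from the source): the nodesToKeep pattern
        # above works because Python passes lists by reference, so the callee
        # can fill the caller's list in place, e.g.:
        #     def fill(keep): keep.extend([0, 2, 5])  # hypothetical helper
        #     kept = []; fill(kept)                   # kept is now [0, 2, 5]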
Example #3
    # Number of classes
    nClasses = data.getNumberOfClasses()

    if doPrint:
        print("OK")

    #########
    # GRAPH #
    #########

    if doPrint:
        print("Setting up the graph...", end=' ', flush=True)

    # Create graph
    adjacencyMatrix = data.getGraph()
    G = graphTools.Graph('adjacency', adjacencyMatrix.shape[0],
                         {'adjacencyMatrix': adjacencyMatrix})
    G.computeGFT()  # Compute the GFT of the stored GSO

    # Update the number of nodes to account for changes in the graph (due to
    # enforced connectedness, for instance)
    nNodes = G.N

    # Once the data is fully formatted, cast it to a torch data type and move
    # it to the appropriate device
    data.astype(torch.float64)
    data.to(device)

    if doPrint:
        print("OK")

    #%%##################################################################
Example #4
    print("OK")
# Now, to create the proper graph object, since we're going to use
# 'fuseEdges' option in createGraph, we are going to add an extra dimension
# to the adjacencyMatrix (to indicate there's only one matrix in the
# collection that we should be fusing)
adjacencyMatrix = adjacencyMatrix.reshape([1, N, N])
nodeList = []
extraComponents = []
if doPrint:
    print("Creating graph...", flush=True, end=' ')
G = graphTools.Graph(
    'fuseEdges', N, {
        'adjacencyMatrices': adjacencyMatrix,
        'nodeList': nodeList,
        'extraComponents': extraComponents,
        'aggregationType': 'sum',
        'normalizationType': 'no',
        'isolatedNodes': keepIsolatedNodes,
        'forceUndirected': True,
        'forceConnected': forceConnected
    })
G.computeGFT()  # Compute the eigendecomposition of the stored GSO
if doPrint:
    print("OK")

################
# SOURCE NODES #
################

if doPrint:
    print("Selecting source nodes...", end=' ', flush=True)
Example #5
def train_net(data, h_parameters, phi=None):
    # Now, we are in a position to know the number of nodes (for now; this
    # might change later on when the graph is created and the options on
    # whether to make it connected, etc., come into effect)
    nNodes = data.selectedAuthor['all']['wordFreq'].shape[1]

    #########
    # GRAPH #
    #########

    # Create graph
    nodesToKeep = []  # here we store the list of nodes kept after all
    # modifications to the graph, so we can then update the data samples
    # accordingly; since lists are mutable objects passed by reference,
    # the function can fill in this list without needing to return it
    G = graphTools.Graph('fuseEdges', nNodes,
                         data.selectedAuthor['train']['WAN'], 'sum',
                         graphNormalizationType, keepIsolatedNodes,
                         forceUndirected, forceConnected, nodesToKeep)
    G.computeGFT()  # Compute the GFT of the stored GSO

    # Update the number of nodes to account for changes in the graph (due to
    # enforced connectedness, for instance)
    if phi is None:
        nNodes = G.N
        nodesToKeep = np.array(nodesToKeep)
        # And re-update the data (keep only the nodes that are kept after isolated
        # nodes or nodes to make the graph connected have been removed)
        data.samples['train']['signals'] = \
            data.samples['train']['signals'][:, nodesToKeep]
        data.samples['valid']['signals'] = \
            data.samples['valid']['signals'][:, nodesToKeep]
        data.samples['test']['signals'] = \
            data.samples['test']['signals'][:, nodesToKeep]
    else:
        nNodes = phi.shape[0]

    # Once the data is fully formatted, cast it to a torch data type and move
    # it to the appropriate device
    data.astype(torch.float64)
    data.to(device)

    #####################################################################
    #                                                                   #
    #                    MODELS INITIALIZATION                          #
    #                                                                   #
    #####################################################################

    # Override parameters with grid parameters.
    hParamsPolynomial['F'] = h_parameters[0]
    hParamsPolynomial['K'] = h_parameters[1]

    # This is the dictionary where we store the models (in a model.Model
    # class, that is then passed to training).
    modelsGNN = {}

    # If a new model is to be created, it should be called for here.

    # \\\\\\\\\\\\
    # \\\ MODEL 2: Polynomial GNN
    # \\\\\\\\\\\\

    thisName = hParamsPolynomial['name']

    ##############
    # PARAMETERS #
    ##############

    # \\\ Optimizer options
    #   (If different from the default ones, change here.)
    thisTrainer = trainer
    thisLearningRate = learningRate
    thisBeta1 = beta1
    thisBeta2 = beta2

    if phi is None:
        # \\\ Ordering
        S, order = graphTools.permIdentity(G.S / np.max(np.diag(G.E)))
        # order is an np.array with the ordering of the nodes with respect
        # to the original GSO (the original GSO is kept in G.S).
    else:
        # Compute the eigenvalues of the matrix
        e, V = np.linalg.eig(phi)
        # \\\ Ordering
        # np.linalg.eig returns the eigenvalues as a 1-D array, so take the
        # maximum directly (np.diag would build a matrix here, not extract one)
        highest_eig_val = np.max(e).real

        if highest_eig_val == 0:
            S, order = graphTools.permIdentity(phi)
        else:
            S, order = graphTools.permIdentity(phi / highest_eig_val)
        # order is an np.array with the ordering of the nodes with respect
        # to the original GSO (the original GSO is kept in G.S).
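
    # Aside (interpretation, not from the source): dividing the GSO by its
    # largest eigenvalue bounds the spectral radius by one (for the
    # nonnegative shift operators used here, the largest eigenvalue is the
    # spectral radius), so repeated applications of S in the graph filters
    # do not blow up numerically.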

    ################
    # ARCHITECTURE #
    ################

    hParamsPolynomial['N'] = [nNodes]

    if doPrint:
        print('')
        print('COMBINATION {0}, {1}'.format(str(hParamsPolynomial['F']),
                                            str(hParamsPolynomial['K'])))

    thisArchit = archit.SelectionGNN(  # Graph filtering
        hParamsPolynomial['F'],
        hParamsPolynomial['K'],
        hParamsPolynomial['bias'],
        # Nonlinearity
        hParamsPolynomial['sigma'],
        # Pooling
        hParamsPolynomial['N'],
        hParamsPolynomial['rho'],
        hParamsPolynomial['alpha'],
        # MLP
        hParamsPolynomial['dimLayersMLP'],
        # Structure
        S)
    # This is necessary so that all the learnable parameters are stored on
    # the device (mostly relevant when it is a GPU)
    thisArchit.to(device)

    #############
    # OPTIMIZER #
    #############

    if thisTrainer == 'ADAM':
        thisOptim = optim.Adam(thisArchit.parameters(),
                               lr=thisLearningRate,
                               betas=(thisBeta1, thisBeta2))
    elif thisTrainer == 'SGD':
        thisOptim = optim.SGD(thisArchit.parameters(), lr=thisLearningRate)
    elif thisTrainer == 'RMSprop':
        thisOptim = optim.RMSprop(thisArchit.parameters(),
                                  lr=thisLearningRate,
                                  alpha=thisBeta1)
    else:
        raise ValueError("Unknown trainer: %s" % thisTrainer)

    ########
    # LOSS #
    ########

    thisLossFunction = lossFunction

    #########
    # MODEL #
    #########

    Polynomial = model.Model(thisArchit, thisLossFunction, thisOptim, thisName,
                             saveDir, order)

    modelsGNN[thisName] = Polynomial

    #####################################################################
    #                                                                   #
    #                    TRAINING                                       #
    #                                                                   #
    #####################################################################

    ############
    # TRAINING #
    ############

    # On top of the rest of the training options, we pass the identification
    # of this specific data split realization.

    # This is the function that trains the models detailed in the dictionary
    # modelsGNN on the dataset data, with the specified training options.
    train.MultipleModels(modelsGNN,
                         data,
                         nEpochs=nEpochs,
                         batchSize=batchSize,
                         **trainingOptions)

    return modelsGNN[thisName]
Example #6
    # Now, to create the proper graph object, since we're going to use
    # 'fuseEdges' option in createGraph, we are going to add an extra dimension
    # to the adjacencyMatrix (to indicate there's only one matrix in the
    # collection that we should be fusing)
    adjacencyMatrix = adjacencyMatrix.reshape([1, nNodes, nNodes])
    nodeList = []
    extraComponents = []
    if doPrint:
        print("Creating graph...", flush=True, end=' ')
    graphOptions['adjacencyMatrices'] = adjacencyMatrix
    graphOptions['nodeList'] = nodeList
    graphOptions['extraComponents'] = extraComponents
    graphOptions['aggregationType'] = 'sum'
    graphOptions['normalizationType'] = 'no'
    graphOptions['forceUndirected'] = True

    G = graphTools.Graph('fuseEdges', nNodes, graphOptions)
    G.computeGFT()  # Compute the eigendecomposition of the stored GSO

    nNodes = G.N

    if doPrint:
        print("OK")

    ################
    # SOURCE NODES #
    ################

    if doPrint:
        print("Selecting source nodes...", end=' ', flush=True)
    # For the source localization problem, we have to select which ones, of all
    # the nodes, will act as source nodes. This is determined by a list of
Example #7
dataPath = os.path.join('authorData', 'authorshipData.mat')
graphNormalizationType = 'rows'  # 'rows' makes each row add up to 1; 'cols' normalizes columns instead
keepIsolatedNodes = False

force_undirected = True
force_connected = True

# data = Utils.dataTools.Authorship(name, 1, 0,
#                                   dataPath, graphNormalizationType,
#                                   keepIsolatedNodes, force_undirected,
#                                   force_connected)
data = Utils.dataTools.Authorship(name, 1, 0, dataPath)
nNodes = data.selectedAuthor['all']['wordFreq'].shape[1]
nodesToKeep = []
G = graphTools.Graph('fuseEdges', nNodes,
                     data.authorData['abbott']['WAN'],
                     'sum', graphNormalizationType, keepIsolatedNodes,
                     force_undirected, force_connected, nodesToKeep)


# %%##################################################################
def get_degree_dist(ad):
    # Degree of each node: number of nonzero entries in its adjacency row
    result = []

    for i in range(ad.shape[0]):
        result.append(np.count_nonzero(ad[i]))

    # counter = collections.Counter(result)
    return result
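
# Hypothetical usage (not from the source): degree distribution of the fused
# graph built above, assuming G.W holds its weighted adjacency matrix.
degrees = get_degree_dist(np.asarray(G.W))
print(min(degrees), max(degrees))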


def convert_to_ad(matrix):
Example #8
def load_dataset_syn(adjtype,
                     nNodes,
                     nTrain,
                     nValid,
                     nTest,
                     num_timestep,
                     K,
                     batch_size,
                     valid_batch_size=None,
                     test_batch_size=None,
                     same_G=True,
                     pooltype='avg'):
    '''
    K: K-step prediction (also K-step input)
    same_G: whether all samples share the same graph structure
    pooltype: one of 'avg', 'selectOne', 'weighted'
    '''
    # graph config
    graphType = 'SBM'  # Type of graph
    graphOptions = {}
    # Number of communities (EEG node number); 64 is an alternative value
    graphOptions['nCommunities'] = 5
    graphOptions['probIntra'] = 0.8  # Intracommunity probability
    graphOptions['probInter'] = 0.2  # Intercommunity probability
    # sample config
    F_t = K // 12  # need K%F_t==0 for a cleaner fMRI cut
    # noise parameters
    sigmaSpatial = 0.1
    sigmaTemporal = 0.1
    rhoSpatial = 0
    rhoTemporal = 0

    if same_G:
        # data generation
        G = graphTools.Graph(graphType, nNodes, graphOptions)
        G.computeGFT()  # Compute the eigendecomposition of the stored GSO
        _data = dataTools.MultiModalityPrediction(G,
                                                  K,
                                                  nTrain,
                                                  nValid,
                                                  nTest,
                                                  num_timestep,
                                                  F_t=F_t,
                                                  pooltype=pooltype,
                                                  sigmaSpatial=sigmaSpatial,
                                                  sigmaTemporal=sigmaTemporal,
                                                  rhoSpatial=rhoSpatial,
                                                  rhoTemporal=rhoTemporal)
        data = {}
        for category in ['train', 'val', 'test']:
            data['x_' + category], data['y_' + category] = \
                _data.getSamples(category)

        scaler = StandardScaler(mean=data['x_train'][..., 0].mean(),
                                std=data['x_train'][..., 0].std())
        # Data format
        for category in ['train', 'val', 'test']:
            data['x_' + category][..., 0] = scaler.transform(
                data['x_' + category][..., 0])
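
        # Note (interpretation, not from the source): only feature channel 0
        # is standardized; any additional channels in the last dimension are
        # deliberately left unscaled.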

        data['train_loader'] = DataLoader(data['x_train'], data['y_train'],
                                          batch_size)
        data['val_loader'] = DataLoader(data['x_val'], data['y_val'],
                                        valid_batch_size)
        data['test_loader'] = DataLoader(data['x_test'], data['y_test'],
                                         test_batch_size)
        data['scaler'] = scaler
        adj = mod_adj(G.W, adjtype)
        return data, adj, F_t, G
    else:
        nTotal = nTrain + nValid + nTest
        Gs = []
        adjs = []
        xs = []
        ys = []
        for i in tqdm(range(nTotal)):
            G = graphTools.Graph(graphType, nNodes, graphOptions)
            G.computeGFT()
            _data = dataTools.MultiModalityPrediction(
                G,
                K,
                1,
                0,
                0,
                num_timestep,
                F_t=F_t,
                pooltype=pooltype,
                sigmaSpatial=sigmaSpatial,
                sigmaTemporal=sigmaTemporal,
                rhoSpatial=rhoSpatial,
                rhoTemporal=rhoTemporal)
            x, y = _data.getSamples('train')  # (971, 15, 80, 2)
            xs.append(x)
            ys.append(y)
            Gs.append(G)
            adjs.append(mod_adj(G.W, adjtype))

        xs = np.stack(xs)
        ys = np.stack(ys)

        G = {}
        data = {}
        data['x_train'], data['y_train'], G['train'] = \
            xs[:nTrain], ys[:nTrain], Gs[:nTrain]
        data['x_val'], data['y_val'], G['val'] = \
            xs[nTrain:-nTest], ys[nTrain:-nTest], Gs[nTrain:-nTest]
        data['x_test'], data['y_test'], G['test'] = \
            xs[-nTest:], ys[-nTest:], Gs[-nTest:]

        data['train_adj_idx'] = np.arange(nTrain).reshape(-1, 1).repeat(
            data['x_train'].shape[1], axis=1)
        data['val_adj_idx'] = np.arange(nValid).reshape(-1, 1).repeat(
            data['x_val'].shape[1], axis=1)
        data['test_adj_idx'] = np.arange(nTest).reshape(-1, 1).repeat(
            data['x_test'].shape[1], axis=1)

        for k, v in data.items():
            # Batching option 1: train the model on one subject, then fine-tune
            data[k] = v.reshape(-1, *v.shape[2:])
            # # Batching option 2: each batch contains different subjects
            # v = np.transpose(v, (1,0,2,3,4)).reshape(-1, *v.shape[2:])

        scaler = StandardScaler(mean=data['x_train'][..., 0].mean(),
                                std=data['x_train'][..., 0].std())
        # Data format
        for category in ['train', 'val', 'test']:
            data['x_' + category][..., 0] = scaler.transform(
                data['x_' + category][..., 0])

        data['train_loader'] = DataLoader_syn(data['x_train'], data['y_train'],
                                              data['train_adj_idx'],
                                              batch_size)
        data['val_loader'] = DataLoader_syn(data['x_val'], data['y_val'],
                                            data['val_adj_idx'],
                                            valid_batch_size)
        data['test_loader'] = DataLoader_syn(data['x_test'], data['y_test'],
                                             data['test_adj_idx'],
                                             test_batch_size)
        data['scaler'] = scaler

        return data, adjs, F_t, G
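
# Hypothetical usage (all argument values are assumptions, not from the
# source; K=12 keeps K % F_t == 0 since F_t = K // 12):
# data, adj, F_t, G = load_dataset_syn(adjtype='doubletransition', nNodes=80,
#                                      nTrain=100, nValid=20, nTest=20,
#                                      num_timestep=1000, K=12, batch_size=64)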