# Docs with fewer than this number of words are discarded
nWordsShortDocs = 5
# Minimum number of edges per vertex in the graph
nEdges = 16
# Similarity measure between graph embedded features, used to build the
# adjacency matrix
distMetric = 'cosine'
# Number of data realizations
# Obs.: The built graph depends on the split between training and validation.
# Therefore, several of these splits are run and the results are averaged
# across them, to obtain a result that is more robust to this split.
nDataSplits = 10

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'ratioValid': ratioValid,
        'nWords': nWords,
        'nWordsShortDocs': nWordsShortDocs,
        'nEdges': nEdges,
        'distMetric': distMetric,
        'nDataSplits': nDataSplits,
        'useGPU': useGPU,
    },
)

############
# TRAINING #
############

#\\\ Individual model training options
# Options: 'SGD', 'ADAM', 'RMSprop'
trainer = 'ADAM'
# In all options
learningRate = 0.001
# beta1 if 'ADAM', alpha if 'RMSprop'
beta1 = 0.9
# ADAM option only
beta2 = 0.999
# Value epsilon of the perturbation: nSimPoints values, relative to the signal
# power, going from signalPower * 1/nSimPoints up to signalPower * 1
perturbationEpsilon = (
    signalPower * np.arange(1, nSimPoints + 1) / float(nSimPoints)
)
# Number of realizations of the perturbation
# Each perturbation is random, so this is how many different perturbations
# are run before the results are averaged
nPerturbationRealizations = 10
# Number of graph realizations
# Each graph is random, so this is how many graphs are created to average the
# results. The randomization of the graphs is the one that is plotted as error
# bars (i.e. how much the performance changes with different graphs within
# the same family)
nGraphRealizations = 10

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'nNodes': nNodes,
        'graphType': graphType,
    },
)
writeVarValues(varsFile, graphOptions)
writeVarValues(
    varsFile,
    {
        'nTest': nTest,
        'nSimPoints': nSimPoints,
        'signalPower': signalPower,
        'nPerturbationRealizations': nPerturbationRealizations,
        'nGraphRealizations': nGraphRealizations,
    },
)

#################
# ARCHITECTURES #
#################

# Select which wavelets to use
# If True returns the largest connected component of the graph as the main
# graph ---> TRUE
# NOTE(review): the comment above ends with "---> TRUE" while the value set
# here is False - confirm which setting is intended.
forceConnected = False
# Number of nearest neighbors
kNN = 10
# None to consider all data points
maxDataPoints = None

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'labelID': labelID,
        'graphType': graphType,
        'ratioTrain': ratioTrain,
        'ratioValid': ratioValid,
        'maxNodes': maxNodes,
        'minRatings': minRatings,
        'interpolateRatings': interpolateRatings,
        'nDataSplits': nDataSplits,
        'keepIsolatedNodes': keepIsolatedNodes,
        'forceUndirected': forceUndirected,
        'forceConnected': forceConnected,
        'kNN': kNN,
        'maxDataPoints': maxDataPoints,
        'useGPU': useGPU,
    },
)

############
# TRAINING #
############

#\\\ Individual model training options
# Options: 'SGD', 'ADAM', 'RMSprop'
optimAlg = 'ADAM'
# Every training excerpt has a WAN associated to it. We combine all these WANs
# into a single graph to use as the supporting graph for all samples. This
# combination happens under some extra options:

# 'rows' or 'cols' - Makes all rows add up to 1.
graphNormalizationType = 'rows'
# If True keeps isolated nodes
keepIsolatedNodes = False
# If True forces the graph to be undirected (symmetrizes)
forceUndirected = True
# If True removes nodes (from lowest to highest degree) until the resulting
# graph is connected.
forceConnected = True

# \\\ Save values:
writeVarValues(
    varsFile,
    {
        'nClasses': nClasses,
        'ratioTrain': ratioTrain,
        'ratioValid': ratioValid,
        'nDataSplits': nDataSplits,
        'graphNormalizationType': graphNormalizationType,
        'keepIsolatedNodes': keepIsolatedNodes,
        'forceUndirected': forceUndirected,
        'forceConnected': forceConnected,
    },
)

############
# TRAINING #
############

# \\\ Individual model training options
# Options: 'SGD', 'ADAM', 'RMSprop'
trainer = 'ADAM'
# In all options
learningRate = 0.001
# beta1 if 'ADAM', alpha if 'RMSprop'
beta1 = 0.9
# ADAM option only
beta2 = 0.999
# How to process the loaded Facebook graph

# If True keeps isolated nodes
keepIsolatedNodes = False
# If True removes nodes (from lowest to highest degree) until the resulting
# graph is connected.
forceConnected = True
# Use a smaller 234-matrix with 2-communities instead of the full graph with
# around 4k users
use234 = True

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'nClasses': nClasses,
        'beginProbEdgeFailSim': beginProbEdgeFailSim,
        'endProbEdgeFailSim': endProbEdgeFailSim,
        'nSimPoints': nSimPoints,
        'nTrain': nTrain,
        'nValid': nValid,
        'nTest': nTest,
        'nEdgeFailRealizations': nEdgeFailRealizations,
        'keepIsolatedNodes': keepIsolatedNodes,
        'forceConnected': forceConnected,
        'use234': use234,
        'useGPU': useGPU,
    },
)

#################
# ARCHITECTURES #
#################

# Select which wavelets to use
doDiffusion = False # F. Gama, A. Ribeiro, and J. Bruna, "Diffusion scattering
    # transforms on graphs," in 7th Int. Conf. Learning Representations. New
# into a single graph to use as the supporting graph for all samples. This
# combination happens under some extra options:

# 'rows' or 'cols' - Makes all rows add up to 1.
graphNormalizationType = 'rows'
# If True keeps isolated nodes
keepIsolatedNodes = False
# If True forces the graph to be undirected (symmetrizes)
forceUndirected = True
# If True removes nodes (from lowest to highest degree) until the resulting
# graph is connected.
forceConnected = True

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'authorName': authorName,
        'nClasses': nClasses,
        'ratioTrain': ratioTrain,
        'ratioValid': ratioValid,
        'nDataSplits': nDataSplits,
        'graphNormalizationType': graphNormalizationType,
        'keepIsolatedNodes': keepIsolatedNodes,
        'forceUndirected': forceUndirected,
        'forceConnected': forceConnected,
        'useGPU': useGPU,
    },
)

############
# TRAINING #
############

#\\\ Individual model training options
# Options: 'SGD', 'ADAM', 'RMSprop'
trainer = 'ADAM'
# In all options
learningRate = 0.005
# beta1 if 'ADAM', alpha if 'RMSprop'
beta1 = 0.9
# accounting for random data generation through several nTrain or nTest.

# How to process the loaded Facebook graph

# If True keeps isolated nodes
keepIsolatedNodes = False
# If True removes nodes (from lowest to highest degree) until the resulting
# graph is connected.
forceConnected = True
# Use a smaller 234-matrix with 2-communities instead of the full graph with
# around 4k users
use234 = True

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'nClasses': nClasses,
        'beginProbEdgeFailSim': beginProbEdgeFailSim,
        'endProbEdgeFailSim': endProbEdgeFailSim,
        'nSimPoints': nSimPoints,
        'nTrain': nTrain,
        'nValid': nValid,
        'nTest': nTest,
        'nEdgeFailRealizations': nEdgeFailRealizations,
        'keepIsolatedNodes': keepIsolatedNodes,
        'forceConnected': forceConnected,
        'use234': use234,
    },
)

#################
# ARCHITECTURES #
#################

# Select which wavelets to use

# F. Gama, A. Ribeiro, and J. Bruna, "Diffusion scattering transforms on
# graphs," in Int. Conf. Learning Representations 2019. New Orleans, LA:
# Assoc. Comput. Linguistics, 6-9 May 2019.
doDiffusion = True
doMonicCubic = True # Eq. (65) in D. K. Hammond, P. Vandergheynst, and
# Dictionary of options to pass to the createGraph function; the keys that
# get filled in depend on the selected graphType.
graphOptions = {}
if graphType == 'SBM':
    graphOptions.update({
        'nCommunities': nClasses,  # Number of communities
        'probIntra': 0.8,  # Intracommunity probability
        'probInter': 0.2,  # Intercommunity probability
    })
elif graphType == 'SmallWorld':
    graphOptions.update({
        'probEdge': 0.5,  # Edge probability
        'probRewiring': 0.1,  # Probability of rewiring
    })
elif graphType == 'geometric':
    graphOptions.update({
        'pos': [None],
        'kernelType': 'exponential',
        'sparseType': 'NN',
        'sparseParam': 5,
    })

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'nNodes': nNodes,
        'graphType': graphType,
    },
)
writeVarValues(varsFile, graphOptions)
writeVarValues(
    varsFile,
    {
        'nTrain': nTrain,
        'nValid': nValid,
        'nTest': nTest,
        'tMax': tMax,
        'nDataRealizations': nDataRealizations,
        'nGraphRealizations': nGraphRealizations,
        'nClasses': nClasses,
        'useGPU': useGPU,
    },
)

############
# TRAINING #
# Every training excerpt has a WAN associated to it. We combine all these WANs
# into a single graph to use as the supporting graph for all samples. This
# combination happens under some extra options:

# 'rows' or 'cols' - Makes all rows add up to 1.
graphNormalizationType = 'rows'
# If True keeps isolated nodes
keepIsolatedNodes = False
# If True forces the graph to be undirected (symmetrizes)
forceUndirected = True
# If True keeps the largest connected component
forceConnected = True

#\\\ Save values:
writeVarValues(
    varsFile,
    {
        'authorName': authorName,
        'nClasses': nClasses,
        'beginRatioTrainSim': beginRatioTrainSim,
        'endRatioTrainSim': endRatioTrainSim,
        'nSimPoints': nSimPoints,
        'ratioValid': ratioValid,
        'nDataSplits': nDataSplits,
        'graphNormalizationType': graphNormalizationType,
        'keepIsolatedNodes': keepIsolatedNodes,
        'forceUndirected': forceUndirected,
        'forceConnected': forceConnected,
    },
)

#################
# ARCHITECTURES #
#################

# Select which wavelets to use

# F. Gama, A. Ribeiro, and J. Bruna, "Diffusion scattering transforms on
# graphs," in Int. Conf. Learning Representations 2019. New Orleans, LA:
# Assoc. Comput. Linguistics, 6-9 May 2019.
doDiffusion = True