def _check_context(self, context):
    # infer available context
    gpus = num_gpus()
    available_gpus = [gpu(i) for i in range(gpus)]
    if context:
        # check context values, only accept Context or a list of Context
        if isinstance(context, Context):
            context = [context]
        elif isinstance(context, list) and all(isinstance(c, Context) for c in context):
            pass
        else:
            raise ValueError("context must be a Context or a list of Context, "
                             "for example mx.cpu() or [mx.gpu(0), mx.gpu(1)], "
                             "refer to mxnet.Context: {}".format(context))
        for ctx in context:
            assert ctx in available_gpus or str(ctx).startswith('cpu'), \
                "%s is not available, please make sure " \
                "your context is in one of: mx.cpu(), %s" % \
                (ctx, ", ".join(str(c) for c in available_gpus))
    else:
        # provide default context
        if gpus > 0:
            # only use 1 GPU by default
            if gpus > 1:
                warnings.warn("You have multiple GPUs, gpu(0) will be used by default. "
                              "To utilize all your GPUs, specify context as a list of GPUs, "
                              "e.g. context=[mx.gpu(0), mx.gpu(1)]")
            context = [gpu(0)]
        else:
            context = [cpu()]
    return context
def random_walks(graph, parameters, sources, alpha=0.3, max_iter=500):
    """
    Random walk with restart for the given parameters and directed graph

    :param graph: igraph object
    :type graph: igraph.Graph
    :param parameters: parameter vector for the edge strength function
    :type parameters: numpy.array
    :param sources: list of indices of source nodes
    :type sources: list(int)
    :param alpha: restart probability
    :type alpha: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :return: p vector for every source node
    :rtype: dict
    """
    with gpu(0):
        epsilon = 1e-12
        small_epsilon = 1e-18
        features = mx.nd.array([graph.es[feature] for feature in graph.es.attributes()]).T
        parameters = mx.nd.array(parameters)
        strengths = logistic_edge_strength_function(parameters, features) + small_epsilon
        graph.es['strength'] = strengths.reshape((-1,)).asnumpy()
        A = mx.nd.array(graph.get_adjacency(attribute='strength').data)
        Q_prim = get_stochastic_transition_matrix(A)
        result = dict()
        for source in tqdm(sources):
            Q = get_transition_matrix(Q_prim, source, alpha)
            p = iterative_page_rank(Q, epsilon, max_iter)
            result[source] = [p.asnumpy()]
        return result
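# Hypothetical usage sketch (not part of the original source; assumes a GPU is
# available, since the pipeline runs under gpu(0), and assumes
# logistic_edge_strength_function accepts a one-dimensional parameter vector):
# random walks with restart from node 0 on a tiny directed graph whose edges
# carry a single feature. The parameter value is illustrative only.
import igraph
import numpy as np

g = igraph.Graph(directed=True)
g.add_vertices(3)
g.add_edges([(0, 1), (1, 2), (2, 0)])
g.es['weight'] = [1.0, 2.0, 3.0]

result = random_walks(g, parameters=np.array([0.5]), sources=[0])
print(result[0][0])  # stationary distribution reached from source node 0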
def get_stochastic_transition_matrix(A):
    """
    Calculates the stochastic transition matrix

    :param A: adjacency matrix with edge strengths
    :type A: numpy.array
    :return: stochastic transition matrix
    :rtype: numpy.array
    """
    with gpu(0):
        Q_prim = 1 / mx.nd.sum(A, axis=1).reshape((-1, 1)) * A
        return Q_prim
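# Hypothetical sanity check (not part of the original source; assumes a GPU is
# available, since these helpers run under gpu(0)): Q' is the row-normalized
# strength matrix, so every row of the result sums to 1.
import mxnet as mx

A = mx.nd.array([[0., 2., 2.],
                 [1., 0., 3.],
                 [4., 0., 0.]], ctx=mx.gpu(0))
Q_prim = get_stochastic_transition_matrix(A)
print(Q_prim.asnumpy())                     # [[0. .5 .5] [.25 0. .75] [1. 0. 0.]]
print(mx.nd.sum(Q_prim, axis=1).asnumpy())  # [1. 1. 1.]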
def gradient_function(graph, features, sources, destinations, alpha, max_iter,
                      lambda_param, epsilon, small_epsilon, margin_loss, w):
    """
    Gradient function

    :param graph: igraph object
    :type graph: igraph.Graph
    :param features: feature vector
    :type features: numpy.array
    :param w: parameter vector
    :type w: numpy.array
    :param sources: list of indices of source nodes
    :type sources: list(int)
    :param destinations: list of indices of destination nodes
    :type destinations: list(list(int))
    :param alpha: restart probability
    :type alpha: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :param lambda_param: regularization parameter
    :type lambda_param: float
    :param epsilon: tolerance parameter for page rank convergence
    :type epsilon: float
    :param small_epsilon: change parameter in strengths of the edges
    :type small_epsilon: float
    :param margin_loss: margin loss
    :type margin_loss: float
    :return: values of the gradient function
    :rtype: numpy.array
    """
    with gpu(0):
        features = mx.nd.array(features)
        w = mx.nd.array(w)
        gr = mx.nd.zeros(w.shape[0])
        strengths = logistic_edge_strength_function(w, features) + small_epsilon
        graph.es['strength'] = strengths.reshape((-1,)).asnumpy()
        A = mx.nd.array(graph.get_adjacency(attribute='strength').data)
        Q_prim = get_stochastic_transition_matrix(A)
        for i, source in enumerate(sources):
            print('{} Gradient func, source {}, index {}'.format(time.strftime("%c"), source, i))
            Q = get_transition_matrix(Q_prim, source, alpha)
            p = iterative_page_rank(Q, epsilon, max_iter)
            dp = iterative_page_rank_derivative(graph, p, Q, A, epsilon, max_iter, w, features, alpha)
            l_set = list(set(graph.vs.indices) - set(destinations[i] + [source]))
            diff = get_differences(p, l_set, destinations[i])
            dh = derivative_logistic_function(diff, margin_loss)
            for k in range(w.shape[0]):
                gr[k] += 2 * w[k] + lambda_param * mx.nd.sum(
                    dh * get_differences(dp[:, k], l_set, destinations[i]))
        return gr.asnumpy().astype(np.float64)
def objective_function(graph, features, sources, destinations, alpha, max_iter,
                       lambda_param, epsilon, small_epsilon, margin_loss, w):
    """
    Objective function for minimization in the training process

    :param graph: igraph object
    :type graph: igraph.Graph
    :param features: feature vector
    :type features: numpy.array
    :param w: parameter vector
    :type w: numpy.array
    :param sources: list of indices of source nodes
    :type sources: list(int)
    :param destinations: list of indices of destination nodes
    :type destinations: list(list(int))
    :param alpha: restart probability
    :type alpha: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :param lambda_param: regularization parameter
    :type lambda_param: float
    :param epsilon: tolerance parameter for page rank convergence
    :type epsilon: float
    :param small_epsilon: change parameter in strengths of the edges
    :type small_epsilon: float
    :param margin_loss: margin loss
    :type margin_loss: float
    :return: value of the objective function for the given parameters
    :rtype: float
    """
    with gpu(0):
        features = mx.nd.array(features)
        w = mx.nd.array(w)
        strengths = logistic_edge_strength_function(w, features) + small_epsilon
        graph.es['strength'] = strengths.reshape((-1,)).asnumpy()
        A = mx.nd.array(graph.get_adjacency(attribute='strength').data)
        Q_prim = get_stochastic_transition_matrix(A)
        loss = 0
        for i, source in enumerate(sources):
            print('{} Objective func, source {}, index {}'.format(time.strftime("%c"), source, i))
            Q = get_transition_matrix(Q_prim, source, alpha)
            p = iterative_page_rank(Q, epsilon, max_iter)
            l_set = list(set(graph.vs.indices) - set(destinations[i] + [source]))
            diff = get_differences(p, l_set, destinations[i])
            h = loss_function(diff, margin_loss)
            loss += mx.nd.sum(h).asnumpy()[0]
        return float(mx.nd.sum(mx.nd.square(w)).asnumpy()[0] + lambda_param * loss)
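# For reference (inferred from the code above, not stated in the original):
# the quantity being minimized is
#
#     J(w) = ||w||^2 + lambda * sum_s sum_{l in L, d in D} h(p_l - p_d)
#
# i.e. an L2-regularized margin loss summed over pairs of non-destination (L)
# and destination (D) nodes, in the style of supervised random walks.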
def get_transition_matrix(Q_prim, start_node, alpha):
    """
    Calculate the transition matrix from a given stochastic transition matrix,
    start node and restart probability

    :param Q_prim: stochastic transition matrix
    :type Q_prim: numpy.array
    :param start_node: index of the start node
    :type start_node: int
    :param alpha: restart probability
    :type alpha: float
    :return: transition matrix
    :rtype: numpy.array
    """
    with gpu(0):
        one = mx.nd.zeros(Q_prim.shape)
        one[:, start_node] = 1
        return (1 - alpha) * Q_prim + alpha * one
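# Hypothetical usage sketch (not part of the original source; assumes a GPU is
# available, since the helper allocates under gpu(0)): with alpha = 0.3 and
# start node 0, each entry of Q' is scaled by 0.7 and 0.3 is added to the
# start-node column, so rows still sum to 1.
import mxnet as mx

Q_prim = mx.nd.array([[0., .5, .5],
                      [.25, 0., .75],
                      [1., 0., 0.]], ctx=mx.gpu(0))
Q = get_transition_matrix(Q_prim, start_node=0, alpha=0.3)
print(Q.asnumpy())
print(mx.nd.sum(Q, axis=1).asnumpy())  # [1. 1. 1.]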
def get_differences(p, l_set, d_set):
    """
    Calculate the differences between pl and pd

    :param p: stationary distribution
    :type p: numpy.array
    :param l_set: indices of the nodes in the L set
    :type l_set: list
    :param d_set: indices of the nodes in the D set
    :type d_set: list
    :return: differences between pl and pd
    :rtype: numpy.array
    """
    with gpu(0):
        pd = p.asnumpy()[d_set]
        pl = p.asnumpy()[l_set]
        result = mx.nd.array([l - d for d in pd for l in pl])
        return result
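# Hypothetical usage sketch (not part of the original source; assumes a GPU is
# available): for p = [.1 .2 .3 .4], L = {0, 1} and D = {3}, the result is
# every pairwise difference p_l - p_d.
import mxnet as mx

p = mx.nd.array([0.1, 0.2, 0.3, 0.4])
print(get_differences(p, l_set=[0, 1], d_set=[3]).asnumpy())  # [-0.3 -0.2]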
def test_1():
    patch_ndarray()
    a = mx.nd.zeros((256, 3, 128, 128))
    print(a.handle)
    print(a.handle.value)
    p = a.get_data_p()
    print(p)
    d = a.as_in_context(context.gpu(0))
    print(d.handle)
    print(d.handle.value)
    p = d.get_data_p()
    print(p)
def iterative_page_rank_derivative(graph, p, Q, A, epsilon, max_iter, w, features, alpha):
    """
    Derivative of PageRank vector p

    :param graph: igraph object
    :type graph: igraph.Graph
    :param p: PageRank vector
    :type p: numpy.array
    :param Q: transition matrix
    :type Q: numpy.array
    :param A: adjacency strength matrix
    :type A: numpy.array
    :param epsilon: tolerance parameter
    :type epsilon: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :param w: parameter vector
    :type w: numpy.array
    :param features: feature vector
    :type features: numpy.array
    :param alpha: restart probability
    :type alpha: float
    :return: derivative of PageRank vector
    :rtype: numpy.array
    """
    with gpu(0):
        dp = mx.nd.zeros((Q.shape[0], w.shape[0]))
        dstrengths = logistic_edge_strength_derivative_function(w, features)
        A_rowsum = mx.nd.sum(A, axis=1).reshape((-1, 1))
        rec = mx.nd.power(A_rowsum, -2)
        for k in range(w.shape[0]):
            graph.es['temp'] = mx.nd.transpose(dstrengths)[:, k].reshape((-1,)).asnumpy().tolist()
            dA = mx.nd.array(graph.get_adjacency(attribute='temp').data)
            dA_rowsum = mx.nd.sum(dA, axis=1).reshape((-1, 1))
            dQk = (1 - alpha) * rec * ((A_rowsum * dA) - (dA_rowsum * A))
            prod = mx.nd.dot(p, dQk)
            for t in range(max_iter):
                dp_new = mx.nd.dot(dp[:, k], Q) + prod
                if mx.nd.max(mx.nd.abs(dp_new - dp[:, k])).asnumpy()[0] < epsilon:
                    dp[:, k] = dp_new
                    break
                dp[:, k] = dp_new
        return dp
def eco_full(pretrained=False, ctx=gpu(),
             root=os.path.join(base.data_dir(), '/path/to/json'), **kwargs):
    r"""Build ECO_Full network

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights.
    ctx : Context, default GPU
        The context in which to load the pretrained weights.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    """
    net = Eco(**kwargs)
    if pretrained:
        from mxnet.gluon.model_zoo.model_store import get_model_file
        net.load_parameters(get_model_file('eco_full_kinetics', root=root), ctx=ctx)
    return net
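# Hypothetical usage sketch (not part of the original source): build the
# network without pretrained weights and initialize it; the input shape in the
# commented-out forward pass is an assumption for illustration only.
net = eco_full(pretrained=False, ctx=cpu())
net.initialize(ctx=cpu())
# out = net(mx.nd.random.uniform(shape=(1, 3, 224, 224)))  # shape assumed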
def iterative_page_rank(trans, epsilon, max_iter):
    """
    Iterative power-iteration-like computation of the PageRank vector p

    :param trans: transition matrix
    :type trans: numpy.array
    :param epsilon: tolerance parameter
    :type epsilon: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :return: stationary distribution
    :rtype: numpy.array
    """
    with gpu(0):
        p = mx.nd.ones((1, trans.shape[0])) / trans.shape[0]
        p_new = mx.nd.dot(p, trans)
        for t in range(max_iter):
            if mx.nd.max(mx.nd.abs(p - p_new)).asnumpy()[0] < epsilon:
                break
            p = p_new
            p_new = mx.nd.dot(p, trans)
        return p_new[0]
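# Hypothetical usage sketch (not part of the original source; assumes a GPU is
# available, since the helper allocates under gpu(0)): run the power iteration
# on a small transition matrix and check the fixed point p = pQ.
import mxnet as mx

Q = mx.nd.array([[0.1, 0.9],
                 [0.5, 0.5]], ctx=mx.gpu(0))
p = iterative_page_rank(Q, epsilon=1e-12, max_iter=500)
print(p.asnumpy())                                 # ~ [0.357 0.643]
print(mx.nd.dot(p.reshape((1, -1)), Q).asnumpy())  # ~ the same vector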
def _test_consistency(version: str, approx_func, sym: bool):
    """
    Test the rational activation function from the mxnet package on test_data,
    validating that CUDA and CPU results are consistent, i.e. that there is no
    significant difference between CUDA and CPU results.

    :param sym: use symbolic execution if True, else imperative execution
    :param approx_func: which function to use as initial shape
    :param version: which version of the function to test
    """
    # declare results
    cpu_result = None
    gpu_result = None

    # mapping from initial functions to their names
    init_fun_names = {
        LeakyReLU: 'leaky_relu',
        tanh: 'tanh',
        sigmoid: 'sigmoid',
    }

    # set cpu context and test
    with mx.Context(cpu(0)):
        # instantiate a tensor for testing
        test_data = mx.nd.array([-2., -1., 0., 1., 2.])

        # instantiate rational activation function under test on cpu
        cpu_fut = Rational(approx_func=init_fun_names.get(approx_func),
                           version=version, cuda=False, trainable=False)

        # create a small neural network and add the fut as a layer
        cpu_net = mx.gluon.nn.HybridSequential()
        with cpu_net.name_scope():
            cpu_net.add(cpu_fut)
        cpu_net.initialize()

        # trigger symbolic rather than imperative API, if specified
        if sym:
            cpu_net.hybridize()

        # run the function on test data
        cpu_result = cpu_net(test_data)

    # set gpu context and test
    assert num_gpus() > 0, 'tried to run on GPU, but none available.'
    with mx.Context(gpu(0)):
        # instantiate a tensor for testing
        test_data = mx.nd.array([-2., -1., 0., 1., 2.])

        # instantiate rational activation function under test on gpu
        gpu_fut = Rational(approx_func=init_fun_names.get(approx_func),
                           version=version, cuda=True, trainable=False)

        # create a small neural network and add the fut as a layer
        gpu_net = mx.gluon.nn.HybridSequential()
        with gpu_net.name_scope():
            gpu_net.add(gpu_fut)
        gpu_net.initialize()

        # trigger symbolic rather than imperative API, if specified
        if sym:
            gpu_net.hybridize()

        # run the function on test data
        gpu_result = gpu_net(test_data)

    # check that there is no significant difference between the results
    assert all(isclose(cpu_result.asnumpy(), gpu_result.asnumpy(), atol=1e-06))
# #### 3.1.1 Test CNN model

# In[9]:

net.hybridize()
net.initialize(force_reinit=True)
X = nd.random.uniform(shape=(1, 3, 32, 32))
net(X)

# ### 3.2 Set device

# In[10]:

ctx = context.gpu(0) if context.num_gpus() else context.cpu()
LOG(INFO, 'Device in Use:', ctx)

# ### 3.3 Define Learning Rate Scheduler

# In[11]:

# learning rate scheduler: one cycle over the first 40 epochs, then cool down
# for the remaining iterations (assumed completion of the truncated expression)
iter_per_epochs = math.ceil(len(train_dataset) / BATCH_SIZE)
niterations = cfg.NEPOCHS * iter_per_epochs
lr_scheduler = learning.OneCycleScheduler(start_lr=0.01, max_lr=0.05,
                                          cycle_length=40 * iter_per_epochs,
                                          cooldown_length=niterations - 40 * iter_per_epochs)
# Mutation/crossover parameters (the opening of the first add_argument call was
# truncated; '--do_mutation' is assumed from the help text)
p.add_argument('--do_mutation', type=str2bool, nargs='?', const=True, default=True,
               help="Use mutation when creating children")
p.add_argument('--do_crossover', type=str2bool, nargs='?', const=True, default=True,
               help="Use crossover when creating children")

# Log parameters
p.add_argument('--logger_filename', type=str, default="output.json",
               help="Filename of json output file")

# Miscellaneous parameters
p.add_argument('--device', type=str,
               default=cuda.gpu(0) if cuda.num_gpus() else cuda.cpu(),
               help="Specifies on which device the neural network of the agent will be run")

args = p.parse_args()
main(args)
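# Hypothetical invocation (not part of the original source; the script name is
# assumed): thanks to nargs='?' and const=True, the boolean flags accept an
# explicit value or act as bare switches.
#
#     python evolve.py --do_mutation false --do_crossover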
def try_gpu(i=0):
    """Return gpu(i) if it exists, otherwise return cpu()."""
    return context.gpu(i) if context.num_gpus() >= i + 1 else context.cpu()
def try_all_gpus():
    """Return all available GPUs, or [cpu()] if no GPU exists."""
    ctxes = [context.gpu(i) for i in range(context.num_gpus())]
    return ctxes if ctxes else [context.cpu()]
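# Hypothetical usage sketch (not part of the original source; `net` is assumed
# to be a Gluon block defined elsewhere): both helpers degrade gracefully, so
# the same script runs on CPU-only and multi-GPU machines.
ctx = try_gpu()            # gpu(0) if at least one GPU exists, else cpu()
ctx_list = try_all_gpus()  # e.g. [gpu(0), gpu(1)], or [cpu()] with no GPU
net.initialize(ctx=ctx_list)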
def main():
    """Main function: load data comprising molecules, create RNN, fit RNN with
    the data, and predict novel molecules.

    Command line options:

    positional arguments:
      filename              The path to the training data containing SMILES
                            strings.

    optional arguments:
      -h, --help            show this help message and exit
      -b BATCH_SIZE, --batch_size BATCH_SIZE
                            The number of batches to generate at every
                            iteration. (default: 32)
      -s N_STEPS, --n_steps N_STEPS
                            The number of time steps. (default: 40)
      -u HIDDEN_SIZE, --hidden_size HIDDEN_SIZE
                            The number of units in a network's hidden state.
                            (default: 256)
      -n N_LAYERS, --n_layers N_LAYERS
                            The number of hidden layers. (default: 1)
      -l LEARNING_RATE, --learning_rate LEARNING_RATE
                            The learning rate. (default: 1.0)
      -e N_EPOCHS, --n_epochs N_EPOCHS
                            The number of epochs. (default: 2000)
      -p PREDICT_EPOCH, --predict_epoch PREDICT_EPOCH
                            Predict new strings every p epochs. (default: 20)
      -v VERBOSE, --verbose VERBOSE
                            Print logs every v iterations. (default: 10)
      -c {cpu,CPU,gpu,GPU}, --ctx {cpu,CPU,gpu,GPU}
                            CPU or GPU. (default: cpu)
      -r PREFIX, --prefix PREFIX
                            Initial symbol(s) of a SMILES string to generate.
                            (default: C)
    """
    options = process_options()
    dataset = SMILESDataset(filename=options.filename)
    dataloader = SMILESDataLoader(
        batch_size=options.batch_size,
        n_steps=options.n_steps,
        dataset=dataset,
    )
    rnn_layer = gluon.rnn.GRU(
        hidden_size=options.hidden_size,
        num_layers=options.n_layers,
    )
    model = SMILESRNNModel(
        rnn_layer=rnn_layer,
        vocab_size=len(dataloader.vocab),
    )
    optimizer_params = {
        'learning_rate': options.learning_rate,
        # TODO Add gradient clipping
        # 'clip_gradient': 1,
    }
    ctx = {
        'cpu': context.cpu(0),
        'gpu': context.gpu(0),
    }
    train(
        dataloader=dataloader,
        model=model,
        optimizer_params=optimizer_params,
        n_epochs=options.n_epochs,
        predict_epoch=options.predict_epoch,
        verbose=options.verbose,
        ctx=ctx[options.ctx.lower()],
        prefix=options.prefix,
    )
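# Hypothetical invocation (not part of the original source; the module name and
# data path are assumptions):
#
#     python train_smiles.py data/smiles.txt -b 64 -e 100 -c gpu -r C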