def build_model(self):
    """Build, compile and return the InceptionV3-based classifier.

    An ImageNet-initialised InceptionV3 without its top is followed by
    global average pooling, a stack of Dense/Dropout layers (taken from the
    configuration when ``architecture.available`` is set, otherwise a single
    2048-unit layer with 0.5 dropout) and a softmax layer with
    ``config.exp.num_of_classes`` units.

    The compiled model is stored on ``self.model`` and also returned.
    """
    backbone = applications.InceptionV3(
        include_top=False,
        weights="imagenet",
        input_shape=(constants.IMAGE_DIMS[0], constants.IMAGE_DIMS[1], 3))

    # Every backbone layer shares the same trainable flag.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = self.config.model.train_lower

    features = GlobalAveragePooling2D()(backbone.output)

    if self.config.model.architecture.available:
        architecture = self.config.model.architecture
        for idx, units in enumerate(architecture.dense):
            rate = architecture.dropout[idx]
            features = Dense(units, activation="relu")(features)
            features = Dropout(rate)(features)
    else:
        # Default head when no architecture is configured.
        features = Dense(2048, activation="relu")(features)
        features = Dropout(0.5)(features)

    predictions = Dense(self.config.exp.num_of_classes,
                        activation="softmax")(features)
    self.model = Model(inputs=backbone.input, outputs=predictions)

    optimizer = optimizers.get(self.config.model.optimizing.optimizer)
    assert isinstance(optimizer, optimizers.Optimizer)
    optimizer.lr = self.config.model.optimizing.learning_rate
    if self.config.model.optimizing.optimizer == "sgd":
        # Momentum settings only exist for SGD.
        optimizer.nesterov = self.config.model.optimizing.nesterov
        optimizer.momentum = self.config.model.optimizing.momentum
    optimizer.decay = self.config.model.decay

    self.model.compile(optimizer=optimizer,
                       loss="categorical_crossentropy",
                       metrics=self.metrics)
    return self.model
def compile(self, *args, **kwargs):
    '''Refer to Model.compile docstring for parameters. Override
    functionality is documented below.

    :override compile: Override Model.compile method to check for options
        that the optimizer is multi-gpu enabled, and synchronize initial
        variables.

    The optimizer may be given by keyword (``optimizer=...``) or as the
    first positional argument, matching ``Model.compile``. Anything that is
    not already an optimizer instance is resolved through ``KO.get``.

    :raises KeyError: if no optimizer is supplied at all.
    :raises RuntimeError: if synchronized optimization is requested but the
        optimizer is not flagged as multi-GPU (``ismgpu``).
    '''
    initsync = self._initsync
    usenccl = self._usenccl

    # ``optimizer`` is the first positional parameter of Model.compile, so
    # it can legitimately arrive in ``args``. With neither form present the
    # keyword lookup below raises KeyError, as it always did.
    by_keyword = 'optimizer' in kwargs or not args
    opt = kwargs['optimizer'] if by_keyword else args[0]

    if not isinstance(opt, KO.Optimizer):
        opt = KO.get(opt)

    # Hand the resolved instance on through the same channel it came in.
    if by_keyword:
        kwargs['optimizer'] = opt
    else:
        args = (opt,) + tuple(args[1:])

    if self._syncopt and not getattr(opt, 'ismgpu', False):
        raise RuntimeError(
            'Multi-GPU synchronization model requires a multi-GPU '
            'optimizer. Instead got: {}'.format(opt))

    opt.usenccl = usenccl

    if self._enqueue_ops:
        # Produces a warning that kwargs are ignored for Tensorflow. Patch
        # Function in tensorflow_backend to use the enqueue_ops option.
        kwargs['enqueue_ops'] = self._enqueue_ops

    super(ModelMGPU, self).compile(*args, **kwargs)

    if initsync:
        self._run_initsync()
def __init__(self, n_hidden_joint_layers=32, n_hidden_joint_units=32,
             activation='selu', kernel_initializer='lecun_normal',
             kernel_regularizer=l2(l=0.01), optimizer="adam", batch_size=256,
             random_state=None, **kwargs):
    """Store the hyper-parameters and construct the layers.

    The optimizer identifier is resolved with ``optimizers.get`` and any
    extra keyword arguments are kept untouched on ``self.__kwargs__``.
    """
    self.logger = logging.getLogger(FATERankingCore.__name__)
    self.random_state = check_random_state(random_state)

    # Joint hidden layer settings.
    self.n_hidden_joint_layers = n_hidden_joint_layers
    self.n_hidden_joint_units = n_hidden_joint_units
    self.activation = activation
    self.kernel_initializer = kernel_initializer
    self.kernel_regularizer = kernel_regularizer

    # Training settings.
    self.batch_size = batch_size
    self.optimizer = optimizers.get(optimizer)
    self.__kwargs__ = kwargs

    layer_options = {
        'activation': self.activation,
        'kernel_initializer': self.kernel_initializer,
        'kernel_regularizer': self.kernel_regularizer,
    }
    self._construct_layers(**layer_options)
def __init__(self, n_hidden_joint_layers=32, n_hidden_joint_units=32,
             activation='selu', kernel_initializer='lecun_normal',
             kernel_regularizer=l2(l=0.01), optimizer="adam",
             es_patience=300, use_early_stopping=False, batch_size=256,
             random_state=None, **kwargs):
    """Store the hyper-parameters, set up early stopping and build layers.

    An ``EarlyStoppingWithWeights`` callback with patience ``es_patience``
    is always created; ``use_early_stopping`` is only recorded on
    ``self._use_early_stopping``. Extra keyword arguments are kept on
    ``self.__kwargs__``.
    """
    self.logger = logging.getLogger(GENERAL_RANKING_CORE)
    self.random_state = check_random_state(random_state)

    # Joint hidden layer settings.
    self.n_hidden_joint_layers = n_hidden_joint_layers
    self.n_hidden_joint_units = n_hidden_joint_units
    self.activation = activation
    self.kernel_initializer = kernel_initializer
    self.kernel_regularizer = kernel_regularizer

    # Training settings.
    self.batch_size = batch_size
    self.optimizer = optimizers.get(optimizer)
    self.early_stopping = EarlyStoppingWithWeights(patience=es_patience)
    self._use_early_stopping = use_early_stopping
    self.__kwargs__ = kwargs

    layer_options = {
        'activation': self.activation,
        'kernel_initializer': self.kernel_initializer,
        'kernel_regularizer': self.kernel_regularizer,
    }
    self._construct_layers(**layer_options)
def valid_optimizer(value):
    """Validate *value* as a Keras optimizer identifier.

    Returns *value* unchanged when ``keras.optimizers.get`` accepts it and
    raises ``argparse.ArgumentTypeError`` when that lookup raises
    ``ValueError``.
    """
    # Imported lazily, exactly as before, so Keras is only loaded on use.
    from keras.optimizers import get as resolve_optimizer

    try:
        resolve_optimizer(value)
    except ValueError:
        raise argparse.ArgumentTypeError('unknown optimizer: %s' % value)
    else:
        return value
def make_online(self):
    """Build the backend training function for the embedding.

    A randomly initialised embedding variable of shape
    ``(dataset.nsize, flowargs['embdim'])`` is optimised with Adagrad
    (learning rate ``self.lr``) under a max-norm constraint. The loss is a
    weighted hinge term on squared distances between positive and negative
    pairs plus ``flowargs['beta'][0]`` times the mean squared difference to
    the ``prevemb`` placeholder.

    Returns a 4-tuple ``(lf, None, [embedding], {})`` where ``lf`` is a
    backend function taking ``[data, weight, prevemb]`` and returning
    ``[loss]`` while applying the optimizer updates.
    """
    embedding = K.variable(
        np.random.uniform(0, 1, (self.dataset.nsize, self.flowargs['embdim'])))
    prevemb = K.placeholder(ndim=2, dtype='float32')  # (nsize, d)
    data = K.placeholder(
        ndim=2, dtype='int32'
    )  # (batchsize, 5), [k, from_pos, to_pos, from_neg, to_neg]
    weight = K.placeholder(ndim=1, dtype='float32')  # (batchsize, )

    # Theano supports fancy indexing directly; other backends need
    # slice + squeeze + gather to pick the same columns of ``data``.
    if K._BACKEND == 'theano':
        # (batchsize, d) => (batchsize, )
        # data[:, 0] should be always 0, so we simply ignore it
        # note, when you want to use it, that according to data generation procedure, the actual data[:, 0] is not 0
        dist_pos = embedding[data[:, 1]] - embedding[data[:, 2]]
        dist_pos = K.sum(dist_pos * dist_pos, axis=-1)
        dist_neg = embedding[data[:, 3]] - embedding[data[:, 4]]
        dist_neg = K.sum(dist_neg * dist_neg, axis=-1)
    else:
        dist_pos = K.gather(embedding, K.squeeze(K.slice(data, [0, 1], [-1, 1]), axis=1)) - \
            K.gather(embedding, K.squeeze(K.slice(data, [0, 2], [-1, 1]), axis=1))
        dist_pos = K.sum(dist_pos * dist_pos, axis=-1)
        dist_neg = K.gather(embedding, K.squeeze(K.slice(data, [0, 3], [-1, 1]), axis=1)) - \
            K.gather(embedding, K.squeeze(K.slice(data, [0, 4], [-1, 1]), axis=1))
        dist_neg = K.sum(dist_neg * dist_neg, axis=-1)

    # Hinge loss with unit margin, weighted per sample.
    # (batchsize, )
    margin = 1
    lprox = K.maximum(margin + dist_pos - dist_neg, 0) * weight
    # (1, )
    lprox = K.mean(lprox)

    # lsmooth: mean squared distance to the previous embedding
    lsmooth = embedding - prevemb  # (nsize, d)
    lsmooth = K.sum(K.square(lsmooth), axis=-1)  # (nsize)
    lsmooth = K.mean(lsmooth)

    loss = lprox + self.flowargs['beta'][0] * lsmooth

    opt = optimizers.get({
        'class_name': 'Adagrad',
        'config': {
            'lr': self.lr
        }
    })
    # Constraint applied to the embedding on every update.
    cstr = {
        embedding: constraints.get({
            'class_name': 'maxnorm',
            'config': {
                'max_value': 1,
                'axis': 1
            }
        })
    }
    # NOTE(review): this is the (params, constraints, loss) argument order
    # of older Keras releases - confirm against the pinned Keras version.
    upd = opt.get_updates([embedding], cstr, loss)
    lf = K.function([data, weight, prevemb], [loss], updates=upd)

    return lf, None, [embedding], {}
def __init__(self, optimizer):
    """
    Base wrapper around a Keras optimizer whose gradients are corrected
    before the update ops are computed.

    The wrapper resolves `optimizer` through `optimizers.get` and keeps a
    reference to the wrapped optimizer's original `get_gradients` method,
    so that the original gradient computation stays reachable.

    # Abstract Methods
        get_gradients: Must be overridden to support different gradient
            operations.
        get_config: Config needs to be carefully built for serialization.
        from_config: Config must be carefully used to build a Subclass.

    # Arguments:
        optimizer: Keras Optimizer or a string. All optimizers other
            than TFOptimizer are supported. If string, instantiates a
            default optimizer with that alias.

    # Raises
        NotImplementedError: If `optimizer` is of type `TFOptimizer`.
    """
    if optimizer.__class__.__name__ == 'TFOptimizer':
        raise NotImplementedError('Currently, TFOptimizer is not supported.')

    wrapped = optimizers.get(optimizer)
    self.optimizer = wrapped

    # remember the wrapped optimizer's own `get_gradients`
    self._optimizer_get_gradients = wrapped.get_gradients
def compile(self, *args, **kwargs):
    '''Refer to Model.compile docstring for parameters. Override
    functionality is documented below.

    :override compile: Override Model.compile method to check for options
        that the optimizer is multi-gpu enabled, and synchronize initial
        variables.

    The optimizer may be given by keyword (``optimizer=...``) or as the
    first positional argument, matching ``Model.compile``. Anything that is
    not already an optimizer instance is resolved through ``KO.get``.

    :raises KeyError: if no optimizer is supplied at all.
    :raises RuntimeError: if synchronized optimization is requested but the
        optimizer is not flagged as multi-GPU (``ismgpu``).
    '''
    initsync = self._initsync
    usenccl = self._usenccl

    # ``optimizer`` is the first positional parameter of Model.compile, so
    # it can legitimately arrive in ``args``. With neither form present the
    # keyword lookup below raises KeyError, as it always did.
    by_keyword = 'optimizer' in kwargs or not args
    opt = kwargs['optimizer'] if by_keyword else args[0]

    if not isinstance(opt, KO.Optimizer):
        opt = KO.get(opt)

    # Hand the resolved instance on through the same channel it came in.
    if by_keyword:
        kwargs['optimizer'] = opt
    else:
        args = (opt,) + tuple(args[1:])

    if self._syncopt and not getattr(opt, 'ismgpu', False):
        raise RuntimeError(
            'Multi-GPU synchronization model requires a multi-GPU '
            'optimizer. Instead got: {}'.format(opt))

    opt.usenccl = usenccl

    if self._enqueue_ops:
        # Produces a warning that kwargs are ignored for Tensorflow. Patch
        # Function in tensorflow_backend to use the enqueue_ops option.
        kwargs['fetches'] = self._enqueue_ops

    super(ModelMGPU, self).compile(*args, **kwargs)

    if initsync:
        self._run_initsync()
def compile_model(self):
    """Create the inputs, build the network and compile the Keras model.

    Word-level and character-level inputs are included according to
    ``args.need_word_level`` / ``args.need_char_level``; ``self.magic`` is
    always appended last. The optimizer is built from ``args.optimizer``
    and ``args.lr``; the model is compiled with ``args.loss`` and the
    ``'acc'`` metric, and its summary is printed.
    """
    self.Q1, self.Q2, self.Q1_char, self.Q2_char = self.make_input()
    (self.Q1_emb, self.Q2_emb,
     self.Q1_char_emb, self.Q2_char_emb) = self.embedded()
    self.output = self.build_model()

    model_inputs = []
    if self.args.need_word_level:
        model_inputs.extend([self.Q1, self.Q2])
    if self.args.need_char_level:
        model_inputs.extend([self.Q1_char, self.Q2_char])
    model_inputs.append(self.magic)

    self.model = Model(inputs=model_inputs, outputs=self.output)

    optimizer_spec = {
        'class_name': self.args.optimizer,
        'config': {'lr': self.args.lr},
    }
    self.model.compile(optimizer=get(optimizer_spec),
                       loss=self.args.loss,
                       metrics=['acc'])
    self.model.summary()
def compile(self, optimizer, loss):
    """Resolve optimizer and loss and build the train/test functions.

    The loss is applied directly to the model input (train and test
    variants). Regularizers are folded into the training loss only. The
    resulting backend functions are stored as ``self._train`` and
    ``self._test``.
    """
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)

    # input of model
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    train_loss = self.loss(self.X_train)
    test_loss = self.loss(self.X_test)
    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'

    for regularizer in self.regularizers:
        train_loss = regularizer(train_loss)

    updates = self.optimizer.get_updates(self.params, self.constraints,
                                         train_loss)
    updates += self.updates

    # The backend functions expect a list of inputs.
    already_list = type(self.X_train) == list
    train_ins = self.X_train if already_list else [self.X_train]
    test_ins = self.X_test if already_list else [self.X_test]

    self._train = K.function(train_ins, train_loss, updates=updates)
    self._test = K.function(test_ins, test_loss)
def model_from_config(config, custom_objects=None):
    """Rebuild a ``Graph`` or ``Sequential`` model from its config dict.

    Args:
        config: dict with at least a ``'name'`` entry (``'Graph'`` or
            ``'Sequential'``). When it also contains ``'optimizer'`` the
            model is treated as compiled and is compiled again here.
        custom_objects: optional dict forwarded to
            ``container_from_config``. Defaults to a fresh empty dict on
            every call.

    Returns:
        The reconstructed (and, if applicable, compiled) model.

    Raises:
        Exception: if ``config['name']`` is not a recognised model type.
    """
    # A literal ``{}`` default would be one dict shared by every call.
    if custom_objects is None:
        custom_objects = {}

    model_name = config.get('name')
    if model_name not in {'Graph', 'Sequential'}:
        raise Exception('Unrecognized model:', model_name)

    # Create a container then set class to appropriate model
    model = container_from_config(config, custom_objects=custom_objects)
    if model_name == 'Graph':
        model.__class__ = Graph
    elif model_name == 'Sequential':
        model.__class__ = Sequential

    if 'optimizer' in config:
        # if it has an optimizer, the model is assumed to be compiled
        loss = config.get('loss')
        class_mode = config.get('class_mode')
        theano_mode = config.get('theano_mode')

        # Copy so that popping 'name' leaves the caller's config intact.
        optimizer_params = dict(config.get('optimizer').items())
        optimizer_name = optimizer_params.pop('name')
        optimizer = optimizers.get(optimizer_name, optimizer_params)

        if model_name == 'Sequential':
            model.compile(loss=loss, optimizer=optimizer,
                          class_mode=class_mode, theano_mode=theano_mode)
        elif model_name == 'Graph':
            model.compile(loss=loss, optimizer=optimizer,
                          theano_mode=theano_mode)

    return model
def __init__(self, n_object_features, n_hidden=2, n_units=8,
             loss_function='binary_crossentropy', batch_normalization=True,
             kernel_regularizer=l2(l=1e-4), kernel_initializer='lecun_normal',
             activation='relu',
             optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9),
             metrics=['binary_accuracy'], batch_size=256, random_state=None,
             **kwargs):
    """Store the hyper-parameters and construct the layers.

    The optimizer is resolved with ``optimizers.get`` and its configuration
    is cached on ``self._optimizer_config``. Only keyword arguments listed
    in ``allowed_dense_kwargs`` are kept and forwarded to
    ``_construct_layers``.
    """
    self.logger = logging.getLogger("CmpNet")

    self.n_object_features = n_object_features
    self.batch_normalization = batch_normalization
    self.activation = activation
    self.hash_file = None
    self.batch_size = batch_size
    self.metrics = metrics
    self.kernel_regularizer = kernel_regularizer
    self.kernel_initializer = kernel_initializer
    self.loss_function = loss_function

    self.optimizer = optimizers.get(optimizer)
    self._optimizer_config = self.optimizer.get_config()

    self.n_hidden = n_hidden
    self.n_units = n_units

    # Drop every keyword the dense layers would not understand.
    self.kwargs = {
        key: value for key, value in kwargs.items()
        if key in allowed_dense_kwargs
    }

    self.threshold_instances = int(1e10)
    self.random_state = check_random_state(random_state)
    self.model = None

    self._construct_layers(kernel_regularizer=self.kernel_regularizer,
                           kernel_initializer=self.kernel_initializer,
                           activation=self.activation,
                           **self.kwargs)
def optimizer_from_config(config):
    """Instantiate the optimizer described by ``config['optimizer']``.

    The ``'name'`` entry selects the optimizer; the remaining entries are
    passed on as its parameters. The caller's mapping is not modified.
    """
    settings = {key: value for key, value in config.get('optimizer').items()}
    return kopt.get(settings.pop('name'), settings)
def on_train_begin(self, logs=None):
    """Register the model with the Fitsbook API when training starts.

    Posts the model name/config and the optimizer name/config to
    ``{api_root}/model``. On a truthy response the returned ``id`` is
    stored on ``self.model_id`` and the monitoring URL is printed.

    Args:
        logs: training logs supplied by the callback machinery. Not used
            by this hook.
    """
    optimizer = self.model.optimizer
    if isinstance(optimizer, str):
        # Turn an alias such as 'adam' into an instance so that both the
        # class name and ``get_config()`` below are available.
        optimizer = optimizers.get(optimizer)

    send = {
        'model': {
            'name': self.model.name or '',
            'config': self.model.get_config()
        },
        'optimizer': {
            'name': optimizer.__class__.__name__,
            'config': optimizer.get_config()
        }
    }

    response = requests.post(f'{self.api_root}/model', json=send)

    if response:
        r = response.json()
        # Normalise falsy ids (empty string, 0) to None.
        self.model_id = r['id'] or None
        print(
            f'[Fitsbook]: Monitoring this training in real time {self.site_url}/stats/{self.model_id}'
        )
def model_from_config(config):
    """Rebuild a ``Graph`` or ``Sequential`` model from its config dict.

    When the config carries an ``'optimizer'`` entry the model is assumed
    to have been compiled and is compiled again with the stored loss,
    optimizer and modes. Raises ``Exception`` for any other model name.
    """
    model_name = config.get('name')
    if model_name not in {'Graph', 'Sequential'}:
        raise Exception('Unrecognized model:', model_name)

    # Build a generic container first, then switch it to the real class.
    model = container_from_config(config)
    model.__class__ = Graph if model_name == 'Graph' else Sequential

    if 'optimizer' not in config:
        return model

    # An optimizer in the config means the model was compiled.
    loss = config.get('loss')
    class_mode = config.get('class_mode')
    theano_mode = config.get('theano_mode')

    optimizer_params = dict(config.get('optimizer').items())
    optimizer = optimizers.get(optimizer_params.pop('name'), optimizer_params)

    compile_options = {
        'loss': loss,
        'optimizer': optimizer,
        'theano_mode': theano_mode,
    }
    if model_name == 'Sequential':
        # Only Sequential models take a class mode.
        compile_options['class_mode'] = class_mode
    model.compile(**compile_options)
    return model
def adversarial_compile(self, adversarial_optimizer, player_optimizers, loss,
                        **kwargs):
    """
    Configures the learning process.

    Each player model in ``self.layers`` is compiled with its own optimizer
    and the shared ``loss``. The inputs of the first player model are
    reused as this model's inputs, while outputs, targets, loss functions
    and sample weights of all player models are concatenated in order.

    :param adversarial_optimizer: instance of AdversarialOptimizer
    :param player_optimizers: list of optimizers for each player; its
        length must equal ``self.player_count``
    :param loss: loss function or function name
    :param kwargs: additional arguments to function compilation
    :return: None
    """
    self._function_kwargs = kwargs
    self.adversarial_optimizer = adversarial_optimizer
    # One optimizer per player is required.
    assert (len(player_optimizers) == self.player_count)

    self.optimizers = [optimizers.get(optimizer) for optimizer in player_optimizers]
    self.loss = loss
    self.optimizer = None

    # Build player models
    for opt, model in zip(self.optimizers, self.layers):
        model.compile(opt, loss=self.loss)

    # Reset the compiled functions.
    self.train_function = None
    self.test_function = None

    # Inputs are same for each model
    def filter_inputs(inputs):
        # Identity: the first player's inputs are used unchanged.
        return inputs

    self.internal_input_shapes = filter_inputs(self.layers[0].internal_input_shapes)
    self.input_names = filter_inputs(self.layers[0].input_names)
    self.inputs = filter_inputs(self.layers[0].inputs)

    # Outputs are concatenated player models
    models = self.layers

    def collect(f):
        # Flatten f(m) over all player models into a single list.
        return list(itertools.chain.from_iterable(f(m) for m in models))

    self.internal_output_shapes = collect(lambda m: m.internal_output_shapes)
    self.loss_functions = collect(lambda m: m.loss_functions)
    self.targets = collect(lambda m: m.targets)
    self.outputs = collect(lambda m: m.outputs)
    self.sample_weights = collect(lambda m: m.sample_weights)
    self.sample_weight_modes = collect(lambda m: m.sample_weight_modes)

    # for each target, output name is {player}_{target}
    self.output_names = []
    for i in range(self.player_count):
        for name in models[i].output_names:
            self.output_names.append("{}_{}".format(self.player_names[i], name))

    # for each metric, metric name is {player}_{metric}
    self.metrics_names = ["loss"]
    for i in range(self.player_count):
        for name in models[i].metrics_names:
            self.metrics_names.append("{}_{}".format(self.player_names[i], name))

    # total loss is sum of losses
    self.total_loss = np.float32(0)
    for model in models:
        self.total_loss += model.total_loss
def get_optimizer(optimizer, lr, decay, momentum) -> Optimizer:
    """Resolve *optimizer* and overwrite its hyper-parameter values.

    The learning rate is always set. ``momentum`` and ``decay`` are only
    set when the resolved optimizer has an attribute of that name.
    """
    instance = optimizers.get(optimizer)
    K.set_value(instance.lr, lr)

    # Optional hyper-parameters, applied in the original order.
    for attribute, value in (('momentum', momentum), ('decay', decay)):
        if hasattr(instance, attribute):
            K.set_value(getattr(instance, attribute), value)

    return instance
def from_config(cls, config):
    """Recreate an instance of ``cls`` from its serialized *config*.

    The wrapped optimizer is rebuilt from ``config['optimizer_name']`` and
    ``config['optimizer_config']``; ``config['normalization']`` is passed
    on unchanged.
    """
    wrapped = optimizers.get({
        'class_name': config['optimizer_name'],
        'config': config['optimizer_config'],
    })
    return cls(wrapped, normalization=config['normalization'])
def __init__(self, n_hidden_joint_layers=32, n_hidden_joint_units=32,
             activation='selu', kernel_initializer='lecun_normal',
             kernel_regularizer=l2(l=0.01),
             optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9),
             batch_size=256, random_state=None, **kwargs):
    """
        Create a FATE-network architecture.
        Training and prediction complexity is linear in the number of objects.

        Parameters
        ----------
        n_hidden_joint_layers : int
            Number of joint layers.
        n_hidden_joint_units : int
            Number of hidden units in each joint layer
        activation : string or function
            Activation function to use in the hidden units
        kernel_initializer : function or string
            Initialization function for the weights of each hidden layer
        kernel_regularizer : function or string
            Regularizer to use in the hidden units
        optimizer : string or function
            Stochastic gradient optimizer; resolved with ``optimizers.get``
        batch_size : int
            Batch size to use for training
        random_state : int or object
            Numpy random state
        **kwargs
            Keyword arguments for the hidden units; entries that are not
            listed in ``allowed_dense_kwargs`` are discarded
    """
    self.logger = logging.getLogger(FATENetworkCore.__name__)
    self.random_state = check_random_state(random_state)

    self.n_hidden_joint_layers = n_hidden_joint_layers
    self.n_hidden_joint_units = n_hidden_joint_units
    self.activation = activation
    self.kernel_initializer = kernel_initializer
    self.kernel_regularizer = kernel_regularizer
    self.batch_size = batch_size

    self.optimizer = optimizers.get(optimizer)
    self._optimizer_config = self.optimizer.get_config()

    self.joint_layers = None
    self.scorer = None

    # Keep only the keywords the dense layers accept.
    self.kwargs = {
        key: value for key, value in kwargs.items()
        if key in allowed_dense_kwargs
    }

    self._construct_layers(activation=self.activation,
                           kernel_initializer=self.kernel_initializer,
                           kernel_regularizer=self.kernel_regularizer,
                           **self.kwargs)
def compile(self, optimizer, loss, theano_mode=None):
    """Build the Theano train/test/predict functions of the graph.

    ``loss`` is a dictionary mapping each output name to a loss function.
    The per-output weighted losses are summed, regularizers are added to
    the training loss only, and the compiled functions are stored as
    ``self._train``, ``self._test`` and ``self._predict``.
    """
    targets = []
    train_outputs = []
    test_outputs = []
    sample_weights = []
    train_loss = 0.
    test_loss = 0.

    for output_name in self.output_order:
        loss_fn = loss[output_name]
        output = self.outputs[output_name]

        y_train = output.get_output(True)
        y_test = output.get_output(False)
        target = T.zeros_like(y_test)
        targets.append(target)
        train_outputs.append(y_train)
        test_outputs.append(y_test)

        # Not every output layer provides a mask.
        mask = (output.get_output_mask()
                if hasattr(output, "get_output_mask") else None)

        weight = T.ones_like(y_test)
        sample_weights.append(weight)

        weighted_loss = weighted_objective(objectives.get(loss_fn))
        train_loss += weighted_loss(target, y_train, weight, mask)
        test_loss += weighted_loss(target, y_test, weight, mask)

    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'

    ins = [self.inputs[name].input for name in self.input_order]
    train_ins = ins + targets + sample_weights
    test_ins = ins + targets + sample_weights

    for regularizer in self.regularizers:
        train_loss = regularizer(train_loss)

    self.optimizer = optimizers.get(optimizer)
    updates = self.optimizer.get_updates(self.params, self.constraints,
                                         train_loss)
    updates += self.updates
    self.theano_mode = theano_mode
    self.loss = loss

    # Options shared by all three compiled functions.
    shared_options = {'allow_input_downcast': True, 'mode': theano_mode}
    self._train = theano.function(train_ins, train_loss, updates=updates,
                                  **shared_options)
    self._test = theano.function(test_ins, test_loss, **shared_options)
    self._predict = theano.function(inputs=ins, outputs=test_outputs,
                                    **shared_options)
def model_from_dict_w_opt(model_dict, custom_objects=None):
    """Builds a model from a serialized model using `to_dict_w_opt`

    Args:
        model_dict(dict): a serialized Keras model
        custom_objects(dict, optional): a dictionary mapping custom object
            names to custom objects (Layers, functions, etc.)

    Returns:
        A Keras.Model which is compiled if the information about the
        optimizer is available.

    Notes:
        The stored loss may be a dict (output name -> loss), a list/tuple
        or a single identifier. Any identifier equal to a key of
        `custom_objects` is replaced by the matching custom object.
        `model_dict` itself is never modified.
    """
    if custom_objects is None:
        custom_objects = {}

    model = layer_from_config(model_dict['config'],
                              custom_objects=custom_objects)

    if 'optimizer' not in model_dict:
        return model

    def resolve_loss(identifier):
        # Swap a loss identifier for the custom object with that name.
        for custom_name, custom_object in custom_objects.items():
            if identifier == custom_name:
                return custom_object
        return identifier

    metrics = model_dict.get("metrics")
    model_name = model_dict['config'].get('class_name')

    # if it has an optimizer, the model is assumed to be compiled
    loss = model_dict.get('loss')

    # Replace custom losses. Build a new container instead of indexing
    # into `loss`: indexing fails for string losses and would overwrite
    # the caller's dict in place.
    if isinstance(loss, dict):
        loss = {output: resolve_loss(fn) for output, fn in loss.items()}
    elif isinstance(loss, (list, tuple)):
        loss = [resolve_loss(fn) for fn in loss]
    else:
        loss = resolve_loss(loss)

    optimizer_params = dict(model_dict.get('optimizer').items())
    optimizer_name = optimizer_params.pop('name')
    optimizer = optimizers.get(optimizer_name, optimizer_params)

    if model_name == "Sequential":
        sample_weight_mode = model_dict.get('sample_weight_mode')
        model.compile(loss=loss,
                      optimizer=optimizer,
                      sample_weight_mode=sample_weight_mode,
                      metrics=metrics)
    elif model_name == "Graph":
        sample_weight_modes = model_dict.get('sample_weight_modes', None)
        loss_weights = model_dict.get('loss_weights', None)
        model.compile(loss=loss,
                      optimizer=optimizer,
                      sample_weight_modes=sample_weight_modes,
                      loss_weights=loss_weights)
    elif model_name == "Model":
        sample_weight_mode = model_dict.get('sample_weight_mode')
        loss_weights = model_dict.get('loss_weights', None)
        model.compile(loss=loss,
                      optimizer=optimizer,
                      sample_weight_mode=sample_weight_mode,
                      loss_weights=loss_weights,
                      metrics=metrics)
    return model
def clone_optimizer(optimizer):
    """Return a new optimizer built from *optimizer*'s configuration.

    The ``'name'`` entry of the config selects the optimizer and the rest
    is passed as its parameters. ``clipnorm`` and ``clipvalue`` are copied
    over when the source optimizer has them.
    """
    settings = dict(optimizer.get_config().items())
    duplicate = optimizers.get(settings.pop('name'), settings)

    for attribute in ('clipnorm', 'clipvalue'):
        if hasattr(optimizer, attribute):
            setattr(duplicate, attribute, getattr(optimizer, attribute))

    return duplicate
def get_optimizer(config):
    """Return optimizer specified by configuration.

    ``config.optimizer`` (falling back to ``DEFAULT_OPTIMIZER``) selects
    the optimizer. If ``config.learning_rate`` is set, a new optimizer of
    the same type is created with that learning rate.
    """
    settings = vars(config)
    optimizer = optimizers.get(settings.get('optimizer', DEFAULT_OPTIMIZER))

    learning_rate = settings.get('learning_rate')
    if learning_rate is None:
        # Keep the optimizer's default parameters.
        return optimizer
    return type(optimizer)(lr=learning_rate)
def clone_optimizer(optimizer):
    """Return a fresh optimizer equivalent to *optimizer*.

    A string is resolved with ``get``. An optimizer instance is rebuilt
    from its class name and a copy of its configuration.
    """
    if type(optimizer) is str:
        return get(optimizer)

    settings = dict(optimizer.get_config().items())
    return optimizer_from_config({
        "class_name": optimizer.__class__.__name__,
        "config": settings
    })
def clone_optimizer(optimizer):
    """Return a fresh optimizer equivalent to *optimizer*.

    A string is resolved with ``get``. An optimizer instance is rebuilt
    with ``deserialize`` from its class name and a copy of its
    configuration.
    """
    if type(optimizer) is str:
        return get(optimizer)

    settings = dict(optimizer.get_config().items())
    return deserialize({
        'class_name': optimizer.__class__.__name__,
        'config': settings,
    })
def getOptimizer(self):
    '''
    Resolve ``self.optimizer`` into a keras optimizer and try to apply the
    configured learning rate and gradient-norm clipping. If that fails a
    warning is printed and the optimizer is returned as it is.
    '''
    resolved = optimizers.get(self.optimizer)
    try:
        resolved.lr = K.variable(self.lr, name='lr')
        resolved.clipnorm = self.clipnorm
    except Exception:
        # Best effort only: report the problem and carry on.
        print('WARNING: could not set all optimizer flags')
    return resolved
def __init__(self, sparse_coding, nb_negative, embed_dims=128, context_dims=128,
             init_embeddings=None, negprob_table=None, optimizer='adam'):
    """Wire up the graph of the NCE language model.

    Registers one int32 input (``'idxes'``), the embedding / encoder /
    probability nodes and five outputs (``'pos_prob'``, ``'neg_prob'``,
    ``'pred_prob'``, ``'normalizer'``, ``'unrm_prob'``).

    Arguments:
        sparse_coding: matrix whose first dimension is the vocabulary size
            and whose second dimension is ``nb_base + 1``.
        nb_negative: stored on ``self.nb_negative``.
        embed_dims: embedding size passed to ``SparseEmbedding`` and the
            encoder.
        context_dims: output size of the encoder.
        init_embeddings: initial weights handed to ``SparseEmbedding``.
        negprob_table: table used by ``TableSampler`` and ``LookupProb``;
            a uniform table over the vocabulary is created when None.
        optimizer: identifier resolved with ``optimizers.get``.
    """
    super(NCELangModelV4, self).__init__(weighted_inputs=False)
    vocab_size = sparse_coding.shape[0]  # the extra word is for OOV
    self.nb_base = sparse_coding.shape[1] - 1
    self.vocab_size = vocab_size
    self.embed_dim = embed_dims
    self.optimizer = optimizers.get(optimizer)
    self.nb_negative = nb_negative
    self.loss = categorical_crossentropy
    self.loss_fnc = objective_fnc(self.loss)
    self.sparse_coding = sparse_coding

    # Keep a plain-array copy for the sampler and a Theano shared copy
    # for the graph.
    if negprob_table is None:
        negprob_table_ = np.ones(shape=(vocab_size,), dtype=theano.config.floatX)/vocab_size
        negprob_table = theano.shared(negprob_table_)
        self.neg_prob_table = negprob_table_
    else:
        self.neg_prob_table = negprob_table.astype(theano.config.floatX)
        negprob_table = theano.shared(negprob_table.astype(theano.config.floatX))

    self.sampler = TableSampler(self.neg_prob_table)

    self.add_input(name='idxes', ndim=3, dtype='int32')
    idxes = self.inputs['idxes'].get_output(True)
    shape = idxes.shape[1:]
    codes = tsp.csr_matrix('sp-codes', dtype=floatX)
    # NOTE(review): presumably the first rows of ``codes`` hold the positive
    # words and the remaining rows the negative samples - confirm with the
    # code that feeds this model.
    nb_pos_words = shape[0] * shape[1]
    pos_codes = codes[:nb_pos_words]

    # Identity nodes expose raw Theano expressions as named graph nodes.
    self.add_node(Identity(inputs={True: pos_codes, False: pos_codes}), name='codes_flat')
    self.add_node(Identity(inputs={True: shape, False: shape}), name='sents_shape')
    self.add_node(Identity(inputs={True: codes, False: codes}), name='sparse_codes')

    self.add_node(SparseEmbedding(self.nb_base+1, embed_dims, weights=init_embeddings), name='embedding',
                  inputs=('codes_flat', 'sents_shape'))
    self.add_node(LangLSTMLayer(embed_dims, output_dim=context_dims), name='encoder', inputs='embedding')
    # seq.add(Dropout(0.5))
    self.add_node(PartialSoftmaxV4(input_dim=context_dims, base_size=self.nb_base+1),
                  name='part_prob', inputs=('idxes', 'sparse_codes', 'encoder'))
    self.add_node(Dense(input_dim=context_dims, output_dim=1, activation='exponential'),
                  name='normalizer', inputs='encoder')
    self.add_node(LookupProb(negprob_table), name='lookup_prob', inputs='idxes')

    # Reuses the weights and bias of the 'part_prob' node.
    self.add_node(SharedWeightsDense(self.nodes['part_prob'].W, self.nodes['part_prob'].b, self.sparse_coding,
                                     activation='exponential'),
                  name='true_unnorm_prob', inputs='encoder')
    self.add_node(ActivationLayer(name='normalization'), name='true_prob', inputs='true_unnorm_prob')

    self.add_output('pos_prob', node='part_prob')
    self.add_output('neg_prob', node='lookup_prob')
    self.add_output('pred_prob', node='true_prob')
    self.add_output('normalizer', node='normalizer')
    self.add_output('unrm_prob', node='true_unnorm_prob')
def clone_optimizer(optimizer):
    """Return a fresh optimizer equivalent to *optimizer*.

    A string is resolved with ``get``. An optimizer instance is rebuilt
    with ``optimizer_from_config`` from its class name and a copy of its
    configuration.
    """
    if type(optimizer) is str:
        return get(optimizer)

    # Requires Keras 1.0.7 since get_config has breaking changes.
    settings = dict(optimizer.get_config().items())
    return optimizer_from_config({
        'class_name': optimizer.__class__.__name__,
        'config': settings,
    })
def __init__(self, optimizer, normalization='l2'):
    """
    Wrap a Keras optimizer so that its gradients are normalized before the
    update ops are computed.

    The wrapped optimizer's `get_gradients` is replaced by this wrapper's
    `get_gradients`, and the original method is kept on
    `self._optimizer_get_gradients`. The replacement is needed because
    Keras calls the optimizer's `get_gradients` inside `get_updates`.

    # Arguments:
        optimizer: Keras Optimizer or a string. All optimizers other
            than TFOptimizer are supported. If string, instantiates a
            default optimizer with that alias.
        normalization: string. Must be the name of a normalization
            function registered in this module. Use
            `NormalizedOptimizer.get_normalization_functions()` to list
            the available ones.

    # Raises
        ValueError: If `normalization` is not the name of a registered
            normalization function.
        NotImplementedError: If `optimizer` is of type `TFOptimizer`.
    """
    if optimizer.__class__.__name__ == 'TFOptimizer':
        raise NotImplementedError(
            'Currently, TFOptimizer is not supported.')

    if normalization not in _NORMS:
        known = str(sorted(list(_NORMS.keys())))
        raise ValueError('`normalization` must be one of %s.\n'
                         'Provided was "%s".' % (known, normalization))

    wrapped = optimizers.get(optimizer)
    self.optimizer = wrapped
    self.normalization = normalization
    self.normalization_fn = _NORMS[normalization]

    # keep the original method, then route gradient requests through us
    self._optimizer_get_gradients = wrapped.get_gradients
    wrapped.get_gradients = self.get_gradients
def build_model(self):
    """Build, compile and return the LSTM sequence classifier.

    With ``architecture.available`` set, the LSTM and Dense/Dropout stacks
    are taken from the configuration; otherwise one 1024-unit LSTM and one
    512-unit Dense layer are used. When ``config.model.return_sequence``
    is set, the per-step softmax outputs are averaged over the sequence
    and flattened.

    The compiled model is stored on ``self.model`` and also returned.
    """
    sequence_input = Input(shape=(constants.LSTM_SEQUENCE_LENGTH,
                                  constants.LSTM_FEATURE_SIZE))

    if self.config.model.architecture.available:
        architecture = self.config.model.architecture

        last_lstm = len(architecture.lstm) - 1
        for idx, units in enumerate(architecture.lstm):
            rate = architecture.dropout[idx]
            # Intermediate LSTMs must emit sequences; only the last one
            # follows the configured flag.
            if idx < last_lstm:
                keep_sequences = True
            else:
                keep_sequences = self.config.model.return_sequence
            source = sequence_input if idx == 0 else x
            x = LSTM(units, return_sequences=keep_sequences,
                     dropout=rate)(source)

        for idx, units in enumerate(architecture.dense):
            rate = architecture.dense_dropout[idx]
            x = Dense(units, activation="relu")(x)
            x = Dropout(rate)(x)
    else:
        x = LSTM(1024, return_sequences=self.config.model.return_sequence,
                 dropout=0.5)(sequence_input)
        x = Dense(512, activation="relu")(x)
        x = Dropout(0.5)(x)

    # classifier
    predictions = Dense(self.config.exp.num_of_classes,
                        activation="softmax")(x)
    if self.config.model.return_sequence:
        predictions = AveragePooling1D(
            pool_size=constants.LSTM_SEQUENCE_LENGTH)(predictions)
        predictions = Flatten()(predictions)

    self.model = Model(inputs=sequence_input, outputs=predictions)

    optimizer = optimizers.get(self.config.model.optimizing.optimizer)
    assert isinstance(optimizer, optimizers.Optimizer)
    optimizer.lr = self.config.model.optimizing.learning_rate
    if self.config.model.optimizing.optimizer == "sgd":
        # Momentum settings only exist for SGD.
        optimizer.nesterov = self.config.model.optimizing.nesterov
        optimizer.momentum = self.config.model.optimizing.momentum
    optimizer.decay = self.config.model.decay

    self.model.compile(optimizer=optimizer,
                       loss="categorical_crossentropy",
                       metrics=self.metrics)
    return self.model
def __init__(self, n_objects, n_object_features, n_hidden=2, n_units=8,
             add_zeroth_order_model=False, max_number_of_objects=5,
             num_subsample=5, loss_function=hinged_rank_loss,
             batch_normalization=False, kernel_regularizer=l2(l=1e-4),
             kernel_initializer='lecun_normal', activation='selu',
             optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9),
             metrics=None, batch_size=256, random_state=None, **kwargs):
    """Store the hyper-parameters and construct the layers.

    The optimizer is resolved with ``optimizers.get`` and its configuration
    is cached on ``self._optimizer_config``. Only keyword arguments listed
    in ``allowed_dense_kwargs`` are kept and forwarded to
    ``_construct_layers``. The model attributes are reset to ``None``
    after the layers have been constructed.
    """
    self.logger = logging.getLogger(FETANetwork.__name__)
    self.random_state = check_random_state(random_state)

    self.kernel_regularizer = kernel_regularizer
    self.kernel_initializer = kernel_initializer
    self.batch_normalization = batch_normalization
    self.activation = activation
    self.loss_function = loss_function
    self.metrics = metrics

    self._n_objects = n_objects
    self.max_number_of_objects = max_number_of_objects
    self.num_subsample = num_subsample
    self.n_object_features = n_object_features
    self.batch_size = batch_size
    self.hash_file = None

    self.optimizer = optimizers.get(optimizer)
    self._optimizer_config = self.optimizer.get_config()

    self._use_zeroth_model = add_zeroth_order_model
    self.n_hidden = n_hidden
    self.n_units = n_units

    # Keep only the keywords the dense layers accept.
    self.kwargs = {
        key: value for key, value in kwargs.items()
        if key in allowed_dense_kwargs
    }

    self._construct_layers(kernel_regularizer=self.kernel_regularizer,
                           kernel_initializer=self.kernel_initializer,
                           activation=self.activation,
                           **self.kwargs)

    self._pairwise_model = None
    self.model = None
    self.zero_order_model = None
def __init__(self, cls, config):
    """Keep ``cls`` and ``config`` and prepare the optimizer.

    ``arrangement_index`` is looked up in the data loaded from
    ``config.map_path`` (0 when the arrangement is missing). Note that
    ``config.optimizer`` is overwritten: the optimizer name is replaced by
    the optimizer object built from it with learning rate ``config.lr``.
    """
    self.cls = cls
    self.config = config

    arrangement_map = load_data(self.config.map_path)
    self.arrangement_index = arrangement_map.get(self.config.arrangement, 0)

    # Initialise the optimizer
    self.config.optimizer = optimizers.get({
        'class_name': self.config.optimizer,
        'config': {'lr': self.config.lr},
    })
def clone_optimizer(optimizer):
    """Return a fresh optimizer equivalent to *optimizer*.

    A string is resolved with ``optimizers.get``. An optimizer instance is
    rebuilt from its class name and a copy of its configuration, using
    whichever deserialization function the installed Keras provides.
    """
    if type(optimizer) is str:
        return optimizers.get(optimizer)

    # Requires Keras 1.0.7 since get_config has breaking changes.
    settings = dict(optimizer.get_config().items())
    spec = {
        'class_name': optimizer.__class__.__name__,
        'config': settings,
    }

    # COMPATIBILITY: Keras < 2.0 exposes `optimizer_from_config`,
    # newer releases expose `deserialize`.
    if hasattr(optimizers, 'optimizer_from_config'):
        factory_name = 'optimizer_from_config'
    else:
        factory_name = 'deserialize'
    return getattr(optimizers, factory_name)(spec)
def __init__(
    self,
    estimator,
    gamma,
    discrete_actions,
    optimizer="adam",
    state_dim=None,
    action_dim=None,
    norm_value=2,
    update_theta_every=1,
    horizon=10,
    verbose=0,
):
    """Set up the symbolic FQI loss and the optimizer.

    The loss is built from the difference between the estimator's single
    output and the symbolic target vector ``self.T_Y``: the maximum squared
    difference when ``norm_value`` is ``np.inf``, otherwise
    ``mean(diff ** norm_value) ** (1 / norm_value)``. The estimator must
    expose exactly one input and one output, both held in lists.
    """
    super(GenGradFQI, self).__init__(estimator, state_dim, action_dim,
                                     discrete_actions, gamma, horizon,
                                     verbose)

    # save MDP information
    self.norm_value = norm_value
    if update_theta_every > 0:
        self.update_theta_every = update_theta_every
    else:
        # Non-positive values are stored as -1.
        self.update_theta_every = -1

    # create theano variables
    self.T_Y = T.dvector()

    # define bellman operator (check that BOP has only one output)
    assert isinstance(estimator.inputs, list)
    assert len(estimator.inputs) == 1
    assert isinstance(estimator.outputs, list)
    assert len(estimator.outputs) == 1

    # construct (theano) Bellman error
    residual = self._estimator.outputs[0] - self.T_Y
    if self.norm_value == np.inf:
        self.fqi_loss = T.max(residual ** 2)
    else:
        self.fqi_loss = (T.mean(residual ** self.norm_value)
                         ** (1.0 / self.norm_value))

    # define function to be used for train and drawing actions
    self.train_function = None

    # get keras optimizer
    self.optimizer = optimizers.get(optimizer)

    # validate input data (the output is a list storing the validated input)
    if self.action_dim is not None:
        expected_shapes = [(None, self.action_dim)]
    else:
        expected_shapes = None
    self.discrete_actions = standardize_input_data(
        discrete_actions,
        ["discrete_actions"],
        expected_shapes,
        exception_prefix="discrete_actions",
    )
def __init__(self, q_model, gamma, discrete_actions, optimizer,
             state_dim=None, action_dim=None, incremental=True):
    """Store the problem description and build the symbolic Bellman error.

    Five theano variables are created (state, action, next state, reward and
    the discrete action set) and passed, together with ``gamma``, to
    ``self.bellman_error``; the result is kept in ``self.T_bellman_err``.
    The optimizer is resolved with ``optimizers.get`` and
    ``discrete_actions`` is validated with ``standardize_input_data``.
    """
    # MDP information.
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.incremental = incremental
    self.gamma = gamma

    # Symbolic (theano) variables.
    state = T.matrix()
    action = T.matrix()
    next_state = T.matrix()
    reward = T.vector()
    action_set = T.matrix()

    # Q-function model.
    self.q_model = q_model

    # Symbolic Bellman error.
    self.T_bellman_err = self.bellman_error(
        state, action, next_state, reward, self.gamma, action_set)

    # Functions for training and drawing actions are not built here.
    self.train_function = None
    self.draw_action_function = None

    # Keep the symbolic variables for later use.
    self.T_s = state
    self.T_a = action
    self.T_s_next = next_state
    self.T_r = reward
    self.T_discrete_actions = action_set

    # Resolve the keras optimizer.
    self.optimizer = optimizers.get(optimizer)

    # Validate the action set (the result is a list holding the validated input).
    if self.action_dim is not None:
        expected_shapes = [(None, self.action_dim)]
    else:
        expected_shapes = None
    self.discrete_actions = standardize_input_data(
        discrete_actions,
        ['discrete_actions'],
        expected_shapes,
        check_batch_dim=False,
        exception_prefix='discrete_actions')
def __init__(self, q_model, gamma, discrete_actions, optimizer,
             state_dim=None, action_dim=None, incremental=True):
    """Record the MDP settings, build the Bellman error and get the optimizer.

    The symbolic inputs (three matrices for state, action and next state, a
    reward vector and a matrix of discrete actions) are created first, fed to
    ``self.bellman_error`` along with ``gamma``, and afterwards stored on the
    instance.  ``discrete_actions`` is validated through
    ``standardize_input_data`` with the batch-dimension check disabled.
    """
    # Problem description.
    self.state_dim, self.action_dim = state_dim, action_dim
    self.incremental = incremental
    self.gamma = gamma

    # Symbolic (theano) inputs, created in a fixed order.
    sym_s, sym_a, sym_s_next = T.matrix(), T.matrix(), T.matrix()
    sym_r = T.vector()
    sym_actions = T.matrix()

    # Model of the Q-function.
    self.q_model = q_model

    # Symbolic Bellman error.
    self.T_bellman_err = self.bellman_error(
        sym_s, sym_a, sym_s_next, sym_r, self.gamma, sym_actions)

    # Train / draw-action functions are left unset.
    self.train_function = None
    self.draw_action_function = None

    # Expose the symbolic inputs.
    self.T_s, self.T_a, self.T_s_next = sym_s, sym_a, sym_s_next
    self.T_r = sym_r
    self.T_discrete_actions = sym_actions

    # Keras optimizer.
    self.optimizer = optimizers.get(optimizer)

    # Validated action set (a list storing the validated input).
    shapes = None if self.action_dim is None else [(None, self.action_dim)]
    self.discrete_actions = standardize_input_data(
        discrete_actions, ['discrete_actions'], shapes,
        check_batch_dim=False, exception_prefix='discrete_actions')
def compile(self, optimizer, loss, theano_mode=None):
    """Compile the theano train, test and predict functions.

    The objective is called with the model output only (no target), once for
    the training graph and once for the test graph.  Regularizers are applied
    to the training loss before the optimizer updates are requested.

    Args:
        optimizer: identifier handed to ``optimizers.get``.
        loss: identifier handed to ``objectives.get``.
        theano_mode: forwarded as ``mode`` to every ``theano.function`` call.
    """
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)

    # Symbolic inputs and outputs of the model in train / test mode.
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)
    self.y_train = self.get_output(train=True)
    self.y_test = self.get_output(train=False)

    train_loss = self.loss(self.y_train)
    test_loss = self.loss(self.y_test)
    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'

    self.theano_mode = theano_mode

    for regularizer in self.regularizers:
        train_loss = regularizer(train_loss)

    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)
    updates += self.updates

    # theano.function expects a list of inputs; wrap a single input.
    inputs_are_list = type(self.X_train) == list
    train_ins = self.X_train if inputs_are_list else [self.X_train]
    test_ins = self.X_test if inputs_are_list else [self.X_test]
    predict_ins = self.X_test if inputs_are_list else [self.X_test]

    common = dict(allow_input_downcast=True, mode=theano_mode)
    self._train = theano.function(train_ins, train_loss, updates=updates, **common)
    self._test = theano.function(test_ins, test_loss, **common)
    self._predict = theano.function(predict_ins, self.y_test,
                                    on_unused_input='ignore', **common)
def compile(self, optimizer, loss, log_fcn=lambda x, y: (x, y), joint_model=False, skiplist = []):
    """Build the theano train / predict / test functions.

    The optimizer's ``lr`` is replaced by a symbolic scalar (``self.lr``)
    that becomes an extra input of the train function; the value it had
    before (or ``0`` when it had none) is kept in ``self.old_lr``.

    Args:
        optimizer: identifier handed to ``optimizers.get``.
        loss: identifier handed to ``objectives.get``.
        log_fcn: called as ``log_fcn(message, True)`` for progress messages.
        joint_model: accepted but not used in this method.
        skiplist: any of ``'train'``, ``'predict'``, ``'test'``; the
            corresponding function is not compiled.
    """
    def log(message):
        return log_fcn(message, True)

    log("Entering compile...")
    log("Compiling functions...")

    self.optimizer = optimizers.get(optimizer)

    # Remember the original learning rate, then swap in a symbolic one.
    if 'lr' in dir(self.optimizer):
        self.old_lr = self.optimizer.lr
    else:
        self.old_lr = 0
    self.lr = T.scalar()
    self.optimizer.lr = self.lr

    self.loss = objectives.get(loss)

    first_layer, last_layer = self.layers[0], self.layers[-1]
    self.X = first_layer.input  # input of model
    self.Y = T.tensor3()  # vector word labels

    self.y_train = last_layer.output(train=True)[0]
    self.y_test = last_layer.output(train=False)[0]

    self.train_loss = self.loss(self.Y, self.y_train)
    self.test_score = self.loss(self.Y, self.y_test)

    updates = self.optimizer.get_updates(self.params, self.train_loss)

    if 'train' not in skiplist:
        log("Creating train function...")
        self._train = theano.function(
            [self.X, self.Y, self.lr],
            self.train_loss,
            updates=updates,
            allow_input_downcast=True,
        )

    if 'predict' not in skiplist:
        log("Creating predict function...")
        self._predict = theano.function(
            [self.X], self.y_test, allow_input_downcast=True)

    if 'test' not in skiplist:
        log("Creating test function...")
        self._test = theano.function(
            [self.X, self.Y], self.test_score, allow_input_downcast=True)

    log("Done compiling functions")
def compile(self, optimizer, loss, class_mode="categorical", sample_weight_mode=None):
    '''Configure the learning process.

    Besides the usual inputs, the compiled functions take an extra 1-D
    placeholder (`diff_train` / `diff_test`).  The raw model output is split
    in two at row `diff.shape[0]`; each part is combined with the diff
    placeholder through `K.dot` and the two results are concatenated along
    axis 0 to form `y_train` / `y_test`.

    # Arguments
        optimizer: str (name of optimizer) or optimizer object.
            See [optimizers](optimizers.md).
        loss: str (name of objective function) or objective function.
            See [objectives](objectives.md).
        class_mode: one of "categorical", "binary".
            This is only used for computing classification accuracy or
            using the predict_classes method.
        sample_weight_mode: if you need to do timestep-wise
            sample weighting (2D weights), set this to "temporal".
            "None" defaults to sample-wise weights (1D).

    # Raises
        Exception: if `class_mode` is neither "categorical" nor "binary".
    '''
    self.optimizer = optimizers.get(optimizer)
    self.sample_weight_mode = sample_weight_mode

    self.loss = objectives.get(loss)
    weighted_loss = weighted_objective(self.loss)

    # input of model
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    # raw model outputs, before they are combined with the diff placeholders
    self.single_y_train = self.get_output(train=True)
    self.single_y_test = self.get_output(train=False)

    # extra 1-D inputs fed alongside the model inputs
    self.diff_train = K.placeholder(ndim=1)
    self.diff_test = K.placeholder(ndim=1)

    # split the raw output at row diff.shape[0], dot each part with diff and
    # stack the two results along axis 0
    self.y_train = K.concatenate(
        [K.dot(self.diff_train, self.single_y_train[:self.diff_train.shape[0]]),
         K.dot(self.diff_train, self.single_y_train[self.diff_train.shape[0]:])],
        axis=0)
    self.y_test = K.concatenate(
        [K.dot(self.diff_test, self.single_y_test[:self.diff_test.shape[0]]),
         K.dot(self.diff_test, self.single_y_test[self.diff_test.shape[0]:])],
        axis=0)

    # target of model
    self.y = K.placeholder(ndim=K.ndim(self.y_train))

    # sample weights: 2-D for "temporal" mode, 1-D otherwise
    if self.sample_weight_mode == 'temporal':
        self.weights = K.placeholder(ndim=2)
    else:
        self.weights = K.placeholder(ndim=1)

    # use the last layer's output mask when it provides one
    if hasattr(self.layers[-1], "get_output_mask"):
        mask = self.layers[-1].get_output_mask()
    else:
        mask = None
    train_loss = weighted_loss(self.y, self.y_train, self.weights, mask)
    test_loss = weighted_loss(self.y, self.y_test, self.weights, mask)

    if class_mode == "categorical":
        train_accuracy = K.mean(K.equal(K.argmax(self.y, axis=-1),
                                        K.argmax(self.y_train, axis=-1)))
        test_accuracy = K.mean(K.equal(K.argmax(self.y, axis=-1),
                                       K.argmax(self.y_test, axis=-1)))

    elif class_mode == "binary":
        train_accuracy = K.mean(K.equal(self.y, K.round(self.y_train)))
        test_accuracy = K.mean(K.equal(self.y, K.round(self.y_test)))
    else:
        raise Exception("Invalid class mode:" + str(class_mode))
    self.class_mode = class_mode

    # regularizers only affect the training loss
    for r in self.regularizers:
        train_loss = r(train_loss)
    updates = self.optimizer.get_updates(self.trainable_weights,
                                         self.constraints,
                                         train_loss)
    updates += self.updates

    # input lists: model input(s), then the diff placeholder, then (for
    # train/test) the target and the sample weights
    if type(self.X_train) == list:
        train_ins = self.X_train + [self.diff_train, self.y, self.weights]
        test_ins = self.X_test + [self.diff_test, self.y, self.weights]
        assert type(self.X_test) == list
        predict_ins = self.X_test + [self.diff_test]
    else:
        train_ins = [self.X_train, self.diff_train, self.y, self.weights]
        test_ins = [self.X_test, self.diff_test, self.y, self.weights]
        predict_ins = [self.X_test, self.diff_test]

    # backend functions; the predict/test variants run self.state_updates
    self.__train = K.function(train_ins, [train_loss], updates=updates)
    self.__train_with_acc = K.function(train_ins, [train_loss, train_accuracy], updates=updates)
    self.__predict = K.function(predict_ins, [self.y_test], updates=self.state_updates)
    self.__test = K.function(test_ins, [test_loss], updates=self.state_updates)
    self.__test_with_acc = K.function(test_ins, [test_loss, test_accuracy], updates=self.state_updates)

    # public wrappers: each takes one list `rr`.  For train/test the first
    # element (`r[0]`) of every entry except the last is passed on and the
    # last entry is passed unchanged; for predict `r[0]` of every entry is
    # passed.  NOTE(review): presumably each entry is itself a list/array
    # wrapping the real value -- confirm against the callers.
    self._train = lambda rr: self.__train([r[0] for r in rr[:-1]] + [rr[-1]])
    self._train_with_acc = lambda rr: self.__train_with_acc([r[0] for r in rr[:-1]] + [rr[-1]])
    self._predict = lambda rr: self.__predict([r[0] for r in rr])
    self._test = lambda rr: self.__test([r[0] for r in rr[:-1]] + [rr[-1]])
    self._test_with_acc = lambda rr: self.__test_with_acc([r[0] for r in rr[:-1]] + [rr[-1]])
def compile(self, optimizer, loss, theano_mode=None):
    """Compile the theano train and test functions.

    The objective is called directly on the symbolic model *input* (train
    and test variants); no target, sample weights, accuracy or predict
    function are built by this method.

    Args:
        optimizer: identifier handed to ``optimizers.get``.
        loss: identifier handed to ``objectives.get``.
        theano_mode: forwarded as ``mode`` to ``theano.function``.
    """
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)

    # Symbolic model inputs.
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    train_loss = self.loss(self.X_train)
    test_loss = self.loss(self.X_test)
    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'

    self.theano_mode = theano_mode

    for regularizer in self.regularizers:
        train_loss = regularizer(train_loss)

    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)

    # theano.function expects a list of inputs; wrap a single input.
    inputs_are_list = type(self.X_train) == list
    train_ins = self.X_train if inputs_are_list else [self.X_train]
    test_ins = self.X_test if inputs_are_list else [self.X_test]

    self._train = theano.function(
        train_ins, train_loss,
        updates=updates, allow_input_downcast=True, mode=theano_mode)
    self._test = theano.function(
        test_ins, test_loss,
        allow_input_downcast=True, mode=theano_mode)
def compile(self, optimizer, metrics=[]):
    """Compile the actor, the critic and their target networks.

    Also builds ``self.actor_train_fn``, a backend function that applies the
    actor optimizer's updates for the loss ``-mean(critic(state, actor(state)))``.

    Args:
        optimizer: a single optimizer (name or object), used for the actor
            and cloned with ``clone_optimizer`` for the critic, or a
            list/tuple of exactly two optimizers ``(actor, critic)``.
        metrics: metrics for the critic; ``mean_q`` is always added.  The
            argument itself is never modified.

    Raises:
        ValueError: if ``optimizer`` is a list/tuple whose length is not 2.
    """
    # Build a new list instead of extending in place: ``metrics += [...]``
    # would grow the shared default list (and the caller's own list) on
    # every call.
    metrics = list(metrics) + [mean_q]

    if type(optimizer) in (list, tuple):
        if len(optimizer) != 2:
            raise ValueError('More than two optimizers provided. Please only provide a maximum of two optimizers, the first one for the actor and the second one for the critic.')
        actor_optimizer, critic_optimizer = optimizer
    else:
        actor_optimizer = optimizer
        critic_optimizer = clone_optimizer(optimizer)
    if type(actor_optimizer) is str:
        actor_optimizer = optimizers.get(actor_optimizer)
    if type(critic_optimizer) is str:
        critic_optimizer = optimizers.get(critic_optimizer)
    assert actor_optimizer != critic_optimizer

    if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(metrics[1], '__len__'):
        actor_metrics, critic_metrics = metrics
    else:
        actor_metrics = critic_metrics = metrics

    def clipped_error(y_true, y_pred):
        return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

    # Compile target networks. We only use them in feed-forward mode, hence we can pass any
    # optimizer and loss since we never use it anyway.
    self.target_actor = clone_model(self.actor, self.custom_model_objects)
    self.target_actor.compile(optimizer='sgd', loss='mse')
    self.target_critic = clone_model(self.critic, self.custom_model_objects)
    self.target_critic.compile(optimizer='sgd', loss='mse')

    # We also compile the actor. We never optimize the actor using Keras but instead compute
    # the policy gradient ourselves. However, we need the actor in feed-forward mode, hence
    # we also compile it with any optimizer and loss.
    self.actor.compile(optimizer='sgd', loss='mse')

    # Compile the critic.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
        critic_updates = get_soft_target_model_updates(self.target_critic, self.critic, self.target_model_update)
        critic_optimizer = AdditionalUpdatesOptimizer(critic_optimizer, critic_updates)
    self.critic.compile(optimizer=critic_optimizer, loss=clipped_error, metrics=critic_metrics)

    # Combine actor and critic so that we can get the policy gradient.
    # Assuming critic's state inputs are the same as actor's.
    combined_inputs = []
    state_inputs = []
    for i in self.critic.input:
        if i == self.critic_action_input:
            # Placeholder slot, filled with the actor output below.
            combined_inputs.append([])
        else:
            combined_inputs.append(i)
            state_inputs.append(i)
    combined_inputs[self.critic_action_input_idx] = self.actor(state_inputs)

    combined_output = self.critic(combined_inputs)

    updates = actor_optimizer.get_updates(
        params=self.actor.trainable_weights, loss=-K.mean(combined_output))
    if self.target_model_update < 1.:
        # Include soft target model updates.
        updates += get_soft_target_model_updates(self.target_actor, self.actor, self.target_model_update)
    updates += self.actor.updates  # include other updates of the actor, e.g. for BN

    # Finally, combine it all into a callable function.
    if K.backend() == 'tensorflow':
        self.actor_train_fn = K.function(state_inputs + [K.learning_phase()],
                                         [self.actor(state_inputs)], updates=updates)
    else:
        if self.uses_learning_phase:
            state_inputs += [K.learning_phase()]
        self.actor_train_fn = K.function(state_inputs, [self.actor(state_inputs)], updates=updates)
    self.actor_optimizer = actor_optimizer

    self.compiled = True
def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
    """Compile the theano train / test / predict functions, with accuracy variants.

    Args:
        optimizer: identifier handed to ``optimizers.get``.
        loss: identifier handed to ``objectives.get``.
        class_mode: ``"categorical"`` (accuracy from argmax over the last
            axis) or ``"binary"`` (accuracy from rounded outputs).
        theano_mode: forwarded as ``mode`` to every ``theano.function`` call.

    Raises:
        Exception: if ``class_mode`` is neither of the two supported values.
    """
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)
    weighted_loss = weighted_objective(objectives.get(loss))

    # Symbolic inputs and outputs of the model in train / test mode.
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)
    self.y_train = self.get_output(train=True)
    self.y_test = self.get_output(train=False)

    # Target and weights are symbolic tensors derived from the train output.
    self.y = T.zeros_like(self.y_train)
    self.weights = T.ones_like(self.y_train)

    last_layer = self.layers[-1]
    mask = last_layer.get_output_mask() if hasattr(last_layer, "get_output_mask") else None

    train_loss = weighted_loss(self.y, self.y_train, self.weights, mask)
    test_loss = weighted_loss(self.y, self.y_test, self.weights, mask)
    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'
    self.y.name = 'y'

    if class_mode == "categorical":
        true_classes = T.argmax(self.y, axis=-1)
        train_accuracy = T.mean(T.eq(true_classes, T.argmax(self.y_train, axis=-1)))
        test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))
    elif class_mode == "binary":
        train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
        test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
    else:
        raise Exception("Invalid class mode:" + str(class_mode))
    self.class_mode = class_mode
    self.theano_mode = theano_mode

    for regularizer in self.regularizers:
        train_loss = regularizer(train_loss)
    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)
    updates += self.updates

    # Input lists: model input(s) followed by target and weights.
    extras = [self.y, self.weights]
    if type(self.X_train) == list:
        train_ins = self.X_train + extras
        test_ins = self.X_test + extras
        predict_ins = self.X_test
    else:
        train_ins = [self.X_train] + extras
        test_ins = [self.X_test] + extras
        predict_ins = [self.X_test]

    common = dict(allow_input_downcast=True, mode=theano_mode)
    self._train = theano.function(train_ins, train_loss, updates=updates, **common)
    self._train_with_acc = theano.function(
        train_ins, [train_loss, train_accuracy], updates=updates, **common)
    self._predict = theano.function(predict_ins, self.y_test, **common)
    self._test = theano.function(test_ins, test_loss, **common)
    self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy], **common)
def model_from_dict_w_opt(model_dict, custom_objects=None):
    """Builds a model from a serialized model using `to_dict_w_opt`

    `model_dict` is left untouched: the metrics list and the loss
    specification are copied before custom objects are substituted, so the
    same dictionary can be loaded repeatedly.

    Args:
        model_dict(dict): a serialized Keras model
        custom_objects(dict, optional): a dictionary mapping custom objects
            names to serialized custom objects (Layers, functions, etc.);
            each value is passed as keyword arguments to `deserialize`.

    Returns:
        A Keras.Model which is compiled if the information about the
        optimizer is available.

    Raises:
        Exception: if an optimizer is present and the model class is neither
            "Sequential" nor "Model".
    """
    from keras import optimizers
    from keras.utils.layer_utils import layer_from_config

    if custom_objects is None:
        custom_objects = dict()

    custom_objects = {k: deserialize(**custom_objects[k])
                      for k in custom_objects}

    # Deserialized plain functions are called and replaced by their result.
    for k in custom_objects:
        if inspect.isfunction(custom_objects[k]):
            custom_objects[k] = custom_objects[k]()

    model = layer_from_config(model_dict['config'],
                              custom_objects=custom_objects)

    if 'optimizer' in model_dict:
        # Copy so that appending custom metrics does not grow the list
        # stored in `model_dict` on every call.
        metrics = list(model_dict.get("metrics") or [])
        ser_metrics = model_dict.get("ser_metrics", [])
        for k in custom_objects:
            if inspect.isfunction(custom_objects[k]):
                function_name = custom_objects[k].__name__
                if k in ser_metrics or function_name in ser_metrics:
                    metrics.append(custom_objects[k])

        model_name = model_dict['config'].get('class_name')

        def resolve_loss(value):
            # Swap a loss given by custom-object name for the object itself.
            for name in custom_objects:
                if value == name:
                    return custom_objects[name]
            return value

        # if it has an optimizer, the model is assumed to be compiled.
        # Custom losses are substituted into a copy, whatever form the loss
        # was serialized in (per-output dict, list, or a single value).
        loss = model_dict.get('loss')
        if isinstance(loss, dict):
            loss = {output: resolve_loss(value)
                    for output, value in loss.items()}
        elif isinstance(loss, (list, tuple)):
            loss = [resolve_loss(value) for value in loss]
        else:
            loss = resolve_loss(loss)

        optimizer_params = dict(model_dict.get('optimizer'))
        optimizer_name = optimizer_params.pop('name')
        optimizer = optimizers.get(optimizer_name, optimizer_params)

        if model_name == "Sequential":
            sample_weight_mode = model_dict.get('sample_weight_mode')
            model.compile(loss=loss,
                          optimizer=optimizer,
                          sample_weight_mode=sample_weight_mode,
                          metrics=metrics)
        elif model_name == "Model":
            sample_weight_mode = model_dict.get('sample_weight_mode')
            loss_weights = model_dict.get('loss_weights', None)
            model.compile(loss=loss,
                          optimizer=optimizer,
                          sample_weight_mode=sample_weight_mode,
                          loss_weights=loss_weights,
                          metrics=metrics)
        else:  # pragma: no cover
            raise Exception('{} model, must be in Sequential, '
                            'Model'.format(model_name))

    return model
def __init__(self, bellman_model, q_model, steps_ahead, gamma,
             discrete_actions, optimizer, state_dim=None, action_dim=None,
             incremental=True, norm_value=np.inf, update_theta_every=1,
             steps_per_theta_update=None, independent=False, verbose=0,
             term_condition=None):
    """Store the settings and build the symbolic Bellman error.

    When ``independent`` is false a single parameter tensor (the Bellman
    model's only input) is used and the error comes from
    ``self.k_step_bellman_error``.  Otherwise ``steps_ahead - 1`` additional
    ``fmatrix`` parameters are created and the error is the sum of
    ``self.bellman_error(...)[0]`` over all of them.

    ``term_condition`` given as a string is looked up in ``DEFAULT_TERM``;
    any other value is stored unchanged.
    """
    # MDP / algorithm settings.
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.incremental = incremental
    self.gamma = gamma
    self.norm_value = norm_value
    # A non-positive update period is stored as -1.
    if update_theta_every > 0:
        self.update_theta_every = update_theta_every
    else:
        self.update_theta_every = -1
    self.verbose = verbose
    self.independent = independent
    if steps_per_theta_update is None:
        self.steps_per_theta_update = steps_ahead
    else:
        self.steps_per_theta_update = max(1, steps_per_theta_update)

    # Symbolic (theano) variables.
    sym_s = T.dmatrix()
    sym_a = T.dmatrix()
    sym_s_next = T.dmatrix()
    sym_r = T.dvector()
    sym_absorbing = T.dvector()
    sym_actions = T.dmatrix()

    # Models of the Bellman operator approximation and of the Q-function.
    self.bellman_model = bellman_model
    self.q_model = q_model
    self.steps_ahead = steps_ahead

    # The Bellman model must have exactly one input and one output, as lists.
    assert isinstance(bellman_model.inputs, list)
    assert len(bellman_model.inputs) == 1
    assert isinstance(bellman_model.outputs, list)
    assert len(bellman_model.outputs) == 1

    # Symbolic Bellman error.
    self.theta_list = [bellman_model.inputs[0]]
    if independent:
        # theta_0, theta_1, ..., theta_steps
        self.theta_list += [T.fmatrix(str(idx))
                            for idx in range(steps_ahead - 1)]
        total_err = None
        for theta in self.theta_list:
            step_err = self.bellman_error(
                sym_s, sym_a, sym_s_next, sym_r, theta, gamma, sym_actions)[0]
            total_err = step_err if total_err is None else total_err + step_err
        self.T_bellman_err = total_err
        assert len(self.theta_list) == steps_ahead
    else:
        self.T_bellman_err, _ = self.k_step_bellman_error(
            sym_s, sym_a, sym_s_next, sym_r, sym_absorbing,
            self.theta_list[0], gamma, sym_actions, steps_ahead)
        assert len(self.theta_list) == 1

    # Functions for training and drawing actions are not built here.
    self.train_function = None
    self.draw_action_function = None

    # Keep the symbolic variables for later use.
    self.T_s = sym_s
    self.T_a = sym_a
    self.T_s_next = sym_s_next
    self.T_r = sym_r
    self.T_discrete_actions = sym_actions
    self.T_absorbing = sym_absorbing

    # Keras optimizer.
    self.optimizer = optimizers.get(optimizer)

    # Validated action set (a list storing the validated input).
    shapes = None if self.action_dim is None else [(None, self.action_dim)]
    self.discrete_actions = standardize_input_data(
        discrete_actions, ['discrete_actions'], shapes,
        exception_prefix='discrete_actions')

    if isinstance(term_condition, str):
        self.term_condition = DEFAULT_TERM[term_condition]
    else:
        self.term_condition = term_condition
def adversarial_compile(self, adversarial_optimizer, player_optimizers, loss, compile_kwargs={},
                        **kwargs):
    """
    Configures the learning process.

    Every player model in ``self.layers`` is compiled with its own optimizer;
    inputs are taken from the first player model, while outputs, targets,
    loss functions and sample weights are the concatenation over all players.

    :param adversarial_optimizer: instance of AdversarialOptimizer
    :param player_optimizers: list of optimizers for each player
    :param loss: loss function or function name
    :param compile_kwargs: extra keyword arguments for each player's ``compile``
    :param kwargs: additional arguments to function compilation
    :return:
    """
    self._function_kwargs = kwargs
    self.adversarial_optimizer = adversarial_optimizer
    assert (len(player_optimizers) == self.player_count)

    self.optimizers = [optimizers.get(player_opt) for player_opt in player_optimizers]
    self.loss = loss
    self.optimizer = None

    # Build player models
    for player_opt, player_model in zip(self.optimizers, self.layers):
        player_model.compile(player_opt, loss=self.loss, **compile_kwargs)

    self.train_function = None
    self.test_function = None

    # Inputs are same for each model, so they are read from the first one.
    self.internal_input_shapes = self.layers[0].internal_input_shapes
    self.input_names = self.layers[0].input_names
    self.inputs = self.layers[0].inputs

    # Outputs are concatenated player models
    models = self.layers

    def gather(attribute):
        # Flatten the given per-model list attribute over all player models.
        return [item for m in models for item in getattr(m, attribute)]

    self.internal_output_shapes = gather('internal_output_shapes')
    self.loss_functions = gather('loss_functions')
    self.targets = gather('targets')
    self.outputs = gather('outputs')
    self.sample_weights = gather('sample_weights')
    self.sample_weight_modes = gather('sample_weight_modes')

    # for each target, output name is {player}_{target}
    self.output_names = []
    for player, model in enumerate(models[:self.player_count] if False else
                                   [models[i] for i in range(self.player_count)]):
        for name in model.output_names:
            self.output_names.append("{}_{}".format(self.player_names[player], name))

    # for each metric, metric name is {player}_{metric}
    self.metrics_names = ["loss"]
    for player in range(self.player_count):
        prefix = self.player_names[player]
        self.metrics_names.extend(
            "{}_{}".format(prefix, name) for name in models[player].metrics_names)

    # total loss is sum of losses
    self.total_loss = np.float32(0)
    for model in models:
        self.total_loss += model.total_loss

    # Keras-2
    self._feed_loss_fns = self.loss_functions
    self._feed_inputs = self.inputs
    self._feed_input_names = self.input_names
    self._feed_input_shapes = self.internal_input_shapes
    self._feed_outputs = self.outputs
    self._feed_output_names = self.output_names
    self._feed_output_shapes = self.internal_output_shapes
    self._feed_sample_weights = self.sample_weights
    self._feed_sample_weight_modes = self.sample_weight_modes
def optimizer_from_params(params):
    """Return the optimizer that ``kopt.get`` builds from ``params``.

    ``params.optimizer`` is passed as the identifier and
    ``params.optimizer_params`` as the second argument.
    """
    identifier = params.optimizer
    settings_for_optimizer = params.optimizer_params
    return kopt.get(identifier, settings_for_optimizer)
def compile(self, optimizer, loss, log_fcn=lambda x, y: (x, y), joint_model=False, skiplist = []):
    """Build the scan-based train / predict / test graphs and functions.

    The model is unrolled with ``theano.scan`` for ``self.steps`` steps: at
    every step the first layer's input is set to the previous output, and
    the previous cell/output states are written into each layer that has
    both ``C1`` and ``H1`` attributes (``self.CH_layers``).

    The optimizer's ``lr`` is replaced by a symbolic scalar (``self.lr``)
    that is an extra input of the train function; the previous value (or
    ``0`` when there was none) is kept in ``self.old_lr``.

    Args:
        optimizer: identifier handed to ``optimizers.get``.
        loss: identifier handed to ``objectives.get``; the result is wrapped
            with ``create_masked_loss``.
        log_fcn: called as ``log_fcn(message, True)`` for progress messages.
        joint_model: when true, the symbolic results/losses are still built
            but no ``theano.function`` is compiled.
        skiplist: any of ``'train'``, ``'predict'``, ``'test'``; the
            corresponding graph/function is not built.
    """
    def log(message):
        return log_fcn(message, True)

    log("Entering compile...")

    self.optimizer = optimizers.get(optimizer)
    self.old_lr = self.optimizer.lr if 'lr' in dir(self.optimizer) else 0
    self.lr = T.scalar()
    self.optimizer.lr = self.lr

    objective = objectives.get(loss)
    self.loss = create_masked_loss(objective)

    self.Y = T.tensor3()   # vector word labels
    self.M = T.tensor3()   # mask
    self.X1 = T.tensor3()  # first sequence

    log("Compiling functions...")

    # A real list (not a lazy ``filter`` object): it is measured with
    # ``len`` and iterated several times below.
    self.CH_layers = [layer for layer in self.layers
                      if hasattr(layer, 'C1') and hasattr(layer, 'H1')]

    # Loop inner function
    def make_step(train):
        # create closure around train
        def _step(last_X, *last_S):
            # set top layer's input = last output
            self.layers[0].input = last_X

            # C and H have to be manually passed into FlatLSTM
            # layers for each iteration of the loop.
            # last_S is variadic, as inputs to _step need to be
            # tensors.
            n_state_layers = len(self.CH_layers)
            last_C = last_S[:n_state_layers]
            last_H = last_S[n_state_layers:]
            for i, layer in enumerate(self.CH_layers):
                layer.c_tm1 = last_C[i]
                layer.h_tm1 = last_H[i]

            # Get the following:
            # - final layer's output
            # - each layer's C (cell memory)
            # - each layer's H (layer's last output)
            out, C, H = self.layers[-1].output(train=train)
            return [out] + C + H
        return _step

    def scan_outputs_info():
        # Initial values for scan: first token, then every C1, then every H1.
        return ([dict(initial=self.X1, taps=[-1])] +
                [dict(initial=layer.C1, taps=[-1]) for layer in self.CH_layers] +
                [dict(initial=layer.H1, taps=[-1]) for layer in self.CH_layers])

    def state_inputs():
        # Initial C and H of every stateful layer, as function inputs.
        return ([layer.C1 for layer in self.CH_layers] +
                [layer.H1 for layer in self.CH_layers])

    # Create train, predict functions
    train_step = make_step(True)
    predict_step = make_step(False)

    build_train = 'train' not in skiplist
    build_predict = 'predict' not in skiplist
    build_test = 'test' not in skiplist
    # The predict graph is shared by the predict and test functions.
    need_predict_graph = build_predict or build_test

    # Train and predict result: loop over step function n_steps times.
    # Initial values are set by the calling function: the first sequence
    # token, and an initial C and H for each layer.
    # (this produces a sequence of length n_steps)

    # Train result can take an extremely long time to compile.
    if build_train:
        log("Creating train result (n_steps={0})...".format(self.steps))
        self._train_result_scan, _ = theano.scan(fn=train_step,
                                                 outputs_info=scan_outputs_info(),
                                                 n_steps=self.steps)

    if need_predict_graph:
        log("Creating predict result (n_steps={0})...".format(self.steps))
        self._predict_result_scan, _ = theano.scan(fn=predict_step,
                                                   outputs_info=scan_outputs_info(),
                                                   n_steps=self.steps)

    # Fixes dimensions from result function to produce the
    # correct ordering of (sequence, token, vector)
    # (dimension #2 is an artefact of porting the functions to a loop)
    if build_train:
        self._train_result = self._train_result_scan[0]
        self._train_result = self._train_result.dimshuffle(1, 0, 3, 2)
        self._train_result = self._train_result.flatten(ndim=3)

    if need_predict_graph:
        self._predict_result = self._predict_result_scan[0]
        self._predict_result = self._predict_result.dimshuffle(1, 0, 3, 2)
        self._predict_result = self._predict_result.flatten(ndim=3)

    # Create train, predict, testing functions
    if build_train:
        log("Setting train loss and updates...")
        self.train_loss = self.loss(self.Y, self._train_result, self.M)
        self.updates = self.optimizer.get_updates(self.params, self.train_loss)
        if not joint_model:
            log("Creating train function...")
            self.__train = theano.function([self.X1, self.Y, self.M] + [self.lr] +
                                           state_inputs(),
                                           self.train_loss,
                                           updates=self.updates,
                                           allow_input_downcast=True)

    if build_predict:
        self.predict_result = self._predict_result
        if not joint_model:
            log("Creating predict function...")
            self.__predict = theano.function([self.X1] + state_inputs(),
                                             self.predict_result,
                                             allow_input_downcast=True)

    if build_test:
        self.test_score = self.loss(self.Y, self._predict_result, self.M)
        if not joint_model:
            log("Creating test function...")
            self.__test = theano.function([self.X1, self.Y, self.M] + state_inputs(),
                                          self.test_score,
                                          allow_input_downcast=True)

    log("Done compiling functions")
def compile(self, optimizer="sgd", loss="mse", policy_rule="max", sample_weight_mode=None):
    """Initialize model weights and compile functions

    The wrapped ``self.keras_model`` is built, its outputs ("values") are
    passed through the policy rule to obtain the quantities the loss is
    computed on, and four backend functions are created: ``_train``,
    ``_test``, ``_values_train`` and ``_values_test``.

    Notes
    -----
    This function was modified from `keras.models.compile` which is under
    MIT License.
    """
    net = self.keras_model
    net.build()

    self.policy_rule = policies.get(policy_rule)
    self.optimizer = optimizers.get(optimizer)
    self.sample_weight_mode = sample_weight_mode
    self.loss = objectives.get(loss)
    weighted_loss = weighted_objective(self.loss)

    # input of model
    self.X_train = net.get_input(train=True)
    self.X_test = net.get_input(train=False)

    # calculate policy values
    values_train = net.get_output(train=True)
    values_test = net.get_output(train=False)
    self.y_train = self.policy_rule(values_train)
    self.y_test = self.policy_rule(values_test)

    # target of model
    self.y = K.placeholder(ndim=K.ndim(self.y_train))

    # sample weights: 2-D for "temporal" mode, 1-D otherwise
    weights_ndim = 2 if self.sample_weight_mode == 'temporal' else 1
    self.weights = K.placeholder(ndim=weights_ndim)

    # use the last layer's output mask when it provides one
    if hasattr(net.layers[-1], "get_output_mask"):
        mask = net.layers[-1].get_output_mask()
    else:
        mask = None

    train_loss = weighted_loss(self.y, self.y_train, self.weights, mask)
    test_loss = weighted_loss(self.y, self.y_test, self.weights, mask)

    for regularizer in net.regularizers:
        train_loss = regularizer(train_loss)
    updates = self.optimizer.get_updates(net.trainable_weights,
                                         net.constraints,
                                         train_loss)
    updates += net.updates

    # input lists: model input(s), then target and weights for train/test
    if type(self.X_train) == list:
        train_ins = self.X_train + [self.y, self.weights]
        test_ins = self.X_test + [self.y, self.weights]
        assert type(self.X_test) == list
        values_ins_test = self.X_test
        values_ins_train = self.X_train
    else:
        train_ins = [self.X_train, self.y, self.weights]
        test_ins = [self.X_test, self.y, self.weights]
        values_ins_test = [self.X_test]
        values_ins_train = [self.X_train]

    self._train = K.function(train_ins, [train_loss], updates=updates)
    self._values_train = K.function(values_ins_train, [values_train],
                                    updates=net.state_updates)
    self._values_test = K.function(values_ins_test, [values_test],
                                   updates=net.state_updates)
    # TODO: check if this is necessary
    self._test = K.function(test_ins, [test_loss], updates=net.state_updates)
# Default settings.
# NOTE(review): these assignments look like the body of the `Defaults`
# object passed to `settings.from_cli` below; its header is not visible in
# this fragment -- confirm the intended nesting.
max_train_examples = None
max_develtest_examples = 100000  # for faster develtest
examples_as_indices = True
hidden_sizes = [300]
hidden_activation = 'hard_sigmoid'  # 'relu'
batch_size = 50
epochs = 10
loss = 'categorical_crossentropy'  # 'mse'
verbosity = 1  # 0=quiet, 1=progress bar, 2=one line per epoch
iobes = False  # Map tags to IOBES on input
token_level_eval = False  # Token-level eval even if IOB-like tagging
optimizer = 'adam'  # 'sgd'
test = False

# Build the run configuration from the command line and the defaults, then
# resolve the configured optimizer through `optimizers.get`.
config = settings.from_cli(['datadir', 'wordvecs'], Defaults)
optimizer = optimizers.get(config.optimizer)

# Logging output is named after the last component of the data directory.
output_name = 'mlp--' + path.basename(config.datadir.rstrip('/'))
common.setup_logging(output_name)
settings.log_with(config, info)

# Data
data = input_data.read_data_sets(config.datadir, config.wordvecs, config)
embedding = common.word_to_vector_to_matrix(config.word_to_vector)

# Optionally crop the training set to the configured maximum size.
if config.max_train_examples and len(data.train) > config.max_train_examples:
    warn('cropping train data from %d to %d' % (len(data.train),
                                                config.max_train_examples))
    data.train.crop(config.max_train_examples)