def get_session():
    """Get the globally defined TensorFlow session.

    If the session is not already defined, then the function will create
    a global session.

    Returns:
        _ED_SESSION: tf.InteractiveSession.
    """
    global _ED_SESSION
    if tf.get_default_session() is None:
        _ED_SESSION = tf.InteractiveSession()
    else:
        _ED_SESSION = tf.get_default_session()

    # Probe for Keras while silencing its noisy import-time output.
    # Fix vs. original: the devnull handle was leaked, and stderr stayed
    # redirected if the import raised anything other than ImportError;
    # try/finally guarantees both restoration and handle closure.
    import os
    save_stderr = sys.stderr
    devnull = open(os.devnull, 'w')
    try:
        sys.stderr = devnull  # suppress keras import
        from keras import backend as K
        have_keras = True
    except ImportError:
        have_keras = False
    finally:
        sys.stderr = save_stderr
        devnull.close()

    if have_keras:
        K.set_session(_ED_SESSION)
    return _ED_SESSION
def __init__(self, action_size):
    """Global A3C agent: builds actor/critic networks, their custom
    training ops, a shared TF session, and TensorBoard summaries."""
    # environment settings
    self.state_size = (84, 84, 4)
    self.action_size = action_size
    self.discount_factor = 0.99
    self.no_op_steps = 30

    # optimizer parameters
    self.actor_lr = 2.5e-4
    self.critic_lr = 2.5e-4
    self.threads = 8

    # actor/critic networks plus their hand-built training ops
    self.actor, self.critic = self.build_model()
    self.optimizer = [self.actor_optimizer(), self.critic_optimizer()]

    # one shared session, registered with Keras, variables initialized
    self.sess = tf.InteractiveSession()
    K.set_session(self.sess)
    self.sess.run(tf.global_variables_initializer())

    # TensorBoard plumbing
    (self.summary_placeholders,
     self.update_ops,
     self.summary_op) = self.setup_summary()
    self.summary_writer = tf.summary.FileWriter('summary/breakout_a3c', self.sess.graph)
def train_model_tensorflow(self, X_train, Y_train, s_date):
    """Train an LSTM regressor on windowed feature data and save it.

    Args:
        X_train: training inputs shaped for an LSTM with
            input_shape=(30, 23) — assumes (samples, 30, 23); TODO confirm.
        Y_train: regression targets.
        s_date: date string used to name the model output directory.

    Side effects: writes model JSON and HDF5 weights under
    ../model/keras/lstm/<s_date>/.
    """
    print("training model %s model.cptk" % s_date)

    def baseline_model():
        # 30-step window of 23 features -> single regression output.
        model = Sequential()
        model.add(LSTM(23, input_shape=(30, 23)))
        model.add(Dense(1, init='he_normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    # Tensorflow GPU optimization: grow GPU memory on demand instead of
    # grabbing it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    self.estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=20, batch_size=64, verbose=1)
    self.estimator.fit(X_train, Y_train)
    print("finish training model")

    # saving model
    # Fix vs. original: open(...).write(...) leaked the JSON file handle;
    # a context manager guarantees it is flushed and closed.
    if not os.path.exists('../model/keras/lstm/%s/' % s_date):
        os.makedirs('../model/keras/lstm/%s/' % s_date)
    model_name = '../model/keras/lstm/%s/model.h5' % s_date
    json_model = self.estimator.model.to_json()
    with open(model_name.replace('h5', 'json'), 'w') as json_file:
        json_file.write(json_model)
    self.estimator.model.save_weights(model_name, overwrite=True)
def start_session_get_args_and_model(intra_ops, inter_ops, semantics_json, weights_hd5=None, tensor_type=None):
    """Reset Keras' TF session with the requested thread parallelism, then
    load args and model described by a semantics JSON file."""
    # Tear down any existing session before installing the new one.
    K.clear_session()
    K.get_session().close()
    session_config = K.tf.ConfigProto(
        intra_op_parallelism_threads=intra_ops,
        inter_op_parallelism_threads=inter_ops)
    session_config.gpu_options.allow_growth = True  # claim GPU memory lazily
    K.set_session(K.tf.Session(config=session_config))
    return args_and_model_from_semantics(semantics_json, weights_hd5, tensor_type)
def load(path, opts, vars):
    """Load a Keras model from <path>.<network>.h5, warm up its predict
    function, and stash graph/session/model into the `vars` dict."""
    try:
        print('\nLoading model\nCreating session and graph')
        server = tf.train.Server.create_local_server()
        sess = tf.Session(server.target)
        graph = tf.get_default_graph()
        backend.set_session(sess)

        model_path = path + '.' + opts['network'] + '.h5'
        print('Loading model from {}'.format(model_path))
        model = load_model(model_path)

        print('Create prediction function')
        model._make_predict_function()

        # Warm up with a single dummy sample so the predict graph is fully
        # built before real requests arrive.
        with graph.as_default():
            with sess.as_default():
                warmup_shape = list(model.layers[0].input_shape)
                warmup_shape[0] = 1
                model.predict(np.zeros(tuple(warmup_shape)))

        vars['graph'] = graph
        vars['session'] = sess
        vars['model'] = model
    except Exception as e:
        print_exception(e, 'load')
        sys.exit()
def train():
    """Train the CNN with SGD and an exponentially decaying learning rate
    (legacy TF 0.x summary/init API)."""
    global_step = tf.Variable(0, trainable=False)

    # Placeholders: image batches and one-hot labels.
    img = tf.placeholder(tf.float32, shape=(None, 120, 160, 3))
    lbs = tf.placeholder(tf.float32, shape=(None, 10))

    preds = cnn_model.load_model(img)
    loss = tf.reduce_mean(categorical_crossentropy(lbs, preds))
    tf.scalar_summary('loss', loss)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step,
                                    cnn_input.decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)
    train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss, global_step=global_step)

    sess = tf.Session()
    K.set_session(sess)
    sess.run(tf.initialize_all_variables())
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    with sess.as_default():
        train_data = cnn_input.load_train_data()
        for epoch in cnn_input.epochs(train_data):
            for batch in cnn_input.batches(epoch):
                # learning_phase=1 tells Keras layers we are training.
                train_step.run(feed_dict={img: batch[0],
                                          lbs: batch[1],
                                          K.learning_phase(): 1})
def main(_):
    """Compute bottleneck features for the train/validation splits of the
    configured dataset and pickle them to disk.

    Side effects: writes two pickle files named
    <network>_<dataset>_bottleneck_features_{train,validation}.p.
    """
    if FLAGS.dataset == 'cifar10':
        (X_train, y_train), (_, _) = cifar10.load_data()
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
    else:
        with open('data/train.p', mode='rb') as f:
            train = pickle.load(f)
        X_train, X_val, y_train, y_val = train_test_split(train['features'], train['labels'], test_size=0.33, random_state=0)

    train_output_file = "{}_{}_{}.p".format(FLAGS.network, FLAGS.dataset, 'bottleneck_features_train')
    validation_output_file = "{}_{}_{}.p".format(FLAGS.network, FLAGS.dataset, 'bottleneck_features_validation')

    print("Resizing to", (w, h, ch))
    print("Saving to ...")
    print(train_output_file)
    print(validation_output_file)

    with tf.Session() as sess:
        K.set_session(sess)
        K.set_learning_phase(1)

        model = create_model()

        print('Bottleneck training')
        train_gen = gen(sess, X_train, y_train, batch_size)
        bottleneck_features_train = model.predict_generator(train_gen(), X_train.shape[0])
        data = {'features': bottleneck_features_train, 'labels': y_train}
        # Fix vs. original: pickle.dump(data, open(...)) leaked the file
        # handle; context managers guarantee flush + close.
        with open(train_output_file, 'wb') as f:
            pickle.dump(data, f)

        print('Bottleneck validation')
        val_gen = gen(sess, X_val, y_val, batch_size)
        bottleneck_features_validation = model.predict_generator(val_gen(), X_val.shape[0])
        data = {'features': bottleneck_features_validation, 'labels': y_val}
        with open(validation_output_file, 'wb') as f:
            pickle.dump(data, f)
def test_sample_attn_lstm_architecture(self):
    """Tests that an attention architecture can be created without crash."""
    # Dedicated graph + session so this test does not leak state into others.
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
        max_depth = 5
        n_test = 5
        n_support = 11
        n_feat = 10
        batch_size = 3
        support_model = SequentialSupportGraph(n_feat)
        # Add layers
        support_model.add(GraphConv(64, activation='relu'))
        # Need to add batch-norm separately to test/support due to differing
        # shapes.
        support_model.add_test(BatchNormalization(epsilon=1e-5, mode=1))
        support_model.add_support(BatchNormalization(epsilon=1e-5, mode=1))
        support_model.add(GraphPool())
        # Apply an attention lstm layer
        support_model.join(AttnLSTMEmbedding(n_test, n_support, max_depth))
        # Gather Projection
        support_model.add(Dense(128, activation='relu'))
        support_model.add_test(BatchNormalization(epsilon=1e-5, mode=1))
        support_model.add_support(BatchNormalization(epsilon=1e-5, mode=1))
        support_model.add(GraphGather(batch_size, activation="tanh"))
def new_session():
    """Replace the current Keras TensorFlow session with a fresh one that
    uses soft device placement and on-demand GPU memory growth.

    No-op on non-TensorFlow backends.
    """
    if K.backend() == 'tensorflow':  # pragma: no cover
        import tensorflow as tf
        K.clear_session()
        tf_config = tf.ConfigProto(allow_soft_placement=True)
        tf_config.gpu_options.allow_growth = True
        K.set_session(tf.Session(config=tf_config))
def clear_session():
    """Reset the Keras TensorFlow session, replacing it with a fresh
    growth-enabled one; prints a notice on non-TF backends."""
    try:
        K.clear_session()
        K.get_session().close()
        fresh_cfg = K.tf.ConfigProto()
        fresh_cfg.gpu_options.allow_growth = True
        K.set_session(K.tf.Session(config=fresh_cfg))
    except AttributeError as e:
        # Non-TensorFlow backends (e.g. Theano) lack K.tf / K.get_session.
        print('Could not clear session. Maybe you are using Theano backend?')
def __init__(self):
    """Create the TF session, build the Keras model and its training graph,
    and record the default graph for later thread-safe access."""
    self.session = tf.Session()
    K.set_session(self.session)

    self.model = self._build_model()
    self.graph = self._build_graph(self.model)

    self.session.run(tf.global_variables_initializer())
    self.default_graph = tf.get_default_graph()
def main(_):
    """Build the TF graph inside a fresh Graph/Session pair and train."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session() as session:
        graph_ops = build_tf_graph()
        saver = tf.train.Saver()
        K.set_session(session)
        train(session, graph_ops, saver)
def update_memory(fraction):
    """Re-bind Keras to a session capped at `fraction` of GPU memory
    (still allowed to grow within that cap)."""
    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=fraction,
        allow_growth=True,
    )
    K.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)))
def __call__(self, *x_batch, **kwargs) -> Union[List, np.ndarray]:
    """
    Predicts answers on batch elements.

    Args:
        instance: a batch to predict answers on
    """
    # Bind the instance's graph/session before delegating to the network.
    with self.graph.as_default():
        K.set_session(self.sess)
        predictions = self._net.predict_on_batch(x_batch, **kwargs)
    return predictions
def build_model(self):
    """Bind Keras to a session whose GPU memory share is split evenly
    across MPI ranks, then load the model on this rank's device."""
    import keras.backend as K
    # Each of comm.Get_size() ranks gets an equal slice of GPU memory.
    gpu_opts = K.tf.GPUOptions(
        per_process_gpu_memory_fraction=1. / self.comm.Get_size())
    session_cfg = K.tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=gpu_opts)
    K.set_session(K.tf.Session(config=session_cfg))

    with K.tf.device(self.device):
        model = load_model(filename=self.filename,
                           json_str=self.json_str,
                           custom_objects=self.custom_objects,
                           weights_file=self.weights)
    return model
def test_keras_reload(self):
    """Test that trained keras models can be reloaded correctly."""
    # Dedicated graph + session so this test does not leak state into others.
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
        tasks = ["task0"]
        task_types = {task: "classification" for task in tasks}
        n_samples = 10
        n_features = 3
        n_tasks = len(tasks)

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.random.randint(2, size=(n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))
        dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

        # Deliberately over-parameterized (1000 hidden units, 200 epochs,
        # no dropout) so the model memorizes the tiny dataset.
        model_params = {
            "nb_hidden": 1000,
            "activation": "relu",
            "dropout": 0.0,
            "learning_rate": 0.15,
            "momentum": 0.9,
            "nesterov": False,
            "decay": 1e-4,
            "batch_size": n_samples,
            "nb_epoch": 200,
            "init": "glorot_uniform",
            "nb_layers": 1,
            "batchnorm": False,
            "data_shape": dataset.get_data_shape(),
        }

        verbosity = "high"
        classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
        model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                             verbosity=verbosity)

        # Fit trained model
        model.fit(dataset)
        model.save()

        # Load trained model
        reloaded_model = MultiTaskDNN(tasks, task_types, model_params,
                                      self.model_dir, verbosity=verbosity)
        reloaded_model.reload()

        # Eval model on train; the reloaded model must still score highly
        # on the data the original memorized.
        transformers = []
        evaluator = Evaluator(reloaded_model, dataset, transformers,
                              verbosity=verbosity)
        scores = evaluator.compute_model_performance([classification_metric])
        assert scores[classification_metric.name] > 0.9
def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Straightforward test of Keras multitask deepchem classification API."""
    # Dedicated graph + session so this test does not leak state into others.
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
        task_type = "classification"
        # TODO(rbharath): There should be some automatic check to ensure that all
        # required model_params are specified.
        # TODO(rbharath): Turning off dropout to make tests behave.
        model_params = {"nb_hidden": 10, "activation": "relu",
                        "dropout": .0, "learning_rate": .01,
                        "momentum": .9, "nesterov": False,
                        "decay": 1e-4, "batch_size": 5,
                        "nb_epoch": 2, "init": "glorot_uniform",
                        "nb_layers": 1, "batchnorm": False}

        input_file = os.path.join(self.current_dir, "multitask_example.csv")
        tasks = ["task0", "task1", "task2", "task3", "task4", "task5",
                 "task6", "task7", "task8", "task9", "task10", "task11",
                 "task12", "task13", "task14", "task15", "task16"]
        task_types = {task: task_type for task in tasks}

        # Featurize molecules as 1024-bit circular (ECFP) fingerprints.
        featurizer = CircularFingerprint(size=1024)
        loader = DataLoader(tasks=tasks,
                            smiles_field=self.smiles_field,
                            featurizer=featurizer,
                            verbosity="low")
        dataset = loader.featurize(input_file, self.data_dir)

        # Scaffold split keeps structurally similar molecules together.
        splitter = ScaffoldSplitter()
        train_dataset, test_dataset = splitter.train_test_split(
            dataset, self.train_dir, self.test_dir)
        transformers = []
        model_params["data_shape"] = train_dataset.get_data_shape()
        classification_metrics = [Metric(metrics.roc_auc_score),
                                  Metric(metrics.matthews_corrcoef),
                                  Metric(metrics.recall_score),
                                  Metric(metrics.accuracy_score)]

        model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir)

        # Fit trained model
        model.fit(train_dataset)
        model.save()

        # Eval model on train
        evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
        _ = evaluator.compute_model_performance(classification_metrics)

        # Eval model on test
        evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
        _ = evaluator.compute_model_performance(classification_metrics)
def init_seed(self, seed):
    """
    Set various seeds for the result reproducibility
    """
    self.seed = seed
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(seed)
    rn.seed(seed)
    # Single-threaded TF ops avoid nondeterministic reduction orders.
    single_thread_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                        inter_op_parallelism_threads=1)
    tf.set_random_seed(seed)
    K.set_session(tf.Session(graph=tf.get_default_graph(),
                             config=single_thread_conf))
def set_keras_backend(backend):
    """Switch the active Keras backend by reloading K; on TensorFlow,
    also replace the session with a growth-enabled one."""
    if K.backend() == backend:
        return  # already active, nothing to do
    os.environ["KERAS_BACKEND"] = backend
    importlib.reload(K)
    assert K.backend() == backend
    if backend == "tensorflow":
        K.get_session().close()
        cfg = K.tf.ConfigProto()
        cfg.gpu_options.allow_growth = True
        K.set_session(K.tf.Session(config=cfg))
        K.clear_session()
def main(_):
    """Entry point: build graph + session, then train or evaluate
    depending on the TRAINING flag."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session() as session:
        K.set_session(session)
        graph_ops = build_graph()
        saver = tf.train.Saver()
        if TRAINING:
            train(session, graph_ops, saver)
        else:
            evaluation(session, graph_ops, saver)
def main(_):
    """Entry point: build the graph for the environment's action count,
    then train or evaluate depending on FLAGS.testing."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session() as session:
        K.set_session(session)
        num_actions = get_num_actions()
        graph_ops = build_graph(num_actions)
        saver = tf.train.Saver()
        if FLAGS.testing:
            evaluation(session, graph_ops, saver)
        else:
            train(session, graph_ops, num_actions, saver)
def __init__(self, action_size):
    """Atari agent: fixed (84, 84, 4) stacked-frame input, interactive
    TF session registered with Keras."""
    self.state_size = (84, 84, 4)
    self.action_size = action_size
    self.no_op_steps = 20

    self.model = self.build_model()

    self.sess = tf.InteractiveSession()
    K.set_session(self.sess)

    # Running stats for episode logging.
    self.avg_q_max, self.avg_loss = 0, 0
    self.sess.run(tf.global_variables_initializer())
def train_step(self):
    '''
    Perform a single train step on the Controller RNN

    Returns:
        the training loss
    '''
    # Most recent sampled architecture states.
    states = self.state_buffer[-1]
    label_list = []

    # parse the state space to get real value of the states,
    # then one hot encode them for comparison with the predictions
    state_list = self.state_space.parse_state_space_list(states)
    for id, state_value in enumerate(state_list):
        state_one_hot = self.state_space.one_hot_encode(id, state_value)
        label_list.append(state_one_hot)

    # the initial input to the controller RNN
    state_input_size = self.state_space[0]['size']
    state_input = states[0].reshape((1, state_input_size, 1))
    print("State input to Controller for training : ", state_input.flatten())

    # the discounted reward value
    reward = self.discount_rewards()
    reward = np.asarray([reward]).astype('float32')

    feed_dict = {
        self.state_input: state_input,
        self.discounted_rewards: reward
    }

    # prepare the feed dict with the values of all the policy labels for each
    # of the Controller outputs
    for i, label in enumerate(label_list):
        feed_dict[self.policy_labels[i]] = label

    with self.policy_session.as_default():
        K.set_session(self.policy_session)

        print("Training RNN (States ip) : ", state_list)
        print("Training RNN (Reward ip) : ", reward.flatten())
        # Run one policy-gradient update and fetch loss/summary/step.
        _, loss, summary, global_step = self.policy_session.run(
            [self.train_op, self.total_loss, self.summaries_op, self.global_step],
            feed_dict=feed_dict)

        self.summary_writer.add_summary(summary, global_step)
        # Checkpoint the controller after every update.
        self.saver.save(self.policy_session,
                        save_path='weights/controller.ckpt',
                        global_step=self.global_step)

        # reduce exploration after many train steps
        if global_step != 0 and global_step % 20 == 0 and self.exploration > 0.5:
            self.exploration *= 0.99

    return loss
def test_keras_multitask_regression_overfit(self):
    """Test keras multitask overfits tiny data."""
    # Dedicated graph + session so this test does not leak state into others.
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
        n_tasks = 10
        tasks = ["task%d" % task for task in range(n_tasks)]
        task_types = {task: "regression" for task in tasks}
        n_samples = 10
        n_features = 3

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.random.randint(2, size=(n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))
        dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

        # Over-parameterized on purpose (1000 hidden units, 200 epochs,
        # no dropout) so the network memorizes the tiny dataset.
        model_params = {
            "nb_hidden": 1000,
            "activation": "relu",
            "dropout": .0,
            "learning_rate": .15,
            "momentum": .9,
            "nesterov": False,
            "decay": 1e-4,
            "batch_size": n_samples,
            "nb_epoch": 200,
            "init": "glorot_uniform",
            "nb_layers": 1,
            "batchnorm": False,
            "data_shape": dataset.get_data_shape()
        }

        verbosity = "high"
        regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
        model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                             verbosity=verbosity)

        # Fit trained model
        model.fit(dataset)
        model.save()

        # Eval model on train; overfit model must score near-perfect R^2.
        transformers = []
        evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
        scores = evaluator.compute_model_performance([regression_metric])
        assert scores[regression_metric.name] > .9
def __init__(self, sess, action_dimension, gamma=0.9, lr=1.e-3, beta=0,learning_method='SARSA', policy_type='softmax', use_target_models=0, tau=1.e-3, state_formatter=lambda x, y: x, action_formatter=None, batch_state_formatter=lambda x, y: x):
    """Store RL hyper-parameters and formatter callbacks, then register
    the provided TF session with Keras."""
    self.SESSION = sess
    self.ACTION_DIM = action_dimension
    self.LR = lr
    self.BETA = beta
    # Normalize user-supplied mode strings once, up front.
    self.LEARNING_METHOD = learning_method.upper()
    self.POLICY = policy_type.lower()
    self.TARGET_MODEL = use_target_models
    self.TAU = tau
    self.GAMMA = gamma
    self.state_formatter = state_formatter
    # Fall back to the built-in encoder when no formatter is supplied.
    if action_formatter is None:
        self.action_formatter = self.action_encoder
    else:
        self.action_formatter = action_formatter
    self.batch_state_formatter = batch_state_formatter
    K.set_session(sess)
def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE):
    """Critic network plus a target copy; exposes dQ/da for the actor's
    policy-gradient update."""
    self.sess = sess
    self.BATCH_SIZE = BATCH_SIZE
    self.TAU = TAU
    self.LEARNING_RATE = LEARNING_RATE
    self.action_size = action_size
    K.set_session(sess)

    # Online and target critics share one architecture.
    self.model, self.action, self.state = \
        self.create_critic_network(state_size, action_size)
    self.target_model, self.target_action, self.target_state = \
        self.create_critic_network(state_size, action_size)

    # GRADIENTS for policy update: dQ/da w.r.t. the action input.
    self.action_grads = tf.gradients(self.model.output, self.action)
    self.sess.run(tf.initialize_all_variables())
def __init__(self, environment, rho=0.9, rms_epsilon=0.0001, momentum=0, clip_delta=0, freeze_interval=1000, batch_size=32, update_rule="rmsprop", random_state=np.random.RandomState(), double_Q=False, neural_network_critic=NN, neural_network_actor=NN):
    """ Initialize environment """
    # NOTE(review): the default `random_state=np.random.RandomState()` is a
    # single mutable object shared by every instance that uses the default —
    # confirm this is intended.
    ACNetwork.__init__(self,environment, batch_size)

    self._rho = rho
    self._rms_epsilon = rms_epsilon
    self._momentum = momentum
    self._freeze_interval = freeze_interval
    self._double_Q = double_Q
    self._random_state = random_state
    self.update_counter = 0

    # Register the session with Keras before building any models.
    self.sess = tf.Session()
    K.set_session(self.sess)

    # Critic (Q) network.
    Q_net = neural_network_critic(self._batch_size, self._input_dimensions, self._n_actions, self._random_state, True)
    self.q_vals, self.params, self.inputsQ = Q_net._buildDQN()

    if (update_rule=="sgd"):
        optimizer = SGD(lr=self._lr, momentum=self._momentum, nesterov=False)
    elif (update_rule=="rmsprop"):
        optimizer = RMSprop(lr=self._lr, rho=self._rho, epsilon=self._rms_epsilon)
    else:
        raise Exception('The update_rule '+update_rule+ 'is not implemented.')

    self.q_vals.compile(optimizer=optimizer, loss='mse')

    # Frozen target Q network, periodically synced via _resetQHat().
    self.next_q_vals, self.next_params, self.next_inputsQ = Q_net._buildDQN()
    self.next_q_vals.compile(optimizer='rmsprop', loss='mse') #The parameters do not matter since training is done on self.q_vals

    self._resetQHat()

    # Actor (policy) network and its target copy.
    policy_net = neural_network_actor(self._batch_size, self._input_dimensions, self._n_actions, self._random_state, False)
    self.policy, self.params_policy = policy_net._buildDQN()
    self.policy.compile(optimizer=optimizer, loss='mse')
    self.next_policy, self.next_params_policy = policy_net._buildDQN()
    self.next_policy.compile(optimizer=optimizer, loss='mse')

    ### self.policy
    # dQ/da with respect to the last Q-network input (the action).
    self.action_grads = tf.gradients(self.q_vals.output,self.inputsQ[-1]) #GRADIENTS for policy update

    self.sess.run(tf.initialize_all_variables())
def test_graph_conv_singletask_classification_overfit(self):
    """Test graph-conv multitask overfits tiny data."""
    # Fix numpy and TF seeds so the overfitting run is reproducible.
    np.random.seed(123)
    tf.set_random_seed(123)
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
        n_tasks = 1
        n_samples = 10
        n_features = 3
        n_classes = 2

        # Load mini log-solubility dataset.
        featurizer = dc.feat.ConvMolFeaturizer()
        tasks = ["outcome"]
        input_file = os.path.join(self.current_dir, "example_classification.csv")
        loader = dc.data.CSVLoader(
            tasks=tasks, smiles_field="smiles", featurizer=featurizer)
        dataset = loader.featurize(input_file)

        classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score)

        # Graph-convolution architecture: conv -> norm -> pool -> dense
        # projection -> norm -> gather.
        n_feat = 71
        batch_size = 10
        graph_model = dc.nn.SequentialGraph(n_feat)
        graph_model.add(dc.nn.GraphConv(64, activation='relu'))
        graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
        graph_model.add(dc.nn.GraphPool())
        # Gather Projection
        graph_model.add(dc.nn.Dense(128, activation='relu'))
        graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
        graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))

        with self.test_session() as sess:
            model = dc.models.MultitaskGraphClassifier(
                sess, graph_model, n_tasks, batch_size=batch_size,
                learning_rate=1e-3, learning_rate_decay_time=1000,
                optimizer_type="adam", beta1=.9, beta2=.999)

            # Fit trained model
            model.fit(dataset, nb_epoch=20)
            model.save()

            # Eval model on train; the model must overfit the tiny dataset.
            scores = model.evaluate(dataset, [classification_metric])
            assert scores[classification_metric.name] > .75
def train_on_batch(self, texts: List[List[np.ndarray]], labels: list) -> [float, List[float]]:
    """
    Train the model on the given batch

    Args:
        texts: list of tokenized text samples
        labels: list of labels

    Returns:
        metrics values on the given batch
    """
    K.set_session(self.sess)
    padded_features = self.pad_texts(texts)
    onehot_labels = labels2onehot(labels, classes=self.classes)
    return self.model.train_on_batch(padded_features, onehot_labels)
def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE):
    """Actor network plus a target copy; wires the deterministic
    policy-gradient update op."""
    self.sess = sess
    self.BATCH_SIZE = BATCH_SIZE
    self.TAU = TAU
    self.LEARNING_RATE = LEARNING_RATE
    K.set_session(sess)

    # Online and target actors share one architecture.
    self.model, self.weights, self.state = \
        self.create_actor_network(state_size, action_size)
    self.target_model, self.target_weights, self.target_state = \
        self.create_actor_network(state_size, action_size)

    # dQ/da comes from the critic; the negative sign turns the optimizer's
    # descent into gradient ascent on expected Q.
    self.action_gradient = tf.placeholder(tf.float32, [None, action_size])
    self.params_grad = tf.gradients(self.model.output, self.weights, -self.action_gradient)
    grads = zip(self.params_grad, self.weights)
    self.optimize = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(grads)
    self.sess.run(tf.initialize_all_variables())
def train():
    """ENAS-style training loop: alternates child-model training with
    controller training, logging and evaluating on a fixed cadence."""
    images, labels = data_utils.read_data(FLAGS.train_data_dir, FLAGS.val_data_dir, FLAGS.test_data_dir, FLAGS.channel, FLAGS.img_size, FLAGS.n_aug_img)
    print("shape images train: ", images['train'].shape)
    print("shape labels train: ", labels['train'].shape)
    print("shape images test: ", images['test'].shape)
    print("shape labels test: ", labels['test'].shape)
    print("shape images valid: ", images['valid'].shape)
    print("shape labels valid: ", labels['valid'].shape)
    n_data = np.shape(images["train"])[0]
    print("Number of training data: %d" % (n_data))

    g = tf.Graph()
    with g.as_default():
        ops = get_ops(images, labels)
        child_ops = ops["child"]
        controller_ops = ops["controller"]

        saver = tf.train.Saver(max_to_keep=2)
        # Checkpoint once per child epoch.
        checkpoint_saver_hook = tf.train.CheckpointSaverHook(
            FLAGS.output_dir, save_steps=child_ops["num_train_batches"], saver=saver)
        hooks = [checkpoint_saver_hook]
        if FLAGS.child_sync_replicas:
            sync_replicas_hook = child_ops["optimizer"].make_session_run_hook(True)
            hooks.append(sync_replicas_hook)
        if FLAGS.controller_training and FLAGS.controller_sync_replicas:
            sync_replicas_hook = controller_ops["optimizer"].make_session_run_hook(True)
            hooks.append(sync_replicas_hook)

        print("-" * 80)
        config = tf.ConfigProto(allow_soft_placement=True)
        print_all_vars()
        print("-" * 80)
        print("Starting session")
        with tf.train.SingularMonitoredSession(
                config=config, hooks=hooks, checkpoint_dir=FLAGS.output_dir) as sess:
            K.set_session(sess)
            start_time = time.time()
            while True:
                # One child training step.
                run_ops = [
                    child_ops["loss"],
                    child_ops["lr"],
                    child_ops["grad_norm"],
                    child_ops["train_acc"],
                    child_ops["train_op"]
                ]
                loss, lr, gn, tr_acc, _ = sess.run(run_ops)
                global_step = sess.run(child_ops["global_step"])

                # With sync replicas, each global step aggregates several updates.
                if FLAGS.child_sync_replicas:
                    actual_step = global_step * FLAGS.num_aggregate
                else:
                    actual_step = global_step
                epoch = actual_step // ops["num_train_batches"]
                curr_time = time.time()
                if global_step % FLAGS.log_every == 0:
                    log_string = ""
                    log_string += "epoch = {:<6d}".format(epoch)
                    log_string += "ch_step = {:<6d}".format(global_step)
                    log_string += " loss = {:<8.6f}".format(loss)
                    log_string += " lr = {:<8.4f}".format(lr)
                    log_string += " |g| = {:<8.4f}".format(gn)
                    log_string += " tr_acc = {:<3d}/{:>3d}".format(tr_acc, FLAGS.batch_size)
                    log_string += " mins = {:<10.2f}".format(float(curr_time - start_time) / 60)
                    print(log_string)

                if actual_step % ops["eval_every"] == 0:
                    # Periodically train the controller on validation accuracy.
                    if (FLAGS.controller_training and epoch % FLAGS.controller_train_every == 0):
                        print("Epoch {}: Training controller".format(epoch))
                        for ct_step in range(FLAGS.controller_train_steps * FLAGS.controller_num_aggregate):
                            run_ops = [
                                controller_ops["loss"],
                                controller_ops["entropy"],
                                controller_ops["lr"],
                                controller_ops["grad_norm"],
                                controller_ops["valid_acc"],
                                controller_ops["baseline"],
                                controller_ops["skip_rate"],
                                controller_ops["train_op"],
                            ]
                            loss, entropy, lr, gn, val_acc, bl, skip, _ = sess.run(run_ops)
                            controller_step = sess.run(controller_ops["train_step"])
                            if ct_step % FLAGS.log_every == 0:
                                curr_time = time.time()
                                log_string = ""
                                log_string += "ctrl_step = {:<6d}".format(controller_step)
                                log_string += " loss = {:<7.3f}".format(loss)
                                log_string += " ent = {:<5.2f}".format(entropy)
                                log_string += " lr = {:<6.4f}".format(lr)
                                log_string += " |g| = {:<8.4f}".format(gn)
                                log_string += " acc = {:<6.4f}".format(val_acc)
                                log_string += " bl = {:<5.2f}".format(bl)
                                log_string += " mins = {:<.2f}".format(float(curr_time - start_time) / 60)
                                print(log_string)

                        # Sample and display a few architectures for inspection.
                        print("Here are 10 architectures")
                        for _ in range(10):
                            arc, acc = sess.run([
                                controller_ops["sample_arc"],
                                controller_ops["valid_acc"],
                            ])
                            if FLAGS.search_for == "micro":
                                normal_arc, reduce_arc = arc
                                print(np.reshape(normal_arc, [-1]))
                                print(np.reshape(reduce_arc, [-1]))
                            else:
                                start = 0
                                for layer_id in range(FLAGS.child_num_layers):
                                    if FLAGS.controller_search_whole_channels:
                                        end = start + 1 + layer_id
                                    else:
                                        end = start + 2 * FLAGS.child_num_branches + layer_id
                                    print(np.reshape(arc[start:end], [-1]))
                                    start = end
                            print("val_acc = {:<6.4f}".format(acc))
                            print("-" * 80)

                    print("Epoch {}: Eval".format(epoch))
                    if FLAGS.child_fixed_arc is None:
                        ops["eval_func"](sess, "valid")
                    ops["eval_func"](sess, "test")

                if epoch >= FLAGS.num_epochs:
                    break
import tensorflow as tf
import scipy.stats as st

# Fix all RNG seeds (python `random` as rn, numpy, tensorflow) up front
# for reproducible runs.
seed = 1
rn.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)

from keras import backend as k

# Single-threaded, CPU-only session keeps op scheduling deterministic.
config = tf.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1,
                        allow_soft_placement=True,
                        device_count={'CPU': 1})
sess = tf.Session(graph=tf.get_default_graph(), config=config)
k.set_session(sess)

import pandas as pd
import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from keras import backend as K
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from pmdarima.arima import auto_arima
from statsmodels.tsa.stattools import adfuller
from src.preparation.load_data import load_data
from src.networks.train_model import train_model
from src.networks.autoencoder import build_autoencoder
def __init__(self, params):
    """Build a stateful LSTM language model with per-timestep sampled-softmax
    loss, with layers placed across two GPUs.

    Args:
        params: dict of hyperparameters (batch_size, valid_batch_size,
            seq_len, vocab_size, embed_size, hidden_dim, num_layers,
            directoryOutLogs).
    """
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    self.sess = tf.Session(config=config)
    K.set_session(self.sess)

    # Pull out all of the parameters
    self.batch_size = params['batch_size']
    self.valid_batch_size = params['valid_batch_size']
    self.seq_len = params['seq_len']
    self.vocab_size = params['vocab_size']
    self.embed_size = params['embed_size']
    self.hidden_dim = params['hidden_dim']
    self.num_layers = params['num_layers']
    self.directoryOutLogs = params['directoryOutLogs']

    self.mode = 'train'
    # NOTE(review): a single stateful LSTM instance is applied for every one
    # of `num_layers` iterations below, which shares weights across "layers"
    # — confirm this is intended.
    self.LSTM = LSTM(units=self.hidden_dim, return_sequences=True, name='rnn_1', stateful=True)
    self.initializerDone = False

    with tf.device('/gpu:0'):
        # Set up the input placeholder
        self.input_seq = tf.placeholder(tf.float32, shape=[self.batch_size, self.seq_len])
        # Build the RNN
        self.rnn = Embedding(self.vocab_size, self.embed_size,
                             input_length=self.seq_len, mask_zero=True)(self.input_seq)

    with tf.device('/gpu:1'):
        for l in range(self.num_layers):
            self.rnn = self.LSTM(self.rnn)
        # Split the time dimension into one tensor per step.
        rnn_output = tf.unstack(self.rnn, num=self.input_seq.shape[1], axis=1)

        # Output projection, used both for sampled softmax (training) and
        # full logits (inference).
        self.w_proj = tf.Variable(tf.zeros([self.vocab_size, self.hidden_dim]))
        self.b_proj = tf.Variable(tf.zeros([self.vocab_size]))
        self.output_seq = tf.placeholder(tf.int64, shape=([None, self.seq_len]))
        losses = []
        outputs = []
        for t in range(self.seq_len):
            rnn_t = rnn_output[t]
            y_t = tf.reshape(self.output_seq[:, t], [-1, 1])
            # Sampled softmax avoids a full |V|-way softmax at train time.
            step_loss = tf.nn.sampled_softmax_loss(weights=self.w_proj,
                                                   biases=self.b_proj,
                                                   inputs=rnn_t,
                                                   labels=y_t,
                                                   num_sampled=512,
                                                   num_classes=self.vocab_size)
            losses.append(step_loss)
            outputs.append(tf.matmul(rnn_t, tf.transpose(self.w_proj)) + self.b_proj)
        self.step_losses = losses
        self.output = outputs
        self.loss = tf.reduce_mean(self.step_losses)
        self.softmax = tf.nn.softmax(self.output)
def model_eval(X, y, Xval, yval):
    """Train and evaluate the ResNet model over (repeated) cross-validation
    runs, reporting per-class and overall F1; returns the last trained model.

    Side effects: writes best-model .hdf5 checkpoints and a .mat file with
    the stacked confusion matrices.
    """
    batch = 32
    epochs = 100
    rep = 1  # K fold procedure can be repeated multiple times
    Kfold = 1
    Ntrain = 8528  # number of recordings on training set
    Nsamp = int(Ntrain / Kfold)  # number of recordings to take as validation
    # Need to add dimension for training
    # X = np.expand_dims(X, axis=2)
    # Xval = np.expand_dims(Xval, axis=2)
    classes = [1, 2, 3, 4, 5, 6]
    Nclass = len(classes)
    cvconfusion = np.zeros((Nclass, Nclass, Kfold * rep))
    cvscores = []
    counter = 0
    # repetitions of cross validation
    for r in range(rep):
        print("Rep %d" % (r + 1))
        # cross validation loop
        for k in range(Kfold):
            print("Cross-validation run %d" % (k + 1))
            # Callbacks definition
            callbacks = [
                # Early stopping definition
                EarlyStopping(monitor='val_loss', patience=3, verbose=1),
                # Decrease learning rate by 0.1 factor
                AdvancedLearnignRateScheduler(monitor='val_loss', patience=1,
                                              verbose=1, mode='auto', decayRatio=0.1),
                # Saving best model
                ModelCheckpoint(
                    'testHAR-filters{}-poolingstr{}_resnet-filters{}-ksize{}-poolingstr{}-loopnum{}.hdf5'
                    .format(encoder_confilt, encoder_poolstr, convfilt, ksize, poolstr, loop),
                    monitor='val_loss', save_best_only=True, verbose=1),
            ]
            # Load model
            model = ResNet_model(WINDOW_SIZE)
            model.summary()
            model.fit(X, y, validation_data=(Xval, yval), epochs=epochs,
                      batch_size=batch, callbacks=callbacks)
            # Evaluate best trained model
            model.load_weights(
                'testHAR-filters{}-poolingstr{}_resnet-filters{}-ksize{}-poolingstr{}-loopnum{}.hdf5'
                .format(encoder_confilt, encoder_poolstr, convfilt, ksize, poolstr, loop))
            ypred = model.predict(Xval)
            ypred = np.argmax(ypred, axis=1)
            ytrue = np.argmax(yval, axis=1)
            cvconfusion[:, :, counter] = confusion_matrix(ytrue, ypred)
            # Per-class F1 derived directly from the confusion matrix:
            # 2*TP / (row sum + column sum).
            F1 = np.zeros((6, 1))
            for i in range(6):
                F1[i] = 2 * cvconfusion[i, i, counter] / (
                    np.sum(cvconfusion[i, :, counter]) + np.sum(cvconfusion[:, i, counter]))
                print("F1 measure for {} rhythm: {:1.4f}".format(classes[i], F1[i, 0]))
            cvscores.append(np.mean(F1) * 100)
            print("Overall F1 measure: {:1.4f}".format(np.mean(F1)))
            # Reset the Keras/TF session between folds to release GPU memory.
            K.clear_session()
            gc.collect()
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            K.set_session(sess)
            counter += 1
    # Saving cross validation results
    scipy.io.savemat(
        'xval_testHAR-filters{}-poolingstr{}_resnet-filters{}-ksize{}-poolingstr{}-loopnum{}.mat'
        .format(encoder_confilt, encoder_poolstr, convfilt, ksize, poolstr, loop),
        mdict={'cvconfusion': cvconfusion.tolist()})
    return model
def limit_threads(num_threads):
    """Cap TensorFlow's intra- and inter-op parallelism by installing a
    freshly configured Keras session."""
    thread_cfg = K.tf.ConfigProto(
        intra_op_parallelism_threads=num_threads,
        inter_op_parallelism_threads=num_threads)
    K.set_session(K.tf.Session(config=thread_cfg))
def train(solver):
    """Train a segmentation autoencoder as described by the *solver* dict.

    Expected solver keys (read below): dataset_name, epochs, batch_size,
    completed_epochs, model_name, dw, dh, resize_mode, instance_mode, and
    optionally h5file (weights to resume from).
    """
    dataset_name = solver['dataset_name']
    print('Preparing to train on {} data...'.format(dataset_name))
    epochs = solver['epochs']
    batch_size = solver['batch_size']
    completed_epochs = solver['completed_epochs']
    model_name = solver['model_name']
    np.random.seed(1337)  # for reproducibility
    dw = solver['dw']
    dh = solver['dh']
    resize_mode = str(solver['resize_mode'])
    instance_mode = bool(solver['instance_mode'])
    dataset = datasets.load(dataset_name)
    nc = dataset.num_classes()  # categories + background
    model = select_model(model_name=model_name)
    # model.build may normalize the model name, so it is reassigned here.
    autoencoder, model_name = model.build(nc=nc, w=dw, h=dh)
    if 'h5file' in solver:
        # Resume from an explicit weights file.
        h5file = solver['h5file']
        print('Loading model {}'.format(h5file))
        h5file, ext = os.path.splitext(h5file)
        autoencoder.load_weights(h5file + ext)
    else:
        # Otherwise initialize from transferred (pretrained) weights.
        autoencoder = model.transfer_weights(autoencoder)
    if K.backend() == 'tensorflow':
        print('Tensorflow backend detected; Applying memory usage constraints')
        # allow_growth avoids grabbing all GPU memory up front.
        ss = K.tf.Session(config=K.tf.ConfigProto(gpu_options=K.tf.GPUOptions(
            allow_growth=True)))
        K.set_session(ss)
        ss.run(K.tf.global_variables_initializer())
    print('Done loading {} model!'.format(model_name))
    # Per-experiment output layout: models/<dataset>/<model>/{logs,weights}.
    experiment_dir = os.path.join('models', dataset_name, model_name)
    log_dir = os.path.join(experiment_dir, 'logs')
    checkpoint_dir = os.path.join(experiment_dir, 'weights')
    ensure_dir(log_dir)
    ensure_dir(checkpoint_dir)
    train_dataset, train_generator = load_dataset(dataset_name=dataset_name,
                                                  data_dir=os.path.join(
                                                      'data', dataset_name),
                                                  data_type='train2014',
                                                  instance_mode=instance_mode)
    train_gen = load_data(dataset=train_dataset,
                          generator=train_generator,
                          target_h=dh,
                          target_w=dw,
                          resize_mode=resize_mode)
    train_gen = batched(train_gen, batch_size)
    # In instance mode each object instance is a sample; otherwise each image.
    nb_train_samples = train_dataset.num_instances if instance_mode else train_dataset.num_images
    steps_per_epoch = nb_train_samples // batch_size
    validation_steps = steps_per_epoch // 10  # validate on ~10% of a training epoch
    val_dataset, val_generator = load_dataset(
        dataset_name=dataset_name,
        data_dir=os.path.join('data', dataset_name),
        data_type='val2014',
        sample_size=validation_steps * batch_size,
        instance_mode=instance_mode)
    val_gen = load_data(dataset=val_dataset,
                        generator=val_generator,
                        target_h=dh,
                        target_w=dw,
                        resize_mode=resize_mode)
    val_gen = batched(val_gen, batch_size)
    autoencoder.fit_generator(
        generator=train_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks(log_dir, checkpoint_dir, model_name),
        validation_data=val_gen,
        validation_steps=validation_steps,
        initial_epoch=completed_epochs,
    )
def main():
    """Run a black-box (NES-style) adversarial attack against a CIFAR-10 model.

    NOTE(review): relies on module-level globals not defined in this block:
    npop, sigma, epsi, alpha, boxmul, boxplus, torch_arctanh, get_model —
    confirm they are defined elsewhere in the file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--cifar-path',
        type=str,
        default='../cifar10_data/test_batch',
        help=
        'path to the test_batch file from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    )
    parser.add_argument('--start', type=int, default=0)
    parser.add_argument('--end', type=int, default=100)
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()
    # Attack bookkeeping counters.
    test_loss = 0
    correct = 0
    total = 0
    totalImages = 0  # examples the model classified correctly (attackable)
    succImages = 0  # successful adversarial examples
    faillist = []
    # set up TensorFlow session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)
    #sess = tf.Session()
    # initialize a model
    model = get_model('cifar', softmax=False)
    ### change lid model path llj
    model.load_weights("../all_models/lid/lid_model_cifar.h5")
    # initialize a data provider for CIFAR-10 images
    provider = robustml.provider.CIFAR10(args.cifar_path)
    input_xs = tf.placeholder(tf.float32, [None, 32, 32, 3])
    real_logits0 = model(input_xs)
    real_logits = tf.nn.softmax(real_logits0)
    # NOTE(review): args.start/args.end are ignored; the range is hard-coded.
    start = 0
    end = 100
    total = 0
    successlist = []
    printlist = []
    start_time = time.time()
    for i in range(start, end):
        success = False
        print('evaluating %d of [%d, %d)' % (i, start, end), file=sys.stderr)
        inputs, targets = provider[i]
        # Initial perturbation in NCHW layout.
        modify = np.random.randn(1, 3, 32, 32) * 0.001
        ##### thermometer encoding
        logits = sess.run(real_logits, feed_dict={input_xs: [inputs]})
        print(logits)
        if np.argmax(logits) != targets:
            # Only attack examples the model already classifies correctly.
            print('skip the wrong example ', i)
            continue
        totalImages += 1
        for runstep in range(400):
            # Sample npop perturbation directions for the NES gradient estimate.
            Nsample = np.random.randn(npop, 3, 32, 32)
            modify_try = modify.repeat(npop, 0) + sigma * Nsample
            # Map the image into tanh space (change of variables trick).
            newimg = torch_arctanh(
                (inputs - boxplus) / boxmul).transpose(2, 0, 1)
            inputimg = np.tanh(newimg + modify_try) * boxmul + boxplus
            if runstep % 10 == 0:
                # Every 10 steps, test the current (mean) perturbation.
                realinputimg = np.tanh(newimg + modify) * boxmul + boxplus
                realdist = realinputimg - (np.tanh(newimg) * boxmul + boxplus)
                realclipdist = np.clip(realdist, -epsi, epsi)  # L-inf projection
                realclipinput = realclipdist + (np.tanh(newimg) * boxmul +
                                                boxplus)
                l2real = np.sum((realclipinput -
                                 (np.tanh(newimg) * boxmul + boxplus))**2)**0.5
                #l2real = np.abs(realclipinput - inputs.numpy())
                print(inputs.shape)
                outputsreal = sess.run(
                    real_logits,
                    feed_dict={
                        input_xs: realclipinput.transpose(0, 2, 3, 1)
                    })
                print(outputsreal)
                print(np.abs(realclipdist).max())
                print('l2real: ' + str(l2real.max()))
                print(outputsreal)
                # Success = misclassified while staying inside the epsilon ball.
                if (np.argmax(outputsreal) !=
                        targets) and (np.abs(realclipdist).max() <= epsi):
                    succImages += 1
                    success = True
                    print('clipimage succImages: ' + str(succImages) +
                          '  totalImages: ' + str(totalImages))
                    print('lirealsucc: ' + str(realclipdist.max()))
                    successlist.append(i)
                    printlist.append(runstep)
                    break
            # Evaluate all npop candidates to estimate the gradient.
            dist = inputimg - (np.tanh(newimg) * boxmul + boxplus)
            clipdist = np.clip(dist, -epsi, epsi)
            clipinput = (clipdist +
                         (np.tanh(newimg) * boxmul + boxplus)).reshape(
                             npop, 3, 32, 32)
            target_onehot = np.zeros((1, 10))
            target_onehot[0][targets] = 1.
            outputs = sess.run(
                real_logits,
                feed_dict={input_xs: clipinput.transpose(0, 2, 3, 1)})
            target_onehot = target_onehot.repeat(npop, 0)
            # CW-style margin loss between true class and best other class.
            real = np.log((target_onehot * outputs).sum(1) + 1e-30)
            other = np.log(((1. - target_onehot) * outputs -
                            target_onehot * 10000.).max(1)[0] + 1e-30)
            loss1 = np.clip(real - other, 0., 1000)
            Reward = 0.5 * loss1
            # Reward = l2dist
            Reward = -Reward
            # Standardize rewards, then take an NES gradient step on `modify`.
            A = (Reward - np.mean(Reward)) / (np.std(Reward) + 1e-7)
            modify = modify + (alpha / (npop * sigma)) * (
                (np.dot(Nsample.reshape(npop, -1).T, A)).reshape(3, 32, 32))
        if not success:
            faillist.append(i)
            print('failed:', faillist)
        else:
            print('successed:', successlist)
        print(faillist)
    success_rate = succImages / float(totalImages)
    np.savez('runstep', printlist)
    end_time = time.time()
    print('all time :', end_time - start_time)
    print('succc rate', success_rate)
def get_model(sess,
              image_shape=(32, 32, 3),
              gf_dim=64,
              df_dim=64,
              batch_size=64,
              name="autoencoder"):
    """Build a VAE/GAN (generator, encoder, discriminator) graph in *sess*.

    Returns (train_g, train_d, sampler, [G, D, E]): two training closures,
    a sampling closure, and the three Keras models.

    NOTE(review): Python 2 code (print statements). Uses module-level globals
    z_dim, learning_rate, beta1, and builder helpers generator/encoder/
    discriminator/put_kernels_on_grid defined elsewhere.
    NOTE(review): tf.nn.sigmoid_cross_entropy_with_logits is called with
    positional args; TF >= 1.0 requires keyword-only labels=/logits= -- this
    code targets an older TF. Confirm the pinned TF version before upgrading.
    """
    K.set_session(sess)
    with tf.variable_scope(name):
        # sizes
        ch = image_shape[2]
        rows = [4, 8, 16, 32]
        cols = [4, 8, 16, 32]
        # nets
        G = generator(batch_size, gf_dim, ch, rows, cols)
        G.compile("sgd", "mse")  # compile only to finalize the model; losses below are hand-built
        g_vars = G.trainable_weights
        print "G.shape: ", G.output_shape
        E = encoder(batch_size, df_dim, ch, rows, cols)
        E.compile("sgd", "mse")
        e_vars = E.trainable_weights
        print "E.shape: ", E.output_shape
        D = discriminator(batch_size, df_dim, ch, rows, cols)
        D.compile("sgd", "mse")
        d_vars = D.trainable_weights
        print "D.shape: ", D.output_shape
        Z2 = Input(batch_shape=(batch_size, z_dim), name='more_noise')
        Z = G.input
        Img = D.input
        image_grid = put_kernels_on_grid(tf.transpose(Img, [1, 2, 3, 0]),
                                         (8, 8))
        sum_img = tf.summary.image("Img", image_grid, max_outputs=1)
        # Wire the three nets together: G from noise, G from encoded image.
        G_train = G(Z)
        E_mean, E_logsigma = E(Img)
        G_dec = G(E_mean + Z2 * E_logsigma)  # reparameterization trick
        D_fake, F_fake = D(G_train)
        D_dec_fake, F_dec_fake = D(G_dec)
        D_legit, F_legit = D(Img)
        # costs
        recon_vs_gan = 1e-6  # weight of the feature reconstruction loss in g_loss
        like_loss = tf.reduce_mean(tf.square(F_legit - F_dec_fake)) / 2.
        # KL divergence of the encoder posterior from the unit Gaussian.
        kl_loss = tf.reduce_mean(-E_logsigma + .5 *
                                 (-1 + tf.exp(2. * E_logsigma) +
                                  tf.square(E_mean)))
        d_loss_legit = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(D_legit,
                                                    tf.ones_like(D_legit)))
        d_loss_fake1 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(D_fake,
                                                    tf.zeros_like(D_fake)))
        d_loss_fake2 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(D_dec_fake,
                                                    tf.zeros_like(D_dec_fake)))
        d_loss_fake = d_loss_fake1 + d_loss_fake2
        d_loss = d_loss_legit + d_loss_fake
        g_loss1 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(D_fake,
                                                    tf.ones_like(D_fake)))
        g_loss2 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(D_dec_fake,
                                                    tf.ones_like(D_dec_fake)))
        g_loss = g_loss1 + g_loss2 + recon_vs_gan * like_loss
        e_loss = kl_loss + like_loss
        # optimizers
        print "Generator variables:"
        for v in g_vars:
            print v.name
        print "Discriminator variables:"
        for v in d_vars:
            print v.name
        print "Encoder variables:"
        for v in e_vars:
            print v.name
        e_optim = tf.train.AdamOptimizer(learning_rate,
                                         beta1=beta1).minimize(
                                             e_loss, var_list=e_vars)
        d_optim = tf.train.AdamOptimizer(learning_rate,
                                         beta1=beta1).minimize(
                                             d_loss, var_list=d_vars)
        g_optim = tf.train.AdamOptimizer(learning_rate,
                                         beta1=beta1).minimize(
                                             g_loss, var_list=g_vars)
        sess.run(tf.global_variables_initializer())
        # summaries
        sum_d_loss_legit = tf.summary.scalar("d_loss_legit", d_loss_legit)
        sum_d_loss_fake = tf.summary.scalar("d_loss_fake", d_loss_fake)
        sum_d_loss = tf.summary.scalar("d_loss", d_loss)
        sum_g_loss = tf.summary.scalar("g_loss", g_loss)
        sum_e_loss = tf.summary.scalar("e_loss", e_loss)
        sum_e_mean = tf.summary.histogram("e_mean", E_mean)
        sum_e_sigma = tf.summary.histogram("e_sigma", tf.exp(E_logsigma))
        sum_Z = tf.summary.histogram("Z", Z)
        image_grid = put_kernels_on_grid(tf.transpose(G_train, [1, 2, 3, 0]),
                                         (8, 8))
        sum_gen = tf.summary.image("G", image_grid, max_outputs=1)
        image_grid = put_kernels_on_grid(tf.transpose(G_dec, [1, 2, 3, 0]),
                                         (8, 8))
        sum_dec = tf.summary.image("E", image_grid, max_outputs=1)
        g_sum = tf.summary.merge(
            [sum_Z, sum_gen, sum_d_loss_fake, sum_g_loss, sum_img])
        e_sum = tf.summary.merge([sum_dec, sum_e_loss, sum_e_mean,
                                  sum_e_sigma])
        d_sum = tf.summary.merge([sum_d_loss_legit, sum_d_loss])
        writer = tf.summary.FileWriter("train", sess.graph)

        # functions
        def train_d(images, z, counter, sess=sess):
            # One discriminator step; also applies D's internal Keras updates
            # (e.g. batch norm statistics) via explicit tf.assign ops.
            z2 = np.random.normal(0., 1., z.shape)
            outputs = [d_loss, d_loss_fake, d_loss_legit, d_sum, d_optim]
            images = np.transpose(np.reshape(images, (-1, 3, 32, 32)),
                                  (0, 2, 3, 1))
            with tf.control_dependencies(outputs):
                updates = [tf.assign(p, new_p) for (p, new_p) in D.updates]
            outs = sess.run(outputs + updates,
                            feed_dict={
                                Img: images,
                                Z: z,
                                Z2: z2,
                                K.learning_phase(): 1
                            })
            dl, dlf, dll, sums = outs[:4]
            writer.add_summary(sums, counter)
            return dl, dlf, dll

        def train_g(images, z, counter, sess=sess):
            # generator
            z2 = np.random.normal(0., 1., z.shape)
            outputs = [g_loss, G_train, g_sum, g_optim]
            images = np.transpose(np.reshape(images, (-1, 3, 32, 32)),
                                  (0, 2, 3, 1))
            with tf.control_dependencies(outputs):
                updates = [tf.assign(p, new_p) for (p, new_p) in G.updates]
            outs = sess.run(outputs + updates,
                            feed_dict={
                                Img: images,
                                Z: z,
                                Z2: z2,
                                K.learning_phase(): 1
                            })
            gl, samples, sums = outs[:3]
            writer.add_summary(sums, counter)
            # encoder
            outputs = [e_loss, G_dec, e_sum, e_optim]
            with tf.control_dependencies(outputs):
                updates = [tf.assign(p, new_p) for (p, new_p) in E.updates]
            outs = sess.run(outputs + updates,
                            feed_dict={
                                Img: images,
                                Z: z,
                                Z2: z2,
                                K.learning_phase(): 1
                            })
            gl, samples, sums = outs[:3]
            writer.add_summary(sums, counter)
            return gl, samples, images

        def sampler(z, x):
            # Encode x and decode through G (z argument is unused here).
            code = E.predict(x, batch_size=batch_size)[0]
            out = G.predict(code, batch_size=batch_size)
            return out, x

        return train_g, train_d, sampler, [G, D, E]
# Top-level training driver: configures the PASCAL VOC 2012 + Berkeley SBD
# dataset paths, picks the loss/metric, creates a growth-limited TF session,
# and launches training.
# NOTE(review): batch_size, epochs, lr_base, lr_power, weight_decay,
# model_name, batchnorm_momentum, resume_training and the train() function
# are defined elsewhere in the file.
target_size = (320, 320)
dataset = 'VOC2012_BERKELEY'
if dataset == 'VOC2012_BERKELEY':
    # pascal voc + berkeley semantic contours annotations
    train_file_path = os.path.expanduser(
        '~/.keras/datasets/VOC2012/combined_imageset_train.txt'
    )  # Data/VOClarge/VOC2012/ImageSets/Segmentation
    # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt')  # Data/VOClarge/VOC2012/ImageSets/Segmentation
    val_file_path = os.path.expanduser(
        '~/.keras/datasets/VOC2012/combined_imageset_val.txt')
    data_dir = os.path.expanduser(
        '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages')
    label_dir = os.path.expanduser(
        '~/.keras/datasets/VOC2012/combined_annotations')
    data_suffix = '.jpg'
    label_suffix = '.png'
    classes = 21  # 20 object categories + background
# ###################### loss function & metric ########################
if dataset == 'VOC2012' or dataset == 'VOC2012_BERKELEY':
    # Sparse losses that ignore the 255 "void" label at object boundaries.
    loss_fn = softmax_sparse_crossentropy_ignoring_last_label
    metrics = [sparse_accuracy_ignoring_last_label]
    loss_shape = None
    ignore_label = 255
    label_cval = 255
class_weight = None
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
K.set_session(session)
train(batch_size,
      epochs,
      lr_base,
      lr_power,
      weight_decay,
      classes,
      model_name,
      train_file_path,
      val_file_path,
      data_dir,
      label_dir,
      target_size=target_size,
      batchnorm_momentum=batchnorm_momentum,
      resume_training=resume_training,
      class_weight=class_weight,
      loss_fn=loss_fn,
      metrics=metrics,
      loss_shape=loss_shape,
      data_suffix=data_suffix,
      label_suffix=label_suffix,
      ignore_label=ignore_label,
      label_cval=label_cval)
# NOTE(review): this excerpt starts mid-way through building `regex`; the
# first line below is presumably the body of a loop over column names `col`
# that begins before this chunk. `df_X`, `df_Y` also come from earlier context.
regex += '(' + col + ')|'
regex = regex[:-1]  # drop the trailing '|'
# Keep only the selected columns, then split into train/test sets.
df_X = df_X.filter(regex=regex, axis=1)
train_X, test_X, train_y, test_y = train_test_split(df_X,
                                                    df_Y,
                                                    test_size=0.10)
# Scale target and features to [0, 1]; scalers are kept for inverse transforms.
# NOTE(review): test_X/test_y are not transformed here — presumably scaled
# later with `scaler`; verify before use.
y_scaler = MinMaxScaler()
y_scaler.fit(train_y.values.reshape(-1, 1))
train_y = y_scaler.transform(train_y.values.reshape(-1, 1))
x_scaler = MinMaxScaler()
x_scaler.fit(train_X)
train_X = x_scaler.transform(train_X)
scaler = {'x': x_scaler, 'y': y_scaler}
### LINES BELOW FOR KERAS DEEP NEURAL NET MODEL
size_input = train_X.shape[1]
# Cap TF thread pools at 6 threads each.
K.set_session(
    K.tf.Session(config=K.tf.ConfigProto(intra_op_parallelism_threads=6,
                                         inter_op_parallelism_threads=6)))
# Simple fully-connected regression net: input -> 2*input -> 1 (linear).
model = Sequential()
model.add(
    Dense(size_input,
          input_dim=size_input,
          kernel_initializer='normal',
          activation='relu'))
model.add(Dense(size_input * 2, kernel_initializer='normal',
                activation='relu'))
#model.add(Dense(784, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['mean_absolute_error'])
checkpoint_name = 'bestweights.hdf5'
def correct(trainer,
            piano,
            orch,
            silence_ind,
            duration_gen,
            path_to_config,
            model_name='model',
            orch_init=None,
            batch_size=5):
    # Perform N=batch_size orchestrations via sample-by-sample Gibbs sampling.
    # Input :
    # - piano score : numpy (time, pitch)
    # - model
    # - optionally the beginning of an orchestral score : numpy (time, pitch)
    # Output :
    # - orchestration by the model
    # Paths
    path_to_model = os.path.join(path_to_config, model_name)
    dimensions = pkl.load(open(path_to_config + '/../dimensions.pkl', 'rb'))
    is_keras = pkl.load(open(path_to_config + '/is_keras.pkl', 'rb'))
    # Get dimensions
    orch_dim = dimensions['orch_dim']
    temporal_order = dimensions['temporal_order']
    total_length = piano.shape[0]
    if orch_init is not None:
        init_length = orch_init.shape[0]
        # NOTE(review): the trailing [0] indexes the assertion-message string
        # (yielding its first character) — almost certainly a typo.
        assert (
            init_length < total_length
        ), "Orchestration initialization is longer than the piano score"[0]
        assert (
            init_length + 1 >= temporal_order
        ), "Orchestration initialization must be longer than the temporal order of the model"
    else:
        init_length = temporal_order - 1
        orch_init = np.zeros((init_length, orch_dim))
    # Instanciate generation
    # NOTE(review): `orch_gen` is read before ever being assigned — this is a
    # NameError as written. It was probably meant to be seeded from the `orch`
    # argument (or always initialized); confirm against the original repo.
    if orch_gen is None:
        orch_gen = np.random.binomial(n=1,
                                      p=0.1,
                                      size=(batch_size, total_length,
                                            orch_dim))
    # Restore model and preds graph
    tf.reset_default_graph(
    )  # First clear graph to avoid memory overflow when running training and generation in the same process
    trainer.load_pretrained_model(path_to_model)
    with tf.Session() as sess:
        if is_keras:
            K.set_session(sess)
        trainer.saver.restore(sess, path_to_model + '/model')
        time_indices = list(range(init_length, total_length - temporal_order))
        pitch_indices = list(range(orch_dim))
        mean_iteration_per_note = 10
        # Gibbs sampling: repeatedly resample one (time, pitch) cell.
        for iter in range(total_length * orch_dim * mean_iteration_per_note):
            silenced_time = True
            while silenced_time:
                # Draw random cells until a non-silent time step is found.
                time, pitch = (random.choice(time_indices),
                               random.choice(pitch_indices))
                # If piano is a silence, we automatically orchestrate by a
                # silence (i.e. we do nothing)
                silenced_time = (time in silence_ind)
            # Just duplicate the temporal index to create batch generation
            batch_index = np.tile(time, batch_size)
            ########
            ######## TO BE WRITTEN ("A ECRIRE")
            # prediction = trainer.generation_step(sess, batch_index, piano, orch_gen, duration_gen, None)
            # # prediction should be a probability distribution. Then we can sample from it
            # # Note that it doesn't need to be part of the graph since we don't use the sampled value to compute the backproped error
            # prediction_sampled = np.random.binomial(1, prediction)
            # NOTE(review): `prediction_sampled` is undefined because the
            # generation step above is commented out — this line raises
            # NameError as written.
            orch_gen[:, time, pitch] = prediction_sampled
            ########
            ########
    return orch_gen
####Load data set (X_train, y_train), (X_test, y_test) = cifar10.load_data() X_train = X_train.reshape(X_train.shape[0], 32, 32, 3) X_test = X_test.reshape(X_test.shape[0], 32, 32, 3) X_train = X_train.astype('float32') X_test = X_test.astype('float32') X_train /= 255. X_test /= 255. y_train = np.reshape(y_train,(y_train.shape[0])) y_test = np.reshape(y_test,(y_test.shape[0])) Y_train = np_utils.to_categorical(y_train, 10) Y_test = np_utils.to_categorical(y_test, 10) sess = tf.Session() backend.set_session(sess) backend._LEARNING_PHASE = tf.constant(0) backend.set_learning_phase(0) ####Load models model_source = sys.argv[1] print("Crafting adversarial examples on model: " + model_source) model = load_quantized_model("CIFAR10", model_source) print("Accuracy on test set of source model: " + str(model.evaluate(X_test, Y_test, verbose=0)[1])) model_target = sys.argv[3] print("Targetting model: " + model_target) model_target = load_quantized_model("CIFAR10", model_target) print("Accuracy on test set of target model: " + str(model_target.evaluate(X_test, Y_test, verbose=0)[1])) pred_source = np.argmax(model.predict(X_test), axis = 1)
def __init__(self, param_dict, Item_size):
    """Build the price-prediction RNN from hyperparameters.

    Args:
        param_dict: hyperparameters (batch size, sequence lengths, embedding
            and RNN dimensions, dropout, learning rate).
        Item_size: vocabulary sizes per categorical feature; also the list of
            hand-crafted feature names under 'hand_feature'.
    """
    # Cap TF thread pools (THREAD is a module-level constant).
    config = tf.ConfigProto(
        intra_op_parallelism_threads=THREAD,
        inter_op_parallelism_threads=THREAD,
        allow_soft_placement=True,
    )
    session = tf.Session(config=config)
    backend.set_session(session)
    # Inference batch is 25% larger than the training batch.
    self.batch_size = int(param_dict['batch_size'] * 1.25)
    hand_feature_cols = len(Item_size['hand_feature'])
    name_seq_len = param_dict['name_Len']
    desc_seq_len = param_dict['description_Len']
    denselayer_units = param_dict['denselayer_units']
    embed_name = param_dict['embed_name']
    embed_desc = param_dict['embed_desc']
    embed_brand = param_dict['embed_brand']
    embed_cat_2 = param_dict['embed_cat_2']
    embed_cat_3 = param_dict['embed_cat_3']
    rnn_dim_name = param_dict['rnn_dim_name']
    rnn_dim_desc = param_dict['rnn_dim_desc']
    dense_drop = param_dict['dense_drop']
    name_voc_size = Item_size['name']
    desc_voc_size = Item_size['item_description']
    brand_voc_size = Item_size['brand_name']
    cat1_voc_size = Item_size['category_1']
    cat2_voc_size = Item_size['category_2']
    cat3_voc_size = Item_size['category_name']
    # Inputs
    X_seq_name = Input(shape=[name_seq_len], name="X_seq_name", dtype='int32')
    X_seq_item_description = Input(shape=[desc_seq_len],
                                   name="X_seq_item_description",
                                   dtype='int32')
    X_brand_name = Input(shape=[1], name="X_brand_name", dtype='int32')
    X_category_1 = Input(shape=[1], name="X_category_1", dtype='int32')
    X_category_2 = Input(shape=[1], name="X_category_2", dtype='int32')
    X_category_name = Input(shape=[1], name="X_category_name", dtype='int32')
    X_item_condition_id = Input(shape=[1],
                                name="X_item_condition_id",
                                dtype='uint8')
    X_shipping = Input(shape=[1], name="X_shipping", dtype='float32')
    X_hand_feature = Input(shape=[hand_feature_cols],
                           name="X_hand_feature",
                           dtype='float32')
    # Embeddings layers
    name = Embedding(name_voc_size, embed_name)(X_seq_name)
    item_desc = Embedding(desc_voc_size, embed_desc)(X_seq_item_description)
    brand = Embedding(brand_voc_size, embed_brand)(X_brand_name)
    cat_2 = Embedding(cat2_voc_size, embed_cat_2)(X_category_2)
    cat_3 = Embedding(cat3_voc_size, embed_cat_3)(X_category_name)
    # RNN layers
    name = GRU(rnn_dim_name)(name)
    item_desc = GRU(rnn_dim_desc)(item_desc)
    # OneHot layers (condition id and category_1 are one-hot encoded inline).
    cond = Lambda(one_hot, arguments={'num_classes': 5},
                  output_shape=(1, 5))(X_item_condition_id)
    cat_1 = Lambda(one_hot,
                   arguments={'num_classes': cat1_voc_size},
                   output_shape=(1, cat1_voc_size))(X_category_1)
    # main layer: concatenate every branch into one feature vector.
    main_l = concatenate([
        name,
        item_desc,
        Flatten()(cat_1),
        Flatten()(cat_2),
        Flatten()(cat_3),
        Flatten()(brand),
        Flatten()(cond),
        X_shipping,
        X_hand_feature,
    ])
    main_l = Dropout(dense_drop)(Dense(denselayer_units,
                                       activation='relu')(main_l))
    output = Dense(1, activation="linear")(main_l)
    # model
    model = Model([
        X_seq_name, X_seq_item_description, X_brand_name, X_category_1,
        X_category_2, X_category_name, X_item_condition_id, X_shipping,
        X_hand_feature
    ], output)
    optimizer = optimizers.Adam(lr=param_dict['lr'])
    model.compile(loss='mse', optimizer=optimizer)
    self.model = model
def __init__(self, **kwargs):
    """Set up the network, losses, optimizers, and influence-function ops.

    Required kwargs: batch_size, data_sets, model_name, num_classes,
    initial_learning_rate. Optional: train_dir, log_dir, damping.
    """
    # Fixed seeds for reproducibility.
    np.random.seed(0)
    tf.set_random_seed(0)
    self.batch_size = kwargs.pop('batch_size')
    self.data_sets = kwargs.pop('data_sets')
    self.train_dir = kwargs.pop('train_dir', 'output')
    log_dir = kwargs.pop('log_dir', 'log')
    self.model_name = kwargs.pop('model_name')
    self.num_classes = kwargs.pop('num_classes')
    self.initial_learning_rate = kwargs.pop('initial_learning_rate')
    # Optional knobs hard-wired here; original kwarg plumbing kept commented.
    # if 'keep_probs' in kwargs: self.keep_probs = kwargs.pop('keep_probs')
    # else:
    self.keep_probs = None
    # if 'mini_batch' in kwargs: self.mini_batch = kwargs.pop('mini_batch')
    # else:
    self.mini_batch = True
    if 'damping' in kwargs:
        self.damping = kwargs.pop('damping')
    else:
        self.damping = 0.0
    if not os.path.exists(self.train_dir):
        os.makedirs(self.train_dir)
    # Initialize session
    config = tf.ConfigProto()
    self.sess = tf.Session(config=config)
    K.set_session(self.sess)
    # Setup input
    self.input_placeholder, self.labels_placeholder = self.placeholder_inputs(
    )
    self.num_train_examples = self.data_sets.train.labels.shape[0]
    self.num_test_examples = self.data_sets.test.labels.shape[0]
    # Setup inference and training
    # if self.keep_probs is not None:
    #     self.keep_probs_placeholder = tf.placeholder(tf.float32, shape=(2))
    #     self.logits = self.inference(self.input_placeholder, self.keep_probs_placeholder)
    # elif hasattr(self, 'inference_needs_labels'):
    #     self.logits = self.inference(self.input_placeholder, self.labels_placeholder)
    # else:
    self.logits = self.inference(self.input_placeholder)
    self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg = self.loss(
        self.logits, self.labels_placeholder)
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.learning_rate = tf.Variable(self.initial_learning_rate,
                                     name='learning_rate',
                                     trainable=False)
    #self.learning_rate_placeholder = tf.placeholder(tf.float32)
    #self.update_learning_rate_op = tf.assign(self.learning_rate, self.learning_rate_placeholder)
    self.train_op = self.get_train_op(self.total_loss, self.global_step,
                                      self.learning_rate)
    #self.train_sgd_op = self.get_train_sgd_op(self.total_loss, self.global_step, self.learning_rate)
    self.accuracy_op = self.get_accuracy_op(self.logits,
                                            self.labels_placeholder)
    self.preds = self.predictions(self.logits)
    # Setup misc
    self.saver = tf.train.Saver()
    # Setup gradients and Hessians (one placeholder per parameter tensor).
    self.params = self.get_all_params()
    self.grad_total_loss_op = tf.gradients(self.total_loss, self.params)
    self.grad_loss_no_reg_op = tf.gradients(self.loss_no_reg, self.params)
    self.v_placeholder = [
        tf.placeholder(tf.float32, shape=a.get_shape()) for a in self.params
    ]
    self.u_placeholder = [
        tf.placeholder(tf.float32, shape=a.get_shape()) for a in self.params
    ]
    # Hessian-vector product H.v for the influence computation.
    self.hessian_vector = hessian_vector_product(self.total_loss,
                                                 self.params,
                                                 self.v_placeholder)
    self.grad_loss_wrt_input_op = tf.gradients(self.total_loss,
                                               self.input_placeholder)
    # Because tf.gradients auto accumulates, we probably don't need the add_n (or even reduce_sum)
    # Influence = grad(loss) . v  (v is treated as a constant via stop_gradient).
    self.influence_op = tf.add_n([
        tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
        for a, b in zip(self.grad_total_loss_op, self.v_placeholder)
    ])
    self.grad_influence_wrt_input_op = tf.gradients(self.influence_op,
                                                    self.input_placeholder)
    self.checkpoint_file = os.path.join(self.train_dir,
                                        "%s-checkpoint" % self.model_name)
    self.all_train_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.train)
    self.all_test_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.test)
    init = tf.global_variables_initializer()
    self.sess.run(init)
import pathlib
import tensorflow as tf
from keras.backend import set_session
from keras.models import load_model

# Number of most-recent trading days fed to the model.
lookback = 48
PATH = pathlib.Path(__file__).parent
DEEP_MODEL_PATH = PATH.joinpath('pledge_company_model.h5')
# Load the deep-learning model once at import time; keep the session and
# default graph so predictions can run later (TF1 threading pattern).
sess = tf.Session()
graph = tf.get_default_graph()
set_session(sess)
deep_model = load_model(DEEP_MODEL_PATH)


def transform_price(price_df, forecast_close_line):
    """Turn a price DataFrame into a (1, lookback, n_features) model input.

    NOTE(review): `price_scaler` is not defined in this excerpt — presumably
    a module-level scaler loaded elsewhere; confirm.
    """
    price_df = price_df.drop(['ts_code', 'trade_date', 'pre_close'], axis=1)
    # Distance of each close from the forecast line, as an extra feature.
    price_df['delta'] = price_df.apply(
        lambda x: x['close'] - forecast_close_line, axis=1)
    price_values = price_scaler.transform(price_df)
    # Keep the most recent `lookback` rows and flip to chronological order.
    price_values = price_values[:lookback]
    price_values = price_values[::-1]
    return price_values.reshape((1, lookback, -1))


def deep_predict(code, forecast_close_line, price_df):
    # NOTE(review): this function appears truncated in this excerpt — the body
    # likely continues (prediction + return) beyond this chunk.
    price_values = transform_price(price_df, forecast_close_line)
def simo_classification_tut(gpu_id: int, dataset: str, frac: float,
                            validation_split: float, preprocessor: str,
                            grid_size: float, batch_size: int, epochs: int,
                            optimizer: str, dropout: float,
                            corruption_level: float, num_neighbors: int,
                            scaling: float, dae_hidden_layers: list,
                            sdae_hidden_layers: list, cache: bool,
                            common_hidden_layers: list,
                            floor_hidden_layers: list,
                            location_hidden_layers: list, floor_weight: float,
                            location_weight: float, verbose: int):
    """Multi-floor indoor localization based on floor and coordinates
    classification using a single-input and multi-output (SIMO) deep neural
    network (DNN) model and TUT datasets.

    Returns a LocalizationResults namedtuple of floor accuracy and 2D/3D
    (weighted and unweighted) mean/median positioning errors.

    NOTE(review): `session_conf` is used below but not defined in this
    excerpt — presumably a module-level tf.ConfigProto; confirm.
    """
    ### initialize numpy, random, TensorFlow, and keras
    np.random.seed()  # based on current time or OS-specific randomness source
    rn.seed()  # "
    tf.set_random_seed(rn.randint(0, 1000000))
    if gpu_id >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ''  # force CPU-only
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)
    ### load datasets after scaling
    print("Loading data ...")
    if dataset == 'tut':
        from tut import TUT
        tut = TUT(cache=cache,
                  frac=frac,
                  preprocessor=preprocessor,
                  classification_mode='hierarchical',
                  grid_size=0)
    elif dataset == 'tut2':
        from tut import TUT2
        tut = TUT2(cache=cache,
                   frac=frac,
                   preprocessor=preprocessor,
                   classification_mode='hierarchical',
                   grid_size=0,
                   testing_split=0.2)
    elif dataset == 'tut3':
        from tut import TUT3
        tut = TUT3(cache=cache,
                   frac=frac,
                   preprocessor=preprocessor,
                   classification_mode='hierarchical',
                   grid_size=0)
    else:
        print("'{0}' is not a supported data set.".format(dataset))
        sys.exit(0)
    flr_height = tut.floor_height
    training_df = tut.training_df
    training_data = tut.training_data
    testing_df = tut.testing_df
    testing_data = tut.testing_data
    ### build and train a SIMO model
    print("Building and training a SIMO model for classification ...")
    rss = training_data.rss_scaled
    coord = training_data.coord_scaled
    coord_scaler = training_data.coord_scaler  # for inverse transform
    labels = training_data.labels
    # NOTE: `input` shadows the builtin within this function.
    input = Input(shape=(rss.shape[1], ), name='input')  # common input
    # (optional) build deep autoencoder or stacked denoising autoencoder
    if dae_hidden_layers != '':
        print("- Building a DAE model ...")
        model = deep_autoencoder(dataset=dataset,
                                 input_data=rss,
                                 preprocessor=preprocessor,
                                 hidden_layers=dae_hidden_layers,
                                 cache=cache,
                                 model_fname=None,
                                 optimizer=optimizer,
                                 batch_size=batch_size,
                                 epochs=epochs,
                                 validation_split=validation_split)
        x = model(input)
    elif sdae_hidden_layers != '':
        print("- Building an SDAE model ...")
        model = sdae(dataset=dataset,
                     input_data=rss,
                     preprocessor=preprocessor,
                     hidden_layers=sdae_hidden_layers,
                     cache=cache,
                     model_fname=None,
                     optimizer=optimizer,
                     corruption_level=corruption_level,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_split=validation_split)
        x = model(input)
    else:
        x = input
    # common hidden layers (Dense -> BN -> ReLU -> Dropout stack)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout)(x)
    if common_hidden_layers != '':
        for units in common_hidden_layers:
            x = Dense(units)(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = Dropout(dropout)(x)
    common_hl_output = x
    # floor classification output
    if floor_hidden_layers != '':
        for units in floor_hidden_layers:
            x = Dense(units)(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = Dropout(dropout)(x)
    x = Dense(labels.floor.shape[1])(x)
    x = BatchNormalization()(x)
    floor_output = Activation('softmax', name='floor_output')(
        x)  # no dropout for an output layer
    # location classification output
    # NOTE(review): this head continues from the floor head's `x` rather than
    # branching from common_hl_output — confirm this is intended.
    if location_hidden_layers != '':
        for units in location_hidden_layers:
            x = Dense(units)(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = Dropout(dropout)(x)
    x = Dense(labels.location.shape[1])(x)
    x = BatchNormalization()(x)
    location_output = Activation('softmax', name='location_output')(
        x)  # no dropout for an output layer
    model = Model(inputs=input, outputs=[floor_output, location_output])
    model.compile(
        optimizer=optimizer,
        loss=['categorical_crossentropy', 'categorical_crossentropy'],
        loss_weights={
            'floor_output': floor_weight,
            'location_output': location_weight
        },
        metrics={
            'floor_output': 'accuracy',
            'location_output': 'accuracy'
        })
    weights_file = os.path.expanduser("~/tmp/best_weights.h5")
    checkpoint = ModelCheckpoint(weights_file,
                                 monitor='val_loss',
                                 save_best_only=True,
                                 verbose=0)
    early_stop = EarlyStopping(monitor='val_loss',
                               min_delta=0,
                               patience=10,
                               verbose=0)
    print("- Training a floor and coordinates classifier ...", end='')
    startTime = timer()
    history = model.fit(x={'input': rss},
                        y={
                            'floor_output': labels.floor,
                            'location_output': labels.location
                        },
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=verbose,
                        callbacks=[checkpoint, early_stop],
                        validation_split=validation_split,
                        shuffle=True)
    elapsedTime = timer() - startTime
    print(" completed in {0:.4e} s".format(elapsedTime))
    model.load_weights(weights_file)  # load weights from the best model
    ### evaluate the model
    print("Evaluating the model ...")
    rss = testing_data.rss_scaled
    labels = testing_data.labels
    blds = labels.building
    flrs = labels.floor
    coord = testing_data.coord  # original coordinates
    x_col_name = 'X'
    y_col_name = 'Y'
    # calculate the classification accuracies and localization errors
    flrs_pred, locs_pred = model.predict(rss, batch_size=batch_size)
    flr_results = (np.equal(np.argmax(flrs, axis=1),
                            np.argmax(flrs_pred, axis=1))).astype(int)
    flr_acc = flr_results.mean()
    # calculate positioning error based on locations
    n_samples = len(flrs)
    n_locs = locs_pred.shape[1]  # number of locations (reference points)
    idxs = np.argpartition(
        locs_pred, -num_neighbors
    )[:, -num_neighbors:]  # (unsorted) indexes of up to num_neighbors nearest neighbors
    # Only neighbors scoring at least `scaling` * max score are kept.
    threshold = scaling * np.amax(locs_pred, axis=1)
    training_labels = np.concatenate(
        (training_data.labels.floor, training_data.labels.location), axis=1)
    training_coord_avg = training_data.coord_avg
    coord_est = np.zeros((n_samples, 2))
    coord_est_weighted = np.zeros((n_samples, 2))
    for i in range(n_samples):
        xs = []
        ys = []
        ws = []
        for j in idxs[i]:
            if locs_pred[i][j] >= threshold[i]:
                loc = np.zeros(n_locs)
                loc[j] = 1
                # Find the training sample matching this (floor, location).
                rows = np.where((training_labels == np.concatenate(
                    (flrs[i], loc))).all(axis=1))  # tuple of row indexes
                if rows[0].size > 0:
                    xs.append(training_df.loc[training_df.index[rows[0][0]],
                                              x_col_name])
                    ys.append(training_df.loc[training_df.index[rows[0][0]],
                                              y_col_name])
                    ws.append(locs_pred[i][j])
        if len(xs) > 0:
            # Estimate position as (weighted) average of neighbor coordinates.
            coord_est[i] = np.array((xs, ys)).mean(axis=1)
            coord_est_weighted[i] = np.array(
                (np.average(xs, weights=ws), np.average(ys, weights=ws)))
        else:
            # Fall back to the per-building(-floor) average coordinate.
            # NOTE(review): `rows` may be unbound here if the inner loop never
            # passed the threshold test — potential NameError; confirm.
            if rows[0].size > 0:
                key = str(np.argmax(blds[i])) + '-' + str(np.argmax(flrs[i]))
            else:
                key = str(np.argmax(blds[i]))
            coord_est[i] = coord_est_weighted[i] = training_coord_avg[key]
    # calculate 2D localization errors
    dist_2d = norm(coord - coord_est, axis=1)
    dist_weighted_2d = norm(coord - coord_est_weighted, axis=1)
    mean_error_2d = dist_2d.mean()
    mean_error_weighted_2d = dist_weighted_2d.mean()
    median_error_2d = np.median(dist_2d)
    median_error_weighted_2d = np.median(dist_weighted_2d)
    # calculate 3D localization errors (floor mistakes add vertical distance)
    flr_diff = np.absolute(
        np.argmax(flrs, axis=1) - np.argmax(flrs_pred, axis=1))
    z_diff_squared = (flr_height**2) * np.square(flr_diff)
    dist_3d = np.sqrt(
        np.sum(np.square(coord - coord_est), axis=1) + z_diff_squared)
    dist_weighted_3d = np.sqrt(
        np.sum(np.square(coord - coord_est_weighted), axis=1) +
        z_diff_squared)
    mean_error_3d = dist_3d.mean()
    mean_error_weighted_3d = dist_weighted_3d.mean()
    median_error_3d = np.median(dist_3d)
    median_error_weighted_3d = np.median(dist_weighted_3d)
    LocalizationResults = namedtuple('LocalizationResults', [
        'flr_acc', 'mean_error_2d', 'mean_error_weighted_2d',
        'median_error_2d', 'median_error_weighted_2d', 'mean_error_3d',
        'mean_error_weighted_3d', 'median_error_3d',
        'median_error_weighted_3d', 'elapsedTime'
    ])
    return LocalizationResults(
        flr_acc=flr_acc,
        mean_error_2d=mean_error_2d,
        mean_error_weighted_2d=mean_error_weighted_2d,
        median_error_2d=median_error_2d,
        median_error_weighted_2d=median_error_weighted_2d,
        mean_error_3d=mean_error_3d,
        mean_error_weighted_3d=mean_error_weighted_3d,
        median_error_3d=median_error_3d,
        median_error_weighted_3d=median_error_weighted_3d,
        elapsedTime=elapsedTime)
# --- WGAN-GP training script: imports and module-level configuration ---
from keras.layers.convolutional import Convolution2D, Conv2DTranspose
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.datasets import mnist
from keras import backend as K
from functools import partial
from scipy.sparse import vstack
from keras.backend import set_session, tensorflow_backend
import tensorflow as tf
import scipy

# Register a TensorFlow session with Keras that allocates GPU memory on
# demand (allow_growth) and caps usage at 80% of the device.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
config.gpu_options.per_process_gpu_memory_fraction = 0.8
set_session(tf.Session(config=config))

BATCH_SIZE = 128
# Number of Wasserstein (critic) updates per generator update; the
# WGAN-GP paper uses 5.
TRAINING_RATIO = 5
GRADIENT_PENALTY_WEIGHT = 10  # As per the paper
INPUT_SHAPE = (4096, )  # flat feature-vector input
# Network/optimizer hyperparameters; exact roles are defined by the model
# builders below this chunk — presumably: hidden width, dropout rate,
# critic width, generator LR, critic LR, decision threshold (TODO confirm).
mid_dim = 500
rate = 0.5
was_dim = 100
gen_rate = 1e-4
was_rate = 5 * 1e-4
thres = 0.9
epochs = 30


def make_generator():
def NN_wrapper(X, num_episodes=500): #inital parameters env = gym.make('CartPole-v0') #num_episodes = 100 h_layers = { 'layers': [24, 48], 'activation': ['tanh', 'tanh'], 'initializer': ['he_normal', 'he_normal'] } #Set learning parameters gamma = 0.99 e = 1 e_decay = 0.95 #95 e_min = 0.01 e_reset_decay = 0.825 epsilon_reset_interval = 40 BATCH_max = 32 episodes_till_target_update = 5 ''' print X[0] #Overwrite the initial parameters with parameters passed through the Optimizer equation [e_decay,e_min,gamma,lr,lr_d,BATCH_max,layer1,layer2,layer1_a,layer2_a] = X[0] BATCH_max = int(BATCH_max) layer1 = int(layer1) layer2 = int(layer2) layer1_a = int(layer1_a) layer2_a = int(layer2_a) layer_activations = ['tanh', 'relu', 'linear'] h_layers = {'layers':[layer1,layer2], 'activation':[layer_activations[layer1_a], layer_activations[layer2_a]], 'initializer':['he_normal', 'he_normal']} ''' [X, h_layers] = set_dtypes(X[0]) [ e_decay, e_min, gamma, lr, lr_d, BATCH_max, layer1, layer2, layer1_a, layer2_a ] = X #construct nn qnet = DDQN_Agent(4, 2, environment=env, hidden_layers=h_layers, epsilon=e, epsilon_min=e_min, epsilon_decay=e_decay, batch_size=BATCH_max, gamma=gamma, learning_rate=lr, lr_decay=lr_d) load = False simulation_mode = False train = True jList = [] r_list = [] e_list = [] loss = 0 completed = False with tf.Session() as sess: from keras import backend as K K.set_session(sess) qnet.update_target() if load: qnet.load_model('solved.h5') qnet.update_target() if simulation_mode: qnet.epsilon = 0.01 states = env.reset() states = np.reshape(states, [1, states.size]) completed = True for _ in range(1000): time.sleep(0.01) action = qnet.take_action(states) next_states, reward, done, _ = env.step(action) env.render() next_states = np.reshape(next_states, [1, states.size]) qnet.add_experience(states, action, reward, next_states, done) states = next_states while completed or not train: print crash break j_total = 0 for i in range(num_episodes): #Reset environment states = 
env.reset() states = np.reshape(states, [1, states.size]) #print states.shape r_sum = 0 done = False j = 0 #Let the simulation run: while j < 199: action = qnet.take_action(states) next_states, reward, done, _ = env.step(action) next_states = np.reshape(next_states, [1, states.size]) qnet.add_experience(states, action, reward, next_states, done) states = next_states r_sum += reward j += 1 j_total += 1 if done: break #Experience replay after ever episode, this is where the leraning happens qnet.replay() #update the epsilon value every episode e = qnet.update_epsilon(return_epsilon=True) #This equation addes discontinues behavior in hte form of a saw blade descent of epsilon. drastically improving the convergance speed and reliability qnet.reset_epsilon(i, epsilon_reset_interval, e_reset_decay) if i % episodes_till_target_update: qnet.update_target() e_list.append(e) #print(r_sum) r_list.append(r_sum) if i > 100: r_list_smooth_100 = movingaverage(r_list, 100) for k in range(len(r_list_smooth_100)): if r_list_smooth_100[k] > 195: print 'The Agent has solved the enviroment' completed = True #qnet.save_model('solved.h5') #print crash if completed: break if plot: print completed plt.figure(1) plt.title('EndScore Per Episode') plt.plot(range(len(r_list)), r_list) plt.figure(2) plt.title('Smoothed end Score Per Episode') #still need to add a offset to the moving average so it doesn't cut the front of the list off r_list_smooth = movingaverage(r_list, 10) r_list_smooth2 = movingaverage(r_list, 40) plt.plot(r_list_smooth) plt.plot(r_list_smooth2) plt.figure(3) plt.title('Greedy epsilon value') plt.plot(e_list) plt.show() #admittedly this is a very poor performance metric, but this is my first time tuning AI hyperparameters #and this metric works well enough to evaluate the optimizer, in future work i'll be using better methods ai_performance = r_list_smooth_100[len(r_list_smooth_100) - 1] print "Score: ", ai_preformance print "Using X: ", X return -ai_performance
def main():
    """Train a sentiment classifier on pre-tokenized tweets.

    Loads a pickled tokenized corpus and ground-truth labels, builds
    TF-IDF-weighted word2vec sentence vectors, reshapes them into
    (samples, 512, 1) tensors, and fits the model returned by
    build_model(), saving it to 'mean_tfidf_max_min_dnn.h5'.

    Relies on module-level names: tf, K, multiprocessing, dill, Word2Vec,
    train_test_split, TfidfVectorizer, np, tqdm, buildWordVector,
    EarlyStopping, build_model.
    """
    max_tweet_length = 30
    vector_size = 512
    # Configure a TF session (CPU threads = core count; GPU optional) and
    # register it with Keras before building the model.
    use_gpu = True
    config = tf.ConfigProto(
        intra_op_parallelism_threads=multiprocessing.cpu_count(),
        inter_op_parallelism_threads=multiprocessing.cpu_count(),
        allow_soft_placement=True,
        device_count={
            'CPU': multiprocessing.cpu_count(),
            'GPU': 1 if use_gpu else 0
        })
    session = tf.Session(config=config)
    K.set_session(session)
    model = build_model(vector_size)
    # File names; keras_model and dataset_location are not used below.
    keras_model = "deep_nn_weights.h5"
    model_name = 'tweet_word2vec.model'
    # Static names.
    dataset_location = './Sentiment Analysis Dataset.csv'
    model_location = './model/'
    tokenized_corpus_name = "tokenized_tweet_corpus.dill"
    groun_truth_name = 'ground_truth_tokenized_tweet_corpus.dill'
    model_name = 'tweet_word2vec.model'
    # Load the pickled corpus and labels.
    with open(model_location + tokenized_corpus_name, 'rb') as f:
        tokenized_corpus = dill.load(f)
    with open(model_location + groun_truth_name, 'rb') as f:
        ground_truth = dill.load(f)
    # Load the trained word2vec model and retrieve its word vectors.
    word2vec = Word2Vec.load(model_location + model_name)
    X_vecs = word2vec.wv
    batch_size = 64
    nb_epochs = 5
    test_size = 100000
    validation_size = 100000
    train_size = len(tokenized_corpus) - test_size - validation_size
    print("Train Size:{}, Validation Size:{}, Test Size:{}".format(
        train_size, validation_size, test_size))
    # NOTE(review): the actual split below uses test_size=0.10, not the
    # sizes printed above — confirm which is authoritative.
    X_corp_train, X_corp_valid, Y_train, Y_valid = train_test_split(
        tokenized_corpus, ground_truth, test_size=0.10, random_state=69)
    # TF-IDF over the already-tokenized tweets (identity analyzer).
    vectorizer = TfidfVectorizer(analyzer=lambda x: x, min_df=10)
    matrix = vectorizer.fit_transform([x for x in X_corp_train])
    tfidf = dict(zip(vectorizer.get_feature_names(), vectorizer.idf_))
    print('vocab size :', len(tfidf))
    # Build one TF-IDF-weighted word2vec vector per tweet.
    train_vecs_w2v = np.concatenate(
        [buildWordVector(z, 512, X_vecs, tfidf) for z in tqdm(X_corp_train)])
    valid_vecs_w2v = np.concatenate(
        [buildWordVector(z, 512, X_vecs, tfidf) for z in tqdm(X_corp_valid)])
    # Boolean mask dropping rows containing NaNs (e.g. tweets with no
    # in-vocabulary words); the same mask is applied to the labels.
    dummy_valid = [~np.isnan(train_vecs_w2v).any(axis=1)]
    train_vecs_w2v = train_vecs_w2v[dummy_valid]
    # Convert back to a (samples, 512, 1) tensor.
    train_vecs_w2v = train_vecs_w2v.reshape(
        (train_vecs_w2v.shape[0], train_vecs_w2v.shape[1], 1))
    Y_train = np.array(Y_train)
    Y_train = Y_train.reshape((len(Y_train), 1))
    Y_train = Y_train[dummy_valid]
    # Same NaN filtering and reshaping for the validation split.
    dummy_valid = [~np.isnan(valid_vecs_w2v).any(axis=1)]
    valid_vecs_w2v = valid_vecs_w2v[dummy_valid]
    # Convert to tensor.
    valid_vecs_w2v = valid_vecs_w2v.reshape(
        (valid_vecs_w2v.shape[0], valid_vecs_w2v.shape[1], 1))
    Y_valid = np.array(Y_valid)
    Y_valid = Y_valid.reshape((len(Y_valid), 1))
    Y_valid = Y_valid[dummy_valid]
    del dummy_valid
    #dump_files = [X_train, Y_train, X_valid, Y_valid, X_test, Y_test ]
    # NOTE: materializing all splits at once is very memory-intensive.
    print("Dataset has been created ")
    print("DATA SHAPE: {}".format(train_vecs_w2v.shape))
    model.fit(train_vecs_w2v,
              Y_train,
              batch_size=batch_size,
              shuffle=True,
              epochs=nb_epochs,
              validation_data=(valid_vecs_w2v, Y_valid),
              callbacks=[EarlyStopping(min_delta=0.00025, patience=2)])
    print("Model complete, saving")
    #model.save_weights('contin_deep_nn_2_weights.h5')
    model.save("mean_tfidf_max_min_dnn.h5")
# (tail of build_classifier: maps a model-name string to a classifier
# instance; falls through — the last matching branch wins, and an unknown
# name would leave `classifier` unbound)
    if model_name == "cnnlstm":
        classifier = CNNLSTMClassifier()
    if model_name == "stacked":
        classifier = StackedCNNClassifier()
    if model_name == "tcn":
        classifier = TCNClassifier()
    if model_name == "attention":
        classifier = AttentionClassifier()
    if model_name == "capsule":
        classifier = CapsuleClassifier()
    return classifier


if __name__ == "__main__":
    # Script entry point: configure GPU, load the cached dataset, and run
    # the TCN classifier in mode 0.
    warnings.filterwarnings('ignore')
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Grow GPU memory on demand instead of pre-allocating the whole device.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    set_session(session)
    data_file = "./dataset/.cache/data.pkl"
    with open(data_file, "rb") as fp:
        dataUtils = pickle.load(fp)
    classifier = build_classifier("tcn")
    classifier.load_data(dataUtils, data_file)
    classifier.run(mode=0)
def playGame(train_indicator=0):  # 1 means Train, 0 means simply Run
    """Run/train a DDPG agent (actor + critic) in the TORCS driving
    environment, plus an auxiliary "dangerQ" critic trained to predict
    early episode termination (crash risk).

    Side effects: saves weights under saved/, appends per-episode stats to
    logs/<model_name>.csv, writes reward plots to train_plots/ and raw
    episode transitions to samples/. Written for Python 2 (print
    statements); relies on module-level np, tf, plt, pickle, OU,
    ActorNetwork, CriticNetwork, ReplayBuffer, TorcsEnv.
    """
    BUFFER_SIZE = 100000
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001  # Target network soft-update rate
    LRA = 0.0001  # Learning rate for Actor
    LRC = 0.001  # Learning rate for Critic

    action_dim = 3  # Steering / Acceleration / Brake
    state_dim = 24  # Number of sensor inputs

    np.random.seed(1337)

    vision = False

    EXPLORE = 300000.  # Steps over which exploration noise decays to 0
    episode_count = 20000
    max_steps = 100000
    reward = 0
    done = False
    step = 0
    epsilon = 1.0
    # epsilon = 1
    indicator = 0

    # TensorFlow GPU optimization: allocate GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
    critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC)
    buff = ReplayBuffer(BUFFER_SIZE)  # Create replay buffer
    # Auxiliary critic + buffer for predicting dangerous (early-ending)
    # transitions.
    dangerQ_critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE,
                                   TAU, LRC)
    dangerQ_buff = ReplayBuffer(BUFFER_SIZE)

    # Generate a TORCS environment.
    env = TorcsEnv(vision=vision, throttle=True, gear_change=False)

    # Now load the weight
    # load_name = "sample_v0_40"
    # print("Now we load the weight")
    # try:
    #     actor.model.load_weights("saved/actormodel_{}.h5".format(load_name))
    #     critic.model.load_weights("saved/criticmodel_{}.h5".format(load_name))
    #     actor.target_model.load_weights("saved/actormodel_{}.h5".format(load_name))
    #     critic.target_model.load_weights("saved/criticmodel_{}.h5".format(load_name))
    #     print("Weight load successfully")
    # except:
    #     print("Cannot find the weight")

    plt.figure()
    overall_scores = []
    model_name = "dangerQ_v0"

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))

        if np.mod(i, 3) == 0:
            ob = env.reset(
                relaunch=True
            )  # relaunch TORCS every 3 episodes because of the memory leak error
        else:
            ob = env.reset()

        # State vector: angle + 19 track sensors + trackPos + 3 speeds = 24.
        s_t = np.hstack(
            (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
             ob.speedZ))

        total_reward = 0.
        cur_sample = []
        for j in range(max_steps):
            # if j == 50:
            #     time.sleep(0.099)
            #     continue
            loss = 0
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))
            # if j > 120:
            # Ornstein-Uhlenbeck exploration noise, scaled by the decaying
            # epsilon and disabled when not training.
            noise_t[0][0] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][0], 0.0, 0.60, 0.30)
            noise_t[0][1] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][1], 0.5, 1.00, 0.10)
            noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][2], -0.1, 1.00, 0.05)

            # The following code does the stochastic brake:
            # if random.random() <= 0.1:
            #     print("********Now we apply the brake***********")
            #     noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2], 0.2 , 1.00, 0.10)

            a_t[0][0] = a_t_original[0][0] + noise_t[0][0]
            a_t[0][1] = a_t_original[0][1] + noise_t[0][1]
            a_t[0][2] = a_t_original[0][2] + noise_t[0][2]

            # Early-episode throttle boost to get the car moving.
            if j < 20 and train_indicator:
                a_t[0][1] += 0.5

            # if j == 91:
            #     print("adversarial attack!")
            #     if a_t[0][0] > 0:
            #         a_t[0][0] = -0.6
            #     else:
            #         a_t[0][0] = 0.6
            #     # print("%.2f"%a_t[0][0])
            #     a_t[0][2] += 0.7
            #     if ob.speedX > 0.6:
            #         a_t[0][1] = 0
            # if(step == 60):
            #     a_t[0][0] = 1.0

            ob, r_t, done, info = env.step(a_t[0])
            print "step: {} reward: {:.2f} action: {:.2f} {:.2f} {:.2f} ".format(
                j, r_t, a_t[0][0], a_t[0][1], a_t[0][2])
            # print "{:.5f} {:.5f} {:.5f} ".format(ob.angle, ob.trackPos, ob.speedX)
            # print "{:.5f} {:.5f} {:.5f} {:.5f} {:.5f}".format(r_t, ob.speedX, ob.speedY, ob.speedZ, ob.rpm)
            # if(r_t < -50):
            #     r_t -= 10000
            #     done = True

            # Heavy penalty and episode end when the engine stalls
            # (near-zero rpm after the start-up phase).
            if j > 20 and ob.rpm <= 0.09426:
                r_t -= 1000
                done = True
            # Cap episodes at 500 steps.
            if j > 500 and not done:
                done = True
            theta = 0.1
            s_t1 = np.hstack(
                (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                 ob.speedZ))
            # s_t1_new = np.array([val + np.abs(val)*random.uniform(-1,1)*theta for val in s_t1])
            # print(np.linalg.norm(s_t1_new - s_t1))
            # s_t1 = s_t1_new

            buff.add(s_t, a_t[0], r_t, s_t1, done)  # Add to replay buffer
            cur_step_sample = [
                s_t.tolist(), a_t[0].tolist(), r_t, s_t1.tolist(), done
            ]
            cur_sample.append(cur_step_sample)

            # dangerQ reward: 1 iff the episode ended early (before the
            # 500-step cap), i.e. a crash/stall happened.
            if j <= 500 and done:
                dangerQ_r = 1
            else:
                dangerQ_r = 0
            dangerQ_buff.add(s_t, a_t[0], dangerQ_r, s_t1, done)

            # Do the batch update.
            batch = buff.getBatch(BATCH_SIZE)
            states = np.asarray([e[0] for e in batch])
            actions = np.asarray([e[1] for e in batch])
            rewards = np.asarray([e[2] for e in batch])
            new_states = np.asarray([e[3] for e in batch])
            dones = np.asarray([e[4] for e in batch])
            # y_t is initialized from the actions purely to get an array of
            # the right length/dtype; every entry is overwritten below.
            y_t = np.asarray([e[1] for e in batch])

            target_q_values = critic.target_model.predict(
                [new_states,
                 actor.target_model.predict(new_states)])

            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            if (train_indicator):
                loss += critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()

            # Train danger Q.
            # NOTE(review): this whole dangerQ update runs even when
            # train_indicator == 0 — confirm that is intended.
            dangerQ_batch = dangerQ_buff.getBatch(BATCH_SIZE)
            dangerQ_states = np.asarray([e[0] for e in dangerQ_batch])
            dangerQ_actions = np.asarray([e[1] for e in dangerQ_batch])
            dangerQ_rewards = np.asarray([e[2] for e in dangerQ_batch])
            dangerQ_new_states = np.asarray([e[3] for e in dangerQ_batch])
            dangerQ_dones = np.asarray([e[4] for e in dangerQ_batch])
            # Same shape-only initialization trick as y_t above.
            dangerQ_y_t = np.asarray([e[1] for e in dangerQ_batch])

            # NOTE(review): next_actions is computed from `new_states` (the
            # main batch), not `dangerQ_new_states` which the states below
            # come from — likely a bug; confirm.
            next_actions = actor.target_model.predict(new_states)
            dangerQ_target_q_values = []
            for a_index in range(len(next_actions)):
                next_action = next_actions[a_index]
                next_action_array = []
                # Sweep steering over 21 values in [-1, 1] and take the max
                # target Q (worst-case danger over steering choices).
                for k in range(21):
                    new_action = np.copy(next_action)
                    # NOTE(review): this mutates next_action AFTER copying,
                    # so each appended copy carries the PREVIOUS k's
                    # steering (and the first carries the original) —
                    # `new_action[0] = ...` looks intended; confirm.
                    next_action[0] = (k - 10.0) / 10.0
                    next_action_array.append(new_action)
                next_action_array = np.array(next_action_array)
                cur_next_state_array = np.array(
                    [dangerQ_new_states[a_index] for k in range(21)])
                cur_target_q_pre = dangerQ_critic.target_model.predict(
                    [cur_next_state_array, next_action_array])
                cur_target_q = np.max(cur_target_q_pre)
                dangerQ_target_q_values.append(cur_target_q)
            dangerQ_target_q_values = np.array(dangerQ_target_q_values)
            # dangerQ_target_q_values = dangerQ_critic.target_model.predict([dangerQ_new_states, actor.target_model.predict(new_states)])

            for k in range(len(dangerQ_batch)):
                # NOTE(review): indexes `dones` (main batch) rather than
                # `dangerQ_dones` — likely a bug; confirm.
                if dones[k]:
                    dangerQ_y_t[k] = dangerQ_rewards[k]
                else:
                    dangerQ_y_t[k] = dangerQ_rewards[
                        k] + GAMMA * dangerQ_target_q_values[k]
            dangerQ_critic.model.train_on_batch(
                [dangerQ_states, dangerQ_actions], dangerQ_y_t)
            dangerQ_critic.target_train()

            total_reward += r_t
            s_t = s_t1
            # print("Episode", i, "Step", step, "Action", a_t, "Reward", r_t, "Loss", loss)
            step += 1
            if done:
                break

        if np.mod(i, 3) == 0:
            if (train_indicator):
                print("Now we save model")
                actor.model.save_weights("saved/actormodel_{}_{}.h5".format(
                    model_name, int(step / 10000)),
                                         overwrite=True)
                # with open("actormodel.json", "w") as outfile:
                #     json.dump(actor.model.to_json(), outfile)
                critic.model.save_weights("saved/criticmodel_{}_{}.h5".format(
                    model_name, int(step / 10000)),
                                          overwrite=True)
                # with open("criticmodel.json", "w") as outfile:
                #     json.dump(critic.model.to_json(), outfile)
                # NOTE(review): this saves the MAIN critic's weights under
                # the dangerQ filename; `dangerQ_critic.model` looks
                # intended — confirm.
                critic.model.save_weights(
                    "saved/dangerQ_criticmodel_{}_{}.h5".format(
                        model_name, int(step / 10000)),
                    overwrite=True)

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Reward " +
              str(total_reward))
        print("Total Step: " + str(step))
        print("")
        # Append episode index, final step count, and reward to the log.
        s = "{},{},{:.3f}\n".format(i, j, total_reward)
        with open('logs/{}.csv'.format(model_name), 'a') as the_file:
            the_file.write(s)
        overall_scores.append(total_reward)
        plt.clf()
        plt.plot(overall_scores)
        plt.savefig("train_plots/{}_{}.jpg".format(model_name,
                                                   int(step / 10000)))
        # Dump the raw transitions of this episode for offline analysis.
        with open('samples/{}_{:05d}.pk'.format(model_name, i),
                  'w') as outfile:
            pickle.dump(cur_sample, outfile)

    env.end()  # This is for shutting down TORCS
    print("Finish.")
def set_current_session(self, session):
    """Register `session` as the global TensorFlow session used by Keras.

    Thin wrapper over keras.backend.set_session; affects process-wide
    Keras state, not just this instance.
    """
    K.set_session(session)
def siso_regression_uji(
        gpu_id: int,
        dataset: str,
        frac: float,
        validation_split: float,
        preprocessor: str,
        batch_size: int,
        epochs: int,
        optimizer: str,
        dropout: float,
        corruption_level: float,
        dae_hidden_layers: list,
        sdae_hidden_layers: list,
        cache: bool,
        regression_hidden_layers: list,
        verbose: int
):
    """Multi-floor indoor localization based on three-dimensional regression
    of location coordinates using a single-input and single-output (SISO)
    deep neural network (DNN) model and UJIIndoorLoc datasets.

    Keyword arguments:
    gpu_id -- ID of the GPU to use; a negative value forces CPU-only
    dataset -- data set name; only 'uji' is supported
    frac -- fraction of the data to use (passed to UJIIndoorLoc)
    validation_split -- fraction of training data used for validation
    preprocessor -- name of the RSS preprocessor (passed to UJIIndoorLoc)
    batch_size -- training batch size
    epochs -- maximum number of training epochs
    optimizer -- name of the Keras optimizer
    dropout -- dropout rate for the regression hidden layers
    corruption_level -- input corruption level for the SDAE (if used)
    dae_hidden_layers -- DAE hidden-layer sizes, or '' to skip the DAE
    sdae_hidden_layers -- SDAE hidden-layer sizes, or '' to skip the SDAE
    cache -- whether to use cached data/models
    regression_hidden_layers -- regressor hidden-layer sizes, or '' for none
    verbose -- Keras fit() verbosity level

    Returns a LocalizationResults namedtuple with floor accuracy, 2D/3D
    mean and median errors, and the training time.
    """
    ### initialize numpy, random, TensorFlow, and keras
    np.random.seed()  # based on current time or OS-specific randomness source
    rn.seed()  # "
    tf.set_random_seed(rn.randint(0, 1000000))
    if gpu_id >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ''
    # session_conf is a module-level tf.ConfigProto defined at import time.
    sess = tf.Session(
        graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)

    ### load datasets after scaling
    print("Loading data ...")
    if dataset == 'uji':
        from ujiindoorloc import UJIIndoorLoc
        uji = UJIIndoorLoc(
            cache=cache,
            frac=frac,
            preprocessor=preprocessor,
            classification_mode='hierarchical',
            grid_size=0)
    else:
        print("'{0}' is not a supported data set.".format(dataset))
        sys.exit(0)
    flr_height = uji.floor_height
    training_df = uji.training_df
    training_data = uji.training_data
    testing_df = uji.testing_df
    testing_data = uji.testing_data

    ### build and train a SIMO model
    print(
        "Building and training a SISO model for three-dimensional regression ..."
    )
    rss = training_data.rss_scaled
    coord = training_data.coord_3d_scaled
    coord_scaler = training_data.coord_3d_scaler  # for inverse transform
    labels = training_data.labels
    input = Input(shape=(rss.shape[1], ), name='input')  # common input

    # (optional) build deep autoencoder or stacked denoising autoencoder
    # as a learned front-end for the raw RSS input
    if dae_hidden_layers != '':
        print("- Building a DAE model ...")
        model = deep_autoencoder(
            dataset=dataset,
            input_data=rss,
            preprocessor=preprocessor,
            hidden_layers=dae_hidden_layers,
            cache=cache,
            model_fname=None,
            optimizer=optimizer,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split)
        x = model(input)
    elif sdae_hidden_layers != '':
        print("- Building an SDAE model ...")
        model = sdae(
            dataset=dataset,
            input_data=rss,
            preprocessor=preprocessor,
            hidden_layers=sdae_hidden_layers,
            cache=cache,
            model_fname=None,
            optimizer=optimizer,
            corruption_level=corruption_level,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split)
        x = model(input)
    else:
        x = input

    # regression hidden layers: Dense -> BN -> ReLU -> Dropout per layer
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout)(x)
    if regression_hidden_layers != '':
        for units in regression_hidden_layers:
            x = Dense(units)(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = Dropout(dropout)(x)

    # coordinates regression output
    x = Dense(coord.shape[1], kernel_initializer='normal')(x)
    x = BatchNormalization()(x)
    coordinates_output = Activation(
        'linear', name='coordinates_output')(x)  # 'linear' activation

    model = Model(inputs=input, outputs=coordinates_output)
    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=['mean_squared_error'])

    # Keep only the best (lowest val_loss) weights; stop after 10 epochs
    # without improvement.
    weights_file = os.path.expanduser("~/tmp/best_weights.h5")
    checkpoint = ModelCheckpoint(
        weights_file, monitor='val_loss', save_best_only=True, verbose=0)
    early_stop = EarlyStopping(
        monitor='val_loss', min_delta=0, patience=10, verbose=0)

    print("- Training a coordinates regressor ...", end='')
    startTime = timer()
    history = model.fit(
        x={'input': rss},
        y={'coordinates_output': coord},
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=[checkpoint, early_stop],
        validation_split=validation_split,
        shuffle=True)
    elapsedTime = timer() - startTime
    print(" completed in {0:.4e} s".format(elapsedTime))
    model.load_weights(weights_file)  # load weights from the best model

    ### evaluate the model
    print("Evaluating the model ...")
    rss = testing_data.rss_scaled
    labels = testing_data.labels
    flrs = labels.floor
    coord = testing_data.coord_3d  # original coordinates

    # calculate the classification accuracies and localization errors
    coords_scaled_pred = model.predict(rss, batch_size=batch_size)
    coord_est = coord_scaler.inverse_transform(coords_scaled_pred)  # inverse-scaling

    # Derive the predicted floor from the estimated height.
    tmp = np.maximum(np.minimum(coord_est[:, 2], 4 * uji.floor_height), 0)  # clamping to [0, 4*uji.floor_height]
    flrs_pred = np.floor(tmp / uji.floor_height + 0.5)  # floor number (0..4); N.B. round() behavior in Python 3 has been changed, so we cannot use it
    flr_results = (np.equal(np.argmax(flrs, axis=1), flrs_pred)).astype(int)
    flr_acc = flr_results.mean()

    # calculate 2D localization errors
    # NOTE(review): coord/coord_est are 3D here, so this norm includes the
    # z component despite the "2D" name — confirm intent.
    dist_2d = norm(coord - coord_est, axis=1)
    mean_error_2d = dist_2d.mean()
    median_error_2d = np.median(dist_2d)

    # calculate 3D localization errors: add a z term derived from the
    # floor-number difference times the floor height
    flr_diff = np.absolute(np.argmax(flrs, axis=1) - flrs_pred)
    z_diff_squared = (flr_height**2) * np.square(flr_diff)
    dist_3d = np.sqrt(
        np.sum(np.square(coord - coord_est), axis=1) + z_diff_squared)
    mean_error_3d = dist_3d.mean()
    median_error_3d = np.median(dist_3d)

    LocalizationResults = namedtuple('LocalizationResults', [
        'flr_acc', 'mean_error_2d', 'median_error_2d', 'mean_error_3d',
        'median_error_3d', 'elapsedTime'
    ])
    return LocalizationResults(
        flr_acc=flr_acc,
        mean_error_2d=mean_error_2d,
        median_error_2d=median_error_2d,
        mean_error_3d=mean_error_3d,
        median_error_3d=median_error_3d,
        elapsedTime=elapsedTime)
# --- Reproducibility and session setup for the GRU forecasting script ---
import random as rn
import numpy as np
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # force CPU-only execution
# NOTE(review): setting PYTHONHASHSEED from inside the running interpreter
# does not affect this process's hash randomization — confirm it is also
# exported before launch if hash determinism matters.
os.environ['PYTHONHASHSEED'] = '0'

# Fix numpy and stdlib random seeds for reproducibility.
np.random.seed(42)
rn.seed(12345)

# Single-threaded TF execution to remove nondeterminism from parallelism.
# NOTE(review): `tf` is not imported in this chunk — presumably
# `import tensorflow as tf` appears earlier in the file; confirm.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)

from keras import backend as K

tf.set_random_seed(1234)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from deap import base, creator, tools, algorithms
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import GRU
from keras.optimizers import adam
from keras import regularizers
def _main():
    """Fine-tune a YOLO face detector on the WIDER FACE training list.

    Stage 1 (currently disabled via `if False:`) trains with most layers
    frozen; stage 2 unfreezes all layers and continues training with
    checkpointing, LR reduction, and early stopping. Weights are written
    under `log_dir`.
    """
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    from keras import backend as K

    # Allocate GPU memory on demand and register the session with Keras.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    annotation_path = 'dataset/WIDER_train.txt'  # training annotations
    classes_path = 'configs/wider_classes.txt'  # class names file
    log_dir = 'logs/004/'  # log / checkpoint directory
    # pretrained_path = 'model_data/yolo_weights.h5'  # pretrained model
    pretrained_path = 'logs/003/ep074-loss26.535-val_loss27.370.h5'  # pretrained model
    anchors_path = 'configs/yolo_anchors.txt'  # anchor boxes file

    class_names = get_classes(classes_path)  # list of class names
    num_classes = len(class_names)  # number of classes
    anchors = get_anchors(anchors_path)  # list of anchor boxes

    input_shape = (416, 416)  # input image size; must be a multiple of 32

    model = create_model(
        input_shape, anchors, num_classes,
        freeze_body=2,
        weights_path=pretrained_path)  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3)  # store weights only, every 3rd epoch at best
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss', factor=0.1, patience=3,
        verbose=1)  # reduce the learning rate when val_loss plateaus
    early_stopping = EarlyStopping(
        monitor='val_loss', min_delta=0, patience=10,
        verbose=1)  # stop when val_loss stops improving

    val_split = 0.1  # fraction of samples used for validation
    with open(annotation_path) as f:
        lines = f.readlines()
    # Deterministic shuffle of the sample list, then restore entropy.
    np.random.seed(47)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)  # number of validation samples
    num_train = len(lines) - num_val  # number of training samples

    # Translation of the note below: treat the target as an extra input,
    # making a multi-input model, and implement the loss as a layer that is
    # the model's final output. Then compile with loss = y_pred (the model
    # output IS the loss), ignore y_true, and at training time feed any
    # correctly-shaped array as y_true.
    """
    把目标当成一个输入,构成多输入模型,把loss写成一个层,作为最后的输出,搭建模型的时候,
    就只需要将模型的output定义为loss,而compile的时候,
    直接将loss设置为y_pred(因为模型的输出就是loss,所以y_pred就是loss),
    无视y_true,训练的时候,y_true随便扔一个符合形状的数组进去就行了。
    """
    # Stage 1 (frozen-body warm-up) — currently disabled.
    if False:
        model.compile(
            optimizer=Adam(lr=1e-3),
            loss={
                # use the custom yolo_loss Lambda layer
                'yolo_loss': lambda y_true, y_pred: y_pred
            })  # loss function

        batch_size = 32  # batch size
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(
                lines[num_train:], batch_size, input_shape, anchors,
                num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=50,
            initial_epoch=0,
            callbacks=[logging, checkpoint])
        model.save_weights(
            log_dir + 'trained_weights_stage_1.h5'
        )  # save final weights (intermediate ones are saved via callbacks)

    if True:  # Stage 2: unfreeze everything and fine-tune.
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(
            optimizer=Adam(lr=1e-4),
            loss={
                'yolo_loss': lambda y_true, y_pred: y_pred
            })  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 16  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=100,
            initial_epoch=50,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')
#!/usr/bin/env python3 # reproducible results import numpy as np import random as rn import tensorflow as tf np.random.seed(1337) rn.seed(1337) tf.set_random_seed(1337) import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['PYTHONHASHSEED'] = '0' from keras import backend as bke s = tf.Session(graph=tf.get_default_graph()) bke.set_session(s) # the rest of imports import sys, random, gc, keras sys.path.append('../Lib/') sys.dont_write_bytecode = True from sklearn.metrics import f1_score from keras.callbacks import EarlyStopping from sklearn.model_selection import train_test_split # ignore sklearn warnings def warn(*args, **kwargs): pass import warnings
from sklearn.metrics import roc_auc_score
from mmoe import MMoE

SEED = 1

# Fix numpy seed for reproducibility
np.random.seed(SEED)

# Fix random seed for reproducibility
random.seed(SEED)

# Fix TensorFlow graph-level seed for reproducibility
tf.set_random_seed(SEED)
tf_session = tf.Session(graph=tf.get_default_graph())
K.set_session(tf_session)


# Simple callback to print out ROC-AUC
class ROCCallback(Callback):
    def __init__(self, training_data, validation_data, test_data):
        # Each *_data argument is an (X, Y) pair; keep the pieces as
        # attributes for use during training.
        self.train_X = training_data[0]
        self.train_Y = training_data[1]
        self.validation_X = validation_data[0]
        self.validation_Y = validation_data[1]
        self.test_X = test_data[0]
        self.test_Y = test_data[1]

    # logs={} mutable default follows the Keras Callback signature
    # convention; the dict is never mutated here.
    def on_train_begin(self, logs={}):
        return
def limit_mem():
    """Swap the current Keras TensorFlow session for a memory-friendly one.

    Closes the session Keras currently holds and installs a fresh
    tf.Session whose GPU options request on-demand (allow_growth)
    allocation rather than grabbing all GPU memory up front.
    """
    # Release whatever session (and GPU memory) Keras is holding now.
    K.get_session().close()
    growth_config = K.tf.ConfigProto()
    growth_config.gpu_options.allow_growth = True
    K.set_session(K.tf.Session(config=growth_config))
def main(argv=None):
    '''Train a simple deep CNN on the CIFAR10 small images dataset on
    a SLURM cluster.'''
    main.__doc__ = __doc__
    # NOTE(review): list.extend() returns None, so when argv is provided
    # the local `argv` is rebound to None here; only the side effect on
    # sys.argv survives. Confirm this is intended before reusing argv.
    argv = sys.argv if argv is None else sys.argv.extend(argv)
    desc = main.__doc__  # .format(os.path.basename(__file__))
    # CLI parser
    args = parser_(desc)

    # print('RDMA: {}'.format(args.rdma))
    # rdma = getattr(args, 'rdma', None)
    rdma = args.rdma
    network = args.network
    # print('NETWORK: {}'.format(network))

    checkpt = getattr(args, 'checkpt', None)
    checkpt_flag = False if checkpt is None else True
    filepath = checkpt
    # print('CHECKPT:', checkpt)

    batch_size = 32  # per-device base; scaled by worker-device count below
    num_classes = 10
    epochs = args.epochs
    data_augmentation = args.aug

    logdevp = args.logdevp

    # ---------------------------------------------- Distributed setup on SLURM
    # Specifying network necessary for protocol='grpc+gdr'. GDR doesn't find
    # IB addresses automatically like 'grpc+verbs'.
    # The 'ib.cluster' is specific to NVIDIA psgcluster.
    # network = 'ib.cluster' if rdma == 'gdr' else None
    # network = 'ib.cluster'
    # On fast network even without RDMA speed up significant. RDMA still helps.
    scpar = SlurmClusterParser(network=network)
    cmgr_facade = TFClusterManagerFacade(scpar)

    logdevp_flag = True if _DEVPROF or logdevp else False
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(
        log_device_placement=logdevp_flag,  # True,
        allow_soft_placement=True,
        gpu_options=gpu_options)

    print('\n\tCLUSTER_SPEC_DICT: {}\n'.format(cmgr_facade.clusterspec_dict))

    # TF 1.2.x RDMA: specify protocol='grpc+verbs' in server below.
    protocol = ProtocolType.get_server_protocol_str(rdma)
    # print('PROTOCOL: {}'.format(protocol))
    server = cmgr_facade.get_server(config, protocol=protocol)
    tfsess = cmgr_facade.get_session(server)
    KB.set_session(tfsess)

    #: :type cluster_spec: tf.train.ClusterSpec
    # cluster_spec = cmgr_facade.get_cluster_spec()
    job_type = cmgr_facade.myjobtype
    # task_id = cmgr_facade.mytask_id

    is_chief = cmgr_facade.is_chief

    if job_type == JobType.ps:
        # JOIN PARAMETER SERVERS
        # server.join()
        cmgr_facade.join(server)

    # Once the server is started everything but the chief worker can join
    # the server and wait to process/service graph computations. Chief pushes
    # the compute graph. COMPARE TO: cifar10_cnn_distrib_v2_slurm
    if not is_chief:
        # JOIN WORKERS EXCEPT FOR CHIEF
        cmgr_facade.join(server)

    # sleep(2)  # Have the chief wait just in case. Occasionally get errors.

    # The ngpus per host needs to be done with MPI or somehow sync'd. Currently
    # assuming all hosts have the same number of GPUs.
    gdev_list = get_available_gpus()
    ngpus = len(gdev_list)

    # List of all devices. The devices might be associated to the same worker.
    wgdev_list = cmgr_facade.get_allworkers_devlist(ngpus)
    print('\n\tWGDEV_LIST: {}\n'.format(
        [dev.to_string() for dev in wgdev_list]))  # DEBUG

    # If 2 workers ea. w/ 4 devices then nworker_devices_total == 2 * 4 = 8
    # If 4 workers ea. w/ 1 devices then nworker_devices_total == 4 * 1 = 4
    nworker_devices_total = len(wgdev_list)
    # Scale the global batch size with the total number of worker devices.
    batch_size = batch_size * nworker_devices_total

    psdev_list = cmgr_facade.get_allps_devlist()
    print('\n\tPSDEV_LIST: {}\n'.format(
        [dev.to_string() for dev in psdev_list]))  # DEBUG

    # ------------------------------------ Data loading and basic preprocessing
    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Convert class vectors to binary class matrices.
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    # Scale pixel values to [0, 1].
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    nsamples = x_train.shape[0]
    steps_per_epoch = nsamples // batch_size

    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # --------------------------------------------- Setup model and parallelize
    def _load_fn(unused_op):
        # Uniform cost estimate: spread variables evenly across PS tasks.
        return 1

    cspec = cmgr_facade.get_cluster_spec()
    num_ps = cmgr_facade.num_ps
    ps_strategy = \
        tf.contrib.training.GreedyLoadBalancingStrategy(num_ps, _load_fn)

    # ps_device = tf.DeviceSpec(job=JobType.ps, device_type=DevType.cpu,
    #                           device_index=0).to_string()
    rdsetter = tf.train.replica_device_setter(
        cluster=cspec,
        ps_strategy=ps_strategy,
        # ps_device=ps_device,  # '/job:ps/cpu:0'  # seems to work
        # ps_device='/gpu:0'  # for gdr maybe
    )
    with tf.device(rdsetter):
        model_init = make_model(x_train.shape[1:], num_classes,
                                filepath if checkpt_flag else None)

    callbacks = None
    if checkpt_flag:
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        callbacks = [checkpoint]

    # Data-Parallelize the model via function or class.
    model = make_parallel(model_init, wgdev_list)  # , ps_device='/gpu:0'
    print_mgpu_modelsummary(model)

    # ------------------------------------------------------------ Run training
    # Scale the learning rate with the number of worker devices.
    lr = 0.0001 * nworker_devices_total
    opt = RMSprop(lr=lr, decay=1e-6)

    # Let's train the model using RMSprop
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    if not data_augmentation:
        print('Not using data augmentation.')
        model.fit(x_train,
                  y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, y_test),
                  shuffle=True,
                  callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            # divide inputs by std of the dataset
            featurewise_std_normalization=False,
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            # randomly rotate images in the range (degrees, 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally (fraction of total width)
            width_shift_range=0.1,
            # randomly shift images vertically (fraction of total height)
            height_shift_range=0.1,
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images

        # Compute quantities required for feature-wise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)

        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            steps_per_epoch=steps_per_epoch,
                            epochs=epochs,
                            validation_data=(x_test, y_test),
                            callbacks=callbacks)

    # Run Validation (chief only) against the non-parallel model instance.
    if is_chief:
        model_init.compile(loss='categorical_crossentropy',
                           optimizer=opt,
                           metrics=['accuracy'])
        metrics = model_init.evaluate(x=x_test, y=y_test,
                                      batch_size=batch_size)
        print('\nCIFAR VALIDATION LOSS, ACC: {}, {}'.format(*metrics))

    # ------------------------------------------------------------- STOP SERVER
    cmgr_facade.stop_chief(server)
from sklearn.metrics.pairwise import cosine_similarity from sklearn.feature_extraction.text import TfidfVectorizer import tensorflow as tf import keras from keras.layers import * from keras.models import * from keras.callbacks import * from keras import optimizers from keras import backend as K from keras.utils import Sequence, to_categorical from keras.preprocessing.sequence import pad_sequences cfg = K.tf.ConfigProto() cfg.gpu_options.allow_growth = True K.set_session(K.tf.Session(config=cfg)) data_dir = 'data/' models_dir = 'models/' results_dir = 'results/' def predict(model, seq, convert2text=ohe_seq2chars): spell_pred = [] names_pred = [] for i in tqdm.tqdm(range(0, len(seq))): batch_spell_pred, batch_names_pred = model.predict_on_batch( seq.__getitem__(i)) spell_pred.append(batch_spell_pred)