def init_dist(args, backend="nccl"):
    """Set up the process-level environment for (optionally) distributed runs.

    The multiprocessing start method is set to ``"spawn"`` when no start
    method has been chosen yet.  If ``dist.is_available()`` is false,
    ``args.launcher`` is overwritten with ``"none"``.

    Args:
        args: Namespace-like object.  ``args.launcher`` is read (and possibly
            overwritten); ``args.total_gpus`` is written when the launcher is
            ``"none"``.
        backend: Passed through to ``init_dist_pytorch`` / ``init_dist_slurm``.

    Returns:
        ``True`` when a distributed launcher (``"pytorch"`` or ``"slurm"``)
        was initialised, ``False`` for the ``"none"`` launcher.

    Raises:
        ValueError: If ``args.launcher`` is not one of the supported values.
    """
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method("spawn")

    if not dist.is_available():
        args.launcher = "none"

    if args.launcher == "pytorch":
        # DDP
        init_dist_pytorch(args, backend)
        return True
    if args.launcher == "slurm":
        # DDP
        init_dist_slurm(args, backend)
        return True
    if args.launcher != "none":
        raise ValueError("Invalid launcher type: {}".format(args.launcher))

    # DataParallel or single GPU
    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible_devices is not None:
        # Ignore empty entries: CUDA_VISIBLE_DEVICES="" hides every device,
        # but "".split(',') yields [''] which used to be counted as one GPU.
        args.total_gpus = len(
            [entry for entry in visible_devices.split(',') if entry.strip()]
        )
    else:
        args.total_gpus = torch.cuda.device_count()

    if args.total_gpus > 1:
        warnings.warn(
            "It is highly recommended to use DistributedDataParallel by setting "
            "args.launcher as 'slurm' or 'pytorch'."
        )
    return False
def set_mp_start_method() -> None:
    """
    Apply the configured start method to both multiprocessing libraries.

    Nothing happens when ``_get_start_method()`` returns a falsy value.
    Otherwise the same start method is set on the standard-library
    ``multiprocessing`` module and on ``multiprocess``.
    """
    chosen_method = _get_start_method()
    if chosen_method:
        # Both imports happen before either library is touched.
        import multiprocessing
        import multiprocess

        for library in (multiprocessing, multiprocess):
            library.set_start_method(chosen_method)
def run_safely(f, x):
    """Run ``f`` on ``x`` in a separate process and return its result.

    A child ``Process`` is started with ``with_queue`` as its target and
    ``(f, q, x)`` as arguments, then joined.  The result is read from the
    queue ``q`` afterwards.
    (presumably ``with_queue`` calls ``f(x)`` and puts the result on ``q`` --
    verify against its definition.)

    Args:
        f: Callable handed to ``with_queue``.
        x: Argument handed to ``with_queue`` alongside ``f``.

    Returns:
        The first item found on the queue after the child has exited.

    Raises:
        Exception: Whatever error the child recorded in ``p.exception`` is
            re-raised here (its traceback is printed first).
        MemoryError: If the child exited without leaving anything on the queue.
    """
    # f_global = f
    # globalize(f)
    # mp.freeze_support()

    # set_start_method() raises RuntimeError once the context is fixed, which
    # made every call after the first one fail.  Only (re)select "spawn" when
    # it is not already the active start method.
    if mp.get_start_method(allow_none=True) != "spawn":
        mp.set_start_method("spawn", force=True)

    q = mp.Queue()
    p = Process(target=with_queue, args=(f, q, x))
    p.start()
    # NOTE(review): joining before draining the queue can block if the child
    # put a large object on it (see "Joining processes that use queues" in the
    # multiprocessing docs) -- confirm result sizes stay small.
    p.join()

    if p.exception:
        error, traceback = p.exception
        print(traceback)
        raise error

    try:
        # Non-blocking read; a timeout is ignored when block is False, so the
        # former second positional argument (2.0) had no effect.
        out = q.get(block=False)
    except queue.Empty:
        print("Empty queue!")
        print("Exit code: ", p.exitcode)
        raise MemoryError()
    return out
def train_parallel_trpo(
        env_id,
        predictor,
        make_env=gym.make,
        summary_writer=None,
        workers=1,
        runtime=1800,
        max_timesteps_per_episode=None,
        timesteps_per_batch=5000,
        max_kl=0.001,
        seed=0,
        discount_factor=0.995,
        cg_damping=0.1,
):
    """Alternate between parallel rollouts and TRPO updates until time is up.

    Each iteration pushes the learner's current policy weights to the rollout
    workers, gathers ``timesteps_per_batch`` worth of rollouts, lets the
    learner learn from them, prints the statistics and, when a
    ``summary_writer`` is given, logs three scalars to it.

    Args:
        env_id: Environment id, handed to ``TRPO``, ``ParallelRollout`` and,
            when no episode limit is given, to ``gym.spec``.
        predictor: Passed to ``ParallelRollout``; a few of its attributes are
            also copied into the printed statistics.
        make_env: Environment factory handed to ``TRPO`` and ``ParallelRollout``.
        summary_writer: Optional writer; ``add_summary`` is called on it once
            per iteration when it is truthy.
        workers: Passed to ``ParallelRollout``.
        runtime: Time budget compared against ``time()``; a value ``<= 0``
            makes the loop run indefinitely.
        max_timesteps_per_episode: Episode length limit; defaults to
            ``gym.spec(env_id).timestep_limit``.
        timesteps_per_batch: Passed to ``rollouts.rollout`` every iteration.
        max_kl: Passed to ``TRPO``.
        seed: Passed to ``ParallelRollout``.
        discount_factor: Passed to ``TRPO``.
        cg_damping: Passed to ``TRPO``.
    """
    # Tensorflow is not fork-safe, so we must use spawn instead
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
    # We use multiprocess rather than multiprocessing because Keras sets a multiprocessing context
    start_method_flag = "SET_PARALLEL_TRPO_START_METHOD"
    if not os.environ.get(start_method_flag):
        # The env variable prevents the start method from being set twice
        multiprocess.set_start_method('spawn')
        os.environ[start_method_flag] = "1"

    run_indefinitely = (runtime <= 0)

    if max_timesteps_per_episode is None:
        max_timesteps_per_episode = gym.spec(env_id).timestep_limit

    learner = TRPO(
        env_id,
        make_env,
        max_kl=max_kl,
        discount_factor=discount_factor,
        cg_damping=cg_damping,
    )
    rollouts = ParallelRollout(
        env_id, make_env, predictor, workers, max_timesteps_per_episode, seed
    )

    iteration = 0
    start_time = time()

    while run_indefinitely or time() < start_time + runtime:
        iteration += 1

        # Hand the latest policy weights to the rollout workers
        rollouts.set_policy_weights(learner.get_policy())

        # Collect rollouts from the async worker processes
        paths, rollout_time = rollouts.rollout(timesteps_per_batch)

        # Update the policy from the collected data
        stats, learn_time = learner.learn(paths)

        # Report statistics
        print("-------- Iteration %d ----------" % iteration)

        frames_gathered_per_second = stats["Frames gathered"] / rollout_time
        stats["Frames gathered/second"] = int(frames_gathered_per_second)
        stats['Time spent gathering rollouts'] = rollout_time
        stats['Time spent updating weights'] = learn_time

        total_elapsed_seconds = time() - start_time
        stats["Total time"] = total_elapsed_seconds

        ##### HACK #####
        stats["Predictor iteration"] = predictor.predictor._elapsed_predictor_training_iters
        optional_predictor_stats = (
            ("Predictor Entropy Alpha", "entropy_alpha"),
            ("Predictor Softmax Beta", "softmax_beta"),
        )
        for label, attribute in optional_predictor_stats:
            value = getattr(predictor, attribute)
            if value is not None:
                stats[label] = value

        print_stats(stats)

        if summary_writer:
            # Log results to tensorboard
            episode_returns = np.array(
                [path["original_rewards"].sum() for path in paths]
            )
            mean_reward = np.mean(episode_returns)
            scalars = (
                ("parallel_trpo/mean_reward", mean_reward),
                ("parallel_trpo/elapsed_seconds", total_elapsed_seconds),
                ("parallel_trpo/frames_gathered_per_second", frames_gathered_per_second),
            )
            summary = tf.Summary(value=[
                tf.Summary.Value(tag=tag, simple_value=scalar)
                for tag, scalar in scalars
            ])
            summary_writer.add_summary(summary, global_step=iteration)

    rollouts.end()
def main(global_config, **settings):
    """Build and return the Pyramid WSGI application.

    Besides the usual Pyramid wiring this function:

    * sets the ``multiprocess`` start method to ``"spawn"``;
    * binds the SQLAlchemy engine built from ``settings`` to ``DBSession``
      and ``Base.metadata``;
    * re-reads the ini file named by ``global_config['__file__']`` to fill
      ``settings['storage']`` (and ``settings['desktop']`` when an
      ``app:desktop`` section exists);
    * initialises the cache from the ``cache:dogpile`` /
      ``cache:dogpile:args`` sections, or disables it when either is missing.

    Args:
        global_config: Mapping that must contain ``'__file__'``, the path of
            the ini file to parse.
        **settings: Application settings; ``settings['secret']`` is used for
            the auth ticket policy.

    Returns:
        The WSGI application produced by ``config.make_wsgi_app()``.
    """
    multiprocess.set_start_method("spawn")

    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    from pyramid.config import Configurator

    config_file = global_config['__file__']
    parser = ConfigParser()
    parser.read(config_file)

    # TODO: DANGER
    storage = dict(parser.items('backend:storage'))
    settings['storage'] = storage
    if parser.has_section('app:desktop'):
        # NOTE(review): the desktop options are merged into the very same dict
        # that is stored under settings['storage'], so both keys end up
        # pointing at one combined mapping -- confirm this is intended.
        storage.update(parser.items('app:desktop'))
        settings['desktop'] = storage

    config = Configurator(settings=settings)
    log = logging.getLogger(__name__)

    # TODO: Find a more neat way
    try:
        cache_kwargs = dict(parser.items('cache:dogpile'))
        cache_kwargs['arguments'] = dict(parser.items('cache:dogpile:args'))
        # ini values are strings; the expiration settings must be integers
        for option in ('expiration_time', 'redis_expiration_time'):
            if option in cache_kwargs:
                cache_kwargs[option] = int(cache_kwargs[option])
    except NoSectionError:
        # Logger.warn is a deprecated alias; use Logger.warning
        log.warning("No 'cache:dogpile' or/and 'cache:dogpile:args' sections in config; disabling caching")
        initialize_cache(None)
    else:
        initialize_cache(cache_kwargs)

    # config.configure_celery('development_test.ini')

    authentication_policy = AuthTktAuthenticationPolicy(
        settings['secret'], hashalg='sha512', callback=groupfinder)
    authorization_policy = ACLAuthorizationPolicy()
    config.set_authentication_policy(authentication_policy)
    config.set_authorization_policy(authorization_policy)

    config.include('pyramid_chameleon')
    config.add_static_view(settings['storage']['static_route'],
                           path=settings['storage']['path'],
                           cache_max_age=3600)
    config.add_static_view('static', path='lingvodoc:static', cache_max_age=3600)
    configure_routes(config)
    config.add_route('testing', '/testing')
    # config.add_route('example', 'some/route/{object_id}/{client_id}/of/perspective', factory = 'lingvodoc.models.DictAcl')
    # config.add_route('home', '/')
    # config.add_route('login', 'login')
    # config.add_route('logout', 'logout')
    # config.add_route('register', 'register')
    # config.add_route('acquire_client_key', 'acquire_client_key')
    # config.add_route('dictionaries.list', 'dictionaries', factory='lingvodoc.models.DictionariesACL')
    # config.add_route('dictionary', 'dictionary')
    # config.add_route('metaword', 'dictionary/{dictionary_id}/etymology/metaword')
    config.scan('.views')
    return config.make_wsgi_app()
visualize.plot_species(stats, view=True)

# p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
# p.run(eval_genomes, 10)


# NOTE(review): the statement above and the two functions below take part in
# a definition whose header is outside this chunk (both functions take
# ``self``) -- restore their original nesting/indentation when merging.
def start_sim(self, cmd):
    """Start ``cmd`` as a child process without a shell, discarding its stdout.

    The process handle is not kept, so the child is neither waited for nor
    terminated from here.
    """
    sp.Popen(cmd, shell=False, stdout=sp.DEVNULL)


def conn_sim(self, connection_string, sims, index):
    """Create a ``Simulation`` and store it at ``self.sims[index]``.

    Args:
        connection_string: Passed to ``Simulation``.
        sims: Not used by this function; the result goes into ``self.sims``.
        index: Position in ``self.sims`` that receives the new simulation.
    """
    self.sims[index] = Simulation(connection_string,
                                  targets_amount=5,
                                  speedup=Const.SPEED_UP)


if __name__ == '__main__':
    mp.set_start_method('spawn', force=True)

    def _parse_bool(text):
        """Convert a command-line word into a bool.

        ``type=bool`` treats every non-empty string -- including "False" and
        "0" -- as True, so the flag could never be switched off.
        """
        word = text.strip().lower()
        if word in ('1', 'true', 't', 'yes', 'y'):
            return True
        if word in ('', '0', 'false', 'f', 'no', 'n'):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(text))

    # parse input
    parser = argparse.ArgumentParser(
        description='Train a NN with the NEAT algorithm.')
    parser.add_argument('-n_sims', type=int, default=1,
                        help='Amount of simulators to run in parallel')
    parser.add_argument('-start_sims', type=_parse_bool, default=True,
                        help='Set to False if sims are already running')
    args = parser.parse_args()

    NeatEvolver(args.n_sims, args.start_sims)
def train_parallel_trpo(
        env_id,
        predictor,
        make_env=gym.make,
        summary_writer=None,
        workers=1,
        runtime=1800,
        max_timesteps_per_episode=None,
        timesteps_per_batch=5000,
        max_kl=0.001,
        seed=0,
        discount_factor=0.995,
        cg_damping=0.1,
        save_freq=100,
        save_dir=None,
        load=False,
        iteration=0,
):
    """Run the parallel TRPO training loop with optional checkpointing.

    Each iteration pushes the learner's policy weights to the rollout
    workers, collects rollouts, updates the learner, optionally saves a
    checkpoint, prints statistics and optionally logs to ``summary_writer``.

    Args:
        env_id: Environment id, handed to ``TRPO``, ``ParallelRollout`` and,
            when no episode limit is given, to ``gym.spec``.
        predictor: Passed to ``ParallelRollout``; also saved/loaded together
            with the learner and queried for ``comparison_collector``.
        make_env: Environment factory handed to ``TRPO`` and ``ParallelRollout``.
        summary_writer: Optional writer; ``add_summary`` is called on it once
            per iteration when it is truthy.
        workers: Passed to ``ParallelRollout``.
        runtime: Time budget compared against ``time()``; a value ``<= 0``
            makes the loop run indefinitely.
        max_timesteps_per_episode: Episode length limit; defaults to
            ``gym.spec(env_id).timestep_limit``.
        timesteps_per_batch: Passed to ``rollouts.rollout`` every iteration.
        max_kl: Passed to ``TRPO``.
        seed: Passed to ``ParallelRollout``.
        discount_factor: Passed to ``TRPO``.
        cg_damping: Passed to ``TRPO``.
        save_freq: A checkpoint is written whenever ``iteration`` is a
            multiple of this value.  A falsy value (``0``/``None``) disables
            checkpointing.
        save_dir: Directory handed to the save/load calls.  When ``None`` no
            checkpoints are written.
        load: When true, learner and predictor sessions are loaded from
            ``save_dir`` before training starts.
        iteration: Iteration counter to start from; it is incremented before
            the first update.
    """
    # Tensorflow is not fork-safe, so we must use spawn instead
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
    # We use multiprocess rather than multiprocessing because Keras sets a multiprocessing context
    if not os.environ.get("SET_PARALLEL_TRPO_START_METHOD"):
        # Use an env variable to prevent double-setting
        multiprocess.set_start_method('spawn')
        os.environ['SET_PARALLEL_TRPO_START_METHOD'] = "1"

    run_indefinitely = (runtime <= 0)

    if max_timesteps_per_episode is None:
        max_timesteps_per_episode = gym.spec(env_id).timestep_limit

    learner = TRPO(
        env_id,
        make_env,
        max_kl=max_kl,
        discount_factor=discount_factor,
        cg_damping=cg_damping)

    if predictor.sess:
        # Initialise the global variables under the 'policy' scope in the
        # predictor's session.
        predictor.sess.run(tf.initializers.variables(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='policy')))

    if load:
        # NOTE(review): load=True with save_dir=None passes None to both
        # load_session calls -- confirm callers always supply save_dir here.
        print("Loading Learner...")
        learner.load_session(save_dir)
        # with open(os.path.join(save_dir,'loaded_weights.txt'), 'w+') as f:
        #     f.write(str(learner.get_policy()))
        print("Loading Predictor...")
        predictor.load_session(save_dir)

    rollouts = ParallelRollout(env_id, make_env, predictor, workers,
                               max_timesteps_per_episode, seed)

    start_time = time()
    while run_indefinitely or time() < start_time + runtime:
        iteration += 1

        # update the weights
        weights = learner.session.run(learner.get_policy)
        # weights = learner.get_policy()
        rollouts.set_policy_weights(weights)

        # run a bunch of async processes that collect rollouts
        paths, rollout_time = rollouts.rollout(timesteps_per_batch)

        # learn from that data
        stats, learn_time = learner.learn(paths)

        # Checkpoint only when a directory was given and the frequency is
        # usable: the defaults (save_dir=None) used to crash in os.path.join
        # at the first checkpoint iteration, and save_freq=0 raised
        # ZeroDivisionError.
        checkpoint_due = (bool(save_freq)
                          and save_dir is not None
                          and iteration % save_freq == 0)
        if checkpoint_due:
            # Saving learner
            learner.save_session(save_dir, global_step=iteration)
            # with open(os.path.join(save_dir, 'learner')
            #           + '_saved_weights_{}.txt'.format(iteration), 'w+') as f:
            #     f.write(str(learner.get_policy()))
            # Saving predictor
            predictor.save_session(save_dir, global_step=iteration)
            # A fresh writer is created for every checkpoint; close it so the
            # graph is flushed and the event file handle is released.
            graph_writer = tf.summary.FileWriter(
                os.path.join(save_dir, 'learner_graph'), learner.session.graph)
            graph_writer.close()
            # tf.summary.FileWriter(os.path.join(save_dir, 'predictor_graph'), predictor.sess.graph)

        # output stats
        print("-------- Iteration %d ----------" % iteration)

        frames_gathered_per_second = stats["Frames gathered"] / rollout_time
        stats["Frames gathered/second"] = int(frames_gathered_per_second)
        stats['Time spent gathering rollouts'] = rollout_time
        stats['Time spent updating weights'] = learn_time

        total_elapsed_seconds = time() - start_time
        stats["Total time"] = total_elapsed_seconds

        if predictor.comparison_collector:
            stats["Collected Comparisons"] = len(predictor.comparison_collector)

        print_stats(stats)

        if summary_writer:
            # Log results to tensorboard
            mean_reward = np.mean(
                np.array([path["original_rewards"].sum() for path in paths]))
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="parallel_trpo/mean_reward",
                                 simple_value=mean_reward),
                tf.Summary.Value(tag="parallel_trpo/elapsed_seconds",
                                 simple_value=total_elapsed_seconds),
                tf.Summary.Value(tag="parallel_trpo/frames_gathered_per_second",
                                 simple_value=frames_gathered_per_second),
            ])
            summary_writer.add_summary(summary, global_step=iteration)

    rollouts.end()
def train_parallel_trpo(env_id,
                        predictor,
                        num_r,
                        make_env=gym.make,
                        summary_writer=None,
                        workers=1,
                        runtime=1800,
                        max_timesteps_per_episode=None,
                        timesteps_per_batch=5000,
                        max_kl=0.001,
                        seed=0,
                        discount_factor=0.995,
                        cg_damping=0.1,
                        num_policy=0,
                        exploration=False):
    """Train a baseline TRPO policy, optionally alongside exploration policies.

    With ``exploration`` enabled, ``num_policy`` additional ``TRPO`` learners
    are created, their weights are sent to the rollout workers together with
    the baseline weights, and every learner is updated each iteration.
    Without it only the baseline learner is used.  Every update is fed a
    sample of four of the collected paths, drawn via ``random.choice``.

    Args:
        env_id: Environment id, handed to the learners, the rollout object
            and, when no episode limit is given, to ``gym.spec``.
        predictor: Passed to the rollout object.
        num_r: Passed to the rollout object.
        make_env: Environment factory handed to learners and rollout object.
        summary_writer: Optional writer; ``add_summary`` is called on it once
            per iteration when it is truthy.
        workers: Passed to the rollout object.
        runtime: Time budget compared against ``time()``; a value ``<= 0``
            makes the loop run indefinitely.
        max_timesteps_per_episode: Episode length limit; defaults to
            ``gym.spec(env_id).timestep_limit``.
        timesteps_per_batch: Passed to ``rollouts.rollout`` every iteration.
        max_kl: Passed to every ``TRPO`` learner.
        seed: Passed to the rollout object.
        discount_factor: Passed to every ``TRPO`` learner.
        cg_damping: Passed to every ``TRPO`` learner.
        num_policy: Number of exploration learners (used only when
            ``exploration`` is true).
        exploration: Selects ``ParallelRollout_1`` and the extra learners.
    """
    # Tensorflow is not fork-safe, so we must use spawn instead
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
    # We use multiprocess rather than multiprocessing because Keras sets a multiprocessing context
    start_method_flag = "SET_PARALLEL_TRPO_START_METHOD"
    if not os.environ.get(start_method_flag):
        # The env variable prevents the start method from being set twice
        multiprocess.set_start_method('spawn')
        os.environ[start_method_flag] = "1"

    run_indefinitely = (runtime <= 0)

    if max_timesteps_per_episode is None:
        max_timesteps_per_episode = gym.spec(env_id).timestep_limit

    trpo_options = dict(max_kl=max_kl,
                        discount_factor=discount_factor,
                        cg_damping=cg_damping)

    # Exploration learners are built first, the baseline learner last
    if exploration:
        learners_policy = [
            TRPO(env_id, make_env, policy_name='policy' + str(i), **trpo_options)
            for i in range(num_policy)
        ]
    else:
        learners_policy = []
    learner = TRPO(env_id, make_env, policy_name='baseline', **trpo_options)

    if exploration:
        rollouts = ParallelRollout_1(env_id, make_env, predictor, workers,
                                     max_timesteps_per_episode, seed, num_r,
                                     num_policy)
    else:
        rollouts = ParallelRollout(env_id, make_env, predictor, workers,
                                   max_timesteps_per_episode, seed, num_r)

    iteration = 0
    start_time = time()
    num_chosen_paths = 4

    while run_indefinitely or time() < start_time + runtime:
        iteration += 1

        # Send the current weights to the workers: a list (exploration
        # policies followed by the baseline) or just the baseline weights.
        if exploration:
            weights_list = [explorer.get_policy() for explorer in learners_policy]
            weights_list.append(learner.get_policy())
            rollouts.set_policy_weights(weights_list)
        else:
            rollouts.set_policy_weights(learner.get_policy())

        # run a bunch of async processes that collect rollouts
        paths, rollout_time = rollouts.rollout(timesteps_per_batch)

        # learn from that data
        # NOTE(review): random.choice(seq, k) matches numpy.random's
        # signature, so `random` is presumably numpy.random -- confirm.
        if exploration:
            for explorer in learners_policy:
                stats, learn_time = explorer.learn(
                    random.choice(paths, num_chosen_paths))
        # The reported stats always come from the baseline learner
        stats, learn_time = learner.learn(
            random.choice(paths, num_chosen_paths))

        # output stats
        print("-------- Iteration %d ----------" % iteration)

        frames_gathered_per_second = stats["Frames gathered"] / rollout_time
        stats["Frames gathered/second"] = int(frames_gathered_per_second)
        stats['Time spent gathering rollouts'] = rollout_time
        stats['Time spent updating weights'] = learn_time

        total_elapsed_seconds = time() - start_time
        stats["Total time"] = total_elapsed_seconds

        print_stats(stats)

        if summary_writer:
            # Log results to tensorboard
            episode_returns = np.array(
                [path["original_rewards"].sum() for path in paths])
            mean_reward = np.mean(episode_returns)
            scalars = (
                ("parallel_trpo/mean_reward", mean_reward),
                ("parallel_trpo/elapsed_seconds", total_elapsed_seconds),
                ("parallel_trpo/frames_gathered_per_second", frames_gathered_per_second),
            )
            summary = tf.Summary(value=[
                tf.Summary.Value(tag=tag, simple_value=scalar)
                for tag, scalar in scalars
            ])
            summary_writer.add_summary(summary, global_step=iteration)

    rollouts.end()
# Earlier thread-based variants, kept for reference:
#
# Use thread to speed up processing (multithreading)
# thread1 = threading.Thread(None, target=classic_gen_time_Sample1,
#                            name="1",
#                            args=[5, queue1],
#                            daemon=True, )
# Use thread to speed up processing (multithreading)
# thread2 = threading.Thread(None, target=classic_gen_time_Sample2,
#                            name="2",
#                            args=[5, [], queue2],
#                            daemon=True, )
# queue1 = _Queue.Queue(0)
# queue2 = _Queue.Queue(0)

if __name__ == '__main__':
    # Child processes are created with the "spawn" start method
    mp.set_start_method('spawn')

    start_sampling = threading.Event()
    queue1 = Queue()
    queue2 = Queue()
    # Double-precision value ('d') initialised to 2.0; it is the only
    # argument handed to both worker processes
    maxSample = Value('d', 2.0)

    # One process per worker function; both are only created here
    p1, p2 = [
        Process(target=worker, name=label, args=(maxSample, ))
        for label, worker in (("p1", process_thread_1),
                              ("p2", process_thread_2))
    ]
    # thread1 = threading.Thread(None, target=classic_gen_time_Sample1,
    #                            name="1",
    #                            args=[maxSample, queue1],
    #                            daemon=True, )
    # thread2 = threading.Thread(None, target=classic_gen_time_Sample2,
    #                            name="2",
    #                            args=[maxSample, [], queue2],
    #                            daemon=True, )
# PosLbl module for microscopy data. # Contains all single TP FrameLbls for a specific well/position # Deals with tracking # AOY import sys import os import pandas as pd import numpy as np from skimage import measure from oyLabImaging import Metadata import multiprocess as mp #import Pool, set_start_method mp.set_start_method('spawn',force=True) from functools import partial from oyLabImaging.Processing import FrameLbl from scipy.spatial import KDTree import lap from tqdm import tqdm class PosLbl(object): """ Class for data from a single position (multi timepoint, position, single experiment, multi channel). Handles image tracking. Parameters ---------- MD : relevant metadata OR pth : str path to relevant metadata Attributes ---------- Pos : position name acq : acquisition name
print() stopTraining() response = "success" else: try: stopBot() base_model = request.form['base_model'] new_model = request.form['new_model'] startTraining(base_model, new_model) response = "success" except: response = "failure" return response, 200 ## Due to debug mode being on, the app reloads and startBot messes up, ## unless use_reloader is set to false, or startBot is placed inside the below: # @app.before_first_request # def setup(): # if botPid is None: # startBot() if __name__ == "__main__": import multiprocess multiprocess.set_start_method('spawn') app.secret_key = os.urandom(12) app.run(debug=app.config['DEBUG'], host='0.0.0.0', port=app.config['WEBSITEPORT']) #, use_reloader=False