Example #1
# Imports assumed from context: os, warnings, torch,
# torch.distributed as dist, and a multiprocessing module as mp.
def init_dist(args, backend="nccl"):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method("spawn")

    if not dist.is_available():
        args.launcher = "none"

    if args.launcher == "pytorch":
        # DDP
        init_dist_pytorch(args, backend)
        return True

    elif args.launcher == "slurm":
        # DDP
        init_dist_slurm(args, backend)
        return True

    elif args.launcher == "none":
        # DataParallel or single GPU
        if 'CUDA_VISIBLE_DEVICES' in os.environ:
            args.total_gpus = len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))
        else:
            args.total_gpus = torch.cuda.device_count()
        if args.total_gpus > 1:
            warnings.warn(
                "It is highly recommended to use DistributedDataParallel by setting "
                "args.launcher as 'slurm' or 'pytorch'."
            )
        return False

    else:
        raise ValueError("Invalid launcher type: {}".format(args.launcher))
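The helpers init_dist_pytorch and init_dist_slurm are project-specific and not shown. As a rough sketch of what the 'pytorch' launcher path typically does (hypothetical body, assuming the standard torchrun/LOCAL_RANK convention):

import os
import torch
import torch.distributed as dist

def init_dist_pytorch(args, backend="nccl"):
    # Sketch only: torchrun / torch.distributed.launch exports LOCAL_RANK
    # and the rendezvous variables (MASTER_ADDR, MASTER_PORT, RANK, WORLD_SIZE).
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank % torch.cuda.device_count())
    dist.init_process_group(backend=backend)
    args.total_gpus = dist.get_world_size()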
Example #2
def set_mp_start_method() -> None:
    """
    Sets the multiprocessing start method.
    """
    start_method = _get_start_method()
    if not start_method:
        return

    import multiprocessing
    import multiprocess

    # NB: set_start_method raises RuntimeError if a start method is already
    # fixed; pass force=True to override instead.
    multiprocessing.set_start_method(start_method)
    multiprocess.set_start_method(start_method)
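_get_start_method is not shown above; a plausible sketch (the MP_START_METHOD variable name is an assumption):

import os

_VALID_START_METHODS = ("fork", "spawn", "forkserver")

def _get_start_method() -> str:
    # Hypothetical: let the user pick a start method via the environment.
    method = os.environ.get("MP_START_METHOD", "")
    return method if method in _VALID_START_METHODS else ""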
Example #3
def run_safely(f, x):
    """Runs f(args) in a separate process."""

    # f_global = f # globalize(f)
    # mp.freeze_support()
    # Guard the call: a second invocation of run_safely would otherwise
    # raise RuntimeError ("context has already been set").
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method("spawn")
    q = mp.Queue()
    p = Process(target=with_queue, args=(f, q, x))
    p.start()
    p.join()

    if p.exception:
        error, traceback = p.exception
        print(traceback)
        raise error

    try:
        out = q.get(timeout=2.0)  # block briefly; block=False would make the timeout meaningless
    except queue.Empty:
        print("Empty queue!")
        print("Exit code: ", p.exitcode)
        raise MemoryError()  # no result and a dead child: assume it was killed (e.g. OOM)

    return out
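run_safely depends on a with_queue helper and on a Process class exposing an .exception attribute, neither of which appears in the snippet. A sketch of the usual recipe they suggest (assumed, not the author's exact code):

import traceback
import multiprocess as mp

def with_queue(f, q, x):
    # Run f and ship its result back to the parent through the queue.
    q.put(f(x))

class Process(mp.Process):
    # Well-known recipe: capture any exception raised in the child and
    # expose it to the parent via a Pipe.
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._pconn, self._cconn = mp.Pipe()
        self._exception = None

    def run(self):
        try:
            super().run()
            self._cconn.send(None)
        except Exception as e:
            self._cconn.send((e, traceback.format_exc()))

    @property
    def exception(self):
        if self._pconn.poll():
            self._exception = self._pconn.recv()
        return self._exception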
Example #4
def train_parallel_trpo(
    env_id,
    predictor,
    make_env=gym.make,
    summary_writer=None,
    workers=1,
    runtime=1800,
    max_timesteps_per_episode=None,
    timesteps_per_batch=5000,
    max_kl=0.001,
    seed=0,
    discount_factor=0.995,
    cg_damping=0.1,
):
    # Tensorflow is not fork-safe, so we must use spawn instead
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
    # We use multiprocess rather than multiprocessing because Keras sets a multiprocessing context
    # Use an env variable to prevent double-setting
    if not os.environ.get("SET_PARALLEL_TRPO_START_METHOD"):
        multiprocess.set_start_method('spawn')
        os.environ['SET_PARALLEL_TRPO_START_METHOD'] = "1"

    run_indefinitely = (runtime <= 0)

    if max_timesteps_per_episode is None:
        max_timesteps_per_episode = gym.spec(env_id).timestep_limit

    learner = TRPO(env_id,
                   make_env,
                   max_kl=max_kl,
                   discount_factor=discount_factor,
                   cg_damping=cg_damping)

    rollouts = ParallelRollout(env_id, make_env, predictor, workers,
                               max_timesteps_per_episode, seed)

    iteration = 0
    start_time = time()

    while run_indefinitely or time() < start_time + runtime:
        iteration += 1

        # update the weights
        weights = learner.get_policy()
        rollouts.set_policy_weights(weights)

        # run a bunch of async processes that collect rollouts
        paths, rollout_time = rollouts.rollout(timesteps_per_batch)

        # learn from that data
        stats, learn_time = learner.learn(paths)

        # output stats
        print("-------- Iteration %d ----------" % iteration)

        frames_gathered_per_second = stats["Frames gathered"] / rollout_time
        stats["Frames gathered/second"] = int(frames_gathered_per_second)

        stats['Time spent gathering rollouts'] = rollout_time
        stats['Time spent updating weights'] = learn_time

        total_elapsed_seconds = time() - start_time
        stats["Total time"] = total_elapsed_seconds
        ##### HACK #####
        stats["Predictor iteration"] = predictor.predictor._elapsed_predictor_training_iters
        if predictor.entropy_alpha is not None:
            stats["Predictor Entropy Alpha"] = predictor.entropy_alpha
        if predictor.softmax_beta is not None:
            stats["Predictor Softmax Beta"] = predictor.softmax_beta

        print_stats(stats)

        if summary_writer:
            # Log results to tensorboard
            mean_reward = np.mean(
                np.array([path["original_rewards"].sum() for path in paths]))
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="parallel_trpo/mean_reward",
                                 simple_value=mean_reward),
                tf.Summary.Value(tag="parallel_trpo/elapsed_seconds",
                                 simple_value=total_elapsed_seconds),
                tf.Summary.Value(
                    tag="parallel_trpo/frames_gathered_per_second",
                    simple_value=frames_gathered_per_second),
            ])
            summary_writer.add_summary(summary, global_step=iteration)

    rollouts.end()
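The env-variable guard above exists because set_start_method raises RuntimeError on a second call. An alternative sketch that queries the library directly (assuming multiprocess mirrors the multiprocessing API, which it does by design):

import multiprocess

def ensure_spawn():
    # Equivalent intent, without the sentinel env variable.
    if multiprocess.get_start_method(allow_none=True) != "spawn":
        multiprocess.set_start_method("spawn", force=True)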
Example #5
def main(global_config, **settings):
    """ This function returns a Pyramid WSGI application.
    """
    multiprocess.set_start_method("spawn")
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine
    from pyramid.config import Configurator
    config_file = global_config['__file__']
    parser = ConfigParser()
    parser.read(config_file)
    # TODO: DANGER
    storage = dict()
    for k, v in parser.items('backend:storage'):
        storage[k] = v
    settings['storage'] = storage
    if parser.has_section('app:desktop'):
        # NB: this reuses the same dict, so settings['storage'] and
        # settings['desktop'] end up aliasing one merged mapping.
        for k, v in parser.items('app:desktop'):
            storage[k] = v
        settings['desktop'] = storage
    config = Configurator(settings=settings)
    log = logging.getLogger(__name__)

    # TODO: Find a more neat way
    try:
        cache_kwargs = dict()
        for k, v in parser.items('cache:dogpile'):
            cache_kwargs[k] = v
        cache_args = dict()
        for k, v in parser.items('cache:dogpile:args'):
            cache_args[k] = v
        cache_kwargs['arguments'] = cache_args
        if 'expiration_time' in cache_kwargs:
            cache_kwargs['expiration_time'] = int(cache_kwargs['expiration_time'])
        if 'redis_expiration_time' in cache_kwargs:
            cache_kwargs['redis_expiration_time'] = int(cache_kwargs['redis_expiration_time'])
    except NoSectionError:
        log.warning("No 'cache:dogpile' and/or 'cache:dogpile:args' sections in config; disabling caching")
        initialize_cache(None)
    else:
        initialize_cache(cache_kwargs)

    # config.configure_celery('development_test.ini')

    authentication_policy = AuthTktAuthenticationPolicy(settings['secret'],
                                                        hashalg='sha512', callback=groupfinder)
    authorization_policy = ACLAuthorizationPolicy()
    config.set_authentication_policy(authentication_policy)
    config.set_authorization_policy(authorization_policy)
    config.include('pyramid_chameleon')
    config.add_static_view(settings['storage']['static_route'], path=settings['storage']['path'], cache_max_age=3600)
    config.add_static_view('static', path='lingvodoc:static', cache_max_age=3600)
    configure_routes(config)
    config.add_route('testing', '/testing')
    #    config.add_route('example', 'some/route/{object_id}/{client_id}/of/perspective', factory = 'lingvodoc.models.DictAcl')
    #    config.add_route('home', '/')
    #    config.add_route('login', 'login')
    #    config.add_route('logout', 'logout')
    #    config.add_route('register', 'register')
    #    config.add_route('acquire_client_key', 'acquire_client_key')
    #    config.add_route('dictionaries.list', 'dictionaries', factory='lingvodoc.models.DictionariesACL')
    #    config.add_route('dictionary', 'dictionary')

    #    config.add_route('metaword', 'dictionary/{dictionary_id}/etymology/metaword')

    config.scan('.views')
    return config.make_wsgi_app()
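For reference, an illustrative fragment of the .ini file this factory expects (hypothetical values; the section and key names come from the parsing code above):

[backend:storage]
path = /tmp/lingvodoc_objects
static_route = objects

[cache:dogpile]
backend = dogpile.cache.redis
expiration_time = 3600

[cache:dogpile:args]
host = localhost
port = 6379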
Example #6
        visualize.plot_species(stats, view=True)

        # p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
        # p.run(eval_genomes, 10)

    def start_sim(self, cmd):
        # Fire-and-forget: launch the simulator and silence its stdout.
        proc = sp.Popen(cmd, shell=False, stdout=sp.DEVNULL)
        return proc

    def conn_sim(self, connection_string, sims, index):
        self.sims[index] = Simulation(connection_string,
                                      targets_amount=5,
                                      speedup=Const.SPEED_UP)


if __name__ == '__main__':
    mp.set_start_method('spawn', True)

    # parse input
    parser = argparse.ArgumentParser(
        description='Train a NN with the NEAT algorithm.')
    parser.add_argument('-n_sims',
                        type=int,
                        default=1,
                        help='Amount of simulators to run in parallel')
    # NB: argparse's type=bool is a trap -- bool('False') is True, so any
    # non-empty string passed on the command line counts as True.
    parser.add_argument('-start_sims',
                        type=bool,
                        default=True,
                        help='Set to False if sims are already running')
    args = parser.parse_args()

    NeatEvolver(args.n_sims, args.start_sims)
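If the CLI shape may change, a flag pair avoids the type=bool pitfall entirely (a sketch, not the original interface):

import argparse

parser = argparse.ArgumentParser()
# A store_false flag sidesteps the bool('False') == True trap.
parser.add_argument('--no-start-sims', dest='start_sims',
                    action='store_false',
                    help='Pass this if sims are already running')
args = parser.parse_args()  # args.start_sims defaults to True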
Example #7
def train_parallel_trpo(
        env_id,
        predictor,
        make_env=gym.make,
        summary_writer=None,
        workers=1,
        runtime=1800,
        max_timesteps_per_episode=None,
        timesteps_per_batch=5000,
        max_kl=0.001,
        seed=0,
        discount_factor=0.995,
        cg_damping=0.1,
        save_freq=100,
        save_dir=None,
        load=False,
        iteration=0,
):
    # Tensorflow is not fork-safe, so we must use spawn instead
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
    # We use multiprocess rather than multiprocessing because Keras sets a multiprocessing context
    # Use an env variable to prevent double-setting
    if not os.environ.get("SET_PARALLEL_TRPO_START_METHOD"):
        multiprocess.set_start_method('spawn')
        os.environ['SET_PARALLEL_TRPO_START_METHOD'] = "1"

    run_indefinitely = (runtime <= 0)

    if max_timesteps_per_episode is None:
        max_timesteps_per_episode = gym.spec(env_id).timestep_limit

    learner = TRPO(
        env_id, make_env,
        max_kl=max_kl,
        discount_factor=discount_factor,
        cg_damping=cg_damping)

    if predictor.sess:
        predictor.sess.run(tf.initializers.variables(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='policy')))

    if load:
        print("Loading Learner...")
        learner.load_session(save_dir)
        # with open(os.path.join(save_dir,'loaded_weights.txt'), 'w+') as f:
        #     f.write(str(learner.get_policy()))

        print("Loading Predictor...")
        predictor.load_session(save_dir)


    rollouts = ParallelRollout(env_id, make_env, predictor, workers, max_timesteps_per_episode, seed)

    start_time = time()

    while run_indefinitely or time() < start_time + runtime:
        iteration += 1

        # update the weights
        weights = learner.session.run(learner.get_policy)
        #weights = learner.get_policy()
        rollouts.set_policy_weights(weights)

        # run a bunch of async processes that collect rollouts
        paths, rollout_time = rollouts.rollout(timesteps_per_batch)

        # learn from that data
        stats, learn_time = learner.learn(paths)

        if iteration % save_freq == 0:
            # Saving learner
            fpath = os.path.join(save_dir, 'learner')
            learner.save_session(save_dir, global_step=iteration)
            # with open(fpath + '_saved_weights_{}.txt'.format(iteration), 'w+') as f:
            #     f.write(str(learner.get_policy()))

            # Saving predictor
            predictor.save_session(save_dir, global_step=iteration)

            tf.summary.FileWriter(os.path.join(save_dir, 'learner_graph'), learner.session.graph)
            #tf.summary.FileWriter(os.path.join(save_dir, 'predictor_graph'), predictor.sess.graph)

        # output stats
        print("-------- Iteration %d ----------" % iteration)

        frames_gathered_per_second = stats["Frames gathered"] / rollout_time
        stats["Frames gathered/second"] = int(frames_gathered_per_second)

        stats['Time spent gathering rollouts'] = rollout_time
        stats['Time spent updating weights'] = learn_time

        total_elapsed_seconds = time() - start_time
        stats["Total time"] = total_elapsed_seconds

        if predictor.comparison_collector:
            stats["Collected Comparisons"] = len(predictor.comparison_collector)

        print_stats(stats)

        if summary_writer:
            # Log results to tensorboard
            mean_reward = np.mean(np.array([path["original_rewards"].sum() for path in paths]))
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="parallel_trpo/mean_reward", simple_value=mean_reward),
                tf.Summary.Value(tag="parallel_trpo/elapsed_seconds", simple_value=total_elapsed_seconds),
                tf.Summary.Value(tag="parallel_trpo/frames_gathered_per_second", simple_value=frames_gathered_per_second),
            ])
            summary_writer.add_summary(summary, global_step=iteration)

    rollouts.end()
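save_session and load_session belong to the project's TRPO and predictor classes and are not shown. A minimal TF1 sketch of what such methods typically look like (hypothetical implementation built on tf.train.Saver; assumes a self.session handle and a self.scope_name variable scope):

import os
import tensorflow as tf

class CheckpointMixin:
    def _saver(self):
        var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope=self.scope_name)
        return tf.train.Saver(var_list=var_list, max_to_keep=5)

    def save_session(self, save_dir, global_step=None):
        # Writes save_dir/<scope_name>-<global_step>.* checkpoint files.
        path = os.path.join(save_dir, self.scope_name)
        self._saver().save(self.session, path, global_step=global_step)

    def load_session(self, save_dir):
        # Restores from the most recent checkpoint in save_dir.
        ckpt = tf.train.latest_checkpoint(save_dir)
        self._saver().restore(self.session, ckpt)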
Example #8
def train_parallel_trpo(env_id,
                        predictor,
                        num_r,
                        make_env=gym.make,
                        summary_writer=None,
                        workers=1,
                        runtime=1800,
                        max_timesteps_per_episode=None,
                        timesteps_per_batch=5000,
                        max_kl=0.001,
                        seed=0,
                        discount_factor=0.995,
                        cg_damping=0.1,
                        num_policy=0,
                        exploration=False):
    # Tensorflow is not fork-safe, so we must use spawn instead
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
    # We use multiprocess rather than multiprocessing because Keras sets a multiprocessing context
    # Use an env variable to prevent double-setting
    if not os.environ.get("SET_PARALLEL_TRPO_START_METHOD"):
        multiprocess.set_start_method('spawn')
        os.environ['SET_PARALLEL_TRPO_START_METHOD'] = "1"

    run_indefinitely = (runtime <= 0)

    if max_timesteps_per_episode is None:
        max_timesteps_per_episode = gym.spec(env_id).timestep_limit

    # explorations
    learners_policy = []

    if exploration:
        for i in range(num_policy):
            _learner = TRPO(env_id,
                            make_env,
                            max_kl=max_kl,
                            discount_factor=discount_factor,
                            cg_damping=cg_damping,
                            policy_name='policy' + str(i))

            learners_policy.append(_learner)

    learner = TRPO(env_id,
                   make_env,
                   max_kl=max_kl,
                   discount_factor=discount_factor,
                   cg_damping=cg_damping,
                   policy_name='baseline')

    if exploration:
        rollouts = ParallelRollout_1(env_id, make_env, predictor, workers,
                                     max_timesteps_per_episode, seed, num_r,
                                     num_policy)

    else:
        rollouts = ParallelRollout(env_id, make_env, predictor, workers,
                                   max_timesteps_per_episode, seed, num_r)

    iteration = 0
    start_time = time()

    num_chosen_paths = 4

    while run_indefinitely or time() < start_time + runtime:
        iteration += 1

        # exploration
        if exploration:
            weights_list = []
            for i in range(num_policy):
                weights_list.append(learners_policy[i].get_policy())
            # update the weights for the baseline policy
            weights = learner.get_policy()
            weights_list.append(weights)

            rollouts.set_policy_weights(weights_list)

            # run a bunch of async processes that collect rollouts
            paths, rollout_time = rollouts.rollout(timesteps_per_batch)

            # learn from that data; stdlib random.choice takes a single
            # sequence argument, so this has to be numpy's np.random.choice
            for i in range(num_policy):
                stats, learn_time = learners_policy[i].learn(
                    np.random.choice(paths, num_chosen_paths))

            stats, learn_time = learner.learn(
                np.random.choice(paths, num_chosen_paths))

        else:
            weights = learner.get_policy()
            rollouts.set_policy_weights(weights)

            # run a bunch of async processes that collect rollouts
            paths, rollout_time = rollouts.rollout(timesteps_per_batch)

            # learn from that data (numpy's choice again; see above)
            stats, learn_time = learner.learn(
                np.random.choice(paths, num_chosen_paths))

        # output stats
        print("-------- Iteration %d ----------" % iteration)

        frames_gathered_per_second = stats["Frames gathered"] / rollout_time
        stats["Frames gathered/second"] = int(frames_gathered_per_second)

        stats['Time spent gathering rollouts'] = rollout_time
        stats['Time spent updating weights'] = learn_time

        total_elapsed_seconds = time() - start_time
        stats["Total time"] = total_elapsed_seconds

        print_stats(stats)

        if summary_writer:
            # Log results to tensorboard
            mean_reward = np.mean(
                np.array([path["original_rewards"].sum() for path in paths]))
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="parallel_trpo/mean_reward",
                                 simple_value=mean_reward),
                tf.Summary.Value(tag="parallel_trpo/elapsed_seconds",
                                 simple_value=total_elapsed_seconds),
                tf.Summary.Value(
                    tag="parallel_trpo/frames_gathered_per_second",
                    simple_value=frames_gathered_per_second),
            ])
            summary_writer.add_summary(summary, global_step=iteration)

    rollouts.end()
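Note that np.random.choice samples with replacement by default and accepts a 1-D sequence of arbitrary objects, so drawing num_chosen_paths rollout dicts works directly; a quick illustration (made-up data):

import numpy as np

paths = [{"id": i} for i in range(10)]
chosen = np.random.choice(paths, 4)  # 4 paths, sampled with replacement
print([p["id"] for p in chosen])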
Example #9
# Use thread to speed up processing (multithreading)
# thread1 = threading.Thread(None, target=classic_gen_time_Sample1,
#                            name="1",
#                            args=[5, queue1],
#                            daemon=True, )

# Use thread to speed up processing (multithreading)
# thread2 = threading.Thread(None, target=classic_gen_time_Sample2,
#                            name="2",
#                            args=[5, [], queue2],
#                            daemon=True, )
# queue1 = _Queue.Queue(0)
# queue2 = _Queue.Queue(0)

if __name__ == '__main__':
    mp.set_start_method('spawn')
    start_sampling = threading.Event()
    queue2 = Queue()
    queue1 = Queue()
    maxSample = Value('d', 2.0)

    p1 = Process(target=process_thread_1, name="p1", args=(maxSample, ))
    # thread1 = threading.Thread(None, target=classic_gen_time_Sample1,
    #                            name="1",
    #                            args=[maxSample, queue1],
    #                            daemon=True, )
    p2 = Process(target=process_thread_2, name="p2", args=(maxSample, ))
    # thread2 = threading.Thread(None, target=classic_gen_time_Sample2,
    #                            name="2",
    #                            args=[maxSample, [], queue2],
    #                            daemon=True, )
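The excerpt ends before either process is launched; presumably the script continues along these lines (a guess, not the original code):

    p1.start()
    p2.start()
    p1.join()
    p2.join()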
Example #10
# PosLbl module for microscopy data.
# Contains all single TP FrameLbls for a specific well/position
# Deals with tracking
# AOY

import sys
import os
import pandas as pd
import numpy as np
from skimage import measure
from oyLabImaging import Metadata
import multiprocess as mp  # provides Pool, set_start_method
mp.set_start_method('spawn', force=True)
from functools import partial
from oyLabImaging.Processing import FrameLbl
from scipy.spatial import KDTree
import lap
from tqdm import tqdm

class PosLbl(object):
    """
    Class for data from a single position (multi-timepoint, multi-channel, one position of a single experiment). Handles tracking.
    Parameters
    ----------
    MD : relevant metadata OR
    pth : str path to relevant metadata

    Attributes
    ----------
    Pos : position name
    acq : acquisition name
Example #11
        print()
        stopTraining()
        response = "success"
    else:
        try:
            stopBot()
            base_model = request.form['base_model']
            new_model = request.form['new_model']
            startTraining(base_model, new_model)
            response = "success"
        except Exception:
            response = "failure"
    return response, 200


## With debug mode on, the app reloads and startBot misbehaves unless
## use_reloader is set to False, or unless startBot is placed inside the hook below:
# @app.before_first_request
# def setup():
#     if botPid is None:
#         startBot()

if __name__ == "__main__":

    import multiprocess
    multiprocess.set_start_method('spawn')
    app.secret_key = os.urandom(12)
    app.run(debug=app.config['DEBUG'],
            host='0.0.0.0',
            port=app.config['WEBSITEPORT'])  #, use_reloader=False
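As the comments note, Flask's debug reloader imports the app twice, which interferes with startBot; the trailing hint corresponds to:

    app.run(debug=app.config['DEBUG'],
            host='0.0.0.0',
            port=app.config['WEBSITEPORT'],
            use_reloader=False)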