def update_config(config: OmegaConf):
    """
    Serialize and sync config with trains
    :param config: the config to sync
    :return: the config merged back against the schema after syncing with the server
    """
    # expected config_global format
    schema = OmegaConf.structured(config._metadata.object_type)
    # serialize config
    # For config logging we use yaml format (Trains: Artifacts -> Model configuration)
    # save config in a temp yaml file
    config_global_file = tempfile.NamedTemporaryFile("w+t")
    config_global_file.write(OmegaConf.to_yaml(config))
    config_global_file.flush()
    config_global_file_name = config_global_file.name
    # sync with server if a task has been created
    current_task = Task.current_task()
    if current_task:
        # send yaml to trains server
        config_global_file_name = Task.current_task().connect_configuration(
            config_global_file_name)
        # for visualization (Trains: Hyperparameters)
        Task.current_task().connect(
            generate_trains_hyperparameter_dict(config))
    config_back_ = OmegaConf.load(config_global_file_name)
    config_back = OmegaConf.merge(schema, config_back_)
    return config_back

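# --- usage sketch (an illustration, not part of the original helper: TrainConfig is a
# hypothetical dataclass-backed schema; update_config() is the helper defined above and
# assumes Task.init() has been called so the config can be synced with the server) ---
from dataclasses import dataclass
from omegaconf import OmegaConf
from trains import Task


@dataclass
class TrainConfig:
    lr: float = 0.001
    batch_size: int = 32


task = Task.init(project_name='examples', task_name='omegaconf sync')
config = OmegaConf.structured(TrainConfig)
# returns the config merged back against the schema after the round-trip to the server
config = update_config(config)
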
def mp_worker(arguments):
    print('sub process', os.getpid())
    inputs, the_time = arguments
    from random import randint
    additional_parameters = {
        'stuff_' + str(randint(0, 100)): 'some stuff ' + str(randint(0, 100))}
    Task.current_task().connect(additional_parameters)
    print(" Process %s\tWaiting %s seconds" % (inputs, the_time))
    time.sleep(int(the_time))
    print(" Process %s\tDONE" % inputs)

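# --- usage sketch (assumption: mirrors the trains multiprocessing example; Task.init() is
# called once in the parent process so the workers can reach Task.current_task(), and the
# (name, seconds) argument list below is illustrative) ---
from multiprocessing import Pool
from trains import Task

if __name__ == '__main__':
    task = Task.init(project_name='examples', task_name='multiprocessing sample')
    data = [('a', 2), ('b', 3), ('c', 1)]
    with Pool(3) as pool:
        pool.map(mp_worker, data)
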
def __init__(self, logger: TrainsLogger = None, output_uri: str = None,
             dirname: str = None, *args, **kwargs):
    try:
        from trains import Task
    except ImportError:
        raise RuntimeError(
            "This contrib module requires trains to be installed. "
            "You may install trains using: \n pip install trains \n")

    if logger and not isinstance(logger, TrainsLogger):
        raise TypeError("logger must be an instance of TrainsLogger")

    self.task = Task.current_task()
    if not self.task:
        raise RuntimeError(
            "TrainsSaver requires a Trains Task to be initialized. "
            "Please use the `logger` argument or call `trains.Task.init()`."
        )

    if not dirname:
        dirname = tempfile.mkdtemp(prefix="ignite_checkpoints_{}".format(
            datetime.now().strftime("%Y_%m_%d_%H_%M_%S_")))
        warnings.warn(
            "TrainsSaver created a temporary checkpoints directory: {}".format(dirname))

    super(TrainsSaver, self).__init__(dirname=dirname, *args, **kwargs)

    if output_uri:
        self.task.output_uri = output_uri

def __init__(
        self,
        pool_frequency=0.2,  # type: float
        default_execution_queue=None,  # type: Optional[str]
        pipeline_time_limit=None,  # type: Optional[float]
        auto_connect_task=True,  # type: Union[bool, Task]
        always_create_task=False,  # type: bool
        add_pipeline_tags=False,  # type: bool
):
    # type: (...) -> ()
    """
    Create a new pipeline controller. The newly created object will launch and monitor the new experiments.

    :param float pool_frequency: The polling frequency (in minutes) for monitoring experiments / states.
    :param str default_execution_queue: The execution queue to use if no execution queue is provided.
    :param float pipeline_time_limit: The maximum time (minutes) for the entire pipeline process. The
        default is ``None``, indicating no time limit.
    :param bool auto_connect_task: Store pipeline arguments and configuration in the Task

        - ``True`` - The pipeline argument and configuration will be stored in the current Task. All arguments
          will be under the hyper-parameter section ``Pipeline``, and the pipeline DAG will be stored as a
          Task configuration object named ``Pipeline``.
        - ``False`` - Do not store with Task.
        - ``Task`` - A specific Task object to connect the pipeline with.
    :param bool always_create_task: Always create a new Task

        - ``True`` - No current Task initialized. Create a new task named ``Pipeline`` in the ``base_task_id``
          project.
        - ``False`` - Use the :py:meth:`task.Task.current_task` (if exists) to report statistics.
    :param bool add_pipeline_tags: (default: False) if True, add `pipe: <pipeline_task_id>` tag to all
        steps (Tasks) created by this pipeline.
    """
    self._nodes = {}
    self._running_nodes = []
    self._start_time = None
    self._pipeline_time_limit = pipeline_time_limit * 60. if pipeline_time_limit else None
    self._default_execution_queue = default_execution_queue
    self._pool_frequency = pool_frequency * 60.
    self._thread = None
    self._stop_event = None
    self._experiment_created_cb = None
    self._add_pipeline_tags = add_pipeline_tags
    self._task = auto_connect_task if isinstance(auto_connect_task, Task) else Task.current_task()
    self._step_ref_pattern = re.compile(self._step_pattern)
    if not self._task and always_create_task:
        self._task = Task.init(
            project_name='Pipelines',
            task_name='Pipeline {}'.format(datetime.now()),
            task_type=Task.TaskTypes.controller,
        )
    # make sure all the created tasks are our children, as we are creating them
    if self._task:
        self._task.add_tags([self._tag])
    self._auto_connect_task = bool(auto_connect_task)

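# --- usage sketch (assumptions: the project/task names below are placeholders, the referenced
# base tasks already exist, and add_step()/start()/wait()/stop() follow the trains
# PipelineController API that this constructor belongs to) ---
from trains import Task
from trains.automation.controller import PipelineController

task = Task.init(project_name='examples', task_name='pipeline demo',
                 task_type=Task.TaskTypes.controller)
pipe = PipelineController(default_execution_queue='default', add_pipeline_tags=True)
pipe.add_step(name='stage_data', base_task_project='examples', base_task_name='data step')
pipe.add_step(name='stage_train', parents=['stage_data'],
              base_task_project='examples', base_task_name='train step')
pipe.start()
pipe.wait()
pipe.stop()
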
def run(num_workers):
    """ Distributed Synchronous SGD Example """
    th.manual_seed(1234)
    train_set, bsz = partition_dataset(num_workers)
    model = Net()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    num_batches = ceil(len(train_set.dataset) / float(bsz))

    from random import randint
    param = {'worker_{}_stuff'.format(dist.get_rank()): 'some stuff ' + str(randint(0, 100))}
    Task.current_task().connect(param)
    Task.current_task().upload_artifact(
        'temp {:02d}'.format(dist.get_rank()),
        artifact_object={'worker_rank': dist.get_rank()})

    for epoch in range(2):
        epoch_loss = 0.0
        for i, (data, target) in enumerate(train_set):
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            epoch_loss += loss.item()
            loss.backward()
            average_gradients(model)
            optimizer.step()
            if i % 10 == 0:
                print('{}] Train Epoch {} - {} \tLoss {:.6f}'.format(dist.get_rank(), epoch, i, loss))
                Task.current_task().get_logger().report_scalar(
                    'loss', 'worker {:02d}'.format(dist.get_rank()),
                    value=loss.item(), iteration=i)
            if i > 100:
                break
        print('Rank ', dist.get_rank(), ', epoch ', epoch, ': ', epoch_loss / num_batches)

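# --- launch sketch (an assumption, not part of the original snippet: the classic
# torch.distributed 'gloo' launcher pattern; run(), partition_dataset(), Net() and
# average_gradients() come from the surrounding example, Task.init() is done once
# in the parent so the forked workers can reach Task.current_task()) ---
import os
import torch.distributed as dist
from torch.multiprocessing import Process
from trains import Task


def init_process(rank, size, fn, backend='gloo'):
    # minimal env:// rendezvous on localhost
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(size)


if __name__ == '__main__':
    Task.init(project_name='examples', task_name='distributed example')
    world_size = 4
    processes = []
    for rank in range(world_size):
        p = Process(target=init_process, args=(rank, world_size, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
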
def trains_log_text(text):
    logger = None
    if "TRAINS_STD_LOGGER" in globals():
        logger = TRAINS_STD_LOGGER
    else:
        try:
            from trains import Task
            logger = Task.current_task().get_logger()
        except:
            pass
    if logger is None:
        return
    logger.report_text(text)
    logger.flush()

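# --- usage sketch (assumption: Task.init() has already been called in this process;
# otherwise the helper above silently returns without reporting anything) ---
from trains import Task

task = Task.init(project_name='examples', task_name='text logging')
trains_log_text('training started')
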
def compute_and_log_cm():
    cm = cm_metric.compute()
    # CM: values are normalized such that diagonal values represent class recalls
    cm = ConfusionMatrix.normalize(cm, "recall").cpu().numpy()
    if idist.get_rank() == 0:
        from trains import Task
        trains_logger = Task.current_task().get_logger()
        trains_logger.report_confusion_matrix(
            title="Final Confusion Matrix",
            series="cm-preds-gt",
            matrix=cm,
            iteration=trainer.state.iteration,
            xlabels=VOCSegmentationOpencv.target_names,
            ylabels=VOCSegmentationOpencv.target_names,
        )

def _setup_check_trains(self, logger, output_uri):
    try:
        from trains import Task
    except ImportError:
        raise RuntimeError(
            "This contrib module requires trains to be installed. "
            "You may install trains using: \n pip install trains \n")

    if logger and not isinstance(logger, TrainsLogger):
        raise TypeError("logger must be an instance of TrainsLogger")

    self._task = Task.current_task()
    if not self._task:
        raise RuntimeError(
            "TrainsSaver requires a Trains Task to be initialized. "
            "Please use the `logger` argument or call `trains.Task.init()`."
        )

    if output_uri:
        self._task.output_uri = output_uri

def _daemon(cls, jupyter_notebook_filename):
    from trains import Task

    # load jupyter notebook package
    # noinspection PyBroadException
    try:
        from nbconvert.exporters.script import ScriptExporter
        _script_exporter = ScriptExporter()
    except Exception:
        return

    # load pigar
    # noinspection PyBroadException
    try:
        from pigar.reqs import get_installed_pkgs_detail, file_import_modules
        from pigar.modules import ReqsModules
        from pigar.log import logger
        logger.setLevel(logging.WARNING)
    except Exception:
        file_import_modules = None

    # main observer loop
    notebook = Path(jupyter_notebook_filename)
    last_update_ts = None
    counter = 0
    prev_script_hash = None
    while True:
        if cls._exit_event.wait(cls._sample_frequency if counter else cls._first_sample_frequency):
            return
        counter += 1
        # noinspection PyBroadException
        try:
            if not notebook.exists():
                continue
            # check if notebook changed
            if last_update_ts is not None and notebook.stat().st_mtime - last_update_ts <= 0:
                continue
            last_update_ts = notebook.stat().st_mtime
            task = Task.current_task()
            if not task:
                continue
            # get notebook python script
            script_code, resources = _script_exporter.from_filename(jupyter_notebook_filename)
            current_script_hash = hash(script_code)
            if prev_script_hash and prev_script_hash == current_script_hash:
                continue
            requirements_txt = ''
            # parse jupyter python script and prepare pip requirements (pigar)
            # if backend supports requirements
            if file_import_modules and Session.api_version > '2.1':
                fmodules, _ = file_import_modules(notebook.parts[-1], script_code)
                installed_pkgs = get_installed_pkgs_detail()
                reqs = ReqsModules()
                for name in fmodules:
                    if name in installed_pkgs:
                        pkg_name, version = installed_pkgs[name]
                        reqs.add(pkg_name, version, fmodules[name])
                requirements_txt = ScriptRequirements.create_requirements_txt(reqs)
            # update script
            prev_script_hash = current_script_hash
            data_script = task.data.script
            data_script.diff = script_code
            data_script.requirements = {'pip': requirements_txt}
            task._update_script(script=data_script)
            # update requirements
            if requirements_txt:
                task._update_requirements(requirements=requirements_txt)
        except Exception:
            pass

def _daemon(cls, jupyter_notebook_filename):
    from trains import Task

    # load jupyter notebook package
    # noinspection PyBroadException
    try:
        # noinspection PyPackageRequirements
        from nbconvert.exporters.script import ScriptExporter
        _script_exporter = ScriptExporter()
    except Exception:
        return

    # load pigar
    # noinspection PyBroadException
    try:
        from ....utilities.pigar.reqs import get_installed_pkgs_detail, file_import_modules
        from ....utilities.pigar.modules import ReqsModules
        from ....utilities.pigar.log import logger
        logger.setLevel(logging.WARNING)
    except Exception:
        file_import_modules = None

    # load IPython
    # noinspection PyBroadException
    try:
        # noinspection PyPackageRequirements
        from IPython import get_ipython
    except Exception:
        # should not happen
        get_ipython = None

    # setup local notebook files
    if jupyter_notebook_filename:
        notebook = Path(jupyter_notebook_filename)
        local_jupyter_filename = jupyter_notebook_filename
    else:
        notebook = None
        fd, local_jupyter_filename = mkstemp(suffix='.ipynb')
        os.close(fd)

    last_update_ts = None
    counter = 0
    prev_script_hash = None

    # noinspection PyBroadException
    try:
        from ....version import __version__
        our_module = cls.__module__.split('.')[0], __version__
    except Exception:
        our_module = None

    # noinspection PyBroadException
    try:
        import re
        replace_ipython_pattern = re.compile(r'\n([ \t]*)get_ipython\(\)')
    except Exception:
        replace_ipython_pattern = None

    # main observer loop, check if we need to exit
    while not cls._exit_event.wait(timeout=0.):
        # wait for timeout or sync event
        cls._sync_event.wait(cls._sample_frequency if counter else cls._first_sample_frequency)
        cls._sync_event.clear()
        counter += 1
        # noinspection PyBroadException
        try:
            # if there is no task connected, do nothing
            task = Task.current_task()
            if not task:
                continue

            script_code = None
            fmodules = None
            current_cell = None
            # if we have a local file:
            if notebook:
                if not notebook.exists():
                    continue
                # check if notebook changed
                if last_update_ts is not None and notebook.stat().st_mtime - last_update_ts <= 0:
                    continue
                last_update_ts = notebook.stat().st_mtime
            else:
                # serialize notebook to a temp file
                if cls._jupyter_history_logger:
                    script_code, current_cell = cls._jupyter_history_logger.history_to_str()
                else:
                    # noinspection PyBroadException
                    try:
                        # noinspection PyBroadException
                        try:
                            os.unlink(local_jupyter_filename)
                        except Exception:
                            pass
                        get_ipython().run_line_magic(
                            'history', '-t -f {}'.format(local_jupyter_filename))
                        with open(local_jupyter_filename, 'r') as f:
                            script_code = f.read()
                        # load the modules
                        from ....utilities.pigar.modules import ImportedModules
                        fmodules = ImportedModules()
                        for nm in set([str(m).split('.')[0] for m in sys.modules]):
                            fmodules.add(nm, 'notebook', 0)
                    except Exception:
                        continue

            # get notebook python script
            if script_code is None:
                script_code, _ = _script_exporter.from_filename(local_jupyter_filename)

            current_script_hash = hash(script_code + (current_cell or ''))
            if prev_script_hash and prev_script_hash == current_script_hash:
                continue

            # remove ipython direct access from the script code
            # we will not be able to run them anyhow
            if replace_ipython_pattern:
                script_code = replace_ipython_pattern.sub(r'\n# \g<1>get_ipython()', script_code)

            requirements_txt = ''
            conda_requirements = ''
            # parse jupyter python script and prepare pip requirements (pigar)
            # if backend supports requirements
            if file_import_modules and Session.check_min_api_version('2.2'):
                if fmodules is None:
                    fmodules, _ = file_import_modules(
                        notebook.parts[-1] if notebook else 'notebook', script_code)
                    if current_cell:
                        cell_fmodules, _ = file_import_modules(
                            notebook.parts[-1] if notebook else 'notebook', current_cell)
                        # noinspection PyBroadException
                        try:
                            fmodules |= cell_fmodules
                        except Exception:
                            pass
                # add current cell to the script
                if current_cell:
                    script_code += '\n' + current_cell
                fmodules = ScriptRequirements.add_trains_used_packages(fmodules)
                # noinspection PyUnboundLocalVariable
                installed_pkgs = get_installed_pkgs_detail()
                # make sure we are in installed packages
                if our_module and (our_module[0] not in installed_pkgs):
                    installed_pkgs[our_module[0]] = our_module
                # noinspection PyUnboundLocalVariable
                reqs = ReqsModules()
                for name in fmodules:
                    if name in installed_pkgs:
                        pkg_name, version = installed_pkgs[name]
                        reqs.add(pkg_name, version, fmodules[name])
                requirements_txt, conda_requirements = ScriptRequirements.create_requirements_txt(reqs)

            # update script
            prev_script_hash = current_script_hash
            data_script = task.data.script
            data_script.diff = script_code
            data_script.requirements = {'pip': requirements_txt, 'conda': conda_requirements}
            # noinspection PyProtectedMember
            task._update_script(script=data_script)
            # update requirements
            # noinspection PyProtectedMember
            task._update_requirements(requirements=requirements_txt)
        except Exception:
            pass

def _daemon(cls, jupyter_notebook_filename):
    from trains import Task

    # load jupyter notebook package
    # noinspection PyBroadException
    try:
        from nbconvert.exporters.script import ScriptExporter
        _script_exporter = ScriptExporter()
    except Exception:
        return

    # load pigar
    # noinspection PyBroadException
    try:
        from pigar.reqs import get_installed_pkgs_detail, file_import_modules
        from pigar.modules import ReqsModules
        from pigar.log import logger
        logger.setLevel(logging.WARNING)
    except Exception:
        file_import_modules = None

    # load IPython
    # noinspection PyBroadException
    try:
        from IPython import get_ipython
    except Exception:
        # should not happen
        get_ipython = None

    # setup local notebook files
    if jupyter_notebook_filename:
        notebook = Path(jupyter_notebook_filename)
        local_jupyter_filename = jupyter_notebook_filename
    else:
        notebook = None
        fd, local_jupyter_filename = mkstemp(suffix='.ipynb')
        os.close(fd)

    last_update_ts = None
    counter = 0
    prev_script_hash = None

    # main observer loop, check if we need to exit
    while not cls._exit_event.wait(timeout=0.):
        # wait for timeout or sync event
        cls._sync_event.wait(cls._sample_frequency if counter else cls._first_sample_frequency)
        cls._sync_event.clear()
        counter += 1
        # noinspection PyBroadException
        try:
            # if there is no task connected, do nothing
            task = Task.current_task()
            if not task:
                continue

            # if we have a local file:
            if notebook:
                if not notebook.exists():
                    continue
                # check if notebook changed
                if last_update_ts is not None and notebook.stat().st_mtime - last_update_ts <= 0:
                    continue
                last_update_ts = notebook.stat().st_mtime
            else:
                # serialize notebook to a temp file
                # noinspection PyBroadException
                try:
                    get_ipython().run_line_magic('notebook', local_jupyter_filename)
                except Exception as ex:
                    continue

            # get notebook python script
            script_code, resources = _script_exporter.from_filename(local_jupyter_filename)
            current_script_hash = hash(script_code)
            if prev_script_hash and prev_script_hash == current_script_hash:
                continue

            requirements_txt = ''
            conda_requirements = ''
            # parse jupyter python script and prepare pip requirements (pigar)
            # if backend supports requirements
            if file_import_modules and Session.check_min_api_version('2.2'):
                fmodules, _ = file_import_modules(notebook.parts[-1], script_code)
                fmodules = ScriptRequirements.add_trains_used_packages(fmodules)
                installed_pkgs = get_installed_pkgs_detail()
                reqs = ReqsModules()
                for name in fmodules:
                    if name in installed_pkgs:
                        pkg_name, version = installed_pkgs[name]
                        reqs.add(pkg_name, version, fmodules[name])
                requirements_txt, conda_requirements = ScriptRequirements.create_requirements_txt(reqs)

            # update script
            prev_script_hash = current_script_hash
            data_script = task.data.script
            data_script.diff = script_code
            data_script.requirements = {'pip': requirements_txt, 'conda': conda_requirements}
            task._update_script(script=data_script)
            # update requirements
            task._update_requirements(requirements=requirements_txt)
        except Exception:
            pass

def _trains_log_artifact(fp):
    from trains import Task
    task = Task.current_task()
    task.upload_artifact(Path(fp).name, fp)

def _trains_log_params(params_dict):
    from trains import Task
    task = Task.current_task()
    task.connect(params_dict)

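# --- usage sketch (assumption: Task.init() was already called; the parameter values and
# the 'model.pkl' path are placeholders for whatever the training script produces) ---
from trains import Task

task = Task.init(project_name='examples', task_name='helper logging')
_trains_log_params({'lr': 0.001, 'epochs': 10})
_trains_log_artifact('model.pkl')
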
from trains import Task

task = Task.init(project_name='examples', task_name='Manual reporting')

# standard python logging
logging.info('This is an info message')

# this is loguru test example
try:
    from loguru import logger
    logger.info("That's it, beautiful and simple logging! (using ANSI colors)")
except ImportError:
    pass

# get TRAINS logger object for any metrics / reports
logger = Task.current_task().get_logger()

# log text
logger.report_text("hello")

# report scalar values
logger.report_scalar("example_scalar", "series A", iteration=0, value=100)
logger.report_scalar("example_scalar", "series A", iteration=1, value=200)

# report histogram
histogram = np.random.randint(10, size=10)
logger.report_histogram("example_histogram", "random histogram", iteration=1, values=histogram)

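# --- additional reporting sketch (assumption: continues the snippet above; the matrix
# values are synthetic and report_confusion_matrix follows the same signature used in the
# segmentation example later in this collection) ---
confusion = np.random.randint(10, size=(10, 10))
logger.report_confusion_matrix("example_confusion", "ignored", iteration=1, matrix=confusion)

# make sure all pending reports are sent before the script ends
logger.flush()
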
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--hid', type=int, default=64,
                        help='Size of hidden layers in the model')
    parser.add_argument('--l', type=int, default=2,
                        help='Number of hidden layers')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='Discount factor')
    parser.add_argument('--cpu', type=int, default=4,
                        help='Force to use cpu during training')
    parser.add_argument('--steps', type=int, default=4000,
                        help='Maximum number of steps')
    parser.add_argument('--exp_name', type=str, default='knapsack-ppo')
    parser.add_argument('--output', type=str, help='Where to save results')
    parser.add_argument('--items', type=int, default=200,
                        help='Number of items in knapsack instances')
    parser.add_argument('--capacity', type=float, default=20,
                        help='Total initial capacity of the knapsack')
    parser.add_argument('--trains', action="store_true",
                        help='Use trains to log training progress.')
    args = parser.parse_args()

    EVAL_BASELINES_RESULTS_FILENAME = (
        f'experiments/{args.items}n_{args.capacity}c/'
        f'baseline_values.json')

    dict_env = Simulator(max_steps=args.items,
                         max_capacity=args.capacity,
                         problem_generator=ItemGenerator())
    env = KnapsackArrayWrapper(dict_env)

    model_config = dict(observation_space=env.observation_space.shape[0],
                        action_space=env.action_space.n,
                        hidden_sizes=[args.hid] * args.l,
                        activation='relu')

    agent_config = dict(
        run_name=f'{args.exp_name}-n{args.items}-c{args.capacity}',
        lr=3e-4,
        discount=0.99,
        # number of episodes to do altogether
        number_of_episodes=50000,
        # a batch is N episodes where N is number_of_episodes_in_batch
        number_of_episodes_in_batch=50,  # this must be a divisor of number of episodes
        total_num_eval_seeds=100,
        num_eval_seeds=10,
        evaluate_every=50,
        num_train_seeds=1000,
        reward_average_window_size=10,
        entropy_coeff=0.01,  # consider decreasing this back
        value_coeff=0.3,
        minibatch_size=256,
        model_config=model_config,
        save_checkpoint_every=1000,
        eps_clip=0.2,
        n_ppo_updates=20,
        target_kl=0.005,
        logit_normalizer=10,
        problem_name='oks')
    agent_config['policy_model_class'] = 'MLPActorCritic'

    if args.trains:
        task = Task.init(
            project_name="train_knapsack_pytorch",
            task_name=f'train_ppo_agent_{args.items}n_{args.capacity}c')
        logger = Task.current_task().get_logger()
        logger.tensorboard_single_series_per_graph(single_series=True)
    else:
        logger = None

    # First compute results for the baseline algorithms
    baseline_results_path = Path(EVAL_BASELINES_RESULTS_FILENAME)
    eval_seeds = list(range(agent_config['total_num_eval_seeds']))
    if not baseline_results_path.exists():
        baseline_values = {
            'Random': evaluate_policy_simple(dict_env, eval_seeds, random_policy,
                                             samples_per_seed=5),
            'Simple': evaluate_policy_simple(dict_env, eval_seeds, simple_policy,
                                             samples_per_seed=1)
        }
        baseline_results_path.parent.mkdir(parents=True, exist_ok=True)
        with open(baseline_results_path, 'w') as f:
            json.dump(baseline_values, f, indent=2)
    else:
        print(f"loading: {EVAL_BASELINES_RESULTS_FILENAME}")
        with open(baseline_results_path, 'r') as f:
            baseline_values = json.load(f)
        # JSON saves dictionary keys as strings, so we have to convert them back to ints
        baseline_values = {
            baseline: {int(seed): val for seed, val in baseline_dict.items()}
            for baseline, baseline_dict in baseline_values.items()
        }

    agent_model = MLPActorCritic(**model_config)
    ppo_agent = PPOAgent(env=env,
                         config=agent_config,
                         model=agent_model,
                         eval_seeds=eval_seeds,
                         baseline_eval_values=baseline_values)
    set_seeds()
    ppo_agent.train()

def main():
    # Init environment
    use_trains = False
    problem_name = 'cvrp'
    problem_type = 'uniform_offline'
    max_customer_times = 0
    size = 20
    vehicle_velocity = 1
    vehicle_capacity = 30
    random_seed = 0
    max_demand = 10
    start_at_depot = True

    EVAL_BASELINES_RESULTS_FILENAME = (
        f"experiments/{problem_name}/{size}s_{vehicle_capacity}c_{max_customer_times}t/"
        f"baseline_values.json")
    env_config = {
        'problem_type': problem_type,
        'max_customer_times': max_customer_times,
        'size': size,
        'max_demand': max_demand,
        'vehicle_velocity': vehicle_velocity,
        'vehicle_capacity': vehicle_capacity,
        'start_at_depot': start_at_depot,
        'random_seed': random_seed,
        'eval_baseline_results_filename': EVAL_BASELINES_RESULTS_FILENAME
    }

    if use_trains:
        task = Task.init(
            project_name="train_cvrp_pytorch",
            task_name=f'train_ppo_agent_{size}s_{vehicle_capacity}c_{max_customer_times}t'
        )
        logger = Task.current_task().get_logger()
        logger.tensorboard_single_series_per_graph(single_series=True)
    else:
        logger = None

    env = create_uniform_dynamic_problem(max_customer_times=max_customer_times,
                                         size=size,
                                         max_demand=max_demand,
                                         vehicle_velocity=vehicle_velocity,
                                         vehicle_capacity=vehicle_capacity,
                                         random_seed=random_seed,
                                         start_at_depot=start_at_depot)
    # customer_positions = [[0.25, 0.25], [0.5, 0.5], [1, 1]]
    # env = create_fixed_static_problem(customer_positions=customer_positions,
    #                                   depot_position=[0, 0],
    #                                   initial_vehicle_capacity=10,
    #                                   initial_vehicle_position=[0, 0],
    #                                   customer_demands=[1]*len(customer_positions),
    #                                   customer_times=[0]*len(customer_positions),
    #                                   vehicle_velocity=1)
    #
    # env_config = {'problem_type': 'fixed_problem',
    #               'size': 3,
    #               'vehicle_capacity': 10,
    #               'vehicle_position': [0, 0],
    #               'customer_positions': customer_positions,
    #               'start_at_depot': True
    #               }
    # EVAL_BASELINES_RESULTS_FILENAME = (f'experiments/{3}s_{10}c_{0}t/'
    #                                    f'baseline_values.json')

    tg_env = GeometricAttentionWrapper(env)
    tg_env.reset()

    # model_config = {
    #     'use_value_critic': True,
    #     'num_features': 4,
    #     'embedding_dim': 128,
    #     'value_embedding_dim': 128,
    #     'use_batch_norm': False
    # }
    model_config = {
        'n_passes': 4,
        'edge_embedding_dim': 64,
        'node_embedding_dim': 64,
        'global_embedding_dim': 64,
        'edge_hidden_dim': 64,
        'edge_target_dim': 64,
        'node_target_dim': 64,
        'node_dim_out': 1,
        'edge_dim_out': 1,
        'node_hidden_dim': 64,
        'global_hidden_dim': 64,
        'global_target_dim': 64,
        'global_dim_out': 64,
        'edge_feature_dim': 1,
        'node_feature_dim': 5,  # indicator, x, y, demand/capacity, is_visited
        'global_feature_dim': 1,
        'value_embedding_dim': 64,
        'use_value_critic': True,
        'use_batch_norm': False
    }
    agent_config = {
        'lr': 0.0003,
        'discount': 0.99,
        # number of episodes to do altogether
        'number_of_episodes': 50000000,
        # a batch is N episodes where N is number_of_episodes_in_batch
        'number_of_episodes_in_batch': 20,  # this must be a divisor of number of episodes
        'total_num_eval_seeds': 100,
        'num_eval_seeds': 10,
        'evaluate_every': 50,
        'num_train_seeds': 1000,
        'reward_average_window_size': 10,
        'entropy_coeff': 0.001,  # consider decreasing this back
        'value_coeff': 0.1,
        'model_config': model_config,
        'save_checkpoint_every': 1000,
        'eps_clip': 0.5,
        'n_ppo_updates': 80,
        'target_kl': 0.0001,
        'logit_normalizer': 5,
        'problem_name': problem_name  # used for saving results
    }
    model_config['logit_normalizer'] = agent_config['logit_normalizer']
    agent_config['run_name'] = f"ep_in_batch_{agent_config['number_of_episodes_in_batch']}_" \
                               f"n_eval_{agent_config['num_eval_seeds']}_lr_{agent_config['lr']}"

    eval_seeds = list(range(agent_config['total_num_eval_seeds']))
    baseline_results_path = Path(EVAL_BASELINES_RESULTS_FILENAME)
    or_tools_policy = ORToolsPolicy(timeout=10)
    if not baseline_results_path.exists():
        baseline_values = {
            'distance': evaluate_policy_simple(env, eval_seeds, distance_proportional_policy,
                                               samples_per_seed=5),
            'ORTools': evaluate_policy_simple(env, eval_seeds, or_tools_policy,
                                              samples_per_seed=5)
        }
        baseline_results_path.parent.mkdir(parents=True, exist_ok=True)
        with open(baseline_results_path, 'w') as f:
            json.dump(baseline_values, f, indent=2)
    else:
        print(f"loading: {EVAL_BASELINES_RESULTS_FILENAME}")
        with open(baseline_results_path, 'r') as f:
            baseline_values = json.load(f)
        # JSON saves dictionary keys as strings, so we have to convert them back to ints
        baseline_values = {
            baseline: {int(seed): val for seed, val in baseline_dict.items()}
            for baseline, baseline_dict in baseline_values.items()
        }

    # model = PolicyFullyConnectedGAT(cfg=model_config, model_name='ppo_policy_model')
    model = PolicyFullyConnectedMessagePassing(
        cfg=model_config, model_name='ppo_message_passing_model')
    set_seeds()

    if use_trains:
        parameters_agent = task.connect(agent_config, name='agent_config')
        parameters_env = task.connect(env_config, name='env_config')
    agent_config['env_config'] = env_config

    ppo_agent = PPOAgent(tg_env,
                         config=agent_config,
                         model=model,
                         eval_seeds=eval_seeds,
                         baseline_eval_values=baseline_values)
    ppo_agent.train()

def main(config: AppConfig):
    task = Task.current_task()
    task.set_name(config.task_name)

def main():
    # Init environment
    use_trains = False
    problem_name = 'gc'
    problem_type = 'er_offline'
    num_new_nodes = 0
    num_initial_nodes = 20
    prob_edge = 0.3
    is_online = False
    random_seed = 0

    if use_trains:
        task = Task.init(
            project_name="train_gc_pytorch",
            task_name=f'train_ppo_agent_{num_initial_nodes}n_{num_new_nodes}new_n_{prob_edge}p'
        )
        logger = Task.current_task().get_logger()
        logger.tensorboard_single_series_per_graph(single_series=True)
    else:
        task = None

    env = create_er_random_graph_problem(num_new_nodes=num_new_nodes,
                                         num_initial_nodes=num_initial_nodes,
                                         prob_edge=prob_edge,
                                         is_online=is_online,
                                         random_seed=random_seed)
    env_tg = GraphWithColorsWrapper(env)
    env_tg.reset()

    model_config = {
        'n_passes': 4,
        'edge_embedding_dim': 128,
        'node_embedding_dim': 128,
        'global_embedding_dim': 128,
        'edge_hidden_dim': 128,
        'edge_target_dim': 128,
        'node_target_dim': 128,
        'node_dim_out': 128,
        'edge_dim_out': 1,
        'node_hidden_dim': 128,
        'global_hidden_dim': 128,
        'global_target_dim': 128,
        'global_dim_out': 128,
        'edge_feature_dim': 1,
        'node_feature_dim': 2,  # indicator, color
        'global_feature_dim': 1,
        'value_embedding_dim': 128,
        'use_value_critic': True,
        'use_batch_norm': False
    }
    agent_config = {
        'lr': 0.0001,
        'discount': 1.0,
        # number of episodes to do altogether
        'number_of_episodes': 50000,
        # a batch is N episodes where N is number_of_episodes_in_batch
        'number_of_episodes_in_batch': 40,  # this must be a divisor of number of episodes
        'total_num_eval_seeds': 100,
        'num_eval_seeds': 10,
        'evaluate_every': 50,
        'num_train_seeds': 1000,
        'reward_average_window_size': 10,
        'entropy_coeff': 0.01,  # consider decreasing this back
        'value_coeff': 0.3,
        'model_config': model_config,
        'save_checkpoint_every': 1000,
        'eps_clip': 0.5,
        'n_ppo_updates': 20,
        'target_kl': 0.005,
        'logit_normalizer': 10,
        'problem_name': problem_name  # used for saving results
    }
    model_config['logit_normalizer'] = agent_config['logit_normalizer']

    EVAL_BASELINES_RESULTS_FILENAME = (f"experiments/{problem_name}/"
                                       f"{agent_config['total_num_eval_seeds']}n-seeds_{num_initial_nodes}n_"
                                       f"{num_new_nodes}new_n_{prob_edge}p/"
                                       f"baseline_values.json")
    env_config = {'problem_type': problem_type,
                  'num_new_nodes': num_new_nodes,
                  'num_initial_nodes': num_initial_nodes,
                  'prob_edge': prob_edge,
                  'is_online': is_online,
                  'random_seed': random_seed,
                  'eval_baseline_results_filename': EVAL_BASELINES_RESULTS_FILENAME}

    agent_config['run_name'] = f"master_with_change_in_wrapper_and_get_loss_update" \
                               f"_ep_in_batch_{agent_config['number_of_episodes_in_batch']}_" \
                               f"n_eval_{agent_config['num_eval_seeds']}_lr_{agent_config['lr']}"

    eval_seeds = list(range(agent_config['total_num_eval_seeds']))
    baseline_results_path = Path(EVAL_BASELINES_RESULTS_FILENAME)
    or_tools_policy = ORToolsOfflinePolicy(timeout=10000)
    if not baseline_results_path.exists():
        baseline_values = {
            'random_wo_nc': evaluate_policy_simple(env, eval_seeds, random_policy_without_newcolor,
                                                   samples_per_seed=1),
            # 'ORTools': evaluate_policy_simple(env, eval_seeds, or_tools_policy, samples_per_seed=1)
        }
        baseline_results_path.parent.mkdir(parents=True, exist_ok=True)
        with open(baseline_results_path, 'w') as f:
            json.dump(baseline_values, f, indent=2)
    else:
        print(f"loading: {EVAL_BASELINES_RESULTS_FILENAME}")
        with open(baseline_results_path, 'r') as f:
            baseline_values = json.load(f)
        # JSON saves dictionary keys as strings, so we have to convert them back to ints
        baseline_values = {
            baseline: {int(seed): val for seed, val in baseline_dict.items()}
            for baseline, baseline_dict in baseline_values.items()
        }

    model = PolicyModel(cfg=model_config, model_name='ppo_policy_model')
    set_seeds()

    if use_trains:
        task.connect(agent_config, name='agent_config')
        task.connect(env_config, name='env_config')
    agent_config['env_config'] = env_config

    ppo_agent = PPOAgent(env_tg,
                         config=agent_config,
                         model=model,
                         eval_seeds=eval_seeds,
                         baseline_eval_values=baseline_values)
    ppo_agent.train()

    }, index=['falcon', 'dog', 'spider', 'fish'])

# Register Pandas object as artifact to watch
# (it will be monitored in the background and automatically synced and uploaded)
task.register_artifact('train', df, metadata={'counting': 'legs', 'max legs': 69})

# change the artifact object
df.sample(frac=0.5, replace=True, random_state=1)
# or access it from anywhere using the Task
Task.current_task().artifacts['train'].sample(frac=0.5, replace=True, random_state=1)

# add and upload pandas.DataFrame (onetime snapshot of the object)
task.upload_artifact('Pandas', artifact_object=df)
# add and upload local file artifact
task.upload_artifact('local file', artifact_object='samples/dancing.jpg')
# add and upload dictionary (stored as JSON file)
task.upload_artifact('dictionary', df.to_dict())
# add and upload Numpy Object (stored as .npz file)
task.upload_artifact('Numpy Eye', np.eye(100, 100))
# add and upload Image (stored as .png file)
im = Image.open('samples/dancing.jpg')
task.upload_artifact('pillow_image', im)

# do something

    }, index=['falcon', 'dog', 'spider', 'fish'])

# Register Pandas object as artifact to watch
# (it will be monitored in the background and automatically synced and uploaded)
task.register_artifact('train', df, metadata={'counting': 'legs', 'max legs': 69})

# change the artifact object
df.sample(frac=0.5, replace=True, random_state=1)
# or access it from anywhere using the Task's get_registered_artifacts()
Task.current_task().get_registered_artifacts()['train'].sample(frac=0.5, replace=True, random_state=1)

# add and upload pandas.DataFrame (onetime snapshot of the object)
task.upload_artifact('Pandas', artifact_object=df)
# add and upload local file artifact
task.upload_artifact('local file', artifact_object='samples/dancing.jpg')
# add and upload dictionary (stored as JSON file)
task.upload_artifact('dictionary', df.to_dict())
# add and upload Numpy Object (stored as .npz file)
task.upload_artifact('Numpy Eye', np.eye(100, 100))
# add and upload Image (stored as .png file)
im = Image.open('samples/dancing.jpg')
task.upload_artifact('pillow_image', im)

# add and upload a folder, artifact_object should be the folder path
task.upload_artifact('local folder', artifact_object='samples/')

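# --- retrieval sketch (assumptions: the project/task names below are placeholders for the
# run that produced the artifacts above; Artifact.get() deserializes the stored object and
# get_local_copy() downloads it, per the trains artifacts API) ---
from trains import Task

previous_task = Task.get_task(project_name='examples', task_name='artifacts example')
df_back = previous_task.artifacts['Pandas'].get()                   # the uploaded DataFrame snapshot
local_jpg = previous_task.artifacts['local file'].get_local_copy()  # path to a local copy of the file
print(df_back.head(), local_jpg)
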
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=6
)
print(model_cv)

x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    x, y, test_size=0.5, random_state=1)
model_cv.fit(x_train, y_train)
model_cv_best = model_cv.best_estimator_

grid_param = dict(cv=5, scoring='neg_mean_squared_error')
print(grid_param)
parameters_dict = Task.current_task().connect(model_cv.best_params_)
Task.current_task().connect(grid_param)
print("Best Model Parameter: ", model_cv.best_params_)

# run predictions on the test set
y_pred = model_cv.best_estimator_.predict(x_test)
print(y_pred)

joblib.dump(model_cv, 'model.pkl')
loaded_model = joblib.load('model.pkl')

number_layers = 10
accuracy = model_cv_best.score(x_test, y_test)
logger = Task.current_task().get_logger()

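# --- reporting sketch (assumption: continues the snippet above; the metric titles and the
# iteration value are illustrative, and the Logger/Task calls mirror those used elsewhere
# in this collection) ---
logger.report_scalar("performance", "test score", iteration=0, value=accuracy)
logger.report_text("Best parameters: {}".format(model_cv.best_params_))
Task.current_task().upload_artifact('sklearn model', artifact_object='model.pkl')
logger.flush()
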