def __init__(self, project_name: Text = None):
    """Create the callback, optionally wiring up a Comet.ml experiment.

    Logging is enabled only when both ``project_name`` and the
    ``MODEL_CONFIG_FILE`` environment variable are set; otherwise
    ``self._experiment`` and ``self._conf`` stay ``None`` (no-op mode).
    """
    self._project_name = project_name
    model_config_file = os.environ.get("MODEL_CONFIG_FILE")
    if project_name is None or model_config_file is None:
        # Disabled mode: nothing is logged anywhere.
        self._experiment = None
        self._conf = None
    else:
        self._experiment = comet_ml.Experiment(
            project_name=self._project_name)
        # Attach the SLURM log file (if the scheduler exported one).
        slurm_log_file = os.environ.get("SLURM_LOG_FILE")
        if slurm_log_file is not None:
            self._experiment.log_asset(slurm_log_file, overwrite=True)
        # NOTE(review): re-reads MODEL_CONFIG_FILE although the outer check
        # already guarantees it is not None on this branch.
        model_config_file = os.environ.get("MODEL_CONFIG_FILE")
        if model_config_file is not None:
            self._experiment.log_asset(model_config_file)
            with open(model_config_file) as f:
                self._conf = toml.load(f)
            # Upload the referenced allennlp config and every hyperparameter.
            self._experiment.log_asset(self._conf["allennlp_conf"])
            for key, val in self._conf["params"].items():
                self._experiment.log_parameter(key, val)
            self._experiment.add_tag(self._conf["name"])
            self._experiment.log_parameter("generated_id",
                                           self._conf["generated_id"])
            # Trial number defaults to 0 when absent from the config.
            trial = self._conf.get("trial", 0)
            self._experiment.log_parameter("trial", trial)
        # Record SLURM job id plus a link to its public log, when available.
        slurm_job_id = os.environ.get("SLURM_JOB_ID")
        if slurm_job_id is not None:
            self._experiment.log_other("slurm_job_id", slurm_job_id)
            self._experiment.log_html_url(
                f"http://users.umiacs.umd.edu/~entilzha/logs/{slurm_job_id}.log",
                label="slurm_log",
            )
        self._experiment.log_other("hostname", socket.gethostname())
def _get_experiment(self, mode, experiment_id=None):
    """Return a Comet experiment object for ``mode``.

    ``mode == "offline"`` selects the offline experiment classes; any other
    mode selects the online ones.  When ``experiment_id`` is given, the
    corresponding *Existing* class is used to resume that experiment.
    """
    shared = dict(
        workspace=self.workspace,
        project_name=self.project_name,
        **self.experiment_kwargs,
    )
    offline = mode == "offline"
    if experiment_id is not None:
        resume_cls = (comet_ml.ExistingOfflineExperiment
                      if offline else comet_ml.ExistingExperiment)
        return resume_cls(previous_experiment=experiment_id, **shared)
    fresh_cls = comet_ml.OfflineExperiment if offline else comet_ml.Experiment
    return fresh_cls(**shared)
def setup(self, args, state, model):
    """
    Setup the optional Comet.ml integration.

    Environment:
        COMET_MODE (:obj:`str`, `optional`):
            "OFFLINE", "ONLINE", or "DISABLED"
        COMET_PROJECT_NAME (:obj:`str`, `optional`):
            Comet.ml project name for experiments
        COMET_OFFLINE_DIRECTORY (:obj:`str`, `optional`):
            Folder to use for saving offline experiments when :obj:`COMET_MODE` is "OFFLINE"

    For a number of configurable items in the environment, see `here
    <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__.
    """
    self._initialized = True
    # Only the main process creates the experiment in distributed training.
    if state.is_world_process_zero:
        comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
        # BUG FIX: the original rebound the name ``args`` to this dict,
        # shadowing the training-args parameter, so the _log_parameters
        # call below logged the constructor kwargs instead of the real
        # training arguments.  Use a distinct name.
        experiment_kwargs = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
        experiment = None
        if comet_mode == "ONLINE":
            experiment = comet_ml.Experiment(**experiment_kwargs)
            logger.info("Automatic Comet.ml online logging enabled")
        elif comet_mode == "OFFLINE":
            experiment_kwargs["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
            experiment = comet_ml.OfflineExperiment(**experiment_kwargs)
            logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
        # "DISABLED" (or any other mode) leaves ``experiment`` as None.
        if experiment is not None:
            experiment._set_model_graph(model, framework="transformers")
            # Log the actual training arguments under the "args/" prefix.
            experiment._log_parameters(args, prefix="args/", framework="transformers")
            if hasattr(model, "config"):
                experiment._log_parameters(model.config, prefix="config/", framework="transformers")
def __init__(self, experiment_name, logdir, dic, hyperparam=None):
    """Set up TensorBoard + Comet logging and vocabulary decoders.

    Args:
        experiment_name: base name; a timestamp suffix is appended.
        logdir: root directory for TensorBoard event files.
        dic: dict with ``word_dic`` and ``answer_dic`` token->id mappings.
        hyperparam: hyperparameters to log to Comet
            (defaults to ``{"hyper": 1}``).
    """
    # BUG FIX: the default was a mutable dict literal shared across all
    # instances; use None as sentinel and build the default per call.
    if hyperparam is None:
        hyperparam = {"hyper": 1}
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    experiment_name = experiment_name + "_" + current_time
    self.tensorboard_writer = SummaryWriter(
        log_dir=pth.join(logdir, experiment_name),
        filename_suffix=experiment_name)
    self.comet_exp = comet_ml.Experiment(project_name="masterthesis")
    self.comet_exp.log_parameters(hyperparam)
    # Snapshot the source files that define this run.
    self.comet_exp.log_asset("train.py")
    self.comet_exp.log_asset("visualize.py")
    self.comet_exp.log_asset("dataset.py")
    self.comet_exp.log_asset("model.py")
    # Invert token->id maps to id->token for decoding predictions.
    self.word_dic = {v: k for k, v in dic['word_dic'].items()}
    self.answer_dic = {v: k for k, v in dic['answer_dic'].items()}
    # id 0 is padding, rendered as the empty string.
    self.word_vect = np.vectorize(lambda x: self.word_dic[x] if x > 0 else "")
    self.answer_vect = np.vectorize(lambda x: self.answer_dic[x])
    self.hooks = {}
    self.epoch = 0
    self.step = 0
def setup_comet(self):
    """
    Setup the optional Comet.ml integration.

    Environment:
        COMET_MODE: (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
        COMET_PROJECT_NAME: (Optional): str - Comet.ml project name for experiments
        COMET_OFFLINE_DIRECTORY: (Optional): str - folder to use for saving offline
            experiments when `COMET_MODE` is "OFFLINE"

    For a number of configurable items in the environment, see `here
    <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
    # Constructor kwargs; any other mode (e.g. "DISABLED") leaves
    # ``experiment`` as None and skips logging entirely.
    args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
    experiment = None
    if comet_mode == "ONLINE":
        experiment = comet_ml.Experiment(**args)
        logger.info("Automatic Comet.ml online logging enabled")
    elif comet_mode == "OFFLINE":
        args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        experiment = comet_ml.OfflineExperiment(**args)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
    if experiment is not None:
        # Private Comet SDK hooks, tagged with the calling framework.
        experiment._set_model_graph(self.model, framework="transformers")
        experiment._log_parameters(self.args, prefix="args/", framework="transformers")
        experiment._log_parameters(self.model.config, prefix="config/", framework="transformers")
def main():
    """Train a Detectron2 balloon-segmentation model with Comet logging,
    then run prediction logging on the validation split."""
    experiment = comet_ml.Experiment()
    cfg = setup()
    # Register the balloon train/val splits and their class metadata.
    # (lambda default d=d binds the current loop value, avoiding the
    # late-binding-closure pitfall.)
    for d in ["train", "val"]:
        DatasetCatalog.register("balloon_" + d, lambda d=d: get_balloon_dicts("balloon/" + d))
        MetadataCatalog.get("balloon_" + d).set(thing_classes=["balloon"])
    balloon_metadata = MetadataCatalog.get("balloon_train")

    # Wrap the Detectron Default Trainer
    trainer = CometDefaultTrainer(cfg, experiment)
    trainer.resume_or_load(resume=False)

    # Register Hook to compute metrics using an Evaluator Object
    trainer.register_hooks([
        hooks.EvalHook(10, lambda: trainer.evaluate_metrics(cfg, trainer.model))
    ])

    # Register Hook to compute eval loss
    trainer.register_hooks([
        hooks.EvalHook(10, lambda: trainer.evaluate_loss(cfg, trainer.model))
    ])
    trainer.train()

    # Evaluate Model Predictions
    cfg.MODEL.WEIGHTS = os.path.join(
        cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set a custom testing threshold
    predictor = DefaultPredictor(cfg)
    log_predictions(predictor, get_balloon_dicts("balloon/val"), experiment)
def before_fit(self):
    """Create the Comet experiment and log basic run metadata before training.

    Best-effort: failures to log individual items are reported but do not
    abort training.
    """
    try:
        self.experiment = comet_ml.Experiment(
            project_name=self.project_name)
    except ValueError:
        print("No active experiment")
        # BUG FIX: without an experiment every call below raised
        # AttributeError (silently swallowed by the bare excepts); bail out.
        return
    try:
        self.experiment.log_parameter("n_epoch", str(self.learn.n_epoch))
        self.experiment.log_parameter("model_class", str(type(self.learn.model)))
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt still works.
        print("Did not log all properties.")
    try:
        # Write repr(model) to a temp file and upload it as an asset.
        with tempfile.NamedTemporaryFile(mode="w") as f:
            with open(f.name, "w") as g:
                g.write(repr(self.learn.model))
            self.experiment.log_asset(f.name, "model_summary.txt")
    except Exception:
        print(
            "Did not log model summary. Check if your model is PyTorch model."
        )
    if self.log_model_weights and not hasattr(self.learn, "save_model"):
        print("Unable to log model to Comet.\n", )
def __init__(self, api_key: str = None, project_name: str = None, workspace: str = None):
    """Create the epoch callback and its Comet experiment, then log run metadata.

    Args:
        api_key: Comet API key (None lets Comet fall back to its own config).
        project_name: Comet project to log under.
        workspace: Comet workspace to log under.
    """
    print("Initializing cometml_epochCallback ...")
    # define specifications to save expr, and model_config_file
    self._project_name = project_name
    self._api_key = api_key
    self._workspace = workspace
    # define experiment based on api key, project name, and workspace.
    # SECURITY FIX: removed the debug prints that echoed the API key to
    # stdout (and thus into any captured job logs).
    self._experiment = comet_ml.Experiment(self._api_key,
                                           project_name=self._project_name,
                                           workspace=self._workspace,
                                           auto_output_logging=False)
    # log current slurm log file
    slurm_log_file = os.environ.get("SLURM_LOG_FILE")
    if slurm_log_file is not None:
        self._experiment.log_asset(slurm_log_file, overwrite=True)
    # log model configs/params (if they are not None)
    model_config_file = os.environ.get("MODEL_CONFIG_FILE")
    if model_config_file is not None:
        self._experiment.log_asset(model_config_file)
        with open(model_config_file) as f:
            self._conf = toml.load(f)
        for key, val in self._conf["params"].items():
            self._experiment.log_parameter(key, val)
        self._experiment.add_tag(self._conf["name"])
    # log experiment host name
    self._experiment.log_other("hostname", socket.gethostname())
def train(self, *args, **kwargs):
    """CLI hook: create a Comet experiment and record the invocation.

    Consistent with the other Ludwig/Comet hooks in this file: a failure to
    create the experiment (e.g. missing COMET_API_KEY) is logged and the
    hook returns instead of raising into the CLI.
    """
    import comet_ml
    import logging
    try:
        self.experiment = comet_ml.Experiment(log_code=False)
    except Exception:
        # ROBUSTNESS FIX: previously an unconfigured API key crashed the CLI.
        logging.error("comet_ml.Experiment() had errors. Perhaps you need to define COMET_API_KEY")
        return
    cli = " ".join(args)
    self.experiment.set_code(cli)
    self.experiment.set_filename("Ludwig CLI")
    self._log_html(cli)
    # Persist the resolved Comet configuration alongside the run.
    config = comet_ml.get_config()
    self._save_config(config)
def main():
    """Run few-shot evaluation over the requested tasks and upload the
    results file to Comet (new or existing experiment per task)."""
    args = parse_args()
    # Refuse to clobber an existing results file.
    if os.path.exists(args.output_path):
        print(f"Output path {args.output_path} exists!!!")
        return
    random.seed(args.seed)
    np.random.seed(args.seed)
    if args.limit:
        print(
            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        )
    if args.tasks == "all_tasks":
        task_names = tasks.ALL_TASKS
    else:
        task_names = args.tasks.split(",")
    task_dict = tasks.get_task_dict(task_names)
    lm = models.get_model(args.model)
    train_args = simple_parse_args_string(args.train_args)
    model_args = simple_parse_args_string(args.model_args)
    if train_args:
        # Fold model args and the seed into train_args so one dict is
        # threaded through the evaluator.
        train_args.update(model_args)
        train_args["seed"] = args.seed
    results = evaluator.evaluate(lm, task_dict, args.provide_description,
                                 args.num_fewshot, args.limit, train_args,
                                 args.model_args, args.seed)
    results["args"] = args.__dict__
    dumped = json.dumps(results, indent=2)
    print(dumped)
    if args.output_path:
        with open(args.output_path, "w") as f:
            f.write(dumped)
    # Upload the results file once per evaluated task.
    for task, task_res in results.items():
        if task not in task_names:
            continue
        if "train_args" not in task_res:
            # Fresh evaluation: create a brand-new Comet experiment.
            experiment = comet_ml.Experiment(
                api_key=os.environ.get('COMET_API_KEY'),
                project_name=os.environ.get('COMET_PROJECT', "few-shot"),
                workspace=os.environ.get('COMET_WORKSPACE', "yuvalkirstain"),
            )
            experiment.log_asset(args.output_path)
        else:
            # Evaluation of a prior training run: attach results to it.
            experiment = comet_ml.ExistingExperiment(
                api_key=os.environ.get('COMET_API_KEY'),
                previous_experiment=task_res["train_args"]
                ["previous_experiment"])
            experiment.log_asset(args.output_path)
def new_experiment():
    """Return a fresh Comet.ml experiment for the belleflopt project.

    Requires the ``COMET_ML_API_KEY`` environment variable; multiprocessing
    support is disabled on the returned experiment.
    """
    settings = {
        "project_name": "belleflopt",
        "workspace": "nickrsan",
        "api_key": os.environ["COMET_ML_API_KEY"],
        "auto_output_logging": "simple",
    }
    exp = comet_ml.Experiment(**settings)
    exp.disable_mp()
    return exp
def setup_comet_ml(dpmodel):
    """Create a Comet experiment when the training config asks for one.

    Returns the experiment, or None when the config has no ``comet_ml``
    section under ``train``.
    """
    train_cfg = dpmodel.config["train"]
    if 'comet_ml' not in train_cfg.keys():
        return None
    comet_cfg = train_cfg["comet_ml"]
    experiment = comet_ml.Experiment(
        api_key=comet_cfg["api_key"],
        project_name=comet_cfg["project_name"])
    # "results" is the placeholder name; only set a real experiment name.
    if dpmodel.config["experiment_name"] != "results":
        experiment.set_name(dpmodel.config["experiment_name"])
    experiment.log_others(dpmodel.config)
    return experiment
def setup_logger(
    nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
) -> Tuple[Callable[[Dict[str, Any]], None], Callable[[], None]]:
    """Create spaCy training hooks that mirror progress to Comet.ml.

    Returns:
        ``(log_step, finalize)`` — ``log_step`` forwards scores and losses to
        both the console and the Comet experiment; ``finalize`` ends the
        experiment and the console output.

    Raises:
        ValueError: when the Comet experiment cannot be created.
    """
    try:
        experiment = comet_ml.Experiment(project_name=project_name)
    except Exception:
        # CLEANUP: removed the dead ``experiment = None`` store that
        # preceded this unconditional raise in the original.
        raise ValueError(
            Errors.E881.format(
                library="Comet",
                url="https://comet.ml/docs/python-sdk/spacy/")) from None
    # Upload the (filtered) training config as an asset.
    config = get_filtered_config(nlp, remove_config_values)
    if experiment is not None:
        experiment.log_asset_data(config, "spacy-config.cfg")
    # Get methods for step console processing:
    console_log_step, console_finalize = console(nlp, stdout, stderr)

    def log_step(info: Optional[Dict[str, Any]]):
        # Always echo to the console first.
        console_log_step(info)
        if experiment is not None:
            if info is not None:
                # Log items:
                epoch = info.get("epoch", None)
                step = info.get("step", None)
                if "score" in info:
                    experiment.log_metric("score", info["score"], step=step, epoch=epoch)
                if "other_scores" in info:
                    # Nested score dicts are flattened into dotted keys.
                    results = {}
                    flatten_dictionary("", info["other_scores"], results)
                    experiment.log_metrics(results, step=step, epoch=epoch)
                if "losses" in info:
                    experiment.log_metrics(
                        {
                            "loss_%s" % k: v
                            for (k, v) in info["losses"].items()
                        },
                        step=step,
                        epoch=epoch)

    def finalize() -> None:
        if experiment is not None:
            experiment.end()
        console_finalize()

    return log_step, finalize
def setup_comet_ml(dpmodel):
    """Create a Comet experiment if tracking is enabled in the config.

    Returns the experiment, or None when ``train.comet_ml.track`` is falsy.
    """
    comet_cfg = dpmodel.config["train"]["comet_ml"]
    if not comet_cfg["track"]:
        return None
    experiment = comet_ml.Experiment(
        api_key=comet_cfg["api_key"],
        project_name=comet_cfg["project_name"])
    if "experiment_name" in comet_cfg.keys():
        experiment.set_name(comet_cfg["experiment_name"])
    return experiment
def get_experiment(run_id):
    """Fetch or create the Comet experiment deterministically keyed by ``run_id``.

    The SHA1 of ``run_id`` is exported as COMET_EXPERIMENT_KEY so the
    constructors below pick it up; an existing experiment with that key is
    resumed, otherwise a new one is created.
    """
    experiment_id = hashlib.sha1(run_id.encode("utf-8")).hexdigest()
    os.environ["COMET_EXPERIMENT_KEY"] = experiment_id
    # Assumes the Comet API key is set via config file or environment.
    api = comet_ml.API()
    if api.get_experiment_by_id(experiment_id) is None:
        return comet_ml.Experiment(project_name=PROJECT_NAME)
    return comet_ml.ExistingExperiment(project_name=PROJECT_NAME)
def __init__(
    self,
    project_name: str = None,
    upload_serialization_dir: bool = True,
    log_interval: int = 100,
    send_notification: bool = True,
):
    """Store callback settings and open a Comet experiment.

    Args:
        project_name: Comet project to log under.
        upload_serialization_dir: whether to upload the serialization dir.
        log_interval: how often (in steps) to log.
        send_notification: whether to notify on completion.
    """
    self._project_name = project_name
    self._experiment = comet_ml.Experiment(project_name=project_name)
    self.upload_serialization_dir = upload_serialization_dir
    self.log_interval = log_interval
    self.send_notification = send_notification
def init_logger(self, cfg): logger = None # Check to see if there is a key in environment: EXPERIMENT_KEY = cfg.experiment_key # First, let's see if we continue or start fresh: CONTINUE_RUN = cfg.resume if (EXPERIMENT_KEY is not None): # There is one, but the experiment might not exist yet: api = comet_ml.API() # Assumes API key is set in config/env try: api_experiment = api.get_experiment_by_id(EXPERIMENT_KEY) except Exception: api_experiment = None if api_experiment is not None: CONTINUE_RUN = True # We can get the last details logged here, if logged: # step = int(api_experiment.get_parameters_summary("batch")["valueCurrent"]) # epoch = int(api_experiment.get_parameters_summary("epochs")["valueCurrent"]) if CONTINUE_RUN: # 1. Recreate the state of ML system before creating experiment # otherwise it could try to log params, graph, etc. again # ... # 2. Setup the existing experiment to carry on: logger = comet_ml.ExistingExperiment( previous_experiment=EXPERIMENT_KEY, log_env_details=True, # to continue env logging log_env_gpu=True, # to continue GPU logging log_env_cpu=True, # to continue CPU logging auto_histogram_weight_logging=True, auto_histogram_gradient_logging=True, auto_histogram_activation_logging=True) # Retrieved from above APIExperiment # self.logger.set_epoch(epoch) else: # 1. Create the experiment first # This will use the COMET_EXPERIMENT_KEY if defined in env. # Otherwise, you could manually set it here. If you don't # set COMET_EXPERIMENT_KEY, the experiment will get a # random key! logger = comet_ml.Experiment( disabled=cfg.disabled, project_name=cfg.project, auto_histogram_weight_logging=True, auto_histogram_gradient_logging=True, auto_histogram_activation_logging=True) logger.add_tags(cfg.tags.split()) logger.log_parameters(self.cfg) return logger
def _setup_comet(self):
    """Create a Comet experiment keyed by ``FLAGS.comet_key_file`` and log
    the run's data paths and hyperparameters.

    Returns the created experiment.
    """
    # The API key lives in a file (possibly on GCS, hence tf.gfile).
    with tf.gfile.GFile(FLAGS.comet_key_file) as key_file:
        key = key_file.read().rstrip()
    experiment = comet_ml.Experiment(api_key=key,
                                     project_name=FLAGS.comet_project_name,
                                     team_name=FLAGS.comet_team_name,
                                     auto_param_logging=False,
                                     parse_args=False)
    experiment.log_parameter('train_path', FLAGS.train_path)
    experiment.log_parameter('validate_path', FLAGS.validate_path)
    experiment.log_parameter('model_dir', self._model_dir())
    # NOTE(review): log_multiple_params is a legacy Comet SDK API (newer
    # SDKs use log_parameters) — confirm the pinned comet_ml version.
    experiment.log_multiple_params(self._model.hparams().values())
    return experiment
def import_call(argv, *args, **kwargs):
    """
    Enable Third-party support from comet.ml
    Allows experiment tracking, visualization, and management.

    Best-effort: a missing package or unconfigured API key logs an error
    and continues instead of aborting.
    """
    try:
        import comet_ml
        comet_ml.Experiment()
    except Exception:
        # FIX: narrowed from a bare ``except:`` so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        logging.error("Ignored --comet " +
                      "See: https://www.comet.ml/" +
                      "docs/python-sdk/getting-started/ " +
                      "for more information")
def setup(self, args, state, model):
    """
    Setup the optional Comet.ml integration.

    Environment:
        COMET_MODE (:obj:`str`, `optional`):
            Whether to create an online, offline experiment or disable Comet logging. Can be "OFFLINE", "ONLINE",
            or "DISABLED". Defaults to "ONLINE".
        COMET_PROJECT_NAME (:obj:`str`, `optional`):
            Comet project name for experiments
        COMET_OFFLINE_DIRECTORY (:obj:`str`, `optional`):
            Folder to use for saving offline experiments when :obj:`COMET_MODE` is "OFFLINE"
        COMET_LOG_ASSETS (:obj:`str`, `optional`):
            Whether or not to log training assets (tf event logs, checkpoints, etc), to Comet. Can be "TRUE", or
            "FALSE". Defaults to "TRUE".

    For a number of configurable items in the environment, see `here
    <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__.
    """
    self._initialized = True
    log_assets = os.getenv("COMET_LOG_ASSETS", "FALSE").upper()
    if log_assets in {"TRUE", "1"}:
        self._log_assets = True
    # Only the main process creates the experiment in distributed training.
    if state.is_world_process_zero:
        comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
        experiment = None
        experiment_kwargs = {
            "project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")
        }
        if comet_mode == "ONLINE":
            experiment = comet_ml.Experiment(**experiment_kwargs)
            experiment.log_other("Created from", "transformers")
            logger.info("Automatic Comet.ml online logging enabled")
        elif comet_mode == "OFFLINE":
            experiment_kwargs["offline_directory"] = os.getenv(
                "COMET_OFFLINE_DIRECTORY", "./")
            experiment = comet_ml.OfflineExperiment(**experiment_kwargs)
            experiment.log_other("Created from", "transformers")
            logger.info(
                "Automatic Comet.ml offline logging enabled; use `comet upload` when finished"
            )
        # "DISABLED" (or any other mode) leaves ``experiment`` as None.
        if experiment is not None:
            experiment._set_model_graph(model, framework="transformers")
            experiment._log_parameters(args,
                                       prefix="args/",
                                       framework="transformers")
            if hasattr(model, "config"):
                experiment._log_parameters(model.config,
                                           prefix="config/",
                                           framework="transformers")
def main():
    """Command-line entry point: parse a StyleGAN2 subcommand, create the
    Comet experiment, and submit the run via dnnlib."""
    parser = argparse.ArgumentParser(
        description='''StyleGAN2 generator. Run 'python %(prog)s <subcommand> --help' for subcommand help.''',
        epilog=_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    e = cm.Experiment("Your API KEY")  # This line will used for logging images
    e.log_image("Your Data Set Path")

    subparsers = parser.add_subparsers(help='Sub-commands', dest='command')

    # Subcommand: generate a batch of images from seeds.
    parser_generate_images = subparsers.add_parser('generate-images', help='Generate images')
    parser_generate_images.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
    parser_generate_images.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', required=True)
    parser_generate_images.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
    parser_generate_images.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')

    # Subcommand: produce a style-mixing grid/video.
    parser_style_mixing_example = subparsers.add_parser('style-mixing-example', help='Generate style mixing video')
    parser_style_mixing_example.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
    parser_style_mixing_example.add_argument('--row-seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True)
    parser_style_mixing_example.add_argument('--col-seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True)
    parser_style_mixing_example.add_argument('--col-styles', type=_parse_num_range, help='Style layer range (default: %(default)s)', default='0-6')
    parser_style_mixing_example.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
    parser_style_mixing_example.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')

    args = parser.parse_args()
    # Remaining kwargs (after popping command/result_dir) are forwarded to
    # the selected run_generator function.
    kwargs = vars(args)
    subcmd = kwargs.pop('command')

    if subcmd is None:
        print ('Error: missing subcommand. Re-run with --help for usage.')
        sys.exit(1)

    sc = dnnlib.SubmitConfig()
    sc.num_gpus = 1
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    sc.run_dir_root = kwargs.pop('result_dir')
    sc.run_desc = subcmd

    # Map CLI subcommand -> dotted path of the function dnnlib should run.
    func_name_map = {
        'generate-images': 'run_generator.generate_images',
        'style-mixing-example': 'run_generator.style_mixing_example'
    }
    dnnlib.submit_run(sc, func_name_map[subcmd], **kwargs)
def on_experiment_start(self):
    """Start of experiment: open the Comet run and attach tags/parameters."""
    experiment = comet_ml.Experiment(
        project_name=self.project_name,
        api_key=self.api_token,
        workspace=self.workspace,
        log_code=False,
        display_summary_level=0,
    )
    self.experiment = experiment
    # Tags and params are optional; skip whichever were not configured.
    if self.tags is not None:
        experiment.add_tags(self.tags)
    if self.params is not None:
        experiment.log_parameters(self.params)
def train(self, *args, **kwargs):
    """CLI hook for `train`: start a Comet experiment and record the exact
    command line that launched training.

    Best-effort: failure to create the experiment (e.g. missing
    COMET_API_KEY) is logged and the hook returns without raising.
    """
    import comet_ml
    try:
        self.experiment = comet_ml.Experiment(log_code=False)
    except Exception:
        logging.error("comet_ml.Experiment() had errors. Perhaps you need to define COMET_API_KEY")
        return
    logging.info("comet.train() called......")
    cli = self._make_command_line(args)
    self.experiment.set_code(cli)
    self.experiment.set_filename("Ludwig CLI")
    self._log_html(cli)
    # Persist the resolved Comet configuration alongside the run.
    config = comet_ml.get_config()
    self._save_config(config)
def __init__(self, args):
    """Create an online or offline Comet logger from parsed CLI args.

    All ``comet_*`` flags on ``args`` are forwarded to the experiment
    constructor; ``comet_offline`` selects OfflineExperiment.
    """
    import comet_ml
    shared = {
        "project_name": args.comet_project_name,
        "auto_metric_logging": args.comet_auto_metric_logging,
        "auto_output_logging": args.comet_auto_output_logging,
        "log_code": args.comet_log_code,
        "log_env_cpu": args.comet_log_env_cpu,
        "log_env_gpu": args.comet_log_env_gpu,
        "log_env_host": args.comet_log_env_host,
        "log_graph": args.comet_log_graph,
    }
    if args.comet_offline:
        # Offline runs are written to a directory for later `comet upload`.
        self.logger = comet_ml.OfflineExperiment(
            offline_directory=args.comet_offline_dir, **shared)
    else:
        self.logger = comet_ml.Experiment(**shared)
def dropout_exp(args, asrc, embedding, model_classes, runner: Runner,
                start=None, end=None):
    """Grid-search dropout rates for each model class, logging each run to Comet.

    ``start``/``end`` optionally slice the dropout grid so the sweep can be
    split across jobs.  ``args.test`` switches to a short smoke-test config.
    """
    for model_class in model_classes:
        choices = [
            0,
            0.1,
            0.2,
            0.3,
            .4,
            0.5,
        ]
        if start is None:
            choices = choices
        else:
            # Restrict this job to a sub-range of the grid.
            choices = choices[start:end]
        for dropout in choices:
            exp = comet_ml.Experiment(
                project_name="ASR" if not args.test else "ASR-test",
                workspace="Robustness",
                log_env_cpu=False)
            exp.add_tag("%s" % model_class.__name__)
            exp.add_tag("dropout")
            exp.log_parameter("embedding_name", str(embedding))
            runner.prepare(model_class, extra_terms=asrc._terms)
            runner.logger = exp
            if args.batch_size:
                batch_size = args.batch_size
            else:
                # Bert needs far smaller per-GPU batches than other models.
                batch_size = 32 * args.gpu_num if model_class != mz.models.Bert else 3 * args.gpu_num
            runner.train(
                epochs=3 if args.test else 10,
                dropout=dropout,
                dropout_rate=dropout,
                batch_size=batch_size,
                lr=3e-4 if model_class != mz.models.Bert else 3e-5,
                devices=multi_gpu(
                    args.gpu_num if model_class != mz.models.MatchLSTM else 1),
                clip_norm=10,
                batch_accumulation=args.batch_accumulation)
            runner.eval_asrc(asrc)
            # Free GPU memory between runs of the sweep.
            runner.free_memory()
def _setup_comet_ml_experiment(self, api_key, workspace=None):
    """Create (or reuse) the Comet experiment for this run and attach it to the model.

    The experiment key is cached in ``<experiment_dir>/_cometml.key`` so a
    restarted run resumes the same Comet experiment instead of creating a
    new one.
    """
    comet_key_file_path = join(self.experiment_dir, "_cometml.key")
    if exists(comet_key_file_path):
        # Reuse the key written by a previous run in this directory.
        with open(comet_key_file_path, "r") as comet_key_file:
            exp_key = comet_key_file.readline().strip()
    else:
        comet_experiment = comet_ml.Experiment(
            api_key=api_key,
            project_name=self.model.name,
            workspace=(workspace or DEFAULT_COMET_WORKSPACE),
            log_env_details=False,
        )
        comet_experiment.set_name(self.contd_tag)
        exp_key = comet_experiment.get_key()
        # Persist the key for future restarts.
        with open(comet_key_file_path, "w+") as comet_key_file:
            comet_key_file.write(exp_key)
    self.model.attach_comet_ml_experiment(api_key, exp_key)
def weight_decay_exp(args, asrc, embedding, model_classes, runner: Runner,
                     start=None, end=None):
    """Grid-search weight-decay values for each model class, logging each run
    to Comet.

    ``start``/``end`` optionally slice the decay grid so the sweep can be
    split across jobs.  ``args.test`` switches to a short smoke-test config.
    """
    for model_class in model_classes:
        choices = [
            0.01, 0.05, 0.2, 0.4, 0.6, .8, 1.0, 0.1, 0.0001, 0.0005, 0.001,
            0.005
        ]
        if start is not None:
            # Restrict this job to a sub-range of the grid.
            choices = choices[start:end]
        for weight_decay in choices:
            exp = comet_ml.Experiment(
                project_name="ASR" if not args.test else "ASR-test",
                workspace="Robustness",
                log_env_cpu=False)
            exp.add_tag("%s" % model_class.__name__)
            exp.add_tag("weight_decay")
            exp.log_parameter("embedding_name", str(embedding))
            # BUG FIX: "%d" truncated fractional decays (0.01 printed as 0);
            # "%s" prints the actual value.
            print("Weight Decay: %s" % weight_decay)
            runner.prepare(model_class, extra_terms=asrc._terms)
            runner.logger = exp
            if args.batch_size:
                batch_size = args.batch_size
            else:
                # Bert needs far smaller per-GPU batches than other models.
                batch_size = 32 * args.gpu_num if model_class != mz.models.Bert else 3 * args.gpu_num
            runner.train(
                epochs=3 if args.test else 10,
                weight_decay=weight_decay,
                optimizer_cls=torch.optim.AdamW,
                batch_size=batch_size,
                lr=3e-4 if model_class != mz.models.Bert else 3e-5,
                devices=multi_gpu(
                    args.gpu_num if model_class != mz.models.MatchLSTM else 1),
                clip_norm=10,
                batch_accumulation=args.batch_accumulation,
                embedding_weight_decay=0 if args.no_embedding_reg else None)
            runner.eval_asrc(asrc)
            # Free GPU memory between runs of the sweep.
            runner.free_memory()
def init(project_name: str, backend: Backend = Backend.Void, debug=False):
    """Initialize the experiment-tracking backends selected by ``backend``.

    ``Backend`` is a flag enum, so Neptune, Comet, and file logging can be
    combined; handles to created experiments are stored in the module-level
    ``_configs`` dict.
    """
    _configs["backend"] = backend
    _configs["debug"] = debug
    _configs["project_name"] = project_name
    # Spaces are not valid in remote project identifiers.
    project_name = project_name.replace(" ", "_")
    if backend & Backend.Neptune:
        neptune_configs = {
            "project_qualified_name": f"tihbe/{project_name}",
            "api_token": os.environ["NEPTUNE_API_TOKEN"],
            # Offline backend avoids network traffic while debugging.
            "backend": neptune.OfflineBackend() if debug else None,
        }
        neptune.init(**neptune_configs)
        experiment = neptune.create_experiment(upload_stdout=False, upload_stderr=True)
        _configs["neptune_experiment"] = experiment
    if backend & Backend.Comet:
        # Comet is fully disabled (no uploads) while debugging.
        experiment = comet.Experiment(
            api_key=os.environ["COMET_API_KEY"],
            project_name=project_name,
            workspace="tihbe",
            disabled=debug,
        )
        _configs["comet_experiment"] = experiment
    if backend & Backend.Logging:
        logfile = os.path.join(
            os.getcwd(), f"{project_name}{'_DEBUG' if debug else ''}.log")
        if os.path.isfile(
                logfile):  # Roll logs if file exist ;; up to 20 files
            temp_handler = logging.handlers.RotatingFileHandler(logfile,
                                                                backupCount=20)
            temp_handler.doRollover()
            temp_handler.close()
        logging.basicConfig(
            format="%(asctime)s %(levelname)-8s %(message)s",
            level=logging.INFO,
            datefmt="%Y-%m-%d %H:%M:%S",
            filename=logfile,
        )
def wrapper(*args, **kwargs):
    """Decorator body: lazily create a Comet experiment on first use, then
    forward to ``method`` only while Comet logging is active.

    ``self._logging`` acts as a tri-state: None = not attempted yet,
    False = setup failed (never retried), True = logging enabled.
    """
    self = args[0]
    global comet_installed
    if self._logging is None and comet_installed:
        # First call: attempt Comet setup exactly once.
        self._logging = False
        try:
            if 'api_key' not in self._comet_config.keys():
                # No key in our config: defer to Comet's own init flow.
                comet_ml.init()
            if comet_ml.get_global_experiment() is not None:
                logger.warning("You have already created a comet \
                                experiment manually, which might \
                                cause clashes")
            self._experiment = comet_ml.Experiment(
                **self._comet_config)
            self._logging = True
            self._experiment.log_other("Created from", "tensorboardX")
        except Exception as e:
            # Setup failure is logged once; _logging stays False.
            logger.warning(e)
    if self._logging is True:
        return method(*args, **kwargs)
def train_init(self, experiment_directory, experiment_name, model_name,
               resume, output_directory):
    """API hook called at training start: create the Comet experiment and
    record run metadata.

    No-op when an experiment already exists; a failed experiment creation
    (e.g. missing COMET_API_KEY) is logged and the hook returns.
    """
    if self.cometml_experiment:
        # Comet ML already initialized
        return
    import comet_ml
    try:
        self.cometml_experiment = comet_ml.Experiment(log_code=False,
                                                      project_name=experiment_name)
    except Exception:
        self.cometml_experiment = None
        logger.exception(
            "comet_ml.Experiment() had errors. Perhaps you need to define COMET_API_KEY")
        return
    logger.info("comet.train_init() called......")
    self.cometml_experiment.set_name(model_name)
    self.cometml_experiment.set_filename("Ludwig API")
    # Persist the resolved Comet configuration in the experiment directory.
    config = comet_ml.get_config()
    self._save_config(config, directory=experiment_directory)