コード例 #1
0
def _load_model_pair(model_config, default_path):
    """Load the embedding and cross-encoder models, sharing one image dict.

    Paths fall back to *default_path* when not present in *model_config*.
    """
    emb_path = model_config.get("model_path_embedding", default_path)
    # Fixed: the original passed the path as a stray logging arg with no
    # %s placeholder, so it was never printed.
    logging.info("Loading model from %s", emb_path)
    emb_model = MultimodalTransformer(model_name_or_path=emb_path)

    cross_path = model_config.get("model_path_cross", default_path)
    logging.info("Loading model from %s", cross_path)
    cross_model = MultimodalTransformer(model_name_or_path=cross_path)
    # Reuse the embedding model's image cache instead of loading it twice.
    cross_model.image_dict = emb_model.image_dict
    return emb_model, cross_model


def run(config):
    """Evaluate a multimodal embedding/cross-encoder pair on dev and/or test.

    Seeds RNGs, creates a timestamped model folder, optionally starts a W&B
    run, runs the configured evaluations, dumps the config to YAML, and
    syncs CSV results to W&B.

    Args:
        config: Configuration mapping with keys "model", "model_path",
            "test", "data" and optional "seed", "use_wandb", "wandb",
            "do_dev_test", "do_test".
    """
    if "seed" in config:
        # Seed every RNG source used downstream for reproducibility.
        torch.manual_seed(config["seed"])
        np.random.seed(config["seed"])
        random.seed(config["seed"])
    model_config = config["model"]
    model_folder_name = f"{model_config['name']}-{datetime.now().strftime('%Y-%m-%d_%H-%M')}"
    model_save_path = os.path.join(config["model_path"], model_folder_name)
    os.makedirs(model_save_path, exist_ok=True)

    wandb_run = None  # renamed from `run`, which shadowed this function
    if config.get("use_wandb", False):
        wandb_config = config["wandb"]
        wandb_run = wandb.init(config=config,
                               project=wandb_config.get("wandb_project", "mmemb"),
                               name=wandb_config.get("wandb_name",
                                                     model_folder_name),
                               reinit=True)
    logging.info(config)

    if config.get("do_dev_test", False):
        logging.info("### Testing on Dev ###")
        emb_model, cross_model = _load_model_pair(model_config, model_save_path)
        dev_config = config["test"]
        dev_evaluator = get_evaluator(config["data"], dev_config, emb_model,
                                      "dev")
        dev_evaluator([emb_model, cross_model], output_path=model_save_path)

    if config.get("do_test", True):
        logging.info("### Testing ###")
        test_config = config["test"]
        emb_model, cross_model = _load_model_pair(model_config, model_save_path)
        test_evaluator = get_evaluator(config["data"], test_config, emb_model,
                                       "test")
        test_evaluator([emb_model, cross_model], output_path=model_save_path)

    # Persist the exact configuration next to the results.
    with open(os.path.join(model_save_path, "config.yaml"), "w") as f:
        yaml.dump(config, f)

    if config.get("use_wandb", False):
        wandb.save(os.path.join(model_save_path, "*.csv"))
        # Legacy flush call kept for old wandb versions; finish() is the
        # modern equivalent.
        wandb.join()
        wandb_run.finish()
コード例 #2
0
ファイル: test_wandb.py プロジェクト: wsgharvey/pytorch-utils
    def test_logging(self):
        """End-to-end check that training metrics go to W&B online (not kept
        locally) and that the finished run's config/history match the net."""

        net = WandbLoggedNet.easy_init()
        net.set_save_valid_conditions('valid', 'every', 1, 'epochs')

        net.train_one_epoch()
        net.train_one_epoch()

        # ensure that we are not saving log in net (should be online instead)
        self.assertTrue(net.logs['train'] == {})
        self.assertTrue(net.logs['valid'] == {})

        net.to_cuda()
        net.train_one_epoch()

        self.assertTrue(wandb.run.project_name() == net.wandb_project)

        url = wandb.run.get_url()

        # Flush and close the run so the public API sees it as finished.
        wandb.join()

        api = wandb.Api()
        # The run path is the last three URL segments: entity/project/run_id.
        path = '/'.join(url.split('/')[-3:])
        run = api.run(path)

        self.assertTrue(run.state == 'finished')
        for key in ['seed', 'nn_args', 'name_prefix']:
            self.assertTrue(run.config[key] == net.default_init_kwargs[key])
        history = run.history(pandas=False)
        # NOTE(review): assumes a fixed logging layout — 'train-two' present at
        # step 0, 'iter' mirroring wandb's '_step', and the final row logged at
        # the last epoch. Confirm against WandbLoggedNet's logging code.
        self.assertTrue(history[0]['train-two'] == 2)
        self.assertTrue(history[2]['iter'] == history[2]['_step'])
        self.assertTrue(history[-1]['epoch'] == net.epochs)
            def train():
                """Sweep trial: fine-tune BERT for multi-class sector classification.

                Reads hyperparameters (epochs, learning_rate) from wandb.config,
                trains via simpletransformers, and closes the run with wandb.join().
                """
                # NOTE(review): wandb.init takes the project via the `project=`
                # keyword; confirm the positional call matches the wandb version
                # pinned by this project.
                wandb.init(WAND_PROJECT_NAME)
                modelArgs = { "max_seq_length": self.maxSeqLength, "output_dir": self.modelOutputDir, "overwrite_output_dir": True, "best_model_dir": self.bestModelOutputDir,
                              "wandb_project": WAND_PROJECT_NAME, "num_training_epochs": wandb.config.epochs, "learning_rate": wandb.config.learning_rate,
                              "do_lower_case": True, "cache_dir": self.modelCacheDir, "encoding": "utf-8", "train_batch_size": 5, "eval_batch_size": 5,
                              "evaluate_during_training_steps": 50, "evaluate_during_training_verbose": True, "logging_steps": 5, "sliding_window": True,
                              "reprocess_input_data": True, "evaluate_during_training": True, "use_multiprocessing": True,
                              "labels_list": SECTOR_LABELS }

                model = ClassificationModel(self.modelType, self.modelNameOrPath, args=modelArgs, sweep_config=wandb.config, use_cuda=torch.cuda.is_available(), num_labels=len(SECTOR_LABELS), )

                # Training and evaluation
                try:
                    log.info(f"Started training/finetuning BERT on multi-class classification task..")
                    model.train_model(train_df=self.trainDataset, eval_df=self.evalDataset, show_running_loss=True,
                                      output_dir=self.modelOutputDir,
                                      mcc=sklearn.metrics.matthews_corrcoef,
                                      acc=sklearn.metrics.balanced_accuracy_score, )
                    log.info(f"Finished finetuning and evaluating our fine-tuned model on multi-class classification task. Check the folder '{self.modelOutputDir}' for finetuned weights.")
                    log.info(f"It took {round((time.time() - startTime) / 3600, 1)} hours to finetune and evaluate our fine-tuned model on multi-class classification task.")
                except Exception:
                    # Narrowed from a bare `except:` so KeyboardInterrupt and
                    # SystemExit are no longer swallowed; the error is logged and
                    # the run is still closed below.
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    err = f"Error occurred while training and evaluating the finetuned model on multi-class classification task. Error is: {exc_type}; {exc_value}."
                    log.error(err)

                wandb.join()
コード例 #4
0
    def log_fn(self, stop_event: Event):
        """Logger-process main loop: create loggers, report their paths back
        to the parent, then drain draw commands until *stop_event* is set.

        Args:
            stop_event: Event the parent sets to request shutdown; it is also
                set here on exit so the parent never blocks on us.
        """
        try:
            self._super_create_loggers()
            # Hand the parent the fields it needs. (The attribute name
            # 'resposne_queue' is a pre-existing typo kept for compatibility
            # with the rest of the class.)
            self.resposne_queue.put({
                k: self.__dict__[k]
                for k in ["save_dir", "tb_logdir", "is_sweep"]
            })

            while True:
                try:
                    # Poll with a short timeout so stop_event is noticed even
                    # when no commands arrive.
                    cmd = self.draw_queue.get(True, 0.1)
                except EmptyQueue:
                    if stop_event.is_set():
                        break
                    else:
                        continue

                self._super_log(*cmd)
                # Ack each processed command so the sender can block on it.
                self.resposne_queue.put(True)
        except:
            # Bare except is tolerable here: the error is announced and
            # immediately re-raised.
            print("Logger process crashed.")
            raise
        finally:
            print("Logger: syncing")
            if self.use_wandb:
                # Flush pending W&B data before the process exits.
                wandb.join()

            stop_event.set()
            print("Logger process terminating...")
コード例 #5
0
def launch_training_on_all_splits(experiment: str, splits: List, base_model: str, dropout: float, learning_rate: float):
    """Fine-tune a pretrained rxnfp BERT on every Buchwald-Hartwig split.

    For each (sheet_name, split_index) pair: reads the sheet, builds reaction
    SMILES, normalizes labels with the *train* mean/std, trains, and logs one
    W&B run per split.

    Args:
        experiment: Tag used in the W&B project and output directory names.
        splits: Iterable of (sheet_name, split_index); indices are 1-based
            as in the original paper.
        base_model: Suffix of the pretrained bert model bundled with rxnfp.
        dropout: hidden_dropout_prob for the transformer config.
        learning_rate: Optimizer learning rate.
    """
    project = f'buchwald_hartwig_training_{experiment}_{base_model}'
    model_args = {
    'wandb_project': project, 'num_train_epochs': 10, 'overwrite_output_dir': True,
    'learning_rate': learning_rate, 'gradient_accumulation_steps': 1,
    'regression': True, "num_labels":1, "fp16": False,
    "evaluate_during_training": True, 'manual_seed': 42,
    "max_seq_length": 300, "train_batch_size": 16,"warmup_ratio": 0.00,
    "config" : { 'hidden_dropout_prob': dropout } }

    for (name, split) in splits:
        # reinit=True allows multiple wandb runs in the same process.
        if wandb_available: wandb.init(name=name, project=project, reinit=True)

        df_doyle = pd.read_excel('../data/Buchwald-Hartwig/Dreher_and_Doyle_input_data.xlsx', sheet_name=name)
        df_doyle['rxn'] = generate_buchwald_hartwig_rxns(df_doyle)

        train_df = df_doyle.iloc[:split-1][['rxn', 'Output']] # paper has starting index 1 not 0
        test_df = df_doyle.iloc[split-1:][['rxn', 'Output']] # paper has starting index 1 not 0

        train_df.columns = ['text', 'labels']
        test_df.columns = ['text', 'labels']
        # Standardize labels using train statistics only (no test leakage).
        mean = train_df.labels.mean()
        std = train_df.labels.std()
        train_df['labels'] = (train_df['labels'] - mean) / std
        test_df['labels'] = (test_df['labels'] - mean) / std

        model_path =  pkg_resources.resource_filename("rxnfp", f"models/transformers/bert_{base_model}")
        pretrained_bert = SmilesClassificationModel("bert", model_path, num_labels=1, args=model_args, use_cuda=torch.cuda.is_available())
        pretrained_bert.train_model(train_df, output_dir=f"outputs_buchwald_hartwig_{experiment}_{base_model}_{name}_split_{str(split).replace('-','_')}", eval_df=test_df, r2=sklearn.metrics.r2_score)
        if wandb_available: wandb.join() # multiple runs in same script
コード例 #6
0
    def run_job(self, job):
        """Execute one sweep job: persist its config, export wandb env vars,
        and invoke the user training function.

        Args:
            job: Sweep job with `run_id` and a `config` dict of
                {name: {"value": ...}} entries.

        Returns:
            True when the agent should stop (interrupt or failure),
            otherwise None.
        """
        run_id = job.run_id

        # Persist this run's hyperparameters where wandb expects sweep configs.
        config_file = os.path.join("wandb", "sweep-" + self._sweep_id,
                                   "config-" + run_id + ".yaml")
        config_util.save_config_file_from_dict(config_file, job.config)
        # Communicate run identity/config to wandb.init via the environment.
        os.environ[wandb.env.RUN_ID] = run_id
        os.environ[wandb.env.CONFIG_PATHS] = config_file
        os.environ[wandb.env.SWEEP_ID] = self._sweep_id
        # Reset wandb's process-global state so the new env vars take effect.
        wandb.setup(_reset=True)

        print("wandb: Agent Starting Run: {} with config:\n".format(run_id) +
              "\n".join([
                  "\t{}: {}".format(k, v["value"])
                  for k, v in job.config.items()
              ]))
        try:
            self._function()
            if wandb.run:
                # Close the run if the user function left one open.
                wandb.join()
        except KeyboardInterrupt as e:
            print("Keyboard interrupt", e)
            return True
        except Exception as e:
            print("Problem", e)
            return True
コード例 #7
0
def test_wandb_experiment(csv_filename):
    """Integration test: a Ludwig experiment launched with --wandb creates
    (and cleanly closes) a W&B run.

    Args:
        csv_filename: Path where the synthetic dataset CSV is generated.
    """
    # Test W&B integration

    # add wandb arg and detect flag
    sys.argv.append('--wandb')
    ludwig.contrib.contrib_import()

    # disable sync to cloud
    os.environ['WANDB_MODE'] = 'dryrun'

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    rel_path = generate_data(input_features, output_features, csv_filename)

    # Run experiment
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Check a W&B run was created
    assert wandb.run is not None

    # End session
    wandb.join()

    # Remove instance from contrib_registry
    ludwig.contrib.contrib_registry['instances'].pop()

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
コード例 #8
0
ファイル: run_lstm_pl.py プロジェクト: seungjaeryanlee/MagNet
def main(CONFIG):
    """Train the MagNet model with PyTorch Lightning, logging to W&B.

    Args:
        CONFIG: OmegaConf configuration (uses SEED, NUM_EPOCH, and the
            fields consumed by Net / count_parameters).
    """
    # Reproducibility
    random.seed(CONFIG.SEED)
    np.random.seed(CONFIG.SEED)
    torch.manual_seed(CONFIG.SEED)
    # Deterministic cuDNN trades speed for reproducible results.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Setup model (double precision)
    net = Net(CONFIG).double()

    # Setup wandb; reinit=True allows repeated calls in one process.
    wandb.init(project="MagNet",
               config=OmegaConf.to_container(CONFIG),
               reinit=True)
    wandb.watch(net)

    # Log number of parameters
    CONFIG.NUM_PARAMETERS = count_parameters(net)
    trainer = pl.Trainer(
        # TODO: Add CONFIG parameters for devices
        gpus=1,
        # Don't show progress bar
        progress_bar_refresh_rate=0,
        check_val_every_n_epoch=1,
        # TODO: Try early stopping
        max_epochs=CONFIG.NUM_EPOCH,
    )
    trainer.fit(net)

    # Close wandb
    wandb.join()
コード例 #9
0
def test():
    """Evaluate the global `model` on `test_data_loader` and log accuracy.

    Relies on module-level `model`, `test_data_loader`, `args`, and `wandb`.
    Logs to "Test Accuracy" or "Validation Accuracy" depending on
    `args.is_test`, and closes the W&B run in the test case.

    Returns:
        Accuracy as a percentage (float).
    """
    print('Testing')
    model.eval()

    pos = 0
    total = 0
    for data, target in tqdm(test_data_loader):
        data, target = data.cuda(), target.long().cuda()
        with torch.no_grad():
            out = model(data)
        # Predicted class = argmax over the last dimension.
        pred = torch.max(out, out.dim() - 1)[1]
        pos = pos + torch.eq(pred.cpu().long(),
                             target.data.cpu().long()).sum().item()
        total = total + data.size(0)
    acc = pos * 1.0 / total * 100
    print('Acc: %.2f' % acc)

    if args.is_test:
        # Log test accuracy in wandb (reuse the value computed above instead
        # of recomputing the same expression).
        wandb.log({"Test Accuracy": acc})

        wandb.join()

    else:
        # Log validation accuracy in wandb
        wandb.log({"Validation Accuracy": acc})

    return acc
コード例 #10
0
    def run(self):
        """Fine-tune distilgpt2 on this task's train split, save the model,
        and record the run name (from W&B when enabled) to the output target.
        """
        result_folder = luigi.configuration.get_config().get(
            'GlobalConfig', 'result_folder')
        model = GPT2LMHeadModel.from_pretrained("distilgpt2")
        tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")

        train_dataset = TextDataset(tokenizer,
                                    self.input()['train'].path,
                                    block_size=self.block_size)
        test_dataset = TextDataset(tokenizer,
                                   self.input()['test'].path,
                                   block_size=self.block_size)

        # Causal LM: no masked-language-modeling objective.
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer,
                                                        mlm=False)

        training_args = TrainingArguments(
            do_eval=self.do_eval,
            do_train=self.do_train,
            eval_steps=self.eval_steps,
            evaluate_during_training=self.evaluate_during_training,
            gradient_accumulation_steps=self.gradient_accumulation_steps,
            logging_dir='./logs',
            logging_steps=self.logging_steps,
            learning_rate=self.learning_rate,
            max_grad_norm=self.max_grad_norm,
            num_train_epochs=self.num_train_epochs,
            output_dir=result_folder,
            overwrite_output_dir=True,
            per_device_train_batch_size=self.per_device_train_batch_size,
            per_device_eval_batch_size=self.per_device_eval_batch_size,
            save_steps=self.save_steps,
            seed=self.seed,
            warmup_steps=self.warmup_steps,
            weight_decay=self.weight_decay,
        )

        trainer = Trainer(model=model,
                          args=training_args,
                          data_collator=data_collator,
                          train_dataset=train_dataset,
                          eval_dataset=test_dataset)

        trainer.train()

        trainer.save_model()
        tokenizer.save_pretrained(result_folder)

        # Fixed: the env var holds a *string*, so the old truthiness check
        # treated any non-empty value (even "false" or "0") as disabled.
        # Parse the conventional truthy spellings explicitly.
        wandb_disabled = os.environ.get('WANDB_DISABLED',
                                        '').lower() in ('1', 'true', 'yes')

        if wandb_disabled:
            # No W&B run: fall back to a timestamp as the run name.
            run_name = time.strftime('%Y%m%d-%H%M%S')
        else:
            wandb.run.save()
            wandb.join()
            run_name = wandb.run.name

        with open(self.output()['run_name'].path, 'w') as f:
            f.write(run_name)
コード例 #11
0
def main():
    """Iterate over all argument combinations and train/evaluate one DG-SSR
    model per combination, optionally logging each to its own W&B run.
    """
    # This flag allows you to enable the inbuilt cudnn auto-tuner to
    # find the best algorithm to use for your hardware.
    torch.backends.cudnn.benchmark = True

    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    args = get_args()
    # iterate_args expands the base args into one namespace per experiment;
    # note it rebinds the same `args` name.
    for args in iterate_args(args):
        output_dir = f'{args.output_dir}/{socket.gethostname()}/{args.experiment}/{args.network}/' + \
        '_'.join([str(_) for _ in args.params])+'/'
        writer = Writer(output_dir=output_dir,
                        file=f'{args.source[0]}_{args.target}')

        model = model_fns[args.network](num_usv_classes=args.num_usv_classes,
                                        num_classes=args.num_classes)
        # monitor_memory()

        if args.wandb:
            tags = [
                args.source[0] + '_' + args.target,
                "_".join([str(_) for _ in args.params])
            ]
            # reinit=True so each loop iteration gets a fresh run.
            wandb.init(
                project=f'{args.experiment}_{args.network}',
                tags=tags,
                dir=dirname(__file__),
                config=args,
                reinit=True,
                name=
                f'{"-".join([str(_) for _ in args.params])}-{args.source[0]}-{args.target}'
            )
        if args.redirect_to_file and args.redirect_to_file != 'null':
            print('redirect to ', output_dir + args.redirect_to_file)
            # NOTE(review): this redirects stdout for the rest of the process
            # and the file handle is never closed/restored — confirm intended.
            sys.stdout = open(output_dir + args.redirect_to_file, 'a')
            # wandb.watch(model, log='all') # can lead to continuous increment of GPU memory usage

        data_loaders = get_DGSSR_data_loader(args.source, args.target,
                                             args.data_dir, args.val_size,
                                             args.original_img_prob,
                                             args.batch_size,
                                             args.max_num_s_img, args)
        optimizer = get_optimizer(model,
                                  lr=args.learning_rate,
                                  train_all=args.train_all_param)
        # Step the learning rate down once at 80% of the training epochs.
        scheduler = optim.lr_scheduler.StepLR(optimizer, int(args.epochs * .8))
        Trainer(args, model, data_loaders, optimizer, scheduler, writer)

        save_model_dir = f'{args.data_dir}/cache/{socket.gethostname()}/{args.experiment}/{args.network}/' + \
                         '_'.join([str(_) for _ in args.params])
        if not os.path.exists(save_model_dir): os.makedirs(save_model_dir)

        torch.save(model.state_dict(),
                   save_model_dir + f'/{args.source[0]}_{args.target}.pkl')

        # wandb.save(args.data_dir+'/cache/model.pkl')
        if args.wandb:
            # Close the run so the next iteration can start a fresh one.
            wandb.join()
コード例 #12
0
def train_from_cache(
    *,
    architecture: str = "BiT",
    size: int = 256,
    augmentation: int = 1,
    epochs: int = 20,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    lr_decay_rate: float = 0.99,
    lr_decay_steps: int = 500,  # was `5e2`, a float, despite the int annotation
    **model_params,
) -> float:
    """
    Trains the model using embeddings that were previously cached.

    Args:
        architecture: Name of the embedding architecture whose cache to load.
        size: Image size used when the embeddings were cached.
        augmentation: Augmentation level of the cached embeddings.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        learning_rate: Initial learning rate for Adam.
        lr_decay_rate: Multiplicative factor of the exponential LR schedule.
        lr_decay_steps: Decay interval, in steps, of the LR schedule.
        **model_params: Forwarded to PathologistModel.

    Returns:
        Dev-set categorical accuracy of the trained model.
    """
    train_data, dev_data = load_embeddings(architecture, size, augmentation)

    # Make and init the wandb run.
    wandb.init(project="Plant Pathology", reinit=True)
    wandb.config.update(
        {
            "architecture": architecture,
            "epochs": epochs,
            "batch_size": batch_size,
            "learning_rate": learning_rate,
            "lr_decay_rate": lr_decay_rate,
            "lr_decay_steps": lr_decay_steps,
            "augmentation": augmentation,
            "size": size,
        }
    )

    model = PathologistModel(nclasses=constants.NCLASSES, **model_params)
    model.compile(
        optimizer=Adam(
            learning_rate=ExponentialDecay(learning_rate, lr_decay_steps, lr_decay_rate)
        ),
        loss="categorical_crossentropy",
        metrics=["categorical_accuracy"],
    )

    model.fit(
        train_data["X"],
        train_data["y"],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(dev_data["X"], dev_data["y"]),
        # save_model=False: weights are not uploaded, only metrics.
        callbacks=[WandbCallback(save_model=False)],
    )

    # Log the scores
    train_loss, train_acc = model.evaluate(train_data["X"], train_data["y"])
    _, dev_acc = model.evaluate(dev_data["X"], dev_data["y"])
    wandb.run.summary.update(
        {"final_train_loss": train_loss, "final_train_acc": train_acc}
    )
    wandb.join()
    return dev_acc
コード例 #13
0
def run_batch_episode_exp(total_eps: int,
                          update_every: int,
                          use_norm: bool,
                          wandb_project: str,
                          wandb_group: str):
    """Train REINFORCE on CartPole-v1 with batched-episode updates,
    logging per-episode returns to W&B.

    Args:
        total_eps: Number of episodes to run.
        update_every: Update the policy every this many episodes.
        use_norm: Whether returns are normalized in the policy update.
        wandb_project: W&B project name.
        wandb_group: W&B run group.
    """
    # NOTE:
    # This code doesn't run properly on Windows 10.
    # The result can be reproduced on Ubuntu and Mac OS.

    config = dict()
    config['update_every'] = update_every
    config['use_norm'] = use_norm

    wandb.init(project=wandb_project,
               entity='junyoung-park',
               reinit=True,
               group=wandb_group,
               config=config)

    env = gym.make('CartPole-v1')
    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.n

    net = MLP(s_dim, a_dim, [128])
    agent = REINFORCE(net)
    memory = EpisodicMemory(max_size=100, gamma=1.0)
    n_update = 0

    for ep in range(total_eps):
        s = env.reset()
        cum_r = 0

        while True:
            s = to_tensor(s, size=(1, 4))
            a = agent.get_action(s)
            ns, r, done, info = env.step(a.item())

            # preprocess data: wrap scalars as (1, 1) tensors for the memory
            r = torch.ones(1, 1) * r
            done = torch.ones(1, 1) * done

            memory.push(s, a, r, torch.tensor(ns), done)

            s = ns
            # NOTE(review): r is a (1, 1) tensor at this point, so cum_r
            # becomes a tensor and is logged to W&B as such — confirm intended.
            cum_r += r
            if done:
                break

        # NOTE(review): `ep % update_every == 0` also fires at ep == 0,
        # i.e. after the very first episode — confirm intended.
        if ep % update_every == 0:
            s, a, _, _, done, g = memory.get_samples()
            agent.update_episodes(s, a, g, use_norm=use_norm)
            memory.reset()
            n_update += 1
        wandb.log({"episode return": cum_r, "num_update": n_update})

    # Save the trained policy into the W&B run directory, then close the run.
    torch.save(agent.state_dict(), join(wandb.run.dir, "agent.pt"))
    wandb.join()
コード例 #14
0
def run_experiment(cfg: Dict, save_weights: bool = False):
    """Run a training experiment.

    Configuration file can be generated using deepblink config.
    NOTE - There are currently only one type of dataset and model option. This is intentional
    to make future development easier of new models such as 3D / 4D options.

    Args:
        cfg: Dictionary configuration file.
        save_weights: If model weights should be saved separately.
            The complete model is automatically saved.
    """
    # Resolve the pluggable pieces (dataset, model, network, optimizer,
    # loss) from their string names in the config.
    dataset_class = get_from_module("deepblink.datasets", cfg["dataset"])
    model_class = get_from_module("deepblink.models", cfg["model"])
    network_fn = get_from_module("deepblink.networks", cfg["network"])
    optimizer_fn = get_from_module("deepblink.optimizers", cfg["optimizer"])
    loss_fn = get_from_module("deepblink.losses", cfg["loss"])

    # Guard against an explicit `network_args: null` in the YAML config.
    network_args = (cfg.get("network_args", {})
                    if cfg.get("network_args", {}) is not None else {})
    dataset_args = cfg.get("dataset_args", {})
    train_args = cfg.get("train_args", {})

    # The network must agree with the dataset's grid cell size.
    network_args["cell_size"] = dataset_args["cell_size"]

    dataset = dataset_class(dataset_args["version"], dataset_args["cell_size"])

    use_wandb = cfg["use_wandb"]
    model = model_class(
        dataset_args=dataset_args,
        dataset_cls=dataset,
        loss_fn=loss_fn,
        network_args=network_args,
        network_fn=network_fn,
        optimizer_fn=optimizer_fn,
        train_args=train_args,
    )

    # Record environment info alongside the experiment config.
    cfg["system"] = {
        "gpus": tf.config.list_logical_devices("GPU"),
        "version": platform.version(),
        "platform": platform.platform(),
    }

    now = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
    run_name = f"{now}_{cfg['run_name']}"

    if use_wandb:
        wandb.init(name=run_name, project=cfg["name"], config=cfg)

    model = train_model(model, dataset, cfg, run_name, use_wandb)

    if use_wandb:
        # Close the W&B run before (optionally) saving weights.
        wandb.join()

    if save_weights:
        model.save_weights()
コード例 #15
0
            def train():
                """Sweep trial: fine-tune BERT for sentiment regression.

                Reads hyperparameters (epochs, learning_rate) from wandb.config,
                trains via simpletransformers, and closes the run with wandb.join().
                """
                # NOTE(review): wandb.init takes the project via the `project=`
                # keyword; confirm the positional call matches the wandb version
                # pinned by this project.
                wandb.init(WAND_PROJECT_NAME)
                modelArgs = {
                    "max_seq_length": self.maxSeqLength,
                    "output_dir": self.modelOutputDir,
                    "overwrite_output_dir": True,
                    "best_model_dir": self.bestModelOutputDir,
                    "wandb_project": WAND_PROJECT_NAME,
                    "num_training_epochs": wandb.config.epochs,
                    "learning_rate": wandb.config.learning_rate,
                    "do_lower_case": True,
                    "cache_dir": self.modelCacheDir,
                    "encoding": "utf-8",
                    "train_batch_size": 5,
                    "eval_batch_size": 5,
                    "evaluate_during_training_steps": 50,
                    "evaluate_during_training_verbose": True,
                    "logging_steps": 5,
                    "sliding_window": True,
                    "reprocess_input_data": True,
                    "evaluate_during_training": True,
                    "use_multiprocessing": False,
                    "regression": True
                }
                # num_labels=1 + regression=True: single continuous output.
                model = ClassificationModel(self.modelType,
                                            self.modelNameOrPath,
                                            args=modelArgs,
                                            sweep_config=wandb.config,
                                            use_cuda=torch.cuda.is_available(),
                                            num_labels=1)

                # Training
                try:
                    log.info(
                        f"Started finetuning BERT on sentiment analysis/regression task.."
                    )
                    model.train_model(
                        train_df=self.trainDataFrame,
                        eval_df=self.evalDataFrame,
                        show_running_loss=True,
                        output_dir=self.modelOutputDir,
                        mse=sklearn.metrics.mean_squared_error,
                        r2Score=sklearn.metrics.r2_score,
                    )
                    log.info(
                        f"Finished training and evaluation of our finetuned model on sentiment analysis/regression task. Check the folder '{self.modelOutputDir}' for finetuned weights."
                    )
                    log.info(
                        f"It took {round((time.time() - startTime) / 3600, 1)} hours to train/finetune BERT model on sentiment analysis/regression task."
                    )
                except Exception:
                    # Narrowed from a bare `except:` so KeyboardInterrupt and
                    # SystemExit are no longer swallowed; the error is logged
                    # and the run is still closed below.
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    err = f"Error occurred while training finetuned model on sentiment analysis/regression task. Error is: {str(exc_type)}; {str(exc_value)}."
                    log.error(err)

                wandb.join()
コード例 #16
0
ファイル: wandb.py プロジェクト: srikalyan/ray
 def run(self):
     """Pump queued results into W&B until the end-of-queue sentinel arrives."""
     wandb.init(*self.args, **self.kwargs)
     # iter(callable, sentinel) yields queue items until one equals the
     # sentinel, which is exactly the original compare-and-break loop.
     for outcome in iter(self.queue.get, _WANDB_QUEUE_END):
         metrics, cfg_update = self._handle_result(outcome)
         wandb.config.update(cfg_update, allow_val_change=True)
         wandb.log(metrics)
     wandb.join()
コード例 #17
0
def launch_training_on_all_splits(experiment: str, splits: List,
                                  base_model: str, dropout: float,
                                  learning_rate: float):
    """Fine-tune a pretrained rxnfp BERT on every Suzuki-Miyaura random split.

    For each (name, split) pair: reads the split's TSV, normalizes labels
    with the *train* mean/std, trains, and logs one W&B run per split.

    Args:
        experiment: Tag used in the W&B project and output directory names.
        splits: Iterable of (split_name, split_index) pairs; the index is
            the row count of the training portion.
        base_model: Suffix of the pretrained bert model bundled with rxnfp.
        dropout: hidden_dropout_prob for the transformer config.
        learning_rate: Optimizer learning rate.
    """
    project = f'suzuki_miyaura_training_{experiment}_{base_model}'
    model_args = {
        'wandb_project': project,
        'num_train_epochs': 15,
        'overwrite_output_dir': True,
        'learning_rate': learning_rate,
        'gradient_accumulation_steps': 1,
        'regression': True,
        "num_labels": 1,
        "fp16": False,
        "evaluate_during_training": False,
        'manual_seed': 42,
        "max_seq_length": 300,
        "train_batch_size": 16,
        "warmup_ratio": 0.00,
        "config": {
            'hidden_dropout_prob': dropout
        }
    }

    for (name, split) in splits:
        # reinit=True allows multiple wandb runs in the same process.
        if wandb_available: wandb.init(name=name, project=project, reinit=True)
        df = pd.read_csv(f'../data/Suzuki-Miyaura/random_splits/{name}.tsv',
                         sep='\t')

        train_df = df.iloc[:split][['rxn', 'y']]
        test_df = df.iloc[split:][['rxn', 'y']]

        train_df.columns = ['text', 'labels']
        test_df.columns = ['text', 'labels']

        # Standardize labels using train statistics only (no test leakage).
        mean = train_df.labels.mean()
        std = train_df.labels.std()

        train_df['labels'] = (train_df['labels'] - mean) / std
        test_df['labels'] = (test_df['labels'] - mean) / std

        model_path = pkg_resources.resource_filename(
            "rxnfp", f"models/transformers/bert_{base_model}")
        pretrained_bert = SmilesClassificationModel(
            "bert",
            model_path,
            num_labels=1,
            args=model_args,
            use_cuda=torch.cuda.is_available())
        pretrained_bert.train_model(
            train_df,
            output_dir=
            f"outputs_suzuki_miyaura_{experiment}_{base_model}_{name}_split_{str(split).replace('-','_')}",
            eval_df=test_df,
            r2=sklearn.metrics.r2_score)
        # Close the per-split run before the next iteration starts one.
        if wandb_available: wandb.join()
コード例 #18
0
def offline_log_to_wandb(project_name,
                         args_dict,
                         early_stop_results_dict,
                         summary_df,
                         workdir=None,
                         wandb_log_subset_of_metrics=False):
    """Replay a finished experiment's per-epoch metrics into W&B as an
    offline-style run, then log the early-stopping summary.

    Args:
        project_name: Target W&B project; if None it is derived from
            args_dict with an '_offline' (and optionally '_subset') suffix.
        args_dict: Experiment config; also becomes the run config.
        early_stop_results_dict: Early-stopping results for summary/dumps.
        summary_df: Per-epoch metrics DataFrame containing an 'epoch' column.
        workdir: Optional working directory for W&B files.
        wandb_log_subset_of_metrics: If True, log only the curated curve
            metrics instead of every column.
    """

    if project_name is None:
        project_name = args_dict['exp']['project_name'] + '_offline'
        if wandb_log_subset_of_metrics:
            project_name += '_subset'
    print(f'Writing to W&B project {project_name}')

    curve_metric_names = None
    if wandb_log_subset_of_metrics:
        curve_metric_names = get_wandb_curve_metrics()

    print(f'Start dump results to W&B project: {project_name}')
    wandb_myinit(project_name=project_name,
                 experiment_name=args_dict['exp']['experiment_name'],
                 instance_name=args_dict['exp']['instance_name'],
                 config=args_dict,
                 workdir=workdir)

    global_step_name = 'epoch'
    summary_df = summary_df.set_index(global_step_name)
    print(f'Dump run curves')
    first_iter = True
    for global_step, step_metrics in summary_df.iterrows():
        if first_iter:
            # Warn once (on the first row) about requested metrics that are
            # absent from the summary.
            first_iter = False
            if curve_metric_names is not None:
                for metric in curve_metric_names:
                    if metric not in step_metrics:
                        warnings.warn(
                            f"Can't log '{metric}'. It doesn't exists.")

        if wandb_log_subset_of_metrics:
            metrics_to_log = slice_dict_to_dict(step_metrics.to_dict(),
                                                curve_metric_names,
                                                ignore_missing_keys=True)
        else:
            # log all metrics
            metrics_to_log = step_metrics.to_dict()

        # Re-attach the epoch so W&B can use it as the x-axis.
        metrics_to_log[global_step_name] = global_step
        wandb.log(metrics_to_log)

    early_stop_results_to_wandb_summary(early_stop_results_dict)
    dump_preds_at_early_stop(early_stop_results_dict, workdir, use_wandb=True)

    # terminate nicely offline w&b run
    wandb.join()
コード例 #19
0
ファイル: sweep.py プロジェクト: tcy1999/SI630
def training():
    """Sweep entry point: fine-tune a RoBERTa NER model using the
    hyperparameters supplied by the active W&B sweep."""
    wandb.init()

    ner = NERModel(
        "roberta",
        "roberta-base",
        use_cuda=True,
        args=model_args,
        sweep_config=wandb.config,
    )
    # Alternative backbone kept for reference:
    # model = NERModel("distilbert", "distilbert-base-cased", use_cuda=True, args=model_args, sweep_config=wandb.config)
    ner.train_model(train_df, eval_data=trial_df)

    wandb.join()
コード例 #20
0
ファイル: main_funcs.py プロジェクト: ehrhorn/CubeML
def evaluate_model(model_dir, wandb_ID=None, predict=True):
    """Predict on the dataset and make performance plots for the model in ``model_dir``.

    Optionally resumes a Weights & Biases run to log results, then marks
    the experiment as finished in the model's ``meta_pars.json``.

    Arguments:
        model_dir {str} -- Full or partial path to a trained model.

    Keyword Arguments:
        wandb_ID {str} -- The unique W&B-ID of the experiment to resume.
            If None, no W&B logging is performed. (default: {None})
        predict {bool} -- If True, run predictions with the best model
            before reporting performance. (default: {True})
    """

    # ======================================================================== 
    # SAVE OPERATION PLOTS
    # ======================================================================== 
    # Resume the existing W&B run so the operation plots attach to it.
    if wandb_ID is not None:
        hyper_pars, data_pars, arch_pars, meta_pars = load_model_pars(model_dir)
        WANDB_DIR = get_project_root()+'/models'
        PROJECT = meta_pars['project']
        wandb.init(resume=True, id=wandb_ID, dir=WANDB_DIR, project=PROJECT)
    print(model_dir)
    log_operation_plots(model_dir, wandb_ID=wandb_ID)

    # ======================================================================== 
    # PREDICT USING BEST MODEL
    # ========================================================================
     
    if predict:
        hyper_pars, data_pars, arch_pars, meta_pars = load_model_pars(model_dir)

        # Pickle/Sqlite-backed datasets need the pickle-specific prediction path.
        if data_pars['dataloader'] == 'PickleLoader' or data_pars['dataloader'] == 'SqliteLoader' :
            calc_predictions_pickle(model_dir, wandb_ID=wandb_ID)
        else:
            calc_predictions(model_dir, wandb_ID=wandb_ID)
    
    # ======================================================================== #
    # REPORT PERFORMANCE
    # ======================================================================== #

    log_performance_plots(model_dir, wandb_ID=wandb_ID) 
    summarize_model_performance(model_dir, wandb_ID=wandb_ID)

    if wandb_ID is not None:
        # NOTE(review): wandb.log() is called with no arguments here —
        # presumably to flush pending metrics before joining; confirm intended.
        wandb.log()
        wandb.join()
    
    # Update the meta_pars-file: flag the experiment as completed on disk.
    with open(model_dir+'/meta_pars.json') as json_file:
        meta_pars = json.load(json_file)
    meta_pars['status'] = 'Finished'
    with open(model_dir+'/meta_pars.json', 'w') as fp:
        json.dump(meta_pars, fp)

    # Close all open figures
    plt.close('all')
コード例 #21
0
def setup_and_evaluate(run: Run, blueprints: List[Tuple[BlueprintGenome, int]],
                       in_size: List[int], feature_mul: int):
    """Attach to (or create) a W&B fully-train run, then evaluate every blueprint.

    Each entry of ``blueprints`` pairs a genome with the generation number it
    came from; evaluation is retried via ``eval_with_retries``.
    """
    if config.use_wandb:
        # Resume an existing fully-train run if configured, otherwise open a new one.
        starter = resume_ft_run if config.resume_fully_train else new_ft_run
        starter(True)

    for bp, generation in blueprints:
        eval_with_retries(run, bp, generation, in_size, feature_mul)

    wandb.join()
コード例 #22
0
    def _start(self, finished_q, env, function, run_id, in_jupyter):
        """Execute one agent run: export its environment, call the user function,
        and close any W&B run the function left open.

        Args:
            finished_q: completion queue (not used inside this method).
            env: optional mapping of environment variables to export first.
            function: user-supplied callable to execute for this run; may be None.
            run_id: identifier of the run, used only in log output.
            in_jupyter: whether we are inside a Jupyter kernel (not used here).
        """
        # Export the per-run environment so wandb.init() inside `function`
        # picks up the right settings.
        if env:
            for k, v in env.items():
                os.environ[k] = v

        # Call the user function, bracketed by start/finish log lines.
        print("wandb: Agent Started Run:", run_id)
        if function:
            function()
        print("wandb: Agent Finished Run:", run_id, "\n")

        # Fix: the original `if run := wandb.run:` bound a local `run` that was
        # never read; the plain truthiness check is equivalent and clearer.
        if wandb.run:
            wandb.join()
コード例 #23
0
def test_resume_allow_success(live_mock_server, test_settings):
    """A run resumed with resume="allow" must continue history at the server offset."""
    ctx_update = live_mock_server.set_ctx({"resume": True})
    print("CTX AFTER UPDATE", ctx_update)
    print("GET RIGHT AWAY", live_mock_server.get_ctx())

    # Start a resumable run, log one metric, and finish it.
    wandb.init(reinit=True, resume="allow", settings=test_settings)
    wandb.log({"acc": 10})
    wandb.join()

    ctx = live_mock_server.get_ctx()
    print("CTX", ctx)
    history = first_filestream(ctx)["files"]["wandb-history.jsonl"]
    print(history)
    # The resumed run picks up exactly where the previous run left off.
    assert history["offset"] == 15
    assert json.loads(history["content"][0])["_step"] == 16
コード例 #24
0
def transfer_train(
    *,
    train_set: str,
    size: int = 256,
    epochs: int = 20,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    lr_decay_rate: float = 0.99,
    lr_decay_steps: int = 500,
    make_submission: bool = False,
    **model_params,
):
    """
    Trains a new head on top of a transfer model. No fine tuning of the transfer
    model is conducted. Transfer model embeddings are computed once at the
    beginning of the training run. If `make_submission==True`, returns this model's
    scores on the test set, along with the corresponding image ids.

    Keyword Arguments:
        train_set: Name of the training split to fit on.
        size: Input image size.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        learning_rate: Initial learning rate for Adam.
        lr_decay_rate: Multiplicative decay factor of the LR schedule.
        lr_decay_steps: Decay interval in steps. (Fix: the default was the float
            literal ``5e2``, contradicting the ``int`` annotation; now ``500``.)
        make_submission: If True, predict on the test set and return the result.
        **model_params: Extra keyword arguments forwarded to ``TransferModel``.
    """

    # Make and init the wandb run (reinit allows repeated calls in one process).
    wandb.init(project="Plant Pathology", reinit=True)
    wandb.config.update({
        "epochs": epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "lr_decay_rate": lr_decay_rate,
        "lr_decay_steps": lr_decay_steps,
        "size": size,
    })

    # Exponentially-decaying learning-rate schedule fed into Adam.
    model = TransferModel(constants.NCLASSES, size, batch_size, **model_params)
    model.compile(
        optimizer=Adam(learning_rate=ExponentialDecay(
            learning_rate, lr_decay_steps, lr_decay_rate)),
        loss="categorical_crossentropy",
        metrics=["categorical_accuracy"],
    )

    # Train the model (just the new layers on top of the transfer model)
    model.fit_head(
        train_set,
        "dev",
        epochs=epochs,
        callbacks=[WandbCallback(save_model=False)],
    )

    # Log the scores
    wandb.join()

    if make_submission:
        return model.predict_on_test()
コード例 #25
0
ファイル: wandb_sweep.py プロジェクト: cltl/a-proof-zonmw
    def train():
        """One sweep trial: train a multi-label classifier on the trial's config."""
        wandb.init()

        # Build the multi-label transformer model for this trial.
        clf = MultiLabelClassificationModel(
            model_type,
            model_name,
            num_labels=len(labels),
            args=model_args,
            use_cuda=cuda_available,
        )

        clf.train_model(train_data, eval_df=eval_data)

        # Finish this run so the sweep agent can launch the next trial.
        wandb.join()
コード例 #26
0
def train():
    """Sweep trial: train and evaluate a RoBERTa classification model."""
    # Initialize a new wandb run
    wandb.init()

    # Build the transformer classifier from this trial's sweep hyperparameters.
    classifier = ClassificationModel(
        "roberta",
        "roberta-base",
        use_cuda=True,
        args=model_args,
        sweep_config=wandb.config,
    )

    # Train, then evaluate on the held-out set.
    classifier.train_model(train_df, eval_df=eval_df)
    classifier.eval_model(eval_df)

    # Sync wandb
    wandb.join()
コード例 #27
0
    def main(self):
        """Log two epochs of training metrics to a W&B run mirroring TensorBoard."""
        wandb.init(config=self.cli_args, sync_tensorboard=True)

        # Only the training split is logged here.
        for epoch_ndx in range(2):
            self.logMetrics(epoch_ndx, 'trn')

        # Close whichever TensorBoard writers were opened during logging.
        for writer in (self.trn_writer, self.val_writer):
            if writer is not None:
                writer.close()

        wandb.join()
コード例 #28
0
def train(sweep_q, worker_q):
    """Worker-process body: run one fold's trial and report its metric back.

    Blocks on ``worker_q`` for its init data, logs a (random placeholder)
    validation accuracy to its own W&B run, and pushes the result to ``sweep_q``.
    """
    reset_wandb_env()

    worker_data = worker_q.get()
    trial_name = "{}-{}".format(worker_data.sweep_run_name, worker_data.num)

    run = wandb.init(
        group=worker_data.sweep_id,
        job_type=worker_data.sweep_run_name,
        name=trial_name,
        config=worker_data.config,
    )

    # Placeholder metric for this fold.
    val_accuracy = random.random()
    run.log(dict(val_accuracy=val_accuracy))
    wandb.join()

    # Hand the result back to the sweep coordinator.
    sweep_q.put(WorkerDoneData(val_accuracy=val_accuracy))
コード例 #29
0
def main():
    """Coordinate a k-fold cross-validation sweep across worker processes."""
    num_folds = 5

    # Spin up workers before calling wandb.init()
    # Workers will be blocked on a queue waiting to start
    sweep_q = multiprocessing.Queue()
    workers = []
    for _ in range(num_folds):
        worker_q = multiprocessing.Queue()
        proc = multiprocessing.Process(target=train,
                                       kwargs=dict(sweep_q=sweep_q, worker_q=worker_q))
        proc.start()
        workers.append(Worker(queue=worker_q, process=proc))

    # Parent process owns the sweep-level run; link it to the run group.
    sweep_run = wandb.init()
    sweep_id = sweep_run.sweep_id or "unknown"
    sweep_url = sweep_run.get_sweep_url()
    project_url = sweep_run.get_project_url()
    sweep_group_url = "{}/groups/{}".format(project_url, sweep_id)
    sweep_run.notes = sweep_group_url
    sweep_run.save()
    sweep_run_name = sweep_run.name or sweep_run.id or "unknown"

    fold_metrics = []
    for fold_idx, worker in enumerate(workers):
        # start worker
        worker.queue.put(
            WorkerInitData(
                sweep_id=sweep_id,
                num=fold_idx,
                sweep_run_name=sweep_run_name,
                config=dict(sweep_run.config),
            ))
        # get metric from worker, then wait for it to finish
        result = sweep_q.get()
        worker.process.join()
        fold_metrics.append(result.val_accuracy)

    # Log the mean validation accuracy across folds to the sweep run.
    sweep_run.log(dict(val_accuracy=sum(fold_metrics) / len(fold_metrics)))
    wandb.join()

    print("*" * 40)
    print("Sweep URL:       ", sweep_url)
    print("Sweep Group URL: ", sweep_group_url)
    print("*" * 40)
コード例 #30
0
def train():
    """Sweep trial: fine-tune RoBERTa for multi-class classification with an F1 metric."""
    wandb.init()
    # Forward this run's id so the model's own wandb calls attach to it.
    model_args.wandb_kwargs = {"id": wandb.run.id}

    classifier = ClassificationModel(
        "roberta",
        "roberta-base",
        num_labels=num_of_labels,
        use_cuda=True,
        args=model_args,
        sweep_config=wandb.config,
    )

    # Train with a custom multi-class F1 reported during evaluation.
    classifier.train_model(train_df, eval_df=eval_df, f1=f1_multiclass)

    wandb.join()