Example no. 1
 def initialize_cometml_experiment(self, hyper_params):
     """
     Initialize the comet_ml experiment (only if enabled in config file)
     :param hyper_params: current hyper parameters dictionary
     :return:
     """
     if (
         self.comet_ml_experiment is None
         and self.cfg.COMET_ML_UPLOAD
     ):
         # Create an experiment
         self.comet_ml_experiment = Experiment(
             api_key=os.environ["COMET_API_KEY"],
             project_name="general",
             workspace="proguoram",
         )
         if self.comet_ml_experiment.disabled:
             # There is probably no internet (on the cluster, for example),
             # so we create an offline experiment instead.
             self.comet_ml_experiment = OfflineExperiment(
                 workspace="proguoram",
                 project_name="general",
                 offline_directory=self.output_dir,
             )
         self.comet_ml_experiment.log_parameters(hyper_params)
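
A note on the pattern in Example no. 1: the code tries an online Experiment first and falls back to an OfflineExperiment when the online one comes up disabled. The same fallback can be distilled into a small standalone helper. The sketch below is illustrative, not part of the original codebase; the helper name and default values are assumptions, while the disabled attribute and the ValueError raised when no API key is available both appear in the examples collected here.

import os

from comet_ml import Experiment, OfflineExperiment


def make_experiment(output_dir, project_name="general", workspace="proguoram"):
    """Return an online Experiment when possible, else an OfflineExperiment.

    Sketch only: mirrors the fallback of Example no. 1 without the
    config flag and instance caching of the original method.
    """
    try:
        exp = Experiment(
            api_key=os.environ["COMET_API_KEY"],
            project_name=project_name,
            workspace=workspace,
        )
        if not exp.disabled:
            return exp
    except (KeyError, ValueError):
        # KeyError: COMET_API_KEY unset; ValueError: no valid API key.
        pass
    return OfflineExperiment(
        project_name=project_name,
        workspace=workspace,
        offline_directory=output_dir,
    )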
Example no. 2
def _set_comet_experiment(configuration, config_key):
    experiment = OfflineExperiment(
        project_name='general',
        workspace='benjaminbenoit',
        offline_directory="../damic_comet_experiences")
    experiment.set_name(config_key)
    experiment.log_parameters(configuration)
    return experiment
Example no. 3
    def __init__(
        self,
        batch_size: int,
        snapshot_dir: Optional[str] = None,
        snapshot_mode: str = "last",
        snapshot_gap: int = 1,
        exp_set: Optional[str] = None,
        use_print_exp: bool = False,
        saved_exp: Optional[str] = None,
        **kwargs,
    ):
        """
        :param kwargs: passed to comet's Experiment at init.
        """
        if use_print_exp:
            self.experiment = PrintExperiment()
        else:
            from comet_ml import Experiment, ExistingExperiment, OfflineExperiment

            if saved_exp:
                self.experiment = ExistingExperiment(
                    previous_experiment=saved_exp, **kwargs
                )
            else:
                try:
                    self.experiment = Experiment(**kwargs)
                except ValueError:  # no API key
                    log_dir = Path.home() / "logs"
                    log_dir.mkdir(exist_ok=True)
                    self.experiment = OfflineExperiment(offline_directory=str(log_dir))

        self.experiment.log_parameter("complete", False)
        if exp_set:
            self.experiment.log_parameter("exp_set", exp_set)
        if snapshot_dir:
            snapshot_dir = Path(snapshot_dir) / self.experiment.get_key()
        # log_traj_window (int): How many trajectories to hold in deque for computing performance statistics.
        self.log_traj_window = 100
        self._cum_metrics = {
            "n_unsafe_actions": 0,
            "constraint_used": 0,
            "cum_completed_trajs": 0,
            "logging_time": 0,
        }
        self._new_completed_trajs = 0
        self._last_step = 0
        self._start_time = self._last_time = time()
        self._last_snapshot_upload = 0
        self._snapshot_upload_time = 30 * 60

        super().__init__(batch_size, snapshot_dir, snapshot_mode, snapshot_gap)
Example no. 4
def test_regression_2():
    runner = tasks.RegressionRunner(
        model, optimizer, nn.MSELoss(),
        OfflineExperiment(offline_directory="./logs", display_summary_level=0))
    runner.train_config(epochs=1)

    try:
        runner.run(verbose=False)
        is_pass = True

    except ValueError:
        is_pass = False

    assert is_pass is False
Example no. 5
def test_classification_3():
    runner = tasks.ClassificationRunner(
        model,
        optimizer,
        nn.CrossEntropyLoss(),
        OfflineExperiment(offline_directory="../tmp")
    )
    try:
        runner.fit(x.astype(np.float32), y.astype(np.int64), verbose=False, epochs=1, batch_size=32)
        is_pass = True

    except Exception as e:
        print(e)
        is_pass = False

    assert is_pass is True
Example no. 6
def test_regression_3():
    runner = tasks.RegressionRunner(
        model, optimizer, nn.MSELoss(),
        OfflineExperiment(offline_directory="./logs", display_summary_level=0))
    try:
        runner.fit(x.astype(np.float32),
                   y.astype(np.float32),
                   batch_size=32,
                   epochs=1,
                   verbose=False)
        is_pass = True

    except Exception:
        is_pass = False

    assert is_pass is True
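
Examples no. 4-6 above all hand the runner a throwaway OfflineExperiment, so the tests need neither an API key nor a network connection, and display_summary_level=0 silences the end-of-run summary. A pytest-style fixture capturing the same idea, as a sketch (pytest and its tmp_path fixture are assumptions, not part of the original tests):

import pytest

from comet_ml import OfflineExperiment


@pytest.fixture
def offline_experiment(tmp_path):
    # Write the experiment archive into pytest's per-test temp directory
    # and silence the end-of-run summary, as the tests above do.
    return OfflineExperiment(offline_directory=str(tmp_path),
                             display_summary_level=0)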
Example no. 7
    def init_comet_experiment(self):
        if self.FLAGS.upload:
            # Save experimental data in the cloud
            experiment = Experiment(api_key="VNQSdbR1pw33EkuHbUsGUSZWr",
                                    project_name="general",
                                    workspace="florpi")

        else:
            # Save experimental data locally
            experiment = OfflineExperiment(
                api_key="VNQSdbR1pw33EkuHbUsGUSZWr",
                project_name="general",
                workspace="florpi",
                #offline_directory="/cosma/home/dp004/dc-beck3/4_GaHaCo/GaHaCo/comet/",
                offline_directory="/cosma/home/dp004/dc-cues1/GaHaCo/comet/",
            )
        self.experiment = experiment
Example no. 8
def test_regression_1():
    runner = tasks.RegressionRunner(
        model, optimizer, nn.MSELoss(),
        OfflineExperiment(offline_directory="./logs", display_summary_level=0))
    runner.add_loader("train",
                      train_loader).add_loader("val", val_loader).add_loader(
                          "test", test_loader)
    runner.train_config(epochs=1)

    try:
        runner.run(verbose=True)
        is_pass = True
    except Exception as e:
        print(e)
        is_pass = False

    assert is_pass is True
Example no. 9
    def __init__(self, send_logs, tags, parameters, experiment=None):
        self.stations = 5
        self.send_logs = send_logs
        if self.send_logs:
            if experiment is None:
                json_loc = glob.glob("./**/comet_token.json", recursive=True)[0]
                with open(json_loc, "r") as f:
                    kwargs = json.load(f)

                self.experiment = OfflineExperiment(**kwargs)
            else:
                self.experiment = experiment
        self.sent_mb = 0
        self.speed_window = deque(maxlen=100)
        self.step_time = None
        self.current_speed = 0
        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)
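
In Example no. 9 the OfflineExperiment keyword arguments are read from a comet_token.json file and expanded with **kwargs. Such a file is simply a JSON object of constructor arguments. The sketch below writes one with illustrative values; the key names are assumptions, chosen only to be consistent with the ** expansion above.

import json

# Hedged sketch: comet_token.json is plain OfflineExperiment kwargs as JSON.
with open("comet_token.json", "w") as f:
    json.dump({"project_name": "general",
               "workspace": "someuser",
               "offline_directory": "./comet_logs"}, f)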
Example no. 10
def test_classification_2():
    runner = tasks.ClassificationRunner(
        model,
        optimizer,
        nn.CrossEntropyLoss(),
        OfflineExperiment(offline_directory="./logs", display_summary_level=0)
    )
    runner.train_config(epochs=1)

    try:
        runner.run(verbose=False)
        is_pass = True

    except ValueError:
        is_pass = False

    except Exception as e:
        print(e)
        is_pass = True

    assert is_pass is False
Example no. 11
    def _setup(self, *args):
        tf.logging.set_verbosity(tf.logging.INFO)
        tf.logging.info("calling setup")
        self.hparams = tf.contrib.training.HParams(**self.config)
        self.hparams.add_hparam('trial_name', self.trial_name)

        if self.hparams.disable_comet:
            tf.logging.info("Started logging offline for comet ml")
            self.comet_experiment = OfflineExperiment(
                project_name=self.hparams.name,
                workspace="new",
                offline_directory=os.path.join(self.hparams.local_dir,
                                               self.hparams.name))
        else:
            tf.logging.info("Started logging to comet ml online")
            self.comet_experiment = Experiment(
                api_key="enter_your_api_key_from_cometml",
                project_name=self.hparams.name,
                workspace="new")

        self.trainer = ModelTrainer(self.hparams,
                                    comet_exp=self.comet_experiment)
Example no. 12
def get_logger(logger, root, project=None, workspace=None, offline=True) -> Logger:
    from genomics_utils import LocalLogger, CometLogger
    
    if logger.lower() == "local":
        return LocalLogger(root)
    
    elif logger.lower() == "comet":
        assert project is not None, 'for the comet logger, please provide a project name'
        assert workspace is not None, 'for the comet logger, please provide a workspace'
        
        if offline:
            comet_path, = ensure_directories(root, "comet/")
            experiment = OfflineExperiment(project_name=project,
                                           workspace=workspace,
                                           offline_directory=comet_path
                                           )
        else:
            experiment = Experiment(project_name=project, workspace=workspace)
        return CometLogger(root=root, experiment=experiment)
    
    else:
        raise ValueError("Unknown experiment context")
Example no. 13
def test_classification_4():
    runner = tasks.ClassificationRunner(
        model,
        optimizer,
        nn.CrossEntropyLoss(),
        OfflineExperiment(offline_directory="./logs", display_summary_level=0)
    )
    runner.add_loader("train", train_loader).add_loader("val", val_loader).add_loader("test", test_loader)

    try:
        runner.run(verbose=False)
        runner.train_config(epochs=1, checkpoint_path="../tmp/checkpoints", monitor="train_avg_acc >= 0.6")
        is_pass = True

    except TypeError:
        is_pass = False

    except Exception as e:
        print(e)
        is_pass = True

    assert is_pass is False
Example no. 14
def BSN_Train_TEM(opt):
    global_step = 0
    epoch = 0
    if opt['do_representation']:
        model = TEM(opt)
        optimizer = optim.Adam(model.parameters(),
                               lr=opt["tem_training_lr"],
                               weight_decay=opt["tem_weight_decay"])
        global_step, epoch = _maybe_load_checkpoint(
            model, optimizer, global_step, epoch,
            os.path.join(opt["checkpoint_path"], opt['name']))
        if opt['representation_checkpoint']:
            # print(model.representation_model.backbone.inception_5b_3x3.weight[0][0])
            if opt['do_random_model']:
                print('DOING RANDOM MODEL!!!')
            else:
                print('DOING Pretrained model!!!')
                partial_load(opt['representation_checkpoint'], model)
            # print(model.representation_model.backbone.inception_5b_3x3.weight[0][0])
        if not opt['no_freeze']:
            for param in model.representation_model.parameters():
                param.requires_grad = False
        print(len([p for p in model.representation_model.parameters()]))
    else:
        model = TEM(opt)
        optimizer = optim.Adam(model.parameters(),
                               lr=opt["tem_training_lr"],
                               weight_decay=opt["tem_weight_decay"])
        global_step, epoch = _maybe_load_checkpoint(
            model, optimizer, global_step, epoch,
            os.path.join(opt["checkpoint_path"], opt['name']))

    model = torch.nn.DataParallel(model).cuda()
    # summary(model, (2, 3, 224, 224))

    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    if opt['dataset'] == 'gymnastics':
        # default image_dir is '/checkpoint/cinjon/spaceofmotion/sep052019/rawframes.426x240.12'
        img_loading_func = get_img_loader(opt)
        train_data_set = GymnasticsImages(opt,
                                          subset='Train',
                                          img_loading_func=img_loading_func,
                                          image_dir=opt['gym_image_dir'],
                                          video_info_path=os.path.join(
                                              opt['video_info'],
                                              'Train_Annotation.csv'))
        train_sampler = GymnasticsSampler(train_data_set, opt['sampler_mode'])
        test_data_set = GymnasticsImages(opt,
                                         subset="Val",
                                         img_loading_func=img_loading_func,
                                         image_dir=opt['gym_image_dir'],
                                         video_info_path=os.path.join(
                                             opt['video_info'],
                                             'Val_Annotation.csv'))
    elif opt['dataset'] == 'gymnasticsfeatures':
        # feature_dirs should roughly look like:
        # /checkpoint/cinjon/spaceofmotion/sep052019/tsn.1024.426x240.12.no-oversample/csv/rgb,/checkpoint/cinjon/spaceofmotion/sep052019/tsn.1024.426x240.12.no-oversample/csv/flow
        feature_dirs = opt['feature_dirs'].split(',')
        train_data_set = GymnasticsFeatures(opt,
                                            subset='Train',
                                            feature_dirs=feature_dirs,
                                            video_info_path=os.path.join(
                                                opt['video_info'],
                                                'Train_Annotation.csv'))
        test_data_set = GymnasticsFeatures(opt,
                                           subset='Val',
                                           feature_dirs=feature_dirs,
                                           video_info_path=os.path.join(
                                               opt['video_info'],
                                               'Val_Annotation.csv'))
        train_sampler = None
    elif opt['dataset'] == 'thumosfeatures':
        feature_dirs = opt['feature_dirs'].split(',')
        train_data_set = ThumosFeatures(opt,
                                        subset='Val',
                                        feature_dirs=feature_dirs)
        test_data_set = ThumosFeatures(opt,
                                       subset="Test",
                                       feature_dirs=feature_dirs)
        train_sampler = None
    elif opt['dataset'] == 'thumosimages':
        img_loading_func = get_img_loader(opt)
        train_data_set = ThumosImages(
            opt,
            subset='Val',
            img_loading_func=img_loading_func,
            image_dir=
            '/checkpoint/cinjon/thumos/rawframes.TH14_validation_tal.30',
            video_info_path=os.path.join(opt['video_info'],
                                         'Val_Annotation.csv'))
        test_data_set = ThumosImages(
            opt,
            subset='Test',
            img_loading_func=img_loading_func,
            image_dir='/checkpoint/cinjon/thumos/rawframes.TH14_test_tal.30',
            video_info_path=os.path.join(opt['video_info'],
                                         'Test_Annotation.csv'))
        train_sampler = None
    elif opt['dataset'] == 'activitynet':
        train_sampler = None
        representation_module = opt['representation_module']
        train_transforms = get_video_transforms(representation_module,
                                                opt['do_augment'])
        test_transforms = get_video_transforms(representation_module, False)
        train_data_set = VideoDataset(opt,
                                      train_transforms,
                                      subset='train',
                                      fraction=0.3)
        # We use val because we don't have annotations for test.
        test_data_set = VideoDataset(opt,
                                     test_transforms,
                                     subset='val',
                                     fraction=0.3)

    print('train / test dataset sizes: ', len(train_data_set),
          len(test_data_set))
    train_loader = torch.utils.data.DataLoader(
        train_data_set,
        batch_size=model.module.batch_size,
        shuffle=train_sampler is None,
        sampler=train_sampler,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)

    test_loader = torch.utils.data.DataLoader(
        test_data_set,
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)
    # test_loader = None

    milestones = [int(k) for k in opt['tem_lr_milestones'].split(',')]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=opt['tem_step_gamma'])

    if opt['log_to_comet']:
        comet_exp = CometExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                    project_name="bsn",
                                    workspace="cinjon",
                                    auto_metric_logging=True,
                                    auto_output_logging=None,
                                    auto_param_logging=False)
    elif opt['local_comet_dir']:
        comet_exp = OfflineExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                      project_name="bsn",
                                      workspace="cinjon",
                                      auto_metric_logging=True,
                                      auto_output_logging=None,
                                      auto_param_logging=False,
                                      offline_directory=opt['local_comet_dir'])
    else:
        comet_exp = None

    if comet_exp:
        comet_exp.log_parameters(opt)
        comet_exp.set_name(opt['name'])

    # test_TEM(test_loader, model, optimizer, 0, 0, comet_exp, opt)
    for epoch in range(epoch + 1, opt["tem_epoch"] + 1):
        global_step = train_TEM(train_loader, model, optimizer, epoch,
                                global_step, comet_exp, opt)
        test_TEM(test_loader, model, optimizer, epoch, global_step, comet_exp,
                 opt)
        if opt['dataset'] == 'activitynet':
            test_loader.dataset._subset_dataset(.3)
            train_loader.dataset._subset_dataset(.3)
        scheduler.step()
Example no. 15

if __name__ == '__main__':

    home = os.environ['HOME']
    tf.reset_default_graph()
    randint = np.random.randint(99999999)
    if args.randname: args.sugg += '-' + str(randint)
    print(args.sugg)
    logdir = join(home, 'ckpt/swissroll/' + args.sugg)
    os.makedirs(logdir, exist_ok=True)

    # comet experiment init
    if args.offline:
        experiment = OfflineExperiment(offline_directory=join(logdir, 'comet'),
                                       parse_args=False,
                                       project_name='swissroll-' + args.tag,
                                       workspace="wronnyhuang")
    else:
        experiment = Experiment(api_key="vPCPPZrcrUBitgoQkvzxdsh9k",
                                parse_args=False,
                                project_name='swissroll-' + args.tag,
                                workspace="wronnyhuang")

    with open(join(logdir, 'comet_expt_key.txt'), 'w') as f:
        f.write(experiment.get_key())
    if any([a.find('nhidden1') != -1 for a in sys.argv[1:]]):
        args.nhidden = [
            args.nhidden1, args.nhidden2, args.nhidden3, args.nhidden4,
            args.nhidden5, args.nhidden6
        ]
    experiment.log_parameters(vars(args))
    experiment.set_name(args.sugg)
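
Because Example no. 15 persists experiment.get_key() to comet_expt_key.txt, a later process can resume logging to the same run once it is online. A sketch of that resume step, assuming the key file written above and using ExistingExperiment with previous_experiment exactly as in Example no. 3:

from os.path import join

from comet_ml import ExistingExperiment

# logdir is the run directory created in Example no. 15.
with open(join(logdir, 'comet_expt_key.txt')) as f:
    prev_key = f.read().strip()
experiment = ExistingExperiment(previous_experiment=prev_key)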
Example no. 16
                    type=str,
                    default=".",
                    help="outputs path")
parser.add_argument("--resume", action="store_true")
parser.add_argument("--trainer",
                    type=str,
                    default="MUNIT",
                    help="MUNIT|UNIT|DoubleMUNIT")
parser.add_argument("--seed",
                    type=int,
                    default=None,
                    help="Torch and numpy seeds")
opts, unknownargs = parser.parse_known_args()

comet_exp = OfflineExperiment(project_name="munit",
                              workspace="vict0rsch",
                              offline_directory=opts.output_path)

cudnn.benchmark = False
if opts.seed is not None:
    np.random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    torch.cuda.manual_seed(opts.seed)
    torch.backends.cudnn.deterministic = True

# Load experiment setting
config = get_config(opts.config)
if unknownargs:
    for u in unknownargs:
        try:
            k, v = u.split("=")
Example no. 17
    valid_split = configuration['valid_split']
    train_labeled_split = configuration['train_labeled_split']
    latent_dim = configuration['latent_dim']
    flattened = False  # Default
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Set all seeds for full reproducibility
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Set up Comet Experiment tracking
    experiment = OfflineExperiment("z15Um8oxWZwiXQXZxZKGh48cl",
                                   workspace='swechhachoudhary',
                                   offline_directory="../swechhas_experiments")

    experiment.set_name(
        name=args.config +
        "_dim={}_overlapped={}".format(latent_dim, train_split))
    experiment.log_parameters(configuration)

    if encoding_model == 'pca':
        encoding_model = PCAEncoder(seed)
        flattened = True
    elif encoding_model == 'vae':
        encoding_model = VAE(latent_dim=latent_dim).to(device)
        flattened = True
    elif encoding_model == "ae":
        encoding_model = AE(latent_dim=latent_dim).to(device)
Example no. 18
def BSN_Train_PEM(opt):
    model = PEM(opt)
    model = torch.nn.DataParallel(model).cuda()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["pem_training_lr"],
                           weight_decay=opt["pem_weight_decay"])

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    def collate_fn(batch):
        batch_data = torch.cat([x[0] for x in batch])
        batch_iou = torch.cat([x[1] for x in batch])
        return batch_data, batch_iou

    train_dataset = ProposalDataSet(opt, subset="train")
    train_sampler = ProposalSampler(train_dataset.proposals,
                                    train_dataset.indices,
                                    max_zero_weight=opt['pem_max_zero_weight'])

    global_step = 0
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=model.module.batch_size,
        shuffle=False,
        sampler=train_sampler,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False,
        collate_fn=collate_fn if not opt['pem_do_index'] else None)

    subset = "validation" if opt['dataset'] == 'activitynet' else "test"
    test_loader = torch.utils.data.DataLoader(
        ProposalDataSet(opt, subset=subset),
        batch_size=model.module.batch_size,
        shuffle=True,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False,
        collate_fn=collate_fn if not opt['pem_do_index'] else None)

    milestones = [int(k) for k in opt['pem_lr_milestones'].split(',')]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=opt['pem_step_gamma'])

    if opt['log_to_comet']:
        comet_exp = CometExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                    project_name="bsnpem",
                                    workspace="cinjon",
                                    auto_metric_logging=True,
                                    auto_output_logging=None,
                                    auto_param_logging=False)
    elif opt['local_comet_dir']:
        comet_exp = OfflineExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                      project_name="bsnpem",
                                      workspace="cinjon",
                                      auto_metric_logging=True,
                                      auto_output_logging=None,
                                      auto_param_logging=False,
                                      offline_directory=opt['local_comet_dir'])
    else:
        comet_exp = None

    if comet_exp:
        comet_exp.log_parameters(opt)
        comet_exp.set_name(opt['name'])

    test_PEM(test_loader, model, -1, -1, comet_exp, opt)
    for epoch in range(opt["pem_epoch"]):
        global_step = train_PEM(train_loader, model, optimizer, epoch,
                                global_step, comet_exp, opt)
        test_PEM(test_loader, model, epoch, global_step, comet_exp, opt)
        scheduler.step()
Example no. 19
def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq,
          customize, comet_offline):
    """Train a model defined by config."""
    # tf.debugging.enable_check_numerics()
    config_file_path = config
    config, config_file_stem = parse_config(config,
                                            nepochs=nepochs,
                                            weights=weights)

    if plot_freq:
        config["callbacks"]["plot_freq"] = plot_freq

    if customize:
        config = customization_functions[customize](config)

    # Decide tf.distribute.strategy depending on number of available GPUs
    horovod_enabled = config["setup"]["horovod_enabled"]
    if horovod_enabled:
        num_gpus = initialize_horovod()
    else:
        strategy, num_gpus = get_strategy()

    outdir = ""
    if not horovod_enabled or hvd.rank() == 0:
        outdir = create_experiment_dir(prefix=prefix + config_file_stem + "_",
                                       suffix=platform.node())
        shutil.copy(
            config_file_path, outdir + "/config.yaml"
        )  # Copy the config file to the train dir for later reference

    try:
        if comet_offline:
            print("Using comet-ml OfflineExperiment, saving logs locally.")
            from comet_ml import OfflineExperiment

            experiment = OfflineExperiment(
                project_name="particleflow-tf",
                auto_metric_logging=True,
                auto_param_logging=True,
                auto_histogram_weight_logging=True,
                auto_histogram_gradient_logging=False,
                auto_histogram_activation_logging=False,
                offline_directory=outdir + "/cometml",
            )
        else:
            print("Using comet-ml Experiment, streaming logs to www.comet.ml.")
            from comet_ml import Experiment

            experiment = Experiment(
                project_name="particleflow-tf",
                auto_metric_logging=True,
                auto_param_logging=True,
                auto_histogram_weight_logging=True,
                auto_histogram_gradient_logging=False,
                auto_histogram_activation_logging=False,
            )
    except Exception as e:
        print("Failed to initialize comet-ml dashboard: {}".format(e))
        experiment = None
    if experiment:
        experiment.set_name(outdir)
        experiment.log_code("mlpf/tfmodel/model.py")
        experiment.log_code("mlpf/tfmodel/utils.py")
        experiment.log_code(config_file_path)

    ds_train, num_train_steps = get_datasets(config["train_test_datasets"],
                                             config, num_gpus, "train")
    ds_test, num_test_steps = get_datasets(config["train_test_datasets"],
                                           config, num_gpus, "test")
    ds_val, ds_info = get_heptfds_dataset(
        config["validation_datasets"][0],
        config,
        num_gpus,
        "test",
        config["setup"]["num_events_validation"],
        supervised=False,
    )
    ds_val = ds_val.batch(5)

    if ntrain:
        ds_train = ds_train.take(ntrain)
        num_train_steps = ntrain
    if ntest:
        ds_test = ds_test.take(ntest)
        num_test_steps = ntest

    print("num_train_steps", num_train_steps)
    print("num_test_steps", num_test_steps)
    total_steps = num_train_steps * config["setup"]["num_epochs"]
    print("total_steps", total_steps)

    if horovod_enabled:
        model, optim_callbacks, initial_epoch = model_scope(
            config, total_steps, weights, horovod_enabled)
    else:
        with strategy.scope():
            model, optim_callbacks, initial_epoch = model_scope(
                config, total_steps, weights)

    callbacks = prepare_callbacks(
        config,
        outdir,
        ds_val,
        comet_experiment=experiment,
        horovod_enabled=config["setup"]["horovod_enabled"])

    verbose = 1
    if horovod_enabled:
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
        callbacks.append(hvd.callbacks.MetricAverageCallback())
        verbose = 1 if hvd.rank() == 0 else 0

        # Use floor division so steps_per_epoch stays an integer.
        num_train_steps //= hvd.size()
        num_test_steps //= hvd.size()

    callbacks.append(optim_callbacks)

    model.fit(
        ds_train.repeat(),
        validation_data=ds_test.repeat(),
        epochs=initial_epoch + config["setup"]["num_epochs"],
        callbacks=callbacks,
        steps_per_epoch=num_train_steps,
        validation_steps=num_test_steps,
        initial_epoch=initial_epoch,
        verbose=verbose,
    )
Example no. 20
 def __init__(self, exp_name, online=True, **kwargs):
     super(CometExperimentLogger, self).__init__(exp_name, **kwargs)
     if online:
         self.comet = Experiment(project_name=exp_name, **kwargs)
     else:
         self.comet = OfflineExperiment(project_name=exp_name, **kwargs)
Example no. 21
    # Parse configuration file
    batch_size = configuration['batch_size']
    seed = configuration['seed']
    n_epochs = configuration['n_epochs']

    # Set all seeds for full reproducibility
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    latent_dim = configuration['Zdim']
    if not os.path.exists('experiments'):
        print('mkdir ', 'experiments')
        os.mkdir('experiments')

    if configuration['encode']:
        experiment = OfflineExperiment(
            project_name="ali", workspace='timothynest',
            offline_directory='../experiments/' + configuration['experiment'])
    elif configuration['cluster']:
        experiment = OfflineExperiment(
            project_name="ali", workspace='timothynest',
            offline_directory='../experiments/'
            + configuration['experiment'] + '/cluster')
    experiment.set_name(name=configuration['experiment'])

    experiment.log_parameters(configuration)
    experiment.add_tag(configuration['experiment'])

    # Initiate experiment
    main(datapath, configuration, experiment)
Example no. 22
    # Parse configuration file
    seed = configuration['seed']

    # Set all seeds for full reproducibility
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    if not os.path.exists('experiments'):
        print('mkdir ', 'experiments')
        os.mkdir('experiments')

    experiment = OfflineExperiment(
        project_name="ali",
        workspace='timothynest',  # Replace this with appropriate comet workspace
        offline_directory='../experiments/' + configuration['experiment'])

    experiment.set_name(name=configuration['experiment'])
    experiment.log_parameters(configuration)

    experiment.add_tag(configuration['experiment'])

    MODEL_PATH = '../experiments/' + configuration['experiment'] + '/models'

    if not os.path.exists(MODEL_PATH):
        print('mkdir ', MODEL_PATH)
        os.mkdir(MODEL_PATH)

    configuration['MODEL_PATH'] = MODEL_PATH
Example no. 23

if __name__ == '__main__':
    SEED = 1234

    random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    config = get_config(sys.argv[1])
    # experiment = Experiment("wXwnV8LZOtVfxqnRxr65Lv7C2")
    comet_dir_path = os.path.join(config["result_directory"], config["model"])
    makedirs(comet_dir_path)
    experiment = OfflineExperiment(
        project_name="DeepGenomics",
        offline_directory=comet_dir_path)
    experiment.log_parameters(config)
    if torch.cuda.is_available():
        # torch.cuda.set_device(str(os.environ["CUDA_VISIBLE_DEVICES"]))
        device = torch.device('cuda:{}'.format(os.environ["CUDA_VISIBLE_DEVICES"]))
    else:
        device = torch.device('cpu')
    print(device)
    number_of_examples = len(get_filenames(os.path.join(config["data"], "x")))
    list_ids = [str(i) for i in range(number_of_examples)]
    random.shuffle(list_ids)
    t_ind, v_ind = round(number_of_examples * 0.7), round(number_of_examples * 0.9)
    train_indices, validation_indices, test_indices = list_ids[:t_ind], list_ids[t_ind:v_ind], list_ids[v_ind:]
    
    params = {'batch_size': config["training"]["batch_size"],
Example no. 24
# Random seed
if args.manualSeed is None:
    args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
torch.manual_seed(args.manualSeed)
if use_cuda:
    torch.cuda.manual_seed_all(args.manualSeed)

best_loss = 0  # best test loss

if not args.no_log_to_comet:
    if params['local_comet_dir']:
        comet_exp = OfflineExperiment(
            api_key="hIXq6lDzWzz24zgKv7RYz6blo",
            project_name="supercyclecons",
            workspace="cinjon",
            auto_metric_logging=True,
            auto_output_logging=None,
            auto_param_logging=False,
            offline_directory=params['local_comet_dir'])
    else:
        comet_exp = CometExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                    project_name="supercyclecons",
                                    workspace="cinjon",
                                    auto_metric_logging=True,
                                    auto_output_logging=None,
                                    auto_param_logging=False)
    comet_exp.log_parameters(vars(args))
    comet_exp.set_name(params['name'])


def partial_load(pretrained_dict, model):
Example no. 25
hyper_params = {
    "sequence_length": 28,
    "input_size": 28,
    "hidden_size": 128,
    "num_layers": 2,
    "num_classes": 10,
    "batch_size": 100,
    "num_epochs": 3,
    "learning_rate": 0.01
}

optimizer = Optimizer("pA3Hqc1pEswNvXOPtSoRobt7C")

experiment = OfflineExperiment(project_name="horoma",
                               offline_directory="./experiments",
                               disabled=False)
experiment.log_parameters(hyper_params)

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(
Example no. 26
import sys
sys.path.insert(0, os.path.abspath('../machine-tasks'))
from tasks import get_task
from loss import L1Loss

comet_args = {
    'project_name': 'attentive-guidance',
    'workspace': 'andresespinosapc',
}
if os.environ.get('COMET_DISABLE'):
    comet_args['disabled'] = True
    comet_args['api_key'] = ''
if os.environ.get('COMET_OFFLINE'):
    comet_args['api_key'] = ''
    comet_args['offline_directory'] = 'comet_offline'
    experiment = OfflineExperiment(**comet_args)
else:
    experiment = Experiment(**comet_args)


def log_comet_parameters(opt):
    opt_dict = vars(opt)
    for key in opt_dict.keys():
        experiment.log_parameter(key, opt_dict[key])


TASK_DEFAULT_PARAMS = {
    'task_defaults': {
        'batch_size': 128,
        'k': 3,
        'max_len': 60,
Example no. 27
def main(args):
    torch.manual_seed(0)

    # Get device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Get dataset
    dataset = Dataset("train.txt")
    loader = DataLoader(dataset,
                        batch_size=hp.batch_size**2,
                        shuffle=True,
                        collate_fn=dataset.collate_fn,
                        drop_last=True,
                        num_workers=hp.num_workers)

    speaker_encoder = None
    if hp.speaker_encoder_path != "":
        speaker_encoder = load_speaker_encoder(Path(hp.speaker_encoder_path),
                                               device).to(device)
        for param in speaker_encoder.parameters():
            param.requires_grad = False
        speaker_encoder.train()

    # Define model
    fastspeech_model = FastSpeech2(speaker_encoder).to(device)
    model = nn.DataParallel(fastspeech_model).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of FastSpeech2 Parameters:', num_param)

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-4,
                                 betas=hp.betas,
                                 eps=hp.eps,
                                 weight_decay=hp.weight_decay)
    scheduled_optim = ScheduledOptim(optimizer, hp.decoder_hidden,
                                     hp.n_warm_up_step, args.restore_step)
    Loss = FastSpeech2Loss().to(device)
    print("Optimizer and Loss Function Defined.")

    # Load checkpoint if exists
    checkpoint_path = os.path.join(hp.checkpoint_path)
    try:
        checkpoint = torch.load(
            os.path.join(checkpoint_path,
                         'checkpoint_{}.pth.tar'.format(args.restore_step)))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step {}---\n".format(args.restore_step))
    except Exception:  # no checkpoint found; start fresh
        print("\n---Start New Training---\n")
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)

    # Load vocoder
    if hp.vocoder == 'melgan':
        vocoder = utils.get_melgan()
        vocoder_infer = utils.melgan_infer
    elif hp.vocoder == 'waveglow':
        vocoder = utils.get_waveglow()
        vocoder_infer = utils.waveglow_infer
    else:
        raise ValueError("Vocoder '%s' is not supported", hp.vocoder)

    comet_experiment = None
    use_comet = int(os.getenv("USE_COMET", default=0))
    if use_comet != 0:
        if use_comet == 1:
            offline_dir = os.path.join(hp.models_path, "comet")
            os.makedirs(offline_dir, exist_ok=True)
            comet_experiment = OfflineExperiment(
                project_name="mlp-project",
                workspace="ino-voice",
                offline_directory=offline_dir,
            )
        elif use_comet == 2:
            comet_experiment = Experiment(
                api_key="BtyTwUoagGMh3uN4VZt6gMOn8",
                project_name="mlp-project",
                workspace="ino-voice",
            )

        comet_experiment.set_name(args.experiment_name)
        comet_experiment.log_parameters(hp)
        comet_experiment.log_html(args.m)

    start_time = time.perf_counter()
    first_mel_train_loss, first_postnet_train_loss, first_d_train_loss, first_f_train_loss, first_e_train_loss = \
        None, None, None, None, None

    for epoch in range(hp.epochs):
        total_step = hp.epochs * len(loader) * hp.batch_size
        for i, batchs in enumerate(loader):
            for j, data_of_batch in enumerate(batchs):
                model = model.train()

                current_step = i * hp.batch_size + j + args.restore_step + epoch * len(
                    loader) * hp.batch_size + 1

                # Get Data
                text = torch.from_numpy(
                    data_of_batch["text"]).long().to(device)
                mel_target = torch.from_numpy(
                    data_of_batch["mel_target"]).float().to(device)
                D = torch.from_numpy(data_of_batch["D"]).long().to(device)
                log_D = torch.from_numpy(
                    data_of_batch["log_D"]).float().to(device)
                f0 = torch.from_numpy(data_of_batch["f0"]).float().to(device)
                energy = torch.from_numpy(
                    data_of_batch["energy"]).float().to(device)
                src_len = torch.from_numpy(
                    data_of_batch["src_len"]).long().to(device)
                mel_len = torch.from_numpy(
                    data_of_batch["mel_len"]).long().to(device)
                max_src_len = np.max(data_of_batch["src_len"]).astype(np.int32)
                max_mel_len = np.max(data_of_batch["mel_len"]).astype(np.int32)

                # Forward
                mel_output, mel_postnet_output, log_duration_output, f0_output, energy_output, src_mask, mel_mask, _ = \
                    model(text, src_len, mel_target, mel_len, D, f0, energy, max_src_len, max_mel_len)

                # Cal Loss
                mel_loss, mel_postnet_loss, d_loss, f_loss, e_loss = Loss(
                    log_duration_output, log_D, f0_output, f0, energy_output,
                    energy, mel_output, mel_postnet_output, mel_target,
                    ~src_mask, ~mel_mask)
                total_loss = mel_loss + mel_postnet_loss + d_loss + f_loss + e_loss

                # Set initial values for scaling
                if first_mel_train_loss is None:
                    first_mel_train_loss = mel_loss.item()
                    first_postnet_train_loss = mel_postnet_loss.item()
                    first_d_train_loss = d_loss.item()
                    first_f_train_loss = f_loss.item()
                    first_e_train_loss = e_loss.item()

                mel_l = mel_loss.item() / first_mel_train_loss
                mel_postnet_l = mel_postnet_loss.item() / first_postnet_train_loss
                d_l = d_loss.item() / first_d_train_loss
                f_l = f_loss.item() / first_f_train_loss
                e_l = e_loss.item() / first_e_train_loss

                # Logger
                if comet_experiment is not None:
                    comet_experiment.log_metric(
                        "total_loss", mel_l + mel_postnet_l + d_l + f_l + e_l,
                        current_step)
                    comet_experiment.log_metric("mel_loss", mel_l,
                                                current_step)
                    comet_experiment.log_metric("mel_postnet_loss",
                                                mel_postnet_l, current_step)
                    comet_experiment.log_metric("duration_loss", d_l,
                                                current_step)
                    comet_experiment.log_metric("f0_loss", f_l, current_step)
                    comet_experiment.log_metric("energy_loss", e_l,
                                                current_step)

                # Backward
                total_loss = total_loss / hp.acc_steps
                total_loss.backward()
                if current_step % hp.acc_steps != 0:
                    continue

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(model.parameters(),
                                         hp.grad_clip_thresh)

                # Update weights
                scheduled_optim.step_and_update_lr()
                scheduled_optim.zero_grad()

                # Print
                if current_step % hp.log_step == 0:
                    now = time.perf_counter()

                    print("\nEpoch [{}/{}], Step [{}/{}]:".format(
                        epoch + 1, hp.epochs, current_step, total_step))
                    print(
                        "Total Loss: {:.4f}, Mel Loss: {:.5f}, Mel PostNet Loss: {:.5f}, Duration Loss: {:.5f}, "
                        "F0 Loss: {:.5f}, Energy Loss: {:.5f};".format(
                            mel_l + mel_postnet_l + d_l + f_l + e_l, mel_l,
                            mel_postnet_l, d_l, f_l, e_l))
                    print("Time Used: {:.3f}s".format(now - start_time))
                    start_time = now

                if current_step % hp.checkpoint == 0:
                    file_path = os.path.join(
                        checkpoint_path,
                        'checkpoint_{}.pth.tar'.format(current_step))
                    torch.save(
                        {
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict()
                        }, file_path)
                    print("saving model at to {}".format(file_path))

                if current_step % hp.synth_step == 0:
                    length = mel_len[0].item()
                    mel_target_torch = mel_target[
                        0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel_target = mel_target[
                        0, :length].detach().cpu().transpose(0, 1)
                    mel_torch = mel_output[0, :length].detach().unsqueeze(
                        0).transpose(1, 2)
                    mel = mel_output[0, :length].detach().cpu().transpose(0, 1)
                    mel_postnet_torch = mel_postnet_output[
                        0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel_postnet = mel_postnet_output[
                        0, :length].detach().cpu().transpose(0, 1)

                    if comet_experiment is not None:
                        comet_experiment.log_audio(
                            audiotools.inv_mel_spec(mel), hp.sampling_rate,
                            "step_{}_griffin_lim.wav".format(current_step))
                        comet_experiment.log_audio(
                            audiotools.inv_mel_spec(mel_postnet),
                            hp.sampling_rate,
                            "step_{}_postnet_griffin_lim.wav".format(
                                current_step))
                        comet_experiment.log_audio(
                            vocoder_infer(mel_torch,
                                          vocoder), hp.sampling_rate,
                            'step_{}_{}.wav'.format(current_step, hp.vocoder))
                        comet_experiment.log_audio(
                            vocoder_infer(mel_postnet_torch, vocoder),
                            hp.sampling_rate, 'step_{}_postnet_{}.wav'.format(
                                current_step, hp.vocoder))
                        comet_experiment.log_audio(
                            vocoder_infer(mel_target_torch,
                                          vocoder), hp.sampling_rate,
                            'step_{}_ground-truth_{}.wav'.format(
                                current_step, hp.vocoder))

                        f0 = f0[0, :length].detach().cpu().numpy()
                        energy = energy[0, :length].detach().cpu().numpy()
                        f0_output = f0_output[
                            0, :length].detach().cpu().numpy()
                        energy_output = energy_output[
                            0, :length].detach().cpu().numpy()

                        utils.plot_data(
                            [(mel_postnet.numpy(), f0_output, energy_output),
                             (mel_target.numpy(), f0, energy)],
                            comet_experiment, [
                                'Synthesized Spectrogram',
                                'Ground-Truth Spectrogram'
                            ])

                if current_step % hp.eval_step == 0:
                    model.eval()
                    with torch.no_grad():
                        if comet_experiment is not None:
                            with comet_experiment.validate():
                                d_l, f_l, e_l, m_l, m_p_l = evaluate(
                                    model, current_step, comet_experiment)
                                t_l = d_l + f_l + e_l + m_l + m_p_l

                                comet_experiment.log_metric(
                                    "total_loss", t_l, current_step)
                                comet_experiment.log_metric(
                                    "mel_loss", m_l, current_step)
                                comet_experiment.log_metric(
                                    "mel_postnet_loss", m_p_l, current_step)
                                comet_experiment.log_metric(
                                    "duration_loss", d_l, current_step)
                                comet_experiment.log_metric(
                                    "F0_loss", f_l, current_step)
                                comet_experiment.log_metric(
                                    "energy_loss", e_l, current_step)
Example no. 28
from a2c_ppo_acktr.model import Policy
from a2c_ppo_acktr.storage import RolloutStorage
from a2c_ppo_acktr.utils import get_vec_normalize, update_linear_schedule
from a2c_ppo_acktr.visualize import visdom_plot


args = get_args()

assert args.algo in ['a2c', 'ppo', 'acktr']
if args.recurrent_policy:
    assert args.algo in ['a2c', 'ppo'], \
        'Recurrent policy is not implemented for ACKTR'

if args.comet == "offline":
    experiment = OfflineExperiment(project_name="recurrent-value", workspace="nishanthvanand",
    disabled=args.disable_log, offline_directory="../comet_offline",
    parse_args=False)
elif args.comet == "online":
    experiment = Experiment(api_key="tSACzCGFcetSBTapGBKETFARf",
                        project_name="recurrent-value", workspace="nishanthvanand",
                        disabled=args.disable_log,
                        parse_args=False)
else:
    raise ValueError("args.comet must be 'offline' or 'online'")

experiment.log_parameters(vars(args))

num_updates = int(args.num_env_steps) // args.num_steps // args.num_processes

torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
Example no. 29
        verbose = 10,
        n_jobs = 2,
        n_points = 2,
        scoring = 'accuracy',
    )

    checkpoint_callback = skopt.callbacks.CheckpointSaver(f'D:\\FINKI\\8_dps\\Project\\MODELS\\skopt_checkpoints\\{EXPERIMENT_ID}.pkl')
    hyperparameters_optimizer.fit(X_train, y_train, callback = [checkpoint_callback])
    skopt.dump(hyperparameters_optimizer, f'saved_models\\{EXPERIMENT_ID}.pkl')

    y_pred = hyperparameters_optimizer.best_estimator_.predict(X_test)

    for i in range(len(hyperparameters_optimizer.cv_results_['params'])):
        exp = OfflineExperiment(
            api_key = 'A8Lg71j9LtIrsv0deBA0DVGcR',
            project_name = ALGORITHM,
            workspace = "8_dps",
            auto_output_logging = 'native',
            offline_directory = f'D:\\FINKI\\8_dps\\Project\\MODELS\\comet_ml_offline_experiments\\{EXPERIMENT_ID}'
        )
        exp.set_name(f'{EXPERIMENT_ID}_{i + 1}')
        exp.add_tags([DS, SEGMENTS_LENGTH])
        for k, v in hyperparameters_optimizer.cv_results_.items():
            if k == "params":
                exp.log_parameters(dict(v[i]))
            else:
                exp.log_metric(k, v[i])
        exp.end()

Example no. 30
            CLIENT_OPT_ARGS=common.get_args(client_opt),
            # CLIENT_OPT_L2=1e-4,
            CLIENT_OPT_STRATEGY=client_opt_strategy,
            SERVER_OPT=common.get_name(server_opt),
            SERVER_OPT_ARGS=common.get_args(server_opt),
            SERVER_LEARNING_RATE=server_lr,
            IS_IID_DATA=is_iid,
            BATCH_SIZE=B,
            CLIENT_FRACTION=C,
            N_CLIENTS=NC,
            N_EPOCH_PER_CLIENT=E,
            MAX_ROUNDS=max_rounds,
            MODEL=model,
        )
        config_technical = TorchFederatedLearnerTechnicalConfig(
            BREAK_ROUND=300,
            EVAL_ROUND=1,
            TEST_LAST=1,
            STORE_OPT_ON_DISK=False,
            STORE_MODEL_IN_RAM=False,
        )
        name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
        experiment = OfflineExperiment(offline_directory="/tmp",
                                       workspace="federated-learning-emnistlm",
                                       project_name=project_name)
        try:
            common.do_training_emnist(experiment, name, config,
                                      config_technical)
        except ToLargeLearningRateExcpetion:
            pass