Example #1
import wandb
wandb.login(key='1e505430989c86455d2d70e1ef990b4bc50cb69c')
wandb.init(project="ift6760-exp", anonymous='allow')
wandb.save("*.pth")
from load_data import Data
import numpy as np
import torch
import time
from collections import defaultdict
from model import *
from torch.optim.lr_scheduler import ExponentialLR
import argparse
import os


class Experiment:

    def __init__(self, learning_rate=0.0005, ent_vec_dim=200, rel_vec_dim=200, 
                 num_iterations=500, batch_size=128, decay_rate=0., cuda=False, 
                 input_dropout=0.3, hidden_dropout1=0.4, hidden_dropout2=0.5,
                 label_smoothing=0., bk=False):
        self.learning_rate = learning_rate
        self.ent_vec_dim = ent_vec_dim
        self.rel_vec_dim = rel_vec_dim
        self.num_iterations = num_iterations
        self.batch_size = batch_size
        self.decay_rate = decay_rate
        self.label_smoothing = label_smoothing
        self.cuda = cuda
        self.kwargs = {"input_dropout": input_dropout, "hidden_dropout1": hidden_dropout1,
                       "hidden_dropout2": hidden_dropout2, "bk": bk}
Example #2
def test_login_anonymous(mock_server, local_netrc):
    os.environ["WANDB_API_KEY"] = "B" * 40
    wandb.login(anonymous="must")
    assert wandb.api.api_key == "ANONYMOOSE" * 4
Example #3
import os

import pkg_resources as pkg

from utils.plots import plot_images, plot_results
from utils.torch_utils import de_parallel

LOGGERS = ('csv', 'tb', 'wandb')  # text-file, TensorBoard, Weights & Biases
RANK = int(os.getenv('RANK', -1))

try:
    import wandb

    assert hasattr(wandb, '__version__')  # verify package import not local dir
    if pkg.parse_version(
            wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in [
                0, -1
            ]:
        try:
            wandb_login_success = wandb.login(timeout=30)
        except wandb.errors.UsageError:  # known non-TTY terminal issue
            wandb_login_success = False
        if not wandb_login_success:
            wandb = None
except (ImportError, AssertionError):
    wandb = None


class Loggers():
    # YOLOv5 Loggers class
    def __init__(self,
                 save_dir=None,
                 weights=None,
                 opt=None,
                 hyp=None,
Example #4
    def setup(self, kwargs):
        """Complete setup for wandb.init().

        This includes parsing all arguments, applying them with settings and enabling
        logging.

        """
        self.kwargs = kwargs

        self._wl = wandb_setup._setup()
        # Make sure we have a logger setup (might be an early logger)
        _set_logger(self._wl._get_logger())

        # Start with settings from wandb library singleton
        settings = self._wl._clone_settings()
        settings_param = kwargs.pop("settings", None)
        if settings_param:
            settings._apply_settings(settings_param)

        self._reporter = reporting.setup_reporter(
            settings=settings.duplicate().freeze())

        sm_config = sagemaker.parse_sm_config()
        if sm_config:
            sm_api_key = sm_config.get("wandb_api_key", None)
            sm_run, sm_env = sagemaker.parse_sm_resources()
            if sm_env:
                if sm_api_key:
                    sm_env["WANDB_API_KEY"] = sm_api_key
                settings._apply_environ(sm_env)
                wandb.setup(settings=settings)
            for k, v in six.iteritems(sm_run):
                kwargs.setdefault(k, v)

        # Remove parameters that are not part of settings
        init_config = kwargs.pop("config", None) or dict()
        config_include_keys = kwargs.pop("config_include_keys", None)
        config_exclude_keys = kwargs.pop("config_exclude_keys", None)

        # Add deprecation message once we can better track it and document alternatives
        # if config_include_keys or config_exclude_keys:
        #     wandb.termwarn(
        #       "config_include_keys and config_exclude_keys are deprecated:"
        #       " use config=wandb.helper.parse_config(config_object, include=('key',))"
        #       " or config=wandb.helper.parse_config(config_object, exclude=('key',))"
        #     )

        init_config = parse_config(init_config,
                                   include=config_include_keys,
                                   exclude=config_exclude_keys)

        # merge config with sweep or sm (or config file)
        self.config = sm_config or self._wl._config or dict()
        for k, v in init_config.items():
            self.config.setdefault(k, v)

        monitor_gym = kwargs.pop("monitor_gym", None)
        if monitor_gym and len(wandb.patched["gym"]) == 0:
            wandb.gym.monitor()

        tensorboard = kwargs.pop("tensorboard", None)
        sync_tensorboard = kwargs.pop("sync_tensorboard", None)
        if tensorboard or sync_tensorboard and len(
                wandb.patched["tensorboard"]) == 0:
            wandb.tensorboard.patch()

        magic = kwargs.get("magic")
        if magic not in (None, False):
            magic_install(kwargs)

        # handle login related parameters as these are applied to global state
        anonymous = kwargs.pop("anonymous", None)
        force = kwargs.pop("force", None)
        login_key = wandb.login(anonymous=anonymous, force=force)
        if not login_key:
            settings.mode = "offline"

        # apply updated global state after login was handled
        settings._apply_settings(wandb.setup()._settings)

        settings._apply_init(kwargs)

        # TODO(jhr): should this be moved? probably.
        d = dict(
            _start_time=time.time(),
            _start_datetime=datetime.datetime.now(),
        )
        settings.update(d)

        if settings._jupyter:
            self._jupyter_setup(settings)

        self._log_setup(settings)

        self.settings = settings.freeze()
Example #5
def test_login_existing_key(local_netrc):
    os.environ["WANDB_API_KEY"] = "B" * 40
    wandb.ensure_configured()
    wandb.login()
    assert wandb.api.api_key == "B" * 40
Example #6
import argparse
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '../'))

import pandas as pd
import numpy as np

import pickle
from collections import Counter
from tqdm import tqdm

try:
    from dotenv import find_dotenv, load_dotenv
    import wandb
    load_dotenv(find_dotenv())
    wandb.login(key=os.environ['WANDB_API_KEY'])
    from wandb.keras import WandbCallback
    _has_wandb = True
except:
    _has_wandb = False

import tensorflow as tf
import tensorflow.keras.backend as K

import tokenizers
from transformers import TFAutoModel, AutoTokenizer, AutoConfig

from src import data, models

from sklearn.metrics import f1_score, precision_score, recall_score
Example #7
def run(opts):

    rank = opts.local_rank if torch.cuda.device_count() > 1 else 0

    # Set the random seed
    torch.manual_seed(opts.seed + rank)
    random.seed(opts.seed + rank)
    np.random.seed(opts.seed + rank)

    if not os.path.exists(opts.save_dir) and rank == 0:
        os.makedirs(opts.save_dir)

    # Optionally configure wandb
    if not opts.no_wandb and rank == 0:
        wandb.login(anonymous='never', key='31ce01e4120061694da54a54ab0dafbee1262420')
        wandb.init(dir=opts.save_dir,
                   config=opts,
                   project='large_scale_tsp',
                   name=opts.run_name,
                   sync_tensorboard=True,
                   save_code=True)

    # Set the device
    if opts.use_cuda:
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        opts.device = torch.device("cuda", rank)

    else:
        opts.device = torch.device("cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        if rank == 0:
            print('  [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(model_class)
    model: torch.nn.Module = model_class(
        opts.embedding_dim,
        opts.hidden_dim,
        problem,
        attention_type=opts.attention_type,
        n_encode_layers=opts.n_encode_layers,
        n_heads=opts.n_heads,
        feed_forward_dim=opts.feed_forward_dim,
        encoding_knn_size=opts.encoding_knn_size,
        decoding_knn_size=opts.decoding_knn_size,
        mask_inner=True,
        mask_logits=True,
        normalization=opts.normalization,
        tanh_clipping=opts.tanh_clipping,
        checkpoint_encoder=opts.checkpoint_encoder,
        shrink_size=opts.shrink_size).to(opts.device)

    if opts.init_normalization_parameters:
        for m in model.modules():
            if isinstance(m, Normalization):
                m.init_parameters()

    if opts.use_cuda:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(
            opts.device)
        model = DDP(model, device_ids=[rank])

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else CriticNetwork(
                 2, opts.embedding_dim, opts.hidden_dim, opts.n_encode_layers,
                 opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    scaler = torch.cuda.amp.GradScaler() if opts.precision == 16 else None

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                # if isinstance(v, torch.Tensor):
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Set the random states
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        if rank == 0:
            print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, scaler, baseline, lr_scheduler,
                        epoch, val_dataset, problem, opts)
Example #8
def main(argv):
    wandb.login()

    is_gpu = torch.cuda.is_available()

    config = RobertaConfig(
        vocab_size=FLAGS.vocab_size,
        max_position_embeddings=FLAGS.max_position_embeddings,
        num_attention_heads=FLAGS.num_attention_heads,
        num_hidden_layers=FLAGS.num_hidden_layers,
        type_vocab_size=FLAGS.type_vocab_size,
    )

    if FLAGS.tokenizer_path:
        tokenizer_path = FLAGS.tokenizer_path
    elif FLAGS.tokenizer_type.upper() == "BPE":
        tokenizer_path = FLAGS.output_tokenizer_dir
        if not os.path.isdir(tokenizer_path):
            os.makedirs(tokenizer_path)

        tokenizer = ByteLevelBPETokenizer()
        tokenizer.train(
            files=FLAGS.dataset_path,
            vocab_size=FLAGS.vocab_size,
            min_frequency=FLAGS.BPE_min_frequency,
            special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>"])
        tokenizer.save_model(tokenizer_path)
    else:
        print("Please provide a tokenizer path if using the SMILES tokenizer")

    tokenizer = RobertaTokenizerFast.from_pretrained(
        tokenizer_path, max_len=FLAGS.max_tokenizer_len)

    model = RobertaForMaskedLM(config=config)
    model.num_parameters()

    dataset = RawTextDataset(tokenizer=tokenizer,
                             file_path=FLAGS.dataset_path,
                             block_size=FLAGS.tokenizer_block_size)

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=True, mlm_probability=FLAGS.mlm_probability)

    training_args = TrainingArguments(
        output_dir=FLAGS.output_dir,
        overwrite_output_dir=FLAGS.overwrite_output_dir,
        num_train_epochs=FLAGS.num_train_epochs,
        per_device_train_batch_size=FLAGS.per_device_train_batch_size,
        save_steps=FLAGS.save_steps,
        save_total_limit=FLAGS.save_total_limit,
        fp16=is_gpu,  # fp16 only works on CUDA devices
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
    )

    trainer.train()
    trainer.save_model(FLAGS.model_name)
Example #9
def test_login_invalid_key():
    os.environ["WANDB_API_KEY"] = "B" * 40
    wandb.ensure_configured()
    with pytest.raises(wandb.UsageError):
        wandb.login()
    del os.environ["WANDB_API_KEY"]
Example #10
def main():
    wandb.login()

    # CIFAR use this
    config = dict(
        n_epochs=120,
        batch_size=128,
        classes=10,
        noise_rate=0.4,
        is_symmetric_noise=True,
        fraction=1.0,
        compute_memorization=True,
        dataset_name='CIFAR10',  # opt: 'CIFAR10', 'CIFAR100', 'CDON' (not implemented)
        model_path='./models/CIFAR10_20.mdl',
        plot_path='./results/CIFAR10_20',
        learning_rate=0.02,
        momentum=0.9,
        weight_decay=1e-3,
        milestones=[40, 80],
        gamma=0.01,
        enable_amp=True,
        use_ELR=True,
        elr_lambda=3.0,
        elr_beta=0.7)

    # CDON use this
    config = dict(
        n_epochs=120,
        batch_size=128,
        classes=64,  #157 categories for clothing # total subcategories is 3516
        noise_rate=0.0,
        is_symmetric_noise=True,
        fraction=1.0,
        compute_memorization=False,
        dataset_name='CDON',  # opt: 'CIFAR10', 'CIFAR100', 'CDON'
        model_path='./models/CDON_CE.mdl',
        plot_path='./results/CDON_CE',
        learning_rate=0.02,
        momentum=0.9,
        weight_decay=1e-3,
        milestones=[40, 80],
        gamma=0.01,
        enable_amp=True,
        use_ELR=False,
        elr_lambda=3.0,
        elr_beta=0.7)

    trainer_config = {
        'model': ResNet34,
        'optimizer': optim.SGD,
        'optimizer_params': {
            'lr': config['learning_rate'],
            'momentum': config['momentum'],
            'weight_decay': config['weight_decay']
        },
        'scheduler': optim.lr_scheduler.MultiStepLR,
        'scheduler_params': {
            'milestones': config['milestones'],
            'gamma': config['gamma']
        },
        'criterion': torch.nn.CrossEntropyLoss,
        'criterion_params': {}
    }

    # use_CosAnneal = {
    #     'scheduler': optim.lr_scheduler.CosineAnnealingWarmRestarts,
    #     'scheduler_params': {"T_0": 10, "eta_min": 0.001},
    #     # 'scheduler_params': {'T_max': 200, 'eta_min': 0.001}
    # }
    # trainer_config.update(use_CosAnneal)

    if config['use_ELR']:
        use_ELR = {
            'criterion': ELR_loss,
            'criterion_params': {
                'beta': config['elr_beta'],
                'lam': config['elr_lambda']
            }
        }
        trainer_config.update(use_ELR)

    model_pipeline(config, trainer_config, loadExistingWeights=False)
Example #11
train_ds = train_ds.map(add_channel_dim)
val_ds = val_ds.map(add_channel_dim)

"""### simple model"""

import tensorflow.keras as keras
from tensorflow.keras import layers, losses, optimizers

if running_in_colab:
    !pip install wandb -qqq

import wandb as wb
from wandb.keras import WandbCallback

wb.login()

wb.init(project='bird_ID', config={'lr': 1e-3, 'bs': 32})
config = wb.config

keras.backend.clear_session()

model = tf.keras.Sequential([
            layers.Conv2D(filters=32, kernel_size=(4,4), strides=1, activation='relu', input_shape=(284, 257, 1)),
            layers.MaxPool2D(pool_size=(4,4)),
            layers.Conv2D(filters=64, kernel_size=(4,4), strides=1, activation='relu'),
            layers.MaxPool2D(pool_size=(4,4)),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(3)
])
Example #12
def main():
    global args, is_server
    if is_server:
        wandb.login()

    config = dict(
        vis_prefix=args.vis_prefix,
        resume=args.resume,
    )

    if is_server:
        wandb.init(config=config,
                   project="vol.4",
                   name=args.run_name,
                   tags=args.tags)

    # define constants
    # vis_prefix = 'baseline'
    CLASS_AMOUNT = 5
    DEPTH = 50
    root_dir = 'data/'
    # resume = "checkpoints/baseline_checkpoint.pth"
    traindir = os.path.join(root_dir, 'train')
    train_labels = os.path.join(root_dir, 'train', 'images_onehot_train.txt')
    valdir = os.path.join(root_dir, 'val')
    val_labels = os.path.join(root_dir, 'val', 'images_onehot_val.txt')

    # define the model
    model = ResidualNet('ImageNet', DEPTH, CLASS_AMOUNT, 'CBAM')
    if is_server:
        model = model.cuda(args.cuda_device)

    # # load the checkpoint
    if os.path.isfile(args.resume):
        print(f"=> loading checkpoint '{args.resume}'")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        state_dict = checkpoint['state_dict']

        model.load_state_dict(state_dict)
        print(f"=> loaded checkpoint '{args.resume}'")
        print(f"epoch = {checkpoint['epoch']}")
    else:
        print(f"=> no checkpoint found at '{args.resume}'")
        return -1

    # define datasets and data loaders
    size0 = 224
    segm_dir = "images/256ISIC2018_Task1_Training_GroundTruth/"
    train_dataset = DatasetISIC2018(
        train_labels,
        traindir,
        segm_dir,
        size0,
        False,  # perform flips
        False,  # perform random resized crop with size = 224
        transforms.CenterCrop(size0))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               pin_memory=True)

    val_dataset = DatasetISIC2018(val_labels, valdir, segm_dir, size0, False,
                                  False, transforms.CenterCrop(size0))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True)

    # create directories to save plots
    if args.vis_prefix is not None:
        if not os.path.exists(f'vis/{args.vis_prefix}'):
            os.mkdir(f'vis/{args.vis_prefix}')

        if not os.path.exists(f'vis/{args.vis_prefix}/train'):
            os.mkdir(f'vis/{args.vis_prefix}/train')

        if not os.path.exists(f'vis/{args.vis_prefix}/val'):
            os.mkdir(f'vis/{args.vis_prefix}/val')

    for i, dictionary in enumerate(train_loader):
        input_img = dictionary['image']
        img_name = dictionary['name'][0]
        no_norm_image = dictionary['no_norm_image']
        segm = dictionary['segm']
        if is_server:
            input_img = input_img.cuda(args.cuda_device)
        make_plot_and_save(input_img,
                           img_name,
                           no_norm_image,
                           segm,
                           model,
                           'train',
                           vis_prefix=args.vis_prefix)
        return

    for i, dictionary in enumerate(val_loader):
        input_img = dictionary['image']
        img_name = dictionary['name'][0]
        no_norm_image = dictionary['no_norm_image']
        segm = dictionary['segm']
        if is_server:
            input_img = input_img.cuda(args.cuda_device)
        make_plot_and_save(input_img,
                           img_name,
                           no_norm_image,
                           segm,
                           model,
                           'val',
                           vis_prefix=args.vis_prefix)
Example #13
from a2c_ppo_acktr.arguments import get_args
from a2c_ppo_acktr.envs import make_vec_envs
from a2c_ppo_acktr.model import Policy, RandomPolicy, NaviBase, VGGBase, MobilenetBase, EfficientnetBase
from a2c_ppo_acktr.storage import RolloutStorage
from evaluation import evaluate

args = get_args()

if args.wandb is not None:
    if not utils.is_connected():
        print("no internet connection. Going in dry")  # ehehehe
        os.environ["WANDB_MODE"] = "dryrun"
    import wandb

    if args.wandb_key is not None:
        wandb.login(key=args.wandb_key)
    if args.wandb_name is None:
        wandb.init(project=args.wandb)
    else:
        wandb.init(project=args.wandb, name=args.wandb_name)

    if args.env_name.startswith("Pupper"):
        env_name_parts = args.env_name.split("-")
        params = []
        for part in env_name_parts:
            if "_" in part:
                parts = part.split("_")
                setattr(args, parts[0], float(parts[1]))

        envtypes = ["Incremental", "Absolute", "Relative"]
        # get the right type and delist all others
Example #14
def test_login(test_settings):
    s = wandb.Settings(mode="disabled")
    test_settings._apply_settings(s)
    wandb.setup(test_settings)
    wandb.login("")
Example #15
from tasks.binary import Binary
from tasks.multi import Multi 
from tasks.tokenize import Tokenize

from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import label_binarize

from pyhdfs import HdfsClient
import pickle
import numpy as np

import wandb
wandb.login(key='6525130c8b35bcd27b1bc36f79ad88847e7dd982')

class Wheel(object):

    MODEL = 'CNNRNNPooling'
    TASK = 'SB'

    def __init__(self,config):
        #Basic Configs
        self.device = 'cuda'
        self.__test_iter = None
        self.C3_HDFS_HOST = 'c3.nn01.nhnsystem.com:50070'

        #Config File
        self.username = config['username']
        self.batch_size = config['batch']
Example #16
from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import (
    BertTokenizer,
    BertModel,
    BertConfig,
    Trainer,
    TrainingArguments,
)
from transformers.models.bert.modeling_bert import (
    BertPreTrainedModel,
    BertOnlyMLMHead,
    MaskedLMOutput,
)

wandb.login(key='8cefb8016177b89343b4f6c8eed0c154b55b006b')
os.system('wandb online')
os.environ['WANDB_PROJECT'] = 'bert_oppo_pretrain'

from .utils import seed_everyone

os.environ['CUDA_VISIBLE_DEVICES'] = '2'


class BertForMaskedLM(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config=config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)

    def forward(
Example #17
def main(args):
    train_loader, _ = data_helper.get_data(args.dataset, args.batch_size,
                                           args.image_size, args.environment)
    if args.wandb:
        wandb_name = "%s[%d]_%s" % (args.dataset, args.image_size,
                                    args.model_name)
        wandb.login()
        wandb.init(project="AAE", config=args, name=wandb_name)
    inception_model_score = load_inception_model(train_loader, args.dataset,
                                                 args.image_size,
                                                 args.environment)
    ae_optimizer, d_optimizer, decoder, discriminator, encoder, g_optimizer, mapper = \
        model.get_aae_model_and_optimizer(args)
    if args.model_name == 'mimic':
        mapper = model.Mimic(args.latent_dim, args.latent_dim,
                             args.mapper_inter_nz,
                             args.mapper_inter_layer).to(args.device)
        decoder, encoder = pretrain_autoencoder(ae_optimizer, args, decoder,
                                                encoder, train_loader)
    if args.model_name == 'non-prior':
        mapper, m_optimizer = model.get_nonprior_model_and_optimizer(args)
    if args.model_name == 'learning-prior':
        mapper, m_optimizer, discriminator_forpl, dpl_optimizer = \
            model.get_learning_prior_model_and_optimizer(args)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=1e-4)

    global start_time
    start_time = time.time()
    if args.pretrain_epoch > 0:
        pretrain_autoencoder(ae_optimizer, args, decoder, encoder,
                             train_loader)

    log_dict, log, log2 = {}, {}, {}
    for i in range(0, args.epochs):
        log_dict, log, log2 = {}, {}, {}
        if args.time_limit and timeout(args.time_limit, start_time): break
        encoded_feature_list = []
        for each_batch in tqdm.tqdm(train_loader,
                                    desc="train[%d/%d]" % (i, args.epochs)):
            each_batch = each_batch[0].to(args.device)
            if args.model_name in ['aae', 'mask_aae']:
                log = model.update_aae(ae_optimizer, args, d_optimizer,
                                       decoder, discriminator, each_batch,
                                       encoder, g_optimizer, args.latent_dim)
            elif args.model_name == 'mimic':
                log, encoded_feature = \
                    model.update_autoencoder(ae_optimizer, each_batch, encoder, decoder, return_encoded_feature=True)
                encoded_feature_list.append(encoded_feature)
            elif args.model_name == 'non-prior':
                log, encoded_feature = model.update_autoencoder(
                    ae_optimizer,
                    each_batch,
                    encoder,
                    decoder,
                    return_encoded_feature_gpu=True,
                    flag_retain_graph=False)
                log2 = model.update_posterior_part(args, mapper, discriminator,
                                                   m_optimizer, d_optimizer,
                                                   encoded_feature)
            elif args.model_name == 'learning-prior':
                log = model.update_aae_with_mappedz(args, ae_optimizer,
                                                    d_optimizer, decoder,
                                                    discriminator, mapper,
                                                    each_batch, encoder,
                                                    g_optimizer)
                log2 = model.update_mapper_with_discriminator_forpl(
                    args, dpl_optimizer, decoder_optimizer, m_optimizer,
                    discriminator_forpl, decoder, mapper, each_batch)
            if args.model_name == 'mimic':
                g_loss = model.train_mapper(args, encoder, mapper, args.device,
                                            args.lr, args.batch_size,
                                            encoded_feature_list)

        log_dict.update(log)
        log_dict.update(log2)

        # Log to wandb at every log interval, but only when neither the time_check nor the time_limit option is set
        if args.wandb and not args.time_check and not args.time_limit:
            decoder, discriminator, encoder, mapper = log_and_write_pca(
                args, decoder, discriminator, encoder, i,
                inception_model_score, mapper, log_dict)

    # Log to wandb once at the very end when either the time_check or the time_limit option is set
    if args.wandb and (args.time_check or args.time_limit):
        decoder, discriminator, encoder, mapper = log_and_write_pca(
            args, decoder, discriminator, encoder, i, inception_model_score,
            mapper, log_dict)

    save_models(args, decoder, encoder, mapper)

    if args.wandb:
        wandb.finish()
Example #18
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, record=False, record_project='benchmarking', record_name='trained', data_path='', config_name='test', max_len_rb=100, benchmark=False, log_prefix=''):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    ep_cost = 0
    local_steps_per_epoch = int(4000 / num_procs())

    obs_dim = env.observation_space.shape
    act_dim = env.action_space.shape

    rew_mov_avg_10 = []
    cost_mov_avg_10 = []

    if benchmark:
        ep_costs = []
        ep_rewards = []

    if record:
        wandb.login()
        # 4 million env interactions
        wandb.init(project=record_project, name=record_name)

        rb = ReplayBuffer(size=10000,
                          env_dict={
                              "obs": {"shape": obs_dim},
                              "act": {"shape": act_dim},
                              "rew": {},
                              "next_obs": {"shape": obs_dim},
                              "done": {}})

        # columns = ['observation', 'action', 'reward', 'cost', 'done']
        # sim_data = pd.DataFrame(index=[0], columns=columns)

    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        next_o, r, d, info = env.step(a)

        if record:
            # buf.store(next_o, a, r, None, info['cost'], None, None, None)
            done_int = int(d==True)
            rb.add(obs=o, act=a, rew=r, next_obs=next_o, done=done_int)

        ep_ret += r
        ep_len += 1
        ep_cost += info['cost']

        # Important!
        o = next_o

        if d or (ep_len == max_ep_len):
            # finish recording and save csv
            if record:
                rb.on_episode_end()

                # make directory if does not exist
                if not os.path.exists(data_path + config_name + '_episodes'):
                    os.makedirs(data_path + config_name + '_episodes')

                # buf = CostPOBuffer(obs_dim, act_dim, local_steps_per_epoch, 0.99, 0.99)

            if len(rew_mov_avg_10) >= 25:
                rew_mov_avg_10.pop(0)
                cost_mov_avg_10.pop(0)

            rew_mov_avg_10.append(ep_ret)
            cost_mov_avg_10.append(ep_cost)

            mov_avg_ret = np.mean(rew_mov_avg_10)
            mov_avg_cost = np.mean(cost_mov_avg_10)

            expert_metrics = {log_prefix + 'episode return': ep_ret,
                              log_prefix + 'episode cost': ep_cost,
                              # 'cumulative return': cum_ret,
                              # 'cumulative cost': cum_cost,
                              log_prefix + '25ep mov avg return': mov_avg_ret,
                              log_prefix + '25ep mov avg cost': mov_avg_cost
                              }

            if benchmark:
                ep_rewards.append(ep_ret)
                ep_costs.append(ep_cost)

            wandb.log(expert_metrics)
            logger.store(EpRet=ep_ret, EpLen=ep_len, EpCost=ep_cost)
            print('Episode %d \t EpRet %.3f \t EpLen %d \t EpCost %d' % (n, ep_ret, ep_len, ep_cost))
            o, r, d, ep_ret, ep_len, ep_cost = env.reset(), 0, False, 0, 0, 0
            n += 1


    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    if record:
        print("saving final buffer")
        bufname_pk = data_path + config_name + '_episodes/sim_data_' + str(int(num_episodes)) + '_buffer.pkl'
        file_pi = open(bufname_pk, 'wb')
        pickle.dump(rb.get_all_transitions(), file_pi)
        wandb.finish()

        return rb

    if benchmark:
        return ep_rewards, ep_costs
Example #19
import subprocess
import argparse

import pandas as pd

import wandb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Set random seed
seed = 42

# Construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-k", "--key", required=True, help="wandb API key")
args = vars(ap.parse_args())

# Set wandb up
print(wandb.login(key=args["key"]))
print(wandb.init(project="wandb-github-action"))

################################
########## DATA PREP ###########
################################

# Load in the data
df = pd.read_csv("wine_quality.csv")

# Split into train and test sections
y = df.pop("quality")
X_train, X_test, y_train, y_test = train_test_split(df,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=seed)
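
Example #19 stops after the train/test split; a hypothetical continuation (not from the original source) would fit the regressor and report a test metric to the run created above:

# Hypothetical continuation sketch: train the model and log test R^2 to W&B.
regressor = RandomForestRegressor(max_depth=4, random_state=seed)
regressor.fit(X_train, y_train)
wandb.log({"test_r2": regressor.score(X_test, y_test)})
wandb.finish()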
Example #20
def fit_network(network,
                train_loader,
                valid_loader,
                criterion,
                acc_metric,
                optimizer,
                num_epochs=5,
                checkpoint_path=None,
                metric='loss',
                start_from_checkpoint=False,
                lr_scheduler=None,
                log_dict={}):
    """
    Trains and validates the given network for specified number of epochs. 
    Includes logging, saving checkpoints and lr_scheduling facilities.

    Parameters
    ----------
    network : nn.Module
        network you want to train for segmentation
    train_loader : torch dataloader
        provides batches of training data
    valid_loader : torch dataloader
        provides batches of validation data
    criterion : nn.Module
        loss criterion
    acc_metric : function
        accuracy metric
    optimizer : torch.optim
        optimizer
    num_epochs : int, optional
        number of epochs, by default 5
    checkpoint_path : str, optional
        path of the checkpoint to load from or save to
        (w.r.t. current working dir.), by default None
    metric : str, by default 'loss'
        metric to use for saving the best network, 
        'loss': min valid loss is used,
        'accuracy': max valid accuracy is used
    start_from_checkpoint : bool, optional
        start training from checkpoint, by default False
    lr_scheduler : torch lr scheduler, optional
        learning rate scheduler, by default None
    log_dict : dict, optional
        log training data to wandb. To enable logging, provide the log_dict.
        Logging will prompt you to provide login credentials for wandb; by default {} (logging disabled)

        Sample log_dict:
        log_dict={
            'name': 'Package',                  # Name of each run (change at each run)
            'entity': 'MML',          # username of wandb account
            'project': 'package_test',          # project name
            'notes': 'Test run',                # adding notes to the run
            'tags': ['Test'],                   # adding tags to runs for grouping
                                                # list of tags can have multiple tags

            'log_histograms': False       # log network parameters and weights (True/False)
            }
    """
    ## Sanity Check ##
    if start_from_checkpoint == True and checkpoint_path is None:
        raise AssertionError(
            'start_from_checkpoint must be False when checkpoint_path is None')

    if metric.lower() not in ['loss', 'accuracy']:
        raise AssertionError("metric must be in ['loss', 'accuracy']")

    if not start_from_checkpoint:
        start_epoch = 1
        loss_min = np.inf
        print('Starting Training from Epoch: {}\n'.format(start_epoch))
    else:
        if not os.path.exists(checkpoint_path):
            raise AssertionError(
                'Checkpoint Path does not exist. Make start_from_checkpoint=False.'
            )

        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        network.load_state_dict(checkpoint['network'])
        optimizer = checkpoint['optimizer']
        loss_min = checkpoint['loss']

        if start_epoch > num_epochs:
            raise AssertionError(
                'Increase the number of num of epochs beyond {} to continue training'
                .format(start_epoch))
        print('Resuming Training from Epoch: {}\n'.format(start_epoch))

    # if log_dict is not None, log to wandb
    if log_dict:
        wandb.login()
        # configuration parameters
        config_dict = {
            'network': network.__class__.__name__,
            'criterion': criterion.__class__.__name__,
            'acc_metric': acc_metric.__class__.__name__,
            'learning_rate': optimizer.state_dict()['param_groups'][0]['lr'],
            'num_epochs': num_epochs,
            'optimizer': optimizer.__class__.__name__,
            'patience': lr_scheduler.state_dict()['patience'] if lr_scheduler else None,
            'decay_factor': lr_scheduler.state_dict()['factor'] if lr_scheduler else None,
            'metric': metric,
            'dataset': train_loader.dataset.__dict__['dataset'].name
        }

        # initialization parameters
        wandb.init(name=log_dict['name'],
                   entity=log_dict['entity'],
                   project=log_dict['project'],
                   notes=log_dict['notes'],
                   tags=log_dict['tags'],
                   config=config_dict)

        # if you want to log network weight histograms
        if log_dict['log_histograms']:
            print('\nLogging Network Parameters\n')
            wandb.watch(network)

    loss_min = np.inf
    acc_max = 0
    for epoch in range(start_epoch, num_epochs + 1):

        loss_train, acc_train = train(network, criterion, optimizer,
                                      train_loader, acc_metric)

        loss_valid, acc_valid = validate(network, criterion, valid_loader,
                                         acc_metric)

        # Reduce the learning rate on Plateau
        if lr_scheduler:
            lr_scheduler.step(loss_valid)

        sys.stdout.write('\r')
        sys.stdout.flush()
        print(
            '\n----------------------------------------------------------------------------------------'
        )
        print(
            'Epoch: {}  Train Loss: {:.4f}  Train Acc: {:.4f}  Valid Loss: {:.4f}  Valid Acc: {:.4f}'
            .format(epoch, loss_train, acc_train, loss_valid, acc_valid))
        print(
            '----------------------------------------------------------------------------------------'
        )

        # Save checkpoint
        loss_min, acc_max = save_checkpoint(epoch, network, optimizer,
                                            loss_valid, acc_valid, loss_min,
                                            acc_max, metric, checkpoint_path)

        # log the performance to wandb
        if log_dict:
            log_wandb(epoch, loss_train, loss_valid, loss_min, acc_train,
                      acc_valid, acc_max, lr_scheduler)
    print('\n---Training Complete---')
Example #21
if __name__ == '__main__':
    opt = TrainOptions().parse()  # get training options
    model = create_model(
        opt)  # create a model given opt.model and other options
    model.setup(
        opt)  # regular setup: load and print networks; create schedulers
    dataset = create_dataset(
        opt)  # create a dataset given opt.dataset_mode and other options
    dataset_size = len(dataset)  # get the number of images in the dataset.
    print('The number of training samples = %d' % dataset_size)
    evaluator = get_evaluator(opt, model=model, dataset=dataset)
    total_iters = 0  # the total number of training iterations
    epoch = 0

    if opt.wandb:
        wandb.login(key=os.environ.get('WANDB_API_KEY'))
        wandb.init(config=opt)

    while total_iters < opt.total_num_giters:
        epoch_start_time = time.time()  # timer for entire epoch
        iter_data_time = time.time()  # timer for data loading per iteration
        epoch_iter = 0  # the number of training iterations in current epoch, reset to 0 every epoch

        for i, data in enumerate(dataset):  # inner loop within one epoch
            iter_start_time = time.time(
            )  # timer for computation per iteration

            if total_iters % opt.print_freq == 0:
                t_data = iter_start_time - iter_data_time

            model.set_input(
Example #22
checkpoints = list(
    filter(lambda x: '.ckpt' in x,
           os.listdir(classifier_model_dir))) if load_pretrained else []
load_pretrained = load_pretrained and len(checkpoints) > 0

# print(model)
# logger = TensorBoardLogger("logs", name=classifier_model_name, log_graph=True)
#%%
if __name__ == "__main__":
    if load_pretrained:
        checkpoint_path = os.path.join(classifier_model_dir, checkpoints[-1])
        model = classifier.load_from_checkpoint(checkpoint_path)
    else:
        model = classifier(**model_args)
    pl.seed_everything(42)
    wandb.login(key='355d7f0e367b84fb9f8a140be052641fbd926fb5')
    logger = WandbLogger(name=classifier_model_name,
                         save_dir='logs',
                         offline=True)
    logger.watch(model, log='gradients', log_freq=100)
    #logger = TensorBoardLogger("logs", name=classifier_model_name, log_graph=True)
    grad_acumulator = GradientAccumulationScheduler(scheduling={0: 2, 1: 3})
    lr_monitor = LearningRateMonitor(logging_interval='step')
    model_chkpt = ModelCheckpoint(dirpath=classifier_model_dir,
                                  monitor='val_acc_epoch',
                                  filename='{epoch}-{val_acc_epoch:.2f}',
                                  verbose=True)
    early_stopper = EarlyStopping(monitor='val_acc_epoch',
                                  patience=6,
                                  verbose=True)
    trainer = pl.Trainer(logger=logger,
Example #23
def test_login_key(local_netrc, capsys):
    wandb.login(key="A" * 40)
    out, err = capsys.readouterr()
    assert "wandb: WARNING If" in err
    assert wandb.api.api_key == "A" * 40
Example #24
def wandb_setup():
    wandb.login()
    wandb.init(
        project="mnist-single-node-single-gpu",
        entity=os.environ.get("WANDB_USERNAME", "my-user-name"),
    )
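
A hedged sketch of how a helper like wandb_setup() might be called from a training entry point (hypothetical caller, not part of the original example):

def train_entrypoint():
    # Assumes WANDB_USERNAME is exported in the environment, as in Example #24.
    wandb_setup()
    for step in range(100):
        wandb.log({"loss": 1.0 / (step + 1)})
    wandb.finish()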
Example #25
def test_login_jupyter_anonymous(mock_server, local_netrc, mocker):
    python = mocker.patch("wandb._get_python_type")
    python.return_value = "ipython"
    wandb.login(anonymous="allow")
    assert wandb.api.api_key == "ANONYMOOSE" * 4
Example #26
def build_trainer(opt, device_id, model, fields, optim, model_saver=None):
    """
    Simplify `Trainer` creation based on user `opt`s*

    Args:
        opt (:obj:`Namespace`): user options (usually from argument parsing)
        model (:obj:`onmt.models.NMTModel`): the model to train
        fields (dict): dict of fields
        optim (:obj:`onmt.utils.Optimizer`): optimizer used during training
        data_type (str): string describing the type of data
            e.g. "text", "img", "audio"
        model_saver(:obj:`onmt.models.ModelSaverBase`): the utility object
            used to save the model
    """

    tgt_field = dict(fields)["tgt"].base_field
    train_loss = onmt.utils.loss.build_loss_compute(model, tgt_field, opt)
    valid_loss = onmt.utils.loss.build_loss_compute(model,
                                                    tgt_field,
                                                    opt,
                                                    train=False)

    trunc_size = opt.truncated_decoder  # Badly named...
    shard_size = opt.max_generator_batches if opt.model_dtype == 'fp32' else 0

    # @memray: BPTT is not compatible with Orth and SemCov,
    # Otherwise will trigger error: raise RuntimeError("grad can be implicitly created only for scalar outputs")
    #    at function shards() in loss.py (torch.autograd.backward(inputs, grads))
    if opt.data_type == 'keyphrase' or opt.model_type == 'keyphrase':
        trunc_size = 0
        shard_size = 0

    norm_method = opt.normalization
    accum_count = opt.accum_count
    accum_steps = opt.accum_steps
    n_gpu = opt.world_size
    average_decay = opt.average_decay
    average_every = opt.average_every
    dropout = opt.dropout
    dropout_steps = opt.dropout_steps
    if device_id >= 0:
        gpu_rank = opt.gpu_ranks[device_id]
    else:
        gpu_rank = 0
        n_gpu = 0
    gpu_verbose_level = opt.gpu_verbose_level

    earlystopper = onmt.utils.EarlyStopping(
        opt.early_stopping, scorers=onmt.utils.scorers_from_opts(opt)) \
        if opt.early_stopping > 0 else None

    report_manager = onmt.utils.build_report_manager(opt, gpu_rank)

    # setup wandb if applicable
    if opt.wandb and gpu_rank == 0:
        from datetime import datetime
        now = datetime.now()
        timestamp = now.strftime("-%Y%m%d-%H%M%S")
        wandb.login(key=opt.wandb_key)
        wandb.init(project=opt.wandb_project,
                   id=opt.exp + timestamp,
                   resume=True,
                   dir=opt.wandb_log_dir)
        wandb.config.update(opt, allow_val_change=True)
        wandb.watch(model, log='all')

    trainer = onmt.Trainer(model,
                           train_loss,
                           valid_loss,
                           optim,
                           trunc_size,
                           shard_size,
                           norm_method,
                           accum_count,
                           accum_steps,
                           n_gpu,
                           gpu_rank,
                           gpu_verbose_level,
                           report_manager,
                           with_align=True if opt.lambda_align > 0 else False,
                           model_saver=model_saver if gpu_rank == 0 else None,
                           average_decay=average_decay,
                           average_every=average_every,
                           model_dtype=opt.model_dtype,
                           earlystopper=earlystopper,
                           dropout=dropout,
                           dropout_steps=dropout_steps)
    return trainer
Example #27
import math
import os
import sys
from collections import deque

import gym
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import wandb

module_path = os.path.abspath(os.path.join('.'))
sys.path.append(module_path + "/RideHailing/envs/")
import RideHailing
from RideHailing.envs import *
from RideHailing.envs.RideHailing_env import *

env = gym.make('RideHailing-v0', config=CONFIG)

wandb.login(key='74f441f8a5ff9046ae53fad3d92540a168c6bc83')
wandb.init(project='RL', tags=['DQN_FirstTrial'])

# https://www.kaggle.com/isbhargav/guide-to-pytorch-learning-rate-scheduling
DQN_config = {
    'replay_size': 16384,
    'min_replay_size': 2048,
    'epsilon': 0.8,
    'epsilon_decay_param': 50000,
    'epsilon_decay_power': 0.6,
    'target_update_freq': 200,  #update target Q network every x steps
    'sample_replay_size': 1024,
    'discount': 1,
    'lr': 0.005,
    'step': 20,
    'gamma': 0.2,
Example #28
def main(args):

    global inception_model_score

    # load real images info or generate real images info
    model_name = args.model_name
    #torch.cuda.set_device(device=args.device)
    device = args.device
    epochs = args.epochs
    batch_size = args.batch_size
    img_size = args.img_size
    save_image_interval = args.save_image_interval
    loss_calculation_interval = args.loss_calculation_interval
    latent_dim = args.latent_dim
    project_name = args.project_name
    dataset = args.dataset
    lr = args.lr
    n_iter = args.n_iter

    fixed_z = make_fixed_z(model_name, latent_dim, device)

    image_shape = [3, img_size, img_size]

    time_limit_sec = timeparse(args.time_limit)

    if args.wandb:
        wandb.login()
        wandb_name = dataset + ',' + model_name + ',' + str(
            img_size) + ",convchange"
        if args.run_test: wandb_name += ', test run'
        wandb.init(project=project_name, config=args, name=wandb_name)
        config = wandb.config
    '''
    customize
    '''
    if model_name in ['vanilla']:
        args.mapper_inter_layer = 0

    if model_name in [
            'vanilla', 'pointMapping_but_aae', 'non-prior', 'mimic+non-prior',
            'vanilla-mimic'
    ]:
        encoder = Encoder(latent_dim, img_size).to(device)
        decoder = Decoder(latent_dim, img_size).to(device)
        discriminator = Discriminator(latent_dim).to(device)
        ae_optimizer = torch.optim.Adam(itertools.chain(
            encoder.parameters(), decoder.parameters()),
                                        lr=lr)
        d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr)

    elif model_name in [
            'ulearning', 'ulearning_point', 'mimic_at_last', 'mimic'
    ]:
        encoder = Encoder(latent_dim, img_size).to(device)
        decoder = Decoder(latent_dim, img_size).to(device)
        discriminator = None
        d_optimizer = None
        ae_optimizer = torch.optim.Adam(itertools.chain(
            encoder.parameters(), decoder.parameters()),
                                        lr=lr)

    ###########################################
    #####              Score              #####
    ###########################################
    inception_model_score.lazy_mode(True)
    '''
    Fill in the dataset here!
    customize
    '''
    if dataset == 'CelebA':
        train_loader = get_celebA_dataset(batch_size, img_size)
    elif dataset == 'FFHQ':
        train_loader = get_ffhq_thumbnails(batch_size, img_size)
    elif dataset == 'mnist':
        train_loader = get_mnist_dataset(batch_size, img_size)
    elif dataset == 'mnist_fashion':
        train_loader = get_mnist_fashion_dataset(batch_size, img_size)
    elif dataset == 'emnist':
        train_loader = get_emnist_dataset(batch_size, img_size)
    elif dataset == 'LSUN_dining_room':
        #wget http://dl.yf.io/lsun/scenes/dining_room_train_lmdb.zip
        #unzip dining_room_train_lmdb.zip
        #located dining_room_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='dining_room_train')
    elif dataset == 'LSUN_classroom':
        #wget http://dl.yf.io/lsun/scenes/classroom_train_lmdb.zip
        #unzip classroom_train_lmdb.zip
        #located classroom_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='classroom_train')
    elif dataset == 'LSUN_conference':
        #wget http://dl.yf.io/lsun/scenes/conference_room_train_lmdb.zip
        #unzip conference_room_train_lmdb.zip
        #located conference_room_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='conference_room_train')
    elif dataset == 'LSUN_churches':
        #wget http://dl.yf.io/lsun/scenes/church_outdoor_train_lmdb.zip
        #unzip church_outdoor_train_lmdb.zip
        #located church_outdoor_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='church_outdoor_train')
    else:
        print("dataset is forced selected to cifar10")
        train_loader = get_cifar1_dataset(batch_size, img_size)

    real_images_info_file_name = hashlib.md5(
        str(train_loader.dataset).encode()).hexdigest() + '.pickle'
    if args.run_test: real_images_info_file_name += '.run_test'

    os.makedirs('../../inception_model_info', exist_ok=True)
    if os.path.exists('../../inception_model_info/' +
                      real_images_info_file_name):
        print("Using generated real image info.")
        print(train_loader.dataset)
        inception_model_score.load_real_images_info(
            '../../inception_model_info/' + real_images_info_file_name)

    else:
        inception_model_score.model_to(device)

        #put real image
        for each_batch in tqdm.tqdm(train_loader, desc='insert real dataset'):
            X_train_batch = each_batch[0]
            inception_model_score.put_real(X_train_batch)
            if args.run_test: break

        #generate real images info
        inception_model_score.lazy_forward(batch_size=256,
                                           device=device,
                                           real_forward=True)
        inception_model_score.calculate_real_image_statistics()
        #save real images info for next experiments
        inception_model_score.save_real_images_info(
            '../../inception_model_info/' + real_images_info_file_name)
        #offload inception_model
        inception_model_score.model_to('cpu')

    if args.mapper_inter_layer > 0:
        if model_name in ['ulearning_point', 'mimic_at_last']:
            mapper = EachLatentMapping(
                nz=args.latent_dim,
                inter_nz=args.mapper_inter_nz,
                linear_num=args.mapper_inter_layer).to(device)
            m_optimizer = None
        elif model_name in ['pointMapping_but_aae']:
            mapper = EachLatentMapping(
                nz=args.latent_dim,
                inter_nz=args.mapper_inter_nz,
                linear_num=args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(), lr=lr)
        elif model_name in ['ulearning', 'non-prior']:
            mapper = Mapping(args.latent_dim, args.mapper_inter_nz,
                             args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(), lr=lr)
        elif model_name in ['mimic', 'vanilla-mimic']:
            mapper = Mimic(args.latent_dim, args.latent_dim,
                           args.mapper_inter_nz,
                           args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(),
                                           lr=lr,
                                           weight_decay=1e-3)
        elif model_name in [
                'mimic+non-prior',
        ]:
            mapper = MimicStack(args.latent_dim, args.latent_dim,
                                args.mapper_inter_nz,
                                args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(), lr=lr)
    else:
        # case vanilla and there is no mapper
        mapper = lambda x: x
        m_optimizer = None

    if args.load_netE != '': load_model(encoder, args.load_netE)
    if args.load_netM != '': load_model(mapper, args.load_netM)
    if args.load_netD != '': load_model(decoder, args.load_netD)

    time_start_run = time.time()

    AE_pretrain(args, train_loader, device, ae_optimizer, encoder, decoder)

    M_pretrain(args, train_loader, device, d_optimizer, m_optimizer, mapper,
               encoder, discriminator)

    # train phase
    i = 0
    loss_log = {}
    for i in range(1, epochs + 1):
        loss_log = train_main(args, train_loader, i, device, ae_optimizer,
                              m_optimizer, d_optimizer, encoder, decoder,
                              mapper, discriminator)
        loss_log.update({'spend time': time.time() - time_start_run})

        if check_time_over(time_start_run, time_limit_sec) == True:
            print("time limit over")
            break

        if i % save_image_interval == 0:
            insert_sample_image_inception(args, i, epochs, train_loader,
                                          mapper, decoder,
                                          inception_model_score)
            matric = gen_matric(wandb, args, train_loader, encoder, mapper,
                                decoder, discriminator, inception_model_score)
            loss_log.update(matric)
        if args.wandb:
            wandb_update(wandb, i, args, train_loader, encoder, mapper,
                         decoder, device, fixed_z, loss_log)
        else:
            print(loss_log)

        if i % args.save_model_every == 0:
            now_time = str(datetime.now())
            save_model([encoder, mapper, decoder], [
                "%s[%d epoch].netE" % (now_time, i),
                "%s[%d epoch].netM" % (now_time, i),
                "%s[%d epoch].netD" % (now_time, i)
            ])

    #make last matric
    if model_name in ['mimic_at_last']:
        M_train_at_last(args, train_loader, device, d_optimizer, m_optimizer,
                        mapper, encoder, discriminator)
    if i % save_image_interval != 0:
        insert_sample_image_inception(args, i, epochs, train_loader, mapper,
                                      decoder, inception_model_score)
        matric = gen_matric(wandb, args, train_loader, encoder, mapper,
                            decoder, discriminator, inception_model_score)
    if args.wandb:
        loss_log.update(matric)
        wandb_update(wandb, i, args, train_loader, encoder, mapper, decoder,
                     device, fixed_z, loss_log)

    now_time = str(datetime.now())
    save_model([encoder, mapper, decoder], [
        "%s[%d epoch].netE" % (now_time, i),
        "%s[%d epoch].netM" % (now_time, i),
        "%s[%d epoch].netD" % (now_time, i)
    ])

    if args.wandb: wandb.finish()
Example #29
import datetime
import sys
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model

import wandb
from wandb.keras import WandbCallback

wandb.login()

# Import mlcompute module to use the optional set_mlc_device API for device selection with ML Compute.
#from tensorflow.python.compiler.mlcompute import mlcompute
# Select CPU device.
#mlcompute.set_mlc_device(device_name='any') # Available options are 'cpu', 'gpu', and 'any'.

from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model


def train_test_split(df, n_test):
    if len(df) < 8760:
Example #30
    def init(self):
        s = self.settings
        config = self.config

        if s.mode == "noop":
            # TODO(jhr): return dummy object
            return None

        # Make sure we are logged in
        wandb.login()

        stdout_master_fd = None
        stderr_master_fd = None
        stdout_slave_fd = None
        stderr_slave_fd = None
        console = s.console

        if console == "redirect":
            pass
        elif console == "off":
            pass
        elif console == "mock":
            pass
        elif console == "file":
            pass
        elif console == "iowrap":
            stdout_master_fd, stdout_slave_fd = io_wrap.wandb_pty(resize=False)
            stderr_master_fd, stderr_slave_fd = io_wrap.wandb_pty(resize=False)
        elif console == "_win32":
            # Not used right now
            stdout_master_fd, stdout_slave_fd = win32_create_pipe()
            stderr_master_fd, stderr_slave_fd = win32_create_pipe()
        else:
            self._reporter.internal("Unknown console: %s", console)

        backend = Backend(mode=s.mode)
        backend.ensure_launched(
            settings=s,
            stdout_fd=stdout_master_fd,
            stderr_fd=stderr_master_fd,
            use_redirect=self._use_redirect,
        )
        backend.server_connect()

        # resuming needs access to the server, check server_status()?

        run = RunManaged(config=config, settings=s)
        run._set_backend(backend)
        run._set_reporter(self._reporter)
        # TODO: pass mode to backend
        # run_synced = None

        backend._hack_set_run(run)

        if s.mode == "online":
            ret = backend.interface.send_run_sync(run, timeout=30)
            # TODO: fail on error, check return type
            run._set_run_obj(ret.run)
        elif s.mode in ("offline", "dryrun"):
            backend.interface.send_run(run)
        elif s.mode in ("async", "run"):
            ret = backend.interface.send_run_sync(run, timeout=10)
            # TODO: on network error, do async run save
            backend.interface.send_run(run)

        self.run = run
        self.backend = backend
        set_global(run=run, config=run.config, log=run.log, join=run.join)
        self._reporter.set_context(run=run)
        run.on_start()

        logger.info("atexit reg")
        self._hooks = ExitHooks()
        self._hooks.hook()
        atexit.register(lambda: self._atexit_cleanup())

        if self._use_redirect:
            # setup fake callback
            self._redirect_cb = self._callback

        self._redirect(stdout_slave_fd, stderr_slave_fd)

        # for super agent
        # run._save_job_spec()

        return run