Example #1
        # Rebuild the averaged model from the checkpoint and re-register its
        # trainable parameters with the EMA tracker.
        averaged_model = build_model()
        averaged_model.to(device)
        averaged_model.load_state_dict(checkpoint["state_dict"])
        for name, param in averaged_model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)
    return model, optimizer, ema


model = build_model()
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
criterion = GaussianLoss()

# Track an exponential moving average of every trainable parameter.
ema = ExponentialMovingAverage(args.ema_decay)
for name, param in model.named_parameters():
    if param.requires_grad:
        ema.register(name, param.data)

global_step, global_epoch = 0, 0
load_step = args.load_step

# Per-model log file; `state` holds the parsed command-line arguments as a dict.
log = open(os.path.join(args.log, '{}.txt'.format(args.model_name)), 'w')
state = {k: v for k, v in args._get_kwargs()}

if load_step == 0:
    # Fresh run: start new loss histories, log the hyperparameters, and seed the
    # best-so-far test loss with a large initial value.
    list_train_loss, list_loss = [], []
    log.write(json.dumps(state) + '\n')
    test_loss = 100.0
else:
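
The example instantiates an ExponentialMovingAverage helper that is not defined in the excerpt; only its register(name, value) calls are visible. Below is a minimal sketch of such a helper, assuming a conventional shadow-dictionary design; the update method and its decay formula are assumptions, not part of the original code.

class ExponentialMovingAverage:
    """Keeps a decayed shadow copy of each registered tensor."""

    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, value):
        # Store an initial copy of the tensor under its parameter name.
        self.shadow[name] = value.clone()

    def update(self, name, value):
        # shadow = decay * shadow + (1 - decay) * value
        assert name in self.shadow
        new_average = self.decay * self.shadow[name] + (1.0 - self.decay) * value
        self.shadow[name] = new_average.clone()
        return new_average
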
Example #2
        averaged_model.load_state_dict(checkpoint["state_dict"])
        for name, param in averaged_model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)
    return model, optimizer, ema


model = build_model()
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
# optimizer = optim.Adamax(model.parameters(), lr=args.learning_rate)
criterion = GaussianLoss()

# Maintains moving averages of the trainable parameters via exponential decay.
ema = ExponentialMovingAverage(args.ema_decay)
for name, param in model.named_parameters():
    # Every tensor has a requires_grad flag that allows fine-grained exclusion of
    # subgraphs from gradient computation and can improve efficiency.
    if param.requires_grad:
        ema.register(name, param.data)

global_step, global_epoch = 0, 0
load_step = args.load_step

log = open(os.path.join(args.log, '{}.txt'.format(args.model_name)), 'w')
state = {k: v for k, v in args._get_kwargs()}

if load_step == 0:
    list_train_loss, list_loss = [], []
    log.write(json.dumps(state) + '\n')
    test_loss = 100.0
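
Both excerpts stop before the training loop, so the following is only a sketch of how this setup is typically driven. The loader name, the args.epochs field, the model's output signature for GaussianLoss, and the ema.update call are assumptions introduced here for illustration.

for epoch in range(global_epoch, args.epochs):
    for x, target in train_loader:  # hypothetical DataLoader
        x, target = x.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(x), target)  # GaussianLoss call signature assumed
        loss.backward()
        optimizer.step()
        # Fold the freshly updated weights into the running averages.
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)
        global_step += 1
    global_epoch += 1
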