Example #1
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# source_dataset, target_dataset, args, device and the model classes
# (FeatureExtractor, LabelPredictor, DomainClassifier) are assumed to be
# defined earlier in the full script
source_loader = DataLoader(source_dataset,
                           batch_size=args.batch_size,
                           shuffle=True)
target_loader = DataLoader(target_dataset,
                           batch_size=args.batch_size,
                           shuffle=True)

# models: shared feature extractor, label predictor and domain classifier
F = FeatureExtractor(resnet=args.resnet_type).to(device)
C = LabelPredictor(resnet=args.resnet_type).to(device)
D = DomainClassifier(resnet=args.resnet_type).to(device)

# cross-entropy for source-label classification, binary loss for source-vs-target
class_criterion = nn.CrossEntropyLoss()
domain_criterion = nn.BCEWithLogitsLoss()

opt_F = optim.AdamW(F.parameters())
opt_C = optim.AdamW(C.parameters())
opt_D = optim.AdamW(D.parameters())

# train
F.train()
D.train()
C.train()
# lamb weights the adversarial domain loss; p, gamma, now and tot drive its
# schedule when args.adaptive_lamb is set (now / tot tracks training progress)
lamb, p, gamma, now, tot = 0, 0, 10, 0, len(source_loader) * args.n_epoch
if not args.adaptive_lamb:
    lamb = 0.1  # fixed adversarial weight when the adaptive schedule is off
best_domain_loss, best_epoch = 0, 0
for epoch in range(args.n_epoch):
    domain_loss, class_loss = 0, 0
    total_hit, total_num = 0, 0
    # one source batch and one target batch per step; the rest of this line is
    # reconstructed from the truncated example (target labels are unused)
    for i, ((source_data, source_label),
            (target_data, _)) in enumerate(zip(source_loader, target_loader)):
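        # the example is truncated here; the step below is a minimal DANN-style
        # sketch (two-phase update, no gradient reversal layer) and an
        # assumption about a typical implementation, not the author's exact code
        source_data, source_label = source_data.to(device), source_label.to(device)
        target_data = target_data.to(device)
        if args.adaptive_lamb:
            p = now / tot  # training progress in [0, 1]
            lamb = 2. / (1. + math.exp(-gamma * p)) - 1.  # common DANN schedule
        now += 1

        # phase 1: train the domain classifier on detached features
        mixed_data = torch.cat([source_data, target_data], dim=0)
        domain_label = torch.zeros([mixed_data.shape[0], 1], device=device)
        domain_label[:source_data.shape[0]] = 1  # 1 = source, 0 = target
        feature = F(mixed_data)
        loss_D = domain_criterion(D(feature.detach()), domain_label)
        opt_D.zero_grad()
        loss_D.backward()
        opt_D.step()

        # phase 2: train F and C to classify source data while fooling D
        class_logits = C(feature[:source_data.shape[0]])
        loss_FC = (class_criterion(class_logits, source_label)
                   - lamb * domain_criterion(D(feature), domain_label))
        opt_F.zero_grad()
        opt_C.zero_grad()
        loss_FC.backward()
        opt_F.step()
        opt_C.step()

        domain_loss += loss_D.item()
        class_loss += loss_FC.item()
        total_hit += (class_logits.argmax(dim=-1) == source_label).sum().item()
        total_num += source_data.shape[0]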
Example #2
    # Actor-Critic networks
    actor = Actor(n_actions=env.action_space.n, space_dims=4, hidden_dims=32)
    critic = Critic(space_dims=4, hidden_dims=32)

    # ICM: feature encoder plus a forward model (predicts next-state features
    # from the action) and an inverse model (predicts the action taken between
    # consecutive state features)
    feature_extractor = FeatureExtractor(env.observation_space.shape[0], 32)
    forward_model = ForwardModel(env.action_space.n, 32)
    inverse_model = InverseModel(env.action_space.n, 32)

    # Actor-Critic optimizers
    a_optim = torch.optim.Adam(actor.parameters(), lr=args.lr_actor)
    c_optim = torch.optim.Adam(critic.parameters(), lr=args.lr_critic)

    # ICM: all three components share one optimizer and are trained jointly
    icm_params = list(feature_extractor.parameters()) + list(
        forward_model.parameters()) + list(inverse_model.parameters())
    icm_optim = torch.optim.Adam(icm_params, lr=args.lr_icm)

    pg_loss = PGLoss()               # policy-gradient loss for the actor
    mse_loss = nn.MSELoss()          # forward-model loss / intrinsic reward signal
    xe_loss = nn.CrossEntropyLoss()  # inverse-model action-prediction loss

    global_step = 0
    n_eps = 0
    reward_lst = []        # extrinsic reward collected per episode
    mva_lst = []           # moving average of the episode rewards
    mva = 0.
    avg_ireward_lst = []   # mean intrinsic reward per episode

    while n_eps < args.max_eps:
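        # the example is truncated here; the loop body below is a hedged sketch
        # of one episode with ICM-style curiosity rewards.  The forward
        # signatures of Actor, FeatureExtractor, ForwardModel and InverseModel,
        # the classic gym API, and args.eta / args.beta are assumptions, not
        # the project's actual interfaces.
        obs = env.reset()
        done, ep_reward, ep_ireward, rollout = False, 0., [], []
        while not done:
            state = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
            action_probs = actor(state)                     # assumed: probs over actions
            action = torch.multinomial(action_probs, 1).item()
            next_obs, ext_reward, done, _ = env.step(action)
            next_state = torch.as_tensor(next_obs, dtype=torch.float32).unsqueeze(0)

            # ICM: encode both states, predict the next feature and the action
            phi, phi_next = feature_extractor(state), feature_extractor(next_state)
            action_onehot = torch.nn.functional.one_hot(
                torch.tensor([action]), env.action_space.n).float()
            phi_next_pred = forward_model(action_onehot, phi)   # assumed signature
            action_logits = inverse_model(phi, phi_next)        # assumed signature

            # intrinsic reward = scaled forward-model prediction error
            forward_loss = mse_loss(phi_next_pred, phi_next.detach())
            inverse_loss = xe_loss(action_logits, torch.tensor([action]))
            intrinsic = args.eta * forward_loss.item()
            ep_ireward.append(intrinsic)

            # joint ICM update: weighted forward + inverse losses
            icm_optim.zero_grad()
            (args.beta * forward_loss + (1. - args.beta) * inverse_loss).backward()
            icm_optim.step()

            # store the transition; the actor/critic update with pg_loss and the
            # critic's value estimates would consume this rollout afterwards
            rollout.append((state, action, ext_reward + intrinsic, done))
            obs = next_obs
            ep_reward += ext_reward
            global_step += 1

        n_eps += 1
        reward_lst.append(ep_reward)
        mva = 0.95 * mva + 0.05 * ep_reward   # simple exponential moving average
        mva_lst.append(mva)
        avg_ireward_lst.append(sum(ep_ireward) / max(len(ep_ireward), 1))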