source_loader = DataLoader(source_dataset, batch_size=args.batch_size, shuffle=True) target_loader = DataLoader(target_dataset, batch_size=args.batch_size, shuffle=True) # models F = FeatureExtractor(resnet=args.resnet_type).to(device) C = LabelPredictor(resnet=args.resnet_type).to(device) D = DomainClassifier(resnet=args.resnet_type).to(device) class_criterion = nn.CrossEntropyLoss() domain_criterion = nn.BCEWithLogitsLoss() opt_F = optim.AdamW(F.parameters()) opt_C = optim.AdamW(C.parameters()) opt_D = optim.AdamW(D.parameters()) # train F.train() D.train() C.train() lamb, p, gamma, now, tot = 0, 0, 10, 0, len(source_loader) * args.n_epoch if not args.adaptive_lamb: lamb = 0.1 best_domain_loss, best_epoch = 0, 0 for epoch in range(args.n_epoch): domain_loss, class_loss = 0, 0 total_hit, total_num = 0, 0 for i, ((source_data, source_label),
# Actor Critic actor = Actor(n_actions=env.action_space.n, space_dims=4, hidden_dims=32) critic = Critic(space_dims=4, hidden_dims=32) # ICM feature_extractor = FeatureExtractor(env.observation_space.shape[0], 32) forward_model = ForwardModel(env.action_space.n, 32) inverse_model = InverseModel(env.action_space.n, 32) # Actor Critic a_optim = torch.optim.Adam(actor.parameters(), lr=args.lr_actor) c_optim = torch.optim.Adam(critic.parameters(), lr=args.lr_critic) # ICM icm_params = list(feature_extractor.parameters()) + list( forward_model.parameters()) + list(inverse_model.parameters()) icm_optim = torch.optim.Adam(icm_params, lr=args.lr_icm) pg_loss = PGLoss() mse_loss = nn.MSELoss() xe_loss = nn.CrossEntropyLoss() global_step = 0 n_eps = 0 reward_lst = [] mva_lst = [] mva = 0. avg_ireward_lst = [] while n_eps < args.max_eps: