Example no. 1
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir('logs_eval', args.exp_name)
    logger = create_logger(args.path_helper['log_path'], phase='test')

    gen_net = Generator(args=args).cuda()

    # set writer
    print(f'=> resuming from {args.load_path}')
    checkpoint_file = os.path.join(args.load_path, 'Model',
                                   'checkpoint_best1.pth')
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    gen_net.load_state_dict(checkpoint['gen_state_dict'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': 0,
    }

    # NOTE: fid_buffer_dir and iter_idx are not defined in the original snippet;
    # the output directory and iteration tag below are assumptions added so it runs.
    fid_buffer_dir = os.path.join(args.path_helper['log_path'], 'fid_buffer')
    os.makedirs(fid_buffer_dir, exist_ok=True)
    iter_idx = 0

    z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (args.eval_batch_size, args.latent_dim)))
    gen_imgs = gen_net(z).mul_(127.5).add_(127.5).clamp_(0.0, 255.0).permute(
        0, 2, 3, 1).to('cpu', torch.uint8).numpy()
    for img_idx, img in enumerate(gen_imgs):
        file_name = os.path.join(fid_buffer_dir,
                                 f'iter{iter_idx}_b{img_idx}.png')
        imsave(file_name, img)

    print('Images saved at: ' + fid_buffer_dir)
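A note on the sampling idiom used above: torch.cuda.FloatTensor(np.random.normal(...)) round-trips through NumPy in float64 before casting. A direct equivalent that samples the standard normal on the GPU (a minimal sketch, assuming CUDA is available):

# same shape and distribution, sampled directly on the device
z = torch.randn(args.eval_batch_size, args.latent_dim, device='cuda')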
Example no. 2
def main():
    args = cfg.parse_args()
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.benchmark = True
    
    # set tf env

    # import network
    gen_net = eval('models.'+args.model+'.Generator')(args=args).cuda()
    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(np.random.normal(0, 1, (16, args.latent_dim)))

    # set writer
    print(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        print(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        print(f'=> loaded checkpoint {checkpoint_file}')

    print(args)
    imgs = validate_cp(fixed_z, gen_net, n_row=4)
    os.makedirs(args.save_path, exist_ok=True)
    imsave(os.path.join(args.save_path, 'test_result.png'), imgs)
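validate_cp is not defined in this snippet. A minimal sketch of what such a grid-rendering helper could look like, built on torchvision's make_grid; the name and signature match the call above, everything else is an assumption:

import torch
from torchvision.utils import make_grid

def validate_cp(fixed_z, gen_net, n_row=4):
    # Hypothetical stand-in: render an n_row-wide grid of generated samples
    # as an HWC uint8 array suitable for imsave().
    gen_net.eval()
    with torch.no_grad():
        samples = gen_net(fixed_z)  # NCHW, assumed to lie in [-1, 1]
    grid = make_grid(samples, nrow=n_row, normalize=True)
    return grid.mul(255).byte().permute(1, 2, 0).cpu().numpy()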
Example no. 3
def main():
    args = cfg.parse_args()
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert args.load_path.endswith('.pth')
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir('logs_eval', args.exp_name)
    logger = create_logger(args.path_helper['log_path'], phase='test')

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))
    if args.percent < 0.9:
        pruning_generate(gen_net, (1 - args.percent))
        see_remain_rate(gen_net)

    # set writer
    logger.info(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)

    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        logger.info(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        epoch = 0  # a raw state dict carries no epoch; default added so validate() below runs
        logger.info(f'=> loaded checkpoint {checkpoint_file}')

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': 0,
    }
    inception_score, fid_score = validate(args, fixed_z, fid_stat, gen_net,
                                          writer_dict, epoch)
    logger.info(f'Inception score: {inception_score}, FID score: {fid_score}.')

    writer_dict['writer'].close()
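pruning_generate and see_remain_rate come from the surrounding repository. A plausible sketch with torch's built-in pruning utilities; the global L1 criterion and the reporting format are assumptions:

import torch.nn as nn
import torch.nn.utils.prune as prune

def pruning_generate(model, amount):
    # Hypothetical sketch: globally prune `amount` of conv weights by L1 magnitude.
    # This also registers the weight_orig/weight_mask buffers seen in later examples.
    params = [(m, 'weight') for m in model.modules() if isinstance(m, nn.Conv2d)]
    prune.global_unstructured(params, pruning_method=prune.L1Unstructured, amount=amount)

def see_remain_rate(model):
    # Report the fraction of conv weights that survived pruning.
    total = nonzero = 0
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            total += m.weight.numel()
            nonzero += int((m.weight != 0).sum())
    print(f'remaining weights: {100.0 * nonzero / total:.2f}%')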
Example no. 4
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert args.load_path.endswith(".pth")
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir("logs_eval", args.exp_name)
    logger = create_logger(args.path_helper["log_path"], phase="test")

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval("models." + args.gen_model + ".Generator")(args=args).cuda()

    # fid stat
    if args.dataset.lower() == "cifar10":
        fid_stat = "fid_stat/fid_stats_cifar10_train.npz"
    elif args.dataset.lower() == "stl10":
        fid_stat = "fid_stat/stl10_train_unlabeled_fid_stats_48.npz"
    else:
        raise NotImplementedError(f"no fid stat for {args.dataset.lower()}")
    assert os.path.exists(fid_stat)

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    # set writer
    logger.info(f"=> resuming from {args.load_path}")
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)

    if "avg_gen_state_dict" in checkpoint:
        gen_net.load_state_dict(checkpoint["avg_gen_state_dict"])
        epoch = checkpoint["epoch"]
        logger.info(f"=> loaded checkpoint {checkpoint_file} (epoch {epoch})")
    else:
        gen_net.load_state_dict(checkpoint)
        logger.info(f"=> loaded checkpoint {checkpoint_file}")

    logger.info(args)
    writer_dict = {
        "writer": SummaryWriter(args.path_helper["log_path"]),
        "valid_global_steps": 0,
    }
    inception_score, fid_score = validate(args,
                                          fixed_z,
                                          fid_stat,
                                          gen_net,
                                          writer_dict,
                                          clean_dir=False)
    logger.info(f"Inception score: {inception_score}, FID score: {fid_score}.")
Example no. 5
def main():

    args = cfg.parse_args()

    # write into tensorboard
    log_path = os.path.join(args.demo_path, args.demo_name + '/log')
    vid_path = os.path.join(args.demo_path, args.demo_name + '/vids')

    os.makedirs(log_path, exist_ok=True)
    os.makedirs(vid_path, exist_ok=True)
    writer = SummaryWriter(log_path)

    device = torch.device("cuda:0")

    G = Generator().to(device)
    G = nn.DataParallel(G)
    G.load_state_dict(torch.load(args.model_path))

    with torch.no_grad():
        G.eval()

        za = torch.randn(args.n_za_test, args.d_za, 1, 1, 1).to(device)
        zm = torch.randn(args.n_zm_test, args.d_zm, 1, 1, 1).to(device)

        n_za = za.size(0)
        n_zm = zm.size(0)
        za = za.unsqueeze(1).repeat(1, n_zm, 1, 1, 1, 1).contiguous().view(
            n_za * n_zm, -1, 1, 1, 1)
        zm = zm.repeat(n_za, 1, 1, 1, 1)

        vid_fake = G(za, zm)

        vid_fake = vid_fake.transpose(2, 1)  # bs x 16 x 3 x 64 x 64
        vid_fake = ((vid_fake - vid_fake.min()) /
                    (vid_fake.max() - vid_fake.min())).data

        writer.add_video(tag='generated_videos',
                         global_step=1,
                         vid_tensor=vid_fake)
        writer.flush()

        # save into videos
        print('==> saving videos...')
        save_videos(vid_path, vid_fake, n_za, n_zm)

    return
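save_videos is also external to the snippet. A minimal sketch of such a helper using torchvision.io.write_video; the mp4 container and the appearance/motion naming scheme are assumptions:

import os
import torchvision

def save_videos(vid_path, vid_fake, n_za, n_zm):
    # Hypothetical sketch: vid_fake is (n_za * n_zm) x T x C x H x W in [0, 1];
    # write each clip as an mp4 named by its appearance/motion indices.
    vids = (vid_fake.clamp(0, 1) * 255).byte().permute(0, 1, 3, 4, 2).cpu()  # N x T x H x W x C
    for i, vid in enumerate(vids):
        name = f'vid_a{i // n_zm}_m{i % n_zm}.mp4'
        torchvision.io.write_video(os.path.join(vid_path, name), vid, fps=8)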
Example no. 6
def main():
    global g_vel_cmd, g_swa_cmd, g_acc_cmd, g_sar_cmd, g_cfg_type_cmd, g_enable_cmd, g_clear_cmd, g_vel_pre, g_vel_err_pre_int, g_vel_err_pre, g_throttle_pre, g_published
    g_vel_cmd = 0.0
    g_swa_cmd = 0.0
    g_acc_cmd = 0.0
    g_sar_cmd = 0.0
    g_cfg_type_cmd = 0
    g_enable_cmd = 0
    g_clear_cmd = 0
    g_vel_pre = 0.0
    g_vel_err_pre = 0.0
    g_vel_err_pre_int = 0.0
    g_throttle_pre = 0.0
    g_published = False
    vehicle_ns = parse_args()
    ns_obj = VehicleCfg(vehicle_ns)
    node_name, brake_topic_name, throttle_topic_name, steering_topic_name, gear_topic_name, turnsignal_topic_name, _ = ns_obj.get_llc_properties(
    )
    control_topic_name = ns_obj.get_control_properties()
    brake_obj = BrakeVehicle()
    throttle_obj = ThrottleVehicle()
    steering_obj = SteeringVehicle()
    gear_obj = GearVehicle()
    turnsignal_obj = TurnSignalVehicle()
    rospy.init_node(node_name, anonymous=True)
    brake_obj.set_pub(topic_name=brake_topic_name)
    throttle_obj.set_pub(topic_name=throttle_topic_name)
    steering_obj.set_pub(topic_name=steering_topic_name)
    gear_obj.set_pub(topic_name=gear_topic_name)
    turnsignal_obj.set_pub(topic_name=turnsignal_topic_name)
    # subscribe once, outside the loop; the original re-registered a new
    # subscriber on every iteration
    rospy.Subscriber(control_topic_name, PlatMsgVehicleCmd, control_callback)
    rate = rospy.Rate(10)
    while not rospy.is_shutdown():
        if g_published:
            cfg_type_cmd_cur = g_cfg_type_cmd
            vel_cmd_cur, swa_cmd_cur, acc_cmd_cur, sar_cmd_cur = set_control_cfg(
                cfg_type_cmd_cur)
            brake_req_cur, throttle_req_cur, steering_req_cur = exec_low_level_control(
                cfg_type_cmd_cur, vel_cmd_cur, swa_cmd_cur, acc_cmd_cur,
                sar_cmd_cur)
            fill_pub_msgs(cfg_type_cmd_cur, brake_obj, throttle_obj,
                          steering_obj, brake_req_cur, throttle_req_cur,
                          steering_req_cur)
        g_published = False
        rate.sleep()
Example no. 7
def main():
    args = cfg.parse_args()
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.benchmark = True

    # set tf env

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()
    # initial
    np.random.seed(args.random_seed)

    # set writer
    print(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    pruning_generate(
        gen_net, checkpoint['avg_gen_state_dict'])  # create buffers for the pruning masks

    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        print(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        print(f'=> loaded checkpoint {checkpoint_file}')

    print(args)

    os.makedirs(args.save_path, exist_ok=True)
    count = 0
    gen_net.eval()
    for _ in range(1000):
        fixed_z = torch.cuda.FloatTensor(
            np.random.normal(0, 1, (60, args.latent_dim)))
        with torch.no_grad():
            gen_imgs = gen_net(fixed_z)
        gen_imgs = np.moveaxis(gen_imgs.detach().cpu().numpy(), 1, -1)
        for i in range(gen_imgs.shape[0]):
            img = gen_imgs[i]
            img = (img + 1) / 2
            imsave(
                os.path.join(args.save_path,
                             'test_result_{}.png'.format(count)), img)
            count = count + 1
Example no. 8
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert args.load_path.endswith('.pth')
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir('logs_eval', args.exp_name)
    logger = create_logger(args.path_helper['log_path'], phase='test')

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    # set writer
    logger.info(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)

    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        logger.info(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        logger.info(f'=> loaded checkpoint {checkpoint_file}')

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': 0,
    }
    inception_score, fid_score = validate(args, fixed_z, gen_net, writer_dict)
    logger.info(f'Inception score: {inception_score}, FID score: {fid_score}.')
Example no. 9
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True
    os.environ['PYTHONHASHSEED'] = str(args.random_seed)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args)
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args)

    # weight init
    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    initial_dis_net_weight = deepcopy(dis_net.state_dict())
    initial_gen_net_weight = deepcopy(gen_net.state_dict())

    os.makedirs(args.save_path, exist_ok=True)
    torch.save(initial_dis_net_weight,
               os.path.join(args.save_path, 'initial_dis_net.pth'))
    torch.save(initial_gen_net_weight,
               os.path.join(args.save_path, 'initial_gen_net.pth'))
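The deepcopy above is what makes these true snapshots: state_dict() returns tensors that alias the live parameters, so a plain reference would keep changing as training proceeds. A quick illustration:

import torch
import torch.nn as nn
from copy import deepcopy

net = nn.Linear(2, 2)
alias = net.state_dict()                # aliases the live parameters
snapshot = deepcopy(net.state_dict())   # independent copy
with torch.no_grad():
    net.weight.add_(1.0)
print(torch.equal(alias['weight'], net.weight))     # True: the alias moved too
print(torch.equal(snapshot['weight'], net.weight))  # False: the snapshot is frozen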
Example no. 10
def main():
    vehicle_ns  = cfg.parse_args()
    ns_obj      = VehicleCfg(vehicle_ns)
    node_name, topic_name, model_name, frame_id = ns_obj.get_odom_properties()
    rospy.init_node(node_name, anonymous=True)
    odom_pub = rospy.Publisher(topic_name, Odometry, queue_size=100)
    rospy.wait_for_service('/gazebo/get_model_state')
    get_model_srv = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState)
    header = Header()
    odom = Odometry()
    header.frame_id = frame_id
    model = GetModelStateRequest()
    model.model_name = model_name
    rate = rospy.Rate(100)
    while not rospy.is_shutdown():
        result              = get_model_srv(model)
        odom.pose.pose      = result.pose
        odom.twist.twist    = result.twist
        header.stamp        = rospy.Time.now()
        odom.header         = header
        odom_pub.publish(odom)
        rate.sleep()
Example no. 11
def test_autogan_cifar10_a_Generator(args1, myargs):
    import cfg, os, torch
    import numpy as np
    myargs.config = getattr(myargs.config, 'train_autogan_cifar10_a')
    myargs.args = args1
    args = cfg.parse_args()
    for k, v in myargs.config.items():
        setattr(args, k, v)

    args.tf_inception_model_dir = os.path.expanduser(
        args.tf_inception_model_dir)
    args.fid_stat = os.path.expanduser(args.fid_stat)
    args.data_path = os.path.expanduser(args.data_path)

    gen_net = Generator(args=args).cuda()
    z = torch.cuda.FloatTensor(np.random.normal(0, 1, (16, args.latent_dim)))
    x = gen_net(z)

    import torchviz
    g = torchviz.make_dot(x)
    g.view()
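torchviz.make_dot also accepts a params mapping, which labels parameter nodes with their names and makes the rendered graph considerably easier to read:

# optional: label parameter nodes by name
g = torchviz.make_dot(x, params=dict(gen_net.named_parameters()))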
Example no. 12
def main():

	args = cfg.parse_args()

	# write into tensorboard
	log_path = os.path.join(args.demo_path, args.demo_name + '/log')
	vid_path = os.path.join(args.demo_path, args.demo_name + '/vids')
	os.makedirs(log_path, exist_ok=True)
	os.makedirs(vid_path, exist_ok=True)
	writer = SummaryWriter(log_path)

	device = torch.device("cuda:0")

	G = Generator().to(device)
	G = nn.DataParallel(G)
	G.load_state_dict(torch.load(args.model_path))

	with torch.no_grad():
		G.eval()

		za = torch.randn(args.n_za_test, args.d_za, 1, 1, 1).to(device) # appearance

		# generating frames from [16, 20, 24, 28, 32, 36, 40, 44, 48]
		for i in range(9):
			zm = torch.randn(args.n_zm_test, args.d_zm, (i+1), 1, 1).to(device) # 16+i*4
			vid_fake = G(za, zm)
			vid_fake = vid_fake.transpose(2,1)
			vid_fake = ((vid_fake - vid_fake.min()) / (vid_fake.max() - vid_fake.min())).data
			writer.add_video(tag='generated_videos_%dframes'%(16+i*4), global_step=1, vid_tensor=vid_fake)
			writer.flush()

			print('saving videos')
			save_videos(vid_path, vid_fake, args.n_za_test, (16+i*4))

	return
Example no. 13
        args.n_classes = 143
        fid_stat = None
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    if fid_stat:
        assert os.path.exists(fid_stat), f"{fid_stat} not found"

    # get network
    gen_net, _ = get_network_func(args)
    gen_net.cuda()

    # load checkpoint
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file), \
        f"checkpoint file {checkpoint_file} not found."
    logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
    checkpoint = torch.load(checkpoint_file,
                            map_location=lambda storage, loc: storage)
    gen_net.load_state_dict(checkpoint)
    logger.info(f"=> loaded checkpoint '{checkpoint_file}' ")

    # evaluation
    torch.cuda.empty_cache()
    inception_score, fid_score = validate(args, fid_stat, gen_net, None)
    logger.info(f'Inception score: {inception_score}, FID score: {fid_score} ')


if __name__ == '__main__':
    config = parse_args()
    main(config)
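The map_location lambda above pins every tensor in the checkpoint to CPU memory regardless of the device it was saved from; the shorter, equivalent spelling is:

# equivalent to map_location=lambda storage, loc: storage
checkpoint = torch.load(checkpoint_file, map_location='cpu')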
Example no. 14
                   , default=argparse.SUPPRESS
                   )
parser.add_argument( 'align' 
                   , help='alignments between target and source words' 
                   , nargs='?'
                   , default=argparse.SUPPRESS
                   )
parser.add_argument( '-s','--subcorpora'
                   , help='YAML description of subcorpora lines (space separated file list)'
                   , default=argparse.SUPPRESS
                   , action=store_training
                   )
if __name__ == '__main__':
    os.putenv('LANG','C')
    os.putenv('LC_ALL','C')
    d = cfg.parse_args(parser,write='$outdir/rules.config',modeldir=True)
    cfgf = open(os.path.join(d.outdir,'rules.config'),'a')
    print >> cfgf, '\nrules:', d.outdir
    cfgf.close()
    dir = os.path.abspath(os.path.dirname(__file__))
    finp = os.path.join(dir,'ghkm','filterbadinput')
    names = []
    triplefiles = [d.config['target'], d.config['source'], d.config['align']]
    
    steps = cfg.steps(d)

    hp = d.hadoop

    training = os.path.join(d.tmpdir,'training')
    trainingtmp = os.path.join(d.tmpdir,'training.tmp')
    trainingnew = trainingtmp + '.new'
Example no. 15
    if logfile:
        print >> decodescript, ' 2> $LOG \\'
        #print >> decodescript, '  2> >(gzip > $LOG.gz) \\'
    if stage == 'forest':
        print >> decodescript, "| %s/join_forests" % d.scriptdir 
    else:
        print >> decodescript, "| %s/join_nbests %s" % (d.scriptdir,d.config['decoder']['options']['nbests'])
    print >> decodescript, '\n\n'
    if logfile:
        print >> decodescript, 'gzip  $LOG\n\n'
        if include_instruction_pipe:
            print >> decodescript, 'gzip $INSLOG\n\n'
    decodescript.close()
    os.chmod(decodefile, stat.S_IRWXU | os.stat(decodefile)[stat.ST_MODE])
    return decodefile

if __name__ == '__main__':
    import argparse
    arp = argparse.ArgumentParser()
    arp.add_argument( 'decodepipe')
    arp.add_argument( 'tunedir'
                    , nargs='?'
                    , help='output directory of ruleset pipeline'
                    , action=cfg.store_abspath
                    , default=argparse.SUPPRESS
                    )
    d = cfg.parse_args(arp,default='$tunedir/tune.config', modeldir=True)

    write_script(d,'nbest', weightstring=os.path.join(d.config['tunedir'],'weights.final'),logfile=False,include_instruction_pipe=True,decodefile=d.config['decodepipe'])
Example no. 16
def main():

    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    print(args)

    # create logging folder
    log_path = os.path.join(args.save_path, args.exp_name + '/log')
    model_path = os.path.join(args.save_path, args.exp_name + '/models')
    os.makedirs(log_path, exist_ok=True)
    os.makedirs(model_path, exist_ok=True)
    writer = SummaryWriter(log_path)  # tensorboard

    # load model
    print('==> loading models')
    device = torch.device("cuda:0")

    G = Generator(args.dim_z, args.dim_a, args.nclasses, args.ch).to(device)
    VD = VideoDiscriminator(args.nclasses, args.ch).to(device)
    ID = ImageDiscriminator(args.ch).to(device)

    G = nn.DataParallel(G)
    VD = nn.DataParallel(VD)
    ID = nn.DataParallel(ID)

    # optimizer
    optimizer_G = torch.optim.Adam(G.parameters(), args.g_lr, (0.5, 0.999))
    optimizer_VD = torch.optim.Adam(VD.parameters(), args.d_lr, (0.5, 0.999))
    optimizer_ID = torch.optim.Adam(ID.parameters(), args.d_lr, (0.5, 0.999))

    # loss
    criterion_gan = nn.BCEWithLogitsLoss().to(device)
    criterion_l1 = nn.L1Loss().to(device)

    # prepare dataset
    print('==> preparing dataset')
    transform = torchvision.transforms.Compose([
        transforms_vid.ClipResize((args.img_size, args.img_size)),
        transforms_vid.ClipToTensor(),
        transforms_vid.ClipNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    transform_test = torchvision.transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    if args.dataset == 'mug':
        dataset_train = MUG('train', args.data_path, transform=transform)
        dataset_val = MUG('val', args.data_path, transform=transform)
        dataset_test = MUG_test(args.data_path, transform=transform_test)
    else:
        raise NotImplementedError

    dataloader_train = torch.utils.data.DataLoader(
        dataset=dataset_train,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        pin_memory=True,
        drop_last=True)

    dataloader_val = torch.utils.data.DataLoader(dataset=dataset_val,
                                                 batch_size=args.batch_size,
                                                 num_workers=args.num_workers,
                                                 shuffle=False,
                                                 pin_memory=True)

    dataloader_test = torch.utils.data.DataLoader(
        dataset=dataset_test,
        batch_size=args.batch_size_test,
        num_workers=args.num_workers,
        shuffle=False,
        pin_memory=True)

    print('==> start training')
    for epoch in range(args.max_epoch):
        train(args, epoch, G, VD, ID, optimizer_G, optimizer_VD, optimizer_ID,
              criterion_gan, criterion_l1, dataloader_train, writer, device)

        if epoch % args.val_freq == 0:
            val(args, epoch, G, criterion_l1, dataloader_val, device, writer)
            test(args, epoch, G, dataloader_test, device, writer)

        if epoch % args.save_freq == 0:
            torch.save(G.state_dict(),
                       os.path.join(model_path, 'G_%d.pth' % (epoch)))
            torch.save(VD.state_dict(),
                       os.path.join(model_path, 'VD_%d.pth' % (epoch)))
            torch.save(ID.state_dict(),
                       os.path.join(model_path, 'ID_%d.pth' % (epoch)))

    return
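train, val and test are defined elsewhere in the repository. For orientation, a minimal sketch of a single adversarial update with BCEWithLogitsLoss, matching the optimizers and criterion set up above; it ignores the separate video/image discriminators and the L1 term, and the discriminator output shape is an assumption:

import torch

def gan_step(G, D, opt_G, opt_D, criterion, real, z):
    # Hypothetical single D/G update, not the repo's train():
    ones = torch.ones(real.size(0), 1, device=real.device)
    zeros = torch.zeros(real.size(0), 1, device=real.device)

    # discriminator step: real -> 1, fake -> 0
    opt_D.zero_grad()
    fake = G(z).detach()
    d_loss = criterion(D(real), ones) + criterion(D(fake), zeros)
    d_loss.backward()
    opt_D.step()

    # generator step: push D(fake) toward 1
    opt_G.zero_grad()
    g_loss = criterion(D(G(z)), ones)
    g_loss.backward()
    opt_G.step()
    return d_loss.item(), g_loss.item()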
Example no. 17
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()
    gen_net.apply(weights_init)
    dis_net.apply(weights_init)
    avg_gen_net = deepcopy(gen_net)
    initial_gen_net_weight = torch.load(os.path.join(args.init_path,
                                                     'initial_gen_net.pth'),
                                        map_location="cpu")
    initial_dis_net_weight = torch.load(os.path.join(args.init_path,
                                                     'initial_dis_net.pth'),
                                        map_location="cpu")
    assert id(initial_dis_net_weight) != id(dis_net.state_dict())
    assert id(initial_gen_net_weight) != id(gen_net.state_dict())

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/fid_stats_stl10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    start_epoch = 0
    best_fid = 1e4

    print('=> resuming from %s' % args.load_path)
    assert os.path.exists(args.load_path)
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    pruning_generate(gen_net, checkpoint['gen_state_dict'])
    dis_net.load_state_dict(checkpoint['dis_state_dict'])
    total = 0
    total_nonzero = 0
    for m in dis_net.modules():
        if isinstance(m, nn.Conv2d):
            total += m.weight_orig.data.numel()
            mask = m.weight_orig.data.abs().clone().gt(0).float().cuda()
            total_nonzero += torch.sum(mask)
    conv_weights = torch.zeros(total)
    index = 0
    for m in dis_net.modules():
        if isinstance(m, nn.Conv2d):
            size = m.weight_orig.data.numel()
            conv_weights[index:(
                index + size)] = m.weight_orig.data.view(-1).abs().clone()
            index += size

    y, i = torch.sort(conv_weights)
    # thre_index = int(total * args.percent)
    # only care about the non zero weights
    # e.g: total = 100, total_nonzero = 80, percent = 0.2, thre_index = 36, that means keep 64
    thre_index = total - total_nonzero
    thre = y[int(thre_index)]
    pruned = 0
    print('Pruning threshold: {}'.format(thre))
    zero_flag = False
    masks = OrderedDict()
    for k, m in enumerate(dis_net.modules()):
        if isinstance(m, nn.Conv2d):
            weight_copy = m.weight_orig.data.abs().clone()
            mask = weight_copy.gt(thre).float()
            masks[k] = mask
            pruned = pruned + mask.numel() - torch.sum(mask)
            m.weight_orig.data.mul_(mask)
            if int(torch.sum(mask)) == 0:
                zero_flag = True
            print(
                'layer index: {:d} \t total params: {:d} \t remaining params: {:d}'
                .format(k, mask.numel(), int(torch.sum(mask))))
    print('Total conv params: {}, Pruned conv params: {}, Pruned ratio: {}'.
          format(total, pruned, pruned / total))

    pruning_generate(avg_gen_net, checkpoint['gen_state_dict'])
    see_remain_rate(gen_net)

    if not args.finetune_G:
        gen_weight = gen_net.state_dict()
        gen_orig_weight = rewind_weight(initial_gen_net_weight,
                                        gen_weight.keys())
        gen_weight.update(gen_orig_weight)
        gen_net.load_state_dict(gen_weight)
    gen_avg_param = copy_params(gen_net)

    if args.finetune_D:
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
    else:
        dis_net.load_state_dict(initial_dis_net_weight)

    for k, m in enumerate(dis_net.modules()):
        if isinstance(m, nn.Conv2d):
            m.weight_orig.data.mul_(masks[k])

    orig_dis_net = eval('models.' + args.model +
                        '.Discriminator')(args=args).cuda()
    orig_dis_net.load_state_dict(checkpoint['dis_state_dict'])
    orig_dis_net.eval()

    args.path_helper = set_log_dir('logs',
                                   args.exp_name + "_{}".format(args.percent))
    logger = create_logger(args.path_helper['log_path'])
    #logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch))

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler,
                         dis_scheduler) if args.lr_decay else None
        see_remain_rate(gen_net)
        see_remain_rate_orig(dis_net)
        if not args.use_kd_D:
            train_with_mask(args, gen_net, dis_net, gen_optimizer,
                            dis_optimizer, gen_avg_param, train_loader, epoch,
                            writer_dict, masks, lr_schedulers)
        else:
            train_with_mask_kd(args, gen_net, dis_net, orig_dis_net,
                               gen_optimizer, dis_optimizer, gen_avg_param,
                               train_loader, epoch, writer_dict, masks,
                               lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(
                args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict, epoch)
            logger.info(
                'Inception score: %.4f, FID score: %.4f || @ epoch %d.' %
                (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net.load_state_dict(gen_net.state_dict())
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
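copy_params and load_params implement the generator weight averaging used throughout these scripts: back up the live weights, swap in the averaged ones for validation, then swap back. A minimal sketch consistent with how they are called; the actual repository implementation may differ:

from copy import deepcopy

def copy_params(model):
    # Snapshot the current parameter tensors.
    return deepcopy([p.data for p in model.parameters()])

def load_params(model, new_params):
    # Copy a snapshot back into the live parameters, in order.
    for p, new_p in zip(model.parameters(), new_params):
        p.data.copy_(new_p)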
Example no. 18
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True
    os.environ['PYTHONHASHSEED'] = str(args.random_seed)
    
    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.'+args.model+'.Generator')(args=args)
    dis_net = eval('models.'+args.model+'.Discriminator')(args=args)

    initial_gen_net_weight = torch.load(os.path.join(args.init_path, 'initial_gen_net.pth'), map_location="cpu")
    initial_dis_net_weight = torch.load(os.path.join(args.init_path, 'initial_dis_net.pth'), map_location="cpu")
    
    gen_net = gen_net.cuda()
    dis_net = dis_net.cuda()
    
    gen_net.load_state_dict(initial_gen_net_weight)
    dis_net.load_state_dict(initial_dis_net_weight)
    
    # set optimizer
    gen_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, gen_net.parameters()),
                                     args.g_lr, (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, dis_net.parameters()),
                                     args.d_lr, (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0, args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0, args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/fid_stats_stl10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic / len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(np.random.normal(0, 1, (25, args.latent_dim)))
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print('=> resuming from %s' % args.load_path)
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net

        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch))
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    switch = False
    for epoch in range(int(start_epoch), int(args.max_epoch)):
            
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer, gen_avg_param, train_loader, epoch, writer_dict,
              lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(args.max_epoch)-1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat, gen_net, writer_dict, epoch)
            logger.info('Inception score: %.4f, FID score: %.4f || @ epoch %d.' % (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'gen_state_dict': gen_net.state_dict(),
            'dis_state_dict': dis_net.state_dict(),
            'avg_gen_state_dict': avg_gen_net.state_dict(),
            'gen_optimizer': gen_optimizer.state_dict(),
            'dis_optimizer': dis_optimizer.state_dict(),
            'best_fid': best_fid,
            'path_helper': args.path_helper,
            'seed': args.random_seed
        }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
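save_checkpoint is shared by these training scripts. One plausible shape for it, writing the latest state and keeping a copy of the best one; the checkpoint_best.pth naming is an assumption (and the classifier example below appears to call a variant with extra flags):

import os
import shutil
import torch

def save_checkpoint(states, is_best, output_dir, filename='checkpoint.pth'):
    # Hypothetical sketch: always persist the latest state; copy it when it is the best so far.
    torch.save(states, os.path.join(output_dir, filename))
    if is_best:
        shutil.copyfile(os.path.join(output_dir, filename),
                        os.path.join(output_dir, 'checkpoint_best.pth'))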
Example no. 19
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # Setting up the resnet model
    resnet = torchvision.models.resnet50(pretrained=args.pretrained,
                                         progress=True)
    num_features = resnet.fc.in_features
    resnet.fc = nn.Linear(num_features, args.num_classes)
    resnet = resnet.cuda()

    # Setting up the optimizer
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(resnet.parameters(),
                              lr=args.lr,
                              weight_decay=1e-4)
    elif args.optimizer == 'sgd_momentum':
        optimizer = optim.SGD(resnet.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=1e-4)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, resnet.parameters()), args.g_lr,
            (args.beta1, args.beta2))
    else:
        optimizer = None
    assert optimizer is not None

    criterion = nn.CrossEntropyLoss()

    if args.percentage == 1.0:
        train_data, val_data, test_data = get_train_validation_test_data(
            args.train_csv_path, args.train_img_path, args.val_csv_path,
            args.val_img_path, args.test_csv_path, args.test_img_path)
    else:
        train_data = get_label_unlabel_dataset(args.train_csv_path,
                                               args.train_img_path,
                                               args.percentage)
        _, val_data, test_data = get_train_validation_test_data(
            args.train_csv_path, args.train_img_path, args.val_csv_path,
            args.val_img_path, args.test_csv_path, args.test_img_path)

    train_loader = DataLoader(train_data,
                              batch_size=args.train_batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=args.num_workers)
    val_loader = DataLoader(val_data,
                            batch_size=args.eval_batch_size,
                            shuffle=True,
                            drop_last=True,
                            num_workers=args.num_workers)
    test_loader = DataLoader(test_data,
                             batch_size=args.eval_batch_size,
                             shuffle=True,
                             drop_last=True,
                             num_workers=args.num_workers)
    print('Training Datasize:', len(train_data))

    start_epoch = 0
    best_acc1 = 0
    best_acc2 = 0
    best_acc3 = 0

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model',
                                       'checkpoint_last.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        resnet.load_state_dict(checkpoint['resnet_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    start = time.time()
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        best_curr_acc1, best_curr_acc2, best_curr_acc3 = train(
            args, resnet, optimizer, criterion, train_loader, val_loader,
            epoch, writer_dict, best_acc1, best_acc2, best_acc3)

        best_acc1, best_acc2, best_acc3 = best_curr_acc1, best_curr_acc2, best_curr_acc3

        if epoch and epoch % args.val_freq == 0 or epoch == int(
                args.max_epoch) - 1:
            val_acc = get_val_acc(val_loader, resnet)
            logger.info(f'Validation Accuracy {val_acc} || @ epoch {epoch}.')

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'resnet_state_dict': resnet.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'path_helper': args.path_helper
            },
            False,
            False,
            False,
            args.path_helper['ckpt_path'],
            filename='checkpoint_last.pth')

    end = time.time()
    final_val_acc = get_val_acc(val_loader, resnet)
    final_test_acc = get_val_acc(test_loader, resnet)
    time_elapsed = end - start

    print('\n Final Validation Accuracy:', final_val_acc.data,
          '\n Final Test Accuracy:', final_test_acc.data, '\n Time Elapsed:',
          time_elapsed, 'seconds.')
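get_val_acc is assumed from context (it returns a tensor, given the .data access above). A minimal top-1 accuracy sketch consistent with that usage:

import torch

def get_val_acc(loader, model):
    # Hypothetical sketch: top-1 accuracy over a loader, returned as a tensor.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.cuda(), labels.cuda()
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum()
            total += labels.size(0)
    return correct.float() / total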
Example no. 20
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set visible GPU ids
    if len(args.gpu_ids) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_ids

    # set TensorFlow environment for evaluation (calculate IS and FID)
    _init_inception()
    inception_path = check_or_download_inception('./tmp/imagenet/')
    create_inception_graph(inception_path)

    # the first GPU in visible GPUs is dedicated for evaluation (running Inception model);
    # CUDA_VISIBLE_DEVICES above renumbers the visible devices 0..N-1
    args.gpu_ids = list(range(len(args.gpu_ids.split(','))))
    if len(args.gpu_ids) > 1:
        args.gpu_ids = args.gpu_ids[1:]

    # genotype G
    genotypes_root = os.path.join('exps', args.genotypes_exp, 'Genotypes')
    genotype_G = np.load(os.path.join(genotypes_root, 'latest_G.npy'))

    # import network from genotype
    basemodel_gen = eval('archs.' + args.arch + '.Generator')(args, genotype_G)
    gen_net = torch.nn.DataParallel(
        basemodel_gen, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])
    basemodel_dis = eval('archs.' + args.arch + '.Discriminator')(args)
    dis_net = torch.nn.DataParallel(
        basemodel_dis, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # epoch number for dis_net
    args.max_epoch_D = args.max_epoch_G * args.n_critic
    if args.max_iter_G:
        args.max_epoch_D = np.ceil(args.max_iter_G * args.n_critic /
                                   len(train_loader))
    max_iter_D = args.max_epoch_D * len(train_loader)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0, max_iter_D)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0, max_iter_D)

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # initial
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.checkpoint:
        # resuming
        print(f'=> resuming from {args.checkpoint}')
        assert os.path.exists(os.path.join('exps', args.checkpoint))
        checkpoint_file = os.path.join('exps', args.checkpoint, 'Model',
                                       'checkpoint_best.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net

        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('exps', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # model size
    logger.info('Param size of G = %fMB', count_parameters_in_MB(gen_net))
    logger.info('Param size of D = %fMB', count_parameters_in_MB(dis_net))
    print_FLOPs(basemodel_gen, (1, args.latent_dim), logger)
    print_FLOPs(basemodel_dis, (1, 3, args.img_size, args.img_size), logger)

    # for visualization
    if args.draw_arch:
        from utils.genotype import draw_graph_G
        draw_graph_G(genotype_G,
                     save=True,
                     file_path=os.path.join(args.path_helper['graph_vis_path'],
                                            'latest_G'))
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (100, args.latent_dim)))

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch_D)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler,
                         dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch % args.val_freq == 0 or epoch == int(args.max_epoch_D) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, std, fid_score = validate(args, fixed_z, fid_stat,
                                                       gen_net, writer_dict)
            logger.info(
                f'Inception score mean: {inception_score}, Inception score std: {std}, '
                f'FID score: {fid_score} || @ epoch {epoch}.')
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        # save model
        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.arch,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
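LinearLrDecay appears in most of these scripts with the constructor (optimizer, start_lr, end_lr, decay_start_step, decay_end_step). A minimal sketch of a linear schedule consistent with that call; the step() interface is an assumption:

class LinearLrDecay(object):
    # Hypothetical sketch: anneal lr linearly from start_lr to end_lr
    # between decay_start_step and decay_end_step.
    def __init__(self, optimizer, start_lr, end_lr, decay_start_step, decay_end_step):
        assert start_lr > end_lr
        self.optimizer = optimizer
        self.start_lr = start_lr
        self.end_lr = end_lr
        self.decay_start_step = decay_start_step
        self.decay_end_step = decay_end_step
        self.delta = (start_lr - end_lr) / (decay_end_step - decay_start_step)

    def step(self, current_step):
        if current_step <= self.decay_start_step:
            lr = self.start_lr
        elif current_step >= self.decay_end_step:
            lr = self.end_lr
        else:
            lr = self.start_lr - self.delta * (current_step - self.decay_start_step)
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr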
Example no. 21
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set visible GPU ids
    if len(args.gpu_ids) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_ids

    # set TensorFlow environment for evaluation (calculate IS and FID)
    _init_inception()
    inception_path = check_or_download_inception('./tmp/imagenet/')
    create_inception_graph(inception_path)

    # the first GPU in visible GPUs is dedicated for evaluation (running Inception model);
    # CUDA_VISIBLE_DEVICES above renumbers the visible devices 0..N-1
    args.gpu_ids = list(range(len(args.gpu_ids.split(','))))
    if len(args.gpu_ids) > 1:
        args.gpu_ids = args.gpu_ids[1:]

    # genotype G
    genotypes_root = os.path.join('exps', args.genotypes_exp, 'Genotypes')
    genotype_G = np.load(os.path.join(genotypes_root, 'latest_G.npy'))

    # import network from genotype
    basemodel_gen = eval('archs.' + args.arch + '.Generator')(args, genotype_G)
    gen_net = torch.nn.DataParallel(
        basemodel_gen, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])
    basemodel_dis = eval('archs.' + args.arch + '.Discriminator')(args)
    dis_net = torch.nn.DataParallel(
        basemodel_dis, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # set writer
    print(f'=> resuming from {args.checkpoint}')
    assert os.path.exists(os.path.join('exps', args.checkpoint))
    checkpoint_file = os.path.join('exps', args.checkpoint, 'Model',
                                   'checkpoint_best.pth')
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    epoch = checkpoint['epoch'] - 1
    gen_net.load_state_dict(checkpoint['gen_state_dict'])
    dis_net.load_state_dict(checkpoint['dis_state_dict'])
    avg_gen_net = deepcopy(gen_net)
    avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
    gen_avg_param = copy_params(avg_gen_net)
    del avg_gen_net
    assert args.exp_name
    args.path_helper = set_log_dir('exps', args.exp_name)
    logger = create_logger(args.path_helper['log_path'])
    logger.info(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': epoch // args.val_freq,
    }

    # model size
    logger.info('Param size of G = %fMB', count_parameters_in_MB(gen_net))
    logger.info('Param size of D = %fMB', count_parameters_in_MB(dis_net))
    print_FLOPs(basemodel_gen, (1, args.latent_dim), logger)
    print_FLOPs(basemodel_dis, (1, 3, args.img_size, args.img_size), logger)

    # for visualization
    if args.draw_arch:
        from utils.genotype import draw_graph_G
        draw_graph_G(genotype_G,
                     save=True,
                     file_path=os.path.join(args.path_helper['graph_vis_path'],
                                            'latest_G'))
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (100, args.latent_dim)))

    # test
    load_params(gen_net, gen_avg_param)
    inception_score, std, fid_score = validate(args, fixed_z, fid_stat,
                                               gen_net, writer_dict)
    logger.info(
        f'Inception score mean: {inception_score}, Inception score std: {std}, '
        f'FID score: {fid_score} || @ epoch {epoch}.')
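count_parameters_in_MB is a small utility; despite the name it conventionally reports millions of parameters rather than megabytes. A one-line sketch under that assumption:

def count_parameters_in_MB(model):
    # Hypothetical sketch: parameter count in millions, as logged above.
    return sum(p.numel() for p in model.parameters()) / 1e6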
Example no. 22
def main():
    args = cfg.parse_args()

    torch.cuda.manual_seed(args.random_seed)
    print(args)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
        args, weights_init)

    # initial
    start_search_iter = 0

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model',
                                       'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        cur_stage = checkpoint['cur_stage']

        start_search_iter = checkpoint['search_iter']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        prev_archs = checkpoint['prev_archs']
        prev_hiddens = checkpoint['prev_hiddens']

        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (search iteration {start_search_iter})'
        )
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])
        prev_archs = None
        prev_hiddens = None

        # set controller && its optimizer
        cur_stage = 0

    # set up data_loader
    dataset = datasets.ImageDataset(args, 2**(cur_stage + 3))
    train_loader = dataset.train
    print(args.rl_num_eval_img, "##############################")
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'controller_steps': start_search_iter * args.ctrl_step
    }

    g_loss_history = RunningStats(args.dynamic_reset_window)
    d_loss_history = RunningStats(args.dynamic_reset_window)

    # train loop
    Agent = SAC(131)
    print(Agent.alpha)

    memory = ReplayMemory(2560000)
    updates = 0
    outinfo = {
        'rewards': [],
        'a_loss': [],
        'critic_error': [],
    }
    Best = False
    Z_NUMPY = None
    WARMUP = True
    update_time = 1
    for search_iter in tqdm(range(int(start_search_iter), 100),
                            desc='search progress'):
        logger.info(f"<start search iteration {search_iter}>")
        if search_iter >= 1:
            WARMUP = False

        ### Define the number of layers; currently only 1-3 are supported
        total_layer_num = 3
        ### Different image size for different layers
        ds = [
            datasets.ImageDataset(args, 2**(k + 3))
            for k in range(total_layer_num)
        ]
        train_loaders = [d.train for d in ds]
        last_R = 0.  # Initial reward
        last_fid = 10000  # Initial FID
        last_arch = []

        # Set exploration
        if search_iter > 69:
            update_time = 10
            Best = True
        else:
            Best = False

        gen_net.set_stage(-1)
        last_R, last_fid, last_state = get_is(args,
                                              gen_net,
                                              args.rl_num_eval_img,
                                              get_is_score=True)
        for layer in range(total_layer_num):

            # cur_stage selects which layer's output to use: 0 means the first
            # layer's output; set it to 2 for the output of the last layer.
            cur_stage = layer
            action = Agent.select_action([layer, last_R, 0.01 * last_fid] +
                                         last_state, Best)
            arch = [
                action[0][0], action[0][1], action[1][0], action[1][1],
                action[1][2], action[2][0], action[2][1], action[2][2],
                action[3][0], action[3][1], action[4][0], action[4][1],
                action[5][0], action[5][1]
            ]
            # print(arch)
            # argmax to get int description of arch
            cur_arch = [np.argmax(k) for k in action]
            # Pad the skip option (0 = False). Skip connections exist only for
            # layers 1 and 2, not layer 0; see building_blocks.py for details.
            if layer == 0:
                cur_arch = cur_arch[0:4]
            elif layer == 1:
                cur_arch = cur_arch[0:5]
            elif layer == 2:
                if cur_arch[4] + cur_arch[5] == 2:
                    cur_arch = cur_arch[0:4] + [3]
                elif cur_arch[4] + cur_arch[5] == 0:
                    cur_arch = cur_arch[0:4] + [0]
                elif cur_arch[4] == 1 and cur_arch[5] == 0:
                    cur_arch = cur_arch[0:4] + [1]
                else:
                    cur_arch = cur_arch[0:4] + [2]

            # Get the network arch with the new architecture attached.
            last_arch += cur_arch
            gen_net.set_arch(last_arch,
                             layer)  # Set the network, given cur_stage
            # Train network
            dynamic_reset = train_qin(args,
                                      gen_net,
                                      dis_net,
                                      g_loss_history,
                                      d_loss_history,
                                      gen_optimizer,
                                      dis_optimizer,
                                      train_loaders[layer],
                                      cur_stage,
                                      smooth=False,
                                      WARMUP=WARMUP)

            # Get the reward from the output of layers 0..j, together with the
            # proposed progressive state.
            R, fid, state = get_is(args,
                                   gen_net,
                                   args.rl_num_eval_img,
                                   z_numpy=Z_NUMPY)
            # Print exploitation mark, for better readability of the log.
            if Best:
                print("arch:", cur_arch, "Exploitation:", Best)
            else:
                print("arch:", cur_arch, "Exploring...")
            # Proxy reward of the up-to-now (0:j) architecture.
            print("update times:", updates, "step:", layer + 1, "IS:", R,
                  "FID:", fid)
            mask = 0 if layer == total_layer_num - 1 else 1
            if search_iter >= 0:  # warm up
                memory.push([layer, last_R, 0.01 * last_fid] + last_state,
                            arch, R - last_R + 0.01 * (last_fid - fid),
                            [layer + 1, R, 0.01 * fid] + state,
                            mask)  # Append transition to memory

            if len(memory) >= 64:
                # Number of updates per step in environment
                for i in range(update_time):
                    # Update parameters of all the networks
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = Agent.update_parameters(
                        memory, min(len(memory), 256), updates)

                    updates += 1
                    outinfo['critic_error'] = min(critic_1_loss, critic_2_loss)
                    outinfo['entropy'] = ent_loss
                    outinfo['a_loss'] = policy_loss
                print("full batch", outinfo, alpha)
            last_R = R  # next step
            last_fid = fid
            last_state = state
        outinfo['rewards'] = R
        critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = Agent.update_parameters(
            memory, len(memory), updates)
        updates += 1
        outinfo['critic_error'] = min(critic_1_loss, critic_2_loss)
        outinfo['entropy'] = ent_loss
        outinfo['a_loss'] = policy_loss
        print("full batch", outinfo, alpha)
        # Clean up and start a new trajectory from scratch
        del gen_net, dis_net, gen_optimizer, dis_optimizer
        gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
            args, weights_init)
        print(outinfo, len(memory))
        Agent.save_model("test")
        WARMUP = False
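
The transition pushed to the replay memory above shapes the reward from the change in Inception Score plus a scaled change in FID. A hedged restatement as a standalone function, with made-up numbers, just to pin down the sign conventions (the 0.01 scale matches the snippet):

def shaped_reward(R, last_R, fid, last_fid, fid_scale=0.01):
    # IS going up and FID going down both increase the reward.
    return (R - last_R) + fid_scale * (last_fid - fid)

# Hypothetical scores: IS improves 7.2 -> 7.5 and FID improves 40 -> 35,
# giving 0.3 + 0.05 = 0.35.
assert abs(shaped_reward(7.5, 7.2, 35.0, 40.0) - 0.35) < 1e-9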
Esempio n. 23
0
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model',
                                       'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net

        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler,
                         dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(
                args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict)
            logger.info(
                f'Inception score: {inception_score}, FID score: {fid_score} || @ epoch {epoch}.'
            )
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
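
`LinearLrDecay` appears throughout these snippets but is never defined in them. A minimal sketch consistent with the call signature `LinearLrDecay(optimizer, start_lr, end_lr, decay_start_step, decay_end_step)` used above (an assumption from usage, not the repo's exact class):

class LinearLrDecay:
    def __init__(self, optimizer, start_lr, end_lr, decay_start_step,
                 decay_end_step):
        assert start_lr > end_lr
        self.optimizer = optimizer
        self.start_lr = start_lr
        self.end_lr = end_lr
        self.decay_start_step = decay_start_step
        self.decay_end_step = decay_end_step
        self.delta = (start_lr - end_lr) / (decay_end_step - decay_start_step)

    def step(self, current_step):
        # Hold at start_lr, decay linearly in the window, then hold at end_lr.
        if current_step <= self.decay_start_step:
            lr = self.start_lr
        elif current_step >= self.decay_end_step:
            lr = self.end_lr
        else:
            lr = self.start_lr - self.delta * (current_step -
                                               self.decay_start_step)
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr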
Esempio n. 24
0
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    gen_net = eval('models.' + args.model + '.Generator')(args=args)
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args)

    # weight init
    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    gen_net = gen_net.cuda()
    dis_net = dis_net.cuda()

    avg_gen_net = deepcopy(gen_net)
    initial_gen_net_weight = deepcopy(gen_net.state_dict())
    initial_dis_net_weight = deepcopy(dis_net.state_dict())
    assert id(initial_dis_net_weight) != id(dis_net.state_dict())
    assert id(initial_gen_net_weight) != id(gen_net.state_dict())
    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/fid_stats_stl10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    start_epoch = 0
    best_fid = 1e4

    args.path_helper = set_log_dir('logs',
                                   args.exp_name + "_{}".format(args.percent))
    logger = create_logger(args.path_helper['log_path'])
    # logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch))

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    print('=> resuming from %s' % args.load_path)
    assert os.path.exists(args.load_path)
    checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth')
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    gen_net.load_state_dict(checkpoint['gen_state_dict'])

    torch.manual_seed(args.random_seed)
    pruning_generate(gen_net, (1 - args.percent), args.pruning_method)
    torch.manual_seed(args.random_seed)
    pruning_generate(avg_gen_net, (1 - args.percent), args.pruning_method)
    see_remain_rate(gen_net)

    if args.second_seed:
        dis_net.apply(weights_init)
    if args.finetune_D:
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
    else:
        dis_net.load_state_dict(initial_dis_net_weight)

    gen_weight = gen_net.state_dict()
    gen_orig_weight = rewind_weight(initial_gen_net_weight, gen_weight.keys())
    assert id(gen_weight) != id(gen_orig_weight)
    gen_weight.update(gen_orig_weight)
    gen_net.load_state_dict(gen_weight)
    gen_avg_param = copy_params(gen_net)

    if args.use_kd_D:
        orig_dis_net = eval('models.' + args.model +
                            '.Discriminator')(args=args).cuda()
        orig_dis_net.load_state_dict(checkpoint['dis_state_dict'])
        orig_dis_net.eval()
    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler,
                         dis_scheduler) if args.lr_decay else None
        see_remain_rate(gen_net)
        if not args.use_kd_D:
            train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
                  gen_avg_param, train_loader, epoch, writer_dict,
                  lr_schedulers)
        else:
            train_kd(args, gen_net, dis_net, orig_dis_net, gen_optimizer,
                     dis_optimizer, gen_avg_param, train_loader, epoch,
                     writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(
                args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict, epoch)
            logger.info(
                'Inception score: %.4f, FID score: %.4f || @ epoch %d.' %
                (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net.load_state_dict(gen_net.state_dict())
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
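
The pruning helpers above (`pruning_generate`, `rewind_weight`) come from the lottery-ticket workflow and are external to this snippet. A rough sketch of both steps under the usual `torch.nn.utils.prune` convention, where pruned layers carry `weight_orig`/`weight_mask` entries; this is an assumption about the helpers' behavior, not their actual code:

import torch.nn as nn
import torch.nn.utils.prune as prune

def pruning_generate_sketch(model, amount):
    # Globally magnitude-prune all Conv2d weights. prune() rewrites each
    # pruned tensor into a 'weight_orig' parameter plus a 'weight_mask' buffer.
    parameters_to_prune = [(m, 'weight') for m in model.modules()
                           if isinstance(m, nn.Conv2d)]
    prune.global_unstructured(parameters_to_prune,
                              pruning_method=prune.L1Unstructured,
                              amount=amount)

def rewind_weight_sketch(initial_state_dict, pruned_keys):
    # Map the dense initial weights onto the pruned model's key layout:
    # 'x.weight_orig' receives the initial 'x.weight'; masks are kept as-is.
    rewound = {}
    for key in pruned_keys:
        if key.endswith('weight_orig'):
            rewound[key] = initial_state_dict[key.replace('weight_orig',
                                                          'weight')]
        elif not key.endswith('weight_mask'):
            rewound[key] = initial_state_dict[key]
    return rewound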
Esempio n. 25
0
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception(MODEL_DIR)
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
        args, weights_init)

    # set grow controller
    grow_ctrler = GrowCtrler(args.grow_step1, args.grow_step2)

    # initial
    start_search_iter = 0

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model',
                                       'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file,
                                map_location={'cuda:0': 'cpu'})
        # set controller && its optimizer
        cur_stage = checkpoint['cur_stage']
        controller, ctrl_optimizer = create_ctrler(args, cur_stage,
                                                   weights_init)

        start_search_iter = checkpoint['search_iter']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        controller.load_state_dict(checkpoint['ctrl_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        ctrl_optimizer.load_state_dict(checkpoint['ctrl_optimizer'])
        prev_archs = checkpoint['prev_archs']
        prev_hiddens = checkpoint['prev_hiddens']

        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (search iteration {start_search_iter})'
        )
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])
        prev_archs = None
        prev_hiddens = None

        # set controller && its optimizer
        cur_stage = 0
        controller, ctrl_optimizer = create_ctrler(args, cur_stage,
                                                   weights_init)

    # set up data_loader
    dataset = datasets.ImageDataset(args, 2**(cur_stage + 3),
                                    args.dis_batch_size, args.num_workers)
    train_loader = dataset.train

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'controller_steps': start_search_iter * args.ctrl_step
    }

    g_loss_history = RunningStats(args.dynamic_reset_window)
    d_loss_history = RunningStats(args.dynamic_reset_window)

    # train loop

    for search_iter in tqdm(range(int(start_search_iter),
                                  int(args.max_search_iter)),
                            desc='search progress'):
        logger.info(f"<start search iteration {search_iter}>")
        if search_iter == args.grow_step1 or search_iter == args.grow_step2:

            # save
            cur_stage = grow_ctrler.cur_stage(search_iter)
            logger.info(f'=> grow to stage {cur_stage}')
            prev_archs, prev_hiddens = get_topk_arch_hidden(
                args, controller, gen_net, prev_archs, prev_hiddens)

            # grow section
            del controller
            del ctrl_optimizer
            controller, ctrl_optimizer = create_ctrler(args, cur_stage,
                                                       weights_init)

            dataset = datasets.ImageDataset(args, 2**(cur_stage + 3),
                                            args.dis_batch_size,
                                            args.num_workers)
            train_loader = dataset.train

        dynamic_reset = train_shared(args,
                                     gen_net,
                                     dis_net,
                                     g_loss_history,
                                     d_loss_history,
                                     controller,
                                     gen_optimizer,
                                     dis_optimizer,
                                     train_loader,
                                     prev_hiddens=prev_hiddens,
                                     prev_archs=prev_archs)
        train_controller(args, controller, ctrl_optimizer, gen_net,
                         prev_hiddens, prev_archs, writer_dict)

        if dynamic_reset:
            logger.info('re-initialize shared GAN')
            del gen_net, dis_net, gen_optimizer, dis_optimizer
            gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
                args, weights_init)

        save_checkpoint(
            {
                'cur_stage': cur_stage,
                'search_iter': search_iter + 1,
                'gen_model': args.gen_model,
                'dis_model': args.dis_model,
                'controller': args.controller,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'ctrl_state_dict': controller.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'ctrl_optimizer': ctrl_optimizer.state_dict(),
                'prev_archs': prev_archs,
                'prev_hiddens': prev_hiddens,
                'path_helper': args.path_helper
            }, False, args.path_helper['ckpt_path'])

    final_archs, _ = get_topk_arch_hidden(args, controller, gen_net,
                                          prev_archs, prev_hiddens)
    logger.info(f"discovered archs: {final_archs}")
Esempio n. 26
0
def main():

    args = cfg.parse_args()
    torch.cuda.manual_seed(args.rand_seed)

    if args.dataset == 'cifar':
        sample_x = torch.zeros((args.batch_size, 3, 32, 32))
        netE = Res18_Quadratic(3,
                               args.n_chan,
                               32,
                               normalize=False,
                               AF=nn.ELU())

    elif args.dataset == 'mnist':
        sample_x = torch.zeros((args.batch_size, 1, 32, 32))
        netE = Res12_Quadratic(1,
                               args.n_chan,
                               32,
                               normalize=False,
                               AF=nn.ELU())

    elif args.dataset == 'fmnist':
        sample_x = torch.zeros((args.batch_size, 1, 32, 32))
        netE = Res12_Quadratic(1,
                               args.n_chan,
                               32,
                               normalize=False,
                               AF=nn.ELU())

    else:
        raise NotImplementedError('{} unknown dataset'.format(args.dataset))
    #setup gpu
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    netE = netE.to(device)
    if args.n_gpus > 1:
        netE = nn.DataParallel(netE)

    root = 'logs/' + args.log + '_' + args.time

    # 'single' mode saves one file with a custom number of images;
    # 'all' mode samples from every saved checkpoint in turn.

    #set annealing schedule
    if args.annealing_schedule == 'exp':
        Nsampling = 2000  #exponential schedule with flat region in the beginning and end
        Tmax, Tmin = 100, 1
        T = Tmax * np.exp(-np.linspace(0, Nsampling - 1, Nsampling) *
                          (np.log(Tmax / Tmin) / Nsampling))
        T = np.concatenate((Tmax * np.ones((500, )), T), axis=0)
        T = np.concatenate((T, Tmin * np.linspace(1, 0, 200)), axis=0)

    elif args.annealing_schedule == 'lin':
        Nsampling = 2000  #linear schedule with flat region in the beginning and end
        Tmax, Tmin = 100, 1
        T = np.linspace(Tmax, Tmin, Nsampling)
        T = np.concatenate((Tmax * np.ones((500, )), T), axis=0)
        T = np.concatenate((T, Tmin * np.linspace(1, 0, 200)), axis=0)
    #sample

    if args.sample_mode == 'single':
        filename = args.file_name + str(args.net_indx) + '.pt'
        netE.load_state_dict(torch.load(root + '/models/' + filename))

        n_batches = int(np.ceil(args.n_samples_save / args.batch_size))

        denoise_samples = []
        print('sampling starts')
        for i in range(n_batches):
            initial_x = 0.5 + torch.randn_like(sample_x).to(device)
            x_list, E_trace = Annealed_Langevin_E(netE, initial_x,
                                                  args.sample_step_size, T,
                                                  100)

            x_denoise = SS_denoise(x_list[-1][:].to(device), netE, 0.1)
            denoise_samples.append(x_denoise)
            print('batch {}/{} finished'.format((i + 1), n_batches))

        denoise_samples = torch.cat(denoise_samples, 0)
        torch.save(
            denoise_samples, root + '/samples/' + args.dataset + '_' +
            str(args.n_samples_save) + 'samples.pt')

    elif args.sample_mode == 'all':
        n_batches = int(np.ceil(256 / args.batch_size))
        i = args.net_indx
        while True:
            filename = args.file_name + str(i) + '.pt'
            try:
                netE.load_state_dict(torch.load(root + '/models/' + filename))
            except FileNotFoundError:
                print(root + '/models/' + filename)
                print('file not found or reached last file')
                break

            print('generating samples for ' + filename)
            denoise_samples = []
            # Use a separate loop variable so the checkpoint index `i` is not
            # clobbered by the batch loop.
            for b in range(n_batches):
                initial_x = 0.5 + torch.randn_like(sample_x).to(device)
                x_list, E_trace = Annealed_Langevin_E(netE, initial_x,
                                                      args.sample_step_size, T,
                                                      100)
                print(str(len(x_list)))
                x_denoise = SS_denoise(x_list[-1].to(device), netE, 0.1)
                denoise_samples.append(x_denoise)
                print('batch {}/{} finished'.format(b + 1, n_batches))
            denoise_samples = torch.cat(denoise_samples, 0)
            save_sample_pdf(
                denoise_samples[0:256], (16, 16),
                root + '/samples/' + args.dataset + '_256samples_' + str(i) +
                'knet_denoise.pdf')
            i += args.save_every
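
`SS_denoise` is used above as a one-step denoiser applied to the final Langevin sample. A sketch of single-step gradient denoising consistent with the call signature `SS_denoise(x, netE, sigma)`, assuming the learned energy's gradient serves as the score estimate (illustrative, not the repo's exact function):

import torch

def SS_denoise_sketch(x, netE, sigma):
    # One empirical-Bayes style step: x' = x - sigma^2 * dE/dx.
    x = x.clone().requires_grad_(True)
    E = netE(x).sum()
    grad_x = torch.autograd.grad(E, x)[0]
    return (x - sigma ** 2 * grad_x).detach()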
Esempio n. 27
0
                    nargs='?',
                    default=argparse.SUPPRESS)
parser.add_argument('align',
                    help='alignments between target and source words',
                    nargs='?',
                    default=argparse.SUPPRESS)
parser.add_argument(
    '-s',
    '--subcorpora',
    help='YAML description of subcorpora lines (space separated file list)',
    default=argparse.SUPPRESS,
    action=store_training)
if __name__ == '__main__':
    os.putenv('LANG', 'C')
    os.putenv('LC_ALL', 'C')
    d = cfg.parse_args(parser, write='$outdir/rules.config', modeldir=True)
    cfgf = open(os.path.join(d.outdir, 'rules.config'), 'a')
    print('\nrules:', d.outdir, file=cfgf)
    cfgf.close()
    dir = os.path.abspath(os.path.dirname(__file__))
    finp = os.path.join(dir, 'ghkm', 'filterbadinput')
    names = []
    triplefiles = [d.config['target'], d.config['source'], d.config['align']]

    steps = cfg.steps(d)

    hp = d.hadoop

    training = os.path.join(d.tmpdir, 'training')
    trainingtmp = os.path.join(d.tmpdir, 'training.tmp')
    trainingnew = trainingtmp + '.new'
Esempio n. 28
0
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = True

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # epoch number for dis_net
    dataset = datasets.ImageDataset(args, cur_img_size=8)
    train_loader = dataset.train
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter / len(train_loader))
    else:
        args.max_iter = args.max_epoch * len(train_loader)
    args.max_epoch = args.max_epoch * args.n_critic

    # import network
    gen_net = eval('models.' + args.gen_model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.dis_model +
                   '.Discriminator')(args=args).cuda()
    gen_net.set_arch(args.arch, cur_stage=2)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    gpu_ids = [i for i in range(int(torch.cuda.device_count()))]
    gen_net = torch.nn.DataParallel(gen_net.to("cuda:0"), device_ids=gpu_ids)
    dis_net = torch.nn.DataParallel(dis_net.to("cuda:0"), device_ids=gpu_ids)

    gen_net.module.cur_stage = 0
    dis_net.module.cur_stage = 0
    gen_net.module.alpha = 1.
    dis_net.module.alpha = 1.

    # set optimizer
    if args.optimizer == "adam":
        gen_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
            (args.beta1, args.beta2))
        dis_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
            (args.beta1, args.beta2))
    elif args.optimizer == "adamw":
        gen_optimizer = AdamW(filter(lambda p: p.requires_grad,
                                     gen_net.parameters()),
                              args.g_lr,
                              weight_decay=args.wd)
        dis_optimizer = AdamW(filter(lambda p: p.requires_grad,
                                     dis_net.parameters()),
                              args.g_lr,
                              weight_decay=args.wd)
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    elif args.fid_stat is not None:
        fid_stat = args.fid_stat
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (64, args.latent_dim)))
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = args.load_path
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        #         avg_gen_net = deepcopy(gen_net)
        #         avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = checkpoint['gen_avg_param']
        #         del avg_gen_net
        cur_stage = cur_stages(start_epoch, args)
        gen_net.module.cur_stage = cur_stage
        dis_net.module.cur_stage = cur_stage
        gen_net.module.alpha = 1.
        dis_net.module.alpha = 1.

        args.path_helper = checkpoint['path_helper']

    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)

    logger = create_logger(args.path_helper['log_path'])
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    def return_states():
        states = {}
        states['epoch'] = epoch
        states['best_fid'] = best_fid
        states['gen_state_dict'] = gen_net.state_dict()
        states['dis_state_dict'] = dis_net.state_dict()
        states['gen_optimizer'] = gen_optimizer.state_dict()
        states['dis_optimizer'] = dis_optimizer.state_dict()
        states['gen_avg_param'] = gen_avg_param
        states['path_helper'] = args.path_helper
        return states

    # train loop

    for epoch in range(start_epoch + 1, args.max_epoch):
        train(
            args,
            gen_net,
            dis_net,
            gen_optimizer,
            dis_optimizer,
            gen_avg_param,
            train_loader,
            epoch,
            writer_dict,
            fixed_z,
        )
        backup_param = copy_params(gen_net)
        load_params(gen_net, gen_avg_param)
        fid_score = validate(
            args,
            fixed_z,
            fid_stat,
            epoch,
            gen_net,
            writer_dict,
        )
        logger.info(f'FID score: {fid_score} || @ epoch {epoch}.')
        load_params(gen_net, backup_param)
        is_best = False
        if fid_score < best_fid:
            best_fid = fid_score
            is_best = True
        if is_best or epoch % 1 == 0:  # epoch % 1 == 0 is always true: checkpoint every epoch
            states = return_states()
            save_checkpoint(states,
                            is_best,
                            args.path_helper['ckpt_path'],
                            filename=f'checkpoint_epoch_{epoch}.pth')
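
`cur_stages(start_epoch, args)` has to recover the progressive-growing stage from the resume epoch. A hedged sketch, assuming the stage boundaries are a list of epoch thresholds in an attribute like `args.grow_steps` (the attribute name is an assumption for illustration):

def cur_stages_sketch(epoch, args):
    # Count how many grow boundaries the resume epoch has already passed,
    # e.g. grow_steps=[20, 40] puts epoch 25 in stage 1.
    return sum(1 for step in args.grow_steps if epoch >= step)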
Esempio n. 29
0
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initialization type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net = Generator(bottom_width=args.bottom_width,
                        gf_dim=args.gf_dim,
                        latent_dim=args.latent_dim).cuda()
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()
    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    initial_gen_net_weight = torch.load(os.path.join(args.init_path,
                                                     'initial_gen_net.pth'),
                                        map_location="cpu")
    initial_dis_net_weight = torch.load(os.path.join(args.init_path,
                                                     'initial_dis_net.pth'),
                                        map_location="cpu")

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    exp_str = args.dir
    args.load_path = os.path.join('output', exp_str, 'pth',
                                  'epoch{}.pth'.format(args.load_epoch))

    # state dict:
    assert os.path.exists(args.load_path)
    checkpoint = torch.load(args.load_path)
    print('=> loaded checkpoint %s' % args.load_path)
    state_dict = checkpoint['generator']
    gen_net = load_subnet(args, state_dict, initial_gen_net_weight).cuda()
    avg_gen_net = deepcopy(gen_net)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    start_epoch = 0
    best_fid = 1e4

    args.path_helper = set_log_dir('logs', args.exp_name)
    logger = create_logger(args.path_helper['log_path'])
    #logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch))

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }
    gen_avg_param = copy_params(gen_net)
    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler,
                         dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(
                args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict)
            logger.info(
                'Inception score: %.4f, FID score: %.4f || @ epoch %d.' %
                (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net.load_state_dict(gen_net.state_dict())
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
Esempio n. 30
0
File: train.py Project: nnuq/tpu
        if epoch and epoch % args.val_freq == 0 or epoch == args.num_epochs - 1:
            fid_score = validate(args, fid_stat, gen_net, writer_dict,
                                 valid_loader)
            logger.info(f'FID score: {fid_score} || @ epoch {epoch}.')
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'enc_state_dict': enc_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'ae_recon_optimizer': ae_recon_optimizer.state_dict(),
                'ae_reg_optimizer': ae_reg_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])


if __name__ == '__main__':
    arg = cfg.parse_args()

    xmp.spawn(main, args=(arg, ), nprocs=8)
Esempio n. 31
0
import numpy as np
import torch
import torch.nn as nn
from scipy import stats
from torch.utils import data

import cfg
import datasets
import experiments as exp
import logger
import utils

args = cfg.parse_args()

exp_func = getattr(exp, args.experiment)

# Model
_model = cfg.get_model(args.model_name,
                       args.dataset,
                       scales=args.scales,
                       basemodel=args.basemodel_name)
model = nn.DataParallel(_model)
model = model.cuda()

# Optimizer
optimizer = cfg.get_optimizer(model, args.optimizer, lr=args.lr)
scheduler = cfg.get_scheduler(optimizer)

# Criterion
criterion_func = cfg.get_criterion(args.criterion, cuda=True)
criterion = {'embed': criterion_func['MSE'], 'abstr': criterion_func['CE']}
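
The `criterion` dict above pairs an MSE loss for embedding regression with a cross-entropy loss for abstract-label classification. A hedged usage sketch with made-up tensor shapes, only to show how such a dict is typically consumed downstream:

import torch

# Hypothetical batch: 8 embeddings of dim 128 and 8 labels over 10 classes.
pred_embed = torch.randn(8, 128, requires_grad=True)
target_embed = torch.randn(8, 128)
logits = torch.randn(8, 10, requires_grad=True)
labels = torch.randint(0, 10, (8,))

loss = (criterion['embed'](pred_embed, target_embed) +
        criterion['abstr'](logits, labels))
loss.backward()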
Esempio n. 32
0
def main():

    args = cfg.parse_args()
    torch.cuda.manual_seed(args.rand_seed)

    #switch datasets and models

    if args.dataset == 'cifar':
        from data.cifar import inf_train_gen
        itr = inf_train_gen(args.batch_size, flip=False)
        netE = Res18_Quadratic(3,
                               args.n_chan,
                               32,
                               normalize=False,
                               AF=nn.ELU())
        #netE = SE_Res18_Quadratic(3,args.n_chan,32,normalize=False,AF=Swish())

    elif args.dataset == 'mnist':
        from data.mnist_32 import inf_train_gen
        itr = inf_train_gen(args.batch_size)
        netE = Res12_Quadratic(1,
                               args.n_chan,
                               32,
                               normalize=False,
                               AF=nn.ELU())

    elif args.dataset == 'fmnist':
        #print(dataset+str(args.n_chan))
        from data.fashion_mnist_32 import inf_train_gen
        itr = inf_train_gen(args.batch_size)
        netE = Res12_Quadratic(1,
                               args.n_chan,
                               32,
                               normalize=False,
                               AF=nn.ELU())

    else:
        raise NotImplementedError('{} unknown dataset'.format(args.dataset))

    #setup gpu
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    netE = netE.to(device)
    if args.n_gpus > 1:
        netE = nn.DataParallel(netE)

    #setup path

    now = datetime.now()
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    #pdb.set_trace()
    print(str(args.cont))
    #print(str(args.time))
    if args.cont:
        root = 'logs/' + args.log + '_' + args.time  #compose string for loading
        #load network
        file_name = 'netE_' + str(args.net_indx) + '.pt'
        netE.load_state_dict(torch.load(root + '/models/' + file_name))
    else:  # starting fresh creates a new logging folder
        root = 'logs/' + args.log + '_' + timestamp  # add timestamp
        # overwrite if the folder already exists (unlikely, since a timestamp is used)
        if os.path.isdir(root):
            shutil.rmtree(root)
        os.makedirs(root)
        os.makedirs(root + '/models')
        os.makedirs(root + '/samples')

    writer = SummaryWriter(root)

    # setup optimizer and lr scheduler
    params = {'lr': args.max_lr, 'betas': (0.9, 0.95)}
    optimizerE = torch.optim.Adam(netE.parameters(), **params)
    if args.lr_schedule == 'exp':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizerE,
                                                    int(args.n_iter / 6))

    elif args.lr_schedule == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizerE,
                                                               args.n_iter,
                                                               eta_min=1e-6,
                                                               last_epoch=-1)

    elif args.lr_schedule == 'const':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizerE,
                                                    int(args.n_iter))

    #train
    print_interval = 50
    max_iter = args.n_iter + args.net_indx
    batchSize = args.batch_size
    sigma0 = 0.1
    sigma02 = sigma0**2

    if args.noise_distribution == 'exp':
        sigmas_np = np.logspace(np.log10(args.min_noise),
                                np.log10(args.max_noise), batchSize)
    elif args.noise_distribution == 'lin':
        sigmas_np = np.linspace(args.min_noise, args.max_noise, batchSize)

    sigmas = torch.Tensor(sigmas_np).view((batchSize, 1, 1, 1)).to(device)

    start_time = time.time()

    for i in range(args.net_indx, args.net_indx + args.n_iter):
        x_real = itr.__next__().to(device)
        x_noisy = x_real + sigmas * torch.randn_like(x_real)

        x_noisy = x_noisy.requires_grad_()
        E = netE(x_noisy).sum()
        grad_x = torch.autograd.grad(E, x_noisy, create_graph=True)[0]
        x_noisy.detach()

        optimizerE.zero_grad()

        LS_loss = (((
            (x_real - x_noisy) / sigmas / sigma02 + grad_x / sigmas)**2) /
                   batchSize).sum()

        LS_loss.backward()
        optimizerE.step()
        scheduler.step()

        if (i + 1) % print_interval == 0:
            time_spent = time.time() - start_time
            start_time = time.time()
            netE.eval()
            E_real = netE(x_real).mean()
            E_noise = netE(torch.rand_like(x_real)).mean()
            netE.train()

            print(
                'Iteration {}/{} ({:.0f}%), E_real {:e}, E_noise {:e}, Normalized Loss {:e}, time {:4.1f}'
                .format(i + 1, max_iter, 100 * ((i + 1) / max_iter),
                        E_real.item(), E_noise.item(),
                        (sigma02**2) * (LS_loss.item()), time_spent))

            writer.add_scalar('E_real', E_real.item(), i + 1)
            writer.add_scalar('E_noise', E_noise.item(), i + 1)
            writer.add_scalar('loss', (sigma02**2) * LS_loss.item(), i + 1)
            del E_real, E_noise, x_real, x_noisy

        if (i + 1) % args.save_every == 0:
            print("-" * 50)
            file_name = args.file_name + str(i + 1) + '.pt'
            torch.save(netE.state_dict(), root + '/models/' + file_name)
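
The objective above is multi-scale denoising score matching: the energy gradient at the noisy point is trained to match -(x_real - x_noisy) / sigma0**2, with a per-sample noise-scale weighting. The same algebra restated as a standalone function (the same expression as the in-line loss, packaged for clarity):

import torch

def dsm_loss(netE, x_real, sigmas, sigma0=0.1):
    # Perturb, evaluate the energy, and penalize the mismatch between the
    # scaled energy gradient and the scaled denoising direction.
    sigma02 = sigma0 ** 2
    x_noisy = (x_real + sigmas * torch.randn_like(x_real)).requires_grad_()
    E = netE(x_noisy).sum()
    grad_x = torch.autograd.grad(E, x_noisy, create_graph=True)[0]
    target = (x_real - x_noisy) / sigmas / sigma02
    return (((target + grad_x / sigmas) ** 2) / x_real.shape[0]).sum()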