コード例 #1
0
ファイル: dalle.py プロジェクト: deepglugs/dalle
def get_vae(args):
    """Build a DiscreteVAE from ``args`` and optionally restore saved weights.

    The model configuration reads ``args.size``, ``args.vae_layers``,
    ``args.codebook_dims`` and ``args.temperature``; the token count (8192),
    the number of resnet blocks (9) and the hidden dimension (128) are fixed
    here.

    If ``args.vae`` is not ``None`` and names an existing file, that file is
    loaded as the model's state dict. Otherwise the model keeps its fresh
    initialisation and nothing is reported.

    Returns:
        The DiscreteVAE after ``.to(args.device)`` has been applied.
    """
    vae = DiscreteVAE(image_size=args.size,
                      num_layers=args.vae_layers,
                      num_tokens=8192,
                      codebook_dim=args.codebook_dims,
                      num_resnet_blocks=9,
                      hidden_dim=128,
                      temperature=args.temperature)

    if args.vae is not None and os.path.isfile(args.vae):
        print(f"loading state dict from {args.vae}")
        # Deserialize onto the CPU. Without map_location, torch.load tries to
        # restore each tensor to the device it was saved from, which fails
        # when that device is unavailable (e.g. a GPU checkpoint on a
        # CPU-only host). The model is only moved to args.device below, so
        # loading on the CPU is sufficient.
        state_dict = torch.load(args.vae, map_location="cpu")
        vae.load_state_dict(state_dict)

    vae.to(args.device)

    return vae
コード例 #2
0
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5))  #(0.267, 0.233, 0.234))
])

# Discrete VAE whose weights are restored from a saved checkpoint below.
vae = DiscreteVAE(image_size=256,
                  num_layers=3,
                  num_tokens=2048,
                  codebook_dim=256,
                  hidden_dim=128,
                  temperature=0.9)

# Load the pretrained VAE weights from ./models/<vaename>-<load_epoch>.pth.
# map_location="cpu" lets a checkpoint saved on a GPU be read on a machine
# where that device is unavailable; without it torch.load tries to restore
# tensors to the device they were saved from. The model is moved to `device`
# only after the weights are in place.
vae_dict = torch.load("./models/" + vaename + "-" + str(load_epoch) + ".pth",
                      map_location="cpu")
vae.load_state_dict(vae_dict)
vae.to(device)

# Build the DALLE model around the `vae` created earlier in this script.
dalle = DALLE(
    # Per the original note, the image sequence length and the number of
    # image tokens are inferred automatically from the VAE.
    vae=vae,
    # Text side: vocabulary size and sequence length.
    num_text_tokens=10000,
    text_seq_len=256,
    # Transformer dimensions.
    dim=256,  # original note lists 512 as an alternative value
    depth=6,  # original note: "should be 64"
    heads=8,  # number of attention heads
    dim_head=64,  # dimension of each attention head
    # Regularisation.
    attn_dropout=0.1,  # dropout applied in attention
    ff_dropout=0.1,  # dropout applied in the feedforward layers
)

# load pretrained dalle if continuing training