def main(conf):
    device = "cuda:0" if torch.cuda.is_available() else 'cpu'
    beta_schedule = "linear"
    beta_start = 1e-4
    beta_end = 2e-2
    n_timestep = 1000

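    # A world size above one means we are running under DistributedDataParallel.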
    conf.distributed = dist.get_world_size() > 1

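    # Random horizontal flips for augmentation; Normalize maps the [0, 1]
    # tensors from ToTensor into the [-1, 1] range the model trains on.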
    transform = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True),
        ]
    )

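    # Dataset at the configured resolution; the sampler shards it across
    # processes when training is distributed.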
    train_set = MultiResolutionDataset(
        conf.dataset.path, transform, conf.dataset.resolution
    )
    train_sampler = dist.data_sampler(
        train_set, shuffle=True, distributed=conf.distributed
    )
    train_loader = conf.training.dataloader.make(train_set, sampler=train_sampler)

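    # The denoising UNet, plus a second identical copy intended to hold an
    # exponential moving average (EMA) of the weights during training.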
    model = UNet(
        conf.model.in_channel,
        conf.model.channel,
        channel_multiplier=conf.model.channel_multiplier,
        n_res_blocks=conf.model.n_res_blocks,
        attn_strides=conf.model.attn_strides,
        dropout=conf.model.dropout,
        fold=conf.model.fold,
    )
    model = model.to(device)
    ema = UNet(
        conf.model.in_channel,
        conf.model.channel,
        channel_multiplier=conf.model.channel_multiplier,
        n_res_blocks=conf.model.n_res_blocks,
        attn_strides=conf.model.attn_strides,
        dropout=conf.model.dropout,
        fold=conf.model.fold,
    )
    ema = ema.to(device)

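    # Wrap the model so gradients are averaged across processes.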
    if conf.distributed:
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[dist.get_local_rank()],
            output_device=dist.get_local_rank(),
        )

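    # Optimizer and learning-rate scheduler are constructed from the config.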
    optimizer = conf.training.optimizer.make(model.parameters())
    scheduler = conf.training.scheduler.make(optimizer)

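    # Precompute the beta schedule and move the diffusion helper's buffers
    # to the device.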
    betas = make_beta_schedule(beta_schedule, beta_start, beta_end, n_timestep)
    diffusion = GaussianDiffusion(betas).to(device)

    train(conf, train_loader, model, ema, diffusion, optimizer, scheduler, device)


def generate_sample(model):
    betas = make_beta_schedule("linear", 1e-4, 2e-2, 1000)
    diffusion = GaussianDiffusion(betas).to("cuda")

    # Run the reverse process from pure noise, capturing the intermediate
    # batch every 10 steps.
    imgs = p_sample_loop(diffusion, model, [16, 3, 128, 128], "cuda", capture_every=10)
    imgs = imgs[1:]  # drop the first capture

    # Follow a single sample through the captures and tile every 4th
    # intermediate (excluding the last) into a 5-column grid.
    sample_id = 0  # renamed from `id`, which shadows the builtin
    grid = make_grid(
        torch.cat([i[sample_id : sample_id + 1] for i in imgs[:-1:4]], 0),
        nrow=5,
        normalize=True,
        value_range=(-1, 1),  # this keyword was named `range` in older torchvision
    )

    return grid.detach().mul(255).cpu().type(torch.uint8).permute(1, 2, 0).numpy()
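
# Minimal usage sketch for generate_sample (illustration only; the checkpoint
# path and the "ema" state-dict key below are assumptions, not part of this
# repo's documented layout):
#
#   ckpt = torch.load("checkpoint/diffusion.pt", map_location="cuda")
#   model.load_state_dict(ckpt["ema"])
#   grid = generate_sample(model.to("cuda").eval())
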
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from model import UNet
from diffusion import make_beta_schedule, GaussianDiffusion
from config import config

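# Smoke test for the base "diffusion" config: compute one training loss on
# random inputs and print it.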
conf = config.diffusion
betas = make_beta_schedule(**conf.diffusion.beta_schedule)
diffusion = GaussianDiffusion(betas)
model = UNet(**conf.model)
img = paddle.randn([
    conf.training.dataloader.batch_size,
    conf.model.in_channel,
    conf.dataset.resolution,
    conf.dataset.resolution
])
time = paddle.randint(
    0, conf.diffusion.beta_schedule.n_timestep, (img.shape[0],)
)
loss = diffusion.p_loss(model, img, time)
print(loss.numpy())


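# Repeat the smoke test with the "improved" config.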
conf = config.improved
betas = make_beta_schedule(**conf.diffusion.beta_schedule)
diffusion = GaussianDiffusion(betas)
model = UNet(**conf.model)
img = paddle.randn([
    conf.training.dataloader.batch_size,