# Example #1
# 0
def run(config):
    """Train an nnetRNN classifier on latent features from a frozen CURL model.

    A pre-trained supervised CURL network is used as a fixed feature
    extractor; an RNN classifier is trained on top of its latents with
    frame-level cross-entropy.  Each epoch runs training and validation,
    decays the learning rate (and restores the best weights) when the
    validation loss regresses, and checkpoints every
    ``config.model_save_interval`` epochs.

    Args:
        config: Namespace providing store_path, experiment_name, curl_model,
            num_layers, hidden_dim, num_classes, optimizer, batch_size,
            learning_rate, dropout, lrr, lr_tol, weight_decay, clip_thresh,
            egs_dir, train_set, dev_set, epochs, model_save_interval and
            use_gpu.

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            adam / adadelta / sgd / adagrad / rmsprop.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory ...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and mirror every record to the console.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load the pre-trained CURL model (map everything onto CPU first).
    curl = torch.load(config.curl_model,
                      map_location=lambda storage, loc: storage)
    curl_model = nnetCurlSupervised(curl['feature_dim'] * curl['num_frames'],
                                    curl['encoder_num_layers'],
                                    curl['decoder_num_layers'],
                                    curl['hidden_dim'], curl['bn_dim'],
                                    curl['comp_num'], config.use_gpu)
    curl_model.load_state_dict(curl["model_state_dict"])
    # The CURL network is only a frozen feature extractor here: put it in
    # eval mode so any dropout/batch-norm layers behave deterministically.
    curl_model.eval()

    model = nnetRNN(curl['bn_dim'], config.num_layers, config.hidden_dim,
                    config.num_classes, 0)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d', config.num_layers)
    logging.info('Hidden Dimension: %d', config.hidden_dim)
    logging.info('Number of Classes: %d', config.num_classes)
    logging.info('Data dimension: %d', curl['feature_dim'])
    logging.info('Number of Frames: %d', curl['num_frames'])
    logging.info('Optimizer: %s ', config.optimizer)
    logging.info('Batch Size: %d ', config.batch_size)
    logging.info('Initial Learning Rate: %f ', config.learning_rate)
    logging.info('Dropout: %f ', config.dropout)
    logging.info('Learning rate reduction rate: %f ', config.lrr)
    logging.info('Weight decay: %f ', config.weight_decay)
    sys.stdout.flush()

    if config.use_gpu:
        # Pin this process to its allocated GPU before any .cuda() calls.
        # (Renamed from `id`, which shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()
        curl_model = curl_model.cuda()

    criterion = nn.CrossEntropyLoss()

    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        # Adadelta adapts its own step size; no explicit lr is passed.
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Load datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    # Initial checkpoint.  Pass the path to torch.save directly: the
    # previous open(model_path, 'wb') handle was never closed.
    # NOTE(review): the file name says epoch_0 while the stored epoch is 1,
    # as in the original code -- confirm which one consumers expect.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    err_p = 0
    best_model_state = None
    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        tr_fer = []
        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort each batch by decreasing sequence length (required by
            # the packed-sequence handling downstream).
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Extract CURL embeddings.  The extractor is frozen (its
            # parameters are not in the optimizer), so skip autograd
            # tracking to save memory and compute.
            with torch.no_grad():
                _, latent = curl_model(batch_x, batch_l)
                batch_x = compute_latent_features(latent)
            optimizer.zero_grad()

            # Main forward pass; flatten padded sequences to frame level.
            class_out = model(batch_x, batch_l)
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)

            loss = criterion(class_out, lab)

            train_losses.append(loss.item())
            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        val_losses = []
        val_fer = []
        # Validation loop: no parameter updates happen here, so disable
        # autograd entirely (the original also called a pointless
        # optimizer.zero_grad() per batch; removed).
        with torch.no_grad():
            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # First get CURL embeddings
                _, latent = curl_model(batch_x, batch_l)
                batch_x = compute_latent_features(latent)

                # Main forward pass
                class_out = model(batch_x, batch_l)
                class_out = pad2list(class_out, batch_l)
                lab = pad2list(lab, batch_l)

                loss = criterion(class_out, lab)

                val_losses.append(loss.item())
                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(),
                                    lab.data.numpy()))

        # Manage learning rate and revert model.  state_dict() returns
        # references to the live parameter tensors, so clone each tensor:
        # a plain assignment would keep tracking subsequent training
        # updates and make the revert below a silent no-op.
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = {k: v.detach().clone()
                                for k, v in model.state_dict().items()}
        else:
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = {k: v.detach().clone()
                                    for k, v in model.state_dict().items()}

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val: {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            # Save to the path directly (no leaked file handle).
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'vaeenc': config.curl_model,
                    'feature_dim': curl['feature_dim'],
                    'num_frames': curl['num_frames'],
                    'num_classes': config.num_classes,
                    'num_layers': config.num_layers,
                    'hidden_dim': config.hidden_dim,
                    'ep_loss_tr': ep_loss_tr,
                    'ep_loss_dev': ep_loss_dev,
                    'dropout': config.dropout,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
def run(config):
    """Train the CURL multistream classifier with a joint objective.

    Builds an ``nnetCurlMultistreamClassifier`` and trains it to maximize
    the supervised CURL log-likelihood while minimizing frame-level
    cross-entropy (optimized objective: ``-curl_ll + 100 * ce``).  Supports
    warm-starting from a previous model (growing the mixture by one
    component) and resuming from a checkpoint; the learning rate is decayed
    and the best weights restored whenever validation loss regresses.

    Args:
        config: Namespace with model, optimizer and data settings
            (store_path, experiment_name, feature_dim, encoder/decoder/
            classifier layer counts, hidden_dim, hidden_dim_classifier,
            bn_dim, comp_num, comp_label, num_classes, encoder_grad_scale,
            load_previous_model, load_checkpoint, egs_dir, train_set,
            dev_set, optimizer, learning_rate, lrr, lr_tol, weight_decay,
            clip_thresh, batch_size, epochs, model_save_interval, use_gpu).

    Raises:
        NotImplementedError: for an unsupported ``config.optimizer``.
    """
    if config.use_gpu:
        # Pin this process to its allocated GPU before any .cuda() calls.
        # (Renamed from `id`, which shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # Mirror every log record to the console as well.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Feature configuration: 'concat_feats' is "<left>,<right>" context,
    # so the stacked-frame count is left + right + 1.
    egs_path = os.path.join(config.egs_dir, config.train_set, 'egs.config')
    with open(egs_path, 'rb') as egs_file:  # close the handle (was leaked)
        egs_config = pkl.load(egs_file)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d', config.encoder_num_layers)
    logging.info('Decoder Number of Layers: %d', config.decoder_num_layers)
    logging.info('Classifier Number of Layers: %d', config.classifier_num_layers)
    logging.info('Hidden Dimension: %d', config.hidden_dim)
    logging.info('Classifier Hidden Dimension: %d', config.hidden_dim_classifier)
    logging.info('Data dimension: %d', config.feature_dim)
    logging.info('Number of classes: %d', config.num_classes)
    logging.info('Bottleneck dimension: %d', config.bn_dim)
    logging.info('Component Number: %d', config.comp_num)
    logging.info('Number of Frames: %d', num_frames)
    logging.info('Optimizer: %s ', config.optimizer)
    logging.info('Batch Size: %d ', config.batch_size)
    logging.info('Initial Learning Rate: %f ', config.learning_rate)
    logging.info('Learning rate reduction rate: %f ', config.lrr)
    logging.info('Weight decay: %f ', config.weight_decay)
    logging.info('Encoder Gradient Scale: %f ', config.encoder_grad_scale)

    sys.stdout.flush()

    model = nnetCurlMultistreamClassifier(config.feature_dim * num_frames, config.encoder_num_layers,
                                          config.decoder_num_layers, config.classifier_num_layers, config.hidden_dim,
                                          config.hidden_dim_classifier, config.bn_dim, config.comp_num,
                                          config.num_classes,
                                          config.use_gpu, enc_scale=config.encoder_grad_scale)

    lr = config.learning_rate
    criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        # Adadelta adapts its own step size; no explicit lr is passed.
        optimizer = optim.Adadelta(model.parameters(), weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    if config.load_previous_model != "None":
        # Warm start from an earlier model and grow the mixture by one
        # component with a random prior mean (uniform in [-2.5, 2.5)).
        ckpt = torch.load(config.load_previous_model)
        model.load_state_dict(ckpt["model_state_dict"])
        model.expand_component(config.use_gpu)
        previous_mean_p = torch.from_numpy(ckpt["prior_means"])
        previous_mean_p = torch.cat([previous_mean_p, 5 * (torch.rand(1, config.bn_dim) - 0.5)])

    if config.load_checkpoint != "None":
        # Resume training: restore weights, epoch counter and prior means.
        ckpt = torch.load(config.load_checkpoint)
        model.load_state_dict(ckpt["model_state_dict"])
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        ep_start = ckpt["epoch"]
        means_p = torch.from_numpy(ckpt["prior_means"])
    else:
        ep_start = 0
        if config.load_previous_model != "None":
            means_p = previous_mean_p
        else:
            # Random initial prior means, uniform in [-2.5, 2.5).
            means_p = 5 * (torch.rand(config.comp_num, config.bn_dim) - 0.5)
        model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
        # Pass the path to torch.save directly: the previous
        # open(model_path, 'wb') handle was never closed.
        torch.save({
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'prior_means': means_p.numpy()}, model_path)

    if config.use_gpu:
        # Move to GPU once, after all state loading above (the original
        # called .cuda() twice).
        model = model.cuda()

    ep_curl_tr = []
    ep_loss_tr = []
    ep_fer_tr = []
    ep_curl_dev = []
    ep_loss_dev = []
    ep_fer_dev = []

    # Load Datasets

    dataset_train = nnetDatasetSeq(os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev, batch_size=config.batch_size, shuffle=True)

    # Large sentinel so the first completed epoch (which may be > 0 when
    # resuming from a checkpoint) always becomes the new best.
    err_p = 10000000
    # state_dict() returns references to the live tensors; clone each one
    # so the snapshot does not track subsequent training updates (a plain
    # assignment made the revert-to-best below a silent no-op).
    best_model_state = {k: v.detach().clone()
                        for k, v in model.state_dict().items()}

    for epoch_i in range(ep_start, config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_curl_losses = []
        train_losses = []
        tr_fer = []

        # Main training loop

        for batch_x, batch_l, lab in data_loader_train:
            # Sort each batch by decreasing sequence length (required by
            # the packed-sequence handling downstream).
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            optimizer.zero_grad()

            # Main forward pass
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Convert the padded sequence tensors to frame-wise form;
            # class_out is indexed by the supervised mixture component.
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list3d(ae_out, batch_l)
            class_out = pad2list(class_out[config.comp_label], batch_l)
            lab = pad2list(lab, batch_l)

            latent_out = (
                pad2list(latent_out[0], batch_l), pad2list3d(latent_out[1], batch_l),
                pad2list3d(latent_out[2], batch_l))

            loss_class = criterion(class_out, lab)
            train_losses.append(loss_class.item())

            loss = curl_loss_supervised(batch_x, ae_out, latent_out, means_p, config.comp_label, use_gpu=config.use_gpu)

            train_curl_losses.append(loss.item())

            if config.use_gpu:
                tr_fer.append(compute_fer(class_out.cpu().data.numpy(), lab.cpu().data.numpy()))
            else:
                tr_fer.append(compute_fer(class_out.data.numpy(), lab.data.numpy()))

            # `loss` is a log-likelihood (to be maximized), hence negated;
            # cross-entropy is weighted by a fixed factor of 100.
            (-loss + 100 * loss_class).backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()

        ep_curl_tr.append(np.mean(train_curl_losses))
        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))

        ######################
        ##### Validation #####
        ######################

        model.eval()

        val_curl_losses = []
        val_losses = []
        val_fer = []

        # No parameter updates here, so disable autograd.
        with torch.no_grad():
            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # Main forward pass
                class_out, ae_out, latent_out = model(batch_x, batch_l)

                # Convert the padded sequence tensors to frame-wise form
                batch_x = pad2list(batch_x, batch_l)
                class_out = pad2list(class_out[config.comp_label], batch_l)
                lab = pad2list(lab, batch_l)
                ae_out = pad2list3d(ae_out, batch_l)
                latent_out = (pad2list(latent_out[0], batch_l), pad2list3d(latent_out[1], batch_l),
                              pad2list3d(latent_out[2], batch_l))

                loss_class = criterion(class_out, lab)
                loss = curl_loss_supervised(batch_x, ae_out, latent_out, means_p, config.comp_label,
                                            use_gpu=config.use_gpu)

                val_curl_losses.append(loss.item())
                val_losses.append(loss_class.item())
                if config.use_gpu:
                    val_fer.append(compute_fer(class_out.cpu().data.numpy(), lab.cpu().data.numpy()))
                else:
                    val_fer.append(compute_fer(class_out.data.numpy(), lab.data.numpy()))

        ep_curl_dev.append(np.mean(val_curl_losses))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        # Manage learning rate and revert model (see best_model_state note
        # above about cloning snapshots).
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = {k: v.detach().clone()
                                for k, v in model.state_dict().items()}
        else:
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = {k: v.detach().clone()
                                    for k, v in model.state_dict().items()}

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr CURL Log-likelihood: {:.3f} || Tr Loss: {:.3f} || Tr FER: {:.3f} :: Val CURL Log-likelihood: {:.3f} || Val Loss: {:.3f} || Val FER: {:.3f}".format(
            epoch_i + 1, lr, ep_curl_tr[-1], ep_loss_tr[-1], ep_fer_tr[-1], ep_curl_dev[-1], ep_loss_dev[-1],
            ep_fer_dev[-1])

        logging.info(print_log)
        sys.stdout.flush()

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            # Save to the path directly (no leaked file handle).
            torch.save({
                'epoch': epoch_i + 1,
                'feature_dim': config.feature_dim,
                'num_frames': num_frames,
                'encoder_num_layers': config.encoder_num_layers,
                'decoder_num_layers': config.decoder_num_layers,
                'classifier_num_layers': config.classifier_num_layers,
                'hidden_dim': config.hidden_dim,
                'hidden_dim_classifier': config.hidden_dim_classifier,
                'comp_num': config.comp_num,
                'num_classes': config.num_classes,
                'bn_dim': config.bn_dim,
                'ep_curl_tr': ep_curl_tr,
                'ep_curl_dev': ep_curl_dev,
                'prior_means': means_p.numpy(),
                'lr': lr,
                'encoder_grad_scale': config.encoder_grad_scale,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()}, model_path)
# Example #3
# 0
def run(config):
    """Train a modulation-filter CNN classifier (modulationSigmoidNet).

    Reads the feature context from egs.config, builds the CNN, and runs
    per-epoch training and validation with frame-level cross-entropy while
    tracking the network's modulation-frequency statistic.  The learning
    rate is decayed (and the best weights restored) when validation loss
    regresses; checkpoints are written every
    ``config.model_save_interval`` epochs.

    Args:
        config: Namespace providing store_path, experiment_name, egs_dir,
            train_set, dev_set, feature_dim, num_layers_dec, hidden_dim,
            num_classes, in_channels, out_channels (comma-separated ints),
            kernel, input_filter_kernel, wind_size, freq_num, optimizer,
            batch_size, learning_rate, dropout, lrr, lr_tol, weight_decay,
            clip_thresh, epochs, model_save_interval and use_gpu.

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            adam / adadelta / sgd / adagrad / rmsprop.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory ...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and mirror every record to the console.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Feature configuration: 'concat_feats' is "<left>,<right>" context,
    # so the stacked-frame count is left + right + 1.
    egs_path = os.path.join(config.egs_dir, config.train_set, 'egs.config')
    with open(egs_path, 'rb') as egs_file:  # close the handle (was leaked)
        egs_config = pkl.load(egs_file)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Number of Decoder Layers: %d', config.num_layers_dec)
    logging.info('Hidden Dimension: %d', config.feature_dim)
    logging.info('Number of Classes: %d', config.num_classes)
    logging.info('Input channels: %s', config.in_channels)
    logging.info('Output channels: %s', config.out_channels)
    logging.info('Kernel Size: %d', config.kernel)
    logging.info('Input Kernel Size: %d', config.input_filter_kernel)
    logging.info('Window size: %f', config.wind_size)
    logging.info('Frequency Number: %d', config.freq_num)
    logging.info('Data dimension: %d', config.feature_dim)
    logging.info('Number of Frames: %d', num_frames)
    logging.info('Optimizer: %s ', config.optimizer)
    logging.info('Batch Size: %d ', config.batch_size)
    logging.info('Initial Learning Rate: %f ', config.learning_rate)
    logging.info('Dropout: %f ', config.dropout)
    logging.info('Learning rate reduction rate: %f ', config.lrr)
    logging.info('Weight decay: %f ', config.weight_decay)
    sys.stdout.flush()

    # Channel specs arrive as comma-separated strings.
    in_channels = [int(x) for x in config.in_channels.split(',')]
    out_channels = [int(x) for x in config.out_channels.split(',')]
    model = modulationSigmoidNet(config.feature_dim, num_frames, in_channels,
                                 out_channels, config.kernel,
                                 config.input_filter_kernel, config.freq_num,
                                 config.wind_size, config.num_layers_dec,
                                 config.hidden_dim, config.num_classes,
                                 config.use_gpu)

    if config.use_gpu:
        # Pin this process to its allocated GPU before any .cuda() calls.
        # (Renamed from `id`, which shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    criterion = nn.CrossEntropyLoss()

    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        # Adadelta adapts its own step size; no explicit lr is passed.
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Load datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    # Initial checkpoint.  Pass the path to torch.save directly: the
    # previous open(model_path, 'wb') handle was never closed.
    # NOTE(review): the file name says epoch_0 while the stored epoch is 1,
    # as in the original code -- confirm which one consumers expect.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    ep_mod_tr = []
    ep_mod_dev = []
    err_p = 0
    best_model_state = None

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        tr_fer = []
        tr_mod = []
        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Reshape (batch, seq, feat*frames) into CNN input
            # (batch*seq, 1, feat, frames); the original's intermediate
            # 4-D view was redundant.
            s = batch_x.shape
            batch_x = batch_x.view(s[0] * s[1], config.feature_dim,
                                   num_frames)
            batch_x = batch_x[:, None, :, :]
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
                lab = Variable(lab).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
                lab = Variable(lab)

            optimizer.zero_grad()
            # Main forward pass; restore the (batch, seq, classes) layout
            # before flattening padded sequences to frame level.
            class_out, mod_f = model(batch_x)
            class_out = class_out.view(s[0], s[1], -1)
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)

            loss = criterion(class_out, lab)

            train_losses.append(loss.item())
            tr_mod.append(mod_f.item())

            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ep_mod_tr.append(np.mean(tr_mod))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        val_losses = []
        val_fer = []
        val_mod = []

        # Validation loop: no parameter updates, so disable autograd.
        with torch.no_grad():
            for batch_x, batch_l, lab in data_loader_dev:
                # Same CNN input reshape as in training.
                s = batch_x.shape
                batch_x = batch_x.view(s[0] * s[1], config.feature_dim,
                                       num_frames)
                batch_x = batch_x[:, None, :, :]
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                    lab = Variable(lab).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)
                    lab = Variable(lab)

                # Main forward pass
                class_out, mod_f = model(batch_x)
                class_out = class_out.view(s[0], s[1], -1)
                class_out = pad2list(class_out, batch_l)
                lab = pad2list(lab, batch_l)
                loss = criterion(class_out, lab)

                val_losses.append(loss.item())
                val_mod.append(mod_f.item())

                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(),
                                    lab.data.numpy()))

        # Manage learning rate and revert model.  state_dict() returns
        # references to the live tensors, so clone each one; a plain
        # assignment would keep tracking later training updates and make
        # the revert below a silent no-op.
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = {k: v.detach().clone()
                                for k, v in model.state_dict().items()}
        else:
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = {k: v.detach().clone()
                                    for k, v in model.state_dict().items()}

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        ep_mod_dev.append(np.mean(val_mod))

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f} :: Tr Modulation {:.2f} Hz".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1], ep_mod_tr[-1])
        print_log += " || Val: {:.3f} :: Val FER: {:.2f} :: Val Modulation {:.2f} Hz".format(
            ep_loss_dev[-1], ep_fer_dev[-1], ep_mod_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            # Save to the path directly (no leaked file handle).
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': num_frames,
                    'num_classes': config.num_classes,
                    'num_layers_dec': config.num_layers_dec,
                    'hidden_dim': config.hidden_dim,
                    'in_channels': config.in_channels,
                    'out_channels': config.out_channels,
                    'kernel': config.kernel,
                    'freq_num': config.freq_num,
                    'input_filter_kernel': config.input_filter_kernel,
                    'wind_size': config.wind_size,
                    'ep_loss_tr': ep_loss_tr,
                    'ep_loss_dev': ep_loss_dev,
                    'dropout': config.dropout,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
# Example #4
# 0
def run(config):
    """Adapt the encoder of a pre-trained VAE classifier to new data.

    The classifier and VAE-decoder parameters are frozen; only the encoder
    is updated.  For every adaptation batch the epoch performs two steps:

    1. *Adaptation*: an unsupervised update on ``config.adapt_set`` using
       the (negated) VAE log-likelihood plus an m-measure regularizer.
    2. *Anchoring*: a supervised update on utterances sampled at random
       from ``config.anchor_set``, pulling the encoder back towards its
       source-domain behaviour.

    Test-set loss/FER is logged once before adaptation starts and again
    after every epoch; checkpoints are written every
    ``config.model_save_interval`` epochs.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and mirror everything to the console.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load the pre-trained network; map_location keeps tensors on the CPU so
    # the checkpoint can be read on a GPU-less host.
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetVAEClassifier(nnet['feature_dim'] * nnet['num_frames'],
                              nnet['num_classes'], nnet['encoder_num_layers'],
                              nnet['classifier_num_layers'],
                              nnet['ae_num_layers'], nnet['hidden_dim'],
                              nnet['bn_dim'], 0.5, config.use_gpu)
    model.load_state_dict(nnet['model_state_dict'])

    # Only the encoder should be updated: freeze classifier and decoder.
    for p in model.classifier.parameters():
        p.requires_grad = False

    for p in model.vae_decoder.parameters():
        p.requires_grad = False

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (nnet['encoder_num_layers']))
    logging.info('Classifier Number of Layers: %d' %
                 (nnet['classifier_num_layers']))
    logging.info('AE Number of Layers: %d' % (nnet['ae_num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Bottleneck dimension: %d' % (nnet['bn_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Encoder Dropout: %f ' % (nnet['enc_dropout']))
    sys.stdout.flush()

    if config.use_gpu:
        # Pin this process to a free GPU.  (Renamed from `id`, which
        # shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save the starting point as "epoch 0".  Pass the path directly to
    # torch.save instead of an open() handle that was never closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    # Per-epoch histories (stored inside every checkpoint).
    ep_vae_adapt = []
    ep_mm_adapt = []
    ep_loss_anchor = []
    ep_fer_anchor = []
    ep_vae_anchor = []
    ep_loss_test = []
    ep_fer_test = []
    ep_vae_test = []

    # Anchor set: lengths and labels are loaded up front; feature tensors are
    # loaded lazily per sampled utterance inside the training loop.
    path = os.path.join(config.egs_dir, config.anchor_set)
    with open(os.path.join(path, 'lengths.pkl'), 'rb') as f:
        lengths_anchor = pickle.load(f)
    labels_anchor = torch.load(os.path.join(path, 'labels.pkl'))
    anchor_ids = list(labels_anchor.keys())

    # Adaptation set (unlabelled)
    dataset_adapt = nnetDatasetSeqAE(
        os.path.join(config.egs_dir, config.adapt_set))
    data_loader_adapt = torch.utils.data.DataLoader(
        dataset_adapt, batch_size=config.batch_size, shuffle=True)

    # Test set (labelled)
    dataset_test = nnetDatasetSeq(os.path.join(config.egs_dir,
                                               config.test_set))
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=config.batch_size, shuffle=True)

    # Baseline: performance of the un-adapted model on the test set.
    model.eval()
    test_losses = []
    test_vae_losses = []
    test_fer = []
    for batch_x, batch_l, lab in data_loader_test:

        # Sort utterances by decreasing length (packed-sequence convention).
        _, indices = torch.sort(batch_l, descending=True)
        if config.use_gpu:
            batch_x = Variable(batch_x[indices]).cuda()
            batch_l = Variable(batch_l[indices]).cuda()
            lab = Variable(lab[indices]).cuda()
        else:
            batch_x = Variable(batch_x[indices])
            batch_l = Variable(batch_l[indices])
            lab = Variable(lab[indices])

        class_out, ae_out, latent_out = model(batch_x, batch_l)

        # Flatten padded sequence tensors to frame-wise form.
        class_out = pad2list(class_out, batch_l)
        batch_x = pad2list(batch_x, batch_l)
        lab = pad2list(lab, batch_l)

        ae_out = pad2list(ae_out, batch_l)
        latent_out = (pad2list(latent_out[0],
                               batch_l), pad2list(latent_out[1], batch_l))

        loss_classifier = criterion_classifier(class_out, lab)
        loss_vae = vae_loss(batch_x, ae_out, latent_out)

        test_losses.append(loss_classifier.item())
        test_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())

        if config.use_gpu:
            test_fer.append(
                compute_fer(class_out.cpu().data.numpy(),
                            lab.cpu().data.numpy()))
        else:
            test_fer.append(
                compute_fer(class_out.data.numpy(), lab.data.numpy()))

    print_log = "Initial Testset Error : Adapt (Test) loss: {:.3f} :: Adapt (Test) FER: {:.2f} :: Adapt (Test) Vae log-likelihood loss: {:.3f}".format(
        np.mean(test_losses), np.mean(test_fer), np.mean(test_vae_losses))

    logging.info(print_log)

    for epoch_i in range(config.epochs):

        ######################
        ##### Adaptation #####
        ######################

        model.train()
        adapt_vae_losses = []
        adapt_mm_losses = []
        anchor_losses = []
        anchor_vae_losses = []
        anchor_fer = []
        test_losses = []
        test_vae_losses = []
        test_fer = []

        for batch_x, batch_l in data_loader_adapt:

            # --- Unsupervised adaptation step ----------------------------
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])

            optimizer.zero_grad()
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)

            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0],
                                   batch_l), pad2list(latent_out[1], batch_l))

            loss_vae = vae_loss(batch_x, ae_out, latent_out)
            mm_loss = mmeasure_loss(class_out, use_gpu=config.use_gpu)
            # Maximize VAE log-likelihood and m-measure by minimizing their
            # negations (no classifier loss here: no labels).
            loss = config.adapt_weight * (
                -loss_vae[0] - loss_vae[1]
            ) - config.mm_weight * mm_loss
            adapt_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())
            adapt_mm_losses.append(mm_loss.item())

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

            # --- Supervised anchoring step -------------------------------
            # Sample a random labelled batch from the anchor set to keep the
            # encoder close to previously seen data.
            ids = [random.choice(anchor_ids) for _ in range(config.batch_size)]
            batch_x = torch.cat([
                torch.load(os.path.join(path, index))[None, :, :]
                for index in ids
            ])
            batch_l = torch.cat(
                [torch.IntTensor([lengths_anchor[index]]) for index in ids])
            lab = torch.cat([labels_anchor[index][None, :] for index in ids])

            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            optimizer.zero_grad()
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            lab = pad2list(lab, batch_l)

            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0],
                                   batch_l), pad2list(latent_out[1], batch_l))

            loss_classifier = criterion_classifier(class_out, lab)
            loss_vae = vae_loss(batch_x, ae_out, latent_out)
            # Anchor step uses the full objective (VAE + classifier).
            loss = config.anchor_weight * (
                -loss_vae[0] - loss_vae[1] + loss_classifier
            )

            anchor_losses.append(loss_classifier.item())
            anchor_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())

            if config.use_gpu:
                anchor_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                anchor_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

        # --- Evaluate on the test set --------------------------------------

        model.eval()

        for batch_x, batch_l, lab in data_loader_test:

            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            class_out, ae_out, latent_out = model(batch_x, batch_l)

            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            lab = pad2list(lab, batch_l)

            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0],
                                   batch_l), pad2list(latent_out[1], batch_l))

            loss_classifier = criterion_classifier(class_out, lab)
            loss_vae = vae_loss(batch_x, ae_out, latent_out)

            test_losses.append(loss_classifier.item())
            test_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())

            if config.use_gpu:
                test_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                test_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

        ep_vae_adapt.append(np.mean(adapt_vae_losses))
        ep_mm_adapt.append(np.mean(adapt_mm_losses))

        ep_loss_anchor.append(np.mean(anchor_losses))
        ep_fer_anchor.append(np.mean(anchor_fer))
        ep_vae_anchor.append(np.mean(anchor_vae_losses))

        ep_loss_test.append(np.mean(test_losses))
        ep_fer_test.append(np.mean(test_fer))
        ep_vae_test.append(np.mean(test_vae_losses))
        print_log = "Epoch: {:d} Adapt (Test) loss: {:.3f} :: Adapt (Test) FER: {:.2f}".format(
            epoch_i + 1, ep_loss_test[-1], ep_fer_test[-1])

        print_log += " || Anchor loss : {:.3f} :: Anchor FER: {:.2f}".format(
            ep_loss_anchor[-1], ep_fer_anchor[-1])

        print_log += " || VAE llhood (Adapt) : {:.3f} :: VAE llhood (Anchor) : {:.3f} :: VAE llhood (Test) : {:.3f} ".format(
            ep_vae_adapt[-1], ep_vae_anchor[-1], ep_vae_test[-1])

        print_log += " || Adapt mm loss : {:.3f} ".format(ep_mm_adapt[-1])

        logging.info(print_log)

        # Periodic checkpoint containing the config and full training history.
        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': nnet['feature_dim'],
                    'num_frames': nnet['num_frames'],
                    'num_classes': nnet['num_classes'],
                    'encoder_num_layers': nnet['encoder_num_layers'],
                    'classifier_num_layers': nnet['classifier_num_layers'],
                    'ae_num_layers': nnet['ae_num_layers'],
                    'ep_vae_adapt': ep_vae_adapt,
                    'ep_mm_adapt': ep_mm_adapt,
                    'ep_loss_anchor': ep_loss_anchor,
                    'ep_fer_anchor': ep_fer_anchor,
                    'ep_vae_anchor': ep_vae_anchor,
                    'ep_loss_test': ep_loss_test,
                    'ep_fer_test': ep_fer_test,
                    'ep_vae_test': ep_vae_test,
                    'hidden_dim': nnet['hidden_dim'],
                    'bn_dim': nnet['bn_dim'],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
def run(config):
    """Train a convolutional VAE (``nnetVAECNN`` / ``nnetVAECNNNopool``).

    The VAE is trained to maximize reconstruction log-likelihood plus the
    (negative) KL term.  If the validation objective degrades by more than
    ``config.lr_tol`` percent, the learning rate is multiplied by
    ``config.lrr`` and the best weights so far are restored.  Checkpoints
    are written every ``config.model_save_interval`` epochs.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and mirror everything to the console.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Feature context, e.g. 'concat_feats' == '5,5' -> 11 frames per example.
    # (with-statement closes the file; the original leaked the handle.)
    with open(os.path.join(config.egs_dir, config.train_set, 'egs.config'),
              'rb') as f:
        egs_config = pkl.load(f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Input Channels: %s' % (config.in_channels))
    logging.info('Output Channels: %s' % (config.out_channels))
    logging.info('Kernel: %s' % (config.kernel))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))

    sys.stdout.flush()

    # Channel/kernel specs arrive as comma-separated strings.
    in_channels = [int(x) for x in config.in_channels.split(',')]
    out_channels = [int(x) for x in config.out_channels.split(',')]
    kernel = tuple([int(x) for x in config.kernel.split(',')])

    if config.nopool:
        model = nnetVAECNNNopool(config.feature_dim, num_frames, in_channels,
                                 out_channels, kernel, config.bn_dim,
                                 config.use_gpu)
    else:
        model = nnetVAECNN(config.feature_dim, num_frames, in_channels,
                           out_channels, kernel, config.bn_dim, config.use_gpu)

    if config.use_gpu:
        # Pin this process to a free GPU.  (Renamed from `id`, which
        # shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    lr = config.learning_rate

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save the starting point as "epoch 0".  Pass the path directly to
    # torch.save instead of an open() handle that was never closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    # Per-epoch histories (stored inside every checkpoint).
    ep_vae_rec_tr = []
    ep_vae_kl_tr = []
    ep_vae_rec_dev = []
    ep_vae_kl_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 0
    best_model_state = None

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_vae_rec_losses = []
        train_vae_kl_losses = []

        for batch_x, batch_l, lab in data_loader_train:
            s = batch_x.shape
            # (batch, seq, feat*frames) -> (batch*seq, 1, feat, frames) for
            # the CNN.  The original code repeated this identical reshape
            # inside a `config.nopool` branch; one pass gives the same result.
            batch_x = batch_x.view(s[0], s[1], config.feature_dim, num_frames)
            batch_x = batch_x.view(s[0] * s[1], config.feature_dim, num_frames)
            batch_x = batch_x[:, None, :, :]
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            optimizer.zero_grad()

            ae_out, latent_out = model(batch_x)

            # The nopool variant additionally needs lengths and the original
            # batch shape to mask padding frames.
            if config.nopool:
                loss = vae_loss_nopool(batch_x[:, 0, :, :], ae_out[:, 0, :, :],
                                       latent_out, batch_l, s)
            else:
                loss = vae_loss(batch_x[:, 0, :, :], ae_out[:, 0, :, :],
                                latent_out)

            train_vae_rec_losses.append(loss[0].item())
            train_vae_kl_losses.append(loss[1].item())

            # Maximize the log-likelihood: minimize its negation.
            (-loss[0] - loss[1]).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

        ep_vae_rec_tr.append(np.mean(train_vae_rec_losses))
        ep_vae_kl_tr.append(np.mean(train_vae_kl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()

        with torch.set_grad_enabled(False):

            val_vae_rec_losses = []
            val_vae_kl_losses = []

            for batch_x, batch_l, lab in data_loader_dev:
                s = batch_x.shape
                batch_x = batch_x.view(s[0], s[1], config.feature_dim,
                                       num_frames)
                batch_x = batch_x.view(s[0] * s[1], config.feature_dim,
                                       num_frames)
                batch_x = batch_x[:, None, :, :]
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)

                ae_out, latent_out = model(batch_x)

                if config.nopool:
                    loss = vae_loss_nopool(batch_x[:, 0, :, :],
                                           ae_out[:, 0, :, :], latent_out,
                                           batch_l, s)
                else:
                    loss = vae_loss(batch_x[:, 0, :, :], ae_out[:, 0, :, :],
                                    latent_out)

                val_vae_rec_losses.append(loss[0].item())
                val_vae_kl_losses.append(loss[1].item())

            ep_vae_rec_dev.append(np.mean(val_vae_rec_losses))
            ep_vae_kl_dev.append(np.mean(val_vae_kl_losses))

        # Manage learning rate: reduce and roll back to the best weights
        # whenever the validation objective worsens beyond the tolerance.
        if epoch_i == 0:
            err_p = -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses)
            best_model_state = model.state_dict()
        else:
            if -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses) > (
                    100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = -np.mean(val_vae_rec_losses) - np.mean(
                    val_vae_kl_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr VAE ll={:.3f},rec={:.3f},kld={:.3f} :: Val VAE ll={:.3f},rec={:.3f},kld={:.3f}".format(
            epoch_i + 1, lr, ep_vae_kl_tr[-1] + ep_vae_rec_tr[-1],
            ep_vae_rec_tr[-1], ep_vae_kl_tr[-1],
            ep_vae_kl_dev[-1] + ep_vae_rec_dev[-1], ep_vae_rec_dev[-1],
            ep_vae_kl_dev[-1])

        logging.info(print_log)

        # Periodic checkpoint.  BUG FIX: torch.save was dedented outside this
        # `if`, so a checkpoint was rewritten every epoch to the last computed
        # model_path (initially the epoch-0 file); it now runs only at the
        # configured save interval, matching the other training scripts.
        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': num_frames,
                    'in_channels': config.in_channels,
                    'out_channels': config.out_channels,
                    'kernel': config.kernel,
                    'bn_dim': config.bn_dim,
                    'nopool': config.nopool,
                    'ep_vae_kl_tr': ep_vae_kl_tr,
                    'ep_vae_rec_tr': ep_vae_rec_tr,
                    'ep_vae_kl_dev': ep_vae_kl_dev,
                    'ep_vae_rec_dev': ep_vae_rec_dev,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
def run(config):
    """Train a CURL model (``nnetCurlSupervised``) without labels.

    The objective is the unsupervised CURL log-likelihood against fixed,
    randomly drawn Gaussian prior means (one per mixture component).  If
    the validation objective degrades by more than ``config.lr_tol``
    percent, the learning rate is multiplied by ``config.lrr`` and the
    best weights so far are restored.  Checkpoints are written every
    ``config.model_save_interval`` epochs.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and mirror everything to the console.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Feature context, e.g. 'concat_feats' == '5,5' -> 11 frames per example.
    # (with-statement closes the file; the original leaked the handle.)
    with open(os.path.join(config.egs_dir, 'egs.config'), 'rb') as f:
        egs_config = pkl.load(f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Decoder Number of Layers: %d' % (config.decoder_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Component Number: %d' % (config.comp_num))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))

    sys.stdout.flush()

    model = nnetCurlSupervised(config.feature_dim * num_frames,
                               config.encoder_num_layers,
                               config.decoder_num_layers, config.hidden_dim,
                               config.bn_dim, config.comp_num, config.use_gpu)
    if config.use_gpu:
        # Pin this process to a free GPU.  (Renamed from `id`, which
        # shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    lr = config.learning_rate

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save the starting point as "epoch 0".  Pass the path directly to
    # torch.save instead of an open() handle that was never closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    # Per-epoch histories (stored inside every checkpoint).
    ep_curl_tr = []
    ep_curl_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 0
    best_model_state = None

    # Fixed prior means, drawn uniformly from [-1, 1) per component.
    means_p = 2 * (torch.rand(config.comp_num, config.bn_dim) - 0.5)

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_curl_losses = []

        for batch_x, batch_l, lab in data_loader_train:
            # Sort utterances by decreasing length (packed-sequence convention).
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])

            optimizer.zero_grad()

            ae_out, latent_out = model(batch_x, batch_l)

            # Flatten padded sequence tensors to frame-wise form.
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list3d(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list3d(latent_out[1], batch_l),
                          pad2list3d(latent_out[2], batch_l))
            loss = curl_loss_unsupervised(batch_x,
                                          ae_out,
                                          latent_out,
                                          means_p,
                                          use_gpu=config.use_gpu)

            train_curl_losses.append(loss.item())
            # Maximize the log-likelihood: minimize its negation.
            (-loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

        ep_curl_tr.append(np.mean(train_curl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()

        with torch.set_grad_enabled(False):

            val_curl_losses = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])

                ae_out, latent_out = model(batch_x, batch_l)

                batch_x = pad2list(batch_x, batch_l)

                ae_out = pad2list3d(ae_out, batch_l)
                latent_out = (pad2list(latent_out[0], batch_l),
                              pad2list3d(latent_out[1], batch_l),
                              pad2list3d(latent_out[2], batch_l))
                loss = curl_loss_unsupervised(batch_x,
                                              ae_out,
                                              latent_out,
                                              means_p,
                                              use_gpu=config.use_gpu)

                val_curl_losses.append(loss.item())

            ep_curl_dev.append(np.mean(val_curl_losses))

        # Manage learning rate: reduce and roll back to the best weights
        # whenever the validation objective worsens beyond the tolerance.
        if epoch_i == 0:
            err_p = -np.mean(val_curl_losses)
            best_model_state = model.state_dict()
        else:
            if -np.mean(val_curl_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = -np.mean(val_curl_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr CURL Log-likelihood: {:.3f} :: Val CURL Log-likelihood: {:.3f}".format(
            epoch_i + 1, lr, ep_curl_tr[-1], ep_curl_dev[-1])

        logging.info(print_log)

        # Periodic checkpoint.  BUG FIX: torch.save was dedented outside this
        # `if`, so a checkpoint was rewritten every epoch to the last computed
        # model_path (initially the epoch-0 file); it now runs only at the
        # configured save interval, matching the other training scripts.
        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': num_frames,
                    'encoder_num_layers': config.encoder_num_layers,
                    'decoder_num_layers': config.decoder_num_layers,
                    'hidden_dim': config.hidden_dim,
                    'comp_num': config.comp_num,
                    'bn_dim': config.bn_dim,
                    'ep_curl_tr': ep_curl_tr,
                    'ep_curl_dev': ep_curl_dev,
                    'prior_means': means_p.numpy(),
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
def run(config):
    """Train a multitask AE/AR classifier (nnetAEClassifierMultitaskAEAR).

    The model has three heads trained jointly: a frame classifier
    (cross-entropy), an autoencoder that reconstructs the input features
    (MSE), and an autoregressive head that predicts features shifted by
    ``config.time_shift`` frames (MSE).  The total loss is the unweighted
    sum of the three.

    Side effects: creates ``config.store_path`` / the experiment
    directory, configures file+console logging, and writes model
    checkpoints every ``config.model_save_interval`` epochs.

    Args:
        config: namespace/argparse-style object; fields used include
            store_path, experiment_name, egs_dir, train_set, dev_set,
            feature_dim, num_classes, encoder_num_layers,
            classifier_num_layers, ae_num_layers, hidden_dim, bn_dim,
            time_shift, optimizer, learning_rate, batch_size, epochs,
            model_save_interval, use_gpu.

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            adam / adadelta / sgd / adagrad / rmsprop.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory ...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and define a new Handler to log to console as well.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration: 'concat_feats' is "left,right" splice
    # context, so each example spans left + right + 1 (center) frames.
    # Use a context manager so the config file handle is closed.
    with open(os.path.join(config.egs_dir, 'egs.config'), 'rb') as f:
        egs_config = pkl.load(f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Classifier Number of Layers: %d' %
                 (config.classifier_num_layers))
    logging.info('AE Number of Layers: %d' % (config.ae_num_layers))
    logging.info('AR Time Shift: %d' % (config.time_shift))
    # BUG FIX: this line previously logged config.feature_dim under the
    # "Hidden Dimension" label; the model is built with config.hidden_dim.
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    sys.stdout.flush()

    model = nnetAEClassifierMultitaskAEAR(
        config.feature_dim * num_frames, config.num_classes,
        config.encoder_num_layers, config.classifier_num_layers,
        config.ae_num_layers, config.hidden_dim, config.bn_dim,
        config.time_shift)
    if config.use_gpu:
        # Set environment variable for GPU ID.
        # (renamed from `id` to avoid shadowing the builtin)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()
    criterion_ae = nn.MSELoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save an initial (pre-training) checkpoint.  torch.save accepts a
    # path directly; the previous open(model_path, 'wb') leaked the handle.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    # Per-epoch history: classifier loss, frame error rate, AE loss, AR loss.
    ep_loss_tr = []
    ep_fer_tr = []
    ep_ae_tr = []
    ep_ar_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    ep_ae_dev = []
    ep_ar_dev = []

    # Load Datasets

    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        train_ae_losses = []
        train_ar_losses = []
        tr_fer = []

        # Main training loop

        for batch_x, batch_l, lab in data_loader_train:
            # Sort the batch by descending sequence length (required by
            # downstream packed-sequence handling in the model).
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Main forward pass
            class_out, ae_out, ar_out = model(batch_x, batch_l)

            # Flatten padded sequence outputs to frame-wise form.  The AR
            # head predicts time_shift frames ahead, so its valid length
            # is shorter by config.time_shift.
            class_out = pad2list(class_out, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            ar_out = pad2list(ar_out, batch_l - config.time_shift)
            lab = pad2list(lab, batch_l)

            optimizer.zero_grad()

            loss_classifier = criterion_classifier(class_out, lab)
            loss_ae = criterion_ae(ae_out, pad2list(batch_x, batch_l))
            # AR target: the input shifted forward by time_shift frames.
            loss_ar = criterion_ae(
                ar_out,
                pad2list(batch_x[:, config.time_shift:, :],
                         batch_l - config.time_shift))
            loss = loss_classifier + loss_ae + loss_ar

            train_losses.append(loss_classifier.item())
            train_ae_losses.append(loss_ae.item())
            train_ar_losses.append(loss_ar.item())
            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

            loss.backward()
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ep_ae_tr.append(np.mean(train_ae_losses))
        ep_ar_tr.append(np.mean(train_ar_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()

        with torch.set_grad_enabled(False):

            val_losses = []
            val_ae_losses = []
            val_ar_losses = []
            val_fer = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # Main forward pass
                class_out, ae_out, ar_out = model(batch_x, batch_l)

                # Flatten padded sequence outputs to frame-wise form.
                class_out = pad2list(class_out, batch_l)
                ae_out = pad2list(ae_out, batch_l)
                ar_out = pad2list(ar_out, batch_l - config.time_shift)
                lab = pad2list(lab, batch_l)

                loss_classifier = criterion_classifier(class_out, lab)
                loss_ae = criterion_ae(ae_out, pad2list(batch_x, batch_l))
                loss_ar = criterion_ae(
                    ar_out,
                    pad2list(batch_x[:, config.time_shift:, :],
                             batch_l - config.time_shift))

                val_losses.append(loss_classifier.item())
                val_ae_losses.append(loss_ae.item())
                val_ar_losses.append(loss_ar.item())

                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(), lab.data.numpy()))

            ep_loss_dev.append(np.mean(val_losses))
            ep_fer_dev.append(np.mean(val_fer))
            ep_ae_dev.append(np.mean(val_ae_losses))
            ep_ar_dev.append(np.mean(val_ar_losses))

        print_log = "Epoch: {:d} Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val : {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        print_log += " || AE Loss (Train) : {:.3f} :: AE Loss (Dev) : {:.3f} ".format(
            ep_ae_tr[-1], ep_ae_dev[-1])
        print_log += " || AR Loss (Train) : {:.3f} :: AR Loss (Dev) : {:.3f} ".format(
            ep_ar_tr[-1], ep_ar_dev[-1])
        logging.info(print_log)

        # Periodic checkpoint with enough metadata to rebuild the model.
        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': num_frames,
                    'num_classes': config.num_classes,
                    'encoder_num_layers': config.encoder_num_layers,
                    'classifier_num_layers': config.classifier_num_layers,
                    'ae_num_layers': config.ae_num_layers,
                    'hidden_dim': config.hidden_dim,
                    'bn_dim': config.bn_dim,
                    'time_shift': config.time_shift,
                    'ep_loss_tr': ep_loss_tr,
                    'ep_loss_dev': ep_loss_dev,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
# Beispiel #8
# 0
def run(config):
    """Train a VAE with an attached frame classifier (nnetVAEClassifier).

    Total loss per batch is
    ``ce_weight * cross_entropy - (vae_reconstruction + vae_kl)``.
    The cross-entropy weight is multiplied by ``config.cerr`` every
    ``config.ce_change_interval`` epochs.  The learning rate is multiplied
    by ``config.lrr`` whenever validation classifier loss rises by more
    than ``config.lr_tol`` percent over the best epoch so far, and the
    model is rolled back to the best saved state.  Batches whose VAE loss
    becomes NaN are skipped.

    Side effects: creates ``config.store_path`` / the experiment
    directory, configures file+console logging, and writes model
    checkpoints every ``config.model_save_interval`` epochs.

    Args:
        config: namespace/argparse-style object; fields used include
            store_path, experiment_name, egs_dir, train_set, dev_set,
            feature_dim, num_classes, encoder_num_layers,
            classifier_num_layers, ae_num_layers, hidden_dim, bn_dim,
            enc_dropout, optimizer, learning_rate, weight_decay,
            batch_size, epochs, clip_thresh, ce_weight_init, cerr,
            ce_change_interval, lrr, lr_tol, model_save_interval, use_gpu.

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            adam / adadelta / sgd / adagrad / rmsprop.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory ...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # ... and define a new Handler to log to console as well.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration: 'concat_feats' is "left,right" splice
    # context, so each example spans left + right + 1 (center) frames.
    # Use a context manager so the config file handle is closed.
    with open(os.path.join(config.egs_dir, 'egs.config'), 'rb') as f:
        egs_config = pkl.load(f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Classifier Number of Layers: %d' %
                 (config.classifier_num_layers))
    logging.info('AE Number of Layers: %d' % (config.ae_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Inital Cross-entropy weight: %f ' % (config.ce_weight_init))
    logging.info('Cross-entropy change intervla: %d ' %
                 (config.ce_change_interval))
    logging.info('Encoder Dropout: %f ' % (config.enc_dropout))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))

    sys.stdout.flush()

    model = nnetVAEClassifier(config.feature_dim * num_frames,
                              config.num_classes, config.encoder_num_layers,
                              config.classifier_num_layers,
                              config.ae_num_layers, config.hidden_dim,
                              config.bn_dim, config.enc_dropout,
                              config.use_gpu)
    if config.use_gpu:
        # Set environment variable for GPU ID.
        # (renamed from `id` to avoid shadowing the builtin)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()
    lr = config.learning_rate
    ce_weight = config.ce_weight_init

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save an initial (pre-training) checkpoint.  torch.save accepts a
    # path directly; the previous open(model_path, 'wb') leaked the handle.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    # Per-epoch history: classifier loss, frame error rate, and the two
    # VAE log-likelihood terms (reconstruction, KL).
    ep_loss_tr = []
    ep_fer_tr = []
    ep_vae_rec_tr = []
    ep_vae_kl_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    ep_vae_rec_dev = []
    ep_vae_kl_dev = []

    # Load Datasets

    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    # Best validation loss so far and the matching model state, used to
    # roll back when validation loss regresses.
    err_p = 0
    best_model_state = None

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        train_vae_rec_losses = []
        train_vae_kl_losses = []
        tr_fer = []

        # Main training loop

        for batch_x, batch_l, lab in data_loader_train:
            # Sort the batch by descending sequence length (required by
            # downstream packed-sequence handling in the model).
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            optimizer.zero_grad()

            # Main forward pass
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Flatten padded sequence outputs to frame-wise form.
            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            lab = pad2list(lab, batch_l)

            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0],
                                   batch_l), pad2list(latent_out[1], batch_l))

            loss_classifier = criterion_classifier(class_out, lab)
            loss_vae = vae_loss(batch_x, ae_out, latent_out)
            if np.isnan(loss_vae[0].item()) or np.isnan(loss_vae[1].item()):
                # Skip this batch entirely; its gradients are unusable.
                # (Removed a `revert = True` flag that was never read.)
                logging.info(
                    "VAE Loss can diverged to nan, reverting to previous model"
                )
                continue

            # Maximize VAE log-likelihood (hence the minus sign) while
            # minimizing the weighted classification loss.
            loss = ce_weight * loss_classifier - (loss_vae[0] + loss_vae[1])

            train_losses.append(loss_classifier.item())
            train_vae_rec_losses.append(loss_vae[0].item())
            train_vae_kl_losses.append(loss_vae[1].item())
            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

            loss.backward()
            # Clip gradients to stabilize training (norm value unused).
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.clip_thresh)
            optimizer.step()

        # Decay the cross-entropy weight on schedule.
        if not (epoch_i + 1) % config.ce_change_interval:
            logging.info("Changing CE weight from {:.6f} to {:.6f}".format(
                ce_weight, config.cerr * ce_weight))
            ce_weight = config.cerr * ce_weight

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ep_vae_rec_tr.append(np.mean(train_vae_rec_losses))
        ep_vae_kl_tr.append(np.mean(train_vae_kl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()

        with torch.set_grad_enabled(False):

            val_losses = []
            val_vae_rec_losses = []
            val_vae_kl_losses = []
            val_fer = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # Main forward pass
                class_out, ae_out, latent_out = model(batch_x, batch_l)

                # Flatten padded sequence outputs to frame-wise form.
                class_out = pad2list(class_out, batch_l)
                batch_x = pad2list(batch_x, batch_l)
                lab = pad2list(lab, batch_l)

                ae_out = pad2list(ae_out, batch_l)
                latent_out = (pad2list(latent_out[0], batch_l),
                              pad2list(latent_out[1], batch_l))
                loss_classifier = criterion_classifier(class_out, lab)
                loss_vae = vae_loss(batch_x, ae_out, latent_out)

                val_losses.append(loss_classifier.item())
                val_vae_rec_losses.append(loss_vae[0].item())
                val_vae_kl_losses.append(loss_vae[1].item())

                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(), lab.data.numpy()))

            ep_loss_dev.append(np.mean(val_losses))
            ep_fer_dev.append(np.mean(val_fer))
            ep_vae_rec_dev.append(np.mean(val_vae_rec_losses))
            ep_vae_kl_dev.append(np.mean(val_vae_kl_losses))

        # Manage learning rate: reduce and roll back to the best model
        # when validation loss regresses beyond the tolerance.
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = model.state_dict()
        else:
            if np.mean(val_losses) > (100 + config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val : {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        print_log += " || VAE Log-Likelihood (Train) : Rec> {:.3f} KL> {:.3f} :: VAE Log-Likelihood (Dev) :  Rec> {:.3f} KL> {:.3f} ".format(
            ep_vae_rec_tr[-1], ep_vae_kl_tr[-1], ep_vae_rec_dev[-1],
            ep_vae_kl_dev[-1])
        logging.info(print_log)

        # Periodic checkpoint with enough metadata to rebuild the model.
        # BUG FIX: torch.save was previously de-indented out of this `if`,
        # so a checkpoint was written every epoch — and before the first
        # save-interval epoch it silently overwrote the __epoch_0 file,
        # since model_path had not been updated yet.
        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': num_frames,
                    'num_classes': config.num_classes,
                    'encoder_num_layers': config.encoder_num_layers,
                    'classifier_num_layers': config.classifier_num_layers,
                    'ae_num_layers': config.ae_num_layers,
                    'hidden_dim': config.hidden_dim,
                    'bn_dim': config.bn_dim,
                    'enc_dropout': config.enc_dropout,
                    'ep_loss_tr': ep_loss_tr,
                    'ep_loss_dev': ep_loss_dev,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)