Example #1
    def run(self):
        # torch.autograd.set_detect_anomaly(True) ####
        val_epoch_loss_hist = []
        self.time_ref = time.time()
        try:
            for epoch in range(self.num_epochs):
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                records_train = self._train()
                records_train.update({
                    "split": "train",
                    "epoch": epoch,
                    "experiment": self.exp_id
                })
                self.stats["train"].append(records_train)
                train_epoch_loss = records_train["loss"]

                print(
                    f"Train epoch {epoch:04}: average loss = {train_epoch_loss:6.10}"
                )

                records_val = self._val()
                records_val.update({
                    "split": "val",
                    "epoch": epoch,
                    "experiment": self.exp_id
                })
                val_epoch_loss = records_val["loss"]
                self.stats["val"].append(records_val)

                print(
                    f"Validation epoch {epoch:04}: average loss = {val_epoch_loss:6.10}"
                )
                print("".join(f"{k:>15}:  {v}\n"
                              for k, v in records_val.items()))

                savepath = os.path.join(self.output_dir, "model",
                                        f"ckpt_epoch_{epoch:04}.pt")
                utils.save_model(self.model, savepath)

                if val_epoch_loss < self.best_val_epoch_loss:
                    self.best_val_epoch_loss = val_epoch_loss
                    self.best_model_epoch = epoch

                if np.isnan(train_epoch_loss) and np.isnan(val_epoch_loss):
                    break

                if len(val_epoch_loss_hist) < self.early_stop_hist_len + 1:
                    val_epoch_loss_hist = [val_epoch_loss] + val_epoch_loss_hist
                else:
                    val_epoch_loss_hist = [val_epoch_loss] + val_epoch_loss_hist[:-1]
                    best_delta = np.max(np.diff(val_epoch_loss_hist))
                    if best_delta < self.early_stop_min_delta:
                        break

        finally:
            self._save_stats()
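
The utils.save_model(self.model, savepath) helper is not shown in this snippet. A minimal sketch, assuming it simply wraps torch.save on the model's state_dict and creates the checkpoint directory first, could look like this:

import os

import torch


def save_model(model, savepath):
    # Create the checkpoint directory if it does not exist yet (assumed behaviour, not the original helper).
    directory = os.path.dirname(savepath)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # Save only the parameters; the architecture is rebuilt from code when loading.
    torch.save(model.state_dict(), savepath)
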
Example #2
def main(args):
    #torch.backends.cudnn.benchmark=True # This makes dilated conv much faster for CuDNN 7.5

    # MODEL
    num_features = [args.features*i for i in range(1, args.levels+1)] if args.feature_growth == "add" else \
                   [args.features*2**i for i in range(0, args.levels)]
    target_outputs = int(args.output_size * args.sr)
    model = Waveunet(args.channels,
                     num_features,
                     args.channels,
                     args.instruments,
                     kernel_size=args.kernel_size,
                     target_output_size=target_outputs,
                     depth=args.depth,
                     strides=args.strides,
                     conv_type=args.conv_type,
                     res=args.res,
                     separate=args.separate)

    if args.cuda:
        model = model_utils.DataParallel(model)
        print("move model to gpu")
        model.cuda()

    print('model: ', model)
    print('parameter count: ', str(sum(p.numel() for p in model.parameters())))

    writer = SummaryWriter(args.log_dir)

    ### DATASET
    musdb = get_musdb_folds(args.dataset_dir)
    # If no data augmentation is used, at least crop targets to fit the model output shape
    crop_func = partial(crop_targets, shapes=model.shapes)
    # Data augmentation function for training
    augment_func = partial(random_amplify,
                           shapes=model.shapes,
                           min=0.7,
                           max=1.0)
    train_data = SeparationDataset(musdb,
                                   "train",
                                   args.instruments,
                                   args.sr,
                                   args.channels,
                                   model.shapes,
                                   True,
                                   args.hdf_dir,
                                   audio_transform=augment_func)
    val_data = SeparationDataset(musdb,
                                 "val",
                                 args.instruments,
                                 args.sr,
                                 args.channels,
                                 model.shapes,
                                 False,
                                 args.hdf_dir,
                                 audio_transform=crop_func)
    test_data = SeparationDataset(musdb,
                                  "test",
                                  args.instruments,
                                  args.sr,
                                  args.channels,
                                  model.shapes,
                                  False,
                                  args.hdf_dir,
                                  audio_transform=crop_func)

    dataloader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        worker_init_fn=utils.worker_init_fn)

    ##### TRAINING ####

    # Set up the loss function
    if args.loss == "L1":
        criterion = nn.L1Loss()
    elif args.loss == "L2":
        criterion = nn.MSELoss()
    else:
        raise NotImplementedError("Couldn't find this loss!")

    # Set up optimiser
    optimizer = Adam(params=model.parameters(), lr=args.lr)

    # Set up training state dict that will also be saved into checkpoints
    state = {"step": 0, "worse_epochs": 0, "epochs": 0, "best_loss": np.Inf}

    # LOAD MODEL CHECKPOINT IF DESIRED
    if args.load_model is not None:
        print("Continuing training full model from checkpoint " +
              str(args.load_model))
        state = model_utils.load_model(model, optimizer, args.load_model,
                                       args.cuda)

    print('TRAINING START')
    while state["worse_epochs"] < args.patience:
        print("Training one epoch from iteration " + str(state["step"]))
        avg_time = 0.
        model.train()
        with tqdm(total=len(train_data) // args.batch_size) as pbar:
            np.random.seed()
            for example_num, (x, targets) in enumerate(dataloader):
                if args.cuda:
                    x = x.cuda()
                    for k in list(targets.keys()):
                        targets[k] = targets[k].cuda()

                t = time.time()

                # Set LR for this iteration
                utils.set_cyclic_lr(optimizer, example_num,
                                    len(train_data) // args.batch_size,
                                    args.cycles, args.min_lr, args.lr)
                writer.add_scalar("lr", utils.get_lr(optimizer), state["step"])

                # Compute loss for each instrument/model
                optimizer.zero_grad()
                outputs, avg_loss = model_utils.compute_loss(model,
                                                             x,
                                                             targets,
                                                             criterion,
                                                             compute_grad=True)

                optimizer.step()

                state["step"] += 1

                t = time.time() - t
                avg_time += (1. / float(example_num + 1)) * (t - avg_time)

                writer.add_scalar("train_loss", avg_loss, state["step"])

                if example_num % args.example_freq == 0:
                    input_centre = torch.mean(
                        x[0, :, model.shapes["output_start_frame"]:model.shapes["output_end_frame"]],
                        0)  # Stereo not supported for logs yet
                    writer.add_audio("input",
                                     input_centre,
                                     state["step"],
                                     sample_rate=args.sr)

                    for inst in outputs.keys():
                        writer.add_audio(inst + "_pred",
                                         torch.mean(outputs[inst][0], 0),
                                         state["step"],
                                         sample_rate=args.sr)
                        writer.add_audio(inst + "_target",
                                         torch.mean(targets[inst][0], 0),
                                         state["step"],
                                         sample_rate=args.sr)

                pbar.update(1)

        # VALIDATE
        val_loss = validate(args, model, criterion, val_data)
        print("VALIDATION FINISHED: LOSS: " + str(val_loss))
        writer.add_scalar("val_loss", val_loss, state["step"])

        # EARLY STOPPING CHECK
        checkpoint_path = os.path.join(args.checkpoint_dir,
                                       "checkpoint_" + str(state["step"]))
        if val_loss >= state["best_loss"]:
            state["worse_epochs"] += 1
        else:
            print("MODEL IMPROVED ON VALIDATION SET!")
            state["worse_epochs"] = 0
            state["best_loss"] = val_loss
            state["best_checkpoint"] = checkpoint_path

        # CHECKPOINT
        print("Saving model...")
        model_utils.save_model(model, optimizer, state, checkpoint_path)

        state["epochs"] += 1

    #### TESTING ####
    # Test loss
    print("TESTING")

    # Load best model based on validation loss
    state = model_utils.load_model(model, None, state["best_checkpoint"],
                                   args.cuda)
    test_loss = validate(args, model, criterion, test_data)
    print("TEST FINISHED: LOSS: " + str(test_loss))
    writer.add_scalar("test_loss", test_loss, state["step"])

    # Mir_eval metrics
    test_metrics = evaluate(args, musdb["test"], model, args.instruments)

    # Dump all metrics results into pickle file for later analysis if needed
    with open(os.path.join(args.checkpoint_dir, "results.pkl"), "wb") as f:
        pickle.dump(test_metrics, f)

    # Write most important metrics into Tensorboard log
    avg_SDRs = {
        inst: np.mean([np.nanmean(song[inst]["SDR"]) for song in test_metrics])
        for inst in args.instruments
    }
    avg_SIRs = {
        inst: np.mean([np.nanmean(song[inst]["SIR"]) for song in test_metrics])
        for inst in args.instruments
    }
    for inst in args.instruments:
        writer.add_scalar("test_SDR_" + inst, avg_SDRs[inst], state["step"])
        writer.add_scalar("test_SIR_" + inst, avg_SIRs[inst], state["step"])
    overall_SDR = np.mean([v for v in avg_SDRs.values()])
    writer.add_scalar("test_SDR", overall_SDR)
    print("SDR: " + str(overall_SDR))

    writer.close()
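
Helpers like model_utils.save_model(model, optimizer, state, path) and model_utils.load_model(model, optimizer, path, cuda) are assumed here to checkpoint the network together with its optimiser and training state. A minimal sketch under that assumption (the key names are illustrative, not the library's actual format):

import torch


def save_model(model, optimizer, state, path):
    # Bundle weights, optimiser state and the training-state dict into one checkpoint file.
    torch.save({"model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "state": state}, path)


def load_model(model, optimizer, path, cuda):
    # Map the checkpoint to GPU or CPU, restore the weights, and return the stored training state.
    checkpoint = torch.load(path, map_location="cuda" if cuda else "cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    return checkpoint["state"]
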
Example #3
def train():
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(dev_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(), FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences])))
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                FLAGS.lower)
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), 0, len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data  # number of batches per epoch
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(
                    sess, True, batch)  # run_step returns the global step and this batch's loss
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []
            # use dev data to validate the model
            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            # use the current best model on the test set
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
Example #4
def create_attributes_fc_model(ModelClass,
                               pretrained_fc,
                               pretrained_features,
                               fc_shape,
                               target_columns,
                               weights_root,
                               labels_file,
                               train_images_folder,
                               valid_images_folder=None,
                               is_train=True,
                               batch_size=32,
                               num_workers=4,
                               num_epochs=10,
                               use_gpu=None):
    """
    :param ModelClass: Class of Model to create
    :param pretrained_fc: Pre-trained model's fully connected layers
    :param pretrained_features: Pre-trained feature extractor
    :param fc_shape: Number of outputs of the feature extractor; used as the number of inputs for the next layer
    :param target_columns: Attributes to train
    :param weights_root: Path for weights folder
    :param labels_file: Path to the labels file
    :param train_images_folder: Path to the training images
    :param valid_images_folder: Path to the validation images
    :param is_train: Boolean to train or test
    :param batch_size: Number for batch size
    :param num_workers: Number of workers to load data
    :param num_epochs: Number of epochs to train
    :param use_gpu: Boolean to use GPU
    """

    # Each target needs its own model
    models = {}

    # Create a model for each target attribute/label
    for col_name, col_shape in target_columns.items():
        print("Processing Attribute: {}".format(col_name))

        # Set path for weights file (weights may or may not exist)
        weights_path = os.path.join(weights_root, col_name + ".pth")
        load_weights_path = None

        # Only load weights if the file already exists
        if os.path.exists(weights_path):
            load_weights_path = weights_path

        # Get new Dense model
        model = utils.load_fc_model(ModelClass,
                                    pretrained_fc,
                                    fc_shape,
                                    col_shape,
                                    weights_path=load_weights_path,
                                    use_gpu=use_gpu)

        # Decide if model will be used for training or testing
        if is_train:
            print("Start training for: {}".format(col_name))
            # Train the model
            model = train_model(model,
                                pretrained_features,
                                col_name,
                                labels_file,
                                train_images_folder,
                                valid_images_folder,
                                batch_size,
                                num_workers,
                                num_epochs,
                                use_gpu=use_gpu,
                                flatten_pretrained_out=True)
            # Save weights after completing training
            utils.save_model(model, weights_path)
        # Store each model, whether freshly trained or only loaded
        models[col_name] = model

    return models
Example #5
        # viz
        # tsboard.add_scalar('data/train-loss',train_loss,e)
        # tsboard.add_scalar('data/val-loss',val_loss,e)
        # tsboard.add_scalar('data/val-accuracy',val_acc.item(),e)
        # tsboard.add_scalar('data/train-accuracy',train_acc.item(),e)

        # Write to csv file
        writer.writerow(
            [e + 1, train_loss,
             train_acc.item(), val_loss,
             val_acc.item()])
        # early stopping and save best model
        if val_loss < best_loss:
            # reset the best loss and the patience counter
            best_loss = val_loss
            patience = args.patience
            # save the model, split into an 'arch' part and a 'state_dict' part
            utils.save_model(
                {
                    'arch': args.model,
                    'state_dict': net.state_dict()
                }, './model/{}-run-{}.pth.tar'.format(args.model, run))
        else:
            patience -= 1
            if patience == 0:
                print('Run out of patience!')
                writeFile.close()
                # tsboard.close()
                break
Example #6
    def save(self, job_dir, filepath):
        """Save Keras model to `{job_dir}/{filepath}`"""
        save_model(self.model, job_dir, filepath)
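
A minimal sketch of the save_model(model, job_dir, filepath) helper this wrapper assumes, treating job_dir as a local directory (a gs:// bucket path would need separate handling):

import os


def save_model(model, job_dir, filepath):
    # Join the job directory and file name, then let Keras serialise the full model.
    path = os.path.join(job_dir, filepath)
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    model.save(path)
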
Example #7
def train_model(model,
                optimizer,
                scheduler,
                start_epoch,
                num_epochs,
                dataloaders,
                dataset_sizes,
                dataset,
                ld=0.02,
                alpha=1e-2,
                enc_lr=1e-1,
                enc_num_epoch=80,
                weight_decay=0.0005,
                is_training_encoder=True):

    # Define dataloader & dataset_size for the current (newest) task
    dataloader = dataloaders[model.num_classifiers - 1]
    dataset_size = dataset_sizes[model.num_classifiers - 1]

    # Define Criterion for loss.
    criterion = nn.CrossEntropyLoss()
    LwF_criterion = LwFLoss.LwFLoss()  # LwF_Loss.
    enc_criterion = nn.MSELoss()  # Encoder_Loss.

    # Gen_output for LwFLoss.
    prev_labels = {}
    prev_labels = utils.gen_output(model, dataloader, prev_labels)

    # Define the prev_encoder model & gen_output.
    feature_model = model.features

    prev_encoders, prev_codes = {}, {}
    for i in range(model.num_classifiers - 1):
        prev_encoders[i] = encoderModel()

        if torch.cuda.is_available():
            prev_encoders[i] = prev_encoders[i].cuda()

        # Load the pre-trained encoder.
        prev_encoders[i], _ = utils.save_model(prev_encoders[i],
                                               0,
                                               0,
                                               reuse='encoder_' + dataset[i],
                                               save_mode=False)

        for parameters in prev_encoders[i].parameters():
            parameters.requires_grad = False

        prev_encoders[i].train(False)

        # Gen_output for encLoss.
        if i not in prev_codes:
            prev_codes[i] = []

        prev_codes[i] = utils.gen_output(prev_encoders[i],
                                         dataloader,
                                         prev_codes[i],
                                         feature_model=feature_model)

    best_model_wts = model.state_dict()
    torch.save({'model': best_model_wts}, 'curr_best_model_wts')
    best_loss = 0.0
    best_acc = 0.0

    since = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(start_epoch + epoch,
                                   start_epoch + num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for i, data in enumerate(dataloader[phase]):
                # get the inputs
                inputs, labels, _ = data

                # wrap them in Variable
                if torch.cuda.is_available():
                    inputs, labels = Variable(inputs.cuda()), Variable(
                        labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs, features = model(inputs)
                _, preds = torch.max(outputs[-1].data, 1)  # You could also use torch.topk here.

                if phase == 'train':
                    LwF_Loss, enc_Loss = 0, 0
                    for k in range(model.num_classifiers - 1):
                        # wrap prev_labels in Variable here, batch by batch, to avoid running out of memory
                        if torch.cuda.is_available():
                            prev_labels_i = Variable(prev_labels[k][i].cuda())
                            prev_codes_i = Variable(prev_codes[k][i].cuda())
                        else:
                            prev_labels_i = prev_labels[k][i]
                            prev_codes_i = prev_codes[k][i]

                        # It should be checked.
                        # feature_model = model.pretrained_model.features
                        # for params in feature_model.parameters():
                        #     params.requires_grad = False
                        #
                        # feature_model.train(False)

                        # forward
                        LwF_Loss = LwF_Loss + LwF_criterion(
                            outputs[k], prev_labels_i)

                        codes, _ = prev_encoders[k](features)
                        enc_Loss = enc_Loss + enc_criterion(
                            codes, prev_codes_i)**2

                    # CrossEntropyLoss + Knowledge Distillation Loss + alpha/2 * enc_loss.
                    loss = criterion(
                        outputs[-1],
                        labels) + ld * LwF_Loss + alpha / 2 * enc_Loss
                else:
                    loss = criterion(outputs[-1], labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data).item()

            epoch_loss = running_loss / dataset_size[phase]
            epoch_acc = running_corrects / dataset_size[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_loss = epoch_loss
                best_acc = epoch_acc
                best_model_wts = model.state_dict()
                torch.save({'model': best_model_wts}, 'curr_best_model_wts')

        # if model.num_classifiers > 1:  # Continual Learning.
        #     if (epoch % 2 == 0 and epoch < 10) or (epoch % 10 == 0) or (epoch == num_epochs-1):
        #         test_model(model, dataloaders, dataset_sizes, num_task=0)  # Test the model.
        #     print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best test Loss: {:4f} Acc: {:4f}'.format(best_loss,
                                                    best_acc))  # mems

    # load the best model.
    checkpoint = torch.load('curr_best_model_wts')
    model.load_state_dict(checkpoint['model'])

    # Check the old-task performance separately, because of out-of-memory errors.
    if model.num_classifiers > 1:  # Continual Learning.
        print()
        for i in range(model.num_classifiers - 1):
            test_model(model, dataloaders, dataset_sizes,
                       num_task=i)  # Test the model.

    ##################################################
    # Training Auto-Encoder after learning a new task.
    # ------------------------------------------------
    if is_training_encoder:
        # Define a new encoder model.
        new_encoder_model = encoderModel()

        if torch.cuda.is_available():
            new_encoder_model = new_encoder_model.cuda()

        enc_optimizer = optim.Adadelta(new_encoder_model.parameters(),
                                       lr=enc_lr,
                                       weight_decay=weight_decay)

        # Decay LR by a factor of gamma every step_size.
        enc_lr_scheduler = optim.lr_scheduler.StepLR(enc_optimizer,
                                                     step_size=enc_num_epoch,
                                                     gamma=0.1)

        # Load model.
        ae_name = 'encoder_' + dataset
        start_epoch = 0
        new_encoder_model, start_epoch = utils.save_model(new_encoder_model,
                                                          enc_num_epoch,
                                                          start_epoch,
                                                          save_mode=False,
                                                          reuse=ae_name)
        # Training the auto-encoder.
        new_encoder_model = train_encoder(feature_model,
                                          model,
                                          new_encoder_model,
                                          enc_lr_scheduler,
                                          criterion,
                                          enc_criterion,
                                          enc_optimizer,
                                          dataloader,
                                          dataset_size,
                                          num_epochs=enc_num_epoch)
        # Save model.
        utils.save_model(new_encoder_model,
                         enc_num_epoch,
                         start_epoch,
                         save_mode=True,
                         reuse=ae_name)

    return model
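
In this example utils.save_model doubles as a loader: save_mode=True writes a checkpoint, save_mode=False tries to restore one and falls back to the untouched model. A minimal sketch under that assumption (the checkpoint directory and key names are illustrative):

import os

import torch


def save_model(model, num_epoch, start_epoch, reuse="model", save_mode=True):
    # Checkpoint file name derived from the `reuse` tag; the directory layout is an assumption.
    path = os.path.join("checkpoints", reuse + ".pth")
    if save_mode:
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save({"state_dict": model.state_dict(),
                    "epoch": start_epoch + num_epoch}, path)
        return model, start_epoch + num_epoch
    # save_mode=False: restore an existing checkpoint if there is one, otherwise start fresh.
    if os.path.exists(path):
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint["state_dict"])
        return model, checkpoint["epoch"]
    return model, start_epoch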