def main(args):
    if args.pos1 == 'train':
        train_dataset = myDataset(os.path.join(args.train_dir, 'feature'))
        #prepare dataloader
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=4,
            collate_fn=myDataset.get_collate_fn(args.prediction_num,
                                                args.neg_num,
                                                args.reduce_times))
        saver = pytorch_saver(10, args.save_dir)
        #build model
        model = CPC(args.input_dim,
                    args.feat_dim,
                    reduce_times=args.reduce_times,
                    prediction_num=args.prediction_num)
        if args.resume_dir != '':
            print('loading model')
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])

        model.train()
        model.cuda()
        args.log = os.path.join(args.save_dir, args.log)
        train(model, train_data_loader, saver, args.epochs, args.learning_rate,
              args.log)

    else:
        test_dataset = myDataset(os.path.join(args.test_dir, 'feature'),
                                 os.path.join(args.test_dir, 'phn_align.pkl'))
        test_data_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=4,
            collate_fn=myDataset.get_collate_fn(args.prediction_num,
                                                args.neg_num,
                                                args.reduce_times,
                                                train=False))
        if args.resume_dir == '':
            print("resume should exist in inference mode", file=sys.stderr)
            sys.exit(-1)
        else:
            model = CPC(args.input_dim,
                        args.feat_dim,
                        reduce_times=args.reduce_times,
                        prediction_num=args.prediction_num)
            print('loading model')
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])
            model.eval()
            model.cuda()

            inference(model, test_data_loader, args.result_dir,
                      args.reduce_times)
Example #2
def test():
    args = parser.parse_args()
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../data_prepare/data'
    else:
        cf = ConfigParser.ConfigParser()
        cf.read(args.conf)
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        package = torch.load(model_path)
    
    input_size = package['input_size']
    layers = package['rnn_layers']
    hidden_size = package['hidden_size']
    rnn_type = package['rnn_type']
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    model_type = package['name']
    drop_out = package['_drop_out']
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)

    decoder_type = args.decode_type

    test_dataset = myDataset(data_dir, data_set='test', feature_type=feature_type, out_type=out_type, n_feats=n_feats)
    
    if model_type == 'CNN_LSTM_CTC':
        model = CNN_LSTM_CTC(rnn_input_size=input_size, rnn_hidden_size=hidden_size, rnn_layers=layers, 
                    rnn_type=rnn_type, bidirectional=True, batch_norm=True, num_class=num_class, drop_out=drop_out)
        test_loader = myCNNDataLoader(test_dataset, batch_size=8, shuffle=False,
                    num_workers=4, pin_memory=False)
    else:
        model = CTC_RNN(rnn_input_size=input_size, rnn_hidden_size=hidden_size, rnn_layers=layers,
                    rnn_type=rnn_type, bidirectional=True, batch_norm=True, num_class=num_class, drop_out=drop_out)
        test_loader = myDataLoader(test_dataset, batch_size=8, shuffle=False,
                    num_workers=4, pin_memory=False)
    
    model.load_state_dict(package['state_dict'])
    model.eval()
    
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone, top_paths=40, beam_width=20, blank_index=0, space_idx=-1,
                                lm_path=None, lm_alpha=0.8, lm_beta=1, cutoff_prob=1.0, dic=test_dataset.phone_word)    

    total_wer = 0
    total_cer = 0
    start = time.time()
    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data 
        if model.name == 'CTC_RNN':
            inputs = inputs.transpose(0,1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        
        if model.name == 'CTC_RNN':
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs = model(inputs)
        probs = probs.data.cpu()
        #print(probs)
        
        decoded = decoder.decode(probs, input_size_list)
        
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        for x in range(len(labels)):
            print("origin: "+ labels[x])
            print("decoded: "+ decoded[x])
        cer = 0
        wer = 0
        for x in range(len(labels)):
            cer += decoder.cer(decoded[x], labels[x])
            wer += decoder.wer(decoded[x], labels[x])
            decoder.num_word += len(labels[x].split())
            decoder.num_char += len(labels[x])
        total_cer += cer
        total_wer += wer
    CER = (1 - float(total_cer) / decoder.num_char) * 100
    WER = (1 - float(total_wer) / decoder.num_word) * 100
    # these are accuracy percentages (100 minus the error rate), so label them as such
    print("Character accuracy on test set: %.4f" % CER)
    print("Word accuracy on test set: %.4f" % WER)
    end = time.time()
    time_used = (end - start) / 60.0
    print("Time used for decoding %d sentences: %.4f minutes" % (len(test_dataset), time_used))
Example #3
def main():
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except Exception:
        print("failed to read conf file")
    
    logger = init_logger(os.path.join(args.log_dir, 'train_lstm_ctc.log'))
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    batch_size = cf.getint("Training", 'batch_size')
    
    #Data Loader
    train_dataset = myDataset(data_dir, data_set='train', feature_type=feature_type, out_type=out_type, n_feats=n_feats)
    train_loader = myDataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                        num_workers=4, pin_memory=False)
    dev_dataset = myDataset(data_dir, data_set="dev", feature_type=feature_type, out_type=out_type, n_feats=n_feats)
    dev_loader = myDataLoader(dev_dataset, batch_size=batch_size, shuffle=False,
                        num_workers=4, pin_memory=False)
    
    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1, blank_index=0)
    
    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CTC_RNN(rnn_input_size=rnn_input_size, rnn_hidden_size=rnn_hidden_size, rnn_layers=rnn_layers, 
                        rnn_type=rnn_type, bidirectional=bidirectional, batch_norm=batch_norm, 
                        num_class=num_class, drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    print(model.name)
    
    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    try:
        seed = cf.getint('Training', 'seed')
    except:
        seed = torch.cuda.initial_seed()
    
    params = { 'num_epoches':num_epoches, 'end_adjust_acc':end_adjust_acc, 'seed':seed,
            'decay':decay, 'learning_rate':init_lr, 'weight_decay':weight_decay, 'batch_size':batch_size,
            'feature_type':feature_type, 'n_feats': n_feats, 'out_type': out_type }
    
    if USE_CUDA:
        torch.cuda.manual_seed(seed)
        model = model.cuda()

    print(params)
    
    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, weight_decay=weight_decay)
    
    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    title = dataset+' '+feature_type+str(n_feats)+' LSTM_CTC'
    opts = [dict(title=title+" Loss", ylabel = 'Loss', xlabel = 'Epoch'),
            dict(title=title+" CER on Train", ylabel = 'CER', xlabel = 'Epoch'),
            dict(title=title+' CER on DEV', ylabel = 'DEV CER', xlabel = 'Epoch')]
    viz_window = [None, None, None]
    
    count = 0
    learning_rate = init_lr
    adjust_rate_count = 0  # initialize so the elif branch below never hits an unbound name
    acc_best = -100
    acc_best_true = -100
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    start_time = time.time()
    loss_results = []
    training_cer_results = []
    dev_cer_results = []
    
    while not stop_train:
        if count >= num_epoches:
            break
        count += 1
        
        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay
        
        print("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))
        
        loss = train(model, train_loader, loss_fn, optimizer, logger)
        loss_results.append(loss)
        cer = dev(model, train_loader, decoder, logger)
        print("cer on training set is %.4f" % cer)
        logger.info("cer on training set is %.4f" % cer)
        training_cer_results.append(cer)
        acc = dev(model, dev_loader, decoder, logger)
        dev_cer_results.append(acc)
        
        #model_path_accept = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'.pkl'
        #model_path_reject = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'_rejected.pkl'
        
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0
        
        #torch.save(model.state_dict(), model_path_reject)
        print("adjust_rate_count:"+str(adjust_rate_count))
        print('adjust_time:'+str(adjust_time))
        logger.info("adjust_rate_count:"+str(adjust_rate_count))
        logger.info('adjust_time:'+str(adjust_time))

        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)

        if adjust_time == 8:
            stop_train = True
        
        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        x_axis = range(count)
        y_axis = [loss_results[0:count], training_cer_results[0:count], dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), opts = opts[x],)
            else:
                viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), win = viz_window[x], update = 'replace',)

    print("End training, best cv acc is: %.4f" % acc_best)
    logger.info("End training, best cv acc is: %.4f" % acc_best)
    best_path = os.path.join(args.log_dir, 'best_model'+'_cv'+str(acc_best)+'.pkl')
    cf.set('Model', 'model_file', best_path)
    with open(args.conf, 'w') as conf_file:
        cf.write(conf_file)
    params['epoch']=count
    torch.save(CTC_RNN.save_package(model, optimizer=optimizer, epoch=params, loss_results=loss_results, training_cer_results=training_cer_results, dev_cer_results=dev_cer_results), best_path)
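The loop above hand-rolls a plateau schedule: if dev accuracy fails to improve by end_adjust_acc for 10 consecutive epochs, the learning rate is multiplied by decay and the best weights are restored, and training stops after 8 such decays. A condensed sketch of that policy, for clarity only:

class PlateauSchedule:
    """Condensed version of the schedule above: decay after `patience` stale
    epochs, stop after `max_adjust` decays. Illustrative, not the original."""

    def __init__(self, patience=10, max_adjust=8, min_delta=0.0):
        self.patience, self.max_adjust, self.min_delta = patience, max_adjust, min_delta
        self.best = float('-inf')
        self.stale = 0
        self.adjust_time = 0

    def step(self, metric):
        """Feed the latest dev metric; returns (decay_now, stop_now)."""
        if metric > self.best + self.min_delta:
            self.best, self.stale = metric, 0
            return False, False
        self.stale += 1
        if self.stale >= self.patience:
            self.stale = 0
            self.adjust_time += 1
            return True, self.adjust_time >= self.max_adjust
        return False, False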
Example #4
def main(args):
    if args.pos1 == 'train':
        train_dataset = myDataset(args.train_data)
        dev_dataset = myDataset(args.dev_data)
        #prepare dataloader
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=4,
            collate_fn=myDataset.get_collate_fn(args.question_length,
                                                args.option_length))
        dev_data_loader = torch.utils.data.DataLoader(
            dev_dataset,
            batch_size=args.batch_size,
            num_workers=4,
            collate_fn=myDataset.get_collate_fn(args.question_length,
                                                args.option_length))
        saver = pytorch_saver(10, args.save_dir)
        #build model
        model = qacnn_1d(args.question_length,
                         args.option_length,
                         args.filter_num,
                         args.filter_size,
                         args.cnn_layers,
                         args.dnn_size,
                         train_dataset.word_dim,
                         dropout=args.dropout)
        if args.resume_dir != '':
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])

        model.train()
        model.cuda()
        args.log = os.path.join(args.save_dir, args.log)
        train(model, train_data_loader, dev_data_loader, saver, args.epochs,
              args.learning_rate, args.log)

    else:
        test_dataset = myDataset(args.test_data)
        test_data_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=1,
            collate_fn=myDataset.get_collate_fn(args.question_length,
                                                args.option_length),
            shuffle=False,
            num_workers=2)
        if args.resume_dir == '':
            print("resume should exist in inference mode", file=sys.stderr)
            sys.exit(-1)
        else:
            model = qacnn_1d(args.question_length,
                             args.option_length,
                             args.filter_num,
                             args.filter_size,
                             args.cnn_layers,
                             args.dnn_size,
                             test_dataset.word_dim,
                             dropout=args.dropout)
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])
            model.eval()
            model.cuda()

            inference(model, test_data_loader, args.test_result)
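pytorch_saver(10, args.save_dir) together with pytorch_saver.load_dir(...)['state_dict'] suggests a saver that keeps a bounded number of checkpoints and can reload the newest one. A minimal sketch of that assumed interface (the real class is not shown in these examples):

import os
import torch

class pytorch_saver:
    """Sketch of the assumed interface: keep at most max_keep checkpoints."""

    def __init__(self, max_keep, save_dir):
        self.max_keep = max_keep
        self.save_dir = save_dir
        self.kept = []
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    def save(self, state_dict, step):
        path = os.path.join(self.save_dir, 'model_%d.pth' % step)
        torch.save({'state_dict': state_dict}, path)
        self.kept.append(path)
        if len(self.kept) > self.max_keep:
            os.remove(self.kept.pop(0))       # drop the oldest checkpoint

    @staticmethod
    def load_dir(save_dir):
        """Load the most recently written checkpoint in save_dir."""
        paths = [os.path.join(save_dir, f) for f in os.listdir(save_dir)
                 if f.endswith('.pth')]
        return torch.load(max(paths, key=os.path.getmtime))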
Example #5
                        default=0.0001,
                        metavar='N',
                        help='learning rate for training (default: 0.0001)')
    args = parser.parse_args()
    if not os.path.exists('logs/' + datasetname):
        os.makedirs('logs/' + datasetname)
    log_dir = 'logs/' + datasetname
    train_path = './dataset/iris/iris_train.data'
    test_path = './dataset/iris/iris_test.data'
    for i in range(1, repeat + 1):
        sdae_savepath = ("model/sdae-run-iris-%d.pt" % i)
        if os.path.exists("model/sdae-run-iris-%d.pt" % i) == False:
            print("Experiment #%d" % i)
            write_log("Experiment #%d" % i, log_dir)

            train_data = myDataset(train_path, -1)
            # test_data=myDataset(test_path,-1)
            train_loader = data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           collate_fn=train_data.collate_fn)
            # test_loader = data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True,
            #                                collate_fn=train_data.collate_fn)

            # pretrain
            sdae = StackedDAE(input_dim=4,
                              z_dim=2,
                              binary=False,
                              encodeLayer=[8],
                              decodeLayer=[8],
                              activation="relu",
Example #6
def main(args):
    # Initialize models
    # n_channels is input channels and n_classes is output channels
    model_G = UNet(n_channels=1, n_classes=3)
    model_D = ConvDis(in_channels=3, in_size=args.image_size)

    # Initialize start epochs for G and D
    start_epoch_G = start_epoch_D = 0

    # Start epoch for this session
    start_epoch = 0

    # Load saved models if resume training
    if args.model_G:
        print('Resume model G: %s' % args.model_G)
        checkpoint_G = torch.load(args.model_G)  # load from the path, not the model object
        model_G.load_state_dict(checkpoint_G['state_dict'])
        start_epoch_G = checkpoint_G['epoch']

    if args.model_D:
        print('Resume model D: %s' % args.model_D)
        checkpoint_D = torch.load(args.model_D)  # load from the path, not the model object
        model_D.load_state_dict(checkpoint_D['state_dict'])
        start_epoch_D = checkpoint_D['epoch']

    assert start_epoch_G == start_epoch_D

    # Shift models to GPU
    model_G.cuda()
    model_D.cuda()

    # Initialize optimizers
    optimizer_G = optim.Adam(model_G.parameters(),
                             lr=args.lr_G,
                             betas=(0.5, 0.999),
                             eps=1e-8,
                             weight_decay=args.weight_decay)
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.lr_D,
                             betas=(0.5, 0.999),
                             eps=1e-8,
                             weight_decay=args.weight_decay)

    # Load optimizers if resume training
    if args.model_G:
        optimizer_G.load_state_dict(checkpoint_G['optimizer'])
    if args.model_D:
        optimizer_D.load_state_dict(checkpoint_D['optimizer'])

    # Loss Function
    global criterion
    criterion = nn.BCELoss()
    global L1
    L1 = nn.L1Loss()
    global FeatureLoss
    FeatureLoss = FeatureLoss()  # note: rebinds the class name to an instance (works once, but shadows the class)

    # Dataset
    data_root = args.path
    dataset = args.dataset

    if dataset == 'unsplash':
        from data_loader import Unsplash_Dataset as myDataset
    elif dataset == 'cifar':
        from data_loader import CIFAR_Dataset as myDataset
    # elif dataset == 'bob':
    #     from load_data import Spongebob_Dataset as myDataset
    else:
        raise ValueError('dataset type not supported')

    # Define transform
    image_transform = transforms.Compose(
        [transforms.CenterCrop(args.image_size),
         transforms.ToTensor()])

    data_train = myDataset(data_root,
                           mode='train',
                           transform=image_transform,
                           types='raw',
                           shuffle=True)

    train_loader = data.DataLoader(data_train,
                                   batch_size=args.batch_size,
                                   shuffle=False)

    data_val = myDataset(data_root,
                         mode='test',
                         transform=image_transform,
                         types='raw',
                         shuffle=True)

    val_loader = data.DataLoader(data_val,
                                 batch_size=args.batch_size,
                                 shuffle=False)

    global val_bs
    val_bs = val_loader.batch_size

    # set up plotter, path, etc.
    global iteration, print_interval, plotter, plotter_basic, plot_train_result_interval
    iteration = 0
    print_interval = 5
    plot_train_result_interval = 100
    plotter = Plotter_GAN_TV()
    plotter_basic = Plotter_GAN()

    global img_path
    size = str(args.image_size)
    date = str(datetime.datetime.now().month) + '_' + str(
        datetime.datetime.now().day)
    img_path = '/scratch/as3ek/image_colorization/results/img/%s/GAN_%s%s_%dL1_bs%d_%s_lr_D%s_lr_G%s/' \
               % (date, args.dataset, size, args.lamb, args.batch_size, 'Adam', str(args.lr_D), str(args.lr_G))
    model_path = '/scratch/as3ek/image_colorization/results/model/%s/GAN_%s%s_%dL1_bs%d_%s_lr_D%s_lr_G%s/' \
               % (date, args.dataset, size, args.lamb, args.batch_size, 'Adam', str(args.lr_D), str(args.lr_G))

    if not os.path.exists(img_path):
        os.makedirs(img_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    start_epoch = start_epoch_G  # resume from the checkpoint epoch (0 when training from scratch)

    for epoch in range(start_epoch, args.num_epoch):
        print('Epoch {}/{}'.format(epoch, args.num_epoch - 1))
        print('-' * 20)
        #         if epoch == 0:
        #             val_lerrG, val_errD = validate(val_loader, model_G, model_D, optimizer_G, optimizer_D, epoch=-1)
        # train
        train_errG, train_errD = train(train_loader, model_G, model_D,
                                       optimizer_G, optimizer_D, epoch,
                                       iteration)
        # validate
        val_lerrG, val_errD = validate(val_loader, model_G, model_D,
                                       optimizer_G, optimizer_D, epoch)

        plotter.train_update(train_errG, train_errD)
        plotter.val_update(val_lerrG, val_errD)
        plotter.draw(img_path + 'train_val.png')

        if args.save:
            print('Saving check point')
            save_checkpoint({'epoch': epoch + 1,
                             'state_dict': model_G.state_dict(),
                             'optimizer': optimizer_G.state_dict(),
                             },
                             filename=model_path+'G_epoch%d.pth.tar' \
                             % epoch)
            save_checkpoint({'epoch': epoch + 1,
                             'state_dict': model_D.state_dict(),
                             'optimizer': optimizer_D.state_dict(),
                             },
                             filename=model_path+'D_epoch%d.pth.tar' \
                             % epoch)
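save_checkpoint above receives a plain dict ({'epoch', 'state_dict', 'optimizer'}) and a filename; the conventional PyTorch helper for this is a thin wrapper around torch.save, assumed here since the original helper isn't shown:

import torch

def save_checkpoint(state, filename='checkpoint.pth.tar'):
    # `state` is the plain dict built by the caller; torch.load reverses this
    torch.save(state, filename)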
Example #7
def main():
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except Exception:
        print("failed to read conf file")

    try:
        seed = cf.get('Training', 'seed')
        seed = int(seed)  # int() instead of Python-2-only long()
    except Exception:
        seed = torch.cuda.initial_seed()

    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed_all(seed)

    logger = init_logger(os.path.join(args.log_dir, 'train_ctc_model.log'))

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    rnn_param = {
        "rnn_input_size": rnn_input_size,
        "rnn_hidden_size": rnn_hidden_size,
        "rnn_layers": rnn_layers,
        "rnn_type": rnn_type,
        "bidirectional": bidirectional,
        "batch_norm": batch_norm
    }

    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    add_cnn = cf.getboolean('Model', 'add_cnn')

    cnn_param = {}
    from ast import literal_eval  # safer than eval() for parsing config literals
    layers = cf.getint('CNN', 'layers')
    channel = literal_eval(cf.get('CNN', 'channel'))
    kernel_size = literal_eval(cf.get('CNN', 'kernel_size'))
    stride = literal_eval(cf.get('CNN', 'stride'))
    padding = literal_eval(cf.get('CNN', 'padding'))
    pooling = literal_eval(cf.get('CNN', 'pooling'))
    batch_norm = cf.getboolean('CNN', 'batch_norm')
    activation_function = activate_f[cf.get('CNN', 'activation_function')]

    cnn_param['batch_norm'] = batch_norm
    cnn_param['activate_function'] = activation_function
    cnn_param["layer"] = []
    for layer in range(layers):
        layer_param = [
            channel[layer], kernel_size[layer], stride[layer], padding[layer]
        ]
        if pooling is not None:
            layer_param.append(pooling[layer])
        else:
            layer_param.append(None)
        cnn_param["layer"].append(layer_param)

    model = CTC_Model(rnn_param=rnn_param,
                      add_cnn=add_cnn,
                      cnn_param=cnn_param,
                      num_class=num_class,
                      drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    for idx, m in enumerate(model.modules()):
        print(idx, m)
        break  # only print the top-level module as a compact summary

    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    mel = cf.getboolean('Data', 'mel')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = myDataset(data_dir,
                              data_set='train',
                              feature_type=feature_type,
                              out_type=out_type,
                              n_feats=n_feats,
                              mel=mel)
    dev_dataset = myDataset(data_dir,
                            data_set="dev",
                            feature_type=feature_type,
                            out_type=out_type,
                            n_feats=n_feats,
                            mel=mel)
    if add_cnn:
        train_loader = myCNNDataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=False)
        dev_loader = myCNNDataLoader(dev_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=4,
                                     pin_memory=False)
    else:
        train_loader = myDataLoader(train_dataset,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    num_workers=4,
                                    pin_memory=False)
        dev_loader = myDataLoader(dev_dataset,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=4,
                                  pin_memory=False)
    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1, blank_index=0)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')

    params = {
        'num_epoches': num_epoches,
        'end_adjust_acc': end_adjust_acc,
        'mel': mel,
        'seed': seed,
        'decay': decay,
        'learning_rate': init_lr,
        'weight_decay': weight_decay,
        'batch_size': batch_size,
        'feature_type': feature_type,
        'n_feats': n_feats,
        'out_type': out_type
    }
    print(params)

    if USE_CUDA:
        model = model.cuda()

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=init_lr,
                                 weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    if add_cnn:
        title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC'
    else:
        title = dataset + ' ' + feature_type + str(n_feats) + ' LSTM_CTC'

    opts = [
        dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
        dict(title=title + " Loss on Dev", ylabel='DEV Loss', xlabel='Epoch'),
        dict(title=title + ' CER on DEV', ylabel='DEV CER', xlabel='Epoch')
    ]
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    adjust_rate_count = 0  # initialize so the elif branch below never hits an unbound name
    loss_best = 1000
    loss_best_true = 1000
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    acc_best = 0
    acc_best_true = 0
    start_time = time.time()
    loss_results = []
    dev_loss_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" %
              (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" %
                    (count, learning_rate))

        loss = train(model,
                     train_loader,
                     loss_fn,
                     optimizer,
                     logger,
                     add_cnn=add_cnn,
                     print_every=20)
        loss_results.append(loss)
        acc, dev_loss = dev(model,
                            dev_loader,
                            loss_fn,
                            decoder,
                            logger,
                            add_cnn=add_cnn)
        print("loss on dev set is %.4f" % dev_loss)
        logger.info("loss on dev set is %.4f" % dev_loss)
        dev_loss_results.append(dev_loss)
        dev_cer_results.append(acc)

        #adjust learning rate by dev_loss
        if dev_loss < (loss_best - end_adjust_acc):
            loss_best = dev_loss
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (dev_loss < loss_best + end_adjust_acc):
            adjust_rate_count += 1
            if dev_loss < loss_best and dev_loss < loss_best_true:
                loss_best_true = dev_loss
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 10

        if acc > acc_best:
            acc_best = acc
            best_model_state = copy.deepcopy(model.state_dict())
            best_op_state = copy.deepcopy(optimizer.state_dict())
        '''
        #adjust learning rate by dev_acc
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            loss_best = dev_loss
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                loss_best = dev_loss
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0
        #torch.save(model.state_dict(), model_path_reject)
        '''

        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            if loss_best > loss_best_true:
                loss_best = loss_best_true
            #if acc_best < acc_best_true:
            #    acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)

        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
              (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
                    (count, acc, time_used))

        x_axis = range(count)
        y_axis = [
            loss_results[0:count], dev_loss_results[0:count],
            dev_cer_results[0:count]
        ]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(
                    X=np.array(x_axis),
                    Y=np.array(y_axis[x]),
                    opts=opts[x],
                )
            else:
                viz.line(
                    X=np.array(x_axis),
                    Y=np.array(y_axis[x]),
                    win=viz_window[x],
                    update='replace',
                )

    print("End training, best cv loss is: %.4f, acc is: %.4f" %
          (loss_best, acc_best))
    logger.info("End training, best loss acc is: %.4f, acc is: %.4f" %
                (loss_best, acc_best))
    model.load_state_dict(best_model_state)
    optimizer.load_state_dict(best_op_state)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_cv' + str(acc_best) + '.pkl')
    cf.set('Model', 'model_file', best_path)
    with open(args.conf, 'w') as conf_file:
        cf.write(conf_file)
    params['epoch'] = count

    torch.save(
        CTC_Model.save_package(model,
                               optimizer=optimizer,
                               epoch=params,
                               loss_results=loss_results,
                               dev_loss_results=dev_loss_results,
                               dev_cer_results=dev_cer_results), best_path)
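cnn_param['layer'] above is a list of [channel, kernel_size, stride, padding, pooling] entries. Purely as an illustration of how such a spec could be consumed (not CTC_Model's actual code, and the channel entry's exact format is an assumption):

import torch.nn as nn

def build_cnn(layer_specs, batch_norm=True, activation=nn.ReLU):
    """Turn [[out_channels, kernel, stride, padding, pooling], ...] into a
    Sequential; assumes a single-channel spectrogram input."""
    layers, in_ch = [], 1
    for out_ch, kernel, stride, padding, pooling in layer_specs:
        layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel,
                                stride=stride, padding=padding))
        if batch_norm:
            layers.append(nn.BatchNorm2d(out_ch))
        layers.append(activation())
        if pooling is not None:
            layers.append(nn.MaxPool2d(pooling))
        in_ch = out_ch
    return nn.Sequential(*layers)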
Example #8
def main():
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except Exception:
        print("failed to read conf file")
    
    logger = init_logger(os.path.join(args.log_dir, 'train_cnn_lstm_ctc.log'))
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    batch_size = cf.getint("Training", 'batch_size')
    
    #Data Loader
    train_dataset = myDataset(data_dir, data_set='train', feature_type=feature_type, out_type=out_type, n_feats=n_feats)
    train_loader = myCNNDataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                        num_workers=4, pin_memory=False)
    dev_dataset = myDataset(data_dir, data_set="test", feature_type=feature_type, out_type=out_type, n_feats=n_feats)
    dev_loader = myCNNDataLoader(dev_dataset, batch_size=batch_size, shuffle=False,
                        num_workers=4, pin_memory=False)
    
    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1, blank_index=0)
    
    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CNN_LSTM_CTC(rnn_input_size=rnn_input_size, rnn_hidden_size=rnn_hidden_size, rnn_layers=rnn_layers, 
                        rnn_type=rnn_type, bidirectional=bidirectional, batch_norm=batch_norm, 
                        num_class=num_class, drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    print(model.name)
    
    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    try:
        seed = cf.getint('Training', 'seed')
    except:
        seed = torch.cuda.initial_seed()
    params = { 'num_epoches':num_epoches, 'end_adjust_acc':end_adjust_acc, 'seed':seed,
                'decay':decay, 'learning_rate':init_lr, 'weight_decay':weight_decay, 'batch_size':batch_size,
                'feature_type':feature_type, 'n_feats': n_feats, 'out_type': out_type }
    
    if USE_CUDA:
        torch.cuda.manual_seed(seed)
        model = model.cuda()
    
    print(params)

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom(env='863_corpus')
    title = dataset+' '+feature_type+str(n_feats)+' CNN_LSTM_CTC'
    opts = [dict(title=title+" Loss", ylabel = 'Loss', xlabel = 'Epoch'),
            dict(title=title+" CER on Train", ylabel = 'CER', xlabel = 'Epoch'),
            dict(title=title+' CER on DEV', ylabel = 'DEV CER', xlabel = 'Epoch')]
    viz_window = [None, None, None]
    
    count = 0
    learning_rate = init_lr
    adjust_rate_count = 0  # initialize so the elif branch below never hits an unbound name
    acc_best = -100
    acc_best_true = -100
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    start_time = time.time()
    loss_results = []
    training_cer_results = []
    dev_cer_results = []
    
    while not stop_train:
        if count >= num_epoches:
            break
        count += 1
        
        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay
        
        print("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))
        
        loss = train(model, train_loader, loss_fn, optimizer, logger, print_every=20)
        loss_results.append(loss)
        cer = dev(model, train_loader, decoder, logger)
        print("cer on training set is %.4f" % cer)
        logger.info("cer on training set is %.4f" % cer)
        training_cer_results.append(cer)
        acc = dev(model, dev_loader, decoder, logger)
        dev_cer_results.append(acc)
        
        #model_path_accept = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'.pkl'
        #model_path_reject = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'_rejected.pkl'
        
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0
        #torch.save(model.state_dict(), model_path_reject)
        print("adjust_rate_count:"+str(adjust_rate_count))
        print('adjust_time:'+str(adjust_time))
        logger.info("adjust_rate_count:"+str(adjust_rate_count))
        logger.info('adjust_time:'+str(adjust_time))

        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)
        
        if adjust_time == 8:    
            stop_train = True   
        
        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        x_axis = range(count)
        y_axis = [loss_results[0:count], training_cer_results[0:count], dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), opts = opts[x],)
            else:
                viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), win = viz_window[x], update = 'replace',)

    print("End training, best cv acc is: %.4f" % acc_best)
    logger.info("End training, best cv acc is: %.4f" % acc_best)
    best_path = os.path.join(args.log_dir, 'best_model'+'_cv'+str(acc_best)+'.pkl')
    cf.set('Model', 'model_file', best_path)
    with open(args.conf, 'w') as conf_file:
        cf.write(conf_file)
    params['epoch']=count
    torch.save(CNN_LSTM_CTC.save_package(model, optimizer=optimizer, epoch=params, loss_results=loss_results, training_cer_results=training_cer_results, dev_cer_results=dev_cer_results), best_path)
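The keys the test() functions read back (input_size, rnn_layers, hidden_size, rnn_type, num_class, name, _drop_out, epoch, state_dict) imply that save_package returns a dict roughly like the reconstruction below; the model attribute names are assumptions, not the class's actual method.

def save_package(model, optimizer=None, epoch=None, loss_results=None,
                 training_cer_results=None, dev_cer_results=None):
    """Reconstructed sketch of the saved package; attribute names assumed."""
    return {
        'input_size': model.rnn_input_size,
        'rnn_layers': model.rnn_layers,
        'hidden_size': model.rnn_hidden_size,
        'rnn_type': model.rnn_type,
        'num_class': model.num_class,
        'name': model.name,
        '_drop_out': model.drop_out,
        'epoch': epoch,                        # the params dict (feature_type, n_feats, ...)
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict() if optimizer is not None else None,
        'loss_results': loss_results,
        'training_cer_results': training_cer_results,
        'dev_cer_results': dev_cer_results,
    }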
Example #9
def test():
    model_path = '../log/exp_cnn_lstm_ctc_spectrum201/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum_stride_1_2/exp2_82.1483/best_model_cv80.8660423723.pkl'
    package = torch.load(model_path)
    data_dir = '/home/fran/Documents/CTC_pytorch_data/data_prepare/data'
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    try:
        mel = package['epoch']['mel']
    except KeyError:  # older checkpoints may not record the mel flag
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)

    decoder_type = 'Greedy'

    test_dataset = myDataset(data_dir,
                             data_set='train',
                             feature_type=feature_type,
                             out_type=out_type,
                             n_feats=n_feats,
                             mel=mel)

    model = CTC_Model(rnn_param=rnn_param,
                      add_cnn=add_cnn,
                      cnn_param=cnn_param,
                      num_class=num_class,
                      drop_out=drop_out)

    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset,
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=4,
                                      pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset,
                                   batch_size=1,
                                   shuffle=False,
                                   num_workers=4,
                                   pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()

    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone,
                                space_idx=-1,
                                blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone)

    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = []
    for i in range(49):
        legend.append(test_dataset.int2phone[i])

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            inputs = inputs.transpose(0, 1)

        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()

        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)

        probs, visual = model(inputs, visualize=True)
        probs = probs.data.cpu()

        if add_cnn:
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]

        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))

        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)

        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])

        if add_cnn:
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)

            opts = dict(title=labels[0],
                        xlabel="frame",
                        ylabel='feature_after_cnn')
            after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
            vis.heatmap(after_cnn, opts=opts)

            opts = dict(title=labels[0],
                        xlabel="frame",
                        ylabel='feature_before_rnn')
            before_rnn = visual[2].transpose(0, 1)[0].transpose(0,
                                                                1).data.cpu()
            vis.heatmap(before_rnn, opts=opts)

            show_prob = visual[3].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0],
                             xlabel="frame",
                             ylabel="probability",
                             legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        else:
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)

            show_prob = visual[1].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0],
                             xlabel="frame",
                             ylabel="probability",
                             legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        break
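The per-token loops in Example #9 (the same loop reappears in Examples #10 and #11 below) fold the 48-phone outputs to a 39-phone set via map_dict; the mapping can be written as a small helper with identical behavior:

def map_phones(seq, map_dict):
    """Map a space-separated phone string through a reduction dictionary."""
    return ' '.join(map_dict[p] for p in seq.strip().split(' '))

# labels[x]  = map_phones(labels[x], map_dict)
# decoded[x] = map_phones(decoded[x], map_dict)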
Example #10
def test():
    model_path = '../log/exp_cnn_lstm_ctc/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum/exp3_81.7186/best_model_cv80.4941223351.pkl'
    package = torch.load(model_path)
    data_dir = '../data_prepare/data'
    input_size = package['input_size']
    layers = package['rnn_layers']
    hidden_size = package['hidden_size']
    rnn_type = package['rnn_type']
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    model_type = package['name']
    drop_out = package['_drop_out']
    try:
        mel = package['epoch']['mel']
    except KeyError:  # older checkpoints may not record the mel flag
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)

    decoder_type = 'Greedy'

    test_dataset = myDataset(data_dir,
                             data_set='test',
                             feature_type=feature_type,
                             out_type=out_type,
                             n_feats=n_feats,
                             mel=mel)

    if model_type == 'CNN_LSTM_CTC':
        model = CNN_LSTM_CTC(rnn_input_size=input_size,
                             rnn_hidden_size=hidden_size,
                             rnn_layers=layers,
                             rnn_type=rnn_type,
                             bidirectional=True,
                             batch_norm=True,
                             num_class=num_class,
                             drop_out=drop_out)
        test_loader = myCNNDataLoader(test_dataset,
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=4,
                                      pin_memory=False)
    else:
        model = CTC_RNN(rnn_input_size=input_size,
                        rnn_hidden_size=hidden_size,
                        rnn_layers=layers,
                        rnn_type=rnn_type,
                        bidirectional=True,
                        batch_norm=True,
                        num_class=num_class,
                        drop_out=drop_out)
        test_loader = myDataLoader(test_dataset,
                                   batch_size=8,
                                   shuffle=False,
                                   num_workers=4,
                                   pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()

    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone,
                                space_idx=-1,
                                blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone,
                              top_paths=3,
                              beam_width=20,
                              blank_index=0,
                              space_idx=-1,
                              lm_path=None,
                              dict_path=None,
                              trie_path=None,
                              lm_alpha=10,
                              lm_beta1=1,
                              lm_beta2=1)
    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = []
    for i in range(49):
        legend.append(test_dataset.int2phone[i])

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if model.name == 'CTC_RNN':
            inputs = inputs.transpose(0, 1)

        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()

        if model.name == 'CTC_RNN':
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs, visual = model(inputs, test=True)
        probs = probs.data.cpu()

        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))

        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)

        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])

        spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
        opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
        vis.heatmap(spectrum_inputs, opts=opts)

        opts = dict(title=labels[0],
                    xlabel="frame",
                    ylabel='feature_after_cnn1')
        after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
        vis.heatmap(after_cnn, opts=opts)

        opts = dict(title=labels[0],
                    xlabel="frame",
                    ylabel='feature_after_cnn2')
        after_cnn2 = visual[2][0][0].transpose(0, 1).data.cpu()
        vis.heatmap(after_cnn2, opts=opts)

        opts = dict(title=labels[0],
                    xlabel="frame",
                    ylabel='feature_before_rnn')
        before_rnn = visual[3].transpose(0, 1)[0].transpose(0, 1).data.cpu()
        vis.heatmap(before_rnn, opts=opts)

        show_prob = visual[4].transpose(0, 1)[0].data.cpu()
        line_opts = dict(title=decoded[0],
                         xlabel="frame",
                         ylabel="probability",
                         legend=legend)
        x = show_prob.size()[0]
        vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        break
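Several of these examples wrap the RNN input with nn.utils.rnn.pack_padded_sequence(inputs, input_size_list) after transposing to time-major layout; the call expects (T, B, F) inputs and lengths sorted in decreasing order, which the custom loaders presumably guarantee. A self-contained toy illustration:

import torch
import torch.nn as nn

# toy time-major batch: max length 5, batch size 3, feature dim 4
inputs = torch.zeros(5, 3, 4)
lengths = [5, 3, 2]                         # must be sorted in decreasing order
packed = nn.utils.rnn.pack_padded_sequence(inputs, lengths)
out, _ = nn.LSTM(4, 8)(packed)              # the RNN skips the padded steps
out, out_lengths = nn.utils.rnn.pad_packed_sequence(out)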
Example #11
def test():
    args = parser.parse_args()
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../data_prepare/data'
    else:
        cf = ConfigParser.ConfigParser()
        cf.read(args.conf)
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        package = torch.load(model_path)

    input_size = package['input_size']
    layers = package['rnn_layers']
    hidden_size = package['hidden_size']
    rnn_type = package['rnn_type']
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    model_type = package['name']
    drop_out = package['_drop_out']
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)

    decoder_type = args.decode_type

    test_dataset = myDataset(data_dir,
                             data_set='test',
                             feature_type=feature_type,
                             out_type=out_type,
                             n_feats=n_feats)

    if model_type == 'CNN_LSTM_CTC':
        model = CNN_LSTM_CTC(rnn_input_size=input_size,
                             rnn_hidden_size=hidden_size,
                             rnn_layers=layers,
                             rnn_type=rnn_type,
                             bidirectional=True,
                             batch_norm=True,
                             num_class=num_class,
                             drop_out=drop_out)
        test_loader = myCNNDataLoader(test_dataset,
                                      batch_size=8,
                                      shuffle=False,
                                      num_workers=4,
                                      pin_memory=False)
    else:
        model = CTC_RNN(rnn_input_size=input_size,
                        rnn_hidden_size=hidden_size,
                        rnn_layers=layers,
                        rnn_type=rnn_type,
                        bidirectional=True,
                        batch_norm=True,
                        num_class=num_class,
                        drop_out=drop_out)
        test_loader = myDataLoader(test_dataset,
                                   batch_size=8,
                                   shuffle=False,
                                   num_workers=4,
                                   pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()

    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone,
                                space_idx=-1,
                                blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone,
                              top_paths=40,
                              beam_width=20,
                              blank_index=0,
                              space_idx=-1,
                              lm_path=None,
                              lm_alpha=0.8,
                              lm_beta=1,
                              cutoff_prob=1.0,
                              dic=test_dataset.phone_word)

    total_wer = 0
    total_cer = 0
    start = time.time()
    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if model.name == 'CTC_RNN':
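            # the pure-RNN model expects time-major input: (seq_len, batch, feats)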
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()

        if model.name == 'CTC_RNN':
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs = model(inputs)
        probs = probs.data.cpu()
        #print(probs)

        decoded = decoder.decode(probs, input_size_list)

        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        cer = 0
        wer = 0
        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])
            cer += decoder.cer(decoded[x], labels[x])
            wer += decoder.wer(decoded[x], labels[x])
            decoder.num_word += len(labels[x].split())
            decoder.num_char += len(labels[x])
        total_cer += cer
        total_wer += wer
    # (1 - error ratio) * 100 is an accuracy, so label it as such
    char_acc = (1 - float(total_cer) / decoder.num_char) * 100
    word_acc = (1 - float(total_wer) / decoder.num_word) * 100
    print("Character accuracy on test set: %.4f" % char_acc)
    print("Word accuracy on test set: %.4f" % word_acc)
    end = time.time()
    time_used = (end - start) / 60.0
    print("Time used for decoding %d sentences: %.4f minutes" %
          (len(test_dataset), time_used))
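The decoder.cer and decoder.wer helpers are not shown in these examples; the way
their results are accumulated against decoder.num_char and decoder.num_word
suggests they return edit distances. A minimal sketch of such a metric, assuming
plain Levenshtein distance (the names below are illustrative, not the decoder's
actual API):

def edit_distance(ref, hyp):
    """Levenshtein distance between two token sequences (dynamic programming)."""
    m, n = len(ref), len(hyp)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dp[i][0] = i  # delete everything
    for j in range(n + 1):
        dp[0][j] = j  # insert everything
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                           dp[i][j - 1] + 1,         # insertion
                           dp[i - 1][j - 1] + cost)  # substitution
    return dp[m][n]

def char_errors(decoded, label):
    # character-level distance, as decoder.cer presumably computes
    return edit_distance(list(label), list(decoded))

def word_errors(decoded, label):
    # word-level distance over whitespace-split tokens
    return edit_distance(label.split(), decoded.split())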
Example #12
def main():
    savePath = os.path.join("models", "SoM_Mix_1")
    saveDescription = "mix: 1-2"
    trainDatasets, devDatasets, testDatasets = [], [], []

    ### dataset 1 #########
    datasetLoader1 = DatasetLoader()
    datasetLoader1.dataset = Datasets().audio_features_mfcc_functionals
    datasetLoader1.loadDataset()
    trainDatasets.append(datasetLoader1.trainDataset)
    devDatasets.append(datasetLoader1.devDataset)
    testDatasets.append(datasetLoader1.testDataset)
    #######################
    ### dataset 2 #########
    datasetLoader2 = DatasetLoader()
    datasetLoader2.dataset = Datasets().visual_features_functionals
    datasetLoader2.loadDataset()
    trainDatasets.append(datasetLoader2.trainDataset)
    devDatasets.append(datasetLoader2.devDataset)
    testDatasets.append(datasetLoader2.testDataset)
    #######################

    ### models ############
    model1Path = os.path.join("models", "SoM_GRU_1", "best")
    model2Path = os.path.join("models", "SoM_GRU_2", "best")
    models = [model1Path, model2Path]
    #######################

    # paths where the fused features will be written (or already exist)
    trainPath = os.path.join(savePath, "trainData.csv")
    devPath = os.path.join(savePath, "devData.csv")
    testPath = os.path.join(savePath, "testData.csv")

    # comment out the next three lines if you already have the CSV files of fused features for train/dev/test
    modelsOutToCSVs(models, trainDatasets, trainPath)
    modelsOutToCSVs(models, devDatasets, devPath)
    modelsOutToCSVs(models, testDatasets, testPath)

    trainDataset = myDataset(address=trainPath, tars=[1, 2])
    devDataset = myDataset(address=devPath, tars=[1, 2])
    testDataset = myDataset(address=testPath, tars=[1, 2])
    # the target the model is trained on; depends on how it is loaded from the dataset!
    tarsFunc = lambda tars: tars[:, 0] - tars[:, 1]

    featSize = trainDataset.shape()[-1]
    model = fullyConnected(featSize, 1, hiddenSize=32)
    wrapper = ModelWrapper([model], tabuList=[], device='cuda:0')
    # comment out the next lines if you just want to test
    wrapper.train(trainDataset,
                  epochs=2500,
                  firstEpoch=1,
                  savePath=savePath,
                  evalDataset=devDataset,
                  csvPath=os.path.join(savePath, "trainLog.csv"),
                  computeLossFor=len(trainDataset),
                  computeLossForEval=len(devDataset),
                  tolerance=5,
                  tarsFunc=tarsFunc,
                  plusTar=-1)

    wrapper.load_model(os.path.join(savePath, "best"))
    _, evalLoss = wrapper.testCompute(devDataset,
                                      verbose=True,
                                      computeLossFor=len(devDataset),
                                      tarsFunc=tarsFunc,
                                      plusTar=-1)
    _, evalLoss2 = wrapper.testCompute(devDataset,
                                       verbose=True,
                                       computeLossFor=len(devDataset),
                                       tarsFunc=tarsFunc,
                                       plusTar=1)
    _, testLoss = wrapper.testCompute(testDataset,
                                      verbose=True,
                                      computeLossFor=len(testDataset),
                                      tarsFunc=tarsFunc,
                                      plusTar=-1)
    _, testLoss2 = wrapper.testCompute(testDataset,
                                       verbose=True,
                                       computeLossFor=len(testDataset),
                                       tarsFunc=tarsFunc,
                                       plusTar=1)

    writeLineToCSV(os.path.join("models", "results.csv"), [
        "savePath", "saveDescription", "evalLoss", "evalLoss2", "evalCCC",
        "evalCCC2", "testLoss", "testLoss2", "testCCC", "testCCC2"
    ], [
        savePath, saveDescription, evalLoss, evalLoss2, 1 - evalLoss,
        1 - evalLoss2, testLoss, testLoss2, 1 - testLoss, 1 - testLoss2
    ])
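The results row above records evalCCC and testCCC as 1 - loss, which suggests
ModelWrapper trains against a 1 - CCC (concordance correlation coefficient)
objective. A minimal sketch of such a loss under that assumption (not the
wrapper's actual implementation):

import torch

def ccc_loss(preds, tars):
    """1 - CCC between flattened predictions and targets."""
    preds, tars = preds.flatten().float(), tars.flatten().float()
    pred_mean, tar_mean = preds.mean(), tars.mean()
    pred_var = preds.var(unbiased=False)    # population variance
    tar_var = tars.var(unbiased=False)
    covar = ((preds - pred_mean) * (tars - tar_mean)).mean()
    ccc = 2 * covar / (pred_var + tar_var + (pred_mean - tar_mean) ** 2)
    return 1 - ccc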
Example #13
        os.makedirs('logs/' + datasetname)
    log_dir = 'logs/' + datasetname
    writer = SummaryWriter(log_dir=log_dir)
    train_path = './dataset/iris/iris_train.data'
    # test_path =  '/DATACENTER1/xiao.peng/DCN_keras-master/dataset/RCV1/Processed/data-0.pkl'

    # all_path='./dataset/wine/wine.data'
    for i in range(1, repeat+1):
        # sdae_savepath = ("model/sdae-dcn-run-"+datasetname+"-%d.pt" % i)
        # best pretrained checkpoint (raw string avoids backslash escapes in the Windows path)
        sdae_savepath = r"D:\code\dec-pytorch\model\sdae-run-iris-1.pt"
        if not os.path.exists(sdae_savepath):
            print("Experiment #%d" % i)
            write_log("Experiment #%d" % i, log_dir)

            train_data = myDataset(train_path, -1, '.data')
            # test_data = myDataset(test_path, -1, '.pkl')
            train_loader = data.DataLoader(dataset=train_data, batch_size=batch_size,
                                           shuffle=True, collate_fn=train_data.collate_fn,
                                           num_workers=4)
            # test_loader = data.DataLoader(dataset=test_data, batch_size=batch_size,
            #                               shuffle=True, collate_fn=test_data.collate_fn,
            #                               num_workers=4)


            # pretrain the stacked denoising autoencoder
            sdae = StackedDAE(input_dim=4, z_dim=2, binary=False,
                              encodeLayer=[8], decodeLayer=[8],
                              activation="relu", dropout=0, log_dir=log_dir)
            sdae.cuda()
            # print(sdae)
            sdae.pretrain(train_loader, lr=args.sdae_pre_lr,
                          batch_size=batch_size, num_epochs=20,
                          corrupt=0.2, loss_type="mse")
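sdae.pretrain is called with corrupt=0.2 and loss_type="mse"; a denoising
autoencoder typically zeroes a random fraction of the inputs and reconstructs
the clean values. A minimal sketch of one such pretraining step, assuming that
masking scheme (the real StackedDAE.pretrain may differ):

import torch
import torch.nn.functional as F

def denoising_step(autoencoder, optimizer, x, corrupt=0.2):
    """One pretraining step: corrupt the input, reconstruct the clean target."""
    mask = (torch.rand_like(x) > corrupt).float()  # zero out a `corrupt` fraction
    recon = autoencoder(x * mask)                  # encode/decode the corrupted input
    loss = F.mse_loss(recon, x)                    # MSE against the clean input
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()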
Example #14
def test():
    model_path = '../log/exp_cnn_lstm_ctc_spectrum201/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum_stride_1_2/exp2_82.1483/best_model_cv80.8660423723.pkl'
    package = torch.load(model_path)
    data_dir = '../data_prepare/data'
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    try:
        mel = package['epoch']['mel']
    except KeyError:
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)

    decoder_type = 'Greedy'

    # note: this pass decodes the *training* set for visualization
    test_dataset = myDataset(data_dir, data_set='train', feature_type=feature_type, out_type=out_type, n_feats=n_feats, mel=mel)
    
    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param, num_class=num_class, drop_out=drop_out)
        
    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset, batch_size=1, shuffle=False,
                                        num_workers=4, pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset, batch_size=1, shuffle=False,
                                        num_workers=4, pin_memory=False)
    
    model.load_state_dict(package['state_dict'])
    model.eval()
    
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone)
    
    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = []
    for i in range(49):
        legend.append(test_dataset.int2phone[i])
    
    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data 
        if not add_cnn:
            inputs = inputs.transpose(0, 1)

        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        
        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
    
        probs, visual = model(inputs, visualize=True)
        probs = probs.data.cpu()
        
        if add_cnn:
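            # lengths were stored as fractions of the padded input; rescale to output frames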
            max_length = probs.size(0)
            input_size_list = [int(x*max_length) for x in input_size_list]

        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        
        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)

        for x in range(len(labels)):
            print("origin: "+ labels[x])
            print("decoded: "+ decoded[x])
        
        if add_cnn:
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)
        
            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_after_cnn')
            after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
            vis.heatmap(after_cnn, opts=opts)
        
            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_before_rnn')
            before_rnn = visual[2].transpose(0, 1)[0].transpose(0, 1).data.cpu()
            vis.heatmap(before_rnn, opts=opts)
        
            show_prob = visual[3].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        else:
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)

            show_prob = visual[1].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        break
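When add_cnn is set, the valid lengths are recovered with int(x * max_length),
which implies the CNN loader stores each utterance's length as a fraction of the
padded batch length rather than as a frame count, so the bookkeeping survives
the CNN's time-axis downsampling. A small worked sketch of that convention
(the numbers are illustrative):

frame_lengths = [412, 305, 278]        # true frame counts per utterance
padded_length = max(frame_lengths)     # the batch is padded to 412 frames
fractions = [l / padded_length for l in frame_lengths]

# after the CNN the time axis shrinks, e.g. to 103 output steps
output_length = 103
valid_steps = [int(f * output_length) for f in fractions]  # [103, 76, 69]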
Example #15
def test():
    args = parser.parse_args()
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../data_prepare/data'
        # decode settings fall back to assumed defaults here (matching the
        # BeamDecoder values in Example #11) since no config file supplies them
        beam_width = 20
        lm_alpha = 0.8
    else:
        cf = ConfigParser.ConfigParser()
        cf.read(args.conf)
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        beam_width = cf.getint('Decode', 'beam_width')
        lm_alpha = cf.getfloat('Decode', 'lm_alpha')
        package = torch.load(model_path)

    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    try:
        mel = package['epoch']['mel']
    except KeyError:
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)

    decoder_type = args.decode_type

    test_dataset = myDataset(data_dir,
                             data_set=args.data_set,
                             feature_type=feature_type,
                             out_type=out_type,
                             n_feats=n_feats,
                             mel=mel)

    model = CTC_Model(rnn_param=rnn_param,
                      add_cnn=add_cnn,
                      cnn_param=cnn_param,
                      num_class=num_class,
                      drop_out=drop_out)

    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset,
                                      batch_size=8,
                                      shuffle=False,
                                      num_workers=4,
                                      pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset,
                                   batch_size=8,
                                   shuffle=False,
                                   num_workers=4,
                                   pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()

    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone,
                                space_idx=-1,
                                blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone,
                              beam_width=beam_width,
                              blank_index=0,
                              space_idx=-1,
                              lm_path=args.lm_path,
                              lm_alpha=lm_alpha)

    if args.map_48_39 is not None:
        import pickle
        with open(args.map_48_39, 'rb') as f:
            map_dict = pickle.load(f)
        print(map_dict)

    total_wer = 0
    total_cer = 0
    start = time.time()
    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)

        if USE_CUDA:
            inputs = inputs.cuda()

        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)

        probs = model(inputs)
        if add_cnn:
            # lengths were stored as fractions of the padded input; rescale to output frames
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]

        probs = probs.data.cpu()
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        if args.map_48_39 is not None:
            for x in range(len(labels)):
                label = labels[x].strip().split(' ')
                for i in range(len(label)):
                    label[i] = map_dict[label[i]]
                labels[x] = ' '.join(label)
                decode = decoded[x].strip().split(' ')
                for i in range(len(decode)):
                    decode[i] = map_dict[decode[i]]
                decoded[x] = ' '.join(decode)

        cer = 0
        wer = 0
        for x in range(len(labels)):
            print("origin : " + labels[x])
            print("decoded: " + decoded[x])
            cer += decoder.cer(decoded[x], labels[x])
            wer += decoder.wer(decoded[x], labels[x])
            decoder.num_word += len(labels[x].split())
            decoder.num_char += len(labels[x])
        total_cer += cer
        total_wer += wer
    # (1 - error ratio) * 100 is an accuracy, so label it as such
    char_acc = (1 - float(total_cer) / decoder.num_char) * 100
    word_acc = (1 - float(total_wer) / decoder.num_word) * 100
    print("Character accuracy on test set: %.4f" % char_acc)
    print("Word accuracy on test set: %.4f" % word_acc)
    end = time.time()
    time_used = (end - start) / 60.0
    print("time used for decode %d sentences: %.4f" %
          (len(test_dataset), time_used))
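In the RNN-only branch above, the padded batch is transposed to time-major and
packed with pack_padded_sequence, which is what lets the recurrent layers skip
each utterance's padded tail. A minimal self-contained sketch of that pattern
(the shapes and names are illustrative):

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=40, hidden_size=8)  # expects time-major input by default
batch = torch.randn(3, 50, 40)               # (batch, time, feats), zero-padded
lengths = [50, 42, 37]                       # valid frames, sorted descending

inputs = batch.transpose(0, 1)               # -> (time, batch, feats)
packed = nn.utils.rnn.pack_padded_sequence(inputs, lengths)
output, _ = rnn(packed)
unpacked, out_lengths = nn.utils.rnn.pad_packed_sequence(output)
print(unpacked.size(), out_lengths)          # (50, 3, 8) and tensor([50, 42, 37])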