Example #1
0
def save_ocr():
    """Restore the newest CRNN checkpoint and export the model as ``model/crnn.h5``.

    The network is sized from a ``SampleGenerator(10)`` vocabulary plus one
    extra class. Weights are restored from the latest checkpoint found in the
    ``checkpoints`` directory, and the ``model`` directory is created when it
    does not exist yet.
    """
    sample_source = SampleGenerator(10)
    num_classes = sample_source.vocab_size() + 1
    crnn = CRNN(num_classes)
    adam = tf.keras.optimizers.Adam(1e-4)

    # The optimizer is tracked together with the model so the checkpoint
    # restores both.
    ckpt = tf.train.Checkpoint(model=crnn, optimizer=adam)
    latest = tf.train.latest_checkpoint('checkpoints')
    ckpt.restore(latest)

    if not exists('model'):
        mkdir("model")
    export_path = join("model", "crnn.h5")
    crnn.save(export_path)
Example #2
0
def start_training(log_dir, optimizer=Adam(lr=0.001, decay=1e-6), loss="categorical_crossentropy",
                   metrics=None):
    """
    Train the CRNN model and return the checkpoint path of its best epoch.

    Reads ``data_path``, ``batch_size`` and ``num_epochs`` from the module-level ``args``.
    Writes checkpoints, ``log.csv`` and ``history.png`` into ``log_dir``.

    :param log_dir: directory to log the reports
    :param optimizer: optimizer for model (default: Adam). NOTE: the default instance is
        created once, when this function is defined, and is shared by every call that
        relies on it.
    :param loss: loss function for model (default: categorical_crossentropy)
    :param metrics: metrics for model (default: accuracy)
    :return: path of the model checkpoint for the best epoch with regards to validation accuracy
    """
    if metrics is None:
        metrics = ["accuracy"]

    train_data = ImageLoader(os.path.join(args.data_path, "training.csv"))
    val_data = ImageLoader(os.path.join(args.data_path, "validation.csv"))

    # Training Callbacks
    checkpoint_filename = os.path.join(log_dir, "weights.{epoch:02d}.model")
    model_checkpoint_callback = ModelCheckpoint(checkpoint_filename, save_best_only=True, verbose=1, monitor="val_acc")
    csv_logger_callback = CSVLogger(os.path.join(log_dir, "log.csv"))
    early_stopping_callback = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode="min")

    crnn = CRNN()
    model = crnn.build_model(train_data.get_input_shape())
    model.compile(optimizer, loss, metrics)

    history = model.fit_generator(
        train_data.get_data(),
        steps_per_epoch=train_data.get_num_files() // args.batch_size,
        epochs=args.num_epochs,
        callbacks=[model_checkpoint_callback, csv_logger_callback, early_stopping_callback],
        verbose=1,
        validation_data=val_data.get_data(should_shuffle=False),
        validation_steps=val_data.get_num_files() // args.batch_size,
        max_queue_size=args.batch_size,
        workers=4,
        use_multiprocessing=True
    )

    # Plot the loss and accuracy curves for training and validation
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(history.history['loss'], color='b', label="Training loss")
    ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
    ax[0].legend(loc='best', shadow=True)

    ax[1].plot(history.history['acc'], color='b', label="Training accuracy")
    ax[1].plot(history.history['val_acc'], color='r', label="Validation accuracy")
    ax[1].legend(loc='best', shadow=True)
    plt.savefig(os.path.join(log_dir, "history.png"))

    # Evaluation on model with best validation accuracy.
    # np.argmax yields a 0-based index into the history, whereas ModelCheckpoint
    # numbers epochs from 1 when it fills in "{epoch:02d}".
    best_epoch = int(np.argmax(history.history["val_acc"])) + 1
    print("Log files: ", log_dir)
    print("Best epoch: ", best_epoch)

    model_file_name = checkpoint_filename.replace("{epoch:02d}", "{:02d}".format(best_epoch))
    return model_file_name
Example #3
0
    def construct_model(self, model_name='r1', lr=1e-6):
        """Build the requested network plus its loss, train step, collections and summaries.

        The architecture name, channel count and image dimensionality are
        appended to ``<save_path>/setup.txt``. Only ``'r3'``, ``'alexnet'`` and
        ``'lstm'`` assign ``self.model``; any other name (the default ``'r1'``
        included) leaves ``self.model`` untouched. ``lr`` is forwarded to
        ``Regressor_3`` and is always used for the Adam train step, while the
        other two networks are built with a fixed rate of 0.003.
        """
        self.model_name = model_name
        tf.summary.image('image_angle_0', self.images, 1)

        # The file handle is bound to self.out, so the (closed) handle stays on
        # the instance after the block.
        with open(self.save_path + '/setup.txt', 'a') as self.out:
            self.out.writelines([
                'Architecture: ' + str(model_name) + '\n',
                'number of channels: ' + str(self.n_channels) + '\n',
                'img dimensionality: ' + str(self.img_dimens) + '\n',
            ])

        if model_name == 'r3':
            self.model = Regressor_3(self.images, self.counts, lr=lr)
        elif model_name == 'alexnet':
            self.model = AlexNet(self.images, self.counts, lr=0.003)
        elif model_name == 'lstm':
            self.model = CRNN(self.images, self.counts, lr=0.003)

        self.loss = self.model.loss()
        tf.summary.scalar("loss", self.loss)

        with tf.name_scope('train'):
            adam = tf.train.AdamOptimizer(lr)
            self.train_step = adam.minimize(self.loss)

        # Tensors registered under 'saved'.
        for tensor in (self.loss, self.model.pred_counts):
            tf.add_to_collection(name='saved', value=tensor)
        if self.model_name == 'lstm':
            tf.add_to_collection(name='saved', value=self.reconstruction)

        # Graph inputs registered under 'placeholder', in a fixed order.
        graph_inputs = (
            self.x,
            self.y,
            self.images,
            self.counts,
            self.model.keep_prob,
            self.model.is_training,
            self.iterator.initializer,
        )
        for tensor in graph_inputs:
            tf.add_to_collection(name='placeholder', value=tensor)

        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver()

        log_root = self.save_path + '/logs'
        self.writer = tf.summary.FileWriter(log_root + '/train')
        self.writer_test = tf.summary.FileWriter(log_root + '/test')
Example #4
0
def trainBatch(net, criterion, optimizer):
    """Run one optimisation step on the next training batch and return its cost.

    Relies on module-level state: ``train_iter`` (batch iterator), the
    pre-allocated ``image``/``text``/``length`` buffers filled through
    ``utils.loadData``, and ``converter`` for encoding the label strings.

    :param net: network to train; it is called on ``image`` and its gradients
        are reset before the backward pass
    :param criterion: loss called as ``criterion(preds, text, preds_size, length)``
    :param optimizer: optimizer whose ``step()`` applies the update
    :return: the loss divided by the batch size
    """
    # next() works with both Python 2 and Python 3 iterators, unlike .next().
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    # Use the network that was passed in rather than the global ``crnn``.
    preds = net(image)
    # Every sample in the batch gets the same prediction length, preds.size(0).
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    net.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
Example #5
0
  def __init__(self):
    """Create the tokenizer and the CRNN, preferring a saved model when one exists.

    A fresh ``CRNN`` is always constructed first; it is replaced by the model
    stored at ``model/crnn.h5`` (loaded without compiling) if that file is
    present.
    """
    self.tokenizer = Tokenizer()
    # NOTE: extra class for blank index
    num_classes = self.tokenizer.size() + 1
    self.crnn = CRNN(num_classes)

    saved_model = join('model', 'crnn.h5')
    if exists(saved_model):
      self.crnn = tf.keras.models.load_model(saved_model, compile=False)
Example #6
0
    def predict_word(self, X):
        """Decode the network output for ``X`` into one string per sample.

        A new ``CRNN`` is built without dropout and loaded from
        ``<weight_name>.h5`` on every call. For each sample the first two rows
        of the output are skipped, the arg-max label is taken per remaining
        row, consecutive repeats are collapsed, and labels 0-9 / 10-35 / 36-61
        are mapped to digits / upper-case / lower-case letters. Any other
        label is dropped.

        :param X: input batch handed to ``crnn.test_func``
        :return: list of decoded strings
        """
        network = CRNN()
        network.build(dropout=False)
        network.model.load_weights(self.weight_name + ".h5")

        scores = network.test_func([X])[0]
        words = []

        for row in range(scores.shape[0]):
            labels = list(np.argmax(scores[row, 2:], 1))
            # Collapse runs of identical labels to a single label.
            collapsed = [key for key, _ in itertools.groupby(labels)]

            chars = []
            for label in collapsed:
                if 0 <= label <= 9:
                    chars.append(chr(label + ord('0')))
                elif 10 <= label <= 35:
                    chars.append(chr(label - 10 + ord('A')))
                elif 36 <= label <= 61:
                    chars.append(chr(label - 36 + ord('a')))
            words.append(''.join(chars))

        return words
Example #7
0
def main(opts):
  """Evaluate a CRNN on the hard-coded image list and print the running accuracy.

  Reads ``opts.cuda``, ``opts.base_lr`` and ``opts.model``. When ``opts.model``
  exists on disk the weights are loaded through ``net_utils.load_net``. The
  network is put in eval mode, every sample is passed through
  ``process_crnn`` and a prediction counts as correct when it equals
  ``target[0]``.

  :param opts: options object providing ``cuda``, ``base_lr`` and ``model``
  """
  alphabet = '0123456789.'
  # presumably the extra class is the CTC blank -- TODO confirm against CRNN
  nclass = len(alphabet) + 1
  model_name = 'crnn'
  net = CRNN(nclass)
  print("Using {0}".format(model_name))

  if opts.cuda:
    net.cuda()
  learning_rate = opts.base_lr
  # NOTE(review): weight_decay is not defined in this function; it is
  # presumably a module-level constant -- confirm.
  optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

  if os.path.exists(opts.model):
    # Use the same options object for the existence check and the load.
    print('loading model from %s' % opts.model)
    step_start, learning_rate = net_utils.load_net(opts.model, net, optimizer)

  ## dataset
  converter = strLabelConverter(alphabet)
  dataset = ImgDataset(
      root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
      csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
      transform=None,
      target_transform=converter.encode
  )
  ocrdataloader = torch.utils.data.DataLoader(
      dataset, batch_size=1, shuffle=False, collate_fn=own_collate
  )

  num_count = 0
  net = net.eval()

  ctc_loss = CTCLoss()

  # Create the iterator up front instead of relying on a NameError being
  # swallowed by a bare except on the first pass.
  data_iter = iter(ocrdataloader)
  for step in range(len(dataset)):

    try:
      data = next(data_iter)
    except StopIteration:
      # Loader exhausted: start over.
      data_iter = iter(ocrdataloader)
      data = next(data_iter)

    im_data, gt_boxes, text = data
    if opts.cuda:
      # Keep the input on the same device as the network.
      im_data = im_data.cuda()

    try:
      res = process_crnn(im_data, gt_boxes, text, net, ctc_loss, converter, training=False)

      pred, target = res
      if pred == target[0]:
        num_count += 1
    except Exception:
      # Best effort: report the failing sample and carry on with the next one.
      import sys
      import traceback
      traceback.print_exc(file=sys.stdout)

    print('correct/total:%d/%d'%(num_count, len(dataset)))
Example #8
0
def val(net, dataset, criterion, max_iter=100):
    """Evaluate ``net`` on ``dataset`` and print the loss, accuracy and sample decodings.

    Relies on module-level state: ``opt`` (``batchSize``, ``workers``,
    ``n_test_disp``), the ``image``/``text``/``length`` buffers filled through
    ``utils.loadData``, and ``converter`` for encoding and decoding labels.

    :param net: network to evaluate; its parameters get ``requires_grad = False``
        and it is switched to eval mode
    :param dataset: dataset wrapped in a shuffling ``DataLoader``
    :param criterion: loss called as ``criterion(preds, text, preds_size, length)``
    :param max_iter: upper bound on the number of batches evaluated
    """
    print('Start val')

    # Operate on the network that was passed in rather than the global ``crnn``.
    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        # next() works with both Python 2 and Python 3 iterators, unlike .next().
        data = next(val_iter)
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        # Older PyTorch kept the reduced dimension; newer versions drop it, in
        # which case squeezing dim 2 would fail.
        if preds.dim() == 3:
            preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    # Show raw vs. collapsed decodings for the last evaluated batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # NOTE(review): the denominator assumes every batch is full; a smaller
    # final batch makes the reported accuracy slightly low.
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Example #9
0
def main():
    """Train a CRNN with CTC loss and save the model with the lowest test loss.

    Settings come from ``hyperparameters()``: ``path``, ``device``,
    ``img_width``, ``img_height``, ``batch_size``, ``optim``, ``lr``,
    ``epochs`` and ``savepath``. After every epoch the mean per-batch test
    loss is computed, and the whole model object is written to
    ``args.savepath`` whenever that loss improves.

    :raises ValueError: if ``args.optim`` is neither ``'adam'`` nor ``'rmsprop'``
    """
    args = hyperparameters()

    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    # select GPU or CPU
    device = torch.device(f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # load the train dataset
    train_dataset = CRNN_dataset(path=train_path, w=args.img_width, h=args.img_height)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)

    # load the test dataset
    test_dataset = CRNN_dataset(path=test_path, w=args.img_width, h=args.img_height)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True)

    # define the model
    model = CRNN(args.img_height, 1, 37, 256)

    # define the loss
    criterion = nn.CTCLoss()

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               betas=(0.5, 0.999))
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    else:
        # Raise instead of assert so the check survives ``python -O``.
        raise ValueError("옵티마이저를 다시 입력해주세요. :(")

    model = model.to(device)
    best_test_loss = 100000000
    for i in range(args.epochs):

        print('epochs: ', i)

        print("<----training---->")
        model.train()
        for inputs, targets in tqdm(train_dataloader):
            # NOTE(review): inputs are expected as (batch, channel, h, w) --
            # confirm that CRNN_dataset already yields that layout.
            batch_size = inputs.size(0)
            inputs = inputs.to(device)
            target_text, target_length = targets
            target_text, target_length = target_text.to(device), target_length.to(device)
            # NOTE(review): nn.CTCLoss expects log-probabilities; confirm the
            # model output is already log-softmaxed.
            preds = model(inputs)
            preds_length = Variable(torch.IntTensor([preds.size(0)] * batch_size))

            # CTC = Connectionist Temporal Classification. Author's note: one
            # character can span several horizontal positions, so per-position
            # labels may repeat. CTC ignores position and width, is given only
            # the ground-truth text, and collapses the repeats. The loss is
            # -log of the summed score of all alignments yielding that text.
            loss = criterion(preds, target_text, preds_length, target_length) / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("<----evaluation---->")

        # Models with batchnorm or dropout behave differently during training
        # and evaluation, hence the explicit eval() call.
        model.eval()
        total_loss = 0.0
        num_batches = 0

        with torch.no_grad():
            for inputs, targets in tqdm(test_dataloader):
                batch_size = inputs.size(0)
                inputs = inputs.to(device)
                target_text, target_length = targets
                # keep the targets on the configured device (GPU or CPU)
                target_text, target_length = target_text.to(device), target_length.to(device)
                preds = model(inputs)
                preds_length = Variable(torch.IntTensor([preds.size(0)] * batch_size))
                batch_loss = criterion(preds, target_text, preds_length, target_length) / batch_size
                # Accumulate over every batch; keeping only the last batch's
                # loss made model selection depend on one shuffled batch.
                total_loss += batch_loss.item()
                num_batches += 1

        loss = total_loss / num_batches if num_batches else 0.0

        print("test loss: ", loss)
        if loss < best_test_loss:
            # a lower test loss becomes the new best
            best_test_loss = loss
            # save the best model to args.savepath
            PATH = args.savepath
            torch.save(model, PATH)
            print("best model 저장 성공")
Example #10
0
def get_models():
    """Instantiate the network selected by the module-level ``cfg``.

    Returns ``CRNN_STN(cfg)`` when ``cfg.model`` equals ``'CRNN_STN'`` and
    ``CRNN(cfg)`` for every other value.
    """
    model_cls = CRNN_STN if cfg.model == 'CRNN_STN' else CRNN
    return model_cls(cfg)
def main():
    """Train, validate and test the ResNet + GRU speech emotion recogniser.

    Parses the command line, builds the CNN/RNN pair wrapped in ``CRNN.CRNN``
    and ``nn.DataParallel``, and returns early unless ``--mode`` is ``train``.
    Otherwise it trains for ``--max_epochs`` epochs, saves the state dict with
    the best validation accuracy to ``./save_model/best_model.pt``, evaluates
    on the test split and hands the curves to ``save_data`` / ``plot_data``.

    :return: ``0`` after a full training run, ``None`` when ``--mode`` is not
        ``train``
    """
    parser = argparse.ArgumentParser(description='Speech Emotion Recognition')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size',
                        type=int,
                        default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--n_class',
                        type=int,
                        default=7,
                        help='number of classes of data (default: 7)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout rate in training (default: 0.2)')
    # NOTE(review): with default=True and action='store_true' this flag can
    # never switch bidirectionality off.
    parser.add_argument('--bidirectional',
                        default=True,
                        action='store_true',
                        help='use bidirectional RNN (default: True)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=8,
                        help='batch size in training (default: 8)')
    parser.add_argument(
        '--workers',
        type=int,
        default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=30,
                        help='number of max epochs in training (default: 30)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name',
                        type=str,
                        default='model',
                        help='the name of model')
    parser.add_argument('--mode', type=str, default='train')

    args = parser.parse_args()

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    # Floor division keeps the feature size an int on Python 3 as well.
    feature_size = N_FFT // 2 + 1

    cnn = resnet.ResNet(feature_size, resnet.BasicBlock, [3, 3, 3])
    rnn = RNN.RNN(cnn.feature_size,
                  args.hidden_size,
                  args.n_class,
                  input_dropout_p=args.dropout,
                  dropout_p=args.dropout,
                  n_layers=args.layer_size,
                  bidirectional=args.bidirectional,
                  rnn_cell='gru',
                  variable_lengths=False)

    model = CRNN.CRNN(cnn, rnn)
    model.flatten_parameters()

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

    if args.mode != 'train':
        return

    data_download()

    wav_paths = [
        os.path.join('./dataset/wav', fname)
        for fname in os.listdir('./dataset/wav')
    ]

    best_acc = 0
    begin_epoch = 0
    # Epoch whose weights were written to best_model.pt; None until an epoch
    # beats best_acc.
    save_epoch = None

    # [train loss, train acc, validation loss, validation acc] per epoch
    loss_acc = [[], [], [], []]

    train_batch_num, train_dataset_list, valid_dataset, test_dataset = split_dataset(
        args, wav_paths, dataset_ratio=[0.7, 0.1, 0.2])

    logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):

        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()

        train_loss, train_acc = train(model, train_batch_num, train_queue,
                                      criterion, optimizer, device,
                                      train_begin, args.workers, 10)
        logger.info('Epoch %d (Training) Loss %0.4f Acc %0.4f' %
                    (epoch, train_loss, train_acc))

        train_loader.join()

        loss_acc[0].append(train_loss)
        loss_acc[1].append(train_acc)

        valid_queue = queue.Queue(args.workers * 2)

        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_acc = evaluate(model, valid_loader, valid_queue,
                                       criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f Acc %0.4f' %
                    (epoch, eval_loss, eval_acc))

        valid_loader.join()

        loss_acc[2].append(eval_loss)
        loss_acc[3].append(eval_acc)

        best_model = (eval_acc > best_acc)

        if best_model:
            best_acc = eval_acc
            torch.save(model.state_dict(), './save_model/best_model.pt')
            save_epoch = epoch

    if save_epoch is not None:
        model.load_state_dict(torch.load('./save_model/best_model.pt'))
    else:
        # No epoch improved on the initial best_acc, so nothing was saved in
        # this run: test the final weights rather than a stale or missing file.
        save_epoch = args.max_epochs - 1

    test_queue = queue.Queue(args.workers * 2)

    test_loader = BaseDataLoader(test_dataset, test_queue, args.batch_size, 0)
    test_loader.start()

    test_loss, test_acc = evaluate(model, test_loader, test_queue, criterion,
                                   device)
    logger.info('Epoch %d (Test) Loss %0.4f Acc %0.4f' %
                (save_epoch, test_loss, test_acc))

    test_loader.join()

    save_data(loss_acc, test_loss, test_acc)
    plot_data(loss_acc, test_loss, test_acc)

    return 0
Example #12
0
def modelRun(Path, LoadMatFileName, dataVar, labelVar, numOfClasses,
             numOfKernels, scaleFactor, BS, checkpoint, SaveMatFileName,
             numOfEpochs, samplingRate, type, dropoutRate, visibleGPU):
    """Train one model on a .mat dataset, evaluate it and store metrics plus Grad-CAM output.

    :param Path: prefix concatenated with ``LoadMatFileName`` to locate the .mat file
    :param LoadMatFileName: name of the .mat file to load
    :param dataVar: key of the data array inside the .mat file
    :param labelVar: key of the label array inside the .mat file
    :param numOfClasses: number of target classes
    :param numOfKernels: size of the second axis of the 4-D model input
    :param scaleFactor: factor the raw data is multiplied by
    :param BS: batch size used for ``model.fit``
    :param checkpoint: callbacks passed to ``model.fit``
    :param SaveMatFileName: prefix for the ``.mat`` result file and the two PNG images
    :param numOfEpochs: number of training epochs
    :param samplingRate: sampling rate; half of it is used as the kernel length
    :param type: model name, e.g. ``"EEGNet"``, ``"EnK_EEGNet"``, ``"CRNN"``
    :param dropoutRate: dropout rate passed to the model constructor
    :param visibleGPU: value written to ``CUDA_VISIBLE_DEVICES``
    :raises ValueError: if ``type`` names no known model
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = visibleGPU
    tf.keras.backend.clear_session()

    with tf.Graph().as_default():
        # Load data file
        mat = sio.loadmat(Path + LoadMatFileName)
        labels = mat[labelVar]  # Labels
        data = mat[dataVar]  # Data

        # Variables Initialization
        numOfSamples = data.shape[1]
        numOfChannels = data.shape[0]
        numOfTrials = data.shape[2]

        kernelLength = int(samplingRate / 2)
        EEGNet_F1 = int(2 * numOfChannels)  # twice the number of channels
        EEGNet_F2 = int(4 * EEGNet_F1)  # four times EEGNet_F1

        # Every supported model takes the same keyword arguments, so only the
        # constructor and the EnK flag vary. The constructor name is resolved
        # only for the selected type.
        if type in ("EnK_EEGNet", "EEGNet"):
            build_model = EEGNet
        elif type == "Gau_EEGNet":
            build_model = Gau_EEGNet
        elif type in ("EnK_ShallowConvNet", "ShallowConvNet"):
            build_model = ShallowConvNet
        elif type == "Gau_ShallowConvNet":
            build_model = Gau_ShallowConvNet
        elif type in ("EnK_DeepConvNet", "DeepConvNet"):
            build_model = DeepConvNet
        elif type == "Gau_DeepConvNet":
            build_model = Gau_DeepConvNet
        elif type == "ConvGau_DeepConvNet":
            build_model = ConvGau_DeepConvNet
        elif type == "ConvGau_ShallowConvNet":
            build_model = ConvGau_ShallowConvNet
        elif type == "ConvGau_EEGNet":
            build_model = ConvGau_EEGNet
        elif type == "CRNN":
            build_model = CRNN
        else:
            # Fail here instead of hitting a NameError on ``model`` later.
            raise ValueError("Error: no such model exist")

        model = build_model(nb_classes=numOfClasses,
                            Chans=numOfChannels,
                            Samples=numOfSamples,
                            dropoutRate=dropoutRate,
                            kernLength=kernelLength,
                            F1=EEGNet_F1,
                            D=2,
                            F2=EEGNet_F2,
                            EnK=type.startswith("EnK_"),
                            dropoutType='Dropout')

        # Selecting the class
        f1_avg, pos_label, loss_type, class_weights = getClassInfo(
            numOfClasses)

        # extract raw data. scale by scaleFactor due to scaling sensitivity in deep learning
        X = data * scaleFactor

        print(X.shape)

        # NOTE(review): data is indexed above as (channels, samples, trials);
        # reshape reinterprets the buffer and does not permute axes -- confirm
        # that a transpose is not what is intended here.
        X = np.reshape(
            X,
            (numOfTrials, numOfChannels,
             numOfSamples))  # format should be in (trials, channels, samples)
        y = np.asarray(labels)
        Y = y.reshape(-1)

        # convert data to NCHW (trials, kernels, channels, samples) format
        X = X.reshape(numOfTrials, numOfKernels, numOfChannels, numOfSamples)

        print(model.summary())

        model.compile(loss=loss_type, optimizer='adam', metrics=['accuracy'])

        X_train, X_test, Y_train, Y_test = getTrainTestVal(X, Y, testSize=0.2)

        Y_train = oneHot(Y_train, numOfClasses, True)

        Y_test = oneHot(Y_test, numOfClasses, True)

        # No validation data is passed to fit.
        model.fit(X_train,
                  Y_train,
                  batch_size=BS,
                  epochs=numOfEpochs,
                  verbose=2,
                  class_weight=class_weights,
                  callbacks=checkpoint)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for timing a code section.
        tic = time.perf_counter()
        predicted = model.predict(X_test)
        toc = time.perf_counter()
        computation_time = toc - tic

        predicted = oneHot(predicted.argmax(axis=-1), numOfClasses, False)
        mse,mae,co_kap_sco,acc,avg_pre_sco,precision,recall,\
        f1_sc=getPerformanceMetricsDL(numOfClasses, pos_label, f1_avg, Y_test, predicted)

        # Grad-Cam method with Test Data (sample index 2, predicted class)
        predicted_class = predicted.argmax(axis=-1)
        camTest, heatmapTest = grad_cam(
            model, X_test[2, :, :, :].reshape(1, 1, numOfChannels,
                                              numOfSamples),
            predicted_class[2], "en_k_layer", numOfClasses)
        cv2.imwrite(SaveMatFileName + 'Test.png', camTest)

        # Grad-Cam method with Train Data (sample index 2, true class)
        predicted_class = Y_train.argmax(axis=-1)
        camTrain, heatmapTrain = grad_cam(
            model, X_train[2, :, :, :].reshape(1, 1, numOfChannels,
                                               numOfSamples),
            predicted_class[2], "en_k_layer", numOfClasses)
        cv2.imwrite(SaveMatFileName + 'Train.png', camTrain)

        print('acc, f1 score, coh kappa is ', acc, ' ', f1_sc, ' ', co_kap_sco)

        #########################################################
        # For classifiers
        # reshape back to (trials, channels, samples)
        X = X.reshape(numOfTrials, numOfChannels, numOfSamples)

        # convert labels to one-hot encodings.
        Y = oneHot(Y, numOfClasses, True)

        # The baseline classifiers are disabled; each list starts with a 0.
        # placeholder and receives this model's score as its last element.
        other_acc, other_mse, other_mae, other_avpc, \
        other_cks, other_pre, other_rec, other_f1, other_ct = [0.],[0.],[0.],[0.],[0.],[0.],[0.],[0.],[0.]

        other_acc.append(acc)
        other_mse.append(mse)
        # Record the mean absolute error here, not the MSE a second time.
        other_mae.append(mae)
        other_avpc.append(avg_pre_sco)
        other_cks.append(co_kap_sco)
        other_pre.append(precision)
        other_rec.append(recall)
        other_f1.append(f1_sc)
        other_ct.append(computation_time)

        print("Classifier ACC for LogRef, LDA, L-SVM, RBF-SVM, NN, Proposed:",
              other_acc)
        print("Classifier MSE for LogRef, LDA, L-SVM, RBF-SVM, NN, Proposed :",
              other_mse)
        print(
            "Classifier f1 score for LogRef, LDA, L-SVM, RBF-SVM, NN, Proposed :",
            other_f1)

        # At this point predicted_class holds the training labels, so
        # "camLabel" stores those.
        sio.savemat(
            SaveMatFileName + '.mat', {
                "acc": other_acc,
                "mse": other_mse,
                "mae": other_mae,
                "avg_pre_recl": other_avpc,
                "cohen_kappa": other_cks,
                "precision": other_pre,
                "recall": other_rec,
                "f1": other_f1,
                "times_prediction": other_ct,
                "pre_labels": predicted.argmax(axis=-1),
                "true_labels": Y_test.argmax(axis=-1),
                "camTest": camTest,
                "camheatmapTest": heatmapTest,
                "camTrain": camTrain,
                "camheatmapTrain": heatmapTrain,
                "camData":
                X_train[2, :, :, :].reshape(1, 1, numOfChannels, numOfSamples),
                "camLabel": predicted_class,
            })
    HIDDEN_SIZE = 64
    KERNEL_SIZE = (20, 5)
    STRIDE = (8, 2)
    GRU_NUM_LAYERS = 2
    NUM_DIRS = 2
    NUM_CLASSES = 2

    kernel_x = KERNEL_SIZE[1]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    test_audio, sr = torchaudio.load(sys.argv[1])
    test_audio = test_audio.squeeze()

    # Create models
    CRNN_model = CRNN(IN_SIZE, HIDDEN_SIZE, KERNEL_SIZE, STRIDE, GRU_NUM_LAYERS)
    attn_layer = AttnMech(HIDDEN_SIZE * NUM_DIRS)
    apply_attn = ApplyAttn(HIDDEN_SIZE * 2, NUM_CLASSES)
    # Load models
    checkpoint = torch.load('models/crnn_final', map_location=device)
    CRNN_model.load_state_dict(checkpoint['model_state_dict'])
    checkpoint = torch.load('models/attn_final', map_location=device)
    attn_layer.load_state_dict(checkpoint['model_state_dict'])
    checkpoint = torch.load('models/apply_attn_final', map_location=device)
    apply_attn.load_state_dict(checkpoint['model_state_dict'])

    # Create melspec
    melspec_test = torchaudio.transforms.MelSpectrogram(
        sample_rate=48000,
        n_mels=N_MELS
    ).to(device)
Example #14
0
def main(args):
    """Train a CRNN model with CTC loss.

    Args:
        args: Parsed options. This function reads ``input_size`` (a string of
            the form ``"<a>x<b>"``), ``dataroot``, ``lr``, ``batch_size`` and
            ``epochs``.
    """
    # cuda check
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # argument handling
    input_size = [int(x) for x in args.input_size.split('x')]

    # random seed
    random.seed(random.randint(1, 10000))

    # for faster training (the attribute is spelled "benchmark"; the previous
    # misspelling only created an unused attribute)
    cudnn.benchmark = True
    cudnn.fastest = True

    # train transformation
    transform = transforms.Compose(
        [Resize(size=(input_size[0], input_size[1])),
         ToTensor()])

    # train dataset
    data = CrnnDataLoader(data_path=args.dataroot,
                          mode="train",
                          transform=transform)

    # model load; the network must live on the same device as its inputs
    nclass = data.cls_len()
    net = CRNN(nclass)
    if device == 'cuda':
        net = net.cuda()

    # optimizer
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=5e-4)

    # loss_function -> CTCLoss
    criterion = nn.CTCLoss()

    # The loader does not depend on the epoch, so build it once; with
    # shuffle=True it still reshuffles every time it is iterated.
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=args.batch_size,
                                              num_workers=4,
                                              shuffle=True)

    # TODO: CTC loss
    for epoch in range(args.epochs):
        iterator = tqdm(data_loader)
        for sample in iterator:
            optimizer.zero_grad()
            imgs = sample["img"]
            labels = sample["seq"].view(-1)
            label_lens = sample["seq_len"].view(-1)

            if device == 'cuda':
                imgs = imgs.cuda()

            preds = net(imgs).cpu()

            # nn.CTCLoss takes one input length per batch element.
            # NOTE(review): assumes preds is laid out (T, N, C) as nn.CTCLoss
            # requires, and that every sample uses the full T steps - confirm
            # against the CRNN forward pass.
            pred_lens = torch.full((preds.size(1),), preds.size(0),
                                   dtype=torch.long)

            print("preds:", preds.shape)
            print("labels:", labels.shape)
            print("pred_lens", pred_lens.shape)
            print("label_lens", label_lens.shape)

            loss = criterion(preds, labels, pred_lens, label_lens)
            loss.backward()
            optimizer.step()

            # loss is a 0-dim tensor; .item() extracts the Python number.
            status = "epoch: {}; loss: {}".format(epoch, loss.item())
            iterator.set_description(status)
Example #15
0
def train(args):
    """Train the CRNN on generated images using a custom CTC training loop.

    Args:
        args: Options forwarded to ``FakeImageGenerator``.

    Every fifth epoch the model is saved to ``checkpoints/base_crnn.h5``.
    After training, one more batch is drawn and the model output for it is
    printed.
    """

    def train_step(x, y):
        """Run one optimisation step on batch ``x`` and return the mean CTC loss.

        ``y`` is accepted for symmetry with the generator output but is not
        used; the labels are read from ``x``.

        This helper runs eagerly, exactly like the loop body it replaces.
        NOTE(review): wrapping it in ``tf.function`` may speed it up, but that
        path was never exercised before - verify before enabling.
        """
        with tf.GradientTape() as tape:
            y_pred = model(x["the_input"])
            loss = tf.reduce_mean(ctc_lambda_func((
                y_pred,
                x["the_labels"],
                tf.reshape(x["input_length"], [-1, 1]),
                tf.reshape(x["label_length"], [-1, 1]),
            )))

        # Compute gradients
        trainable_vars = model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        model.optimizer.apply_gradients(zip(gradients, trainable_vars))
        return loss

    epochs = 1000
    iter_per_epoch = 100
    # NOTE(review): model2 / test_func are not used below; the call is kept in
    # case CRNN_model() has side effects - confirm and remove if not.
    model2, test_func = CRNN_model()

    train_generator = FakeImageGenerator(args).next_gen()

    model = CRNN(ALPHABET)
    model.build()
    model.summary()

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=5))

    # Epochs are numbered from 1; the upper bound is inclusive so that exactly
    # `epochs` epochs are run.
    for epoch in range(1, epochs + 1):
        print(f"Start of epoch {epoch}")

        # stateful_metrics must be an iterable of names; a bare string would
        # be split into single characters and "loss" would be averaged.
        progress = Progbar(iter_per_epoch, stateful_metrics=["loss"])

        for _ in range(iter_per_epoch):
            x, y = next(train_generator)
            loss = train_step(x, y)
            progress.add(1, values=[('loss', loss)])

        if epoch % 5 == 0:
            model.save("checkpoints/base_crnn.h5")

    x, y = next(train_generator)
    print(model(x["the_input"]))

    """
Example #16
0
class Pipeline:
    """TF1 graph-mode pipeline for training a counting/regression model.

    The methods build on attributes set by earlier ones, so the expected call
    order is ``load_data`` -> ``create_batches`` -> ``construct_model`` ->
    ``fit`` -> ``test``.
    """

    # Architectures construct_model knows how to build.
    _MODEL_NAMES = ('r3', 'alexnet', 'lstm')

    def __init__(self, save_path):
        """Reset the default graph, open a session and create ``save_path``.

        Args:
            save_path: Directory for the setup log, summaries and checkpoints.
        """
        tf.reset_default_graph()
        self.save_path = save_path
        self.sess = tf.Session()

        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)

    def load_image(self, image, label):
        """Convert, resize and standardise one image; the label passes through.

        Reads ``self.img_dimens``, which ``load_data`` sets.
        """
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_images(image, [self.img_dimens[0],
                                               self.img_dimens[1]])
        image = tf.image.per_image_standardization(image)
        return image, label

    def load_data(self, img_dimension=(5, 5), n_channels=3):
        """Create the input placeholders and the mapped ``tf.data`` dataset.

        Args:
            img_dimension: Two image dimensions used for the placeholder shape
                and for resizing.
            n_channels: Size of the last placeholder axis (default 3).
        """
        self.x = tf.placeholder('float32',
                                shape=[None,
                                       img_dimension[0],
                                       img_dimension[1],
                                       n_channels],
                                name='input_images')

        self.y = tf.placeholder('int32',
                                shape=[None, 1],
                                name='counts_images')

        self.img_dimens = img_dimension
        self.n_channels = n_channels

        # Use half the cores, but never fewer than one parallel call.
        n_process = max(1, multiprocessing.cpu_count() // 2)
        self.dataset_img = tf.data.Dataset.from_tensor_slices((self.x, self.y))
        self.dataset_img = self.dataset_img.map(self.load_image,
                                                num_parallel_calls=n_process)

    def create_batches(self, batch_size=32):
        """Batch the dataset and expose ``self.images`` / ``self.counts``.

        Also stores the initializable iterator as ``self.iterator``.
        """
        batches = self.dataset_img.batch(batch_size)
        batches = batches.prefetch(buffer_size=1)

        self.iterator = batches.make_initializable_iterator()

        self.images, self.counts = self.iterator.get_next()

    def construct_model(self, model_name='r1', lr=1e-6):
        """Build the model, loss, optimiser step, collections and writers.

        Args:
            model_name: One of ``'r3'``, ``'alexnet'`` or ``'lstm'``.
            lr: Learning rate for the Adam train step, and for the ``'r3'``
                model; the other two models are built with ``lr=0.003``.

        Raises:
            ValueError: If ``model_name`` is not a known architecture. This
                includes the default ``'r1'``, which has no matching branch.
        """
        # Fail early with a clear message instead of an AttributeError on
        # self.model further down.
        if model_name not in self._MODEL_NAMES:
            raise ValueError(
                'unknown model_name {!r}; expected one of {}'.format(
                    model_name, self._MODEL_NAMES))

        self.model_name = model_name
        tf.summary.image('image_angle_0', self.images, 1)

        with open(self.save_path + '/setup.txt', 'a') as out:
            out.write('Architecture: ' + str(model_name) + '\n')
            out.write('number of channels: ' + str(self.n_channels) + '\n')
            out.write('img dimensionality: ' + str(self.img_dimens) + '\n')

        if model_name == 'r3':
            self.model = Regressor_3(self.images,
                                     self.counts,
                                     lr=lr)
        elif model_name == 'alexnet':
            self.model = AlexNet(self.images,
                                 self.counts,
                                 lr=0.003)
        else:  # 'lstm'
            self.model = CRNN(self.images,
                              self.counts,
                              lr=0.003)

        self.loss = self.model.loss()

        tf.summary.scalar("loss", self.loss)

        with tf.name_scope('train'):
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

        tf.add_to_collection(name='saved', value=self.loss)
        tf.add_to_collection(name='saved', value=self.model.pred_counts)
        if self.model_name == 'lstm':
            # NOTE(review): self.reconstruction is not assigned anywhere in
            # this class, so this line raises AttributeError unless a caller
            # sets it first - confirm where it should come from.
            tf.add_to_collection(name='saved', value=self.reconstruction)

        tf.add_to_collection(name='placeholder', value=self.x)
        tf.add_to_collection(name='placeholder', value=self.y)
        tf.add_to_collection(name='placeholder', value=self.images)
        tf.add_to_collection(name='placeholder', value=self.counts)
        tf.add_to_collection(name='placeholder', value=self.model.keep_prob)
        tf.add_to_collection(name='placeholder', value=self.model.is_training)
        tf.add_to_collection(name='placeholder', value=self.iterator.initializer)

        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver()

        self.writer = tf.summary.FileWriter(self.save_path + '/logs/train')
        self.writer_test = tf.summary.FileWriter(self.save_path + '/logs/test')

    def train(self, x_train, y_train, keep_prob=0.5):
        """Run one training pass over the data and return the mean batch loss.

        Writes one summary per batch and advances the global counter
        ``self.it``, which ``fit`` initialises.
        """
        epoch_train_loss = []

        self.sess.run(self.iterator.initializer,
                      feed_dict={self.x: x_train,
                                 self.y: y_train})

        try:
            # The iterator signals the end of the data with OutOfRangeError.
            while True:
                train_loss, _, _, _, sm = self.sess.run(
                    [self.loss,
                     self.images,
                     self.counts,
                     self.train_step,
                     self.summaries],
                    feed_dict={self.model.keep_prob: keep_prob,
                               self.model.is_training: True})

                epoch_train_loss.append(train_loss)
                self.writer.add_summary(sm, self.it)
                self.it += 1

        except tf.errors.OutOfRangeError:
            pass

        return np.mean(epoch_train_loss)

    def validation(self, x_val, y_val):
        """Run one pass over the validation data and return the mean loss.

        No train step is executed; summaries go to the test writer.
        """
        epoch_val_loss = []

        self.sess.run(self.iterator.initializer,
                      feed_dict={self.x: x_val,
                                 self.y: y_val})
        try:
            while True:
                # The batches are not being reused here.
                val_loss, _, _, sm = self.sess.run(
                    [self.loss,
                     self.images,
                     self.counts,
                     self.summaries],
                    feed_dict={self.model.keep_prob: 1,
                               self.model.is_training: False})
                epoch_val_loss.append(val_loss)
                self.writer_test.add_summary(sm, self.it)
                self.it += 1

        except tf.errors.OutOfRangeError:
            pass

        return np.mean(epoch_val_loss)

    def test(self, x_test, y_test):
        """Run one pass over the test data and return the mean loss.

        Also appends the best epoch recorded by ``fit`` to ``setup.txt``, so
        ``fit`` must have been called first.
        """
        epoch_test_loss = []

        self.sess.run(self.iterator.initializer,
                      feed_dict={self.x: x_test,
                                 self.y: y_test})
        try:
            while True:
                # The batches are not being reused here.
                test_loss, _, _ = self.sess.run(
                    [self.loss,
                     self.images,
                     self.counts],
                    feed_dict={self.model.keep_prob: 1,
                               self.model.is_training: False})
                epoch_test_loss.append(test_loss)

        except tf.errors.OutOfRangeError:
            pass

        with open(self.save_path + '/setup.txt', 'a') as out:
            out.write('best model found in iter: ' + str(self.best_model_epoch) + '\n')
        return np.mean(epoch_test_loss)

    def fit(self, x_train, y_train, x_val, y_val, n_epochs=10,
            stop_step=20, keep_prob=0.5):
        """Train for up to ``n_epochs`` with validation-based early stopping.

        Validation runs on every even epoch. The session is saved whenever the
        validation loss improves, and training stops once ``stop_step``
        validations in a row bring no improvement.
        """
        # init variables
        self.sess.run(tf.local_variables_initializer())
        self.sess.run(tf.global_variables_initializer())
        self.writer.add_graph(self.sess.graph)
        # Variables for early stopping
        best_loss = math.inf
        nochanges = 0  # validations without improvement
        # Global train iterations
        self.it = 0
        self.best_model_epoch = n_epochs

        best_model_path = self.save_path + '/model/best_model'

        for epoch in range(n_epochs):
            train_loss = self.train(x_train, y_train, keep_prob)

            if epoch % 2 == 0:
                val_loss = self.validation(x_val, y_val)
                print('Epoch: {0} Train Loss: {1} Val Loss: {2}'.format(epoch,
                                                                        train_loss,
                                                                        val_loss))
                if val_loss < best_loss:
                    print('saving best model on epoch {0}'.format(epoch))
                    best_loss = val_loss
                    nochanges = 0

                    # Previously this passed the builtin `dir` to rmtree;
                    # remove the stale checkpoint directory itself.
                    if os.path.isdir(best_model_path):
                        shutil.rmtree(best_model_path)

                    self.best_model_epoch = epoch
                    self.saver.save(self.sess, best_model_path)
                else:
                    nochanges += 1

            if nochanges == stop_step:
                print('Early stopping at epoch: {}'.format(self.best_model_epoch))
                break
Example #17
0
# Label converter built from the configured alphabet.
# NOTE(review): presumably maps between text and integer label sequences for
# CTC - confirm against utils.strLabelConverter.
converter = utils.strLabelConverter(opt.alphabet)
# CTC loss with PyTorch's default arguments.
criterion = torch.nn.CTCLoss()


# Custom weight initialisation, applied to the crnn model via ``apply``.
def weights_init(m):
    """Re-initialise ``m`` in place according to its class name.

    Classes whose name contains ``Conv`` get weights drawn from N(0, 0.02).
    Classes whose name contains ``BatchNorm`` get weights drawn from
    N(1, 0.02) and a zero bias. Any other module is left untouched.
    """
    layer_type = m.__class__.__name__
    if 'Conv' in layer_type:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_type:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# Build the model. This rebinds the name `crnn` from the module to the model
# instance, so the module is no longer reachable under that name afterwards.
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
# Optionally start from pretrained weights.
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    crnn.load_state_dict(torch.load(opt.pretrained))
print(crnn)

# Buffers allocated with fixed sizes; the size-only constructors leave their
# contents uninitialised.
# NOTE(review): `image` is allocated with 3 channels and a square
# imgH x imgH shape, while the model is built with `nc` channels - confirm
# these agree with the data.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

# Move the model, the image buffer and the loss to the GPU, and wrap the
# model in DataParallel over the first opt.ngpu devices.
if opt.cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    criterion = criterion.cuda()
def main():
    """Train a CRNN with CTC loss and save the weights with the best test loss.

    Options come from ``hyperparameters()``. After every training epoch the
    model is evaluated on the test split, and its ``state_dict`` is written
    to ``args.savepath`` whenever the summed test loss improves.

    Raises:
        ValueError: If ``args.optim`` is neither ``'adam'`` nor ``'rmsprop'``.
    """
    args = hyperparameters()

    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    # Select GPU or CPU.
    device = torch.device(
        f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # Load the training dataset.
    train_dataset = CRNN_dataset(path=train_path,
                                 w=args.img_width,
                                 h=args.img_height)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    # Load the test dataset.
    test_dataset = CRNN_dataset(path=test_path,
                                w=args.img_width,
                                h=args.img_height)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True)

    # Define the model.
    model = CRNN(nc=1, nclass=37, nh=256, imgH=args.img_height)

    # Define the loss.
    criterion = nn.CTCLoss()

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(0.5, 0.999))
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    else:
        # An explicit raise, because asserts are stripped under `python -O`.
        raise ValueError("옵티마이저를 다시 입력해주세요. :(")

    model = model.to(device)
    best_test_loss = 100000000
    for i in range(args.epochs):

        print('epochs: ', i)

        print("<----training---->")
        model.train()
        for inputs, targets in tqdm(train_dataloader):
            # Swap the last two axes so inputs are (batch, channel, h, w).
            inputs = inputs.permute(0, 1, 3, 2)
            batch_size = inputs.size(0)
            inputs = inputs.to(device)
            target_text, target_length = targets
            target_text, target_length = target_text.to(
                device), target_length.to(device)
            preds = model(inputs)
            preds = preds.log_softmax(2)
            # Every sample uses the full output sequence length.
            preds_length = torch.IntTensor([preds.size(0)] * batch_size)

            # CTC loss trains on unsegmented sequence data: only the ordered
            # class labels are known, not where each class occurs. It is
            # widely used for OCR and speech recognition.
            # Inputs: predictions, targets, prediction lengths, target lengths.
            loss = criterion(preds, target_text, preds_length,
                             target_length) / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("<----evaluation---->")
        # train()/eval() switch the model between training and evaluation
        # behaviour; layers such as Dropout and BatchNorm act differently in
        # the two modes.
        model.eval()
        loss = 0.0

        # No gradients are needed for evaluation; this avoids building and
        # retaining an autograd graph for every test batch.
        with torch.no_grad():
            for inputs, targets in tqdm(test_dataloader):
                inputs = inputs.permute(0, 1, 3, 2)
                batch_size = inputs.size(0)
                inputs = inputs.to(device)
                target_text, target_length = targets
                target_text, target_length = target_text.to(
                    device), target_length.to(device)
                preds = model(inputs)
                preds = preds.log_softmax(2)
                preds_length = torch.IntTensor([preds.size(0)] * batch_size)
                batch_loss = criterion(preds, target_text, preds_length,
                                       target_length) / batch_size
                # Accumulate as a plain float.
                loss += batch_loss.item()

        print("test loss: ", loss / len(test_dataloader))
        if loss < best_test_loss:
            # A lower loss than best_test_loss becomes the new best loss.
            best_test_loss = loss
            # Save the best model to args.savepath.
            torch.save(model.state_dict(), args.savepath)
            print("best model 저장 성공")
Example #19
0
def main():
    """Train a CRNN with CTC loss and save the weights with the best test loss.

    Options come from ``hyperparameters()``. After every training epoch the
    model is evaluated on the test split, and its ``state_dict`` is written
    to ``args.savepath`` whenever the summed test loss improves.

    Raises:
        ValueError: If ``args.optim`` is neither ``'adam'`` nor ``'rmsprop'``.
    """
    args = hyperparameters()

    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    # Select GPU or CPU.
    device = torch.device(
        f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # Load the training dataset.
    train_dataset = CRNN_dataset(path=train_path,
                                 w=args.img_width,
                                 h=args.img_height)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    # Load the test dataset.
    test_dataset = CRNN_dataset(path=test_path,
                                w=args.img_width,
                                h=args.img_height)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True)

    # Define the model: nc=1, nclass=37, nh=256.
    model = CRNN(args.img_height, 1, 37, 256)

    # Define the loss.
    criterion = nn.CTCLoss()

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(0.5, 0.999))
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    else:
        # An explicit raise, because asserts are stripped under `python -O`.
        raise ValueError("옵티마이저를 다시 입력해주세요. :(")

    model = model.to(device)
    best_test_loss = 100000000

    for i in range(args.epochs):

        print('epochs: ', i)

        print("<----training---->")
        model.train()
        for inputs, targets in tqdm(train_dataloader):
            # Swap the last two axes so inputs are (batch, channel, h, w).
            inputs = inputs.permute(0, 1, 3, 2)
            batch_size = inputs.size(0)
            inputs = inputs.to(device)
            target_text, target_length = targets
            target_text, target_length = target_text.to(
                device), target_length.to(device)
            preds = model(inputs)
            preds = F.log_softmax(preds, dim=-1)
            # Every sample uses the full output sequence length.
            preds_length = torch.IntTensor([preds.size(0)] * batch_size)

            # CTC (Connectionist Temporal Classification) trains a model
            # without an explicit alignment between input frames and the
            # target sequence. It sums the likelihood over all possible
            # alignments using dynamic programming.
            # Inputs: predictions, targets, prediction lengths, target lengths.
            loss = criterion(preds, target_text, preds_length,
                             target_length) / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("\n<----evaluation---->")
        # train() selects training mode and eval() selects evaluation mode.
        model.eval()
        loss = 0.0

        # No gradients are needed for evaluation; this avoids building and
        # retaining an autograd graph for every test batch.
        with torch.no_grad():
            for inputs, targets in tqdm(test_dataloader):
                inputs = inputs.permute(0, 1, 3, 2)
                batch_size = inputs.size(0)
                inputs = inputs.to(device)
                target_text, target_length = targets
                target_text, target_length = target_text.to(
                    device), target_length.to(device)
                preds = model(inputs)
                preds = F.log_softmax(preds, dim=-1)
                preds_length = torch.IntTensor([preds.size(0)] * batch_size)

                batch_loss = criterion(preds, target_text, preds_length,
                                       target_length) / batch_size
                # Accumulate the test loss as a plain float.
                loss += batch_loss.item()

        print("\ntest loss: ", loss)
        if loss < best_test_loss:
            # A lower loss than best_test_loss becomes the new best loss.
            best_test_loss = loss
            # Save the best model to args.savepath.
            torch.save(model.state_dict(), args.savepath)
            print("best model 저장 성공")
def main():
    """Train, validate and test a CNN+RNN spoken-language-identification model.

    Parses command-line options, builds the model, trains for
    ``--max_epochs`` epochs while keeping the weights with the lowest
    validation loss, then evaluates on the test set and saves/plots the
    results.

    Returns:
        ``None`` when ``--mode`` is not ``'train'``, otherwise ``0``.
    """
    # Local import: only needed to create the checkpoint directory.
    import os

    parser = argparse.ArgumentParser(
        description='Spoken Language Idenfication')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size',
                        type=int,
                        default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--n_class',
                        type=int,
                        default=2,
                        help='number of classes of data (default: 2)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout rate in training (default: 0.2)')
    # NOTE(review): with default=True and action='store_true' this flag can
    # never be switched off from the command line.
    parser.add_argument('--bidirectional',
                        default=True,
                        action='store_true',
                        help='use bidirectional RNN (default: True)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=2,
                        help='batch size in training (default: 2)')
    parser.add_argument(
        '--workers',
        type=int,
        default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name',
                        type=str,
                        default='model',
                        help='the name of model')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--nn_type',
                        type=str,
                        default='crnn',
                        help='type of neural networks')

    args = parser.parse_args()

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Integer division: a layer size must be an int, not a float.
    feature_size = N_FFT // 2 + 1

    cnn = CNN.CNN(feature_size)
    rnn = RNN.RNN(cnn.feature_size,
                  args.hidden_size,
                  args.n_class,
                  input_dropout_p=args.dropout,
                  dropout_p=args.dropout,
                  n_layers=args.layer_size,
                  bidirectional=args.bidirectional,
                  rnn_cell='gru',
                  variable_lengths=False)

    model = CRNN.CRNN(cnn, rnn)
    model.flatten_parameters()

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

    if args.mode != 'train':
        return

    download_data()

    kor_db_list = []
    search('dataset/train/train_data', kor_db_list)

    # `str` replaces `np.unicode`, an alias for it that NumPy has removed.
    train_wav_paths = np.loadtxt("dataset/TRAIN_list.csv",
                                 delimiter=',',
                                 dtype=str)
    valid_wav_paths = np.loadtxt("dataset/TEST_developmentset_list.csv",
                                 delimiter=',',
                                 dtype=str)
    test_wav_paths = np.loadtxt("dataset/TEST_coreset_list.csv",
                                delimiter=',',
                                dtype=str)

    train_wav_paths = [
        "dataset/TIMIT/{}.WAV".format(x) for x in train_wav_paths]
    valid_wav_paths = [
        "dataset/TIMIT/{}.WAV".format(x) for x in valid_wav_paths]
    test_wav_paths = [
        "dataset/TIMIT/{}.WAV".format(x) for x in test_wav_paths]

    min_loss = 100000
    begin_epoch = 0
    # Epoch whose weights were saved as best; -1 means none was saved.
    save_epoch = -1
    best_model_path = './save_model/best_model.pt'
    os.makedirs('./save_model', exist_ok=True)

    # [train loss, train acc, eval loss, eval acc] per epoch
    loss_acc = [[], [], [], []]

    train_batch_num, train_dataset_list, valid_dataset, test_dataset = \
        split_dataset(args, train_wav_paths, valid_wav_paths, test_wav_paths, kor_db_list)

    logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):

        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers, args.nn_type)
        train_loader.start()

        train_loss, train_acc = train(model, train_batch_num, train_queue,
                                      criterion, optimizer, device,
                                      train_begin, args.workers, 10)
        logger.info('Epoch %d (Training) Loss %0.4f Acc %0.4f' %
                    (epoch, train_loss, train_acc))

        train_loader.join()

        loss_acc[0].append(train_loss)
        loss_acc[1].append(train_acc)

        valid_queue = queue.Queue(args.workers * 2)

        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0, args.nn_type)
        valid_loader.start()

        eval_loss, eval_acc = evaluate(model, valid_loader, valid_queue,
                                       criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f Acc %0.4f' %
                    (epoch, eval_loss, eval_acc))

        valid_loader.join()

        loss_acc[2].append(eval_loss)
        loss_acc[3].append(eval_acc)

        if eval_loss < min_loss:
            min_loss = eval_loss
            torch.save(model.state_dict(), best_model_path)
            save_epoch = epoch

    # Only reload when this run actually saved a best model. Otherwise keep
    # the current weights instead of failing or picking up a stale file.
    if save_epoch >= 0:
        model.load_state_dict(torch.load(best_model_path))

    test_queue = queue.Queue(args.workers * 2)

    test_loader = BaseDataLoader(test_dataset, test_queue, args.batch_size, 0,
                                 args.nn_type)
    test_loader.start()

    confusion_matrix = torch.zeros((args.n_class, args.n_class))
    test_loss, test_acc = evaluate(model, test_loader, test_queue, criterion,
                                   device, confusion_matrix)
    logger.info('Epoch %d (Test) Loss %0.4f Acc %0.4f' %
                (save_epoch, test_loss, test_acc))

    test_loader.join()

    save_data(loss_acc, test_loss, test_acc,
              confusion_matrix.to('cpu').numpy())
    plot_data(loss_acc, test_loss, test_acc)

    return 0
Example #21
0
def train_net(args):
    """Train a CRNN on the IC2015 dataset with CTC loss and checkpointing.

    Args:
        args: Parsed options. Reads ``checkpoint``, ``optimizer``, ``lr``,
            ``mom``, ``weight_decay``, ``beta1``, ``batch_size`` and
            ``end_epoch``.

    Each epoch trains and tests the model, logs learning rate, loss and
    accuracy to TensorBoard, and saves a checkpoint through
    ``utils.save_checkpoint``.
    """
    manual_seed = 7
    random.seed(manual_seed)
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # custom weights initialization called on crnn
    def weights_init(m):
        """Initialise Conv weights ~N(0, 0.02); BatchNorm ~N(1, 0.02), bias 0."""
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    # Initialize / load checkpoint
    if checkpoint is None:
        model = CRNN(imgH, nc, nclass, nh)
        model.apply(weights_init)
        # model = nn.DataParallel(model)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         betas=(args.beta1, 0.999))

    else:
        # SECURITY: the checkpoint holds whole pickled model/optimizer
        # objects; only load files from a trusted source.
        # map_location lets a checkpoint saved on GPU be resumed on CPU.
        checkpoint = torch.load(checkpoint, map_location=device)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        # NOTE(review): best_loss is not restored from the checkpoint, so the
        # first epoch after a resume always counts as an improvement.

    logger = utils.get_logger()

    # Move to GPU, if available
    model = model.to(device)

    # Loss function
    criterion = nn.CTCLoss(reduction='mean').to(device)

    # Custom dataloaders
    train_dataset = data_gen.Ic2015Dataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    test_dataset = data_gen.Ic2015Dataset('test')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)

    # Close the writer even if training is interrupted, so that pending
    # events are flushed to disk.
    try:
        # Epochs
        for epoch in range(start_epoch, args.end_epoch):
            # One epoch's training
            train_loss, train_acc = train(train_loader=train_loader,
                                          model=model,
                                          criterion=criterion,
                                          optimizer=optimizer,
                                          epoch=epoch,
                                          logger=logger)
            effective_lr = utils.get_learning_rate(optimizer)
            print('\nCurrent effective learning rate: {}\n'.format(effective_lr))

            writer.add_scalar('Learning_Rate', effective_lr, epoch)

            writer.add_scalar('Train_Loss', train_loss, epoch)
            writer.add_scalar('Train_Accuracy', train_acc, epoch)

            # One epoch's validation
            test_loss, test_acc = test(test_loader=test_loader,
                                       model=model,
                                       criterion=criterion,
                                       logger=logger)
            writer.add_scalar('Test_Loss', test_loss, epoch)
            writer.add_scalar('Test_Accuracy', test_acc, epoch)

            # Check if there was an improvement
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint
            utils.save_checkpoint(epoch, epochs_since_improvement, model,
                                  optimizer, best_loss, is_best)
    finally:
        writer.close()
Example #22
0
def train_net(args):
    """Train a CRNN with CTC loss, validating and checkpointing every epoch.

    Builds a fresh model and Adam optimizer, or resumes both from
    ``args.checkpoint``, then runs one training pass and one validation pass
    per epoch over the MJSynth ``'train'`` / ``'val'`` splits. Losses and
    accuracies are logged to a ``SummaryWriter`` and a checkpoint is written
    through ``utils.save_checkpoint`` at the end of each epoch.

    Args:
        args: Options object; reads ``checkpoint`` (path or ``None``), ``lr``,
            ``batch_size`` and ``end_epoch`` (exclusive upper bound of the
            epoch range).

    Relies on module-level ``imgH``, ``num_channels``, ``num_classes``,
    ``num_hidden``, ``device`` and ``num_workers``.
    """
    # Seed every RNG in use so runs are repeatable.
    manual_seed = 7
    random.seed(manual_seed)
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    epochs_since_improvement = 0

    # custom weights initialization called on crnn
    def weights_init(m):
        """Draw Conv weights from N(0, 0.02) and BatchNorm weights from N(1, 0.02)."""
        classname = m.__class__.__name__
        if 'Conv' in classname:
            m.weight.data.normal_(0.0, 0.02)
        elif 'BatchNorm' in classname:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    # Initialize / load checkpoint
    if checkpoint is None:
        model = CRNN(imgH, num_channels, num_classes, num_hidden)
        model.apply(weights_init)

        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-09)
    else:
        # map_location lets a checkpoint written on a GPU be resumed on
        # whatever device this run uses (including CPU-only machines).
        # NOTE: torch.load unpickles whole objects here -- only load
        # checkpoints from a trusted source.
        checkpoint = torch.load(checkpoint, map_location=device)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        # NOTE(review): best_loss is not restored, so the first epoch after a
        # resume always counts as an improvement -- confirm which key
        # utils.save_checkpoint stores it under before restoring it here.

    logger = utils.get_logger()

    # Move to GPU, if available
    model = model.to(device)

    # Loss function
    criterion = nn.CTCLoss(reduction='mean').to(device)

    # Custom dataloaders
    train_dataset = data_gen.MJSynthDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    valid_dataset = data_gen.MJSynthDataset('val')
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=num_workers)

    writer = SummaryWriter()
    try:
        # Epochs
        for epoch in range(start_epoch, args.end_epoch):
            # One epoch's training
            train_loss, train_acc = train(train_loader=train_loader,
                                          model=model,
                                          criterion=criterion,
                                          optimizer=optimizer,
                                          epoch=epoch,
                                          logger=logger)
            writer.add_scalar('Train_Loss', train_loss, epoch)
            writer.add_scalar('Train_Accuracy', train_acc, epoch)

            # One epoch's validation
            valid_loss, valid_acc = valid(valid_loader=valid_loader,
                                          model=model,
                                          criterion=criterion,
                                          logger=logger)
            writer.add_scalar('Validation_Loss', valid_loss, epoch)
            writer.add_scalar('Validation_Accuracy', valid_acc, epoch)

            # Check if there was an improvement
            is_best = valid_loss < best_loss
            best_loss = min(valid_loss, best_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint
            utils.save_checkpoint(epoch, epochs_since_improvement, model,
                                  optimizer, best_loss, is_best)
    finally:
        # Flush pending events even when training is interrupted or raises.
        writer.close()
Example #23
0
def main(opts):
  """Train a CRNN on digit strings with CTC loss, saving snapshots periodically.

  Args:
    opts: Options object; reads ``cuda``, ``base_lr``, ``model`` (path of a
      saved model to resume from, used only if it exists), ``batch_size``,
      ``max_iters`` and ``save_path``.

  Relies on module-level ``weight_decay``, ``disp_interval`` and
  ``batch_per_epoch``.
  """
  import sys
  import traceback

  alphabet = '0123456789.'
  # One class more than the alphabet (presumably the CTC blank -- confirm).
  nclass = len(alphabet) + 1
  model_name = 'crnn'
  net = CRNN(nclass)
  print("Using {0}".format(model_name))

  if opts.cuda:
    net.cuda()
  learning_rate = opts.base_lr
  optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

  # Default start; replaced by the restored step when a saved model exists.
  step_start = 0
  if os.path.exists(opts.model):
    print('loading model from %s' % opts.model)
    step_start, learning_rate = net_utils.load_net(opts.model, net, optimizer)

  ## Dataset
  converter = strLabelConverter(alphabet)
  dataset = ImgDataset(
      root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
      csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
      transform=None,
      target_transform=converter.encode
  )
  ocrdataloader = torch.utils.data.DataLoader(
      dataset, batch_size=opts.batch_size, shuffle=True, collate_fn=own_collate
  )

  net.train()
  ctc_loss = CTCLoss()

  data_iter = iter(ocrdataloader)
  # Last loss that was computed; stays None until a step gets that far.
  loss = None

  for step in range(step_start, opts.max_iters):

    try:
      data = next(data_iter)
    except StopIteration:
      # Loader exhausted: start another pass over the dataset.
      data_iter = iter(ocrdataloader)
      data = next(data_iter)

    im_data, gt_boxes, text = data
    if opts.cuda:
      im_data = im_data.cuda()

    try:
      loss = process_crnn(im_data, gt_boxes, text, net, ctc_loss, converter, training=True)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    except Exception:
      # Best effort: report the failing batch and keep training.
      traceback.print_exc(file=sys.stdout)

    if step % disp_interval == 0 and loss is not None:
      try:
        print('step:%d || loss %.4f' % (step, loss))
      except Exception:
        traceback.print_exc(file=sys.stdout)

    if step > step_start and (step % batch_per_epoch == 0):
      save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
      state = {'step': step,
               'learning_rate': learning_rate,
               'state_dict': net.state_dict(),
               'optimizer': optimizer.state_dict()}
      torch.save(state, save_name)
      print('save model: {}'.format(save_name))
Example #24
0
def train_and_predict(x_train, y_train, x_val, y_val, x_test):
    """Fit a CRNN classifier and return ensembled test-set predictions.

    The network is trained for 15 epochs with binary cross-entropy and Adam;
    the learning rate is multiplied by 0.9 every 2 epochs. After each epoch
    the model is validated and the logger monitors ``val_f1``. The final
    output averages the predictions made with each state held in
    ``logger.state_dicts`` (the logger is configured with ``n_states=3``).

    Args:
        x_train (np.ndarray): Training instances.
        y_train (np.ndarray): Training labels.
        x_val (np.ndarray): Validation instances.
        y_val (np.ndarray): Validation labels.
        x_test (np.ndarray): Test instances.

    Returns:
        np.ndarray: Mean of the flattened per-state predictions.
    """
    _ensure_reproducibility()

    # Run on the first GPU when one is present, otherwise on the CPU
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    def as_features(array):
        # Float tensor built from `array`, with axes 1 and 2 swapped
        return torch.FloatTensor(array).transpose(1, 2)

    x_train, x_val, x_test = (as_features(a) for a in (x_train, x_val, x_test))
    y_train, y_val = torch.FloatTensor(y_train), torch.FloatTensor(y_val)

    # Class count comes from the label width, feature count from axis 1
    net = CRNN(y_train.shape[-1], x_train.shape[1]).to(device)

    criterion = BCELoss()
    optimizer = Adam(net.parameters(), lr=0.01)
    scheduler = StepLR(optimizer, step_size=2, gamma=0.9)

    # Batched views of the three splits; only the training split is shuffled
    train_batches = DataLoader(TensorDataset(x_train, y_train),
                               batch_size=128, shuffle=True)
    val_batches = DataLoader(TensorDataset(x_val, y_val), batch_size=512)
    test_batches = DataLoader(TensorDataset(x_test), batch_size=512)

    # Records training/validation results and keeps model states
    logger = Logger(net, n_states=3)

    n_epochs = 15
    for epoch in range(n_epochs):
        # Training pass, with a progress bar over the batches
        progress = tqdm(train_batches)
        progress.set_description('Epoch %d' % epoch)
        train(net.train(), criterion, optimizer, progress, logger, device)

        # Validation pass; the F1 score is the monitored quantity
        validate(net.eval(), criterion, val_batches, logger, device)
        logger.monitor('val_f1')

        logger.print_results()

        # Advance the learning-rate schedule once per epoch
        scheduler.step()

    def predict_with(state):
        # Load one saved state and predict the test set with it
        net.load_state_dict(state)
        return _flatten(predict(net, test_batches, device))

    # Average the predictions over every saved state
    ensemble = [predict_with(state) for state in logger.state_dicts]
    return torch.stack(ensemble).mean(dim=0).cpu().numpy()
Example #25
0
                            num_workers=1,
                            pin_memory=True)

    ### Create melspecs
    # With augmentations
    melspec_train = nn.Sequential(
        torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=N_MELS),
        torchaudio.transforms.FrequencyMasking(freq_mask_param=15),
        torchaudio.transforms.TimeMasking(time_mask_param=35),
    ).to(device)
    # W/o augmentations
    melspec_val = torchaudio.transforms.MelSpectrogram(
        sample_rate=16000, n_mels=N_MELS).to(device)

    ### Create model
    CRNN_model = CRNN(IN_SIZE, HIDDEN_SIZE, KERNEL_SIZE, STRIDE,
                      GRU_NUM_LAYERS)
    attn_layer = AttnMech(HIDDEN_SIZE * NUM_DIRS)
    apply_attn = ApplyAttn(HIDDEN_SIZE * 2, NUM_CLASSES)

    ### Download ready models
    # checkpoint = torch.load('crnn_final', map_location=device)
    # CRNN_model.load_state_dict(checkpoint['model_state_dict'])
    # checkpoint = torch.load('attn_final', map_location=device)
    # attn_layer.load_state_dict(checkpoint['model_state_dict'])
    # checkpoint = torch.load('apply_attn_final', map_location=device)
    # apply_attn.load_state_dict(checkpoint['model_state_dict'])

    full_model = FullModel(CRNN_model, attn_layer, apply_attn)
    print(full_model.to(device))
    print(count_parameters(full_model))
    #wandb.init()