コード例 #1
0
def save_best_model(transducer_model, model_optimizer, hidden_size, layers, epoch,
                    file_name):
    """
    Write a training checkpoint to ``file_name``, replacing any existing file.

    The checkpoint is a dictionary with the model and optimizer state
    dictionaries plus the hyper-parameters passed in; serialization itself is
    delegated to ``ModelHandler.save_checkpoint``.
    :param transducer_model: Model whose ``state_dict()`` is stored
    :param model_optimizer: Optimizer whose ``state_dict()`` is stored
    :param hidden_size: Value stored under the 'hidden_size' key
    :param layers: Value stored under the 'gru_layers' key
    :param epoch: Value stored under the 'epochs' key
    :param file_name: Output file name
    :return: None
    """
    # a previous checkpoint with the same name is deleted before writing
    if os.path.isfile(file_name):
        os.remove(file_name)

    checkpoint = {
        'model_state_dict': transducer_model.state_dict(),
        'model_optimizer': model_optimizer.state_dict(),
        'hidden_size': hidden_size,
        'gru_layers': layers,
        'epochs': epoch,
    }
    ModelHandler.save_checkpoint(checkpoint, file_name)

    timestamp = str(datetime.now().strftime('%m-%d-%Y %H:%M:%S'))
    sys.stderr.write("[" + timestamp + "]  INFO: MODEL SAVED SUCCESSFULLY.\n")
コード例 #2
0
ファイル: test_models.py プロジェクト: mfallahi/pepper
def do_test(test_file, batch_size, gpu_mode, num_workers, model_path,
            print_details):
    """
    Load a saved model and evaluate it on a test set.

    The model is restored with ``ModelHandler.load_simple_model_for_training``
    and handed to ``test`` together with the layer count and hidden size read
    from the checkpoint. Progress is reported on stderr.
    :param test_file: File describing the test set (forwarded to ``test``)
    :param batch_size: Batch size used for evaluation
    :param gpu_mode: If true the model is wrapped in DataParallel and moved to GPU
    :param num_workers: Number of workers for data loading
    :param model_path: Path to a saved model
    :param print_details: Print debug stuff (forwarded to ``test``)
    :return: None. The process exits with status 1 if ``model_path`` is not a file.
    """
    sys.stderr.write("[" + str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                     "]  Loading data\n")

    if not os.path.isfile(model_path):
        sys.stderr.write("[" +
                         str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                         "] ERROR: INVALID PATH TO MODEL\n")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module and is meant for interactive use
        sys.exit(1)

    sys.stderr.write("[" + str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                     "]  INFO: MODEL LOADING\n")

    # the last element (epoch count stored in the checkpoint) is not needed here
    transducer_model, hidden_size, gru_layers, _prev_ite = \
        ModelHandler.load_simple_model_for_training(model_path,
                                                    input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                                                    image_features=ImageSizeOptions.IMAGE_HEIGHT,
                                                    seq_len=ImageSizeOptions.SEQ_LENGTH,
                                                    num_classes=ImageSizeOptions.TOTAL_LABELS)

    sys.stderr.write("[" + str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                     "]  INFO: MODEL LOADED\n")
    sys.stderr.flush()

    if gpu_mode:
        transducer_model = torch.nn.DataParallel(transducer_model).cuda()

    # the statistics returned by test() are not used by this entry point
    test(test_file,
         batch_size,
         gpu_mode,
         transducer_model,
         num_workers,
         gru_layers,
         hidden_size,
         num_classes=ImageSizeOptions.TOTAL_LABELS,
         print_details=print_details)

    # terminate the final status line so later output starts on a fresh line
    sys.stderr.write("[" + str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                     "] INFO: TEST COMPLETE\n")
コード例 #3
0
def train(train_file, test_file, batch_size, epoch_limit, gpu_mode,
          num_workers, retrain_model, retrain_model_path, gru_layers,
          hidden_size, lr, decay, model_dir, stats_dir, train_mode, world_size,
          rank, device_id):
    """
    Train the transducer model on one rank of a distributed job.

    Every rank iterates over its own shard of the training set (via a
    DistributedSampler) and optimizes on sliding windows of each sequence.
    After each epoch rank 0 evaluates the model with ``test``; in train mode
    rank 0 also saves a checkpoint and appends to the log files.
    :param train_file: Training data, passed to ``SequenceDataset``
    :param test_file: Test data, forwarded to ``test``
    :param batch_size: Batch size for the training loader and for ``test``
    :param epoch_limit: Number of epochs to run; when retraining in train mode
                        the epoch count stored in the checkpoint is added to it
    :param gpu_mode: If true, model, loss and tensors are moved to ``device_id``
                     and the model is wrapped in DistributedDataParallel
    :param num_workers: Forwarded to ``test`` (the training loader uses 0 workers)
    :param retrain_model: If true, model and optimizer are restored from
                          ``retrain_model_path``
    :param retrain_model_path: Path to the checkpoint used when retraining
    :param gru_layers: Layer count for a new model (replaced by the checkpoint
                       value when retraining)
    :param hidden_size: Hidden size for a new model (replaced by the checkpoint
                        value when retraining)
    :param lr: Learning rate for Adam
    :param decay: Weight decay for Adam
    :param model_dir: Prefix of the checkpoint file names
    :param stats_dir: Prefix of the log file names; it is concatenated as-is,
                      so it should end with a path separator
    :param train_mode: True for regular training (logging and checkpoints),
                       False for the hyperband setup with early stopping
    :param world_size: Number of replicas, passed to the DistributedSampler
    :param rank: Rank of this process; rank 0 does logging and evaluation
    :param device_id: CUDA device used by this process
    :return: Tuple ``(transducer_model, model_optimizer, stats)``. The process
             exits with status 1 if retraining is requested with an invalid path.
    """

    def log(message):
        # every status line goes to stderr behind a timestamp prefix
        sys.stderr.write("[" +
                         str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                         "] " + message + "\n")

    train_loss_logger = None
    test_loss_logger = None
    confusion_matrix_logger = None

    # the try/finally guarantees the log files are closed on every exit path
    # (normal return, early stopping, sys.exit or an exception)
    try:
        if train_mode is True and rank == 0:
            train_loss_logger = open(stats_dir + "train_loss.csv", 'w')
            test_loss_logger = open(stats_dir + "test_loss.csv", 'w')
            confusion_matrix_logger = open(stats_dir + "confusion_matrix.txt", 'w')

        torch.cuda.set_device(device_id)

        if rank == 0:
            log("INFO: LOADING DATA")

        train_data_set = SequenceDataset(train_file)

        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data_set, num_replicas=world_size, rank=rank)

        # shuffling is delegated to the sampler, hence shuffle=False here
        train_loader = torch.utils.data.DataLoader(dataset=train_data_set,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   num_workers=0,
                                                   pin_memory=True,
                                                   sampler=train_sampler)

        num_classes = ImageSizeOptions.TOTAL_LABELS

        if retrain_model is True:
            if os.path.isfile(retrain_model_path) is False:
                log("ERROR: INVALID PATH TO RETRAIN PATH MODEL --retrain_model_path")
                sys.exit(1)
            log("INFO: RETRAIN MODEL LOADING")
            transducer_model, hidden_size, gru_layers, prev_ite = \
                ModelHandler.load_simple_model_for_training(retrain_model_path,
                                                            input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                                                            image_features=ImageSizeOptions.IMAGE_HEIGHT,
                                                            seq_len=ImageSizeOptions.SEQ_LENGTH,
                                                            num_classes=num_classes)

            # continue counting epochs from where the checkpoint stopped
            if train_mode is True:
                epoch_limit = prev_ite + epoch_limit

            log("INFO: RETRAIN MODEL LOADED")
        else:
            transducer_model = ModelHandler.get_new_gru_model(
                input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                image_features=ImageSizeOptions.IMAGE_HEIGHT,
                gru_layers=gru_layers,
                hidden_size=hidden_size,
                num_classes=num_classes)
            prev_ite = 0

        param_count = sum(p.numel() for p in transducer_model.parameters()
                          if p.requires_grad)
        if rank == 0:
            log("INFO: TOTAL TRAINABLE PARAMETERS:\t" + str(param_count))

        model_optimizer = torch.optim.Adam(transducer_model.parameters(),
                                           lr=lr,
                                           weight_decay=decay)

        if retrain_model is True:
            log("INFO: OPTIMIZER LOADING")
            model_optimizer = ModelHandler.load_simple_optimizer(
                model_optimizer, retrain_model_path, gpu_mode)
            log("INFO: OPTIMIZER LOADED")

        if gpu_mode:
            transducer_model = transducer_model.to(device_id)
            transducer_model = nn.parallel.DistributedDataParallel(
                transducer_model, device_ids=[device_id])

        class_weights = torch.Tensor(CLASS_WEIGHTS)
        # Loss
        criterion = nn.CrossEntropyLoss(class_weights)

        if gpu_mode is True:
            criterion = criterion.to(device_id)

        start_epoch = prev_ite

        # Train the Model
        if rank == 0:
            log("INFO: Training starting")
            log("Start: " + str(start_epoch + 1) + " End: " + str(epoch_limit))

        stats = dict()
        stats['loss_epoch'] = []
        stats['accuracy_epoch'] = []

        for epoch in range(start_epoch, epoch_limit, 1):
            total_loss = 0
            total_images = 0
            if rank == 0:
                log("Train epoch: " + str(epoch + 1))

            # DistributedSampler seeds its shuffle with the epoch number;
            # without this call every epoch would see the same ordering
            train_sampler.set_epoch(epoch)

            batch_no = 1
            if rank == 0:
                progress_bar = tqdm(
                    total=len(train_loader),
                    ncols=100,
                    leave=True,
                    position=rank,
                    desc="Loss: ",
                )
            else:
                progress_bar = None

            # make sure the model is in train mode. BN is different in train and eval.
            transducer_model.train()
            for images, labels in train_loader:
                labels = labels.type(torch.LongTensor)
                images = images.type(torch.FloatTensor)
                if gpu_mode:
                    images = images.to(device_id)
                    labels = labels.to(device_id)

                # NOTE(review): the recurrent state is sized from TrainOptions,
                # not from the gru_layers/hidden_size arguments - confirm the
                # two always agree
                hidden = torch.zeros(images.size(0), 2 * TrainOptions.LSTM_LAYERS,
                                     TrainOptions.HIDDEN_SIZE)
                cell_state = torch.zeros(images.size(0),
                                         2 * TrainOptions.LSTM_LAYERS,
                                         TrainOptions.HIDDEN_SIZE)

                if gpu_mode:
                    hidden = hidden.to(device_id)
                    cell_state = cell_state.to(device_id)

                # slide a TRAIN_WINDOW-wide window over the sequence and take
                # one optimizer step per window
                for i in range(0, ImageSizeOptions.SEQ_LENGTH,
                               TrainOptions.WINDOW_JUMP):
                    model_optimizer.zero_grad()

                    # stop once a full window no longer fits
                    if i + TrainOptions.TRAIN_WINDOW > ImageSizeOptions.SEQ_LENGTH:
                        break

                    image_chunk = images[:, i:i + TrainOptions.TRAIN_WINDOW]
                    label_chunk = labels[:, i:i + TrainOptions.TRAIN_WINDOW]

                    output_, hidden, cell_state = transducer_model(
                        image_chunk, hidden, cell_state)

                    loss = criterion(output_.contiguous().view(-1, num_classes),
                                     label_chunk.contiguous().view(-1))
                    loss.backward()

                    model_optimizer.step()
                    total_loss += loss.item()
                    total_images += image_chunk.size(0)
                    # carry the state into the next window without keeping
                    # the graph of the previous one alive
                    hidden = hidden.detach()
                    cell_state = cell_state.detach()

                # running average over everything seen so far in this epoch
                avg_loss = (total_loss / total_images) if total_images else 0

                if train_mode is True and rank == 0:
                    train_loss_logger.write(
                        str(epoch + 1) + "," + str(batch_no) + "," +
                        str(avg_loss) + "\n")

                # update the progress bar
                if rank == 0:
                    progress_bar.set_description("Loss: " + str(avg_loss))
                    progress_bar.refresh()
                    progress_bar.update(1)
                    batch_no += 1

            if rank == 0:
                progress_bar.close()
            dist.barrier()

            # only rank 0 evaluates; the other ranks wait at the next barrier
            if rank == 0:
                stats_dictionary = test(test_file,
                                        batch_size,
                                        gpu_mode,
                                        transducer_model,
                                        num_workers,
                                        gru_layers,
                                        hidden_size,
                                        num_classes=ImageSizeOptions.TOTAL_LABELS)
                stats['loss'] = stats_dictionary['loss']
                stats['accuracy'] = stats_dictionary['accuracy']
                stats['loss_epoch'].append((epoch, stats_dictionary['loss']))
                stats['accuracy_epoch'].append(
                    (epoch, stats_dictionary['accuracy']))
            dist.barrier()

            # update the loggers
            if train_mode is True and rank == 0:
                # save the model after each epoch
                save_best_model(
                    transducer_model, model_optimizer, hidden_size, gru_layers,
                    epoch,
                    model_dir + "_epoch_" + str(epoch + 1) + '_checkpoint.pkl')

                test_loss_logger.write(
                    str(epoch + 1) + "," + str(stats['loss']) + "," +
                    str(stats['accuracy']) + "\n")
                confusion_matrix_logger.write(
                    str(epoch + 1) + "\n" +
                    str(stats_dictionary['confusion_matrix']) + "\n")
                train_loss_logger.flush()
                test_loss_logger.flush()
                confusion_matrix_logger.flush()
            elif train_mode is False:
                # this setup is for hyperband
                # NOTE(review): stats['accuracy'] is only filled in on rank 0,
                # so this branch presumably expects a single-process run -
                # confirm before using it with world_size > 1
                if epoch + 1 >= 10 and stats['accuracy'] < 98:
                    log("INFO: EARLY STOPPING AS THE MODEL NOT DOING WELL")
                    return transducer_model, model_optimizer, stats

        if rank == 0:
            log("INFO: Finished training")

        return transducer_model, model_optimizer, stats
    finally:
        for logger in (train_loss_logger, test_loss_logger,
                       confusion_matrix_logger):
            if logger is not None:
                logger.close()
コード例 #4
0
ファイル: predict.py プロジェクト: TimHanneman/pepper
def predict(test_file, output_filename, model_path, batch_size, num_workers,
            gpu_mode):
    """
    Run a trained model over a data set and write the predicted labels to a file.

    Each sequence is processed in overlapping windows; the softmax output of
    every window is zero-padded to the full sequence length and summed, and
    the label with the highest summed score is written per position.
    :param test_file: File to predict on, passed to ``SequenceDataset``
    :param output_filename: Name of output file, opened through ``DataStore``
    :param model_path: Path to a trained model
    :param batch_size: Batch size used for prediction
    :param num_workers: Number of workers to be used by the dataloader
    :param gpu_mode: If true, predictions will be done over GPU
    :return: None; results are written through ``DataStore.write_prediction``
    """
    prediction_data_file = DataStore(output_filename, mode='w')

    # data loader
    test_data = SequenceDataset(test_file)
    test_loader = DataLoader(test_data,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=num_workers)

    # only the model itself is needed from the checkpoint
    transducer_model, _hidden_size, _gru_layers, _prev_ite = \
        ModelHandler.load_simple_model_for_training(model_path,
                                                    input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                                                    image_features=ImageSizeOptions.IMAGE_HEIGHT,
                                                    seq_len=ImageSizeOptions.SEQ_LENGTH,
                                                    num_classes=ImageSizeOptions.TOTAL_LABELS)
    transducer_model.eval()

    if gpu_mode:
        transducer_model = transducer_model.cuda()

    sys.stderr.write("[" + str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) +
                     "] INFO: STARTING INFERENCE\n")

    with torch.no_grad():
        for contig, contig_start, contig_end, chunk_id, images, position, index in tqdm(
                test_loader, ncols=50):
            images = images.type(torch.FloatTensor)

            hidden = torch.zeros(images.size(0), 2 * TrainOptions.LSTM_LAYERS,
                                 TrainOptions.HIDDEN_SIZE)
            cell_state = torch.zeros(images.size(0),
                                     2 * TrainOptions.LSTM_LAYERS,
                                     TrainOptions.HIDDEN_SIZE)
            # accumulator for the per-position class scores of all windows
            prediction_base_tensor = torch.zeros(
                (images.size(0), images.size(1),
                 ImageSizeOptions.TOTAL_LABELS))

            if gpu_mode:
                images = images.cuda()
                hidden = hidden.cuda()
                # the cell state must live on the same device as the model
                # and the hidden state, otherwise the forward call fails
                cell_state = cell_state.cuda()
                prediction_base_tensor = prediction_base_tensor.cuda()

            for i in range(0, ImageSizeOptions.SEQ_LENGTH,
                           TrainOptions.WINDOW_JUMP):
                # stop once a full window no longer fits
                if i + TrainOptions.TRAIN_WINDOW > ImageSizeOptions.SEQ_LENGTH:
                    break
                chunk_start = i
                chunk_end = i + TrainOptions.TRAIN_WINDOW
                # chunk all the data
                image_chunk = images[:, chunk_start:chunk_end]

                # run inference
                output_base, hidden, cell_state = transducer_model(
                    image_chunk, hidden, cell_state)

                # now calculate how much padding is on the top and bottom of this chunk so we can do a simple
                # add operation
                top_zeros = chunk_start
                bottom_zeros = ImageSizeOptions.SEQ_LENGTH - chunk_end

                # do softmax and get prediction
                # we run a softmax and a padding to make the output tensor compatible for adding;
                # both layers are parameter-free, so the result stays on the
                # device of output_base without an explicit transfer
                inference_layers = nn.Sequential(
                    nn.Softmax(dim=2),
                    nn.ZeroPad2d((0, 0, top_zeros, bottom_zeros)))
                base_prediction = inference_layers(output_base)

                # now simply add the tensor to the global counter
                prediction_base_tensor = torch.add(prediction_base_tensor,
                                                   base_prediction)

            # the winning label per position; the summed scores are not used
            _base_values, base_labels = torch.max(prediction_base_tensor, 2)

            predicted_base_labels = base_labels.cpu().numpy()

            for i in range(images.size(0)):
                prediction_data_file.write_prediction(
                    contig[i], contig_start[i], contig_end[i], chunk_id[i],
                    position[i], index[i], predicted_base_labels[i])
コード例 #5
0
def predict_cpu(filepath, file_chunks, output_filepath, model_path, batch_size,
                total_callers, threads_per_caller, num_workers):
    """
    Export the model to ONNX if needed and run prediction in a process pool.

    The ONNX file is written next to the model as ``model_path + ".onnx"`` and
    is only created when it does not exist yet. One ``predict`` task is
    submitted per caller; failures are reported on stderr and do not abort the
    remaining tasks.
    :param filepath: Path to image files to predict on
    :param file_chunks: Per-caller chunks; element ``i`` goes to caller ``i``
    :param output_filepath: Path to output directory
    :param model_path: Path to a trained model
    :param batch_size: Batch size used for prediction
    :param total_callers: Number of callers to spawn
    :param threads_per_caller: Number of threads to use per caller
    :param num_workers: Number of workers to be used by the dataloader
    :return: None
    """

    def stamp():
        # timestamp prefix shared by the status lines below
        return "[" + str(datetime.now().strftime('%m-%d-%Y %H:%M:%S')) + "]"

    # load the model and create an ONNX session
    transducer_model, hidden_size, gru_layers, prev_ite = \
        ModelHandler.load_simple_model_for_training(model_path,
                                                    input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                                                    image_features=ImageSizeOptions.IMAGE_HEIGHT,
                                                    seq_len=ImageSizeOptions.SEQ_LENGTH,
                                                    num_classes=ImageSizeOptions.TOTAL_LABELS)
    transducer_model.eval()

    sys.stderr.write(stamp() + " INFO: MODEL LOADING TO ONNX\n")

    # example inputs with batch size 1, used by the exporter for tracing
    x = torch.zeros(1, TrainOptions.TRAIN_WINDOW,
                    ImageSizeOptions.IMAGE_HEIGHT)
    h = torch.zeros(1, 2 * TrainOptions.GRU_LAYERS, TrainOptions.HIDDEN_SIZE)

    onnx_path = model_path + ".onnx"
    if not os.path.isfile(onnx_path):
        sys.stderr.write(stamp() + " INFO: SAVING MODEL TO ONNX\n")
        input_names = ['input_image', 'input_hidden']
        output_names = ['output_pred', 'output_hidden']
        # axis 0 of every named tensor is exported as a dynamic batch axis
        dynamic_axes = {
            tensor_name: {0: 'batch_size'}
            for tensor_name in input_names + output_names
        }
        torch.onnx.export(transducer_model, (x, h),
                          onnx_path,
                          training=False,
                          opset_version=10,
                          do_constant_folding=True,
                          input_names=input_names,
                          output_names=output_names,
                          dynamic_axes=dynamic_axes)

    start_time = time.time()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=total_callers) as executor:
        futures = []
        for caller_id in range(0, total_callers):
            futures.append(
                executor.submit(predict, filepath, file_chunks[caller_id],
                                output_filepath, batch_size, num_workers,
                                caller_id, threads_per_caller, model_path))

        for finished in concurrent.futures.as_completed(futures):
            error = finished.exception()
            if error is not None:
                sys.stderr.write("ERROR: " + str(error) + "\n")
            else:
                # a successful task returns its caller id
                finished_id = finished.result()
                sys.stderr.write(stamp() + " INFO: THREAD " +
                                 str(finished_id) +
                                 " FINISHED SUCCESSFULLY.\n")
            finished._result = None  # python issue 27144

    elapsed = time.time() - start_time
    mins = int(elapsed / 60)
    secs = int(elapsed) % 60
    sys.stderr.write(stamp() + " INFO: FINISHED PREDICTION\n")
    sys.stderr.write(stamp() + " INFO: ELAPSED TIME: " + str(mins) +
                     " Min " + str(secs) + " Sec\n")
コード例 #6
0
def predict_distributed_cpu(filepath, file_chunks, output_filepath, model_path,
                            batch_size, total_callers, threads, num_workers):
    """
    Export the model to ONNX if needed and spawn the prediction processes.

    The ONNX file is written next to the model as ``model_path + ".onnx"`` and
    is only created when it does not exist yet. ``setup`` is then started in
    ``total_callers`` processes through ``mp.spawn`` and joined.
    :param filepath: Path to image files to predict on
    :param file_chunks: Path to chunked files
    :param output_filepath: Path to output directory
    :param model_path: Path to a trained model
    :param batch_size: Batch size used for prediction
    :param total_callers: Number of callers to spawn
    :param threads: Number of threads to use per caller
    :param num_workers: Number of workers to be used by the dataloader
    :return: None
    """
    # load the model and create an ONNX session
    transducer_model, hidden_size, gru_layers, prev_ite = \
        ModelHandler.load_simple_model_for_training(model_path,
                                                    input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                                                    image_features=ImageSizeOptions.IMAGE_HEIGHT,
                                                    seq_len=ImageSizeOptions.SEQ_LENGTH,
                                                    num_classes=ImageSizeOptions.TOTAL_LABELS)
    transducer_model.eval()

    sys.stderr.write("INFO: MODEL LOADING TO ONNX\n")

    # example inputs with batch size 1: image window, hidden state, cell state
    state_shape = (1, 2 * TrainOptions.LSTM_LAYERS, TrainOptions.HIDDEN_SIZE)
    x = torch.zeros(1, TrainOptions.TRAIN_WINDOW,
                    ImageSizeOptions.IMAGE_HEIGHT)
    h = torch.zeros(*state_shape)
    ce = torch.zeros(*state_shape)

    onnx_path = model_path + ".onnx"
    if not os.path.isfile(onnx_path):
        sys.stderr.write("INFO: SAVING MODEL TO ONNX\n")
        input_names = ['input_image', 'input_hidden']
        output_names = ['output_pred', 'output_hidden']
        # NOTE(review): three tensors are passed as example inputs but only
        # two input names (and two output names) are listed, so the cell-state
        # tensor has no entry in dynamic_axes - confirm the exported graph
        # accepts batch sizes other than 1 for it
        dynamic_axes = {
            tensor_name: {0: 'batch_size'}
            for tensor_name in input_names + output_names
        }
        torch.onnx.export(transducer_model, (x, h, ce),
                          onnx_path,
                          training=False,
                          opset_version=10,
                          do_constant_folding=True,
                          input_names=input_names,
                          output_names=output_names,
                          dynamic_axes=dynamic_axes)

    # arguments shared by every caller; setup receives them as one tuple
    caller_args = (filepath, output_filepath, model_path, batch_size,
                   num_workers, threads)
    mp.spawn(setup,
             args=(total_callers, caller_args, file_chunks),
             nprocs=total_callers,
             join=True)
コード例 #7
0
def predict(input_filepath, file_chunks, output_filepath, model_path,
            batch_size, num_workers, rank, device_id):
    """
    Run a trained model on one GPU and write labels and phred scores to a file.

    Each sequence is processed in overlapping windows; the softmax output of
    every window is zero-padded to the full sequence length and summed. The
    label with the highest summed score is written per position together with
    a phred-style quality derived from that score.
    :param input_filepath: Path to the input data, passed to ``SequenceDataset``
    :param file_chunks: Chunks to process, passed to ``SequenceDataset``
    :param output_filepath: Prefix of the output file; the file name is
                            ``output_filepath + "pepper_prediction_<device_id>.hdf"``
    :param model_path: Path to a trained model
    :param batch_size: Batch size used for prediction
    :param num_workers: Number of workers to be used by the dataloader
    :param rank: Rank of this process; only rank 0 shows a progress bar
    :param device_id: CUDA device used by this process
    :return: None; results are written through ``DataStore.write_prediction``
    """
    # only the model itself is needed from the checkpoint
    transducer_model, _hidden_size, _gru_layers, _prev_ite = \
        ModelHandler.load_simple_model_for_training(model_path,
                                                    input_channels=ImageSizeOptions.IMAGE_CHANNELS,
                                                    image_features=ImageSizeOptions.IMAGE_HEIGHT,
                                                    seq_len=ImageSizeOptions.SEQ_LENGTH,
                                                    num_classes=ImageSizeOptions.TOTAL_LABELS)
    # create output file
    output_filename = output_filepath + "pepper_prediction_" + str(
        device_id) + ".hdf"
    prediction_data_file = DataStore(output_filename, mode='w')

    # data loader
    input_data = SequenceDataset(input_filepath, file_chunks)
    data_loader = DataLoader(input_data,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=num_workers)

    torch.cuda.set_device(device_id)
    transducer_model.to(device_id)
    # switch to eval mode once, before the model is wrapped
    transducer_model.eval()
    transducer_model = DistributedDataParallel(transducer_model,
                                               device_ids=[device_id])

    if rank == 0:
        progress_bar = tqdm(
            total=len(data_loader),
            ncols=100,
            leave=False,
            position=rank,
            desc="GPU #" + str(device_id),
        )

    with torch.no_grad():
        for contig, contig_start, contig_end, chunk_id, images, position, index in data_loader:
            sys.stderr.flush()
            images = images.type(torch.FloatTensor)
            hidden = torch.zeros(images.size(0), 2 * TrainOptions.GRU_LAYERS,
                                 TrainOptions.HIDDEN_SIZE)

            # accumulator for the per-position class scores of all windows
            prediction_base_tensor = torch.zeros(
                (images.size(0), images.size(1),
                 ImageSizeOptions.TOTAL_LABELS))

            images = images.to(device_id)
            hidden = hidden.to(device_id)
            prediction_base_tensor = prediction_base_tensor.to(device_id)

            for i in range(0, ImageSizeOptions.SEQ_LENGTH,
                           TrainOptions.WINDOW_JUMP):
                # stop once a full window no longer fits
                if i + TrainOptions.TRAIN_WINDOW > ImageSizeOptions.SEQ_LENGTH:
                    break
                chunk_start = i
                chunk_end = i + TrainOptions.TRAIN_WINDOW
                # chunk all the data
                image_chunk = images[:, chunk_start:chunk_end]

                # run inference
                output_base, hidden = transducer_model(image_chunk, hidden)

                # now calculate how much padding is on the top and bottom of this chunk so we can do a simple
                # add operation
                top_zeros = chunk_start
                bottom_zeros = ImageSizeOptions.SEQ_LENGTH - chunk_end

                # do softmax and get prediction
                # we run a softmax and a padding to make the output tensor compatible for adding
                inference_layers = nn.Sequential(
                    nn.Softmax(dim=2),
                    nn.ZeroPad2d((0, 0, top_zeros, bottom_zeros)))
                inference_layers = inference_layers.to(device_id)

                # run the softmax and padding layers
                base_prediction = inference_layers(output_base).to(device_id)

                # now simply add the tensor to the global counter
                prediction_base_tensor = torch.add(prediction_base_tensor,
                                                   base_prediction)

                del inference_layers
                torch.cuda.empty_cache()

            # summed score and index of the winning label for every position
            base_values, base_labels = torch.max(prediction_base_tensor, 2)

            # this part is for the phred score calculation
            # counts is 1 for the first and last SEQ_OVERLAP positions and 2
            # everywhere else - presumably the number of windows covering each
            # position; TODO confirm against the window settings
            counts = torch.ones(
                (base_values.size(0),
                 base_values.size(1) - 2 * ImageSizeOptions.SEQ_OVERLAP))
            top_ones = nn.ZeroPad2d(
                (ImageSizeOptions.SEQ_OVERLAP, ImageSizeOptions.SEQ_OVERLAP))
            counts = top_ones(counts) + 1

            # the quality must come from the summed scores, not from the label
            # indices; counts lives on the CPU, so the scores are moved there
            base_values = base_values.cpu()
            phred_score = -10 * torch.log10(1.0 - (base_values / counts))
            # an averaged score of exactly 1 yields infinity; cap it at 100
            phred_score[phred_score == float('inf')] = 100

            predicted_base_labels = base_labels.cpu().numpy()
            phred_score = phred_score.numpy()

            for i in range(images.size(0)):
                prediction_data_file.write_prediction(
                    contig[i], contig_start[i], contig_end[i], chunk_id[i],
                    position[i], index[i], predicted_base_labels[i],
                    phred_score[i])
            if rank == 0:
                progress_bar.update(1)

    if rank == 0:
        progress_bar.close()