Example #1
def main(args):

    poas = []
    init(args.seed, args.device)

    print("* loading data")
    testdata = ChunkDataSet(
        *load_data(limit=args.chunks, shuffle=args.shuffle))
    dataloader = DataLoader(testdata, batch_size=args.batchsize)

    for w in [int(i) for i in args.weights.split(',')]:

        print("* loading model", w)
        model = load_model(args.model_directory, args.device, weights=w)

        print("* calling")
        predictions = []
        t0 = time.perf_counter()

        for data, *_ in dataloader:
            with torch.no_grad():
                log_probs = model(data.to(args.device))
                predictions.append(log_probs.exp().cpu().numpy())

        duration = time.perf_counter() - t0

        # decode the ground-truth references and the CTC posteriors,
        # then score per-read accuracy
        references = [
            decode_ref(target, model.alphabet)
            for target in dataloader.dataset.targets
        ]
        sequences = [
            decode_ctc(post, model.alphabet)
            for post in np.concatenate(predictions)
        ]
        accuracies = list(starmap(accuracy, zip(references, sequences)))

        if args.poa: poas.append(sequences)

        print("* mean      %.2f%%" % np.mean(accuracies))
        print("* median    %.2f%%" % np.median(accuracies))
        print("* time      %.2f" % duration)
        print("* samples/s %.2E" % (args.chunks * data.shape[2] / duration))

    if args.poa:

        print("* doing poa")
        t0 = time.perf_counter()
        # group each sequence prediction per model together
        poas = [list(seq) for seq in zip(*poas)]

        consensuses = poa(poas)
        duration = time.perf_counter() - t0

        accuracies = list(starmap(accuracy, zip(references, consensuses)))

        print("* mean      %.2f%%" % np.mean(accuracies))
        print("* median    %.2f%%" % np.median(accuracies))
        print("* time      %.2f" % duration)
Example #2
def main(args):

    sys.stderr.write("> loading model\n")
    model = load_model(args.model_directory,
                       args.device,
                       weights=int(args.weights))

    num_reads = 0
    num_chunks = 0

    t0 = time.perf_counter()
    sys.stderr.write("> calling\n")

    for fast5 in tqdm(glob("%s/*fast5" % args.reads_directory), ascii=True):

        for read_id, raw_data in get_raw_data(fast5):

            # split long reads into overlapping chunks of size args.chunksize
            if len(raw_data) <= args.chunksize:
                chunks = np.expand_dims(raw_data, axis=0)
            else:
                chunks = window(raw_data,
                                args.chunksize,
                                stepsize=args.chunksize - args.overlap)

            # add a channel dimension for the model
            chunks = np.expand_dims(chunks, axis=1)

            num_reads += 1
            num_chunks += chunks.shape[0]

            with torch.no_grad():

                # copy to gpu
                tchunks = torch.tensor(chunks).to(args.device)

                # run model
                predictions = torch.exp(model(tchunks))

                # copy to cpu
                predictions = predictions.cpu()

                # stitch overlapping chunk predictions back into a single read
                if len(predictions) > 1:
                    predictions = stitch(predictions,
                                         int(args.overlap / model.stride / 2))
                else:
                    predictions = np.squeeze(predictions, axis=0)

                sequence = decode_ctc(predictions, model.alphabet)

                print(">%s" % read_id)
                print('\n'.join(wrap(sequence, 100)))

    t1 = time.perf_counter()
    sys.stderr.write("> completed reads: %s\n" % num_reads)
    sys.stderr.write("> samples per second %.1E\n" %
                     (num_chunks * args.chunksize / (t1 - t0)))
    sys.stderr.write("> done\n")
Example #3
def test(model, device, test_loader):

    model.eval()
    test_loss = 0
    predictions = []
    prediction_lengths = []

    with torch.no_grad():
        for batch_idx, (data, out_lengths, target,
                        lengths) in enumerate(test_loader, start=1):
            data, target = data.to(device), target.to(device)
            log_probs = model(data)
            # lengths are measured in model output frames, so floor-divide by the
            # stride (integer lengths are also needed as slice indices below)
            test_loss += criterion(log_probs.transpose(1, 0), target,
                                   out_lengths // model.stride, lengths)
            predictions.append(torch.exp(log_probs).cpu())
            prediction_lengths.append(out_lengths // model.stride)

    predictions = np.concatenate(predictions)
    lengths = np.concatenate(prediction_lengths)

    references = [
        decode_ref(target, model.alphabet)
        for target in test_loader.dataset.targets
    ]
    sequences = [
        decode_ctc(post[:n], model.alphabet)
        for post, n in zip(predictions, lengths)
    ]

    if all(map(len, sequences)):
        accuracies = list(starmap(accuracy, zip(references, sequences)))
    else:
        accuracies = [0]

    mean = np.mean(accuracies)
    median = np.median(accuracies)

    print()
    print('Validation Loss:              %.4f' % (test_loss / batch_idx))
    print("Validation Accuracy (mean):   %.3f%%" % max(0, mean))
    print("Validation Accuracy (median): %.3f%%" % max(0, median))
    print()
    return test_loss.item() / batch_idx, mean, median
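This validation helper depends on a module-level criterion and on a DataLoader that yields (data, out_lengths, target, lengths) batches. A hypothetical call from a training script, assuming the criterion is a CTC-style loss, is sketched below; model, device and test_loader come from the surrounding training code.

import torch.nn as nn

# assumed module-level loss; the project's actual criterion is defined elsewhere
criterion = nn.CTCLoss(reduction='mean')

val_loss, val_mean, val_median = test(model, device, test_loader)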