import sys
import time
from glob import glob
from textwrap import wrap
from itertools import starmap

import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

# Project-internal helpers (init, load_data, ChunkDataSet, load_model, decode_ref,
# decode_ctc, accuracy, poa, get_raw_data, window, stitch) and the module-level
# CTC loss `criterion` are assumed to be provided by the surrounding package.


def main(args):

    poas = []
    init(args.seed, args.device)

    print("* loading data")
    testdata = ChunkDataSet(*load_data(limit=args.chunks, shuffle=args.shuffle))
    dataloader = DataLoader(testdata, batch_size=args.batchsize)

    for w in [int(i) for i in args.weights.split(',')]:

        print("* loading model", w)
        model = load_model(args.model_directory, args.device, weights=w)

        print("* calling")
        predictions = []
        t0 = time.perf_counter()

        # run the model over every chunk and keep the posteriors on the cpu
        for data, *_ in dataloader:
            with torch.no_grad():
                log_probs = model(data.to(args.device))
                predictions.append(log_probs.exp().cpu().numpy())

        duration = time.perf_counter() - t0

        references = [decode_ref(target, model.alphabet) for target in dataloader.dataset.targets]
        sequences = [decode_ctc(post, model.alphabet) for post in np.concatenate(predictions)]
        accuracies = list(starmap(accuracy, zip(references, sequences)))

        if args.poa: poas.append(sequences)

        print("* mean %.2f%%" % np.mean(accuracies))
        print("* median %.2f%%" % np.median(accuracies))
        print("* time %.2f" % duration)
        print("* samples/s %.2E" % (args.chunks * data.shape[2] / duration))

    if args.poa:

        print("* doing poa")
        t0 = time.perf_counter()

        # group each sequence prediction per model together
        poas = [list(seq) for seq in zip(*poas)]
        consensuses = poa(poas)
        duration = time.perf_counter() - t0

        accuracies = list(starmap(accuracy, zip(references, consensuses)))

        print("* mean %.2f%%" % np.mean(accuracies))
        print("* median %.2f%%" % np.median(accuracies))
        print("* time %.2f" % duration)
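# The evaluation entry point above reads a handful of attributes off `args`
# (seed, device, chunks, shuffle, batchsize, weights, model_directory, poa).
# Below is a minimal, hypothetical sketch of an argparse parser that would
# supply them; the option names follow the attribute accesses above, but the
# defaults and help text are assumptions for illustration only.
def argparser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("model_directory")
    parser.add_argument("--weights", default="0", type=str,
                        help="comma separated list of checkpoint ids to evaluate")
    parser.add_argument("--device", default="cuda")
    parser.add_argument("--seed", default=25, type=int)
    parser.add_argument("--chunks", default=500, type=int)
    parser.add_argument("--batchsize", default=100, type=int)
    parser.add_argument("--shuffle", action="store_true", default=False)
    parser.add_argument("--poa", action="store_true", default=False,
                        help="build a partial order alignment consensus over all evaluated models")
    return parser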
def main(args):

    sys.stderr.write("> loading model\n")
    model = load_model(args.model_directory, args.device, weights=int(args.weights))

    num_reads = 0
    num_chunks = 0

    t0 = time.perf_counter()
    sys.stderr.write("> calling\n")

    for fast5 in tqdm(glob("%s/*fast5" % args.reads_directory), ascii=True):

        for read_id, raw_data in get_raw_data(fast5):

            # split the raw signal into overlapping, fixed-size chunks
            if len(raw_data) <= args.chunksize:
                chunks = np.expand_dims(raw_data, axis=0)
            else:
                chunks = window(raw_data, args.chunksize, stepsize=args.chunksize - args.overlap)

            chunks = np.expand_dims(chunks, axis=1)

            num_reads += 1
            num_chunks += chunks.shape[0]

            with torch.no_grad():

                # copy to gpu
                tchunks = torch.tensor(chunks).to(args.device)

                # run model
                predictions = torch.exp(model(tchunks))

                # copy to cpu
                predictions = predictions.cpu()

            # stitch the overlapping chunk posteriors back into one read
            if len(predictions) > 1:
                predictions = stitch(predictions, int(args.overlap / model.stride / 2))
            else:
                predictions = np.squeeze(predictions, axis=0)

            sequence = decode_ctc(predictions, model.alphabet)

            # write the basecall to stdout in fasta format
            print(">%s" % read_id)
            print('\n'.join(wrap(sequence, 100)))

    t1 = time.perf_counter()
    sys.stderr.write("> completed reads: %s\n" % num_reads)
    sys.stderr.write("> samples per second %.1E\n" % (num_chunks * args.chunksize / (t1 - t0)))
    sys.stderr.write("> done\n")
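# window() above cuts the raw signal into overlapping, fixed-size chunks taken
# every `stepsize` samples. A hypothetical reference implementation is sketched
# below for clarity; the project's own helper may differ (for example it could
# use numpy stride tricks instead of an explicit list of slices).
def window_sketch(data, size, stepsize=1):
    """Return a (n_chunks, size) array of windows over a 1-D signal."""
    starts = range(0, len(data) - size + 1, stepsize)
    return np.stack([data[start:start + size] for start in starts])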
def test(model, device, test_loader):

    model.eval()
    test_loss = 0
    predictions = []
    prediction_lengths = []

    with torch.no_grad():
        for batch_idx, (data, out_lengths, target, lengths) in enumerate(test_loader, start=1):
            data, target = data.to(device), target.to(device)
            log_probs = model(data)
            # integer division keeps the per-chunk output lengths usable as
            # CTC loss input_lengths and as slice indices below
            test_loss += criterion(log_probs.transpose(1, 0), target, out_lengths // model.stride, lengths)
            predictions.append(torch.exp(log_probs).cpu())
            prediction_lengths.append(out_lengths // model.stride)

    predictions = np.concatenate(predictions)
    lengths = np.concatenate(prediction_lengths)

    references = [decode_ref(target, model.alphabet) for target in test_loader.dataset.targets]
    sequences = [decode_ctc(post[:n], model.alphabet) for post, n in zip(predictions, lengths)]

    # guard against empty decodes so accuracy() is never called on an empty sequence
    if all(map(len, sequences)):
        accuracies = list(starmap(accuracy, zip(references, sequences)))
    else:
        accuracies = [0]

    mean = np.mean(accuracies)
    median = np.median(accuracies)

    print()
    print('Validation Loss: %.4f' % (test_loss / batch_idx))
    print("Validation Accuracy (mean): %.3f%%" % max(0, mean))
    print("Validation Accuracy (median): %.3f%%" % max(0, median))
    print()

    return test_loss.item() / batch_idx, mean, median
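# decode_ctc(), used throughout the functions above, performs greedy CTC
# decoding of the posteriors. The sketch below shows the idea under two
# assumptions made purely for illustration: posteriors have shape
# (timesteps, len(alphabet)) and alphabet index 0 is the CTC blank. It is
# not necessarily the project's actual implementation.
def greedy_ctc_decode(posteriors, alphabet, blank=0):
    path = np.argmax(posteriors, axis=1)
    sequence = []
    previous = blank
    for label in path:
        # standard CTC collapse rule: merge repeated labels and drop blanks
        if label != blank and label != previous:
            sequence.append(alphabet[label])
        previous = label
    return ''.join(sequence)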