import time
from itertools import starmap

import numpy as np
import torch
from torch.utils.data import DataLoader

# helper imports assumed to come from the surrounding bonito package
from bonito.training import ChunkDataSet
from bonito.util import init, load_data, load_model, half_supported, permute
from bonito.util import accuracy, poa, decode_ref, decode_ctc


# Earlier revision of the evaluation entry point: collects the posterior
# probabilities per batch and decodes them on the host with decode_ctc.
def main(args):

    poas = []
    init(args.seed, args.device)

    print("* loading data")
    testdata = ChunkDataSet(*load_data(limit=args.chunks, shuffle=args.shuffle))
    dataloader = DataLoader(testdata, batch_size=args.batchsize)

    for w in [int(i) for i in args.weights.split(',')]:

        print("* loading model", w)
        model = load_model(args.model_directory, args.device, weights=w)

        print("* calling")
        predictions = []
        t0 = time.perf_counter()

        for data, *_ in dataloader:
            with torch.no_grad():
                log_probs = model(data.to(args.device))
                predictions.append(log_probs.exp().cpu().numpy())

        duration = time.perf_counter() - t0

        references = [decode_ref(target, model.alphabet) for target in dataloader.dataset.targets]
        sequences = [decode_ctc(post, model.alphabet) for post in np.concatenate(predictions)]
        accuracies = list(starmap(accuracy, zip(references, sequences)))

        if args.poa:
            poas.append(sequences)

        print("* mean      %.2f%%" % np.mean(accuracies))
        print("* median    %.2f%%" % np.median(accuracies))
        print("* time      %.2f" % duration)
        print("* samples/s %.2E" % (args.chunks * data.shape[2] / duration))

    if args.poa:

        print("* doing poa")
        t0 = time.perf_counter()
        # group each sequence prediction per model together
        poas = [list(seq) for seq in zip(*poas)]
        consensuses = poa(poas)
        duration = time.perf_counter() - t0
        accuracies = list(starmap(accuracy, zip(references, consensuses)))

        print("* mean      %.2f%%" % np.mean(accuracies))
        print("* median    %.2f%%" % np.median(accuracies))
        print("* time      %.2f" % duration)
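
# For orientation: decode_ctc above turns a (T, |alphabet|) matrix of
# posteriors into a base sequence. A minimal greedy (best-path) CTC decode
# is sketched below as an illustrative stand-in, not the package's own
# implementation; it assumes the blank token sits at index 0 of the alphabet.
from itertools import groupby

def greedy_ctc_decode(posteriors, alphabet):
    """Collapse repeated labels and drop blanks from the per-step argmax path."""
    path = np.argmax(posteriors, axis=1)       # best label per time step
    collapsed = (k for k, _ in groupby(path))  # merge consecutive repeats
    return ''.join(alphabet[k] for k in collapsed if k != 0)
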
# Later revision of the same entry point: decodes on the model itself
# (batched where supported), runs in half precision when available, and
# filters accuracy by a minimum reference coverage.
def main(args):

    poas = []
    init(args.seed, args.device)

    print("* loading data")
    testdata = ChunkDataSet(
        *load_data(
            limit=args.chunks, shuffle=args.shuffle,
            directory=args.directory, validation=True
        )
    )
    dataloader = DataLoader(testdata, batch_size=args.batchsize)
    accuracy_with_cov = lambda ref, seq: accuracy(ref, seq, min_coverage=args.min_coverage)

    for w in [int(i) for i in args.weights.split(',')]:

        seqs = []

        print("* loading model", w)
        model = load_model(args.model_directory, args.device, weights=w)

        print("* calling")
        t0 = time.perf_counter()

        with torch.no_grad():
            for data, *_ in dataloader:
                if half_supported():
                    data = data.type(torch.float16).to(args.device)
                else:
                    data = data.to(args.device)

                log_probs = model(data)

                if hasattr(model, 'decode_batch'):
                    seqs.extend(model.decode_batch(log_probs))
                else:
                    seqs.extend([model.decode(p) for p in permute(log_probs, 'TNC', 'NTC')])

        duration = time.perf_counter() - t0

        refs = [decode_ref(target, model.alphabet) for target in dataloader.dataset.targets]
        accuracies = [accuracy_with_cov(ref, seq) if len(seq) else 0. for ref, seq in zip(refs, seqs)]

        if args.poa:
            poas.append(seqs)

        print("* mean      %.2f%%" % np.mean(accuracies))
        print("* median    %.2f%%" % np.median(accuracies))
        print("* time      %.2f" % duration)
        print("* samples/s %.2E" % (args.chunks * data.shape[2] / duration))

    if args.poa:

        print("* doing poa")
        t0 = time.perf_counter()
        # group each sequence prediction per model together
        poas = [list(seq) for seq in zip(*poas)]
        consensuses = poa(poas)
        duration = time.perf_counter() - t0
        accuracies = list(starmap(accuracy_with_cov, zip(refs, consensuses)))

        print("* mean      %.2f%%" % np.mean(accuracies))
        print("* median    %.2f%%" % np.median(accuracies))
        print("* time      %.2f" % duration)
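
# Both revisions read the same attributes off `args`. A minimal argparser
# sketch that would drive either entry point: the flag names are derived
# from the attributes used above, while the defaults shown are illustrative
# assumptions rather than the package's documented values.
from argparse import ArgumentParser

def argparser():
    parser = ArgumentParser()
    parser.add_argument("model_directory")
    parser.add_argument("--directory", default=None)
    parser.add_argument("--device", default="cuda")
    parser.add_argument("--seed", default=25, type=int)
    parser.add_argument("--weights", default="0", type=str)
    parser.add_argument("--chunks", default=1000, type=int)
    parser.add_argument("--batchsize", default=96, type=int)
    parser.add_argument("--shuffle", action="store_true", default=False)
    parser.add_argument("--poa", action="store_true", default=False)
    parser.add_argument("--min-coverage", default=0.5, type=float)
    return parser


if __name__ == '__main__':
    main(argparser().parse_args())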