import numpy as np
# `sm` is assumed to be the project's array backend module (e.g. an
# `import smat as sm` elsewhere in this file), providing sm.asnumpy() to copy
# device arrays to host memory; `datasource` and `make_predictions` are
# likewise assumed to be defined elsewhere in this module.

def _calc_performance_stats(cost, batches):
    Z = []
    Y = []
    L = []
    rowidx = []
    for batch in batches:
        # Forward-propagate only the batch attributes that the cost graph
        # actually declares as inputs.
        args = {key: val for key, val in batch.data().items() if hasattr(cost, key)}
        cost.eval(clear=False, **args)
        Zi = cost.loss.Z.fpval.asnumpy()
        Yi = sm.asnumpy(batch.Y)
        Li = cost.loss.loss.fpval.asnumpy()
        rowidx_i = sm.asnumpy(batch.rowidx) if hasattr(batch, "rowidx") else None

        # Deal with the possibility that only a subset of predictions contributed
        # to the loss, due to some test performed after prediction (e.g. max over
        # a sequence and its reverse complement).
        if cost.Zmask.fpval is not None:
            Zmask = cost.Zmask.fpval.asnumpy()
            Zi = np.hstack([Zi[Zmask[:, col], col].reshape((-1, 1))
                            for col in range(Zi.shape[1])])
            Yi = np.hstack([Yi[Zmask[:, col], col].reshape((-1, 1))
                            for col in range(Yi.shape[1])])
            if rowidx_i is not None:
                rowidx_i = rowidx_i[Zmask[:, 0], 0].reshape((-1, 1))

        Z.append(Zi)
        Y.append(Yi)
        L.append(Li)
        if rowidx_i is not None:
            rowidx.append(rowidx_i)
        cost.clear()

    Z = np.vstack(Z)
    Y = np.vstack(Y)
    L = np.mean(np.vstack(L), axis=0)  # mean loss per output column
    rowidx = np.vstack(rowidx) if rowidx else None
    return {"L": L, "Z": Z, "Y": Y, "I": rowidx}
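
# Illustrative usage sketch (not part of the original module): one way to
# consume the dict returned by _calc_performance_stats, reporting the mean
# loss and a per-column AUC. The sklearn dependency and the helper name
# _report_performance are assumptions for demonstration only.
def _report_performance(cost, batches):
    stats = _calc_performance_stats(cost, batches)
    print("mean loss per output column:", stats["L"])
    for col in range(stats["Z"].shape[1]):
        y, z = stats["Y"][:, col], stats["Z"][:, col]
        if len(np.unique(y)) == 2:  # AUC is defined only when both classes occur
            from sklearn.metrics import roc_auc_score
            print("AUC[col %d] = %.4f" % (col, roc_auc_score(y, z)))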
def count_errors(model, datasrc):
    # If model.ninst > datasrc.ntask, then that means multiple models were
    # trained on the original data, so we need to replicate the task: for each
    # task, we generate model.ninst/datasrc.ntask copies of its output columns.
    if isinstance(datasrc, datasource):
        datasrc = datasrc.asbatches(128)
    Z = make_predictions(model, datasrc)["Z"]
    Y = np.vstack([sm.asnumpy(batch.Y) for batch in datasrc])
    error_counts = []
    for i in range(model.ninst):
        # Slice out the columns belonging to the i-th model instance.
        s = slice(i * Z.shape[1] // model.ninst, (i + 1) * Z.shape[1] // model.ninst)
        count = np.sum(np.argmax(Z[:, s], axis=1) != np.argmax(Y[:, s], axis=1))
        error_counts.append(count)
    error_counts = np.asarray(error_counts)
    return error_counts
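
# Illustrative usage sketch (not part of the original module): converting the
# raw counts from count_errors into per-instance error rates and selecting the
# best model instance. The helper name _best_instance is hypothetical, and the
# sketch assumes each batch.Y exposes a .shape attribute, as the code above
# implies.
def _best_instance(model, datasrc):
    if isinstance(datasrc, datasource):
        datasrc = datasrc.asbatches(128)
    counts = count_errors(model, datasrc)
    nrow = sum(batch.Y.shape[0] for batch in datasrc)  # total number of examples
    rates = counts.astype(np.float64) / max(nrow, 1)
    return int(np.argmin(rates)), rates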