Ejemplo n.º 1
0
def _calc_performance_stats(cost, batches):
    """Evaluate ``cost`` on each batch and gather the stacked results.

    For every batch, the entries of ``batch.data()`` whose key is also an
    attribute of ``cost`` are passed to ``cost.eval`` as keyword arguments.
    The predictions (``cost.loss.Z``), ``batch.Y``, the loss values
    (``cost.loss.loss``) and, when the batch has one, ``batch.rowidx`` are
    converted to numpy arrays and accumulated.

    Returns a dict with keys:
        "L": mean over axis 0 of all per-batch loss arrays stacked vertically
        "Z": predictions of all batches stacked vertically
        "Y": ``batch.Y`` of all batches stacked vertically
        "I": row indices stacked vertically, or None if no batch had ``rowidx``
    """

    def keep_masked_rows(values, mask):
        # Column by column, keep only the rows selected by that column's mask,
        # then put the columns back side by side. np.hstack requires every
        # column to end up with the same number of rows.
        kept_columns = []
        for col in range(values.shape[1]):
            kept_columns.append(values[mask[:,col],col].reshape((-1,1)))
        return np.hstack(kept_columns)

    all_preds = []
    all_targets = []
    all_losses = []
    all_rowidx = []

    for batch in batches:
        # Only forward the batch fields that the cost object has an attribute for.
        eval_kwargs = {}
        for name,value in batch.data().items():
            if hasattr(cost,name):
                eval_kwargs[name] = value

        # clear=False: presumably keeps the forward-propagated values (fpval)
        # alive so they can be read below; they are released by cost.clear().
        cost.eval(clear=False,**eval_kwargs)

        preds = cost.loss.Z.fpval.asnumpy()
        targets = sm.asnumpy(batch.Y)
        losses = cost.loss.loss.fpval.asnumpy()
        if hasattr(batch,"rowidx"):
            row_ids = sm.asnumpy(batch.rowidx)
        else:
            row_ids = None

        # Only a subset of predictions may have contributed to the loss, due to
        # some test performed after prediction (e.g. max of sequence and
        # reverse_complement of sequence). Keep just those rows.
        if cost.Zmask.fpval is not None:
            mask = cost.Zmask.fpval.asnumpy()
            preds = keep_masked_rows(preds,mask)
            targets = keep_masked_rows(targets,mask)
            if row_ids is not None:
                # Row indices are filtered with the mask of the first column only.
                row_ids = row_ids[mask[:,0],0].reshape((-1,1))

        all_preds.append(preds)
        all_targets.append(targets)
        all_losses.append(losses)
        if row_ids is not None:
            all_rowidx.append(row_ids)

        cost.clear()

    stacked_preds = np.vstack(all_preds)
    stacked_targets = np.vstack(all_targets)
    # Mean over all stacked loss rows, so each row (not each batch) has equal weight.
    mean_loss = np.mean(np.vstack(all_losses),axis=0)
    if all_rowidx:
        stacked_rowidx = np.vstack(all_rowidx)
    else:
        stacked_rowidx = None

    return { "L" : mean_loss, "Z" : stacked_preds, "Y" : stacked_targets, "I" : stacked_rowidx }
Ejemplo n.º 2
0
def count_errors(model,datasrc):
    """Count, per model instance, the rows whose predicted argmax is wrong.

    ``datasrc`` is first converted with ``asbatches(128)`` if it is a
    ``datasource``. The columns of the predictions ``Z`` (from
    ``make_predictions``) and of the stacked ``batch.Y`` arrays are divided
    into ``model.ninst`` contiguous groups. Within each group a row is an
    error when the argmax column of ``Z`` differs from the argmax column
    of ``Y``.

    Returns a numpy array holding one error count per instance.
    """
    # Original note: if model.ninst > datasrc.ntask, then multiple models
    # were trained on the original data, so the task needs to be replicated;
    # for each task, model.ninst/data.ntask copies are generated.
    # NOTE(review): no replication is performed in this function itself;
    # presumably it happens elsewhere -- confirm.
    if isinstance(datasrc,datasource):
        datasrc = datasrc.asbatches(128)

    predictions = make_predictions(model,datasrc)["Z"]
    targets = np.vstack([sm.asnumpy(batch.Y) for batch in datasrc])

    ninst = model.ninst
    ncol = predictions.shape[1]

    mismatches = []
    for inst in range(ninst):
        # Column range [lo, hi) that belongs to this instance.
        lo = inst*ncol//ninst
        hi = (inst+1)*ncol//ninst
        predicted = np.argmax(predictions[:,lo:hi],axis=1)
        expected = np.argmax(targets[:,lo:hi],axis=1)
        mismatches.append(np.sum(predicted != expected))

    return np.asarray(mismatches)
Ejemplo n.º 3
0
def count_errors(model, datasrc):
    """Return the number of argmax mismatches for each model instance.

    A ``datasource`` argument is converted to batches of 128 first.
    Predictions ``Z`` come from ``make_predictions``; ``Y`` is built by
    stacking ``batch.Y`` of every batch. Both are sliced into
    ``model.ninst`` contiguous column groups (boundaries derived from the
    width of ``Z``), and for each group the rows where the argmax of ``Z``
    and the argmax of ``Y`` disagree are counted.

    Returns a numpy array with one count per instance.
    """
    # Original note: if model.ninst > datasrc.ntask, then multiple models
    # were trained on the original data, so the task needs to be replicated;
    # for each task, model.ninst/data.ntask copies are generated.
    # NOTE(review): this function does not replicate anything itself;
    # presumably that is done elsewhere -- confirm.
    if isinstance(datasrc, datasource):
        datasrc = datasrc.asbatches(128)

    Z = make_predictions(model, datasrc)["Z"]
    Y = np.vstack([sm.asnumpy(batch.Y) for batch in datasrc])

    def errors_for(inst):
        # Columns owned by instance `inst`; integer division keeps the
        # boundaries contiguous across instances.
        first = inst * Z.shape[1] // model.ninst
        last = (inst + 1) * Z.shape[1] // model.ninst
        z_choice = np.argmax(Z[:, first:last], axis=1)
        y_choice = np.argmax(Y[:, first:last], axis=1)
        return np.sum(z_choice != y_choice)

    return np.asarray([errors_for(inst) for inst in range(model.ninst)])