예제 #1
0
def updateAllModelLabels(data, labels):
    """Re-run the label update on the model summaries of every problem.

    The problems come from ``Tracks.getProblems(data)``. For each one the
    stored summaries are fetched for writing inside a single shared
    transaction:

    * when no summaries are stored, a pregen job is submitted instead;
    * otherwise ``modelSumLabelUpdate`` is applied to every summary row,
      the result is written back and handed to ``checkGenerateModels``.

    The transaction is committed once, after the last problem.

    Args:
        data: Mapping providing at least 'user', 'hub' and 'track'; it is
            also forwarded unchanged to the helper functions.
        labels: Passed to ``modelSumLabelUpdate`` for every summary row.
    """
    txn = db.getTxn()

    for problem in Tracks.getProblems(data):
        store = db.ModelSummaries(data['user'], data['hub'],
                                  data['track'], problem['chrom'],
                                  problem['chromStart'])
        current = store.get(txn=txn, write=True)

        if len(current.index) == 0:
            # Nothing stored for this problem yet, so request generation.
            submitPregenJob(problem, data, txn=txn)
        else:
            rowArgs = (labels, data, problem, txn)
            refreshed = current.apply(modelSumLabelUpdate,
                                      axis=1,
                                      args=rowArgs)
            store.put(refreshed, txn=txn)
            checkGenerateModels(refreshed, problem, data, txn=txn)

    txn.commit()
예제 #2
0
def checkForMoreJobs(task):
    """Run ``Models.checkGenerateModels`` for the problem of a task.

    The model summaries of the task's problem are read for writing and
    passed on, all inside one transaction that is committed at the end.

    Args:
        task: Mapping with 'user', 'hub', 'track' and a 'problem' mapping
            that carries 'chrom' and 'chromStart'.
    """
    txn = db.getTxn()
    problem = task['problem']

    summaryStore = db.ModelSummaries(task['user'], task['hub'],
                                     task['track'], problem['chrom'],
                                     problem['chromStart'])
    summaries = summaryStore.get(txn=txn, write=True)

    Models.checkGenerateModels(summaries, problem, task, txn=txn)
    txn.commit()
예제 #3
0
def checkLabelRegions():
    """Tell whether enough stored problems have labeled regions.

    Counts every ModelSummaries entry that is non-empty and whose largest
    'regions' value is above zero.

    Returns:
        True when that count is strictly greater than
        ``cfg.minLabeledRegions``.
    """
    count = 0

    for keyTuple in db.ModelSummaries.db_key_tuples():
        summary = db.ModelSummaries(*keyTuple).get()
        # Short-circuit: the 'regions' column is only read when rows exist.
        if not summary.empty and summary['regions'].max() > 0:
            count += 1

    return count > cfg.minLabeledRegions
예제 #4
0
def getModels(data):
    """Collect the model output to display for each problem of a request.

    For every problem from ``Tracks.getProblems(data)`` the stored model
    summaries are narrowed step by step (regions > 0, numPeaks > 0,
    errors < 1). If the summaries are empty to begin with, or become empty
    after any step, the output of ``generateLOPARTModel`` is used for that
    problem. Otherwise one summary is chosen (via ``whichModelToDisplay``
    when several remain), the model stored under its penalty is loaded
    within the requested bounds, and its 'peak' rows are emitted.

    Args:
        data: Mapping with 'user', 'hub', 'track', 'ref', 'start', 'end'.

    Returns:
        list: the concatenated per-problem results; stored-model peaks are
        record dicts whose columns were renamed to ``jbrowseModelColumns``.
    """
    # (column, predicate) pairs applied in this order. Emptiness is checked
    # before each step so a column is never read from an empty frame.
    narrowing = (
        ('regions', lambda values: values > 0),
        ('numPeaks', lambda values: values > 0),
        ('errors', lambda values: values < 1),
    )
    peaks = []

    for problem in Tracks.getProblems(data):
        candidates = db.ModelSummaries(data['user'], data['hub'],
                                       data['track'], problem['chrom'],
                                       problem['chromStart']).get()

        for column, keep in narrowing:
            if len(candidates.index) < 1:
                break
            candidates = candidates[keep(candidates[column])]

        if len(candidates.index) < 1:
            # No usable stored model for this problem.
            peaks.extend(generateLOPARTModel(data, problem))
            continue

        if len(candidates.index) > 1:
            # Several summaries qualify; pick the one to display.
            candidates = whichModelToDisplay(data, problem, candidates)

        penalty = candidates['penalty'].iloc[0]
        storedModel = db.Model(data['user'], data['hub'], data['track'],
                               problem['chrom'], problem['chromStart'],
                               penalty)
        inBounds = storedModel.getInBounds(data['ref'], data['start'],
                                           data['end'])

        # Keep only peak rows, in the column order of modelColumns, then
        # relabel the columns for the client.
        peakRows = inBounds[inBounds['annotation'] == 'peak'][modelColumns]
        peakRows.columns = jbrowseModelColumns
        peaks.extend(peakRows.to_dict('records'))

    return peaks
예제 #5
0
def numCorrectModels():
    """Count the stored model summaries that have no label errors.

    Iterates over every ModelSummaries entry in the database, skips the
    empty ones, and adds up the rows whose 'errors' value is below 1.

    Returns:
        int: total number of zero-error summary rows across all entries.
    """
    correct = 0

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()

        if modelSum.empty:
            continue

        # The earlier `errors < 0` test excluded rows with exactly zero
        # errors, so (assuming 'errors' is a non-negative count) nothing was
        # ever counted. A row is error-free when its 'errors' value is below 1.
        zeroErrors = modelSum[modelSum['errors'] < 1]

        correct += len(zeroErrors.index)

    return correct
예제 #6
0
def getModelSummary(data):
    """Return the stored model summaries of a request, keyed by problem start.

    All reads happen inside one transaction, which is committed before
    returning. Problems without any stored summary are left out.

    Args:
        data: Mapping with 'user', 'hub' and 'track'; also forwarded to
            ``Tracks.getProblems``.

    Returns:
        dict: maps each problem's 'chromStart' to its summaries as a list
        of record dicts.
    """
    txn = db.getTxn()
    summariesByStart = {}

    for problem in Tracks.getProblems(data, txn=txn):
        # TODO: look the summaries up under the hub's user, NOT the
        # current user.
        summaries = db.ModelSummaries(data['user'], data['hub'],
                                      data['track'], problem['chrom'],
                                      problem['chromStart']).get(txn=txn)

        if len(summaries.index) >= 1:
            summariesByStart[problem['chromStart']] = \
                summaries.to_dict('records')

    txn.commit()
    return summariesByStart
예제 #7
0
def putModel(data):
    """Store an uploaded model and add its label-error summary.

    The JSON model in ``data['modelData']`` is parsed into a DataFrame whose
    columns are relabelled to ``modelColumns``. Inside one transaction the
    model is stored under its penalty, its error against the chromosome's
    labels is computed with ``calculateModelLabelError``, and that result is
    added to the problem's model summaries.

    Args:
        data: Mapping with 'modelData' (JSON), 'penalty' and 'modelInfo'
            (itself holding 'user', 'hub', 'track' and 'problem').

    Returns:
        The ``data['modelInfo']`` mapping, unchanged.
    """
    frame = pd.read_json(data['modelData'])
    frame.columns = modelColumns

    modelInfo = data['modelInfo']
    problem = modelInfo['problem']
    penalty = data['penalty']
    user, hub, track = modelInfo['user'], modelInfo['hub'], modelInfo['track']

    txn = db.getTxn()

    modelStore = db.Model(user, hub, track, problem['chrom'],
                          problem['chromStart'], penalty)
    modelStore.put(frame, txn=txn)

    # NOTE(review): the labels are fetched without the transaction while
    # both writes use it - confirm this is intended.
    labels = db.Labels(user, hub, track, problem['chrom']).get()
    errorSum = calculateModelLabelError(frame, labels, problem, penalty)

    summaryStore = db.ModelSummaries(user, hub, track, problem['chrom'],
                                     problem['chromStart'])
    summaryStore.add(errorSum, txn=txn)
    txn.commit()

    return modelInfo
예제 #8
0
def getDataPoints():
    """Assemble the feature matrix and log-penalty targets from stored models.

    For every ModelSummaries entry that is non-empty and has a 'regions'
    maximum of at least 1, the summaries with peaks (numPeaks > 0) and an
    'errors' value below 1 are selected. Each such summary yields one
    datapoint: a copy of the entry's stored features plus a 'logPenalty'
    value, the base-10 log of the summary's penalty.

    Returns:
        None when ``check()`` is falsy; otherwise the tuple
        ``(dropBadCols(X), Y)`` where ``Y`` is the 'logPenalty' column and
        ``X`` holds all remaining columns.

    Raises:
        KeyError: when no datapoint was produced, because the resulting
            empty frame has no 'logPenalty' column.
    """
    if not check():
        return

    # Rows are gathered and concatenated once at the end. DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    collected = []

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()
        if modelSum.empty:
            continue

        if modelSum['regions'].max() < 1:
            continue

        withPeaks = modelSum[modelSum['numPeaks'] > 0]

        noError = withPeaks[withPeaks['errors'] < 1]

        logPenalties = np.log10(noError['penalty'].astype(float))

        featuresDb = db.Features(*key)
        featuresTxn = db.getTxn()
        features = featuresDb.get(txn=featuresTxn, write=True)

        for penalty in logPenalties:
            datapoint = features.copy()

            datapoint['logPenalty'] = penalty

            # The stored features' type is not visible here; a Series or
            # dict becomes a one-row frame, a DataFrame is used as is.
            if not isinstance(datapoint, pd.DataFrame):
                datapoint = pd.DataFrame([datapoint])

            collected.append(datapoint)

        featuresTxn.commit()

    # TODO: Save datapoints, update ones which have changed, not all of them every time

    if collected:
        dataPoints = pd.concat(collected, ignore_index=True)
    else:
        dataPoints = pd.DataFrame()

    Y = dataPoints['logPenalty']
    # The keyword form replaces the positional axis argument, which pandas
    # 2.0 no longer accepts.
    X = dataPoints.drop(columns='logPenalty')

    return dropBadCols(X), Y