def updateAllModelLabels(data, labels):
    """Refresh the stored model summaries of every problem touched by a label update.

    All reads and writes share a single transaction obtained from
    ``db.getTxn()``, which is committed once every problem has been handled.

    For each problem returned by ``Tracks.getProblems(data)``:

    * if no model summaries are stored, a pregen job is submitted via
      ``submitPregenJob``;
    * otherwise ``modelSumLabelUpdate`` is applied to every summary row, the
      result is stored back, and ``checkGenerateModels`` is run on it.

    Args:
        data: Mapping providing the 'user', 'hub' and 'track' keys; it is also
            forwarded unchanged to the helpers called here.
        labels: Forwarded to ``modelSumLabelUpdate`` for every summary row.
    """
    txn = db.getTxn()

    # These are the problems that the label update falls in.
    for problem in Tracks.getProblems(data):
        summaryDb = db.ModelSummaries(data['user'],
                                      data['hub'],
                                      data['track'],
                                      problem['chrom'],
                                      problem['chromStart'])
        currentSums = summaryDb.get(txn=txn, write=True)

        if len(currentSums.index) < 1:
            # Nothing stored for this problem yet.
            submitPregenJob(problem, data, txn=txn)
        else:
            updatedSums = currentSums.apply(modelSumLabelUpdate,
                                            axis=1,
                                            args=(labels, data, problem, txn))
            summaryDb.put(updatedSums, txn=txn)
            checkGenerateModels(updatedSums, problem, data, txn=txn)

    txn.commit()
def checkForMoreJobs(task):
    """Run ``Models.checkGenerateModels`` on the model summaries of a task's problem.

    The summaries are read for writing and the check is performed inside one
    transaction, which is committed afterwards.

    Args:
        task: Mapping providing the 'problem', 'user', 'hub' and 'track' keys;
            the 'problem' entry must provide 'chrom' and 'chromStart'.
    """
    txn = db.getTxn()
    problem = task['problem']

    summaryDb = db.ModelSummaries(task['user'],
                                  task['hub'],
                                  task['track'],
                                  problem['chrom'],
                                  problem['chromStart'])
    modelSums = summaryDb.get(txn=txn, write=True)

    Models.checkGenerateModels(modelSums, problem, task, txn=txn)
    txn.commit()
def checkLabelRegions():
    """Tell whether more than ``cfg.minLabeledRegions`` summaries have labeled regions.

    Every key from ``db.ModelSummaries.db_key_tuples()`` is loaded; a summary
    counts when it is non-empty and its largest 'regions' value is above 0.

    Returns:
        The result of comparing that count against ``cfg.minLabeledRegions``
        with ``>``.
    """
    def hasLabeledRegion(summary):
        # Check emptiness first so 'regions' is only read from non-empty summaries.
        return not summary.empty and summary['regions'].max() > 0

    labeledCount = sum(
        1
        for key in db.ModelSummaries.db_key_tuples()
        if hasLabeledRegion(db.ModelSummaries(*key).get())
    )
    return labeledCount > cfg.minLabeledRegions
def getModels(data):
    """Collect the peak records to display for every problem of a query.

    For each problem returned by ``Tracks.getProblems(data)`` the stored model
    summaries are narrowed, in order, to rows with 'regions' > 0, then
    'numPeaks' > 0, then 'errors' < 1. If the summaries are empty at any stage,
    the output of ``generateLOPARTModel`` is used for that problem instead.
    When more than one row survives, ``whichModelToDisplay`` chooses among
    them. The model stored under the first remaining row's penalty is then
    read within the bounds given by data['ref'], data['start'] and
    data['end'], and its 'peak' rows are emitted.

    Args:
        data: Mapping providing the 'user', 'hub', 'track', 'ref', 'start' and
            'end' keys.

    Returns:
        A list containing, per problem, either the LOPART output or the peak
        rows as dict records with columns renamed to ``jbrowseModelColumns``.
    """
    # Applied one after another; a filter is skipped as soon as no rows
    # remain, so a column is never read from an empty summary table.
    rowFilters = (
        lambda sums: sums['regions'] > 0,
        lambda sums: sums['numPeaks'] > 0,
        lambda sums: sums['errors'] < 1,
    )

    output = []
    for problem in Tracks.getProblems(data):
        candidates = db.ModelSummaries(data['user'],
                                       data['hub'],
                                       data['track'],
                                       problem['chrom'],
                                       problem['chromStart']).get()

        for keepRows in rowFilters:
            if len(candidates.index) < 1:
                break
            candidates = candidates[keepRows(candidates)]

        if len(candidates.index) < 1:
            # No stored model qualifies for this problem.
            output.extend(generateLOPARTModel(data, problem))
            continue

        if len(candidates.index) > 1:
            # Select which model to display from the summaries with 0 error.
            candidates = whichModelToDisplay(data, problem, candidates)

        penalty = candidates['penalty'].iloc[0]
        chosenModel = db.Model(data['user'],
                               data['hub'],
                               data['track'],
                               problem['chrom'],
                               problem['chromStart'],
                               penalty)
        inView = chosenModel.getInBounds(data['ref'], data['start'], data['end'])

        peaks = inView[inView['annotation'] == 'peak']
        # Put the columns in output order, then rename them.
        peaks = peaks[modelColumns]
        peaks.columns = jbrowseModelColumns
        output.extend(peaks.to_dict('records'))

    return output
def numCorrectModels():
    """Count the stored model-summary rows that have no label errors.

    Every key from ``db.ModelSummaries.db_key_tuples()`` is loaded; empty
    summaries are skipped and the rows whose 'errors' value is below 1 are
    counted.

    Returns:
        The total number of such rows across all summaries.
    """
    correct = 0
    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()
        if modelSum.empty:
            continue

        # Zero-error rows are the ones with 'errors' < 1, the same test that
        # getModels and getDataPoints use. Comparing against 0 instead can
        # never match if 'errors' is a non-negative count.
        # NOTE(review): assumes 'errors' is a non-negative count - confirm.
        zeroErrors = modelSum[modelSum['errors'] < 1]
        correct += len(zeroErrors.index)
    return correct
def getModelSummary(data):
    """Return the stored model summaries of a query, keyed by problem start.

    The problems and their summaries are read inside one transaction, which
    is committed before returning. Problems without stored summaries are left
    out of the result.

    Args:
        data: Mapping providing the 'user', 'hub' and 'track' keys; it is also
            passed to ``Tracks.getProblems``.

    Returns:
        A dict mapping each problem's 'chromStart' to its summaries as a list
        of dict records.
    """
    txn = db.getTxn()
    output = {}

    for problem in Tracks.getProblems(data, txn=txn):
        # TODO: Replace 1 with user of hub NOT current user
        summaries = db.ModelSummaries(data['user'],
                                      data['hub'],
                                      data['track'],
                                      problem['chrom'],
                                      problem['chromStart']).get(txn=txn)

        if len(summaries.index) >= 1:
            output[problem['chromStart']] = summaries.to_dict('records')

    txn.commit()
    return output
def putModel(data):
    """Store a submitted model and add its label-error summary.

    The JSON in data['modelData'] is parsed into a DataFrame whose columns
    are renamed to ``modelColumns``. The model is stored under its penalty,
    its error summary is computed against the labels stored for the problem's
    chromosome with ``calculateModelLabelError``, and that summary is added
    to the problem's model summaries. Both writes share one transaction; the
    labels are read outside of it.

    Args:
        data: Mapping providing the 'modelData', 'modelInfo' and 'penalty'
            keys; 'modelInfo' must provide 'problem', 'user', 'hub' and
            'track'.

    Returns:
        data['modelInfo'], unchanged.
    """
    modelData = pd.read_json(data['modelData'])
    modelData.columns = modelColumns

    modelInfo = data['modelInfo']
    problem = modelInfo['problem']
    penalty = data['penalty']
    user, hub, track = modelInfo['user'], modelInfo['hub'], modelInfo['track']

    txn = db.getTxn()
    chrom, chromStart = problem['chrom'], problem['chromStart']

    modelDb = db.Model(user, hub, track, chrom, chromStart, penalty)
    modelDb.put(modelData, txn=txn)

    labels = db.Labels(user, hub, track, chrom).get()
    errorSum = calculateModelLabelError(modelData, labels, problem, penalty)

    summaryDb = db.ModelSummaries(user, hub, track, chrom, chromStart)
    summaryDb.add(errorSum, txn=txn)

    txn.commit()
    return modelInfo
def getDataPoints():
    """Build the (features, log-penalty) data set from the stored summaries.

    Every key from ``db.ModelSummaries.db_key_tuples()`` is loaded. Summaries
    that are empty or whose largest 'regions' value is below 1 are skipped.
    For each remaining summary, one data point is produced per row with
    'numPeaks' > 0 and 'errors' < 1: a copy of the features stored under the
    same key, extended with 'logPenalty' = log10 of that row's penalty.

    Returns:
        ``None`` when ``check()`` is falsy. Otherwise a tuple ``(X, Y)`` where
        ``Y`` is the 'logPenalty' column and ``X`` is the remaining columns
        passed through ``dropBadCols``.

    Raises:
        KeyError: If no data point was produced, because the empty table has
            no 'logPenalty' column.
    """
    if not check():
        return

    # Rows are collected and concatenated once at the end. DataFrame.append
    # was removed in pandas 2.0, and growing a frame row by row is quadratic.
    rows = []

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()
        if modelSum.empty:
            continue

        if modelSum['regions'].max() < 1:
            continue

        withPeaks = modelSum[modelSum['numPeaks'] > 0]
        noError = withPeaks[withPeaks['errors'] < 1]
        logPenalties = np.log10(noError['penalty'].astype(float))

        featuresDb = db.Features(*key)
        featuresTxn = db.getTxn()
        features = featuresDb.get(txn=featuresTxn, write=True)

        for penalty in logPenalties:
            datapoint = features.copy()
            datapoint['logPenalty'] = penalty
            # NOTE(review): features is presumably a pandas Series (one row
            # per data point) - confirm. A DataFrame is kept as is so its
            # rows are appended the way DataFrame.append did.
            if isinstance(datapoint, pd.DataFrame):
                rows.append(datapoint)
            else:
                rows.append(pd.DataFrame([datapoint]))

        featuresTxn.commit()

    # TODO: Save datapoints, update ones which have changed, not all of them every time
    dataPoints = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

    Y = dataPoints['logPenalty']
    # The keyword form is required: the positional axis argument of drop()
    # is no longer accepted by pandas 2.0.
    X = dataPoints.drop(columns='logPenalty')

    return dropBadCols(X), Y