def createHubFromParse(parsed):
    """Create and store a track hub from a parsed hub description.

    Args:
        parsed: dict with keys 'hub', 'user', 'genomesFile', 'isPublic',
            'users' — presumably the output of a hub.txt parser; verify
            against the caller.

    Returns:
        The hub path string produced by storeHubInfo ('/<user>/<hub>/').
    """
    # Will need to add a way to add additional folder depth for userID once
    # authentication is added
    hub = parsed['hub']
    user = parsed['user']
    genomesFile = parsed['genomesFile']

    print("Hub: ", hub)

    # This will need to be updated if there are multiple genomes in file
    genome = genomesFile['genome']

    # Minimal hub metadata persisted later by storeHubInfo.
    hubInfo = {'genome': genome,
               'isPublic': parsed['isPublic'],
               'users': parsed['users']}

    dataPath = os.path.join(cfg.jbrowsePath, cfg.dataPath)

    # 'includes' accumulates track config paths to be bundled into the hub.
    includes = getGeneTracks(genome, dataPath)

    # Generate problems for this genome
    txn = db.getTxn()
    problems = generateProblems(genome, dataPath, txn)
    txn.commit()

    problemPath = generateProblemTrack(problems)
    includes.append(problemPath)

    getRefSeq(genome, dataPath, includes)

    path = storeHubInfo(user, hub, genomesFile['trackDb'], hubInfo, genome)

    return path
def updateAllModelLabels(data, labels):
    """Propagate a label change to the model summaries of every affected problem.

    Args:
        data: dict with at least 'user', 'hub', 'track' — identifies the track
            whose labels changed.
        labels: the updated label set, passed through to modelSumLabelUpdate.

    All reads/writes happen inside a single transaction committed at the end.
    """
    # This is the problems that the label update is in
    txn = db.getTxn()
    problems = Tracks.getProblems(data)

    for problem in problems:
        modelSummaries = db.ModelSummaries(data['user'],
                                           data['hub'],
                                           data['track'],
                                           problem['chrom'],
                                           problem['chromStart'])
        modelsums = modelSummaries.get(txn=txn, write=True)

        # No summaries yet for this problem: queue model pre-generation instead.
        if len(modelsums.index) < 1:
            submitPregenJob(problem, data, txn=txn)
            continue

        # Recompute each summary row against the new labels.
        newSum = modelsums.apply(modelSumLabelUpdate, axis=1,
                                 args=(labels, data, problem, txn))
        modelSummaries.put(newSum, txn=txn)
        # May submit further model-generation jobs based on the refreshed summary.
        checkGenerateModels(newSum, problem, data, txn=txn)

    txn.commit()
def __init__(self, user, hub, track, problem, trackUrl=None, tasks=None):
    """Initialize a job-like object for a given track/problem.

    Args:
        user: hub owner identifier.
        hub: hub name.
        track: track name, used to look up the track URL in HubInfo.
        problem: problem descriptor this job operates on.
        trackUrl: data URL for the track; if None it is resolved from the
            stored HubInfo (see below).
        tasks: optional pre-existing task dict; defaults to empty.
    """
    self.user = user
    self.hub = hub
    self.track = track
    self.problem = problem
    # Avoid a shared mutable default: each instance gets its own dict.
    if tasks is None:
        self.tasks = {}
    else:
        self.tasks = tasks

    # When adding labels during hub upload the URL hasn't been stored yet
    if trackUrl is None:
        txn = db.getTxn()
        hubInfo = db.HubInfo(user, hub).get(txn=txn)
        try:
            self.trackUrl = hubInfo['tracks'][track]['url']
        except TypeError:
            # hubInfo was not subscriptable (presumably missing/None) —
            # dump debug state, commit so the txn isn't left open, and fail.
            print(hubInfo)
            print(db.HubInfo.db_key_tuples())
            print(user, track)
            txn.commit()
            raise Exception
        txn.commit()
    else:
        self.trackUrl = trackUrl
    self.status = 'New'
def resetAllJobs(data):
    """Reset every stored job back to its initial state, one txn per job."""
    for keyTuple in db.Job.db_key_tuples():
        txn = db.getTxn()
        jobHandle = db.Job(*keyTuple)
        job = jobHandle.get(txn=txn)
        job.resetJob()
        jobHandle.put(job, txn=txn)
        txn.commit()
def checkForMoreJobs(task):
    """After a task finishes, see whether its problem needs more models generated."""
    txn = db.getTxn()
    problem = task['problem']
    summariesDb = db.ModelSummaries(task['user'],
                                    task['hub'],
                                    task['track'],
                                    problem['chrom'],
                                    problem['chromStart'])
    modelSums = summariesDb.get(txn=txn, write=True)
    Models.checkGenerateModels(modelSums, problem, task, txn=txn)
    txn.commit()
def resetJob(data):
    """resets a job to a new state"""
    txn = db.getTxn()
    jobHandle = db.Job(data['jobId'])
    job = jobHandle.get(txn=txn, write=True)
    job.resetJob()
    jobHandle.put(job, txn=txn)
    txn.commit()
    # Project convention: Job defines __dict__ as a callable serializer.
    return job.__dict__()
def restartAllJobs(data):
    """Restart every job with unfinished work; only writes back jobs that changed."""
    for keyTuple in db.Job.db_key_tuples():
        txn = db.getTxn()
        jobHandle = db.Job(*keyTuple)
        job = jobHandle.get(txn=txn, write=True)
        if job.restartUnfinished():
            jobHandle.put(job, txn=txn)
        txn.commit()
def updateTask(data):
    """Updates a task given the job/task id and stuff to update it with"""
    txn = db.getTxn()
    jobHandle = db.Job(data['id'])
    job = jobHandle.get(txn=txn, write=True)
    updatedTask = job.updateTask(data['task'])
    jobHandle.put(job, txn=txn)
    txn.commit()

    # A finished job may unlock follow-up model-generation work.
    if job.status == 'Done':
        checkForMoreJobs(updatedTask)

    return updatedTask
def removeLabel(data):
    """Remove the label matching the given region and refresh dependent models."""
    target = pd.Series({'chrom': data['ref'],
                        'chromStart': data['start'],
                        'chromEnd': data['end']})

    txn = db.getTxn()
    labelDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    removed, remaining = labelDb.remove(target, txn=txn)
    # Bump the change counter so the prediction layer knows labels moved.
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, remaining)
    txn.commit()

    return removed.to_dict()
def storeHubInfo(user, hub, tracks, hubInfo, genome):
    """Resolve the track hierarchy, record per-track info, and persist the hub.

    Args:
        user: hub owner identifier.
        hub: hub name.
        tracks: iterable of track dicts (trackDb entries) — presumably parsed
            from a trackDb.txt; verify against the caller.
        hubInfo: hub metadata dict; gains a 'tracks' key here before storage.
        genome: genome name, forwarded to label-checking.

    Returns:
        The hub's URL path: '/<user>/<hub>/'.
    """
    superList = []
    trackList = []
    hubInfoTracks = {}

    # Load the track list into something which can be converted
    for track in tracks:
        if 'superTrack' in track:
            superList.append(track)
            continue

        if 'parent' in track:
            # Tracks whose parent is a superTrack become top-level entries.
            for super in superList:
                if super['track'] == track['parent']:
                    trackList.append(track)
                    continue

            # Otherwise attach the track as a child of its parent entry.
            for parent in trackList:
                if parent['track'] == track['parent']:
                    if 'children' not in parent:
                        parent['children'] = []
                        parent['children'].append(track)
                    else:
                        parent['children'].append(track)

    for track in trackList:
        # Determine which track is the coverage data
        coverage = None
        for child in track['children']:
            # Match on the file name portion of the URL.
            file = child['bigDataUrl'].rsplit('/', 1)
            if 'coverage' in file[1]:
                coverage = child

        # Add Data Url to config
        if coverage is not None:
            # Build a ' / '-separated category path from the long label,
            # dropping its final segment.
            categories = 'Data'
            for category in track['longLabel'].split(' | ')[:-1]:
                categories = categories + ' / %s' % category

            hubInfoTracks[track['track']] = {'categories': categories,
                                             'key': track['shortLabel'],
                                             'url': coverage['bigDataUrl']}
            checkForPrexistingLabels(coverage['bigDataUrl'], user, hub, track, genome)

    txn = db.getTxn()
    hubInfo['tracks'] = hubInfoTracks
    db.HubInfo(user, hub).put(hubInfo, txn=txn)
    txn.commit()

    return '/%s/' % os.path.join(str(user), hub)
def updateLabel(data):
    """Update the annotation of an existing label and refresh dependent models.

    Args:
        data: dict with 'user', 'hub', 'track', 'ref', 'start', 'end', 'label'.

    Returns:
        The stored/updated label row as a dict.
    """
    label = data['label']
    # Renamed from `updateLabel`: the local previously shadowed this
    # function's own name, which is confusing and prevents self-reference.
    labelToUpdate = pd.Series({
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': label
    })

    txn = db.getTxn()
    labelDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    item, labels = labelDb.add(labelToUpdate, txn=txn)
    # Bump the change counter so the prediction layer knows labels moved.
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, labels)
    txn.commit()

    return item.to_dict()
def getModelSummary(data):
    """Collect non-empty model summaries for every problem on a track.

    Returns a dict keyed by problem chromStart, each value a list of
    summary records.
    """
    txn = db.getTxn()
    problems = Tracks.getProblems(data, txn=txn)

    summariesByStart = {}
    for problem in problems:
        # TODO: Replace 1 with user of hub NOT current user
        summary = db.ModelSummaries(data['user'],
                                    data['hub'],
                                    data['track'],
                                    problem['chrom'],
                                    problem['chromStart']).get(txn=txn)
        if len(summary.index) >= 1:
            summariesByStart[problem['chromStart']] = summary.to_dict('records')

    txn.commit()
    return summariesByStart
def saveLabelGroup(group, user, hub, track, genome, coverageUrl):
    """Store a chromosome's worth of labels and queue pregen for labeled problems.

    Args:
        group: DataFrame of labels — assumes all rows share one 'chrom' and
            have 'chromStart'/'annotation' columns; TODO confirm with caller.
        user, hub: hub identifiers.
        track: track dict; its 'track' key names the track.
        genome: genome name used to look up problems.
        coverageUrl: data URL forwarded to pregen submission.
    """
    group = group.sort_values('chromStart', ignore_index=True)
    # Normalize annotations row-by-row (presumably mapping 'noPeaks'-style
    # values — verify against fixNoPeaks).
    group['annotation'] = group.apply(fixNoPeaks, axis=1)
    # All labels in the group are on the same chromosome; take it from row 0.
    chrom = group['chrom'].loc[0]
    txn = db.getTxn()
    db.Labels(user, hub, track['track'], chrom).put(group, txn=txn)
    chromProblems = Tracks.getProblemsForChrom(genome, chrom, txn)
    # Boolean mask: which problems overlap at least one label in this group.
    withLabels = chromProblems.apply(checkIfProblemHasLabels, axis=1, args=(group,))
    doPregen = chromProblems[withLabels]
    submitPregenWithData(doPregen, user, hub, track, coverageUrl)
    txn.commit()
def putModel(data):
    """Store a model, score it against current labels, and record the summary.

    Args:
        data: dict with 'modelData' (JSON-encoded model rows), 'penalty', and
            'modelInfo' containing 'user', 'hub', 'track', 'problem'.

    Returns:
        The modelInfo dict, unchanged, for the caller's bookkeeping.
    """
    modelData = pd.read_json(data['modelData'])
    modelData.columns = modelColumns
    modelInfo = data['modelInfo']
    problem = modelInfo['problem']
    penalty = data['penalty']
    user = modelInfo['user']
    hub = modelInfo['hub']
    track = modelInfo['track']

    txn = db.getTxn()
    db.Model(user, hub, track, problem['chrom'], problem['chromStart'], penalty).put(modelData, txn=txn)
    # Read labels inside the same transaction so the error calculation sees a
    # state consistent with the model/summary writes (previously this read
    # happened outside the open txn, unlike every other access here).
    labels = db.Labels(user, hub, track, problem['chrom']).get(txn=txn)
    errorSum = calculateModelLabelError(modelData, labels, problem, penalty)
    db.ModelSummaries(user, hub, track, problem['chrom'], problem['chromStart']).add(errorSum, txn=txn)
    txn.commit()

    return modelInfo
def getDataPoints():
    """Build the (features, log-penalty) training set from stored model summaries.

    Skips summaries that are empty, have no labeled regions, no peaks, or any
    label errors. Returns (X, Y) where Y is the log10 penalty column and X is
    the remaining feature columns, or None when check() fails.
    """
    if not check():
        # No data to train on yet; callers must handle the implicit None.
        return

    dataPoints = pd.DataFrame()

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()

        if modelSum.empty:
            continue
        if modelSum['regions'].max() < 1:
            continue

        # Keep only models that produced peaks and made zero label errors.
        withPeaks = modelSum[modelSum['numPeaks'] > 0]
        noError = withPeaks[withPeaks['errors'] < 1]
        logPenalties = np.log10(noError['penalty'].astype(float))

        featuresDb = db.Features(*key)
        featuresTxn = db.getTxn()
        features = featuresDb.get(txn=featuresTxn, write=True)

        # One data point per error-free penalty, all sharing this key's features.
        for penalty in logPenalties:
            datapoint = features.copy()
            datapoint['logPenalty'] = penalty
            dataPoints = dataPoints.append(datapoint, ignore_index=True)

        featuresTxn.commit()

    # TODO: Save datapoints, update ones which have changed, not all of them every time
    Y = dataPoints['logPenalty']
    # Use the keyword form: positional `axis` for DataFrame.drop was
    # deprecated and removed in modern pandas.
    X = dataPoints.drop('logPenalty', axis=1)

    return dropBadCols(X), Y
def addLabel(data):
    """Add a new label (always starting as 'unknown') and refresh dependent models.

    Args:
        data: dict with 'user', 'hub', 'track', 'ref', 'start', 'end'.

    Returns:
        The input data dict, unchanged.
    """
    # New labels always start out as 'unknown'; updateLabel sets the real value.
    # (Removed leftover debug print loops that dumped banners and the raw
    # request data on every call.)
    label = 'unknown'

    # Duplicated because calls from updateLabel are causing freezing
    newLabel = pd.Series({
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': label
    })

    txn = db.getTxn()
    item, labels = db.Labels(data['user'], data['hub'], data['track'], data['ref']).add(newLabel, txn=txn)
    # Bump the change counter so the prediction layer knows labels moved.
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, labels)
    txn.commit()

    return data
def getJob(data):
    """Gets job by ID.

    Args:
        data: dict with 'id' — the job's database key.

    Returns:
        The job serialized via its __dict__() method.
    """
    txn = db.getTxn()
    # Pass the transaction to the read: previously the txn was opened and
    # committed but never used, unlike the equivalent pattern in getTrackInfo.
    output = db.Job(data['id']).get(txn=txn).__dict__()
    txn.commit()
    return output
def getTrackInfo(data):
    """Return the stored info dict for a single track within a hub."""
    txn = db.getTxn()
    hubInfo = db.HubInfo(data['user'], data['hub']).get(txn=txn)
    txn.commit()

    trackName = data['track']
    return hubInfo['tracks'][trackName]
def putNewJob(self, checkExists=True):
    """Store this job inside a fresh transaction; returns putNewJobWithTxn's result."""
    txn = db.getTxn()
    result = self.putNewJobWithTxn(txn, checkExists=checkExists)
    txn.commit()
    return result