def __init__(self, user, hub, track, problem, trackUrl=None, tasks=None):
    """Initialize job state for one track/problem pair.

    When trackUrl is None (e.g. while a hub upload is still in progress
    and the URL hasn't been stored yet) the URL is looked up from the
    stored HubInfo record.
    """
    self.user = user
    self.hub = hub
    self.track = track
    self.problem = problem
    # Fresh dict per instance; never share a mutable default
    self.tasks = {} if tasks is None else tasks
    # When adding labels during hub upload the URL hasn't been stored yet
    if trackUrl is None:
        txn = db.getTxn()
        hubInfo = db.HubInfo(user, hub).get(txn=txn)
        try:
            self.trackUrl = hubInfo['tracks'][track]['url']
        except TypeError:
            # hubInfo (or its 'tracks' entry) wasn't subscriptable.
            # Commit the read txn, then raise a descriptive error instead
            # of the old debug prints + bare `raise Exception`.
            txn.commit()
            raise Exception(
                'No track URL stored for user=%s hub=%s track=%s; '
                'hubInfo=%s known hubs=%s'
                % (user, hub, track, hubInfo, db.HubInfo.db_key_tuples()))
        txn.commit()
    else:
        self.trackUrl = trackUrl
    self.status = 'New'
def backup(request):
    """Render backup status info for the authenticated user."""
    return {
        'last_backup': db.getLastBackup(),
        'backups': db.getAvailableBackups(),
        'user': request.authenticated_userid,
    }
def updateAllModelLabels(data, labels):
    """Recompute model summaries for every problem the label change touches.

    For each overlapping problem: if no model summaries exist yet, submit
    a model-pregeneration job; otherwise re-score each summary row against
    the new labels, store the result, and check whether additional models
    should be generated.
    """
    # This is the problems that the label update is in
    txn = db.getTxn()
    problems = Tracks.getProblems(data)
    for problem in problems:
        modelSummaries = db.ModelSummaries(data['user'], data['hub'], data['track'],
                                           problem['chrom'], problem['chromStart'])
        # write=True: rows are re-scored and written back below
        modelsums = modelSummaries.get(txn=txn, write=True)
        if len(modelsums.index) < 1:
            # No models yet for this problem; kick off generation instead
            submitPregenJob(problem, data, txn=txn)
            continue
        newSum = modelsums.apply(modelSumLabelUpdate, axis=1, args=(labels, data, problem, txn))
        modelSummaries.put(newSum, txn=txn)
        checkGenerateModels(newSum, problem, data, txn=txn)
    txn.commit()
def checkForMoreJobs(task):
    """After a task completes, re-check whether its problem needs more models."""
    problem = task['problem']
    txn = db.getTxn()
    summaryDb = db.ModelSummaries(task['user'], task['hub'], task['track'],
                                  problem['chrom'], problem['chromStart'])
    modelSums = summaryDb.get(txn=txn, write=True)
    Models.checkGenerateModels(modelSums, problem, task, txn=txn)
    txn.commit()
def resetAllJobs(data):
    """Resets all jobs"""
    for keys in db.Job.db_key_tuples():
        txn = db.getTxn()
        jobDb = db.Job(*keys)
        # write=True: read-modify-write, matching resetJob/restartAllJobs
        # (the original read without a write lock before putting back)
        jobToReset = jobDb.get(txn=txn, write=True)
        jobToReset.resetJob()
        jobDb.put(jobToReset, txn=txn)
        txn.commit()
def resetJob(data):
    """resets a job to a new state"""
    txn = db.getTxn()
    jobDb = db.Job(data['jobId'])
    job = jobDb.get(txn=txn, write=True)
    job.resetJob()
    jobDb.put(job, txn=txn)
    txn.commit()
    return job.__dict__()
def getModels(data):
    """Return peak records for the best stored model in the view bounds.

    For each problem overlapping the view, pick the stored model summaries
    with labeled regions, peaks, and zero label errors; when no such model
    exists, fall back to generating a LOPART model instead.
    """
    output = []
    for problem in Tracks.getProblems(data):
        modelSummaries = db.ModelSummaries(data['user'], data['hub'], data['track'],
                                           problem['chrom'], problem['chromStart']).get()
        noError = _errorFreeSummaries(modelSummaries)
        if noError is None:
            # No usable stored model for this problem (the original
            # repeated this fallback after every filter step)
            output.extend(generateLOPARTModel(data, problem))
            continue
        if len(noError.index) > 1:
            # Select which model to display from modelSums with 0 error
            noError = whichModelToDisplay(data, problem, noError)
        penalty = noError['penalty'].iloc[0]
        minErrorModel = db.Model(data['user'], data['hub'], data['track'],
                                 problem['chrom'], problem['chromStart'], penalty)
        model = minErrorModel.getInBounds(data['ref'], data['start'], data['end'])
        onlyPeaks = model[model['annotation'] == 'peak']
        # Organize the columns
        onlyPeaks = onlyPeaks[modelColumns]
        onlyPeaks.columns = jbrowseModelColumns
        output.extend(onlyPeaks.to_dict('records'))
    return output


def _errorFreeSummaries(modelSummaries):
    """Filter summaries to rows with regions > 0, numPeaks > 0 and
    errors < 1; return None when any step leaves nothing."""
    if len(modelSummaries.index) < 1:
        return None
    nonZeroRegions = modelSummaries[modelSummaries['regions'] > 0]
    if len(nonZeroRegions.index) < 1:
        return None
    withPeaks = nonZeroRegions[nonZeroRegions['numPeaks'] > 0]
    if len(withPeaks.index) < 1:
        return None
    noError = withPeaks[withPeaks['errors'] < 1]
    if len(noError.index) < 1:
        return None
    return noError
def restartAllJobs(data):
    """Restart every stored job that has unfinished work."""
    for key in db.Job.db_key_tuples():
        txn = db.getTxn()
        jobDb = db.Job(*key)
        job = jobDb.get(txn=txn, write=True)
        # Only write the job back when something was actually restarted
        if job.restartUnfinished():
            jobDb.put(job, txn=txn)
        txn.commit()
def updateTask(data):
    """Updates a task given the job/task id and stuff to update it with"""
    txn = db.getTxn()
    jobDb = db.Job(data['id'])
    job = jobDb.get(txn=txn, write=True)
    # Distinct name: the original reused `task` for input and result
    updatedTask = job.updateTask(data['task'])
    jobDb.put(job, txn=txn)
    txn.commit()
    if job.status == 'Done':
        checkForMoreJobs(updatedTask)
    return updatedTask
def check():
    """Return True once enough labeled regions exist for prediction.

    Caches the answer in the 'EnoughLabels' prediction key so the
    (expensive) region scan only runs until it first succeeds.
    """
    changes = db.Prediction('changes').get()
    if changes <= cfg.numChanges:
        return False
    # Initialize the cached flag on first use
    if not db.Prediction.has_key('EnoughLabels'):
        db.Prediction('EnoughLabels').put(False)
    if db.Prediction('EnoughLabels').get():
        return True
    if checkLabelRegions():
        db.Prediction('EnoughLabels').put(True)
        return True
    return False
def removeTrack(request):
    """Delete one track from a hub's HubInfo, then redirect to the hub list."""
    matchInfo = request.matchdict
    owner = matchInfo['user']
    hubName = matchInfo['hub']
    trackName = request.params['trackName']
    hubDb = db.HubInfo(owner, hubName)
    hubInfo = hubDb.get()
    del hubInfo['tracks'][trackName]
    hubDb.put(hubInfo)
    return HTTPFound(location=request.route_url('myHubs'))
def storeHubInfo(user, hub, tracks, hubInfo, genome):
    """Organize parsed trackDb entries into a track hierarchy, record each
    track's coverage URL in hubInfo, persist the HubInfo record, and return
    the hub's URL path."""
    superList = []
    trackList = []
    hubInfoTracks = {}

    # Load the track list into something which can be converted
    for track in tracks:
        if 'superTrack' in track:
            superList.append(track)
            continue

        if 'parent' in track:
            for super in superList:
                if super['track'] == track['parent']:
                    trackList.append(track)
                    continue
            # NOTE(review): the `continue` above only exits the superList
            # scan, so a track appended to trackList still falls through to
            # the child scan below — presumably harmless because its parent
            # is a superTrack, not a trackList entry; verify.
            for parent in trackList:
                if parent['track'] == track['parent']:
                    if 'children' not in parent:
                        parent['children'] = []
                        parent['children'].append(track)
                    else:
                        parent['children'].append(track)

    for track in trackList:
        # Determine which track is the coverage data
        coverage = None
        for child in track['children']:
            file = child['bigDataUrl'].rsplit('/', 1)
            # The last-seen child whose file name contains 'coverage' wins
            if 'coverage' in file[1]:
                coverage = child

        # Add Data Url to config
        if coverage is not None:
            # Build a 'Data / a / b' category path from the long label
            categories = 'Data'
            for category in track['longLabel'].split(' | ')[:-1]:
                categories = categories + ' / %s' % category

            hubInfoTracks[track['track']] = {'categories': categories,
                                             'key': track['shortLabel'],
                                             'url': coverage['bigDataUrl']}

            checkForPrexistingLabels(coverage['bigDataUrl'], user, hub, track, genome)

    txn = db.getTxn()
    hubInfo['tracks'] = hubInfoTracks
    db.HubInfo(user, hub).put(hubInfo, txn=txn)
    txn.commit()

    return '/%s/' % os.path.join(str(user), hub)
def addUserToHub(hubName, owner, newUser):
    """Grant newUser access to owner's hub and create their permissions entry.

    Fixes the original branch where both arms of the 'users' check did the
    same append — a HubInfo without a 'users' key would have raised
    KeyError; now the list is created instead. (An unused keysWhichMatch
    call was also dropped.)
    """
    hubInfo = db.HubInfo(owner, hubName).get()
    if 'users' in hubInfo:
        hubInfo['users'].append(newUser)
    else:
        hubInfo['users'] = [newUser]
    # Deduplicate in case the user was already added
    hubInfo['users'] = list(set(hubInfo['users']))
    db.HubInfo(owner, hubName).put(hubInfo)

    # create permissions database object for a user
    db.Permissions(owner, hubName, newUser).put(["", "", "", "", ""])
def removeLabel(data):
    """Remove the label spanning [start, end] on the given ref/track,
    re-score all affected models, and return the removed label as a dict."""
    toRemove = pd.Series({
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end']
    })

    txn = db.getTxn()
    labels = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    removed, after = labels.remove(toRemove, txn=txn)
    # Track label churn for the prediction system
    db.Prediction('changes').increment(txn=txn)
    # NOTE(review): updateAllModelLabels opens its own txn while this one
    # is still uncommitted — confirm the db layer tolerates that nesting.
    Models.updateAllModelLabels(data, after)
    txn.commit()

    return removed.to_dict()
def putNewJobWithTxn(self, txn, checkExists=True):
    """puts Job into job list if the job doesn't exist

    Assigns a fresh id from the JobInfo counter, bumps the per-problem
    iteration counter, and stores the job. Returns the new job id, or
    None when checkExists finds the job already stored.
    """
    if checkExists:
        if self.checkIfExists():
            return
    # Allocate a unique job id from the shared counter
    self.id = str(db.JobInfo('Id').incrementId(txn=txn))
    # Count how many times work has been generated for this problem
    self.iteration = db.Iteration(self.user,
                                  self.hub,
                                  self.track,
                                  self.problem['chrom'],
                                  self.problem['chromStart']).increment(txn=txn)
    self.putWithDb(db.Job(self.id), txn=txn)
    return self.id
def modelSumLabelUpdate(modelSum, labels, data, problem, txn):
    """Re-score one model summary row against the current labels."""
    penalty = modelSum['penalty']
    modelDb = db.Model(data['user'], data['hub'], data['track'],
                       problem['chrom'], problem['chromStart'], penalty)
    model = modelDb.get(txn=txn)
    return calculateModelLabelError(model, labels, problem, penalty)
def updateLabel(data):
    """Add or update a label and re-score all affected models.

    Returns the stored label as a dict.
    """
    # Renamed local (was `updateLabel`, which shadowed this function)
    newLabel = pd.Series({
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': data['label']
    })
    txn = db.getTxn()
    labelDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    item, labels = labelDb.add(newLabel, txn=txn)
    # Track label churn for the prediction system
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, labels)
    txn.commit()
    return item.to_dict()
def createHubFromParse(parsed):
    """Create a hub from a parsed hub.txt structure; returns the hub path."""
    # Will need to add a way to add additional folder depth for userID once
    # authentication is added
    hub = parsed['hub']
    user = parsed['user']
    genomesFile = parsed['genomesFile']

    print("Hub: ", hub)

    # This will need to be updated if there are multiple genomes in file
    genome = genomesFile['genome']

    hubInfo = {'genome': genome,
               'isPublic': parsed['isPublic'],
               'users': parsed['users']}

    dataPath = os.path.join(cfg.jbrowsePath, cfg.dataPath)
    includes = getGeneTracks(genome, dataPath)

    # Generate problems for this genome
    txn = db.getTxn()
    problems = generateProblems(genome, dataPath, txn)
    txn.commit()
    includes.append(generateProblemTrack(problems))

    getRefSeq(genome, dataPath, includes)

    return storeHubInfo(user, hub, genomesFile['trackDb'], hubInfo, genome)
def getProblems(data, txn=None):
    """Return problem regions overlapping [start, end] on data['ref'].

    If no problems are stored for the genome yet, load them from the
    on-disk problems.bed (generating that file first when missing), store
    them, and return the overlapping rows.
    """
    if 'genome' not in data:
        data['genome'] = getGenome(data, txn=txn)

    problems = db.Problems(data['genome'])
    problemsInBounds = problems.getInBounds(data['ref'], data['start'], data['end'])
    if problemsInBounds is not None:
        return problemsInBounds.to_dict('records')

    # Nothing stored yet: bootstrap from the problems.bed file on disk
    problemsPath = os.path.join(cfg.jbrowsePath, cfg.dataPath,
                                'genomes', data['genome'], 'problems.bed')
    if not os.path.exists(problemsPath):
        location = Hubs.generateProblems(data['genome'], problemsPath)
        if location != problemsPath:
            # Was a bare `raise Exception`; give the failure some context
            raise Exception('generateProblems wrote %s, expected %s'
                            % (location, problemsPath))

    problemsDf = pd.read_csv(problemsPath, sep='\t', header=None)
    problemsDf.columns = problemColumns
    problems.put(problemsDf, txn=txn)
    inBounds = problemsDf.apply(db.checkInBounds, axis=1,
                                args=(data['ref'], data['start'], data['end']))
    return problemsDf[inBounds].to_dict('records')
def isPublic(request):
    """Toggle a hub's public visibility based on the form checkbox."""
    matchInfo = request.matchdict
    owner = matchInfo['user']
    hubName = matchInfo['hub']
    makePublic = "chkpublic" in request.params.keys()

    hubDb = db.HubInfo(owner, hubName)
    hub = hubDb.get()
    hub['isPublic'] = makePublic
    hubDb.put(hub)

    # Keep the special 'Public' pseudo-user in sync with the flag
    if makePublic:
        Hubs.addUserToHub(hubName, owner, 'Public')
    elif 'Public' in hub['users']:
        Hubs.removeUserFromHub(hubName, owner, 'Public')

    return HTTPFound(location=request.route_url('myHubs'))
def putFeatures(data):
    """ Saves features to be later used for prediction and learning

    data['data'] must be a list containing exactly one feature mapping.
    """
    # The original guard used `and`, so it only rejected values that were
    # BOTH not a list AND not length 1 — e.g. a two-element list slipped
    # through. De Morgan of "list of length 1" requires `or` here.
    if not isinstance(data['data'], list) or len(data['data']) != 1:
        raise Exception(data['data'])
    problem = data['problem']
    features = db.Features(data['user'], data['hub'], data['track'],
                           problem['chrom'], problem['chromStart'])
    features.put(pd.Series(data['data'][0]))
def addTrack(request):
    """Add a single track entry to a hub's HubInfo, then redirect."""
    matchInfo = request.matchdict
    owner = matchInfo['user']
    hubName = matchInfo['hub']
    params = request.params
    trackName = params['trackName']

    hubInfo = db.HubInfo(owner, hubName).get()
    hubInfo['tracks'][trackName] = {
        'categories': params['category'],
        'key': trackName,
        'url': params['url'],
    }
    db.HubInfo(owner, hubName).put(hubInfo)

    return HTTPFound(location=request.route_url('myHubs'))
def checkLabelRegions():
    """True when more summaries than cfg.minLabeledRegions have labeled regions."""
    summaries = (db.ModelSummaries(*key).get()
                 for key in db.ModelSummaries.db_key_tuples())
    labeled = sum(1 for modelSum in summaries
                  if not modelSum.empty and modelSum['regions'].max() > 0)
    return labeled > cfg.minLabeledRegions
def deleteHub(request):
    """Delete a hub by overwriting its HubInfo record with None."""
    userid = request.unauthenticated_userid
    hubName = request.params['hubName']
    # Presumably the db layer treats a None value as deletion — verify.
    # NOTE(review): uses unauthenticated_userid where sibling views use
    # authenticated_userid; confirm that is intentional.
    db.HubInfo(userid, hubName).put(None)
    return HTTPFound(location=request.route_url('myHubs'))
def do_GET(self, data):
    """Handle a GET by file name; only trackList.json is supported."""
    args = data['args']
    if args['file'] != 'trackList.json':
        print('no handler for %s' % self.query['handler'])
        return None
    hubInfo = db.HubInfo(self.query['user'], self.query['hub']).get()
    return createTrackListWithHubInfo(hubInfo)
def getModelSummary(data):
    """Map each problem's chromStart to its model summary records."""
    txn = db.getTxn()
    output = {}
    for problem in Tracks.getProblems(data, txn=txn):
        # TODO: Replace 1 with user of hub NOT current user
        summaries = db.ModelSummaries(data['user'], data['hub'], data['track'],
                                      problem['chrom'],
                                      problem['chromStart']).get(txn=txn)
        if len(summaries.index) >= 1:
            output[problem['chromStart']] = summaries.to_dict('records')
    txn.commit()
    return output
def getLabels(data):
    """Return labels inside the view bounds, with JBrowse column names."""
    labelsDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    inBounds = labelsDb.getInBounds(data['ref'], data['start'], data['end'])
    if len(inBounds.index) < 1:
        return []
    jbrowseLabels = inBounds[labelColumns]
    jbrowseLabels.columns = jbrowseLabelColumns
    return jbrowseLabels.to_dict('records')
def dropBadCols(df):
    """Drop feature columns containing -inf/NaN and record which were dropped.

    The dropped column names are stored under the 'badCols' prediction key
    so prediction can remove the same columns later.
    (Removed a leftover debug pd.set_option that mutated global display
    settings; np.Inf → np.inf, the alias was removed in NumPy 2.0.)
    """
    noNegatives = df.replace(-np.inf, np.nan)
    output = noNegatives.dropna(axis=1)
    # Take a note of what columns were dropped so that can be later used
    # during prediction. This line just compares the two column indices
    # and finds the differences
    badCols = list(set(df.columns) - set(output.columns))
    db.Prediction('badCols').put(badCols)
    return output
def publicHubs(request):
    """List all public hubs, with per-hub label counts, for the hub browser.

    Returns the authenticated user, every hub name, and a dict of public
    hubs keyed by hub name.
    """
    userid = request.authenticated_userid
    everyKey = db.HubInfo.keysWhichMatch(db.HubInfo)
    # (owner, hubName) tuples -> hub names; comprehension replaces map/lambda
    hubNames = [key[1] for key in everyKey]
    hubInfos = {}
    for key in everyKey:
        currentHub = db.HubInfo(key[0], key[1]).get()
        if 'isPublic' in currentHub.keys() and currentHub['isPublic']:
            # Total labeled rows across every track of the hub
            currentHub['labels'] = 0
            for labelKey in db.Labels.keysWhichMatch(key[0], key[1]):
                currentHub['labels'] += db.Labels(*labelKey).get().shape[0]
            currentHub['owner'] = key[0]
            # Was '{hubName}'.format(hubName=key[1]) — a no-op format
            hubInfos[key[1]] = currentHub
    return {"user": userid, "HubNames": hubNames, "hubInfos": hubInfos}
def getAllJobs(data):
    """Collect every stored job as a dict, skipping empty db entries."""
    values = (db.Job(*key).get() for key in db.Job.db_key_tuples())
    return [job.__dict__() for job in values if job is not None]