Beispiel #1
0
def createHubFromParse(parsed):
    """Create the stored hub configuration from a parsed hub description.

    Reads the hub name, user and genomes file from ``parsed``, generates the
    gene tracks, problems and reference sequence for the genome, then stores
    the hub info.

    Args:
        parsed: Mapping with the keys 'hub', 'user', 'genomesFile',
            'isPublic' and 'users'. 'genomesFile' must provide 'genome'
            and 'trackDb'.

    Returns:
        Whatever storeHubInfo returns for the new hub.
    """
    # Will need to add a way to add additional folder depth for userID once authentication is added
    hubName = parsed['hub']
    owner = parsed['user']
    genomesInfo = parsed['genomesFile']

    print("Hub: ", hubName)

    # This will need to be updated if there are multiple genomes in file
    genomeName = genomesInfo['genome']

    info = dict(genome=genomeName,
                isPublic=parsed['isPublic'],
                users=parsed['users'])

    jbrowseData = os.path.join(cfg.jbrowsePath, cfg.dataPath)

    trackIncludes = getGeneTracks(genomeName, jbrowseData)

    # Generate problems for this genome inside their own transaction
    problemTxn = db.getTxn()
    genomeProblems = generateProblems(genomeName, jbrowseData, problemTxn)
    problemTxn.commit()

    # The problem track is included alongside the gene tracks
    trackIncludes.append(generateProblemTrack(genomeProblems))

    getRefSeq(genomeName, jbrowseData, trackIncludes)

    return storeHubInfo(owner, hubName, genomesInfo['trackDb'], info,
                        genomeName)
Beispiel #2
0
def updateAllModelLabels(data, labels):
    """Refresh the model summaries of every problem touched by a label change.

    For each problem returned by Tracks.getProblems, the stored model
    summaries are re-evaluated against ``labels``. Problems without any
    summaries get a pregen job submitted instead.

    Args:
        data: Mapping with at least 'user', 'hub' and 'track'; it is also
            passed through to the helpers called here.
        labels: The labels handed to modelSumLabelUpdate for each summary row.
    """
    txn = db.getTxn()
    # This is the problems that the label update is in. Read them inside the
    # same transaction as the summary updates, as getModelSummary does.
    problems = Tracks.getProblems(data, txn=txn)

    for problem in problems:
        modelSummaries = db.ModelSummaries(data['user'], data['hub'],
                                           data['track'], problem['chrom'],
                                           problem['chromStart'])

        # write=True because the summaries are stored again below
        modelsums = modelSummaries.get(txn=txn, write=True)

        if len(modelsums.index) < 1:
            # Nothing to update yet, so request models for this problem
            submitPregenJob(problem, data, txn=txn)
            continue

        newSum = modelsums.apply(modelSumLabelUpdate,
                                 axis=1,
                                 args=(labels, data, problem, txn))

        modelSummaries.put(newSum, txn=txn)

        checkGenerateModels(newSum, problem, data, txn=txn)

    txn.commit()
Beispiel #3
0
    def __init__(self, user, hub, track, problem, trackUrl=None, tasks=None):
        """Initialise a new job with status 'New'.

        Args:
            user: User the hub belongs to.
            hub: Name of the hub.
            track: Name of the track within the hub.
            problem: Problem this job is for (stored as given).
            trackUrl: URL of the track data. When None, it is looked up in
                the stored hub info for ``user``/``hub``.
            tasks: Existing tasks for the job; defaults to an empty dict.

        Raises:
            Exception: If the hub info lookup hits a TypeError (presumably
                because no hub info is stored for this user/hub).
            KeyError: If the hub info has no URL entry for ``track``.
        """
        self.user = user
        self.hub = hub
        self.track = track
        self.problem = problem
        self.tasks = {} if tasks is None else tasks

        # When adding labels during hub upload the URL hasn't been stored yet
        if trackUrl is None:
            txn = db.getTxn()
            hubInfo = db.HubInfo(user, hub).get(txn=txn)
            # Commit straight after the read so that no failure in the lookup
            # below can leave the transaction open.
            txn.commit()
            try:
                trackUrl = hubInfo['tracks'][track]['url']
            except TypeError as err:
                raise Exception(
                    'No track URL for user %r, track %r; hubInfo=%r, '
                    'stored hub keys=%r'
                    % (user, track, hubInfo,
                       db.HubInfo.db_key_tuples())) from err

        self.trackUrl = trackUrl
        self.status = 'New'
Beispiel #4
0
def resetAllJobs(data):
    """Reset every stored job, each in its own transaction.

    Args:
        data: Unused; kept so the signature matches the other job commands.
    """
    for keys in db.Job.db_key_tuples():
        txn = db.getTxn()
        jobDb = db.Job(*keys)
        # write=True because the job is modified and stored again, matching
        # resetJob and restartAllJobs.
        jobToReset = jobDb.get(txn=txn, write=True)
        jobToReset.resetJob()
        jobDb.put(jobToReset, txn=txn)
        txn.commit()
Beispiel #5
0
def checkForMoreJobs(task):
    """Check whether the task's problem needs further models generated.

    Loads the model summaries for the task's user/hub/track/problem and hands
    them to Models.checkGenerateModels within a single transaction.

    Args:
        task: Mapping with the keys 'user', 'hub', 'track' and 'problem';
            'problem' must provide 'chrom' and 'chromStart'.
    """
    txn = db.getTxn()
    problem = task['problem']

    summaryDb = db.ModelSummaries(task['user'], task['hub'], task['track'],
                                  problem['chrom'], problem['chromStart'])
    summaries = summaryDb.get(txn=txn, write=True)

    Models.checkGenerateModels(summaries, problem, task, txn=txn)
    txn.commit()
Beispiel #6
0
def resetJob(data):
    """Reset one job to a new state and return its dict form.

    Args:
        data: Mapping with the key 'jobId'.

    Returns:
        The result of calling the job's ``__dict__()`` after the reset.
    """
    txn = db.getTxn()
    store = db.Job(data['jobId'])

    # Fetch for writing, reset, and store again inside one transaction
    job = store.get(txn=txn, write=True)
    job.resetJob()
    store.put(job, txn=txn)

    txn.commit()
    return job.__dict__()
Beispiel #7
0
def restartAllJobs(data):
    """Restart the unfinished parts of every stored job.

    Each job is handled in its own transaction and is only written back when
    restartUnfinished reports a change.

    Args:
        data: Unused.
    """
    for jobKey in db.Job.db_key_tuples():
        txn = db.getTxn()
        store = db.Job(*jobKey)
        current = store.get(txn=txn, write=True)

        if current.restartUnfinished():
            store.put(current, txn=txn)

        txn.commit()
Beispiel #8
0
def updateTask(data):
    """Apply a task update to a job and follow up when the job is done.

    Args:
        data: Mapping with 'id' (the job id) and 'task' (the update to apply).

    Returns:
        The task as returned by the job's updateTask method.
    """
    jobId = data['id']
    taskUpdate = data['task']

    txn = db.getTxn()
    store = db.Job(jobId)
    job = store.get(txn=txn, write=True)
    updated = job.updateTask(taskUpdate)
    store.put(job, txn=txn)
    txn.commit()

    # Only after the job has been committed: see whether more work is needed
    if job.status == 'Done':
        checkForMoreJobs(updated)

    return updated
Beispiel #9
0
def removeLabel(data):
    """Remove one label and refresh the affected model summaries.

    Args:
        data: Mapping with 'user', 'hub', 'track', 'ref', 'start' and 'end'.

    Returns:
        The removed label converted with ``to_dict()``.
    """
    region = {
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end']
    }
    target = pd.Series(region)

    txn = db.getTxn()
    labelDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    removed, remaining = labelDb.remove(target, txn=txn)

    # Record that a change happened, then re-evaluate models with what is left
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, remaining)

    txn.commit()
    return removed.to_dict()
Beispiel #10
0
def storeHubInfo(user, hub, tracks, hubInfo, genome):
    """Build the per-track configuration for a hub and store the hub info.

    Tracks whose 'parent' is a super track become top-level entries. Tracks
    whose 'parent' is one of those entries are attached to it as children.
    For every top-level entry that has a child whose data file name contains
    'coverage', an entry is written to ``hubInfo['tracks']`` and
    checkForPrexistingLabels is called for that coverage URL.

    Args:
        user: User the hub belongs to.
        hub: Name of the hub.
        tracks: Iterable of track dicts (keys used here: 'superTrack',
            'parent', 'track', 'bigDataUrl', 'longLabel', 'shortLabel').
        hubInfo: Dict to store; its 'tracks' key is set by this function.
        genome: Genome passed through to checkForPrexistingLabels.

    Returns:
        The hub path in the form '/<user>/<hub>/'.
    """
    superList = []
    trackList = []
    hubInfoTracks = {}

    # Load the track list into something which can be converted
    for track in tracks:
        if 'superTrack' in track:
            superList.append(track)
            continue

        if 'parent' not in track:
            continue

        parentName = track['parent']

        # A child of a super track is a top-level entry. Move on to the next
        # track so it is added once and not also treated as a child.
        if any(sup['track'] == parentName for sup in superList):
            trackList.append(track)
            continue

        for parent in trackList:
            if parent['track'] == parentName:
                parent.setdefault('children', []).append(track)

    for track in trackList:
        # Determine which track is the coverage data. A track without any
        # children simply has no coverage. The last matching child wins.
        coverage = None
        for child in track.get('children', []):
            # [-1] also handles a URL that contains no '/'
            fileName = child['bigDataUrl'].rsplit('/', 1)[-1]
            if 'coverage' in fileName:
                coverage = child

        if coverage is None:
            continue

        # Add Data Url to config. All but the last ' | ' separated part of
        # the long label become nested categories under 'Data'.
        categories = ' / '.join(['Data'] + track['longLabel'].split(' | ')[:-1])

        hubInfoTracks[track['track']] = {'categories': categories,
                                         'key': track['shortLabel'],
                                         'url': coverage['bigDataUrl']}

        checkForPrexistingLabels(coverage['bigDataUrl'], user, hub, track, genome)

    txn = db.getTxn()
    hubInfo['tracks'] = hubInfoTracks
    db.HubInfo(user, hub).put(hubInfo, txn=txn)
    txn.commit()

    return '/%s/' % os.path.join(str(user), hub)
Beispiel #11
0
def updateLabel(data):
    """Store a label with the annotation from the request and refresh models.

    Args:
        data: Mapping with 'label', 'ref', 'start', 'end', 'user', 'hub'
            and 'track'.

    Returns:
        The item returned by the label store's ``add``, as a dict.
    """
    annotation = data['label']

    fields = {
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': annotation
    }
    labelToStore = pd.Series(fields)

    txn = db.getTxn()
    labelDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    stored, allLabels = labelDb.add(labelToStore, txn=txn)

    # Record the change before re-evaluating the models against all labels
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, allLabels)

    txn.commit()
    return stored.to_dict()
Beispiel #12
0
def getModelSummary(data):
    """Collect the model summaries of every problem for the request.

    Args:
        data: Mapping with 'user', 'hub' and 'track'; also passed to
            Tracks.getProblems.

    Returns:
        Dict mapping each problem's 'chromStart' to its summaries as a list
        of records. Problems without summaries are left out.
    """
    txn = db.getTxn()
    summariesByStart = {}

    for problem in Tracks.getProblems(data, txn=txn):
        # TODO: Replace 1 with user of hub NOT current user
        summaryDb = db.ModelSummaries(data['user'], data['hub'],
                                      data['track'], problem['chrom'],
                                      problem['chromStart'])
        summaries = summaryDb.get(txn=txn)

        if len(summaries.index) >= 1:
            summariesByStart[problem['chromStart']] = summaries.to_dict('records')

    txn.commit()
    return summariesByStart
Beispiel #13
0
def saveLabelGroup(group, user, hub, track, genome, coverageUrl):
    """Store one group of labels and request pregen for labelled problems.

    Args:
        group: DataFrame of labels with at least 'chromStart' and 'chrom'
            columns. The chromosome is taken from the first row once the
            rows are sorted by 'chromStart'.
        user: User the hub belongs to.
        hub: Name of the hub.
        track: Track dict; its 'track' entry is used as the track name.
        genome: Genome used to look up the problems of the chromosome.
        coverageUrl: Passed through to submitPregenWithData.
    """
    ordered = group.sort_values('chromStart', ignore_index=True)
    ordered['annotation'] = ordered.apply(fixNoPeaks, axis=1)

    # The index was reset by the sort, so label 0 is the first row
    chrom = ordered['chrom'].loc[0]

    txn = db.getTxn()

    labelDb = db.Labels(user, hub, track['track'], chrom)
    labelDb.put(ordered, txn=txn)

    problems = Tracks.getProblemsForChrom(genome, chrom, txn)

    # Keep only the problems for which checkIfProblemHasLabels is true
    hasLabels = problems.apply(checkIfProblemHasLabels, axis=1,
                               args=(ordered,))
    submitPregenWithData(problems[hasLabels], user, hub, track, coverageUrl)

    txn.commit()
Beispiel #14
0
def putModel(data):
    """Store a generated model and add its label error summary.

    Args:
        data: Mapping with 'modelData' (JSON readable by pandas.read_json),
            'penalty' and 'modelInfo'. 'modelInfo' must provide 'user',
            'hub', 'track' and 'problem' (with 'chrom' and 'chromStart').

    Returns:
        The 'modelInfo' entry of ``data``, unchanged.
    """
    modelData = pd.read_json(data['modelData'])
    modelData.columns = modelColumns
    modelInfo = data['modelInfo']
    problem = modelInfo['problem']
    penalty = data['penalty']
    user = modelInfo['user']
    hub = modelInfo['hub']
    track = modelInfo['track']

    txn = db.getTxn()
    db.Model(user, hub, track, problem['chrom'], problem['chromStart'],
             penalty).put(modelData, txn=txn)
    # Read the labels inside the same transaction as the writes around it,
    # like every other read in this function's transaction scope.
    labels = db.Labels(user, hub, track, problem['chrom']).get(txn=txn)
    errorSum = calculateModelLabelError(modelData, labels, problem, penalty)
    db.ModelSummaries(user, hub, track, problem['chrom'],
                      problem['chromStart']).add(errorSum, txn=txn)
    txn.commit()

    return modelInfo
Beispiel #15
0
def getDataPoints():
    """Assemble the feature rows and log-penalty targets from the stored models.

    For every stored model summary that has at least one region, each penalty
    of a model with peaks and no errors yields one datapoint: a copy of the
    stored features for the same key plus a 'logPenalty' entry.

    Returns:
        A tuple ``(X, Y)`` where ``Y`` is the 'logPenalty' column and ``X``
        is the remaining columns after dropBadCols. None when check() fails
        or when no datapoint could be produced.
    """
    if not check():
        return

    # Collect one-row frames and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied on every call.
    frames = []

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()
        if modelSum.empty:
            continue

        if modelSum['regions'].max() < 1:
            continue

        withPeaks = modelSum[modelSum['numPeaks'] > 0]

        noError = withPeaks[withPeaks['errors'] < 1]

        logPenalties = np.log10(noError['penalty'].astype(float))

        featuresDb = db.Features(*key)
        featuresTxn = db.getTxn()
        features = featuresDb.get(txn=featuresTxn, write=True)

        for penalty in logPenalties:
            datapoint = features.copy()

            datapoint['logPenalty'] = penalty

            # NOTE(review): features is presumably a Series; a dict or a
            # DataFrame is handled too, as DataFrame.append used to do.
            if isinstance(datapoint, pd.DataFrame):
                frames.append(datapoint)
            else:
                frames.append(pd.Series(datapoint).to_frame().T)

        featuresTxn.commit()

    # TODO: Save datapoints, update ones which have changed, not all of them every time

    if not frames:
        # Nothing usable stored yet; report it like the failed check() above
        # instead of failing on the missing 'logPenalty' column.
        return

    # Transposed Series rows are object typed; infer_objects restores dtypes
    dataPoints = pd.concat(frames, ignore_index=True).infer_objects()

    Y = dataPoints['logPenalty']
    X = dataPoints.drop(columns='logPenalty')

    return dropBadCols(X), Y
Beispiel #16
0
def addLabel(data):
    """Store a new label with the annotation 'unknown' and refresh models.

    Args:
        data: Mapping with 'ref', 'start', 'end', 'user', 'hub' and 'track'.

    Returns:
        ``data``, unchanged.
    """
    label = 'unknown'

    # Duplicated because calls from updateLabel are causing freezing
    newLabel = pd.Series({
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': label
    })

    txn = db.getTxn()
    # Only the full label set is needed here; the added item is not used
    _, labels = db.Labels(data['user'], data['hub'], data['track'],
                          data['ref']).add(newLabel, txn=txn)
    db.Prediction('changes').increment(txn=txn)
    Models.updateAllModelLabels(data, labels)
    txn.commit()
    return data
Beispiel #17
0
def getJob(data):
    """Get a job by ID and return its dict form.

    Args:
        data: Mapping with the key 'id'.

    Returns:
        The result of calling the stored job's ``__dict__()``.
    """
    txn = db.getTxn()
    # Read through the transaction opened above. It was previously created
    # and committed without ever being used for the read.
    output = db.Job(data['id']).get(txn=txn).__dict__()
    txn.commit()
    return output
Beispiel #18
0
def getTrackInfo(data):
    """Return the stored info of one track of a hub.

    Args:
        data: Mapping with 'user', 'hub' and 'track'.

    Returns:
        The entry for ``data['track']`` under the hub info's 'tracks' key.
    """
    txn = db.getTxn()
    hubDb = db.HubInfo(data['user'], data['hub'])
    stored = hubDb.get(txn=txn)
    txn.commit()

    # The read is finished, so the lookup happens outside the transaction
    tracks = stored['tracks']
    return tracks[data['track']]
Beispiel #19
0
 def putNewJob(self, checkExists=True):
     """Store this job inside a transaction of its own.

     Opens a transaction, delegates to putNewJobWithTxn, commits, and
     returns whatever the delegate returned.
     """
     jobTxn = db.getTxn()
     result = self.putNewJobWithTxn(jobTxn, checkExists=checkExists)
     jobTxn.commit()
     return result