def check():
    """Decide whether enough changes and labels exist, per the stored flags.

    Returns False unless the stored 'changes' value is greater than
    ``cfg.numChanges``. Past that threshold the 'EnoughLabels' flag is
    consulted: if it is already truthy the answer is True; otherwise
    ``checkLabelRegions()`` is called and, when it succeeds, the flag is
    set to True and True is returned.

    Returns:
        bool: True when the threshold is exceeded and the 'EnoughLabels'
        flag is (or has just been) set, False otherwise.
    """
    changeCount = db.Prediction('changes').get()
    if not changeCount > cfg.numChanges:
        return False

    # Seed the flag on first use so the read below has a stored value.
    if not db.Prediction.has_key('EnoughLabels'):
        db.Prediction('EnoughLabels').put(False)

    if db.Prediction('EnoughLabels').get():
        return True

    if not checkLabelRegions():
        return False

    # Remember the positive result so checkLabelRegions() is skipped next time.
    db.Prediction('EnoughLabels').put(True)
    return True
def dropBadCols(df):
    """Drop every column of ``df`` that holds a NaN or a negative infinity.

    Negative infinities are first turned into NaN, then every column
    containing at least one NaN is removed. Positive infinities are left
    untouched. The labels of the removed columns are stored under the
    'badCols' Prediction key so they can be reused at prediction time.

    Args:
        df: The feature table to clean (used as a pandas DataFrame:
            ``replace``, ``dropna(axis=1)`` and ``columns`` are called on it).

    Returns:
        A copy of ``df`` without the offending columns.
    """
    # NOTE(review): this changes pandas' process-wide display options on every
    # call and looks like leftover debugging; kept to preserve behavior.
    pd.set_option("display.max_rows", None, "display.max_columns", None)

    # Use np.inf: the np.Inf alias was removed in NumPy 2.0.
    withoutNegInf = df.replace(-np.inf, np.nan)
    output = withoutNegInf.dropna(axis=1)

    # Take a note of which columns were dropped so that the same columns can
    # be removed later during prediction. The set difference of the two column
    # indices yields exactly the dropped labels (in arbitrary order).
    badCols = list(set(df.columns) - set(output.columns))
    db.Prediction('badCols').put(badCols)

    return output
def doPrediction(data, problem, txn=None):
    """Predict for one problem using the stored model and stored features.

    Args:
        data: Mapping providing the 'user', 'hub' and 'track' keys.
        problem: Mapping providing the 'chrom' and 'chromStart' keys.
        txn: Accepted but not used by this function.
            NOTE(review): none of the db reads below receive it - confirm
            whether that is intentional.

    Returns:
        The value returned by ``predictWithFeatures``, or False when the
        features are empty, the stored model is not a dict, or the
        prediction is None.
    """
    featureStore = db.Features(
        data['user'],
        data['hub'],
        data['track'],
        problem['chrom'],
        problem['chromStart'],
    )
    features = featureStore.get()
    if features.empty:
        return False

    model = db.Prediction('model').get()
    if not isinstance(model, dict):
        return False

    # Remove the labels recorded under 'badCols' before predicting.
    # NOTE(review): drop() is called with its default axis - presumably the
    # features are indexed by feature name; verify against db.Features.
    badCols = db.Prediction('badCols').get()
    usableFeatures = features.drop(labels=badCols)

    result = predictWithFeatures(usableFeatures, model)
    return False if result is None else result
def removeLabel(data):
    """Remove the label described by ``data`` and record the change.

    Inside one transaction the label is removed from the matching Labels
    store and the 'changes' counter is incremented. The model labels are
    then updated with the remaining labels before the transaction commits.

    Args:
        data: Mapping providing the 'user', 'hub', 'track', 'ref', 'start'
            and 'end' keys.

    Returns:
        dict: The removed label, converted with ``to_dict()``.
    """
    region = {
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
    }
    target = pd.Series(region)

    txn = db.getTxn()
    labelStore = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    removedLabel, remaining = labelStore.remove(target, txn=txn)
    db.Prediction('changes').increment(txn=txn)

    # The model update runs before the commit and is not passed the txn.
    Models.updateAllModelLabels(data, remaining)
    txn.commit()

    return removedLabel.to_dict()
def updateLabel(data):
    """Store the label described by ``data`` with its annotation.

    Inside one transaction the label is added to the matching Labels store
    and the 'changes' counter is incremented. The model labels are then
    updated with the resulting labels before the transaction commits.

    Args:
        data: Mapping providing the 'label', 'user', 'hub', 'track', 'ref',
            'start' and 'end' keys.

    Returns:
        dict: The item returned by the Labels store, converted with
        ``to_dict()``.
    """
    annotation = data['label']
    fields = {
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': annotation,
    }
    labelRow = pd.Series(fields)

    txn = db.getTxn()
    labelStore = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    storedItem, allLabels = labelStore.add(labelRow, txn=txn)
    db.Prediction('changes').increment(txn=txn)

    # The model update runs before the commit and is not passed the txn.
    Models.updateAllModelLabels(data, allLabels)
    txn.commit()

    return storedItem.to_dict()
def addLabel(data):
    """Add a new label with the annotation 'unknown' for the given region.

    Inside one transaction the label is added to the matching Labels store
    and the 'changes' counter is incremented. The model labels are then
    updated with the resulting labels before the transaction commits.

    The leftover debugging output (110 banner lines plus a dump of ``data``
    on every call) has been removed; nothing is written to stdout anymore.

    Args:
        data: Mapping providing the 'user', 'hub', 'track', 'ref', 'start'
            and 'end' keys.

    Returns:
        The ``data`` argument, unchanged.
    """
    label = 'unknown'

    # Duplicated (rather than delegating to updateLabel) because calls from
    # updateLabel are causing freezing.
    newLabel = pd.Series({
        'chrom': data['ref'],
        'chromStart': data['start'],
        'chromEnd': data['end'],
        'annotation': label,
    })

    txn = db.getTxn()
    labelDb = db.Labels(data['user'], data['hub'], data['track'], data['ref'])
    # Only the updated label set is needed here; the added item is unused.
    _, labels = labelDb.add(newLabel, txn=txn)
    db.Prediction('changes').increment(txn=txn)

    # The model update runs before the commit and is not passed the txn.
    Models.updateAllModelLabels(data, labels)
    txn.commit()

    return data
def learn(X, Y):
    """Fit a model with ``cvglmnet`` and store it under the 'model' key.

    Args:
        X: Inputs; must offer ``to_numpy()``.
        Y: Targets; must offer ``to_numpy()``.

    Returns:
        None. The fitted object is written to ``db.Prediction('model')``.
    """
    # cvglmnet is handed private copies of the underlying arrays.
    inputs = X.to_numpy().copy()
    targets = Y.to_numpy().copy()

    fitted = cvglmnet(x=inputs, y=targets)
    db.Prediction('model').put(fitted)
def makePrediction(data):
    """Print the object stored under the 'model' Prediction key.

    Args:
        data: Accepted but not used by this function.

    Returns:
        None.
    """
    print(db.Prediction('model').get())