def putFeatures(data):
    """Saves features to be later used for prediction and learning.

    ``data`` is a mapping that must provide:

    * ``'data'``: a list holding exactly one feature record,
    * ``'user'``, ``'hub'``, ``'track'``: identifiers passed to ``db.Features``,
    * ``'problem'``: a mapping with ``'chrom'`` and ``'chromStart'``.

    The single record is wrapped in a ``pd.Series`` and written with
    ``db.Features(...).put``.

    Raises:
        Exception: if ``data['data']`` is not a list of exactly one element.
            The offending payload is passed as the exception argument.
    """
    payload = data['data']

    # Reject unless BOTH conditions hold: it is a list, and it has one entry.
    # (Joining the negated checks with `and` would let any list through,
    # whatever its length, and would call len() on non-list payloads.)
    if not isinstance(payload, list) or len(payload) != 1:
        raise Exception(payload)

    problem = data['problem']
    features = db.Features(data['user'],
                           data['hub'],
                           data['track'],
                           problem['chrom'],
                           problem['chromStart'])
    features.put(pd.Series(payload[0]))
def doPrediction(data, problem, txn=None):
    """Run the stored model against the stored features of one problem.

    The features are looked up via ``db.Features`` using ``data['user']``,
    ``data['hub']``, ``data['track']`` and the problem's ``'chrom'`` /
    ``'chromStart'``. The columns listed under ``db.Prediction('badCols')``
    are dropped before the features are handed to ``predictWithFeatures``.

    ``txn`` is accepted but not used by this function.

    Returns:
        The value produced by ``predictWithFeatures``, or ``False`` when
        the stored features are empty, the stored model is not a ``dict``,
        or the prediction is ``None``.
    """
    featuresDb = db.Features(data['user'],
                             data['hub'],
                             data['track'],
                             problem['chrom'],
                             problem['chromStart'])
    stored = featuresDb.get()
    if stored.empty:
        return False

    model = db.Prediction('model').get()
    if not isinstance(model, dict):
        return False

    badCols = db.Prediction('badCols').get()
    result = predictWithFeatures(stored.drop(labels=badCols), model)
    return False if result is None else result
def getDataPoints():
    """Build the (X, Y) training set from model summaries and features.

    For every key in ``db.ModelSummaries.db_key_tuples()`` whose summary is
    non-empty and whose ``'regions'`` maximum is at least 1, the rows with
    ``numPeaks > 0`` and ``errors < 1`` are selected. Each selected row
    contributes one data point: a copy of the features stored under the same
    key, with ``log10(penalty)`` added as ``'logPenalty'``.

    Returns:
        ``None`` when ``check()`` is falsy. Otherwise a tuple
        ``(dropBadCols(X), Y)`` where ``Y`` is the ``'logPenalty'`` column
        and ``X`` is every other column.

    Note:
        If no data point is collected, the ``'logPenalty'`` lookup on the
        empty frame raises ``KeyError``.
    """
    if not check():
        return

    # Rows are gathered in a list and turned into a frame once at the end.
    # DataFrame.append was removed in pandas 2.0, and calling it per row
    # re-copied the whole frame on every iteration.
    rows = []

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()

        if modelSum.empty:
            continue

        if modelSum['regions'].max() < 1:
            continue

        withPeaks = modelSum[modelSum['numPeaks'] > 0]
        noError = withPeaks[withPeaks['errors'] < 1]
        logPenalties = np.log10(noError['penalty'].astype(float))

        featuresDb = db.Features(*key)
        featuresTxn = db.getTxn()
        # NOTE(review): the read is issued with write=True although nothing
        # is written back here - confirm whether that is intentional.
        features = featuresDb.get(txn=featuresTxn, write=True)

        for penalty in logPenalties:
            datapoint = features.copy()
            datapoint['logPenalty'] = penalty
            rows.append(datapoint)

        featuresTxn.commit()

    # One row per collected data point, renumbered 0..n-1 (the equivalent of
    # the previous ignore_index=True).
    dataPoints = pd.DataFrame(rows).reset_index(drop=True)

    # TODO: Save datapoints, update ones which have changed, not all of them every time
    Y = dataPoints['logPenalty']
    # axis must be given by keyword; the positional form was removed in
    # pandas 2.0.
    X = dataPoints.drop('logPenalty', axis=1)

    return dropBadCols(X), Y
def getFeatures(data):
    """Return the stored features for the problem described by ``data``.

    ``data`` must provide ``'user'``, ``'hub'``, ``'track'`` and a
    ``'problem'`` mapping with ``'chrom'`` and ``'chromStart'``; these are
    the arguments given to ``db.Features``, whose ``get()`` result is
    returned unchanged.
    """
    region = data['problem']
    return db.Features(
        data['user'],
        data['hub'],
        data['track'],
        region['chrom'],
        region['chromStart'],
    ).get()