def __init__(self, refTable, keep, objective, times, modelNames=None):
    self.estimatedParams = None
    self.trueParams = None
    self.refTable = refTable
    self.sumStatArray = toArray(self.refTable, 'sumstat')
    self.paramArray = toArray(self.refTable, 'param')
    self.indexList = np.arange(len(self.refTable.index))
    self.picks = []
    self.keep = keep
    self.objective = objective
    self.times = times
    self.modelNames = modelNames
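# toArray is used throughout but not defined in this section. A minimal
# sketch, assuming the reference table is a pandas DataFrame whose cells in
# 'param', 'sumstat', and 'idx' hold array-like values (the implementation
# below is an assumption, not the library's confirmed helper):
import numpy as np

def toArray(table, columnName):
    """Stack the array-valued cells of `columnName` row-wise into a 2-D array."""
    return np.vstack([np.atleast_1d(cell) for cell in table[columnName]])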
def run(self): """Runs according to settings (these must be specified by user.)""" # Extract sum stats and model indices from ref table indices = toArray(self._refTable, 'idx').flatten() sumStat = toArray(self._refTable, 'sumstat') print(sumStat.shape) # Create a classifier # TODO according to user-specified settings # TODO 2: Implement random forest without sklearn dependency model = Sequential() model.add( Dense(1000, input_dim=sumStat.shape[1], kernel_initializer='glorot_uniform', activation='relu')) model.add( Dense(100, kernel_initializer='glorot_uniform', activation='relu')) model.add( Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid')) # Compile model model.compile(loss='binary_crossentropy', optimizer='adam') # Do a 5-fold cross-validation # accuracies = cross_val(sumStat, indices, model, 5) # print("Neural net cross-val accuracies: ") # print(accuracies) # Fit on summary statistics (the more the better) model.fit(sumStat, indices, batch_size=64, epochs=2, shuffle=True, validation_split=0.2) # Predict probabilities of models on summary obs sumStatTest = np.array(self._pp.scaledSumStatObsData).reshape(1, -1) print("Probability of model 1 is: \n") pred = model.predict_proba(sumStatTest) return pred
def getEstimates(self, subset):
    """
    Compute the mean of each parameter in the subset.

    :param subset: the subset table.
    :return: the means (estimates)
    """
    paramArray = toArray(subset, 'param')
    return np.mean(paramArray, axis=0)
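# A hypothetical usage sketch of getEstimates: pass the accepted slice of
# the reference table and get one posterior-mean estimate per parameter
# (`evaluator` and `accepted` are illustrative names, not from the source):
subset = evaluator.refTable.iloc[accepted]  # rows kept by the rejection step
estimates = evaluator.getEstimates(subset)  # shape: (n_params,)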
def run(self): """Runs according to settings (these must be specified by user.)""" rf = RandomForestClassifier(**self._settings['specs']) # Extract sum stats and model indices from ref table indices = toArray(self._refTable, 'idx').flatten() sumStat = toArray(self._refTable, 'sumstat') # Do a 5-fold cross-validation accuracies = self._cross_val(sumStat, indices, rf, 5) # Fit on summary statistics (the more the better) rf.fit(sumStat, indices) # Predict probabilities of models on summary obs sumStatTest = np.array(self._pp.scaledSumStatObsData).reshape(1, -1) pred = rf.predict_proba(sumStatTest) return {mod : np.round(pred[0,i],3) for i, mod in enumerate(self._modelNames)}
def report(self, outputdir):
    """
    Compute the prediction error if the objective is inference.
    Compute the confusion matrix if the objective is comparison.
    """
    if self.objective == "comparison":
        predictions = self.compute()
        true = toArray(self.refTable, 'idx')[self.picks, :]
        actual = pd.Series(true[:, 0], name="Actual")
        predicted = pd.Series(predictions[:, 0], name="Predicted")
        confusionMatrix = pd.crosstab(actual, predicted)
        # DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy()
        self.saveConfusion(confusionMatrix.to_numpy(), outputdir)
        return confusionMatrix

    if self.objective == "inference":
        self.estimatedParams = self.compute()
        self.trueParams = self.paramArray[self.picks, :]
        self.saveEstimates(outputdir)
        sumSqDiff = np.sum((self.estimatedParams - self.trueParams) ** 2, axis=0)
        variance = np.var(self.trueParams, axis=0)
        # np.float was removed from numpy; plain float() assumes a single parameter
        return float(sumSqDiff / variance)
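# A toy check of the inference-branch error, sum((est - true)^2) / var(true),
# for a single parameter (the numbers are made up for illustration):
import numpy as np
true = np.array([[0.9], [1.1], [1.0]])
est = np.array([[1.0], [1.0], [1.0]])
sumSqDiff = np.sum((est - true) ** 2, axis=0)  # 0.02
variance = np.var(true, axis=0)                # 0.00667
print(float(sumSqDiff / variance))             # 3.0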
def initParamTable(self):
    """Initialise the parameter table."""
    paramArray = toArray(self.table, 'param')
    return pd.DataFrame(paramArray, columns=self.paramNames)
def __init__(self, subset, paramNames):
    self.paramArray = toArray(subset, 'param')
    self.paramNames = paramNames
def getColumn(self, columnName):
    """Returns given column as numpy array."""
    return toArray(self._table, columnName)
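# Hypothetical usage of getColumn (`wrapper` is an illustrative name for the
# object holding the reference table):
sumStats = wrapper.getColumn('sumstat')  # shape: (n_simulations, n_stats)
params = wrapper.getColumn('param')      # shape: (n_simulations, n_params)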