Esempio n. 1
0
 def __init__(self, refTable, keep, objective, times, modelNames=None):
     """
     Store the reference table together with the caller's settings.

     :param refTable: the reference table; its 'sumstat' and 'param'
         columns are converted with toArray and its index is enumerated.
     :param keep: stored unchanged as self.keep.
     :param objective: stored unchanged as self.objective.
     :param times: stored unchanged as self.times.
     :param modelNames: optional, stored unchanged as self.modelNames.
     """
     # Settings handed in by the caller.
     self.refTable = refTable
     self.keep = keep
     self.objective = objective
     self.times = times
     self.modelNames = modelNames

     # Arrays derived from the reference table.
     self.sumStatArray = toArray(refTable, 'sumstat')
     self.paramArray = toArray(refTable, 'param')
     # One position per row of the reference table.
     self.indexList = np.arange(len(refTable.index))

     # Results start out empty; they are filled in outside this method.
     self.picks = []
     self.estimatedParams = None
     self.trueParams = None
Esempio n. 2
0
    def run(self):
        """
        Train a small dense network on the reference table and score the
        observed summary statistics.

        The network has two hidden ReLU layers (1000 and 100 units) and a
        single sigmoid output, compiled with binary cross-entropy and Adam.
        NOTE(review): a single sigmoid output presumably means the 'idx'
        column holds exactly two model labels (0/1) - confirm with callers.

        :return: the network output for the observed summary statistics,
            as returned by ``model.predict`` on a single row.
        """

        # Extract sum stats and model indices from ref table
        indices = toArray(self._refTable, 'idx').flatten()
        sumStat = toArray(self._refTable, 'sumstat')

        print(sumStat.shape)

        # Create a classifier
        # TODO according to user-specified settings
        # TODO 2: Implement random forest without sklearn dependency

        model = Sequential()
        model.add(
            Dense(1000,
                  input_dim=sumStat.shape[1],
                  kernel_initializer='glorot_uniform',
                  activation='relu'))
        model.add(
            Dense(100, kernel_initializer='glorot_uniform', activation='relu'))
        model.add(
            Dense(1, kernel_initializer='glorot_uniform',
                  activation='sigmoid'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam')

        # TODO: 5-fold cross-validation of the network is currently disabled.

        # Fit on summary statistics (the more the better)
        model.fit(sumStat,
                  indices,
                  batch_size=64,
                  epochs=2,
                  shuffle=True,
                  validation_split=0.2)

        # Predict probabilities of models on summary obs
        sumStatTest = np.array(self._pp.scaledSumStatObsData).reshape(1, -1)
        print("Probability of model 1 is: \n")
        # Sequential.predict_proba was deprecated and later removed from
        # Keras; for a sigmoid output it returned the same values as predict.
        # verbose=0 keeps the call silent, as predict_proba was by default.
        pred = model.predict(sumStatTest, verbose=0)

        return pred
Esempio n. 3
0
 def getEstimates(self,subset):
     """
     Average every parameter over the rows of the given subset.

     :param subset: the subset table; its 'param' column is converted
         with toArray.
     :return: the per-parameter means (mean taken along axis 0)
     """
     return np.mean(toArray(subset, 'param'), axis=0)
Esempio n. 4
0
    def run(self):
        """
        Fit a random forest on the reference table and score the observed
        summary statistics.

        The forest is configured from ``self._settings['specs']``.

        :return: dict mapping each name in ``self._modelNames`` to the
            predicted probability in the matching column, rounded to three
            decimals.
        """

        forest = RandomForestClassifier(**self._settings['specs'])

        # Training data: model labels and summary statistics per simulation.
        labels = toArray(self._refTable, 'idx').flatten()
        features = toArray(self._refTable, 'sumstat')

        # 5-fold cross-validation; the scores are not used any further here.
        self._cross_val(features, labels, forest, 5)

        # Train on the complete reference table.
        forest.fit(features, labels)

        # Score the observed data, reshaped into a single-row matrix.
        observed = np.array(self._pp.scaledSumStatObsData).reshape(1, -1)
        probabilities = forest.predict_proba(observed)[0]

        result = {}
        for column, modelName in enumerate(self._modelNames):
            result[modelName] = np.round(probabilities[column], 3)
        return result
Esempio n. 5
0
    def report(self, outputdir):
        """
        Compute the prediction error if the objective is inference.
        Compute the confusion matrix if the objective is comparison.

        :param outputdir: forwarded to saveConfusion / saveEstimates.
        :return: the confusion matrix (a pandas crosstab of actual versus
            predicted labels) for "comparison"; for "inference" the sum of
            squared errors divided by the variance of the true values, as a
            float; None for any other objective.
        :raises TypeError: for "inference" when the ratio holds more than
            one element and therefore cannot be converted to a float.
        """

        if self.objective == "comparison":
            predictions = self.compute()
            true = toArray(self.refTable, 'idx')[self.picks, :]
            actual = pd.Series(true[:, 0], name="Actual")
            predicted = pd.Series(predictions[:, 0], name="Predicted")
            confusionMatrix = pd.crosstab(actual, predicted)
            # DataFrame.as_matrix() was removed in pandas 1.0; .values gives
            # the same array on every pandas version.
            self.saveConfusion(confusionMatrix.values, outputdir)

            return confusionMatrix

        if self.objective == "inference":
            self.estimatedParams = self.compute()
            self.trueParams = self.paramArray[self.picks, :]
            self.saveEstimates(outputdir)
            sumSqDiff = np.sum(
                (self.estimatedParams - self.trueParams) ** 2, axis=0)
            variance = np.var(self.trueParams, axis=0)

            # np.float was only an alias of the builtin float and was removed
            # in NumPy 1.24. Squeezing first turns a one-element array into a
            # 0-d array, since converting arrays with ndim > 0 is deprecated.
            return float(np.squeeze(sumSqDiff / variance))
Esempio n. 6
0
 def initParamTable(self):
     """
     Build the parameter table.

     :return: a DataFrame holding the 'param' column of self.table
         (converted with toArray), with self.paramNames as column labels.
     """
     values = toArray(self.table, 'param')
     paramTable = pd.DataFrame(data=values, columns=self.paramNames)
     return paramTable
Esempio n. 7
0
 def __init__(self, subset, paramNames):
     """
     Keep the parameter names and the 'param' column of the subset.

     :param subset: the subset table; its 'param' column is converted
         with toArray.
     :param paramNames: stored unchanged as self.paramNames.
     """
     self.paramNames = paramNames
     self.paramArray = toArray(subset, 'param')
Esempio n. 8
0
    def getColumn(self, columnName):
        """
        Convert one column of the wrapped table with toArray.

        :param columnName: name of the column to extract.
        :return: whatever toArray yields for that column (presumably a
            numpy array - confirm against toArray).
        """
        column = toArray(self._table, columnName)
        return column