def test_getValues(self):
    """Check ``MaskedArray.getMasked`` row-wise and column-wise.

    The fixture is a 3x5 frame (rows ``r1``..``r3`` holding the first,
    second and third powers of 0..4) paired with an explicit boolean
    mask. The expected lists below correspond to the entries whose mask
    flag is ``False``, grouped per row (``rows=True``) or per column
    (``rows=False``).
    """
    frame = pd.DataFrame({
        'r1': np.arange(5),
        'r2': np.arange(5)**2,
        'r3': np.arange(5)**3,
    }).T
    mask_flags = [
        [True, True, False, False, True],
        [True, True, False, False, False],
        [False, True, True, True, True],
    ]
    masked = MaskedArray(data=frame, mask=mask_flags)

    # One group per row of the frame.
    by_row = list(masked.getMasked(rows=True))
    self.assertEqual(by_row, [[2, 3], [4, 9, 16], [0]])

    # One group per column; the second column is fully masked-out -> empty.
    by_col = list(masked.getMasked(rows=False))
    self.assertEqual(by_col, [[0], [], [2, 4], [3, 9], [16]])
def score_model(model, data, metric, cols=None):
    """Score ``model``'s predictions on entries altered by masking.

    A ``MaskedArray`` is built from ``data`` and ``generate()`` is called
    on it; its masked matrix is wrapped in a DataFrame carrying the index
    and columns of ``data`` and handed to ``model.predict``. No fitting
    happens here.

    Args:
        model: Object exposing ``predict(frame)``; the result must offer
            ``.columns`` and column selection by label.
        data: Input DataFrame holding the reference values.
        metric: Callable invoked as ``metric(reference, predicted)`` on
            the values at the altered positions.
        cols: Column labels to score. Defaults to all columns of ``data``.
            Only labels also present in the prediction are used.

    Returns:
        Whatever ``metric`` returns.
    """
    wanted = data.columns if cols is None else cols

    # Build the masked version of the input.
    masker = MaskedArray(data=data)
    masker.generate()
    masked_frame = pd.DataFrame(
        masker.getMaskedMatrix(),
        index=data.index,
        columns=data.columns,
    )

    # Predict directly on the masked frame and keep the shared columns.
    predicted = model.predict(masked_frame)
    shared = np.intersect1d(wanted, predicted.columns)

    # Positions where the masked frame differs from the input are scored.
    reference = data[shared].values
    altered = masked_frame[shared].values != reference
    return metric(reference[altered], predicted[shared].values[altered])
def test_generate(self):
    """Smoke test: ``generate()`` runs on the shared raw data.

    Builds a ``MaskedArray`` over ``test_data.rawData`` with
    ``dropout=0.1`` and calls ``generate()``; nothing is asserted beyond
    the call completing without raising.
    """
    masked = MaskedArray(data=test_data.rawData, dropout=0.1)
    masked.generate()