def test_basic_niche_sort_immutable(self):
    # niche_sort must leave its inputs untouched.  It is run on a table
    # and gradient whose samples (s2 before s1) and features (o2 before
    # o1) are out of order, and both inputs are then compared against
    # independently built, identical copies.
    sample_ids = ['s2', 's1', 's3', 's4', 's5']
    feature_ids = ['o2', 'o1', 'o3', 'o4']

    def _build_table():
        # Features are rows in the literal; transpose to samples x features.
        counts = [[1, 0, 1, 0, 0],
                  [1, 1, 0, 0, 0],
                  [0, 0, 1, 1, 0],
                  [0, 0, 0, 1, 1]]
        frame = pd.DataFrame(counts, columns=sample_ids, index=feature_ids)
        return frame.T

    def _build_gradient():
        return pd.Series([2, 1, 3, 4, 5], index=sample_ids)

    table = _build_table()
    gradient = _build_gradient()
    exp_table = _build_table()
    exp_gradient = _build_gradient()

    # The return value is deliberately ignored; only side effects on the
    # arguments are being checked here.
    niche_sort(table, gradient)

    pdt.assert_frame_equal(exp_table, table)
    pdt.assert_series_equal(exp_gradient, gradient)
def test_basic_niche_sort_error(self):
    # Passing the string 'rawr' as niche_estimator is expected to make
    # niche_sort raise ValueError.
    sample_ids = ['s1', 's2', 's3', 's4', 's5']
    feature_ids = ['o1', 'o2', 'o3', 'o4']
    counts = [[1, 1, 0, 0, 0],
              [0, 1, 1, 0, 0],
              [0, 0, 1, 1, 0],
              [0, 0, 0, 1, 1]]

    # Features are rows in the literal; transpose to samples x features.
    table = pd.DataFrame(counts, columns=sample_ids, index=feature_ids).T
    gradient = pd.Series([1, 2, 3, 4, 5], index=sample_ids)

    self.assertRaises(ValueError, niche_sort, table, gradient,
                      niche_estimator='rawr')
def test_basic_niche_sort_error(self):
    # niche_sort is expected to raise ValueError when niche_estimator is
    # the string 'rawr'.
    samples = ['s1', 's2', 's3', 's4', 's5']
    features = ['o1', 'o2', 'o3', 'o4']

    # One row per feature in the literal, transposed so samples are rows.
    by_feature = pd.DataFrame(
        [[1, 1, 0, 0, 0],
         [0, 1, 1, 0, 0],
         [0, 0, 1, 1, 0],
         [0, 0, 0, 1, 1]],
        columns=samples,
        index=features)
    table = by_feature.T
    gradient = pd.Series([1, 2, 3, 4, 5], index=samples)

    bad_estimator = 'rawr'
    with self.assertRaises(ValueError):
        niche_sort(table, gradient, niche_estimator=bad_estimator)
def test_basic_niche_sort_lambda(self):
    # Features o2 and o3 start out swapped in the input table.  Sorting
    # with a user-supplied callable estimator is expected to return the
    # features in o1, o2, o3, o4 order.
    sample_ids = ['s1', 's2', 's3', 's4', 's5']

    # Features are rows in the literals; transpose to samples x features.
    table = pd.DataFrame(
        [[1, 1, 0, 0, 0],
         [0, 0, 1, 1, 0],
         [0, 1, 1, 0, 0],
         [0, 0, 0, 1, 1]],
        columns=sample_ids,
        index=['o1', 'o3', 'o2', 'o4']).T
    gradient = pd.Series([1, 2, 3, 4, 5], index=sample_ids)
    exp_table = pd.DataFrame(
        [[1, 1, 0, 0, 0],
         [0, 1, 1, 0, 0],
         [0, 0, 1, 1, 0],
         [0, 0, 0, 1, 1]],
        columns=sample_ids,
        index=['o1', 'o2', 'o3', 'o4']).T

    def _dumb_estimator(v, gradient):
        # Presence/absence weighted mean of the gradient.  Work on a copy
        # so the object handed in by the caller is never overwritten in
        # place; the estimator must not have side effects on its input.
        presence = v.copy()
        presence[presence > 0] = 1
        values = presence / presence.sum()
        return np.dot(gradient, values)

    res_table = niche_sort(table, gradient,
                           niche_estimator=_dumb_estimator)
    pdt.assert_frame_equal(exp_table, res_table)
def test_basic_niche_sort(self):
    # The input is already ordered (gradient increases from s1 to s5 and
    # features o1..o4 step along it), so the test expects niche_sort to
    # return a frame equal to the one it was given.
    sample_ids = ['s1', 's2', 's3', 's4', 's5']
    feature_ids = ['o1', 'o2', 'o3', 'o4']
    counts = [[1, 1, 0, 0, 0],
              [0, 1, 1, 0, 0],
              [0, 0, 1, 1, 0],
              [0, 0, 0, 1, 1]]

    # Features are rows in the literal; transpose to samples x features.
    table = pd.DataFrame(counts, columns=sample_ids, index=feature_ids).T
    gradient = pd.Series([1, 2, 3, 4, 5], index=sample_ids)

    pdt.assert_frame_equal(table, niche_sort(table, gradient))
def test_basic_niche_sort(self):
    # With samples s1..s5 on an increasing gradient and features o1..o4
    # already laid out along it, the sorted result is expected to equal
    # the unsorted input.
    samples = ['s1', 's2', 's3', 's4', 's5']
    features = ['o1', 'o2', 'o3', 'o4']

    # One row per feature in the literal, transposed so samples are rows.
    by_feature = pd.DataFrame(
        [[1, 1, 0, 0, 0],
         [0, 1, 1, 0, 0],
         [0, 0, 1, 1, 0],
         [0, 0, 0, 1, 1]],
        columns=samples,
        index=features)
    table = by_feature.T
    gradient = pd.Series([1, 2, 3, 4, 5], index=samples)

    res_table = niche_sort(table, gradient)

    pdt.assert_frame_equal(table, res_table)
mappingdf=mappingdf[~mappingdf.index.duplicated(keep='first')] #match the tables otutabledf,mappingdf=match(otutabledf,mappingdf[mappingdf['host_subject_id'].isin(['M2','M3','M9'])]) otutabledf=otutabledf.T[otutabledf.sum()>0].T otutabledf=otutabledf[otutabledf.T.sum()>0] otutabledf.columns=[str(x) for x in otutabledf.columns] sorting_map={'M9':2,'M2':3,'M3':1} mappingdf['host_num']=[int(sorting_map[x]) for x in mappingdf['host_subject_id']] mappingdf=mappingdf.apply(pd.to_numeric, errors='ignore') #sort by niche observed_table = niche_sort(otutabledf, mappingdf['host_num']) mappingdf=mappingdf.T[observed_table.index].T otutabledf=observed_table.copy() otutabledf.to_dense().to_csv("cluster_models/base_model_keyboard_table.csv",sep=',', encoding='utf-8') mappingdf.to_dense().to_csv("cluster_models/base_model_keyboard_meta.csv",sep=',', encoding='utf-8') ######### build the model ######### x0 = [3, 20, 20, 1e2, 1e2,1e1] bnds = ((3,3),(0,1e2),(0,2e3),(0,1e10),(0,5e1),(1,10)) model_fit=minimize_model(x0,bnds,np.array(otutabledf.T[:104].T.as_matrix())) base_truth,X_noise_sub=build_block_model(3, model_fit.x[1], model_fit.x[2], model_fit.x[3] , model_fit.x[4] ,otutabledf.shape[1]