def test_rows(self):
    """Dict2D.Rows should yield each row as a list in RowOrder/ColOrder order"""
    # An empty matrix produces no rows.
    self.assertEqual(list(Dict2D(self.empty).Rows), [])

    # Dense square matrix with explicit orders.
    square = Dict2D(self.square, RowOrder='abc', ColOrder='abc')
    self.assertEqual(list(square.Rows), [[1, 2, 3], [2, 4, 6], [3, 6, 9]])

    # Reordered (and repeated) keys must be reflected in the output.
    square.RowOrder = 'ba'
    square.ColOrder = 'ccb'
    self.assertEqual(list(square.Rows), [[6, 6, 4], [3, 3, 2]])

    # Sparse matrix with no orders set: no error expected. The result
    # is sorted at both levels before comparing, since no order is set.
    sparse = Dict2D(self.sparse)
    sparse_rows = list(sparse.Rows)
    for row in sparse_rows:
        row.sort()
    sparse_rows.sort()
    self.assertEqual(sparse_rows, [[1, 3], [2]])

    # With explicit orders the missing cells become an error...
    sparse.RowOrder = 'ad'
    sparse.ColOrder = 'abc'
    self.assertRaises(Dict2DSparseError, list, sparse.Rows)

    # ...unless padding is enabled, in which case Default fills the gaps.
    sparse.Pad = True
    sparse.Default = 'xxx'
    padded = [[1, 'xxx', 3], ['xxx', 2, 'xxx']]
    self.assertEqual(list(sparse.Rows), padded)
def test_purge(self):
    """Dict2D.purge should remove row and column keys not listed in the orders"""
    # Restrict the square matrix to rows a,b and columns b,c.
    matrix = Dict2D(self.square)
    matrix.RowOrder = 'ab'
    matrix.ColOrder = 'bc'
    matrix.purge()
    trimmed = Dict2D({
        'a': {'b': 2, 'c': 3},
        'b': {'b': 4, 'c': 6},
    })
    self.assertEqual(matrix, trimmed)

    # Orders that are a superset of the existing keys leave the data alone.
    matrix = Dict2D(self.square)
    matrix.RowOrder = dict.fromkeys('abcd')
    matrix.ColOrder = dict.fromkeys('abcd')
    matrix.purge()
    self.assertEqual(matrix, Dict2D(self.square))

    # Orders sharing no key with the data empty the matrix completely.
    matrix.RowOrder = list('xyz')
    matrix.ColOrder = list('xyz')
    matrix.purge()
    self.assertEqual(matrix, {})
def __init__(self, data=None, RowOrder=None, ColOrder=None, Default=None,
    Pad=None, RowConstructor=None, info=None):
    """Init dict with pre-existing data: dict of dicts.

    Usage:
        data = distance matrix in a form acceptable to the Dict2D class
        RowOrder = list of 'interesting keys'. When None, the value
            already present on self is passed on to Dict2D (per the
            original documentation the default is the set of all amino
            acids -- not visible from this method, confirm on the class).
        ColOrder = list of 'interesting keys'; same fallback as RowOrder.
        Default = value to set padded elements to
        Pad = boolean describing whether to fill the object to hold all
            possible elements based on RowOrder and ColOrder. When None,
            the value already present on self is passed on to Dict2D.
        RowConstructor = constructor to use when building inner objects
            (original documentation: default dict)
        info = object handed to Delegator.__init__ (original
            documentation: the AAIndexRecord object)

    Note: Power is NOT a parameter. It is an instance attribute that this
    method always initialises to 1.; it records the power the original
    matrix has been raised to in order to yield the current matrix.
    """
    # Only override the values already on self when the caller supplied
    # an explicit value.
    if RowOrder is not None:
        self.RowOrder = RowOrder
    if ColOrder is not None:
        self.ColOrder = ColOrder
    if Pad is not None:
        self.Pad = Pad

    # Initialize super class attributes, forwarding the resolved
    # RowOrder/ColOrder/Pad rather than the raw arguments.
    Dict2D.__init__(self, data=data, RowOrder=self.RowOrder,\
        ColOrder=self.ColOrder, Default=Default, Pad=self.Pad,\
        RowConstructor=RowConstructor)
    Delegator.__init__(self, info)

    # The power to which the original data has been raised to give
    # the current data, starts at 1., modified by elementPow()
    # accessed as self.Power
    # NOTE(review): written straight into __dict__, presumably to bypass
    # attribute interception set up by Delegator -- confirm.
    self.__dict__['Power'] = 1.
def test_setDiag(self):
    """Dict2D.setDiag should write the given value onto every row's diagonal"""
    # An empty matrix stays empty.
    empty = Dict2D(self.empty)
    empty.setDiag(0)
    self.assertEqual(empty, {})

    # One-element matrices: an existing diagonal cell is overwritten...
    on_diag = Dict2D(self.single_same)
    on_diag.setDiag(0)
    self.assertEqual(on_diag, {'a': {'a': 0}})

    # ...and a missing one is added next to the off-diagonal cell.
    off_diag = Dict2D(self.single_diff)
    off_diag.setDiag(0)
    self.assertEqual(off_diag, {'a': {'a': 0, 'b': 3}})

    # Dense matrix: only the diagonal changes.
    dense = Dict2D(self.square)
    dense.setDiag(9)
    expected_dense = {
        'a': {'a': 9, 'b': 2, 'c': 3},
        'b': {'a': 2, 'b': 9, 'c': 6},
        'c': {'a': 3, 'b': 6, 'c': 9},
    }
    self.assertEqual(dense, expected_dense)

    # Sparse matrix: a diagonal column is created for each existing row,
    # but no new rows are created for existing columns.
    sparse = Dict2D(self.sparse)
    sparse.setDiag(-1)
    expected_sparse = {'a': {'a': -1, 'c': 3}, 'd': {'b': 2, 'd': -1}}
    self.assertEqual(sparse, expected_sparse)
def test_transpose(self):
    """Dict2D.transpose should swap rows and cols in place, dense or sparse"""
    # Empty matrix: nothing to do.
    empty = Dict2D(self.empty)
    empty.transpose()
    self.assertEqual(empty, {})

    # A lone diagonal element maps onto itself.
    on_diag = Dict2D(self.single_same)
    on_diag.transpose()
    self.assertEqual(on_diag, {'a': {'a': 2}})

    # A lone off-diagonal element has its row and col keys swapped.
    off_diag = Dict2D(self.single_diff)
    off_diag.transpose()
    self.assertEqual(off_diag, {'b': {'a': 3}})

    # Sparse matrices are handled too.
    sparse = Dict2D(self.sparse)
    sparse.transpose()
    expected_sparse = {'a': {'a': 1}, 'c': {'a': 3}, 'b': {'d': 2}}
    self.assertEqual(sparse, expected_sparse)

    # RowOrder and ColOrder trade places along with the data.
    dense = Dict2D(self.dense)
    dense.RowOrder = 'ab'
    dense.ColOrder = 'abc'
    dense.transpose()
    expected_dense = {
        'a': {'a': 1, 'b': 2},
        'b': {'a': 2, 'b': 4},
        'c': {'a': 3, 'b': 6},
    }
    self.assertEqual(dense, expected_dense)
    self.assertEqual(dense.ColOrder, 'ab')
    self.assertEqual(dense.RowOrder, 'abc')
def test_fromIndices(self):
    """Dict2D.fromIndices should fill the object from (row, col, value) tuples"""
    reference = Dict2D(self.sparse)
    built = Dict2D()
    # The freshly created object must differ from the reference first.
    self.assertNotEqual(reference, built)

    triples = [('a', 'a', 1), ('a', 'c', 3), ('d', 'b', 2)]
    built.fromIndices(triples)
    self.assertEqual(reference, built)

    # Items that are not 3-element sequences are rejected.
    self.assertRaises(Dict2DInitError, built.fromIndices, [1, 2, 3])
def test_colKeys(self):
    """Dict2D.colKeys should report every key used in any row"""
    # Zero or one column key: the result can be compared directly.
    self.assertEqual(Dict2D(self.empty).colKeys(), [])
    self.assertEqual(Dict2D(self.single_diff).colKeys(), ['b'])

    # With several keys the order is arbitrary, so compare as item sets.
    all_cols = ['a', 'b', 'c']
    self.assertEqualItems(Dict2D(self.square).colKeys(), all_cols)
    self.assertEqualItems(Dict2D(self.dense).colKeys(), all_cols)
    self.assertEqualItems(Dict2D(self.sparse).colKeys(), all_cols)
def test_chi_square_from_Dict2D(self):
    """chi_square_from_Dict2D should return the chi-square statistic and p value"""
    # Case 1: observed counts are first expanded by
    # calc_contingency_expected; only the statistic is checked.
    observed_1 = Dict2D({
        'rest_of_tree': {'env1': 2, 'env3': 1, 'env2': 0},
        'b': {'env1': 1, 'env3': 1, 'env2': 3},
    })
    expected_1 = calc_contingency_expected(observed_1)
    stat_1, p_1 = chi_square_from_Dict2D(expected_1)
    self.assertFloatEqual(stat_1, 3.0222222222222221)

    # Case 2: the matrix is passed directly, each cell holding a
    # two-element list.
    direct = Dict2D({
        'Marl': {'val': [2, 5.2]},
        'Chalk': {'val': [10, 5.2]},
        'Sandstone': {'val': [8, 5.2]},
        'Clay': {'val': [2, 5.2]},
        'Limestone': {'val': [4, 5.2]},
    })
    stat_2, p_2 = chi_square_from_Dict2D(direct)
    self.assertFloatEqual(stat_2, 10.1538461538)
    self.assertFloatEqual(p_2, 0.0379143890013)

    # Case 3: another 2x3 contingency table; statistic and p value checked.
    observed_3 = Dict2D({
        'AIDS': {'Males': 4, 'Females': 2, 'Both': 3},
        'No_AIDS': {'Males': 3, 'Females': 16, 'Both': 2},
    })
    expected_3 = calc_contingency_expected(observed_3)
    stat_3, p_3 = chi_square_from_Dict2D(expected_3)
    self.assertFloatEqual(stat_3, 7.6568405139833722)
    self.assertFloatEqual(p_3, 0.0217439383468)
def test_getItemsIf(self):
    """Dict2D.getItemsIf should list the items for which f(item) holds"""
    def below_five(value):
        return value < 5

    # Dense matrix, scanned in RowOrder/ColOrder order.
    square = Dict2D(self.square)
    square.RowOrder = 'abc'
    square.ColOrder = 'abc'
    self.assertEqual(square.getItemsIf(below_five), [1, 2, 3, 2, 4, 3])
    self.assertEqual(square.getItemsIf(below_five, negate=True), [6, 6, 9])

    # Ragged (upper-triangle) matrix: absent cells are simply skipped.
    triangle = Dict2D(self.top_triangle)
    triangle.RowOrder = 'abc'
    triangle.ColOrder = 'abc'
    self.assertEqual(triangle.getItemsIf(below_five), [1, 2, 3, 4])
    self.assertEqual(triangle.getItemsIf(below_five, negate=True), [6, 9])
def test_fromLists(self):
    """Dict2D.fromLists should rebuild the data from a list of row lists"""
    # fromLists only makes sense for dense data: each inner list is
    # matched positionally against ColOrder.
    original = Dict2D(self.dense)
    rebuilt = Dict2D(self.dense)  # contents get replaced below
    self.assertEqual(original, rebuilt)
    assert original is not rebuilt

    rebuilt.RowOrder = ['b', 'a']
    rebuilt.ColOrder = ['c', 'a', 'b']
    rebuilt.fromLists([[3, 6, 9], [1, 3, 5]])
    self.assertNotEqual(original, rebuilt)

    expected = Dict2D({
        'b': {'c': 3, 'a': 6, 'b': 9},
        'a': {'c': 1, 'a': 3, 'b': 5},
    })
    self.assertEqual(rebuilt, expected)
def test_getItemIndices(self):
    """Dict2D.getItemIndices should list (row, col) pairs where f(item) holds"""
    def below_five(value):
        return value < 5

    # Dense matrix, scanned in RowOrder/ColOrder order.
    square = Dict2D(self.square)
    square.RowOrder = 'abc'
    square.ColOrder = 'abc'
    small_cells = [
        ('a', 'a'), ('a', 'b'), ('a', 'c'),
        ('b', 'a'), ('b', 'b'),
        ('c', 'a'),
    ]
    self.assertEqual(square.getItemIndices(below_five), small_cells)
    large_cells = [('b', 'c'), ('c', 'b'), ('c', 'c')]
    self.assertEqual(
        square.getItemIndices(below_five, negate=True), large_cells)

    # Ragged (upper-triangle) matrix: only existing cells are reported.
    triangle = Dict2D(self.top_triangle)
    triangle.RowOrder = 'abc'
    triangle.ColOrder = 'abc'
    triangle_cells = [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b')]
    self.assertEqual(triangle.getItemIndices(below_five), triangle_cells)
def test_getRows(self):
    """Dict2D.getRows should return only the requested rows"""
    # Dense matrix: pick two of the three rows.
    picked = Dict2D(self.square).getRows(['a', 'c'])
    self.assertEqual(picked, {
        'a': {'a': 1, 'b': 2, 'c': 3},
        'c': {'a': 3, 'b': 6, 'c': 9},
    })

    # Sparse matrix: an existing row comes back as stored.
    self.assertEqual(Dict2D(self.sparse).getRows(['d']), {'d': {'b': 2}})

    # Asking for a row that is absent raises KeyError...
    sparse = Dict2D(self.sparse)
    self.assertRaises(KeyError, sparse.getRows, ['c'])

    # ...but with Pad enabled an empty row is returned instead.
    sparse.Pad = True
    self.assertEqual(sparse.getRows('c'), {'c': {}})

    # negate=True returns the rows that were NOT listed.
    remaining = Dict2D(self.square).getRows(['a', 'c'], negate=True)
    self.assertEqual(remaining, {'b': {'a': 2, 'b': 4, 'c': 6}})
def test_getColIndices(self):
    """Dict2D.getColIndices should list the keys of cols for which f(col) holds"""
    def total_below_15(col):
        return sum(col) < 15

    square = Dict2D(self.square)
    self.assertEqual(square.getColIndices(total_below_15), ['a', 'b'])

    # negate=True selects the complementary columns.
    self.assertEqual(
        square.getColIndices(total_below_15, negate=True), ['c'])
def make_contingency_matrix(OTU_name, category_info, otu_sample_info, category_values, suppress_warnings=False): """make the contingency table for running the G test of independence counts OTU as present (count > 1) or absent makes a column in the matrix for each category value If suppress_warnings=True, doesn't warn when sample in map but not otu table """ result = {'OTU_pos': {}, 'OTU_neg': {}} for category in category_values: result['OTU_pos'][category + '_pos'] = 0 result['OTU_neg'][category + '_pos'] = 0 for sample in category_info: category = category_info[sample] try: OTU_count = float(otu_sample_info[OTU_name][sample]) OTU_count = int(OTU_count) worked = True except KeyError: if not suppress_warnings: print "Warning:", sample, "is in the sample mapping file but not the OTU table" worked = False if worked: if OTU_count == 0: result['OTU_neg'][category + '_pos'] += 1 elif OTU_count > 0: result['OTU_pos'][category + '_pos'] += 1 return Dict2D(result, Default=0, Pad=True)
def upgma(pairwise_distances):
    """Uses the UPGMA algorithm to cluster sequences

    pairwise_distances: a dictionary with pair tuples mapped to a distance

    returns the tree produced by UPGMA_cluster (a PhyloNode object per
    the original documentation). The parentless node is named 'root' and
    every other unnamed node is named 'edge.<n>', numbered in traversal
    order.
    """
    # Collect every distinct item in first-seen order. The set gives O(1)
    # membership tests; the list keeps the order that is passed on as
    # RowOrder/ColOrder.
    items_in_matrix = []
    seen = set()
    for pair in pairwise_distances:
        for item in (pair[0], pair[1]):
            if item not in seen:
                seen.add(item)
                items_in_matrix.append(item)

    # Feed the distances in both directions so the matrix is symmetric.
    # Cells never mentioned (e.g. the diagonal) are padded with BIG_NUM.
    dict2d_input = [(pair[0], pair[1], pairwise_distances[pair])
                    for pair in pairwise_distances]
    dict2d_input.extend([(pair[1], pair[0], pairwise_distances[pair])
                         for pair in pairwise_distances])
    dict2d_input = Dict2D(dict2d_input, RowOrder=items_in_matrix,
                          ColOrder=items_in_matrix, Pad=True,
                          Default=BIG_NUM)

    matrix_a, node_order = inputs_from_dict2D(dict2d_input)
    tree = UPGMA_cluster(matrix_a, node_order, BIG_NUM)

    # Name the nodes that came out of clustering without a name. The
    # counter only advances when an 'edge.<n>' name is handed out.
    index = 0
    for node in tree.traverse():
        if not node.Parent:
            node.Name = 'root'
        elif not node.Name:
            node.Name = 'edge.' + str(index)
            index += 1
    return tree
def test_build_tree_from_distance_matrix(self):
    """build_tree_from_distance_matrix should build a tree from a Dict2D"""
    # Symmetric distances between three samples, given in both directions.
    distances = [
        ('sample1aaaaaaa', 'sample2', 1.438),
        ('sample2', 'sample1aaaaaaa', 1.438),
        ('sample1aaaaaaa', 'sample3', 2.45678),
        ('sample3', 'sample1aaaaaaa', 2.45678),
        ('sample2', 'sample3', 2.7),
        ('sample3', 'sample2', 2.7),
    ]
    # Pad with 0.0 so that cells not listed above (the diagonal) exist.
    distance_matrix = Dict2D(distances, Pad=True, Default=0.0)
    tree = build_tree_from_distance_matrix(distance_matrix)
    expected = '((sample1aaaaaaa:0.59739,sample2:0.84061),sample3:1.85939);'
    self.assertEqual(str(tree), expected)
def test_getCols(self):
    """Dict2D.getCols should keep only the requested cols in every row"""
    # Dense matrix.
    square = Dict2D(self.square)
    expected_dense = {
        'a': {'b': 2, 'c': 3},
        'b': {'b': 4, 'c': 6},
        'c': {'b': 6, 'c': 9},
    }
    self.assertEqual(square.getCols('bc'), expected_dense)

    # Ragged matrix: rows simply lack the cols they never had.
    triangle = Dict2D(self.top_triangle)
    expected_ragged = {
        'a': {'a': 1, 'c': 3},
        'b': {'c': 6},
        'c': {'c': 9},
    }
    self.assertEqual(triangle.getCols('ac'), expected_ragged)

    # negate=True keeps the cols that were NOT listed.
    square = Dict2D(self.square)
    expected_negated = {
        'a': {'a': 1},
        'b': {'a': 2},
        'c': {'a': 3},
    }
    self.assertEqual(square.getCols('bc', negate=True), expected_negated)
def test_init_without_data(self):
    """Dict2D init should store its options and pad when given no data"""
    matrix = Dict2D(RowOrder=['a'], ColOrder=['b'], Pad=True, Default=42,
                    RowConstructor=Freqs)

    # Every constructor option is retained on the instance.
    self.assertEqual(matrix.RowOrder, ['a'])
    self.assertEqual(matrix.ColOrder, ['b'])
    self.assertEqual(matrix.Pad, True)
    self.assertEqual(matrix.Default, 42)
    self.assertEqual(matrix.RowConstructor, Freqs)

    # Padding fills the single RowOrder x ColOrder cell with Default.
    self.assertEqual(matrix, {'a': {'b': 42.}})
def test_fromDicts(self):
    """Dict2D.fromDicts should fill the object from a dict of dicts"""
    matrix = Dict2D()
    matrix.fromDicts(self.sparse)

    # Spot-check cell values and the size of both levels.
    self.assertEqual(matrix['a']['c'], 3)
    self.assertEqual(matrix['d']['b'], 2)
    self.assertEqual(len(matrix), 2)
    self.assertEqual(len(matrix['a']), 2)

    # A row that was never supplied must not exist.
    self.assertRaises(KeyError, matrix.__getitem__, 'c')

    # Input that is not a dict of dicts is rejected.
    self.assertRaises(Dict2DInitError, matrix.fromDicts, [1, 2, 3])
def test_getItems(self):
    """Dict2D.getItems should return the items at the given (row, col) pairs"""
    # Dense matrix; a repeated index is returned twice.
    square = Dict2D(self.square)
    wanted = [('a', 'a'), ('b', 'c'), ('c', 'a'), ('a', 'a')]
    self.assertEqual(square.getItems(wanted), [1, 6, 3, 1])

    # Ragged matrix: fine as long as every requested cell exists...
    triangle = Dict2D(self.top_triangle)
    self.assertEqual(triangle.getItems([('a', 'c'), ('c', 'c')]), [3, 9])

    # ...a missing cell raises KeyError...
    self.assertRaises(KeyError, triangle.getItems, [('a', 'a'), ('c', 'a')])

    # ...unless Pad is on, in which case the missing cell yields None.
    triangle.Pad = True
    self.assertEqual(
        triangle.getItems([('a', 'c'), ('c', 'a')]), [3, None])

    # negate=True returns every item NOT listed. RowOrder and ColOrder
    # must be set for the result to come back in a predictable order.
    triangle.Pad = False
    triangle.RowOrder = 'abc'
    triangle.ColOrder = 'abc'
    excluded = [('a', 'c'), ('c', 'a'), ('a', 'a')]
    self.assertEqual(
        triangle.getItems(excluded, negate=True), [2, 4, 6, 9])
def test_getColsIf(self):
    """Dict2D.getColsIf should return the data restricted to cols where f(col) holds"""
    def total_below_15(col):
        return sum(col) < 15

    square = Dict2D(self.square)
    expected_matching = {
        'a': {'a': 1, 'b': 2},
        'b': {'a': 2, 'b': 4},
        'c': {'a': 3, 'b': 6},
    }
    self.assertEqual(square.getColsIf(total_below_15), expected_matching)

    # negate=True keeps the complementary columns.
    expected_negated = {'a': {'c': 3}, 'b': {'c': 6}, 'c': {'c': 9}}
    self.assertEqual(
        square.getColsIf(total_below_15, negate=True), expected_negated)
def mapping_file_to_dict(mapping_data, header):
    """processes mapping data in list of lists format into a 2 deep dict

    mapping_data: sequence of rows; element 0 of each row is the sample ID
        and the remaining elements line up positionally with header
    header: sequence of field names; header[0] labels the sample ID
        column and is not used as a key

    Returns Dict2D({sample_id: {field_name: value}}). If a sample ID
    occurs more than once, the last row wins. Row elements beyond
    len(header) are ignored.

    Raises IndexError if a row is shorter than header.
    """
    map_dict = {}
    for row in mapping_data:
        fields = {}
        map_dict[row[0]] = fields
        # Start at 1: column 0 is the sample ID field. Index explicitly
        # (rather than zip) so a short row raises instead of being
        # silently truncated.
        for col in range(1, len(header)):
            fields[header[col]] = row[col]
    return Dict2D(map_dict)
def test_getRowIndices(self):
    """Dict2D.getRowIndices should list the keys of rows for which f(row) holds"""
    def total_below_15(row):
        return sum(row) < 15

    square = Dict2D(self.square)
    self.assertEqual(square.getRowIndices(total_below_15), ['a', 'b'])

    # Only the rows and cols named in RowOrder/ColOrder are considered.
    square.RowOrder = 'ac'
    square.ColOrder = 'ac'
    self.assertEqual(square.getRowIndices(total_below_15), ['a', 'c'])

    # With the orders cleared again, negate=True selects the other rows.
    square.RowOrder = None
    square.ColOrder = None
    self.assertEqual(
        square.getRowIndices(total_below_15, negate=True), ['c'])
def test_pad(self):
    """Dict2D.pad should fill missing cells with Default without adding rows"""
    # Default padding value (None): rows stay at two, every row gets
    # all three column keys.
    padded = Dict2D(self.sparse)
    padded.pad()
    self.assertEqual(len(padded), 2)
    self.assertEqual(len(padded['a']), 3)
    self.assertEqual(len(padded['d']), 3)
    self.assertEqual(padded['a'].keys(), padded['d'].keys())
    self.assertEqual(padded['a']['b'], None)

    # A custom Default is used for the filled cells.
    with_default = Dict2D(self.sparse, Default='x')
    with_default.pad()
    self.assertEqual(with_default['a']['b'], 'x')

    # A custom RowConstructor is respected: rows are Freqs instances.
    with_freqs = Dict2D(self.sparse, Default=0, RowConstructor=Freqs)
    with_freqs.pad()
    self.assertEqual(with_freqs['a']['b'], 0)
    assert isinstance(with_freqs['a'], Freqs)
def test_toLists(self):
    """Dict2D toLists should convert dict into list of lists"""
    d = Dict2D(self.square)
    d.RowOrder = 'abc'
    d.ColOrder = 'abc'
    self.assertEqual(d.toLists(), [[1, 2, 3], [2, 4, 6], [3, 6, 9]])
    # With headers the first row is '-' followed by the col keys, and
    # each data row is prefixed with its row key.
    self.assertEqual(d.toLists(headers=True),
        [['-', 'a', 'b', 'c'],
         ['a', 1, 2, 3],
         ['b', 2, 4, 6],
         ['c', 3, 6, 9],
        ])

    # should raise error if called on sparse matrix...
    self.assertRaises(Dict2DSparseError, Dict2D(self.sparse).toLists)
    # ...unless self.Pad is True
    d = Dict2D(self.sparse)
    d.RowOrder = 'ad'
    d.ColOrder = 'abc'
    d.Pad = True
    d.Default = 'x'
    self.assertEqual(d.toLists(headers=True),
        [['-', 'a', 'b', 'c'], ['a', 1, 'x', 3], ['d', 'x', 2, 'x']])

    # works without RowOrder or ColOrder; key order is then arbitrary,
    # so results are sorted before comparing
    goal = [[1, 2, 3], [2, 4, 6], [3, 6, 9]]

    # headers=False
    d = Dict2D(self.square)
    l = d.toLists()
    for r in l:
        r.sort()
    l.sort()
    self.assertEqual(l, goal)

    # headers=True: the original code threw this result away and
    # re-checked the header-less output instead. Actually inspect it:
    # split off the header row and each row's leading key, then compare
    # keys and values separately.
    with_headers = d.toLists(headers=True)
    header_row = with_headers[0]
    data_rows = with_headers[1:]
    self.assertEqual(header_row[0], '-')
    self.assertEqual(sorted(header_row[1:]), ['a', 'b', 'c'])
    self.assertEqual(sorted([r[0] for r in data_rows]), ['a', 'b', 'c'])
    values = [sorted(r[1:]) for r in data_rows]
    values.sort()
    self.assertEqual(values, goal)

    # Asking for headers must not affect a later header-less call (this
    # is what the original second half effectively checked).
    l = d.toLists()
    for r in l:
        r.sort()
    l.sort()
    self.assertEqual(l, goal)
def test_init(self):
    """Dict2D init should build an equal-comparing object from a dict of dicts"""
    # NOTE: only initialisation from a dict of dicts is covered here; the
    # other input forms are exercised by the test_guess_input* and
    # test_from* methods.
    fixtures = [
        self.empty,
        self.single_same,
        self.single_diff,
        self.dense,
        self.sparse,
    ]
    # The result must equal its source dict and be exactly a Dict2D.
    for source in fixtures:
        built = Dict2D(source)
        self.assertEqual(built, source)
        self.assertEqual(built.__class__, Dict2D)

    # Spot-check values and sizes on the sparse fixture.
    sparse = Dict2D(self.sparse)
    self.assertEqual(sparse['a']['c'], 3)
    self.assertEqual(sparse['d']['b'], 2)
    self.assertEqual(len(sparse), 2)
    self.assertEqual(len(sparse['a']), 2)
    self.assertRaises(KeyError, sparse.__getitem__, 'c')

    # Truth value: empty is false, non-empty is true.
    assert not Dict2D(self.empty)
    assert Dict2D(self.single_same)
def test_inputs_from_dict2D(self):
    """inputs_from_dict2D should return a matrix and a matching node list"""
    # Symmetric distances between three items, given in both directions.
    distances = [
        ('1', '2', 0.86), ('2', '1', 0.86),
        ('1', '3', 0.92), ('3', '1', 0.92),
        ('2', '3', 0.67), ('3', '2', 0.67),
    ]
    order = ['3', '2', '1']
    distance_matrix = Dict2D(distances, RowOrder=order, ColOrder=order,
                             Pad=True, Default=999999999999999)
    array, nodes = inputs_from_dict2D(distance_matrix)

    # Array positions follow the given order: index 0 is '3', 2 is '1'.
    self.assertFloatEqual(array[0][2], 0.92)
    self.assertFloatEqual(array[1][0], 0.67)
    self.assertEqual(nodes[0].Name, '3')
    self.assertEqual(nodes[2].Name, '1')
def test_sharedColKeys(self):
    """Dict2D.sharedColKeys should report only the keys present in every row"""
    # Zero or one key: the result can be compared directly.
    self.assertEqual(Dict2D(self.empty).sharedColKeys(), [])
    self.assertEqual(Dict2D(self.single_diff).sharedColKeys(), ['b'])

    # Several keys come back in arbitrary order; compare as item sets.
    all_cols = ['a', 'b', 'c']
    self.assertEqualItems(Dict2D(self.square).sharedColKeys(), all_cols)
    self.assertEqualItems(Dict2D(self.dense).sharedColKeys(), all_cols)
    # The sparse fixture has no key common to all of its rows.
    self.assertEqualItems(Dict2D(self.sparse).sharedColKeys(), [])

    # Add a row that lacks 'a' and introduces 'e': colKeys grows while
    # sharedColKeys shrinks. NOTE(review): this mutates the self.square
    # fixture in place; presumably setUp rebuilds it -- confirm.
    self.square['x'] = {'b': 3, 'c': 5, 'e': 7}
    self.assertEqualItems(
        Dict2D(self.square).colKeys(), ['a', 'b', 'c', 'e'])
    self.assertEqualItems(
        Dict2D(self.square).sharedColKeys(), ['b', 'c'])
def test_cols(self):
    """Dict2D.Cols should yield each col as a list in ColOrder/RowOrder order"""
    # An empty matrix produces no cols.
    self.assertEqual(list(Dict2D(self.empty).Cols), [])

    # Dense square matrix with explicit orders.
    square = Dict2D(self.square, RowOrder='abc', ColOrder='abc')
    self.assertEqual(list(square.Cols), [[1, 2, 3], [2, 4, 6], [3, 6, 9]])

    # Reordered (and repeated) keys must be reflected in the output.
    square.RowOrder = 'ba'
    square.ColOrder = 'ccb'
    self.assertEqual(list(square.Cols), [[6, 3], [6, 3], [4, 2]])

    # Unlike Rows, Cols DOES raise on a sparse matrix with no orders...
    sparse = Dict2D(self.sparse)
    self.assertRaises(Dict2DSparseError, list, sparse.Cols)

    # ...and also when RowOrder and ColOrder are set...
    sparse.RowOrder = 'ad'
    sparse.ColOrder = 'abc'
    self.assertRaises(Dict2DSparseError, list, sparse.Cols)

    # ...unless padding is enabled, in which case Default fills the gaps.
    sparse.Pad = True
    sparse.Default = 'xxx'
    padded = [[1, 'xxx'], ['xxx', 2], [3, 'xxx']]
    self.assertEqual(list(sparse.Cols), padded)
def test_reflect(self):
    """Dict2D.reflect should mirror a square matrix across its diagonal"""
    triangle = Dict2D(self.top_triangle)

    # reflect needs both RowOrder and ColOrder: with neither set, or
    # with only one of them set, it must raise Dict2DError.
    self.assertRaises(Dict2DError, triangle.reflect)
    self.assertRaises(Dict2DError, triangle.reflect, upper_to_lower)
    triangle.RowOrder = 'abc'
    self.assertRaises(Dict2DError, triangle.reflect)
    triangle.RowOrder = None
    triangle.ColOrder = 'abc'
    self.assertRaises(Dict2DError, triangle.reflect)

    # With both orders set, copying upper to lower yields the full square.
    triangle.RowOrder = 'abc'
    triangle.reflect(upper_to_lower)
    self.assertEqual(triangle, self.square)

    # Lower triangle copied to upper. The diagonal is absent from the
    # expected result: reflect does not create it.
    lower = Dict2D(self.bottom_triangle)
    lower.ColOrder = 'abc'
    lower.RowOrder = 'abc'
    lower.reflect(lower_to_upper)
    expected_lower = {
        'a': {'b': 2, 'c': 3},
        'b': {'a': 2, 'c': 6},
        'c': {'a': 3, 'b': 6},
    }
    self.assertEqual(lower, expected_lower)

    # Averaging: each mirrored pair is replaced by its mean.
    asymmetric = Dict2D({
        'a': {'a': 2, 'b': 4, 'c': 6},
        'b': {'a': 10, 'b': 20, 'c': 30},
        'c': {'a': 30, 'b': 60, 'c': 90},
    })
    asymmetric.ColOrder = 'abc'
    asymmetric.RowOrder = 'abc'
    asymmetric.reflect(average)
    expected_average = {
        'a': {'a': 2, 'b': 7, 'c': 18},
        'b': {'a': 7, 'b': 20, 'c': 45},
        'c': {'a': 18, 'b': 45, 'c': 90},
    }
    self.assertEqual(asymmetric, expected_average)