def test_group_denovo_specific(self):
    # Checks both values returned by group_denovo when it is handed the
    # testclusterslt10 fixture, a one-element structure list and the
    # value 10 as its third argument.
    expected_groups = {
        '(((...)))...(((...)))': [(9, 10)],
        '(((...)))': [(1, 2), (3, 4), (11, 12), (5, 6)],
        '.....((((.......))))...': [(15, 16), (7, 8)],
        '............': [(13, 14)],
    }
    expected_structs = [
        '.....((((.......))))...',
        '(((...)))...(((...)))',
        '(((...)))',
        '............',
    ]
    requested = ['.....((((.......))))...']
    observed_groups, observed_structs = group_denovo(
        self.testclusterslt10, requested, 10)
    # First returned value is compared as a dict, second as an ordered list.
    self.assertEqual(observed_groups, expected_groups)
    self.assertEqual(observed_structs, expected_structs)
def test_group_denovo_all(self):
    # Checks both values returned by group_denovo when every key of the
    # testclusterslt10 fixture is passed as the structure list, with the
    # value 10 as the third argument.
    #
    # The debugging print statements that used to dump obs1/obs2 on every
    # run were removed: assertEqual already reports both operands when a
    # comparison fails, so they only added noise to the test output.
    obs1, obs2 = group_denovo(self.testclusterslt10,
                              self.testclusterslt10.keys(), 10)
    exp1 = {
        '(((...)))...(((...)))': [(9, 10)],
        '(((...)))': [(1, 2), (3, 4), (11, 12), (5, 6)],
        '.....((((.......))))...': [(7, 8)],
        '............': [(13, 14)],
    }
    exp2 = [
        '.....((((.......))))...',
        '(((...)))...(((...)))',
        '(((...)))',
        '............',
    ]
    self.assertEqual(obs1, exp1)
    self.assertEqual(obs2, exp2)
# Structures are broken out by shape. No comparison is needed at first
# between structures of different shapes, as they are most likely not
# similar enough.
files = []
hold = {}
groupnum = 1
# Bound before the loop so the cleanup below (groupinfo.clear() /
# del groupinfo) does not raise NameError when groups_shape is empty and
# the loop body never runs.
groupinfo = {}
# Disabled multiprocessing path, kept for reference:
#pool = Pool(processes=args.c)
#run the pool over all shape groups to get final grouped structgroups
# Mode 'w' creates/truncates shapesizes.txt. Nothing is written to it at
# the moment because the fout.write call below is commented out. The with
# block guarantees the handle is closed even if group_denovo raises.
with open(otufolder + "shapesizes.txt", 'w') as fout:
    for shapegroup in groups_shape:
        # Subset of structgroups limited to the structures of this shape.
        groupinfo = {struct: structgroups[struct]
                     for struct in groups_shape[shapegroup]}
        #fout.write(shapegroup + "\t" + str(len(groupinfo)) + "\n")
        #pool.apply_async(func=group_by_distance,
        #    args=(groupinfo, structscore, None, None, args.nr),
        #    callback=hold.update)
        stime = time()
        # NOTE(review): the unit tests in this file unpack two values from
        # group_denovo, while here its return value is passed straight to
        # dict.update -- confirm which return shape is current.
        hold.update(group_denovo(groupinfo, groupinfo.keys(), structscore))
        # Elapsed time for this shape group, in minutes. Single-argument
        # form prints the same text under the print statement and the
        # print function.
        print("%s clusters: %s min"
              % (len(groupinfo), str((time() - stime) / 60)))
# Memory-saving wipe of structgroups, groups_shape, and groupinfo.
groups_shape.clear()
del groups_shape
groupinfo.clear()
del groupinfo
structgroups.clear()
del structgroups
#pool.close()
#pool.join()
# hold should now be the combined dictionaries from all calls of
# group_denovo above (this comment previously named group_by_forester),
# aka the new structgroups.
# Do one more grouping with all remaining structs regardless of shape.
#print len(hold), "clusters fgrouping"