Beispiel #1
0
 def test_group_by_distance_all(self):
     """Check group_by_distance(self.testclusterslt10, 10) against a fixed mapping.

     The expected mapping has four structure-string keys, each mapped to a
     list of 2-tuples; the call result must compare equal to it.
     """
     # Expected result, keyed by structure string.
     expected = {
         '(((...)))...(((...)))': [(9, 10)],
         '(((...)))': [(1, 2), (3, 4), (11, 12), (5, 6)],
         '.....((((.......))))...': [(15, 16), (7, 8)],
         '............': [(13, 14)],
     }
     observed = group_by_distance(self.testclusterslt10, 10)
     self.assertEqual(observed, expected)
Beispiel #2
0
 def test_group_by_distance_lt10_specific_nogroup(self):
     """Check group_by_distance when a one-element list is the third argument.

     Calls group_by_distance(self.testclusterslt10, 10, [...]) and requires
     the result to equal a mapping with six separate structure-string keys.
     """
     # Third positional argument; presumably the "specific structures"
     # list (passed elsewhere as specstructs=) -- confirm against the
     # group_by_distance signature.
     specific = ['.....((((.......))))...']
     expected = {
         "(((...)))": [(1, 2), (3, 4)],
         "((.....))": [(5, 6)],
         ".....((((.......))))...": [(7, 8)],
         "(((...)))...(((...)))": [(9, 10)],
         "((((....))))": [(11, 12)],
         '............': [(13, 14)],
     }
     observed = group_by_distance(self.testclusterslt10, 10, specific)
     self.assertEqual(observed, expected)
Beispiel #3
0
 def test_group_by_distance_lt10_specific_group(self):
     """Call group_by_distance on testclusterslt10 with 15 and specstructs=['(((...)))'].

     NOTE(review): only a debug print of the result is visible here; no
     assertion appears in this excerpt -- confirm whether the test body
     is truncated and restore the expected-value check if so.
     """
     obs = group_by_distance(self.testclusterslt10, 15, specstructs=['(((...)))'])
     # Python 2 print statement: writes the grouping result to stdout.
     print obs
        files = []
        hold = {}
        pool = Pool(processes=args.c)
        #run the pool over all shape groups to get final grouped structgroups
        fout = open(otufolder + "shapesizes.txt", 'w')
        groupnum = 1
        for shapegroup in groups_shape.keys():
            #write out each group to file for use in subprocess
            groupinfo = {struct: structgroups[struct] for struct in groups_shape[shapegroup]}
            fout.write(shapegroup + "\t" + str(len(groupinfo)) + "\n")
            pool.apply_async(func=group_by_distance,
                args=(groupinfo, structscore), callback=hold.update)
            structgroups.clear()
            del structgroups
            stime = time()
            hold.update(group_by_distance(groupinfo, structscore, None, None, args.nr))
            print len(groupinfo), "clusters:", str((time()-stime)/60), "min"
        fout.close()
        #memory saving wipe of structgroups, groups_shape, and groupinfo
        groups_shape.clear()
        del groups_shape
        groupinfo.clear()
        del groupinfo
        pool.close()
        pool.join()
        print "Grouped ("+str((time()-stime)/60)+" min)"
        structgroups = dict(hold)
        del hold

        #sort all structure sequences by count, highest to lowest
        for struct in structgroups: