def __init__(self, **kwargs):
    """Initialise the state common to all tree simulators.

    Keyword arguments read here:
      model     -- simulation model; if absent or false, a private instance is built
                   from the class of the `models` module named by profile.modeltype.
      profile   -- simulation profile; if absent, IOsimul.SimulProfile(**kwargs) is used.
      times, eventsrecord, extincts, contempbranches -- default to new empty lists.
      events, eventsmap -- default to new empty dicts.
      nodeClass -- node class (default: tree2.AnnotatedNode).
      nodeAttr  -- list of node attribute names, to which 'extinct' is appended.
      ngen      -- stored as is (None if absent).
    The whole kwargs dict is also forwarded to the default profile and model constructors.

    Raises ValueError if no model is given and the profile provides no model type.
    """
    print('invoke _BaseTreeSimulator__init__')
    #~ print 'kwargs:', kwargs
    self.model = kwargs.get('model')
    # only build the default profile when none was passed in
    if 'profile' in kwargs:
        self.profile = kwargs['profile']
    else:
        self.profile = IOsimul.SimulProfile(**kwargs)
    if not self.model:
        modeltype = getattr(self.profile, 'modeltype', False)
        if not modeltype:
            raise ValueError("must specify the model to simulate either through the 'profile' or the 'model' arguments")
        # by default create a private model instance; these are better not shared between gene families
        # as their rates might have to be updated at different time points.
        # The class is resolved by attribute lookup (dotted names allowed) rather than eval().
        modelclass = models
        for attrname in modeltype.split('.'):
            modelclass = getattr(modelclass, attrname)
        self.model = modelclass(**kwargs)
    self.t = 0
    self.times = kwargs.get('times', [])
    self.events = kwargs.get('events', {})
    self.eventsrecord = kwargs.get('eventsrecord', [])
    self.eventsmap = kwargs.get('eventsmap', {})
    self.extincts = kwargs.get('extincts', [])
    self.contempbranches = kwargs.get('contempbranches', [])
    self.nodeClass = kwargs.get('nodeClass', tree2.AnnotatedNode)
    # concatenation builds a new list, so a caller-supplied 'nodeAttr' list is not modified
    self.nodeAttr = kwargs.get('nodeAttr', []) + ['extinct']
    self.ngen = kwargs.get('ngen')
    # generator object providing serial unique identifiers for event objects
    self.eventidgen = models.eventIdGen()
    self.dumppickle_warning = '\n'.join([
        "Cannot pickle generator objects; to pickle simulator object instance %s," % repr(self),
        "you have to first delete its event id generator attribute 'eventidgen'",
        "(will discontinue numeration of events for further simulation).",
    ])
    self.logger = IOsimul.SimulLogger(simultype=type(self))
def __init__(self, noTrigger=False, modeltype='BirthDeathDTLModel', **kwargs):
    """Initialise a gene tree simulator evolving within a set of reference trees.

    Parameters:
      noTrigger -- when false and a number of generations is known, evolve() is
                   launched at the end of the initialisation.
      modeltype -- model type name forwarded to the parent initialiser.
    Keyword arguments read here:
      refsimul  -- reference tree simulator; when given (and true), the reference trees,
                   contemporary branches, extinct lineages, time records and number of
                   generations (refsimul.t - 1) are taken from it.
      reftrees, refcontempbranches, refextincts, times -- required when 'refsimul' is
                   not given (KeyError otherwise); 'ngen' is optional in that case.
      refnodeswithdescent -- precomputed value; when absent it is obtained from
                   refsimul.get_nodes_with_descendants() (KeyError without 'refsimul').
      profile   -- simulation profile (default: IOsimul.DTLSimulProfile(type='core')).
      allow_multiple -- forwarded to the parent initialiser and to pickgenelineages()
                   (default False).
      connecttrees -- forwarded to evolve() (default False).
    All keyword arguments are also passed on to the parent initialiser.
    """
    print('invoke _DTLtreeSimulator__init__')
    #~ print 'kwargs:', kwargs
    allow_multiple = kwargs.get('allow_multiple', False)
    # Merge the explicit settings into a copy of kwargs: passing allow_multiple both
    # as a named argument and through **kwargs raises a TypeError (duplicate keyword).
    # automatic checkdata() and evolve(ngen) triggers from parent classes are deactivated by noTrigger=True
    parentkwargs = dict(kwargs)
    parentkwargs.update(modeltype=modeltype, noTrigger=True, allow_multiple=allow_multiple)
    super(DTLtreeSimulator, self).__init__(**parentkwargs)
    #~ self.rootfreq = kwargs['rootfreq'] # frequency a which the gene family is found at the root of each tree in the multiple reference tree set (species lineages from a Moran process)
    refsimul = kwargs.get('refsimul')
    if refsimul:
        # keep hard link to reference/species tree simulator object; better pickle them together!
        self.refsimul = refsimul
        # enforce existence of a root in reference simulation
        self.refroot = refsimul.connecttrees(returnCopy=False)
        self.reftrees = refsimul.trees
        self.refconbran = refsimul.contempbranches
        self.refextincts = refsimul.extincts
        self.ngen = refsimul.t - 1
        self.times = refsimul.times
    else:
        self.reftrees = kwargs['reftrees']
        self.refconbran = kwargs['refcontempbranches']
        self.refextincts = kwargs['refextincts']
        self.times = kwargs['times']
        self.ngen = kwargs.get('ngen')
    self.reftimeslices = _get_timeslices(len(self.times), self.times)
    # list of nodes of the reference tree; only query the reference simulator when the
    # value was not supplied, so that running without 'refsimul' is actually possible
    if 'refnodeswithdescent' in kwargs or not refsimul:
        self.refnodeswithdescent = kwargs['refnodeswithdescent']
    else:
        self.refnodeswithdescent = refsimul.get_nodes_with_descendants()
    self.transferrec = {}
    self.extantevents = []
    # only build the default profile when none was passed in
    if 'profile' in kwargs:
        self.profile = kwargs['profile']
    else:
        self.profile = IOsimul.DTLSimulProfile(type='core')
    self.genetrees = []
    self.pickgenelineages(allow_multiple=allow_multiple)
    if not noTrigger:
        # self.checkdata()
        # if ngen specified, launch simulation for ngen iterations
        if self.ngen:
            self.evolve(self.ngen, connecttrees=kwargs.get('connecttrees', False))
def __init__(self, model, **kwargs):
    """Initialise a simulator handling a population of trees, one per individual.

    The parent initialiser is run with `model` and all keyword arguments; then one
    root node (built with self.nodeClass, zero length, label "Root_<i>") is created
    for each of the `popsize` individuals defined by the model.
    Keyword arguments read here:
      profile   -- simulation profile (default: a new IOsimul.SimulProfile()).
      noTrigger -- when true, skip checkdata() and the automatic evolve(ngen) launch.
    """
    print('invoke _MultipleTreeSimulator__init__')
    print('%s %s' % ('kwargs:', kwargs))
    super(MultipleTreeSimulator, self).__init__(model=model, **kwargs)
    self.popsize = self.model.popsize
    roots = []
    for rootindex in range(self.popsize):
        roots.append(self.nodeClass(l=0.0, lab="Root_%d" % rootindex))
    self.trees = roots
    fallbackprofile = IOsimul.SimulProfile()
    self.profile = kwargs.get('profile', fallbackprofile)
    if kwargs.get('noTrigger'):
        return
    self.checkdata()
    # if ngen specified, launch simulation for ngen iterations
    if self.ngen:
        self.evolve(self.ngen)
def __init__(self, model, **kwargs):
    """Initialise the state common to all tree simulators.

    `model` is stored as the simulation model and the time counter `t` starts at 0.
    Keyword arguments read here:
      times, eventsrecord, extincts, contempbranches -- default to new empty lists.
      events, eventsmap -- default to new empty dicts.
      nodeClass -- node class (default: tree2.AnnotatedNode).
      ngen      -- stored as is (None if absent).
    """
    print('invoke _BaseTreeSimulator__init__')
    print('%s %s' % ('kwargs:', kwargs))
    self.model = model
    self.t = 0
    # record containers: use the supplied object, or else a fresh empty one of the right type
    recorddefaults = (
        ('times', list),
        ('events', dict),
        ('eventsrecord', list),
        ('eventsmap', dict),
        ('extincts', list),
        ('contempbranches', list),
    )
    for attrname, emptytype in recorddefaults:
        setattr(self, attrname, kwargs.get(attrname, emptytype()))
    self.nodeClass = kwargs.get('nodeClass', tree2.AnnotatedNode)
    self.ngen = kwargs.get('ngen')
    # generator object providing serial unique identifiers for event objects
    self.eventidgen = models.eventIdGen()
    warninglines = (
        "Cannot pickle generator objects; to pickle simulator object instance %s," % (repr(self),),
        "you have to first delete its event id generator attribute 'eventidgen'",
        "(will discontinue numeration of events for further simulation).",
    )
    self.dumppickle_warning = '\n'.join(warninglines)
    self.logger = IOsimul.SimulLogger(simultype=type(self))
def get_extanttrees(self, compute=True, addextincts=None, collapsenodes=False, removelosses=True):
    """Return a COPY of the list of all gene trees 'cleaned' of their dead branches.

    NB: the original trees will have all their nodes labelled afterward.

    Parameters:
      compute       -- when false, return the stored `self._extanttrees` without
                       recomputing anything.
      addextincts   -- not used by this method; kept for interface compatibility
                       (default is None rather than a shared mutable list).
      collapsenodes -- forwarded to copy_prune_dead_lineages(); also selects how
                       gene losses are removed (see below).
      removelosses  -- when true, additionally remove the lineages listed in
                       `self.extincts`; when false, stop after the first pruning pass.

    Null trees (false values in `self.genetrees`) are excluded from the returned list.
    """
    if not compute:
        return self._extanttrees
    # only works with all nodes being labelled, to use labels as references rather than
    # the node objects (which refer to the original tree) in Node.pop()
    self.labeltreenodes(silent=False)  # should not be necessary though
    # first remove lineages associated to extinct species
    lineages = [
        self.copy_prune_dead_lineages(t, trimroot=False, collapsenodes=collapsenodes)
        for t in self.genetrees if t
    ]
    if not removelosses:
        return lineages
    # and second remove lineages associated to gene loss
    if collapsenodes:
        return [
            BaseTreeSimulator.copy_prune_dead_lineages(
                t, self.extincts, trimroot=False, collapsenodes=collapsenodes)
            for t in lineages if t
        ]
    # keep a "rosary" structure of branches where single-child nodes are annotated
    # as speciation-loss events
    return [
        IOsimul.annotateSpeciationLossEvents(
            extanttree=t, lossnodes=self.extincts, trimLosses=True)
        for t in lineages if t
    ]
def main():
    """Command-line entry point: simulate a species population history, then gene families.

    Steps: parse the options from sys.argv; create the output sub-directories
    ('logs', 'pickles', 'genetrees', 'reftrees') under the output directory; simulate
    the reference (species) trees with a Moran process and write them out; then, for
    each gene family, simulate gene trees along the reference trees, pickle the
    reference and gene simulators together and write the gene trees in Newick format.

    Exits with status 2 on an unrecognised option and 0 after printing the usage
    message for -h/--help.
    """
    # option parsing ('s:' and 'g:' are declared so that the -s and -g short options
    # looked up below are actually accepted)
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "o:n:s:g:r:f:e:l:c:p:vh",
            ["outputdir=", "ngenes=", "popsize=", "ngen=",
             "dtlrates=", "rootfreq=", "profiles=",
             "sample.larger.trees=", "connect.all.trees=", "sample.extant.species=",
             "help", "verbose"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        print(usage())
        sys.exit(2)
    dopt = dict(opts)
    if ('-h' in dopt) or ('--help' in dopt):
        print(usage())
        sys.exit(0)
    # NOTE(review): 'silent' is set here but not read anywhere else in this function
    silent = not (('-v' in dopt) or ('--verbose' in dopt))

    outdir = dopt.get('-o', dopt.get('--outputdir', os.getcwd()))
    popsize = int(dopt.get('-s', dopt.get('--popsize', 100)))
    ngen = int(dopt.get('-g', dopt.get('--ngen', 1000)))
    nfprofiles = dopt.get('-p', dopt.get('--profiles'))

    # define the evolution rates and original frequencies of gene families
    if nfprofiles is not None:
        if nfprofiles == '':
            exsampleprof = [(2, 'core'), (2, 'accessory-slow'), (6, 'orfan-fast')]
            dtlprof = IOsimul.MetaSimulProfile(
                profiles=[(n, IOsimul.DTLSimulProfile(type=t)) for n, t in exsampleprof])
        else:
            # expects a JSON formating of simulator profiles conforming to the IOsimul.MetaSimulProfile class parsers
            # e.g. a list of dict objects representing the arguments of a simulators.DTLSimulProfile class instance
            dtlprof = IOsimul.MetaSimulProfile(json=nfprofiles)
    else:
        # default global parameters (no pangenome structure), overridden by any provided profile
        # (converted to float so a command-line value has the same type as the default)
        rootfreq = float(dopt.get('-f', dopt.get('--rootfreq', 0.5)))
        # nested .get() so that a missing --dtlrates option does not raise KeyError
        sdtlrates = dopt.get('-r', dopt.get('--dtlrates'))
        if sdtlrates:
            dtlrates = [float(s) for s in sdtlrates.split(',')]
        else:
            # by default, rates are:
            dtlrates = [0.001]  # D = 1e-3
        if len(dtlrates) < 2:
            dtlrates.append(dtlrates[0])  # T = D
        if len(dtlrates) < 3:
            dtlrates.append(sum(dtlrates))  # L = T+D
        dglobalprof = {
            0: {
                'rdup': dtlrates[0],
                'rtrans': dtlrates[1],
                'rloss': dtlrates[2]
            }
        }
        globalprof = IOsimul.DTLSimulProfile(rateschedule=dglobalprof, rootfreq=rootfreq)
        dtlprof = IOsimul.MetaSimulProfile(profiles=[(1, globalprof)])

    # number of gene families to simulate: taken from the profile weights when they sum
    # to more than 1, else from option -n/--ngenes, else the default value of 10
    # NOTE(review): the original comment said that -n overrides the profiles, which is
    # not what this test does -- confirm the intended precedence
    if dtlprof.ngenes > 1:
        ngenes = dtlprof.ngenes
    else:
        ngenes = int(dopt.get('-n', dopt.get('--ngenes', 10)))
    nlargegenetrees = int(dopt.get('-l', dopt.get('--sample.larger.trees', -1)))
    lentoroot = float(dopt.get('-c', dopt.get('--connect.all.trees', -1)))
    samplextant = int(dopt.get('-e', dopt.get('--sample.extant.species', 0)))
    assert samplextant <= popsize

    #~ parser = argparse.ArgumentParser(description='Simulate phylogenic trees describing evolution of a population of bacterial genomes, with species, replicon/locus and gene layers.')
    #~ parser.add_argument('-o', '--outdir', )

    # creating output directories
    for d in ['logs', 'pickles', 'genetrees', 'reftrees']:
        outd = "%s/%s" % (outdir, d)
        if not os.path.exists(outd):
            os.mkdir(outd)

    # simulate species tree
    moranmodel = models.MoranProcess(popsize=popsize)
    moransim = simulators.MultipleTreeSimulator(moranmodel, ngen=ngen)
    if lentoroot >= 0:
        # connect all roots of the species lineage trees
        conrt = moransim.connecttrees(lentoroot, returnCopy=True)
        conrt.write_newick("%s/reftrees/connected.reftree_full.nwk" % (outdir))
        # prune dead lineages and connect all roots of the species lineage trees
        extconrt = moransim.get_extanttree(compute=True, lentoroot=lentoroot)
        extconrt.write_newick("%s/reftrees/connected.reftree_extant.nwk" % (outdir))
        extantspe = extconrt.get_leaf_labels()
    else:
        # write lineage trees separately
        lextrt = moransim.get_extanttrees(compute=True)
        extantspe = []
        for k, extrt in enumerate(lextrt):
            # use the loop variable: 'extconrt' is not defined in this branch
            extrt.write_newick("%s/reftrees/reftree.%d_extant.nwk" % (outdir, k))
            extantspe += extrt.get_leaf_labels()

    # select sampled species among the N extant
    if samplextant:
        sampledspe = random.sample(extantspe, samplextant)
        refnodeswithdescent = moransim.get_nodes_with_descendants(sample=sampledspe)
    else:
        refnodeswithdescent = moransim.get_nodes_with_descendants()

    # serial simulation of gene families, have to offer a parallel version
    for k in range(ngenes):
        print("### simulate gene tree %d" % k)
        # simulate gene tree under the same reference tree set (= species/organism population history)
        #~ bddtlmodel = models.BirthDeathDTLModel(rdup=dtlrates[0], rtrans=dtlrates[1], rloss=dtlrates[2])
        bddtlmodel = models.BirthDeathDTLModel()
        bddtlsim = simulators.DTLtreeSimulator(
            model=bddtlmodel,
            refsimul=moransim,
            refnodeswithdescent=refnodeswithdescent,
            profile=dtlprof.sampleprofile(verbose=True),
            noTrigger=True)
        bddtlsim.evolve(bddtlsim.ngen)

        # save ref and gene tree simulation object together to save space as they share references to same objects
        IOsimul.dumppickle({
            'refsim': moransim,
            'genesim': bddtlsim
        }, "%s/pickles/simul.%d.pickle" % (outdir, k))

        # write out the largest n gene trees (or all of them when n is negative)
        if nlargegenetrees >= 0:
            genetreesizes = [(genetree.nb_leaves(), i)
                             for i, genetree in enumerate(bddtlsim.genetrees)]
            genetreesizes.sort(reverse=True)
            # slicing copes with fewer gene trees than requested (no IndexError)
            isavetrees = [i for _, i in genetreesizes[:nlargegenetrees]]
        else:
            isavetrees = range(len(bddtlsim.genetrees))
        for nwritten, l in enumerate(isavetrees):
            genetree = bddtlsim.genetrees[l]
            # start a new file on the first tree written, whatever its index, then append
            genetree.write_newick("%s/genetrees/simul.%d.all_gt.nwk" % (outdir, k),
                                  mode=('w' if nwritten == 0 else 'a'))
            #~ genetree.ref.write_newick("%s/reftrees/simul.%d.rt.%d.nwk"%(outdir, k, l))

        if lentoroot >= 0:
            # connect all the gene trees in each gene population
            congt = bddtlsim.connecttrees(lentoroot, returnCopy=True)
            # write out connected trees
            congt.write_newick("%s/genetrees/simul.%d.connected_gt_full.nwk" % (outdir, k))
            # prune dead lineages and connect all roots of the gene trees
            extconrt = bddtlsim.get_extanttree(compute=True, lentoroot=lentoroot)
            print(extconrt)
            extconrt.write_newick(
                "%s/genetrees/simul.%d.connected_gt_extant.nwk" % (outdir, k))
# Serial simulation of `nsims` gene families along one shared reference simulation.
# NOTE(review): nsims, dtlrate, moransim, outdir and nlargegenetrees are not defined in
# this excerpt; they are presumably set by the enclosing code -- confirm.
for k in range(nsims):
    print "### simulate gene tree", k
    # simulate gene tree under the same reference tree set (= species population history)
    bddtlmodel = models.BirthDeathDTLModel(rdup=dtlrate[0], rtrans=dtlrate[1], rloss=dtlrate[2])
    # noTrigger=True: the simulation is launched explicitly by the evolve() call below
    bddtlsim = simulators.DTLtreeSimulator(model=bddtlmodel, refsimul=moransim, noTrigger=True)
    bddtlsim.evolve(bddtlsim.ngen)
    # save ref and gene tree simulation object together to save space as they share references to same objects
    IOsimul.dumppickle({ 'refsim': moransim, 'genesim': bddtlsim }, "%s/pickles/simul.%d.pickle" % (outdir, k), prompt=True)
    # write out the largest n gene trees and corresponding species trees
    # (size, index) pairs, so that sorting in reverse order ranks the trees by decreasing number of leaves
    genetreesizes = [(genetree.nb_leaves(), i) for i, genetree in enumerate(bddtlsim.genetrees)]
    # sort a copy, leaving genetreesizes in the original tree order
    largesizes = copy.copy(genetreesizes)
    largesizes.sort(reverse=True)
    # NOTE(review): largesizes[l] raises IndexError if nlargegenetrees exceeds the number of gene trees
    for l in range(nlargegenetrees):
        il = largesizes[l]
        # il[1] is the index of the tree in bddtlsim.genetrees
        maxgenetree = bddtlsim.genetrees[il[1]]
        maxgenetree.write_newick("%s/genetrees/simul.%d.gt.%d.nwk" % (outdir, k, l))
        # the tree's 'ref' attribute is written as the corresponding reference tree
        maxgenetree.ref.write_newick("%s/reftrees/simul.%d.rt.%d.nwk" % (outdir, k, l))
def write_endlog(self, dirout, simultype): """data export of simulation (a posteriori)""" logger = IOsimul.SimulLogger(simultype=simultype, bnfout="%/log_" % dirout) logger