def get_contig(self, chromosome, genetic_map=None, length_multiplier=1):
    """
    Build the :class:`.Contig` used to simulate one chromosome of this
    species.

    :param str chromosome: The ID of the chromosome to simulate.
    :param str genetic_map: ID of the genetic map from which recombination
        rates are taken. If None, a uniform recombination map is built from
        the chromosome's own length and recombination rate. The default
        rates are species- and chromosome-specific, and can be found in the
        :ref:`sec_catalog`. (Default: None)
    :param float length_multiplier: Scale factor applied to the chromosome
        length when a uniform map is built; the chromosome-specific mutation
        and recombination rates are kept. This option cannot currently be
        used in conjunction with the ``genetic_map`` argument.
    :rtype: :class:`.Contig`
    :return: A :class:`.Contig` describing a simulation of the section of
        genome.
    """
    # TODO: add non-autosomal support
    non_autosomal_ids = ("x", "y", "m", "mt", "chrx", "chry", "chrm")
    if chromosome is not None and chromosome.lower() in non_autosomal_ids:
        message = (
            "Non-autosomal simulations are not yet supported. See "
            "https://github.com/popsim-consortium/stdpopsim/issues/383 and "
            "https://github.com/popsim-consortium/stdpopsim/issues/406"
        )
        warnings.warn(stdpopsim.NonAutosomalWarning(message))

    chrom = self.genome.get_chromosome(chromosome)

    if genetic_map is not None:
        # Empirical map requested: the multiplier must be left at 1.
        if length_multiplier != 1:
            raise ValueError(
                "Cannot use length multiplier with empirical maps")
        logger.debug(f"Getting map for {chrom.id} from {genetic_map}")
        gm = self.get_genetic_map(genetic_map)
        recomb_map = gm.get_chromosome_map(chrom.id)
    else:
        # No map requested: flat recombination rate over the scaled length.
        logger.debug(
            f"Making flat chromosome {length_multiplier} * {chrom.id}")
        gm = None
        scaled_length = chrom.length * length_multiplier
        recomb_map = msprime.RecombinationMap.uniform_map(
            scaled_length, chrom.recombination_rate)

    return stdpopsim.Contig(
        recombination_map=recomb_map,
        mutation_rate=chrom.mutation_rate,
        genetic_map=gm,
    )
def get_contig(
    self,
    chromosome=None,
    genetic_map=None,
    length_multiplier=1,
    length=None,
    inclusion_mask=None,
    exclusion_mask=None,
):
    """
    Returns a :class:`.Contig` instance describing a section of genome that
    is to be simulated based on empirical information for a given species
    and chromosome.

    :param str chromosome: The ID of the chromosome to simulate.
        A complete list of chromosome IDs for each species can be found in
        the "Genome" subsection for the species in the :ref:`sec_catalog`.
        If the chromosome is not given, we specify a "generic" contig with
        given ``length``.
    :param str genetic_map: If specified, obtain recombination rate
        information from the genetic map with the specified ID. If None,
        simulate using a default uniform recombination rate on a region with
        the length of the specified chromosome. The default rates are
        species- and chromosome-specific, and can be found in the
        :ref:`sec_catalog`. (Default: None)
    :param float length_multiplier: If specified, simulate a region of
        length `length_multiplier` times the length of the specified
        chromosome with the same chromosome-specific mutation and
        recombination rates. This option cannot currently be used in
        conjunction with the ``genetic_map`` argument, with a mask, or with
        a generic contig.
    :param float length: Used with a "generic" contig, specifies the length
        of genome sequence for this contig. For a generic contig, mutation
        and recombination rates are equal to the genome-wide average across
        all autosomal chromosomes.
    :param inclusion_mask: If specified, simulated genomes are subset to
        only include regions given by the mask. The mask can be specified by
        the path and file name of a bed file or as a list or array of
        intervals given by the left and right end points of the intervals.
    :param exclusion_mask: If specified, simulated genomes are subset to
        exclude regions given by the mask. The mask can be specified by the
        path and file name of a bed file or as a list or array of intervals
        given by the left and right end points of the intervals.
    :rtype: :class:`.Contig`
    :return: A :class:`.Contig` describing the section of the genome.
    :raises ValueError: If the arguments are combined in an unsupported
        way: a genetic map, length multiplier or mask with a generic contig;
        a generic contig without ``length``; ``length`` with a named
        chromosome; both masks at once; or a length multiplier together with
        a genetic map or a mask.
    """
    # TODO: add non-autosomal support
    non_autosomal_lower = ["x", "y", "m", "mt", "chrx", "chry", "chrm"]
    if chromosome is not None and chromosome.lower() in non_autosomal_lower:
        warnings.warn(
            stdpopsim.NonAutosomalWarning(
                "Non-autosomal simulations are not yet supported. See "
                "https://github.com/popsim-consortium/stdpopsim/issues/383 and "
                "https://github.com/popsim-consortium/stdpopsim/issues/406"
            )
        )

    if chromosome is None:
        # Generic contig: only ``length`` may be supplied.
        if genetic_map is not None:
            raise ValueError("Cannot use genetic map with generic contig")
        if length_multiplier != 1:
            raise ValueError(
                "Cannot use length multiplier for generic contig")
        if inclusion_mask is not None or exclusion_mask is not None:
            raise ValueError("Cannot use mask with generic contig")
        if length is None:
            raise ValueError(
                "Must specify sequence length of generic contig")

        # Length-weighted averages of the per-chromosome rates, taken over
        # every chromosome whose ID is not in the non-autosomal list.
        L_tot = 0
        r_tot = 0
        u_tot = 0
        for chrom_data in self.genome.chromosomes:
            if chrom_data.id.lower() not in non_autosomal_lower:
                L_tot += chrom_data.length
                r_tot += chrom_data.length * chrom_data.recombination_rate
                u_tot += chrom_data.length * chrom_data.mutation_rate
        u = u_tot / L_tot
        r = r_tot / L_tot
        recomb_map = msprime.RateMap.uniform(length, r)
        return stdpopsim.Contig(recombination_map=recomb_map, mutation_rate=u)

    # Named chromosome.
    if length is not None:
        raise ValueError("Cannot specify sequence length for named contig")
    if inclusion_mask is not None and exclusion_mask is not None:
        raise ValueError("Cannot specify both inclusion and exclusion masks")

    chrom = self.genome.get_chromosome(chromosome)
    if genetic_map is None:
        logger.debug(
            f"Making flat chromosome {length_multiplier} * {chrom.id}")
        gm = None
        # The scaled length is rounded to a whole number of positions.
        recomb_map = msprime.RateMap.uniform(
            round(chrom.length * length_multiplier),
            chrom.recombination_rate,
        )
    else:
        if length_multiplier != 1:
            raise ValueError(
                "Cannot use length multiplier with empirical maps")
        logger.debug(f"Getting map for {chrom.id} from {genetic_map}")
        gm = self.get_genetic_map(genetic_map)
        recomb_map = gm.get_chromosome_map(chrom.id)

    def _mask_intervals(mask):
        # Turn a user-supplied mask into intervals: a string is read as a
        # bed file path, anything else is passed through unchanged.
        if mask is None:
            return None
        if length_multiplier != 1:
            raise ValueError("Cannot use length multiplier with mask")
        if isinstance(mask, str):
            return stdpopsim.utils.read_bed(mask, chromosome)
        return mask

    inclusion_intervals = _mask_intervals(inclusion_mask)
    exclusion_intervals = _mask_intervals(exclusion_mask)

    return stdpopsim.Contig(
        recombination_map=recomb_map,
        mutation_rate=chrom.mutation_rate,
        genetic_map=gm,
        inclusion_mask=inclusion_intervals,
        exclusion_mask=exclusion_intervals,
    )
def species_contig(
    *,
    species,
    chromosome=None,
    genetic_map=None,
    length_multiplier=1,
    length=None,
    mutation_rate=None,
    inclusion_mask=None,
    exclusion_mask=None,
):
    """
    Build a Contig for a species.

    With no ``chromosome`` a "generic" contig of the given ``length`` is
    built, using length-weighted average recombination and mutation rates
    over the species' chromosomes whose IDs are not in the non-autosomal
    list. With a ``chromosome`` the contig is built from that chromosome,
    either with a uniform recombination map or with the named genetic map.

    :param species: Object providing ``genome`` and ``get_genetic_map``.
    :param str chromosome: ID of the chromosome, or None for a generic
        contig.
    :param str genetic_map: ID of the genetic map to take recombination
        rates from; only valid with a named chromosome and a length
        multiplier of 1.
    :param float length_multiplier: Scale factor for the chromosome length
        when a uniform map is used; cannot be combined with a genetic map,
        a mask, or a generic contig.
    :param float length: Sequence length of a generic contig; required when
        ``chromosome`` is None and rejected otherwise.
    :param float mutation_rate: If given, used instead of the averaged rate
        (generic contig) or the chromosome's own rate (named contig).
    :param inclusion_mask: Path to a bed file, or intervals passed through
        unchanged. Cannot be combined with ``exclusion_mask``.
    :param exclusion_mask: Path to a bed file, or intervals passed through
        unchanged. Cannot be combined with ``inclusion_mask``.
    :return: The constructed contig.
    :raises ValueError: On any of the unsupported argument combinations
        listed above.
    """
    # TODO: add non-autosomal support
    non_autosomal_lower = ["x", "y", "m", "mt", "chrx", "chry", "chrm"]
    generic = chromosome is None

    if not generic and chromosome.lower() in non_autosomal_lower:
        notice = (
            "Non-autosomal simulations are not yet supported. See "
            "https://github.com/popsim-consortium/stdpopsim/issues/383 and "
            "https://github.com/popsim-consortium/stdpopsim/issues/406"
        )
        warnings.warn(stdpopsim.NonAutosomalWarning(notice))

    if generic:
        if genetic_map is not None:
            raise ValueError("Cannot use genetic map with generic contig")
        if length_multiplier != 1:
            raise ValueError(
                "Cannot use length multiplier for generic contig")
        if not (inclusion_mask is None and exclusion_mask is None):
            raise ValueError("Cannot use mask with generic contig")
        if length is None:
            raise ValueError(
                "Must specify sequence length of generic contig")

        # Accumulate length-weighted rate totals with explicit running sums
        # so the floating-point result matches sequential addition.
        total_length = 0
        weighted_recomb = 0
        weighted_mut = 0
        for entry in species.genome.chromosomes:
            if entry.id.lower() in non_autosomal_lower:
                continue
            total_length += entry.length
            weighted_recomb += entry.length * entry.recombination_rate
            weighted_mut += entry.length * entry.mutation_rate
        if mutation_rate is None:
            mutation_rate = weighted_mut / total_length
        mean_recomb = weighted_recomb / total_length
        return Contig.basic_contig(
            length=length,
            mutation_rate=mutation_rate,
            recombination_rate=mean_recomb,
        )

    if length is not None:
        raise ValueError("Cannot specify sequence length for named contig")
    if inclusion_mask is not None and exclusion_mask is not None:
        raise ValueError("Cannot specify both inclusion and exclusion masks")

    chrom = species.genome.get_chromosome(chromosome)
    if genetic_map is not None:
        if length_multiplier != 1:
            raise ValueError(
                "Cannot use length multiplier with empirical maps")
        logger.debug(f"Getting map for {chrom.id} from {genetic_map}")
        gm = species.get_genetic_map(genetic_map)
        recomb_map = gm.get_chromosome_map(chrom.id)
    else:
        logger.debug(
            f"Making flat chromosome {length_multiplier} * {chrom.id}")
        gm = None
        flat_length = round(chrom.length * length_multiplier)
        recomb_map = msprime.RateMap.uniform(
            flat_length, chrom.recombination_rate)

    def _to_intervals(mask):
        # None stays None; a string is read as a bed file path; any other
        # value is used as the intervals directly.
        if mask is None:
            return None
        if length_multiplier != 1:
            raise ValueError("Cannot use length multiplier with mask")
        if isinstance(mask, str):
            return stdpopsim.utils.read_bed(mask, chromosome)
        return mask

    inclusion_intervals = _to_intervals(inclusion_mask)
    exclusion_intervals = _to_intervals(exclusion_mask)

    if mutation_rate is None:
        mutation_rate = chrom.mutation_rate

    return stdpopsim.Contig(
        recombination_map=recomb_map,
        mutation_rate=mutation_rate,
        genetic_map=gm,
        inclusion_mask=inclusion_intervals,
        exclusion_mask=exclusion_intervals,
    )