def generate_contigs_two_sizes(genome, small_size, large_size, min_distance, max_distance, distr_weight_large_ctgs):
    """Yield FASTA records for contigs of two fixed sizes cut from ``genome``.

    The genome is walked left to right.  Every round picks ``large_size``
    with probability ``distr_weight_large_ctgs`` (otherwise ``small_size``),
    skips a gap drawn from ``[min_distance, max_distance)`` and cuts the
    contig at the resulting offset.  Generation ends as soon as a contig
    would extend past the end of ``genome``.

    Each yielded string is a header line
    ``>c<index>,pos:<start>-<end>,rc:<0|1>`` followed by the sequence line.
    Contigs flagged ``rc:1`` are passed through ``reverse_complement``
    before being emitted; the flag is a fair coin flip per contig.
    """
    start = 0
    serial = 0
    while True:
        draw = random.uniform(0, 1)
        length = large_size if draw < distr_weight_large_ctgs else small_size

        # Skip the gap that separates this contig from the previous one.
        start += random.randrange(min_distance, max_distance)
        end = start + length
        if end > len(genome):
            return

        flip = random.randrange(0, 2)
        chunk = genome[start:end]
        if flip:
            template = '>c{0},pos:{1}-{2},rc:1\n{3}\n'
            chunk = reverse_complement(chunk)
        else:
            template = '>c{0},pos:{1}-{2},rc:0\n{3}\n'
        yield template.format(serial, start, end, chunk)

        serial += 1
        start = end
def generate(self, reference_accession, reference_sequence, read_index):
    """Simulate one read pair from ``reference_sequence``.

    A fragment length is drawn according to ``self.distribution``:
    ``'normal'`` uses ``random.gauss(self.mean, self.sigma)`` and
    ``'uniform'`` uses ``random.uniform(self.min_size, self.max_size)``;
    the result is truncated to an int.  A fragment start is then chosen
    uniformly so the fragment fits inside the reference, and the two reads
    are cut from the fragment ends: ``read1`` is the reverse complement of
    the first ``self.read_length`` bases, ``read2`` is the last
    ``self.read_length`` bases as they appear in the reference.

    The method returns nothing; it stores ``fragment_length``,
    ``start_pos``, ``read1``, ``read2``, ``reference_accession`` and
    ``read_index`` on the instance.

    Raises:
        ValueError: if ``self.distribution`` is neither ``'normal'`` nor
            ``'uniform'``, or if the drawn fragment is not strictly
            shorter than ``reference_sequence``.
    """
    if self.distribution == 'normal':
        fragment_length = int(random.gauss(self.mean, self.sigma))
    elif self.distribution == 'uniform':
        fragment_length = int(random.uniform(self.min_size, self.max_size))
    else:
        # Without this branch a typo in the distribution name would either
        # crash later with an AttributeError or silently reuse the fragment
        # length left over from a previous call.
        raise ValueError(
            "Unknown fragment size distribution: {0!r} "
            "(expected 'normal' or 'uniform')".format(self.distribution))
    self.fragment_length = fragment_length

    reference_length = len(reference_sequence)
    if fragment_length >= reference_length:
        raise ValueError(
            "Too short reference sequence length for simulated read.\n"
            "Read fragment: {0}\nTranscript length: {1}".format(
                fragment_length, reference_length))

    # NOTE(review): a drawn fragment shorter than self.read_length (possible
    # with the normal distribution) is not rejected here; the read2 slice
    # start can then become negative.  Confirm whether callers rely on this.
    self.start_pos = random.randrange(reference_length - fragment_length)
    fragment_end = self.start_pos + fragment_length

    self.read1 = reverse_complement(
        reference_sequence[self.start_pos:self.start_pos + self.read_length])
    self.read2 = reference_sequence[fragment_end - self.read_length:fragment_end]

    self.reference_accession = reference_accession
    self.read_index = read_index
def generate_contigs(genome, min_c_len, max_c_len, min_distance, max_distance):
    """Yield FASTA records for randomly sized, randomly spaced contigs.

    The genome is walked left to right.  Every round draws a contig length
    from ``[min_c_len, max_c_len)``, skips a gap drawn from
    ``[min_distance, max_distance)`` and cuts the contig at the resulting
    offset.  Generation ends as soon as a contig would extend past the end
    of ``genome``.

    Each yielded string is a header line
    ``>c<index>,pos:<start>-<end>,rc:<0|1>`` followed by the sequence line.
    Contigs flagged ``rc:1`` are passed through ``reverse_complement``
    before being emitted; the flag is a fair coin flip per contig.
    """
    start = 0
    serial = 0
    while True:
        length = random.randrange(min_c_len, max_c_len)

        # Skip the gap that separates this contig from the previous one.
        start += random.randrange(min_distance, max_distance)
        end = start + length
        if end > len(genome):
            return

        flip = random.randrange(0, 2)
        chunk = genome[start:end]
        if flip:
            template = '>c{0},pos:{1}-{2},rc:1\n{3}\n'
            chunk = reverse_complement(chunk)
        else:
            template = '>c{0},pos:{1}-{2},rc:0\n{3}\n'
        yield template.format(serial, start, end, chunk)

        serial += 1
        start = end
def generate_contigs(genome, min_c_len, max_c_len, min_distance, max_distance):
    """Yield FASTA records for randomly sized, randomly spaced contigs.

    The genome is walked left to right.  Every round draws a contig length
    from ``[min_c_len, max_c_len)``, skips a gap drawn from
    ``[min_distance, max_distance)`` and cuts the contig at the resulting
    offset.  Generation ends as soon as a contig would extend past the end
    of ``genome``.

    Each yielded string is a header line
    ``>c<index>,pos:<start>-<end>,rc:<0|1>`` followed by the sequence line.
    Contigs flagged ``rc:1`` are passed through ``reverse_complement``
    before being emitted; the flag is a fair coin flip per contig.
    """
    start = 0
    serial = 0
    while True:
        length = random.randrange(min_c_len, max_c_len)

        # Skip the gap that separates this contig from the previous one.
        start += random.randrange(min_distance, max_distance)
        end = start + length
        if end > len(genome):
            return

        flip = random.randrange(0, 2)
        chunk = genome[start:end]
        if flip:
            template = '>c{0},pos:{1}-{2},rc:1\n{3}\n'
            chunk = reverse_complement(chunk)
        else:
            template = '>c{0},pos:{1}-{2},rc:0\n{3}\n'
        yield template.format(serial, start, end, chunk)

        serial += 1
        start = end
def generate_contigs_two_sizes(genome, small_size, large_size, min_distance, max_distance, distr_weight_large_ctgs):
    """Yield FASTA records for contigs of two fixed sizes cut from ``genome``.

    The genome is walked left to right.  Every round picks ``large_size``
    with probability ``distr_weight_large_ctgs`` (otherwise ``small_size``),
    skips a gap drawn from ``[min_distance, max_distance)`` and cuts the
    contig at the resulting offset.  Generation ends as soon as a contig
    would extend past the end of ``genome``.

    Each yielded string is a header line
    ``>c<index>,pos:<start>-<end>,rc:<0|1>`` followed by the sequence line.
    Contigs flagged ``rc:1`` are passed through ``reverse_complement``
    before being emitted; the flag is a fair coin flip per contig.
    """
    start = 0
    serial = 0
    while True:
        draw = random.uniform(0, 1)
        length = large_size if draw < distr_weight_large_ctgs else small_size

        # Skip the gap that separates this contig from the previous one.
        start += random.randrange(min_distance, max_distance)
        end = start + length
        if end > len(genome):
            return

        flip = random.randrange(0, 2)
        chunk = genome[start:end]
        if flip:
            template = '>c{0},pos:{1}-{2},rc:1\n{3}\n'
            chunk = reverse_complement(chunk)
        else:
            template = '>c{0},pos:{1}-{2},rc:0\n{3}\n'
        yield template.format(serial, start, end, chunk)

        serial += 1
        start = end
def get_sequence(self):
    """Sample a random spliced transcript from ``self.genome_strand``.

    Between one and four exons are drawn, each with a length from
    ``[self.min_exon_size, self.max_exon_size)`` and followed by an intron
    with a length from ``[self.min_intron_size, self.max_intron_size)``.
    The strand is chosen by a coin flip.  Layouts that do not fit inside
    the genome are discarded and redrawn.  A start position is then chosen
    so the whole layout fits, and the exon sequences are concatenated
    (walking towards lower coordinates and reverse-complementing each exon
    when the reverse strand was chosen).

    The method returns nothing; it stores the result on the instance:
    ``intron_length``, ``exon_lengths``, ``reverse_complement``,
    ``start_position``, ``positions`` (list of ``(start, end)`` slices),
    ``sequence`` and ``accession``.

    Raises:
        ValueError: if even the smallest possible layout (one exon and one
            intron at their minimum sizes) does not fit in the genome.
    """
    genome_length = len(self.genome_strand)

    # Fail fast instead of resampling forever when no layout can ever fit.
    if self.min_exon_size + self.min_intron_size >= genome_length:
        raise ValueError(
            'Genome of length {0} is too short for a transcript with '
            'minimum exon size {1} and minimum intron size {2}'.format(
                genome_length, self.min_exon_size, self.min_intron_size))

    # Redraw the layout until it fits.  This replaces a recursive retry
    # whose callers kept running after the nested call had finished.
    while True:
        nr_exons = random.randrange(1, 5)
        self.intron_length = [
            random.randrange(self.min_intron_size, self.max_intron_size)
            for _ in range(nr_exons)]
        self.exon_lengths = [
            random.randrange(self.min_exon_size, self.max_exon_size)
            for _ in range(nr_exons)]
        self.reverse_complement = random.randrange(2)
        span = sum(self.intron_length) + sum(self.exon_lengths)
        if span < genome_length:
            break

    if self.reverse_complement:
        # Reverse-strand transcripts are read towards lower coordinates,
        # so the start must leave room for the whole layout to its left.
        self.start_position = random.randrange(span, genome_length)
    else:
        self.start_position = random.randrange(0, genome_length - span)

    position = self.start_position
    self.positions = []
    self.sequence = ''
    for e_len, i_len in zip(self.exon_lengths, self.intron_length):
        if self.reverse_complement:
            self.sequence += reverse_complement(
                self.genome_strand[position - e_len:position])
            self.positions.append((position - e_len, position))
            position -= e_len + i_len
        else:
            self.sequence += self.genome_strand[position:position + e_len]
            self.positions.append((position, position + e_len))
            position += e_len + i_len

    self.accession = '>spliced_variant{0},rc={1}'.format(
        self.positions, self.reverse_complement)