Ejemplo n.º 1
0
def generate_contigs_two_sizes(genome, small_size, large_size, min_distance,
                               max_distance, distr_weight_large_ctgs):
    """Yield FASTA-style records for contigs of two fixed sizes cut from ``genome``.

    The genome is walked from left to right.  On every round a uniform draw
    from ``random.uniform(0, 1)`` selects ``large_size`` when it is below
    ``distr_weight_large_ctgs`` and ``small_size`` otherwise; a gap drawn with
    ``random.randrange(min_distance, max_distance)`` is then skipped.  The
    generator finishes as soon as the next contig would extend past the end
    of ``genome``.

    Each yielded string is a header ``>c<index>,pos:<start>-<end>,rc:<flag>``
    followed by the sequence, both newline-terminated.  When the flag is 1
    the sequence has been passed through ``reverse_complement``.
    """
    start = 0
    contig_id = 0
    while True:
        # Draw order (size, gap, strand) determines the output under a
        # fixed random seed, so it must stay as is.
        picks_large = random.uniform(0, 1) < distr_weight_large_ctgs
        contig_len = large_size if picks_large else small_size

        start += random.randrange(min_distance, max_distance)
        end = start + contig_len
        if end > len(genome):
            return

        fragment = genome[start:end]
        if random.randrange(0, 2):
            record = '>c{0},pos:{1}-{2},rc:1\n{3}\n'.format(
                contig_id, start, end, reverse_complement(fragment))
        else:
            record = '>c{0},pos:{1}-{2},rc:0\n{3}\n'.format(
                contig_id, start, end, fragment)
        yield record

        contig_id += 1
        start = end
Ejemplo n.º 2
0
    def generate(self,reference_accession, reference_sequence, read_index):
        """Sample one fragment from ``reference_sequence`` and cut a read pair from it.

        The fragment length is drawn from ``random.gauss(self.mean, self.sigma)``
        when ``self.distribution`` is ``'normal'`` and from
        ``random.uniform(self.min_size, self.max_size)`` when it is
        ``'uniform'``; the draw is truncated to an int.  A start position is
        then chosen uniformly so that the fragment lies inside the reference.

        Sets ``fragment_length``, ``start_pos``, ``read1`` (the first
        ``self.read_length`` bases of the fragment, passed through
        ``reverse_complement``), ``read2`` (the last ``self.read_length``
        bases of the fragment, as-is), ``reference_accession`` and
        ``read_index`` on the instance.  Returns nothing.

        Raises:
            ValueError: if the drawn fragment is not strictly shorter than
                the reference, or is shorter than ``self.read_length``.
        """
        if self.distribution == 'normal':
            self.fragment_length = int(random.gauss(self.mean, self.sigma))
        elif self.distribution == 'uniform':
            self.fragment_length = int(random.uniform(self.min_size, self.max_size))
        # NOTE(review): any other distribution value leaves fragment_length
        # at whatever it was before this call -- confirm that is intended.

        reference_length = len(reference_sequence)
        if self.fragment_length >= reference_length:
            # Adjacent literals instead of backslash continuations, so the
            # source indentation does not leak into the message.
            raise ValueError(
                "Too short reference sequence length for simulated read.\n"
                "Read fragment: {0}\nTranscript length: {1}".format(
                    self.fragment_length, reference_length))
        if self.fragment_length < self.read_length:
            # A fragment shorter than a read would make the read2 slice start
            # before the fragment (possibly at a negative index) and read1
            # run past the fragment end.
            raise ValueError(
                "Fragment shorter than read length.\n"
                "Read fragment: {0}\nRead length: {1}".format(
                    self.fragment_length, self.read_length))

        # randrange(n) needs n > 0, guaranteed by the first check above.
        self.start_pos = random.randrange(reference_length - self.fragment_length)
        fragment_end = self.start_pos + self.fragment_length
        self.read1 = reverse_complement(
            reference_sequence[self.start_pos:self.start_pos + self.read_length])
        self.read2 = reference_sequence[fragment_end - self.read_length:fragment_end]
        self.reference_accession = reference_accession
        self.read_index = read_index
Ejemplo n.º 3
0
def generate_contigs(genome,min_c_len,max_c_len,min_distance,max_distance):
    """Yield FASTA-style records for randomly sized, randomly spaced contigs.

    Walking ``genome`` from left to right, each round draws a contig length
    with ``random.randrange(min_c_len, max_c_len)`` and a gap with
    ``random.randrange(min_distance, max_distance)``.  Generation ends when
    the next contig would reach past the end of ``genome``.

    Each yielded string is a header ``>c<index>,pos:<start>-<end>,rc:<flag>``
    followed by the sequence, both newline-terminated; a flag of 1 means the
    sequence was passed through ``reverse_complement``.
    """
    contig_id = 0
    cursor = 0
    while True:
        # Length is drawn before the gap; the order matters for seeded runs.
        length = random.randrange(min_c_len, max_c_len)
        gap = random.randrange(min_distance, max_distance)
        begin = cursor + gap
        stop = begin + length
        if stop > len(genome):
            return

        piece = genome[begin:stop]
        if random.randrange(0, 2):
            yield '>c{0},pos:{1}-{2},rc:1\n{3}\n'.format(
                contig_id, begin, stop, reverse_complement(piece))
        else:
            yield '>c{0},pos:{1}-{2},rc:0\n{3}\n'.format(
                contig_id, begin, stop, piece)

        contig_id += 1
        cursor = stop
Ejemplo n.º 4
0
def generate_contigs(genome, min_c_len, max_c_len, min_distance, max_distance):
    """Cut random contigs out of ``genome`` and yield them as FASTA-style text.

    Contig lengths come from ``random.randrange(min_c_len, max_c_len)`` and
    the gaps between consecutive contigs from
    ``random.randrange(min_distance, max_distance)``.  The generator is
    exhausted once a drawn contig no longer fits before the end of
    ``genome``.

    Every record is ``>c<index>,pos:<start>-<end>,rc:<flag>`` on one line and
    the sequence on the next, each newline-terminated.  Records flagged
    ``rc:1`` carry the output of ``reverse_complement`` for that slice.
    """
    serial = 0
    left = 0

    # Draw the first candidate (length, then gap) before entering the loop so
    # the bounds test can serve as the loop condition.
    span = random.randrange(min_c_len, max_c_len)
    left += random.randrange(min_distance, max_distance)

    while left + span <= len(genome):
        right = left + span
        flip = random.randrange(0, 2)
        if not flip:
            yield '>c{0},pos:{1}-{2},rc:0\n{3}\n'.format(
                serial, left, right, genome[left:right])
        else:
            yield '>c{0},pos:{1}-{2},rc:1\n{3}\n'.format(
                serial, left, right, reverse_complement(genome[left:right]))

        serial += 1
        left = right

        # Next candidate: same draw order as the initial one.
        span = random.randrange(min_c_len, max_c_len)
        left += random.randrange(min_distance, max_distance)
Ejemplo n.º 5
0
def generate_contigs_two_sizes(genome,small_size,large_size,min_distance,max_distance,distr_weight_large_ctgs):
    """Cut contigs of two fixed sizes out of ``genome`` and yield FASTA-style text.

    Per contig, ``random.uniform(0, 1)`` is compared against
    ``distr_weight_large_ctgs``: a smaller draw selects ``large_size``,
    anything else ``small_size``.  The gap to the previous contig is drawn
    with ``random.randrange(min_distance, max_distance)``.  Iteration stops
    when the selected contig would end beyond ``len(genome)``.

    Every record is ``>c<index>,pos:<start>-<end>,rc:<flag>`` on one line and
    the sequence on the next, each newline-terminated.  Records flagged
    ``rc:1`` carry the output of ``reverse_complement`` for that slice.
    """
    # Record templates indexed by the strand flag (0 = as-is, 1 = rev-comp).
    templates = (
        '>c{0},pos:{1}-{2},rc:0\n{3}\n',
        '>c{0},pos:{1}-{2},rc:1\n{3}\n',
    )
    offset = 0
    count = 0
    while True:
        if random.uniform(0, 1) < distr_weight_large_ctgs:
            size = large_size
        else:
            size = small_size

        offset += random.randrange(min_distance, max_distance)
        limit = offset + size
        if not limit <= len(genome):
            break

        strand = random.randrange(0, 2)
        chunk = genome[offset:limit]
        if strand:
            chunk = reverse_complement(chunk)
        yield templates[strand].format(count, offset, limit, chunk)

        count += 1
        offset = limit
Ejemplo n.º 6
0
    def get_sequence(self):
        """
        Build a random spliced transcript from ``self.genome_strand``.

        Draws between 1 and 4 exons, with one exon length (from
        ``min_exon_size``/``max_exon_size``) and one intron length (from
        ``min_intron_size``/``max_intron_size``) per exon, plus a random
        strand flag.  Layouts whose total span does not fit strictly inside
        the genome are redrawn.  A start position is then chosen and the
        exon slices are concatenated; on the reverse strand the exons are
        taken walking leftwards from the start position and each slice is
        passed through ``reverse_complement``.

        Sets ``intron_length``, ``exon_lengths``, ``reverse_complement``,
        ``start_position``, ``positions`` (a list of ``(start, end)`` slice
        bounds per exon), ``sequence`` and ``accession`` on the instance.
        Returns nothing.

        Raises:
            ValueError: if even the smallest possible layout (one minimum
                exon plus one minimum intron) cannot fit in the genome.
        """
        genome_length = len(self.genome_strand)

        # If the smallest layout does not fit, redrawing can never succeed.
        # Fail with a clear error instead of retrying without end.
        # (Assumes the configured sizes are non-negative.)
        if self.min_exon_size + self.min_intron_size >= genome_length:
            raise ValueError(
                'Genome of length {0} is too short for a transcript with '
                'min exon size {1} and min intron size {2}'.format(
                    genome_length, self.min_exon_size, self.min_intron_size))

        # Redraw the layout in a loop until it fits.  Unlike a recursive
        # retry, the code below then runs exactly once per call.
        while True:
            nr_exons = random.randrange(1,5)
            self.intron_length = [random.randrange(self.min_intron_size,self.max_intron_size) for _ in range(nr_exons)]
            self.exon_lengths = [random.randrange(self.min_exon_size,self.max_exon_size) for _ in range(nr_exons)]
            self.reverse_complement = random.randrange(2)

            span = sum(self.intron_length) + sum(self.exon_lengths)
            if span < genome_length:
                break

        if self.reverse_complement:
            # Reverse strand walks leftwards, so leave `span` bases to the left.
            self.start_position = random.randrange(span, genome_length)
        else:
            self.start_position = random.randrange(0, genome_length - span)

        position = self.start_position
        self.positions = []
        self.sequence = ''
        for e_len,i_len in zip(self.exon_lengths,self.intron_length):
            if self.reverse_complement:
                self.sequence += reverse_complement(self.genome_strand[position-e_len:position])
                self.positions.append((position-e_len,position))
                position -= e_len + i_len
            else:
                self.sequence += self.genome_strand[position:position+e_len]
                self.positions.append((position,position+e_len))
                position += e_len + i_len

        self.accession = '>spliced_variant{0},rc={1}'.format(self.positions,self.reverse_complement)