Beispiel #1
0
    def test_ungap(self):
        """Check ``UnknownSeq.ungap("-")`` on a gapped DNA alphabet.

        Two cases are covered:

        * an unknown sequence of length 7 built with the default character
          must ungap to ``"NNNNNNN"``;
        * an unknown sequence of length 20 whose character is the gap
          character itself must ungap to the empty string.
        """
        gapped_dna = Alphabet.Gapped(Alphabet.DNAAlphabet(), "-")

        seq = Seq.UnknownSeq(7, alphabet=gapped_dna)
        self.assertEqual("NNNNNNN", str(seq.ungap("-")))

        seq = Seq.UnknownSeq(20, alphabet=gapped_dna, character='-')
        # Compare as strings, like the first case, so the check does not
        # depend on how Seq objects implement equality against str.
        self.assertEqual("", str(seq.ungap("-")))
Beispiel #2
0
 def test_stops(self):
     """Translate ``self.misc_stops`` under several genetic code tables.

     The same nucleotide data is supplied as a plain string and as ``Seq``
     objects with different alphabets; every form must give the same
     protein string for each table (selected by id or by name).
     """
     # (expected protein, keyword arguments for Seq.translate), in the
     # order in which the checks are performed.
     cases = [
         ("***RR", {}),
         ("***RR", {"table": 1}),
         ("***RR", {"table": "SGC0"}),
         ("**W**", {"table": 2}),
         ("**WRR", {"table": "Yeast Mitochondrial"}),
         ("**WSS", {"table": 5}),
         ("**WSS", {"table": 9}),
         ("**CRR", {"table": "Euplotid Nuclear"}),
         ("***RR", {"table": 11}),
         ("***RR", {"table": "Bacterial"}),
     ]
     inputs = [
         self.misc_stops,
         Seq.Seq(self.misc_stops),
         Seq.Seq(self.misc_stops, Alphabet.generic_nucleotide),
         Seq.Seq(self.misc_stops, Alphabet.DNAAlphabet()),
         Seq.Seq(self.misc_stops, IUPAC.unambiguous_dna),
     ]
     for nucleotide_seq in inputs:
         for expected, translate_kwargs in cases:
             translated = Seq.translate(nucleotide_seq, **translate_kwargs)
             self.assertEqual(expected, str(translated))
Beispiel #3
0
def make_seq(seq_data, seq_name):
    """Write a sequence and its metadata into a directory named ``seq_name``.

    A ``SeqRecord`` is built from ``seq_data['Sequence']`` (DNA alphabet) with
    one ``SeqFeature`` per entry of ``seq_data['Annotations']`` (optional).
    The directory ``seq_name`` is created if needed and four files are
    written into it:

    * ``README.md``       - ``seq_data['ReadMe']`` (empty if absent)
    * ``.sequence.json``  - ``seq_data`` serialised as JSON
    * ``sequence.fasta``  - the record in FASTA format
    * ``sequence.gb``     - the record in GenBank format

    Args:
        seq_data: mapping with a ``'Sequence'`` key and optional
            ``'Annotations'`` / ``'ReadMe'`` keys. Each annotation must
            provide ``start``, ``end``, ``strand``, ``type``, ``label`` and
            ``note``.
        seq_name: used as record id, record name and output directory.

    Raises:
        KeyError: if ``'Sequence'`` or a required annotation key is missing.
        OSError: if the output directory cannot be created.
    """
    record = SeqRecord(id=seq_name,
                       name=seq_name,
                       seq=Seq(seq_data['Sequence'],
                               alphabet=Alphabet.DNAAlphabet()))

    record.features = [
        SeqFeature(FeatureLocation(a['start'], a['end'], a['strand']),
                   type=a['type'],
                   # A list of pairs fixes the qualifier order explicitly;
                   # building the OrderedDict from a dict literal only keeps
                   # this order on interpreters with ordered dicts.
                   qualifiers=OrderedDict([
                       ('label', a['label']),
                       ('note', a['note']),
                   ]))
        for a in seq_data.get('Annotations', [])
    ]

    # Create the directory without a check-then-create race: attempt the
    # creation and only tolerate the failure if the directory is there.
    try:
        os.makedirs(seq_name)
    except OSError:
        if not os.path.isdir(seq_name):
            raise

    with open(os.path.join(seq_name, 'README.md'), 'w') as f:
        f.write(seq_data.get('ReadMe', ''))

    with open(os.path.join(seq_name, '.sequence.json'), 'w') as f:
        json.dump(seq_data, f)

    SeqIO.write(record, os.path.join(seq_name, 'sequence.fasta'), "fasta")
    SeqIO.write(record, os.path.join(seq_name, 'sequence.gb'), "genbank")
Beispiel #4
0
 def extract(self, start_pos, end_pos, make_file=False):
     """Return a new record holding ``self.gb`` restricted to a window.

     The new ``SeqRecord`` contains the bases ``self.gb.seq[start_pos:end_pos]``
     (DNA alphabet) and every feature of ``self.gb`` whose
     ``[location.start, location.end)`` span shares at least one position
     with ``[start_pos, end_pos)``.  Features are appended as-is: the same
     objects, with their original coordinates.

     Args:
         start_pos: first position of the window (inclusive).
         end_pos: end of the window (exclusive).
         make_file: when true, also write the new record in GenBank format
             to a file named ``<record id>_<start_pos>_<end_pos>`` in the
             current directory.

     Returns:
         The newly built ``SeqRecord``.
     """
     partial_gb = SeqRecord(
         Seq(str(self.gb.seq[start_pos:end_pos]), Alphabet.DNAAlphabet()))
     for afeat in self.gb.features:
         # Two half-open integer intervals intersect exactly when the larger
         # start lies before the smaller end; this avoids materialising a
         # set of every position for the window and for each feature.
         overlap_start = max(start_pos, int(afeat.location.start))
         overlap_end = min(end_pos, int(afeat.location.end))
         if overlap_start < overlap_end:
             partial_gb.features.append(afeat)
     if make_file:
         file_name = partial_gb.id + "_" + str(start_pos) + "_" + str(end_pos)
         # The context manager closes (and flushes) the handle even if
         # writing fails; previously it was left open.
         with open(file_name, "w") as record_handle:
             SeqIO.write(partial_gb, record_handle, "genbank")
     return partial_gb
Beispiel #5
0
    except ValueError:
        pass
    if not isinstance(s, Seq.Seq):
        continue  # Only Seq has this method
    try:
        print s.translate()
        assert False, "Translation shouldn't work on a protein!"
    except ValueError:
        pass

# Translate a short nucleotide string under several genetic code tables
# (selected by id or by name).  The data is supplied as a plain string and
# as Seq objects with different alphabets; each form must translate alike.
misc_stops = "TAATAGTGAAGAAGG"
# (expected protein, keyword arguments for Seq.translate), in check order.
_stop_cases = (
    ("***RR", {}),
    ("***RR", {"table": 1}),
    ("***RR", {"table": "SGC0"}),
    ("**W**", {"table": 2}),
    ("**WRR", {"table": 'Yeast Mitochondrial'}),
    ("**WSS", {"table": 5}),
    ("**WSS", {"table": 9}),
    ("**CRR", {"table": 'Euplotid Nuclear'}),
    ("***RR", {"table": 11}),
    ("***RR", {"table": 'Bacterial'}),
)
for nucleotide_seq in [
        misc_stops,
        Seq.Seq(misc_stops),
        Seq.Seq(misc_stops, Alphabet.generic_nucleotide),
        Seq.Seq(misc_stops, Alphabet.DNAAlphabet()),
        Seq.Seq(misc_stops, IUPAC.unambiguous_dna)
]:
    for _expected, _translate_kwargs in _stop_cases:
        assert _expected == str(
            Seq.translate(nucleotide_seq, **_translate_kwargs))
# Drop the temporaries so only the names the script had before remain.
del misc_stops, _stop_cases, _expected, _translate_kwargs
Beispiel #6
0
    def gen_components(self):
        """Fill the knowledge base cell with random chromosomes and genes.

        Reads ``num_chromosomes``, ``chromosome_topology``, ``mean_gc_frac``,
        ``mean_num_genes``, ``mean_gene_len`` and ``mean_coding_frac`` from
        ``self.options``.  For every chromosome it:

        * draws the number of genes, the gene lengths and the intergenic
          lengths with ``self.rand``;
        * draws a base sequence of the combined length, weighting G/C
          against A/T according to ``mean_gc_frac``;
        * creates a ``DnaSpeciesType`` named ``chr_<n>`` in
          ``cell.species_types``;
        * creates, per gene, one ``TranscriptionUnitLocus`` (``tu_<n>_<m>``)
          and one ``GeneLocus`` (``gene_<n>_<m>``) in ``cell.loci`` that
          share start, end and a randomly chosen strand.

        NOTE(review): ``random`` is presumably ``numpy.random`` since
        ``choice`` is called with ``p=`` and ``size=`` - confirm against the
        file's imports.
        """
        # generator settings
        opts = self.options
        num_chromosomes = opts.get('num_chromosomes')
        chromosome_topology = opts.get('chromosome_topology')
        mean_gc_frac = opts.get('mean_gc_frac')
        mean_num_genes = opts.get('mean_num_genes')
        mean_gene_len = opts.get('mean_gene_len')
        mean_coding_frac = opts.get('mean_coding_frac')

        cell = self.knowledge_base.cell
        for i_chr in range(num_chromosomes):
            chr_num = i_chr + 1

            # sizes of the genes and of the gaps in front of them
            num_genes = self.rand(mean_num_genes / num_chromosomes)[0]
            gene_lens = self.rand(mean_gene_len, count=num_genes)
            mean_intergene_len = mean_gene_len / mean_coding_frac * (
                1 - mean_coding_frac)
            intergene_lens = self.rand(mean_intergene_len, count=num_genes)

            # random bases, with probabilities given in A, C, G, T order
            seq_len = numpy.sum(gene_lens) + numpy.sum(intergene_lens)
            at_prob = (1 - mean_gc_frac) / 2
            gc_prob = mean_gc_frac / 2
            bases = random.choice(('A', 'C', 'G', 'T'),
                                  p=(at_prob, gc_prob, gc_prob, at_prob),
                                  size=(seq_len, ))
            seq = Seq.Seq(''.join(bases), Alphabet.DNAAlphabet())

            chromosome = cell.species_types.get_or_create(
                id='chr_{}'.format(chr_num),
                __type=wc_kb.core.DnaSpeciesType)
            chromosome.name = 'Chromosome {}'.format(chr_num)
            chromosome.circular = chromosome_topology == 'circular'
            chromosome.double_stranded = True
            chromosome.seq = seq

            # Each gene starts after the previous gene plus the gap in front
            # of it; only half of the first gap precedes the first gene.
            preceding_gene_lens = numpy.concatenate(([0], gene_lens[0:-1]))
            preceding_gaps = numpy.concatenate(
                (numpy.round(intergene_lens[0:1] / 2), intergene_lens[1:]))
            gene_starts = numpy.int64(
                numpy.cumsum(preceding_gene_lens + preceding_gaps))

            for i_gene in range(num_genes):
                gene_num = i_gene + 1

                # transcription unit
                tu = cell.loci.get_or_create(
                    id='tu_{}_{}'.format(chr_num, gene_num),
                    __type=wc_kb.prokaryote.TranscriptionUnitLocus)
                tu.polymer = chromosome
                tu.name = 'Transcription unit {}-{}'.format(chr_num, gene_num)
                locus_start = gene_starts[i_gene]
                locus_end = gene_starts[i_gene] + gene_lens[i_gene] - 1
                tu.start = locus_start
                tu.end = locus_end
                tu.strand = random.choice((wc_kb.core.PolymerStrand.positive,
                                           wc_kb.core.PolymerStrand.negative))

                # gene with the same coordinates and strand as its unit
                gene = cell.loci.get_or_create(
                    id='gene_{}_{}'.format(chr_num, gene_num),
                    __type=wc_kb.prokaryote.GeneLocus)
                gene.polymer = chromosome
                gene.transcription_units.append(tu)
                gene.name = 'Gene {}-{}'.format(chr_num, gene_num)
                gene.start = locus_start
                gene.end = locus_end
                gene.type = wc_kb.core.GeneType.mRna
                gene.strand = tu.strand
Beispiel #7
0
def translate(seq):
    """Translate a nucleotide string into a protein string.

    Every ``-`` in ``seq`` is replaced by ``N`` before the text is wrapped
    in a DNA-alphabet ``Seq`` and translated with the default arguments of
    ``Seq.translate()``.

    Args:
        seq: nucleotide sequence as a string (``str.replace`` is used on it).

    Returns:
        The translated sequence as a plain ``str``.
    """
    dna = Seq(seq.replace('-', 'N'), Alphabet.DNAAlphabet())
    # str() is the supported way to get the text of a Seq; the equivalent
    # Seq.tostring() method is deprecated.
    return str(dna.translate())