# Example 1
0
class BlastTest(TestCase):
  """Tests for ``blast`` against a small database of two features.

  ``setUp`` stores one DNA feature (G1) and one protein feature (G2) in a
  ``Feature_Database``.  Most tests embed the G1 sequence between a run of
  100 'G' and a run of 40 'A', and expect the reported coordinates to be
  1-based and inclusive (a full hit spans query positions 101 to
  100 + len(dna), subject positions 1 to len(dna)).
  """

  def setUp(self):
    """Create the feature type, the two features and the database."""
    ft_gene = Feature_Type(type='Gene')
    ft_gene.save()

    self.dna = 'ATTGCGGATCGCGAATGCGATCG'
    self.pro = 'MKKKAPSI'
    self.pro_dna = 'ATGAAGAAGAAAGCACCAAGCATA'

    self.feature1 = Feature(type=ft_gene, name='G1', sequence=self.dna)
    self.feature1.as_dna()
    self.feature1.save()

    self.feature2 = Feature(type=ft_gene, name='G2', sequence=self.pro)
    self.feature2.as_protein()
    self.feature2.save()

    self.feature_db = Feature_Database(name='test')
    self.feature_db.save()
    self.feature_db.features.add(self.feature1, self.feature2)

  def _flank(self, core):
    """Return ``core`` preceded by 100 'G' and followed by 40 'A'."""
    return 'G'*100+core+'A'*40

  def test_blast(self):
    """A full-length copy of G1 is reported once, with exact coordinates."""
    self.feature_db.build()

    query = self._flank(self.dna)
    feature_list = blast(query, self.feature_db)

    self.assertEqual(len(feature_list), 1)
    hit = feature_list[0]
    self.assertEqual(hit.name, self.feature1.name)
    self.assertEqual(hit.query_start, 101)
    self.assertEqual(hit.query_end, 100+len(self.dna))
    self.assertEqual(hit.subject_start, 1)
    self.assertEqual(hit.subject_end, len(self.dna))

  def test_blast_evalue_threshold(self):
    """A hit found with default settings is dropped at evalue_threshold=1E-50."""
    self.feature_db.build()

    query = self._flank(self.dna)
    feature_list = blast(query, self.feature_db)
    self.assertEqual(len(feature_list), 1)
    feature_list = blast(query, self.feature_db, evalue_threshold=1E-50)
    self.assertEqual(len(feature_list), 0)

  def test_blast_identity_threshold(self):
    """A copy with two mismatches is kept without, and dropped with, a 0.99
    identity threshold."""
    self.feature_db.build()

    q = self.dna
    # make two changes: substitute 'C' at 0-based positions 3 and 6
    q = q[0:3]+'C'+q[4:6]+'C'+q[7:]
    self.assertEqual(len(q), len(self.dna))
    query = self._flank(q)

    feature_list = blast(query, self.feature_db, identity_threshold=None)
    self.assertEqual(len(feature_list), 1)
    hit = feature_list[0]
    self.assertEqual(hit.query_start, 101)
    self.assertEqual(hit.query_end, 100+len(self.dna))
    self.assertEqual(hit.subject_start, 1)
    self.assertEqual(hit.subject_end, len(self.dna))

    feature_list = blast(query, self.feature_db, identity_threshold=0.99)
    self.assertEqual(len(feature_list), 0)

  def test_blast_feature_threshold(self):
    """A truncated copy of G1 is kept without, and dropped with, a feature
    threshold of 0.8."""
    self.feature_db.build()
    p = 0.8
    # int() truncates, so the n bases kept are slightly less than the
    # fraction p of the feature
    n = int(len(self.dna)*p)

    query = self._flank(self.dna[0:n])
    feature_list = blast(query, self.feature_db, feature_threshold=None)
    self.assertEqual(len(feature_list), 1)
    hit = feature_list[0]
    self.assertEqual(hit.query_start, 101)
    self.assertEqual(hit.query_end, 100+n)
    self.assertEqual(hit.subject_start, 1)
    self.assertEqual(hit.subject_end, n)

    feature_list = blast(query, self.feature_db, feature_threshold=p)
    self.assertEqual(len(feature_list), 0)

  def test_get_feature_from_blast_result(self):
    """The hit carries the database id of the matched feature."""
    self.feature_db.build()

    query = self._flank(self.dna)
    feature_list = blast(query, self.feature_db)
    self.assertEqual(len(feature_list), 1)
    self.assertEqual(feature_list[0].feature_id, self.feature1.id)

  def test_returns_one_result_from_across_circular_boundary(self):
    """A feature split by the end of the query is reported exactly once."""
    self.feature_db.build()
    q = self._flank(self.dna)
    # rotate the sequence so that the feature straddles the end of the query
    query = q[110:]+q[0:110]
    feature_list = blast(query, self.feature_db)
    # if we don't remove truncated features across circular boundary, we'd see
    # 2 results, one for truncated feature, one for full feature
    self.assertEqual(len(feature_list), 1)

  def test_returns_correct_coordinates_across_circular_boundary(self):
    """A feature split by the end of the query wraps: its query_start lies
    near the end of the query and its query_end near the beginning."""
    self.feature_db.build()

    q = self._flank(self.dna)
    # rotate so the first 10 bases of the feature end the query and the
    # remaining bases start it
    query = q[110:]+q[0:110]

    feature_list = blast(query, self.feature_db)
    # fail with a clear assertion instead of an IndexError on an empty result
    self.assertEqual(len(feature_list), 1)
    hit = feature_list[0]
    self.assertEqual(hit.query_start, len(q)-10+1)
    self.assertEqual(hit.query_end, len(self.dna)-10)
    self.assertEqual(hit.subject_start, 1)
    self.assertEqual(hit.subject_end, len(self.dna))
# Example 2
0
class IntegrationTest(TestCase):
  """End-to-end tests that POST sequences to the 'giraffe-analyze' and
  'blast2' views and compare the decoded JSON with the expected results.

  For 'giraffe-analyze' the response is read as a two-element sequence:
  the query length followed by a list of feature dictionaries.  The
  feature list is compared without regard to order.
  """

  def setUp(self):
    """Create the feature type, the two features and the database."""
    ft_gene = Feature_Type(type='Gene')
    ft_gene.save()

    self.dna = 'ATTGCGGATCGCGAATGCGATCG'
    self.pro = 'MKKKAPSI'
    self.pro_dna = 'ATGAAGAAGAAAGCACCAAGCATA'

    self.feature1 = Feature(type=ft_gene, name='G1', sequence=self.dna)
    self.feature1.as_dna()
    self.feature1.save()

    self.feature2 = Feature(type=ft_gene, name='G2', sequence=self.pro)
    self.feature2.as_protein()
    self.feature2.save()

    self.feature_db = Feature_Database(name='test')
    self.feature_db.save()
    self.feature_db.features.add(self.feature1, self.feature2)

  def _post_json(self, url_name, data):
    """POST ``data`` to the view named ``url_name`` and return the decoded
    JSON body.  Fails the test unless the response status is 200."""
    url = reverse(url_name)
    resp = self.client.post(url, data)
    self.assertEqual(resp.status_code, 200)
    return json.loads(resp.content)

  def _assert_same_items(self, actual, expected):
    """Assert both sequences hold the same elements, in any order.

    unittest names this check ``assertItemsEqual`` on Python 2.7 and
    ``assertCountEqual`` on Python 3; use whichever is available.
    """
    check = getattr(self, 'assertCountEqual', None)
    if check is None:
      check = self.assertItemsEqual
    check(actual, expected)

  def test_build_db(self):
    """Building the database completes without raising."""
    self.feature_db.build()

  def test_finds_orf_in_both_directions_and_across_boundary(self):
    """ORFs are found in both directions and across the end of the query.

    No 'circular' value is posted here.
    NOTE(review): presumably the view treats the sequence as circular by
    default -- confirm against the view.
    """
    self.feature_db.build()

    query = 'G'*100+'ATG'+'C'*3000+'TAG'+'CTA'+'G'*1800+'CAT'

    data = {'db': self.feature_db.name, 'sequence': query}
    res = self._post_json('giraffe-analyze', data)

    self.assertEqual(res[0], len(query))
    self._assert_same_items(res[1], [
      {'orf_frame': 1,
       'layer': 'ORFs',
       'type_id': 10,
       'subject_start': 1,
       'subject_end': 3+3000+3,
       'query_start': 100+1,
       'query_end': 100+3+3000+3,
       'label': 'ORF frame 2',
       'name': 'ORF frame 2'},
      {'orf_frame': 0,
       'layer': 'ORFs',
       'type_id': 10,
       'subject_start': 3+1800+3,
       'subject_end': 1,
       'query_start': 100+3+3000+3+1,
       'query_end': 100+3+3000+3+3+1800+3,
       'label': 'ORF frame 1',
       'name': 'ORF frame 1'},
      # across circular boundary, there is AT, then G, it ends with the first
      # stop codon after C*3000
      {'orf_frame': 2,
       'layer': 'ORFs',
       'type_id': 10,
       'subject_start': 1,
       'subject_end': 3+99+3+3000+3,
       'query_start': len(query)-2+1,
       'query_end': 100+3+3000+3,
       'label': 'ORF frame 3',
       'name': 'ORF frame 3'},
    ])

  def test_does_not_find_orf_across_boundary_if_not_in_circular_mode(self):
    """With 'circular': 0 only the two ORFs inside the query are reported."""
    self.feature_db.build()

    query = 'G'*100+'ATG'+'C'*3000+'TAG'+'CTA'+'G'*1800+'CAT'

    data = {'db': self.feature_db.name, 'sequence': query, 'circular': 0}
    res = self._post_json('giraffe-analyze', data)

    self.assertEqual(res[0], len(query))
    self._assert_same_items(res[1], [
      {'orf_frame': 1,
       'layer': 'ORFs',
       'type_id': 10,
       'subject_start': 1,
       'subject_end': 3+3000+3,
       'query_start': 100+1,
       'query_end': 100+3+3000+3,
       'label': 'ORF frame 2',
       'name': 'ORF frame 2'},
      {'orf_frame': 0,
       'layer': 'ORFs',
       'type_id': 10,
       'subject_start': 3+1800+3,
       'subject_end': 1,
       'query_start': 100+3+3000+3+1,
       'query_end': 100+3+3000+3+3+1800+3,
       'label': 'ORF frame 1',
       'name': 'ORF frame 1'},
    ])

  def test_find_tags_within_orf(self):
    """Six consecutive 'CAT' codons inside an ORF are reported as '6xHIS'."""
    self.feature_db.build()

    query = 'G'*100+'ATG'+'C'*3000+'CAT'*6+'TAG'

    data = {'db': self.feature_db.name, 'sequence': query, 'circular': 0}
    res = self._post_json('giraffe-analyze', data)

    self.assertEqual(res[0], len(query))
    self._assert_same_items(res[1], [
      {'orf_frame': 1,
       'layer': 'ORFs',
       'type_id': 10,
       'subject_start': 1,
       'subject_end': 3+3000+3*6+3,
       'query_start': 100+1,
       'query_end': 100+3+3000+3*6+3,
       'label': 'ORF frame 2',
       'name': 'ORF frame 2'},
      {'layer': 'Detected Features',
       'type_id': 1,
       'subject_start': 1,
       'subject_end': 3*6,
       'query_start': 100+3+3000+1,
       'query_end': 100+3+3000+3*6,
       'label': '6xHIS',
       'name': '6xHIS'},
    ])

  def test_blastn(self):
    """Posting the G1 sequence itself yields the G1 hit with its alignment,
    plus two restriction enzyme sites."""
    self.feature_db.build()

    data = {'db': self.feature_db.name, 'sequence': self.dna}
    res = self._post_json('giraffe-analyze', data)

    self.assertEqual(res[0], len(self.dna))
    self._assert_same_items(res[1], [
      {"layer": "Detected Features",
       "name": "G1",
       "type_id": 5,
       "label": "G1",
       "query_start": 1,
       "query_end": 23,
       "subject_start": 1,
       "subject_end": 23,
       "evalue": 6.01355e-11,
       "identities": 23,
       "alignment": {  "query": "ATTGCGGATCGCGAATGCGATCG",
                       "match": "|||||||||||||||||||||||",
                     "subject": "ATTGCGGATCGCGAATGCGATCG"}},

      {"layer": "Restriction Enzymes",
       "name": "NruI", "type_id": 4, "elucidate": "TCG^_CGA", "label": "NruI",
       "query_start": 9, "cut": 11, "query_end": 14,
       "subject_start": 1, "subject_end": 6},

      # query_end is smaller than query_start: this site is expected to wrap
      # past the end of the 23-base query
      {"layer": "Restriction Enzymes",
       "name": "ClaI", "type_id": 4, "elucidate": "AT^CG_AT", "label": "ClaI",
       "query_start": 20, "cut": 21, "query_end": 2,
       "subject_start": 1, "subject_end": 6},
    ])

  def test_blastx(self):
    """A DNA query containing the coding sequence in ``pro_dna`` yields the
    protein feature G2, with DNA query and protein subject coordinates."""
    self.feature_db.build()

    # one extra base on each side of the coding sequence
    q = 'A'+self.pro_dna+'T'
    data = {'db': self.feature_db.name, 'sequence': q}
    res = self._post_json('giraffe-analyze', data)

    self.assertEqual(res[0], len(q))
    self._assert_same_items(res[1], [
      {"layer": "Detected Features",
       "query_start": 2,
       "query_end": 25,
       "subject_start": 1,
       "subject_end": 8,
       "name": "G2",
       "type_id": 5,
       "label": "G2",
       "alignment": {"query": "MKKKAPSI",
                     "match": "MKKKAPSI",
                     "subject": "MKKKAPSI"},
       "evalue": 5.42133e-5,
       "identities": 8},
    ])

  def test_blast2(self):
    """The 'blast2' view aligns a posted query against a posted subject; the
    query differs from the subject only in its last base, so the first 22
    positions are expected to align."""
    data = {'subject': self.dna, 'query': self.dna[0:22]+'T'}

    res = self._post_json('blast2', data)

    self._assert_same_items(res, [
      {"subject_start": 1, "subject_end": 22,
       "query_start": 1, "query_end": 22,
       "evalue": 5.81644e-07,
         "query": "ATTGCGGATCGCGAATGCGATC",
         "match": "||||||||||||||||||||||",
       "subject": "ATTGCGGATCGCGAATGCGATC"},
    ])