def test_import_feature_ending_at_last_base(self):
    """Import a GFF whose ``rbs`` feature ends on the last base of chrI.

    The file declares a 160 bp sequence and three features; the test
    expects only two annotations (the ``chromosome`` feature spanning the
    entire sequence is expected to be skipped) and checks coordinates,
    names and strands of both.
    """
    data = """##gff-version 3
chrI\tTest\tchromosome\t1\t160\t.\t.\t.\tID=i1;Name=f1
chrI\tTest\tcds\t20\t28\t.\t-\t.\tID=i2;Name=f2
chrI\tTest\trbs\t20\t160\t.\t+\t.\tID=i3
###
##FASTA
>chrI
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
"""

    # delete=False so the file survives the close and can be re-opened by
    # path; leaving the ``with`` block closes (and flushes) it.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(data)
    try:
        genome = Genome.import_gff('Foo', f.name)
    finally:
        # always remove the temp file, even when the import fails
        os.unlink(f.name)

    # verify chrI fragment
    chrI = [fr.indexed_fragment() for fr in genome.fragments.all() if fr.name == 'chrI'][0]
    self.assertEqual(len(chrI.sequence), 160)

    # fetch once instead of re-evaluating for every assertion
    annotations = chrI.annotations()

    # verify skips annotation on entire sequence
    self.assertEqual(len(annotations), 2)

    self.assertEqual(annotations[1].base_first, 20)
    self.assertEqual(annotations[1].base_last, 160)
    self.assertEqual(annotations[1].feature.name, 'i3')  # no name, loaded ID
    self.assertEqual(annotations[1].feature.strand, 1)

    self.assertEqual(annotations[0].base_first, 20)
    self.assertEqual(annotations[0].base_last, 28)
    self.assertEqual(annotations[0].feature.name, 'f2')
    self.assertEqual(annotations[0].feature.strand, -1)
def test_import_gff_creates_fragments_and_annotate_features(self):
    """Import a two-sequence GFF and check fragments and annotations.

    Expects one fragment per FASTA record (chrI, chrII), each 160 bp long,
    and two annotations on each fragment with the coordinates, names and
    strands given in the feature table.
    """
    data = """##gff-version 3
chrI\tTest\tchromosome\t1\t160\t.\t.\t.\tID=i1;Name=f1
chrI\tTest\tcds\t30\t80\t.\t-\t.\tID=i2;Name=f2
chrI\tTest\trbs\t20\t28\t.\t+\t.\tID=i3
chrII\tTest\tgene\t40\t60\t.\t-\t.\tID=f4;gene=g4
chrII\tTest\tgene\t20\t80\t.\t+\t.\tID=i5;Name=f5
###
##FASTA
>chrI
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
>chrII
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
"""

    # delete=False so the file survives the close and can be re-opened by
    # path; leaving the ``with`` block closes (and flushes) it.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(data)
    try:
        genome = Genome.import_gff('Foo', f.name)
    finally:
        # always remove the temp file, even when the import fails
        os.unlink(f.name)

    # created one fragment for each sequence in GFF file; compared as sorted
    # lists so the check is order-independent on both Python 2 and 3
    self.assertEqual(sorted(fr.name for fr in genome.fragments.all()),
                     ['chrI', 'chrII'])

    # verify chrI fragment
    chrI = [fr.indexed_fragment() for fr in genome.fragments.all() if fr.name == 'chrI'][0]
    self.assertEqual(len(chrI.sequence), 160)

    # fetch once instead of re-evaluating for every assertion
    chrI_annotations = chrI.annotations()

    # verify skips annotation on entire sequence
    self.assertEqual(len(chrI_annotations), 2)

    self.assertEqual(chrI_annotations[0].base_first, 20)
    self.assertEqual(chrI_annotations[0].base_last, 28)
    self.assertEqual(chrI_annotations[0].feature.name, 'i3')  # no name, loaded ID
    self.assertEqual(chrI_annotations[0].feature.strand, 1)

    self.assertEqual(chrI_annotations[1].base_first, 30)
    self.assertEqual(chrI_annotations[1].base_last, 80)
    self.assertEqual(chrI_annotations[1].feature.name, 'f2')
    self.assertEqual(chrI_annotations[1].feature.strand, -1)

    # verify chrII fragment
    chrII = [fr.indexed_fragment() for fr in genome.fragments.all() if fr.name == 'chrII'][0]
    self.assertEqual(len(chrII.sequence), 160)

    chrII_annotations = chrII.annotations()

    # consecutive annotations merged even though they span multiple chunks
    self.assertEqual(len(chrII_annotations), 2)

    self.assertEqual(chrII_annotations[0].base_first, 20)
    self.assertEqual(chrII_annotations[0].base_last, 80)
    self.assertEqual(chrII_annotations[0].feature.name, 'f5')
    self.assertEqual(chrII_annotations[0].feature.strand, 1)

    self.assertEqual(chrII_annotations[1].base_first, 40)
    self.assertEqual(chrII_annotations[1].base_last, 60)
    self.assertEqual(chrII_annotations[1].feature.name, 'g4')  # has gene, use gene name
    self.assertEqual(chrII_annotations[1].feature.strand, -1)
def import_gff(name, fn):
    """
    Creates a new genome using the specified GFF file.

    name: Name of genome
    fn: path to GFF file

    Returns the object produced by Genome.import_gff(name, fn).
    Raises Exception if a genome with the same name already exists.
    """

    # Function-scope import kept from the original; presumably avoids
    # loading the models at module import time -- TODO confirm.
    from edge.models import Genome

    # exists() only asks whether a matching row is present, rather than
    # counting every match just to compare the total against zero.
    if Genome.objects.filter(name=name).exists():
        raise Exception('There is already a genome named "%s"' % (name, ))

    return Genome.import_gff(name, fn)
def import_gff(name, fn):
    """
    Creates a new genome using the specified GFF file.

    name: Name of genome
    fn: path to GFF file

    Returns the object produced by Genome.import_gff(name, fn).
    Raises Exception if a genome with the same name already exists.
    """

    # Function-scope import kept from the original; presumably avoids
    # loading the models at module import time -- TODO confirm.
    from edge.models import Genome

    # exists() only asks whether a matching row is present, rather than
    # counting every match just to compare the total against zero.
    if Genome.objects.filter(name=name).exists():
        raise Exception('There is already a genome named "%s"' % (name,))

    return Genome.import_gff(name, fn)
def import_with_qualifiers(self, qualifiers):
    """Import a one-feature GFF and store the resulting genome on ``self``.

    qualifiers: text substituted into the attributes (ninth) column of the
        single ``cds`` feature on chrI.

    The data is written to a temporary file, imported with
    Genome.import_gff under the name 'Foo', and the result is assigned to
    ``self.genome``. The temporary file is removed afterwards.
    """
    data = """##gff-version 3
chrI\tTest\tcds\t30\t80\t.\t-\t.\t%s
###
##FASTA
>chrI
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
""" % (qualifiers,)

    # delete=False so the file survives the close and can be re-opened by
    # path; leaving the ``with`` block closes (and flushes) it.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(data)
    try:
        self.genome = Genome.import_gff('Foo', f.name)
    finally:
        # always remove the temp file, even when the import fails
        os.unlink(f.name)
def import_with_qualifiers(self, qualifiers):
    """Import a one-feature GFF and store the resulting genome on ``self``.

    qualifiers: text substituted into the attributes (ninth) column of the
        single ``cds`` feature on chrI.

    The data is written to a temporary file, imported with
    Genome.import_gff under the name 'Foo', and the result is assigned to
    ``self.genome``. The temporary file is removed afterwards.
    """
    data = """##gff-version 3
chrI\tTest\tcds\t30\t80\t.\t-\t.\t%s
###
##FASTA
>chrI
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
""" % (qualifiers, )

    # delete=False so the file survives the close and can be re-opened by
    # path; leaving the ``with`` block closes (and flushes) it.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(data)
    try:
        self.genome = Genome.import_gff('Foo', f.name)
    finally:
        # always remove the temp file, even when the import fails
        os.unlink(f.name)
def import_gff(name, fn, output_dir='.'):
    """
    Creates a new genome using the specified GFF file.

    name: Name of genome
    fn: path to GFF file
    output_dir: string path to output directory for reference file

    Returns the object produced by Genome.import_gff; output_dir is passed
    through to it as the ``dirn`` keyword argument.
    Raises Exception if a genome with the same name already exists.
    """

    # Function-scope import kept from the original; presumably avoids
    # loading the models at module import time -- TODO confirm.
    from edge.models import Genome

    # exists() only asks whether a matching row is present, rather than
    # counting every match just to compare the total against zero.
    if Genome.objects.filter(name=name).exists():
        raise Exception('There is already a genome named "%s"' % (name, ))

    return Genome.import_gff(name, fn, dirn=output_dir)
def test_import_feature_ending_at_last_base(self):
    """Import a GFF whose ``rbs`` feature ends on the last base of chrI.

    The file declares a 160 bp sequence and three features; the test
    expects only two annotations (the ``chromosome`` feature spanning the
    entire sequence is expected to be skipped) and checks coordinates,
    names and strands of both.
    """
    data = """##gff-version 3
chrI\tTest\tchromosome\t1\t160\t.\t.\t.\tID=i1;Name=f1
chrI\tTest\tcds\t20\t28\t.\t-\t.\tID=i2;Name=f2
chrI\tTest\trbs\t20\t160\t.\t+\t.\tID=i3
###
##FASTA
>chrI
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
"""

    # delete=False so the file survives the close and can be re-opened by
    # path; leaving the ``with`` block closes (and flushes) it.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(data)
    try:
        genome = Genome.import_gff('Foo', f.name)
    finally:
        # always remove the temp file, even when the import fails
        os.unlink(f.name)

    # verify chrI fragment
    chrI = [
        fr.indexed_fragment() for fr in genome.fragments.all() if fr.name == 'chrI'
    ][0]
    self.assertEqual(len(chrI.sequence), 160)

    # fetch once instead of re-evaluating for every assertion
    annotations = chrI.annotations()

    # verify skips annotation on entire sequence
    self.assertEqual(len(annotations), 2)

    self.assertEqual(annotations[1].base_first, 20)
    self.assertEqual(annotations[1].base_last, 160)
    self.assertEqual(annotations[1].feature.name, 'i3')  # no name, loaded ID
    self.assertEqual(annotations[1].feature.strand, 1)

    self.assertEqual(annotations[0].base_first, 20)
    self.assertEqual(annotations[0].base_last, 28)
    self.assertEqual(annotations[0].feature.name, 'f2')
    self.assertEqual(annotations[0].feature.strand, -1)
def test_import_gff_creates_fragments_and_annotate_features(self):
    """Import a two-sequence GFF and check fragments and annotations.

    Expects one fragment per FASTA record (chrI, chrII), each 160 bp long,
    and two annotations on each fragment with the coordinates, names and
    strands given in the feature table.
    """
    data = """##gff-version 3
chrI\tTest\tchromosome\t1\t160\t.\t.\t.\tID=i1;Name=f1
chrI\tTest\tcds\t30\t80\t.\t-\t.\tID=i2;Name=f2
chrI\tTest\trbs\t20\t28\t.\t+\t.\tID=i3
chrII\tTest\tgene\t40\t60\t.\t-\t.\tID=f4;gene=g4
chrII\tTest\tgene\t20\t80\t.\t+\t.\tID=i5;Name=f5
###
##FASTA
>chrI
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
>chrII
CCACACCACACCCACACACCCACACACCACACCACACACCACACCACACCCACACACACACATCCTAACACTACCCTAAC
ACAGCCCTAATCTAACCCTGGCCAACCTGTCTCTCAACTTACCCTCCATTACCCTGCCTCCACTCGTTACCCTGTCCCAT
"""

    # delete=False so the file survives the close and can be re-opened by
    # path; leaving the ``with`` block closes (and flushes) it.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(data)
    try:
        genome = Genome.import_gff('Foo', f.name)
    finally:
        # always remove the temp file, even when the import fails
        os.unlink(f.name)

    # created one fragment for each sequence in GFF file; compared as sorted
    # lists so the check is order-independent on both Python 2 and 3
    self.assertEqual(sorted(fr.name for fr in genome.fragments.all()),
                     ['chrI', 'chrII'])

    # verify chrI fragment
    chrI = [
        fr.indexed_fragment() for fr in genome.fragments.all() if fr.name == 'chrI'
    ][0]
    self.assertEqual(len(chrI.sequence), 160)

    # fetch once instead of re-evaluating for every assertion
    chrI_annotations = chrI.annotations()

    # verify skips annotation on entire sequence
    self.assertEqual(len(chrI_annotations), 2)

    self.assertEqual(chrI_annotations[0].base_first, 20)
    self.assertEqual(chrI_annotations[0].base_last, 28)
    self.assertEqual(chrI_annotations[0].feature.name, 'i3')  # no name, loaded ID
    self.assertEqual(chrI_annotations[0].feature.strand, 1)

    self.assertEqual(chrI_annotations[1].base_first, 30)
    self.assertEqual(chrI_annotations[1].base_last, 80)
    self.assertEqual(chrI_annotations[1].feature.name, 'f2')
    self.assertEqual(chrI_annotations[1].feature.strand, -1)

    # verify chrII fragment
    chrII = [
        fr.indexed_fragment() for fr in genome.fragments.all() if fr.name == 'chrII'
    ][0]
    self.assertEqual(len(chrII.sequence), 160)

    chrII_annotations = chrII.annotations()

    # consecutive annotations merged even though they span multiple chunks
    self.assertEqual(len(chrII_annotations), 2)

    self.assertEqual(chrII_annotations[0].base_first, 20)
    self.assertEqual(chrII_annotations[0].base_last, 80)
    self.assertEqual(chrII_annotations[0].feature.name, 'f5')
    self.assertEqual(chrII_annotations[0].feature.strand, 1)

    self.assertEqual(chrII_annotations[1].base_first, 40)
    self.assertEqual(chrII_annotations[1].base_last, 60)
    self.assertEqual(chrII_annotations[1].feature.name, 'g4')  # has gene, use gene name
    self.assertEqual(chrII_annotations[1].feature.strand, -1)