Exemple #1
0
    def test_vanilla(self):
        # Without a custom SNPFilter, the test expects the personalized cDNA
        # to start with 'H' where the reference cDNA starts with 'A'.
        proteinId = 'ENSP00000438917'
        personalized = Genome(name='GRCh37.75_Y-Only', SNPs='dummySRY_AGN')
        persProt = personalized.get(Protein, id=proteinId)[0]
        refProt = self.ref.get(Protein, id=proteinId)[0]

        refPrefix = refProt.transcript.cDNA[:20]
        self.assertEqual('ATGCAATCATATGCTTCTGC', refPrefix)
        persPrefix = persProt.transcript.cDNA[:20]
        self.assertEqual('HTGCAATCATATGCTTCTGC', persPrefix)
Exemple #2
0
	def setUp(self):
		# Install the fixtures used by the tests. An importer raising KeyError
		# is treated as "already installed": it is reported and ignored.
		fixtures = (
			(B.importGenome, "Human.GRCh37.75_Y-Only.tar.gz"),
			(B.importSNPs, "Human_agnostic.dummySRY.tar.gz"),
		)
		for importer, package in fixtures :
			try :
				importer(package)
			except KeyError :
				# print() with a single argument outputs the same text on
				# Python 2 and Python 3; the bare statement is Python 2 only.
				print("--> Seems to already exist in db")
		self.ref = Genome(name = 'GRCh37.75_Y-Only')
Exemple #3
0
	def test_noModif(self) :
		# With a filter that returns None for every call, the test expects the
		# personalized cDNA prefix to equal the reference one.
		from pyGeno.SNPFiltering import SNPFilter

		class MyFilter(SNPFilter) :
			# No __init__ override: SNPFilter.__init__ is inherited as is.

			# Parameter names are kept from the original; presumably pyGeno
			# passes the SNP set by keyword -- confirm before renaming.
			def filter(self, chromosome, dummySRY_AGN) :
				return None

		proteinId = 'ENSP00000438917'
		personalized = Genome(name = 'GRCh37.75_Y-Only', SNPs = 'dummySRY_AGN', SNPFilter = MyFilter())
		persProt = personalized.get(Protein, id = proteinId)[0]
		refProt = self.ref.get(Protein, id = proteinId)[0]

		persPrefix = persProt.transcript.cDNA[:20]
		refPrefix = refProt.transcript.cDNA[:20]
		self.assertEqual(persPrefix, refPrefix)
Exemple #4
0
	def test_SNP(self) :
		# The filter substitutes the 'alt' attribute of each SNP; the test
		# expects the first residue to be 'M' in the reference and 'L' in the
		# personalized protein.
		from pyGeno.SNPFiltering import SNPFilter, SequenceSNP

		class MyFilter(SNPFilter) :
			# No __init__ override: SNPFilter.__init__ is inherited as is.

			# Parameter names are kept from the original; presumably pyGeno
			# passes the SNP set by keyword -- confirm before renaming.
			def filter(self, chromosome, dummySRY_AGN) :
				return SequenceSNP(dummySRY_AGN.alt)

		proteinId = 'ENSP00000438917'
		personalized = Genome(name = 'GRCh37.75_Y-Only', SNPs = 'dummySRY_AGN', SNPFilter = MyFilter())
		persProt = personalized.get(Protein, id = proteinId)[0]
		refProt = self.ref.get(Protein, id = proteinId)[0]

		self.assertEqual('M', refProt.sequence[0])
		self.assertEqual('L', persProt.sequence[0])
Exemple #5
0
	def test_deletion(self) :
		# The filter answers every call with SequenceDel(1); the test expects
		# the personalized cDNA to be the reference cDNA shifted by one base.
		from pyGeno.SNPFiltering import SNPFilter, SequenceDel

		class MyFilter(SNPFilter) :
			# No __init__ override: SNPFilter.__init__ is inherited as is.

			# Parameter names are kept from the original; presumably pyGeno
			# passes the SNP set by keyword -- confirm before renaming.
			def filter(self, chromosome, dummySRY_AGN) :
				# The reference base is looked up as in the original; its value is not used.
				refAllele = chromosome.refSequence[dummySRY_AGN.start]
				return SequenceDel(1)

		proteinId = 'ENSP00000438917'
		personalized = Genome(name = 'GRCh37.75_Y-Only', SNPs = 'dummySRY_AGN', SNPFilter = MyFilter())
		persProt = personalized.get(Protein, id = proteinId)[0]
		refProt = self.ref.get(Protein, id = proteinId)[0]

		refPrefix = refProt.transcript.cDNA[:20]
		self.assertEqual('ATGCAATCATATGCTTCTGC', refPrefix)
		persPrefix = persProt.transcript.cDNA[:20]
		self.assertEqual('TGCAATCATATGCTTCTGCT', persPrefix)
Exemple #6
0
    def test_indels(self):
        # The filter inserts one 'X' per element of the indel set; the test
        # expects the personalized cDNA to start with 'XXX'.
        from pyGeno.SNPFiltering import SNPFilter, SequenceInsert

        class MyFilter(SNPFilter):
            # No __init__ override: SNPFilter.__init__ is inherited as is.

            # Parameter names are kept from the original; presumably pyGeno
            # passes the SNP set by keyword -- confirm before renaming.
            def filter(self, chromosome, dummySRY_AGN_indels):
                count = sum(1 for _ in dummySRY_AGN_indels)
                return SequenceInsert("X" * count)

        proteinId = 'ENSP00000438917'
        personalized = Genome(name='GRCh37.75_Y-Only',
                              SNPs='dummySRY_AGN_indels',
                              SNPFilter=MyFilter())
        persProt = personalized.get(Protein, id=proteinId)[0]
        # The reference protein is fetched as in the original but not asserted on.
        refProt = self.ref.get(Protein, id=proteinId)[0]

        persPrefix = persProt.transcript.cDNA[:20]
        self.assertEqual('XXXATGCAATCATATGCTTC', persPrefix)
Exemple #7
0
    def setUp(self):
        # No fixture is imported here: the import code that used to live in
        # this method was commented out. Through B it installed
        #   - the genome package "Human.GRCh37.75_Y-Only.tar.gz"
        #   - the SNP packages "Human_agnostic.dummySRY.tar.gz" and
        #     "Human_agnostic.dummySRY_indels"
        # and, on KeyError, deleted the existing data first
        # (deleteGenome("human", "GRCh37.75_Y-Only"),
        # deleteSNPs("dummySRY_AGN"), deleteSNPs("dummySRY_AGN_indels"))
        # before importing again.
        # Presumably these datasets must now be installed beforehand -- confirm.
        referenceName = 'GRCh37.75_Y-Only'
        self.ref = Genome(name=referenceName)
Exemple #8
0
class pyGenoSNPTests(unittest.TestCase):
    """Tests of personalized genomes built on the 'GRCh37.75_Y-Only' reference.

    Most tests load protein ENSP00000438917 from a personalized genome and
    from the plain reference, then compare their sequences.
    """

    _REFERENCE = 'GRCh37.75_Y-Only'
    _PROTEIN = 'ENSP00000438917'
    _REF_CDNA_PREFIX = 'ATGCAATCATATGCTTCTGC'

    def setUp(self):
        # No fixture is imported here: the import code that used to live in
        # this method was commented out. Through B it installed
        #   - the genome package "Human.GRCh37.75_Y-Only.tar.gz"
        #   - the SNP packages "Human_agnostic.dummySRY.tar.gz" and
        #     "Human_agnostic.dummySRY_indels"
        # and, on KeyError, deleted the existing data first
        # (deleteGenome("human", "GRCh37.75_Y-Only"),
        # deleteSNPs("dummySRY_AGN"), deleteSNPs("dummySRY_AGN_indels"))
        # before importing again.
        # Presumably these datasets must now be installed beforehand -- confirm.
        self.ref = Genome(name=self._REFERENCE)

    def tearDown(self):
        # No per-test cleanup is performed.
        pass

    def _proteinPair(self, **genomeOptions):
        # Build a personalized genome with the given options and return the
        # tuple (personalized protein, reference protein).
        personalized = Genome(name=self._REFERENCE, **genomeOptions)
        persProt = personalized.get(Protein, id=self._PROTEIN)[0]
        refProt = self.ref.get(Protein, id=self._PROTEIN)[0]
        return persProt, refProt

    def test_vanilla(self):
        # Without a custom filter the personalized cDNA is expected to start
        # with 'H' where the reference starts with 'A'.
        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN')

        self.assertEqual(self._REF_CDNA_PREFIX, refProt.transcript.cDNA[:20])
        self.assertEqual('HTGCAATCATATGCTTCTGC', persProt.transcript.cDNA[:20])

    def test_noModif(self):
        # A filter returning None is expected to leave the cDNA unchanged.
        from pyGeno.SNPFiltering import SNPFilter

        class MyFilter(SNPFilter):
            # Parameter names are kept from the original; presumably pyGeno
            # passes the SNP set by keyword -- confirm before renaming.
            def filter(self, chromosome, dummySRY_AGN):
                return None

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        persPrefix = persProt.transcript.cDNA[:20]
        refPrefix = refProt.transcript.cDNA[:20]
        self.assertEqual(persPrefix, refPrefix)

    def test_insert(self):
        # Inserting 'XXX' is expected to prefix the personalized cDNA with it.
        from pyGeno.SNPFiltering import SNPFilter, SequenceInsert

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN):
                # The reference base is looked up as in the original; its value is not used.
                refAllele = chromosome.refSequence[dummySRY_AGN.start]
                return SequenceInsert('XXX')

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        self.assertEqual(self._REF_CDNA_PREFIX, refProt.transcript.cDNA[:20])
        self.assertEqual('XXXATGCAATCATATGCTTC', persProt.transcript.cDNA[:20])

    def test_SNP(self):
        # Substituting each SNP's 'alt' attribute is expected to turn the
        # first residue from 'M' into 'L'.
        from pyGeno.SNPFiltering import SNPFilter, SequenceSNP

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN):
                return SequenceSNP(dummySRY_AGN.alt)

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        self.assertEqual('M', refProt.sequence[0])
        self.assertEqual('L', persProt.sequence[0])

    def test_deletion(self):
        # SequenceDel(1) is expected to shift the personalized cDNA by one base.
        from pyGeno.SNPFiltering import SNPFilter, SequenceDel

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN):
                # The reference base is looked up as in the original; its value is not used.
                refAllele = chromosome.refSequence[dummySRY_AGN.start]
                return SequenceDel(1)

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        self.assertEqual(self._REF_CDNA_PREFIX, refProt.transcript.cDNA[:20])
        self.assertEqual('TGCAATCATATGCTTCTGCT', persProt.transcript.cDNA[:20])

    def test_indels(self):
        # One 'X' is inserted per element of the indel set; the personalized
        # cDNA is expected to start with 'XXX'.
        from pyGeno.SNPFiltering import SNPFilter, SequenceInsert

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN_indels):
                count = sum(1 for _ in dummySRY_AGN_indels)
                return SequenceInsert("X" * count)

        # The reference protein is fetched as in the original but not asserted on.
        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN_indels',
                                              SNPFilter=MyFilter())

        self.assertEqual('XXXATGCAATCATATGCTTC', persProt.transcript.cDNA[:20])

    def test_bags(self):
        # A second Genome created for the same name is expected to expose a
        # wrapped_object equal to the one of self.ref.
        sameGenome = Genome(name=self._REFERENCE)
        self.assertEqual(sameGenome.wrapped_object, self.ref.wrapped_object)

    def test_prot_find(self):
        # find() is expected to locate the first and the last ten residues.
        prot = self.ref.get(Protein, id=self._PROTEIN)[0]
        head = prot.sequence[:10]
        self.assertEqual(0, prot.find(head))
        tail = prot.sequence[-10:]
        self.assertEqual(len(prot) - 10, prot.find(tail))

    def test_trans_find(self):
        # find() on the first five elements of the transcript is expected to return 0.
        transcript = self.ref.get(Transcript, name="SRY-001")[0]
        self.assertEqual(0, transcript.find(transcript[:5]))
Exemple #9
0
 def test_bags(self):
     # A second Genome created for the same name is expected to expose a
     # wrapped_object equal to the one of self.ref.
     sameGenome = Genome(name='GRCh37.75_Y-Only')
     self.assertEqual(sameGenome.wrapped_object, self.ref.wrapped_object)
Exemple #10
0
 def setUp(self):
     # Every test gets a fresh handle on the reference genome in self.ref.
     referenceName = 'GRCh37.75_Y-Only'
     self.ref = Genome(name=referenceName)
Exemple #11
0
class pyGenoSNPTests(unittest.TestCase):
    """Tests of personalized genomes built on the 'GRCh37.75_Y-Only' reference.

    Most tests load protein ENSP00000438917 from a personalized genome and
    from the plain reference, then compare their sequences. The last two
    tests check that the remote importers raise KeyError for the fixture
    packages.
    """

    _REFERENCE = 'GRCh37.75_Y-Only'
    _PROTEIN = 'ENSP00000438917'
    _REF_CDNA_PREFIX = 'ATGCAATCATATGCTTCTGC'

    def setUp(self):
        # Every test gets a fresh handle on the reference genome in self.ref.
        self.ref = Genome(name=self._REFERENCE)

    def tearDown(self):
        # No per-test cleanup is performed.
        pass

    def _proteinPair(self, **genomeOptions):
        # Build a personalized genome with the given options and return the
        # tuple (personalized protein, reference protein).
        personalized = Genome(name=self._REFERENCE, **genomeOptions)
        persProt = personalized.get(Protein, id=self._PROTEIN)[0]
        refProt = self.ref.get(Protein, id=self._PROTEIN)[0]
        return persProt, refProt

    def test_vanilla(self):
        # Without a custom filter the personalized cDNA is expected to start
        # with 'H' where the reference starts with 'A'.
        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN')

        self.assertEqual(self._REF_CDNA_PREFIX, refProt.transcript.cDNA[:20])
        self.assertEqual('HTGCAATCATATGCTTCTGC', persProt.transcript.cDNA[:20])

    def test_noModif(self):
        # A filter returning None is expected to leave the cDNA unchanged.
        from pyGeno.SNPFiltering import SNPFilter

        class MyFilter(SNPFilter):
            # Parameter names are kept from the original; presumably pyGeno
            # passes the SNP set by keyword -- confirm before renaming.
            def filter(self, chromosome, dummySRY_AGN):
                return None

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        persPrefix = persProt.transcript.cDNA[:20]
        refPrefix = refProt.transcript.cDNA[:20]
        self.assertEqual(persPrefix, refPrefix)

    def test_insert(self):
        # With SequenceInsert('TCA') the personalized cDNA is expected to
        # start with 'ATGATGCAATCATATGCTTC'.
        from pyGeno.SNPFiltering import SNPFilter, SequenceInsert

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN):
                # The reference base is looked up as in the original; its value is not used.
                refAllele = chromosome.refSequence[dummySRY_AGN.start]
                return SequenceInsert('TCA')

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        self.assertEqual(self._REF_CDNA_PREFIX, refProt.transcript.cDNA[:20])
        self.assertEqual('ATGATGCAATCATATGCTTC', persProt.transcript.cDNA[:20])

    def test_SNP(self):
        # Substituting each SNP's 'alt' attribute is expected to turn the
        # first residue from 'M' into 'L'.
        from pyGeno.SNPFiltering import SNPFilter, SequenceSNP

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN):
                return SequenceSNP(dummySRY_AGN.alt)

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        self.assertEqual('M', refProt.sequence[0])
        self.assertEqual('L', persProt.sequence[0])

    def test_deletion(self):
        # SequenceDel(1) is expected to shift the personalized cDNA by one base.
        from pyGeno.SNPFiltering import SNPFilter, SequenceDel

        class MyFilter(SNPFilter):
            def filter(self, chromosome, dummySRY_AGN):
                # The reference base is looked up as in the original; its value is not used.
                refAllele = chromosome.refSequence[dummySRY_AGN.start]
                return SequenceDel(1)

        persProt, refProt = self._proteinPair(SNPs='dummySRY_AGN',
                                              SNPFilter=MyFilter())

        self.assertEqual(self._REF_CDNA_PREFIX, refProt.transcript.cDNA[:20])
        self.assertEqual('TGCAATCATATGCTTCTGCT', persProt.transcript.cDNA[:20])

    def test_bags(self):
        # A second Genome created for the same name is expected to expose a
        # wrapped_object equal to the one of self.ref.
        sameGenome = Genome(name=self._REFERENCE)
        self.assertEqual(sameGenome.wrapped_object, self.ref.wrapped_object)

    def test_prot_find(self):
        # find() is expected to locate the first and the last ten residues.
        prot = self.ref.get(Protein, id=self._PROTEIN)[0]
        head = prot.sequence[:10]
        self.assertEqual(0, prot.find(head))
        tail = prot.sequence[-10:]
        self.assertEqual(len(prot) - 10, prot.find(tail))

    def test_trans_find(self):
        # find() on the first five elements of the transcript is expected to return 0.
        transcript = self.ref.get(Transcript, name="SRY-001")[0]
        self.assertEqual(0, transcript.find(transcript[:5]))

    def test_import_remote_genome(self):
        # The remote genome importer is expected to raise KeyError for this package.
        with self.assertRaises(KeyError):
            B.importRemoteGenome("Human.GRCh37.75_Y-Only.tar.gz")

    def test_import_remote_snps(self):
        # The remote SNP importer is expected to raise KeyError for this package.
        with self.assertRaises(KeyError):
            B.importRemoteSNPs("Human_agnostic.dummySRY.tar.gz")
Exemple #12
0
def importGenome(packageFile, batchSize=50, verbose=0):
    """Import a pyGeno genome package.

    A genome package is a tar.gz archive that contains at its root:

    * gzipped fasta files for all chromosomes, or URLs from which they must be downloaded

    * a gzipped GTF gene_set file from Ensembl, or a URL from which it must be downloaded

    * a manifest.ini file such as::

        [package_infos]
        description = Test package. This package installs only chromosome Y of mus musculus
        maintainer = Tariq Daouda
        maintainer_contact = tariq.daouda [at] umontreal
        version = GRCm38.73

        [genome]
        species = Mus_musculus
        name = GRCm38_test
        source = http://useast.ensembl.org/info/data/ftp/index.html

        [chromosome_files]
        Y = Mus_musculus.GRCm38.73.dna.chromosome.Y.fa.gz / or an url such as ftp://... or http://

        [gene_set]
        gtf = Mus_musculus.GRCm38.73_Y-only.gtf.gz / or an url such as ftp://... or http://

    All files except the manifest can be downloaded from: http://useast.ensembl.org/info/data/ftp/index.html

    If the import fails, the decompressed package directory is removed before
    the exception propagates. NOTE(review): the earlier documentation promised
    a rollback on error, but no database rollback is visible in this function;
    confirm whether _importGenomeObjects takes care of it.

    batchSize sets the number of genes to parse before performing a database save. PCs with little RAM like
    small values, while those endowed with more memory may perform faster with higher ones.

    verbose must be an int [0, 4] for various levels of verbosity.

    Raises KeyError if the sequence directory of the genome already exists, or
    if a genome with the same name and species can already be loaded.

    Returns True once the import has completed.
    """
    def reformatItems(items):
        # Turn the repr of a list of (key, value) tuples into "key: value" lines.
        s = str(items)
        s = s.replace('[', '').replace(']', '').replace("',", ': ').replace(
            '), ', '\n').replace("'", '').replace('(', '').replace(')', '')
        return s

    printf('Importing genome package: %s... (This may take a while)' %
           packageFile)

    packageDir = _decompressPackage(packageFile)

    succeeded = False
    try:
        parser = SafeConfigParser()
        parser.read(os.path.normpath(packageDir + '/manifest.ini'))
        packageInfos = parser.items('package_infos')

        genomeName = parser.get('genome', 'name')
        species = parser.get('genome', 'species')
        genomeSource = parser.get('genome', 'source')

        seqTargetDir = conf.getGenomeSequencePath(species.lower(), genomeName)
        if os.path.isdir(seqTargetDir):
            raise KeyError(
                "The directory %s already exists, Please call deleteGenome() first if you want to reinstall"
                % seqTargetDir)

        gtfFile = _getFile(parser.get('gene_set', 'gtf'), packageDir)

        chromosomesFiles = {}
        chromosomeSet = set()
        for key, fil in parser.items('chromosome_files'):
            chromosomesFiles[key] = _getFile(fil, packageDir)
            chromosomeSet.add(key)

        # Existence probe: Genome() raising KeyError is taken to mean that no
        # such genome is installed yet, which is the only acceptable outcome.
        try:
            Genome(name=genomeName, species=species)
        except KeyError:
            pass
        else:
            raise KeyError(
                "There seems to be already a genome (%s, %s), please call deleteGenome() first if you want to reinstall it"
                % (genomeName, species))

        genome = Genome_Raba()
        genome.set(name=genomeName,
                   species=species,
                   source=genomeSource,
                   packageInfos=packageInfos)

        printf("Importing:\n\t%s\nGenome:\n\t%s\n..." %
               (reformatItems(packageInfos).replace('\n', '\n\t'),
                reformatItems(parser.items('genome')).replace('\n', '\n\t')))

        chros = _importGenomeObjects(gtfFile, chromosomeSet, genome, batchSize,
                                     verbose)
        os.makedirs(seqTargetDir)

        # Chromosomes are laid out back to back: each one starts where the
        # previous one ended.
        startChro = 0
        pBar = ProgressBar(nbEpochs=len(chros))
        for chro in chros:
            pBar.update(label="Importing DNA, chro %s" % chro.number)
            length = _importSequence(chro,
                                     chromosomesFiles[chro.number.lower()],
                                     seqTargetDir)
            chro.start = startChro
            chro.end = startChro + length
            startChro = chro.end
        pBar.close()
        succeeded = True
    finally:
        if not succeeded:
            # Best-effort removal of the decompressed package so a failed
            # import does not leave it behind; errors are ignored here so the
            # original exception is the one that propagates.
            shutil.rmtree(packageDir, ignore_errors=True)

    shutil.rmtree(packageDir)

    return True
def encode(seq) :
	"""Return the one-hot encoding of a DNA string as a flat list of ints.

	Each of 'A', 'T', 'C' and 'G' contributes four integers, so the result
	holds 4 * len(seq) values. Any other character raises KeyError.
	"""
	oneHot = {
		'G' : [1, 0, 0, 0],
		'C' : [0, 1, 0, 0],
		'T' : [0, 0, 1, 0],
		'A' : [0, 0, 0, 1],
	}

	return [bit for base in seq for bit in oneHot[base]]

# Build a labelled dataset from the transcripts of the reference genome:
# every transcript with at least 150 bases of cDNA yields one positive
# example (target 1) and one negative example (target 0).
ref = Genome(name = 'GRCh37.75_Y-Only')
seqs = []
targs = []
for trans in ref.iterGet(Transcript) :
	cdna = trans.cDNA
	if len(cdna) < 150 :
		continue

	# Positive example: a 100-base window of the cDNA at a random offset.
	r = random.randint(0, len(cdna) - 100)
	seqs.append(encode(cdna[r: r + 100]))
	targs.append(1)

	# Negative example: the output of getRandomSeq(100), presumably a random
	# 100-base sequence -- confirm against its definition.
	seqs.append(encode(getRandomSeq(100)))
	targs.append(0)

# The first 20% of the examples form the validation set, the rest the training set.
t = int(len(seqs) * 0.2)
validation = [seqs[:t], targs[:t]]
train = [seqs[t:], targs[t:]]
from pyGeno.Genome import *

# Usage example: load a reference genome and look at one protein.
g = Genome(name = "GRCh37.75")
prot = g.get(Protein, id = 'ENSP00000438917')[0]
# print() with a single argument outputs the same text on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
# print the protein sequence
print(prot.sequence)
# print the biotype of the protein's gene
print(prot.gene.biotype)
# print the sequence of the protein's transcript
print(prot.transcript.sequence)

# fancy queries
# NOTE: x1 and x2 are not defined in this snippet; they stand for the CDS
# bounds you are interested in and must be set before running it.
for exon in g.get(Exon, {"CDS_start >": x1, "CDS_end <=" : x2, "chromosome.number" : "22"}) :
        # print the exon's coding sequence
        print(exon.CDS)
        # print the sequence of the exon's transcript
        print(exon.transcript.sequence)

# You can do the same for your subject specific genomes
# by combining a reference genome with polymorphisms.
# NOTE: MyFilter is not defined in this snippet; it stands for your own SNPFilter subclass.
g = Genome(name = "GRCh37.75", SNPs = ["STY21_RNA"], SNPFilter = MyFilter())