Beispiel #1
0
    def test_schema(self):
        """Test schema

        Registers two mappings together with one-to-many schema relations in
        two separate commits, then checks the resource cache contents and the
        keys of the writer storage graph.
        """
        seqPath = testutil.datafile('sp_hbb1')

        # Round one: sp2 plus a mapping from the stored sp42 onto it.
        sp2 = seqdb.BlastDB(seqPath)
        sp2.__doc__ = 'another sp'
        worldbase.Bio.Seq.sp2 = sp2
        sp = worldbase.Bio.Seq.Swissprot.sp42()
        firstMap = mapping.Mapping(sourceDB=sp, targetDB=sp2)
        firstMap.__doc__ = 'sp -> sp2'
        worldbase.Bio.Seq.testmap = firstMap
        worldbaseSchema.Bio.Seq.testmap = metabase.OneToManyRelation(sp, sp2)
        worldbase.commit()

        worldbase.clear_cache()

        # Round two: sp3 mapped onto sp2 as reloaded from worldbase.
        sp3 = seqdb.BlastDB(seqPath)
        sp3.__doc__ = 'sp number 3'
        worldbase.Bio.Seq.sp3 = sp3
        sp2 = worldbase.Bio.Seq.sp2()
        secondMap = mapping.Mapping(sourceDB=sp3, targetDB=sp2)
        secondMap.__doc__ = 'sp3 -> sp2'
        worldbase.Bio.Seq.testmap2 = secondMap
        worldbaseSchema.Bio.Seq.testmap2 = metabase.OneToManyRelation(sp3,
                                                                      sp2)

        # Before the second commit only the objects touched since
        # clear_cache() are expected in the resource cache.
        cachedIDs = sorted(worldbase._mdb.resourceCache.keys())
        assert cachedIDs == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
        worldbase.commit()

        # Every expected resource ID must appear among the graph keys.
        wanted = set(['Bio.Annotation.annoDB', 'Bio.Seq.Swissprot.sp42',
                      'Bio.Seq.sp2', 'Bio.Seq.sp3'])
        stored = set(worldbase._mdb.writer.storage.graph.keys())
        missing = wanted - stored
        self.EQ(len(missing), 0)
Beispiel #2
0
    def test_schema(self):
        """Test schema

        Registers two mappings together with one-to-many schema relations
        (via worldbase.schema) in two separate commits, then checks the
        resource cache contents and the keys of the writer storage graph.
        """
        seqPath = testutil.datafile('sp_hbb1')

        # Round one: sp2 plus a mapping from the stored sp42 onto it.
        sp2 = seqdb.BlastDB(seqPath)
        sp2.__doc__ = 'another sp'
        worldbase.Bio.Seq.sp2 = sp2
        sp = worldbase.Bio.Seq.Swissprot.sp42()
        firstMap = mapping.Mapping(sourceDB=sp, targetDB=sp2)
        firstMap.__doc__ = 'sp -> sp2'
        worldbase.Bio.Seq.testmap = firstMap
        firstRelation = metabase.OneToManyRelation(sp, sp2)
        worldbase.schema.Bio.Seq.testmap = firstRelation
        worldbase.commit()

        worldbase.clear_cache()

        # Round two: sp3 mapped onto sp2 as reloaded from worldbase.
        sp3 = seqdb.BlastDB(seqPath)
        sp3.__doc__ = 'sp number 3'
        worldbase.Bio.Seq.sp3 = sp3
        sp2 = worldbase.Bio.Seq.sp2()
        secondMap = mapping.Mapping(sourceDB=sp3, targetDB=sp2)
        secondMap.__doc__ = 'sp3 -> sp2'
        worldbase.Bio.Seq.testmap2 = secondMap
        secondRelation = metabase.OneToManyRelation(sp3, sp2)
        worldbase.schema.Bio.Seq.testmap2 = secondRelation

        # Before the second commit only the objects touched since
        # clear_cache() are expected in the resource cache.
        cachedIDs = sorted(worldbase._mdb.resourceCache.keys())
        assert cachedIDs == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
        worldbase.commit()

        # Every expected resource ID must appear among the graph keys.
        wanted = set(['Bio.Annotation.annoDB', 'Bio.Seq.Swissprot.sp42',
                      'Bio.Seq.sp2', 'Bio.Seq.sp3'])
        stored = set(worldbase._mdb.writer.storage.graph.keys())
        missing = wanted - stored
        self.EQ(len(missing), 0)
Beispiel #3
0
    def setUp(self, **kwargs):
        """Save a small DNA database, its annotations and their map.

        Stores the sequence database, a three-entry annotation database and
        an NLMSA built from those annotations in worldbase, binds them with a
        many-to-many schema relation under the 'exons' attribute, commits,
        and clears the cache.  Both the NLMSA and the sequence database are
        closed afterwards, even on failure.
        """
        TestBase.setUp(self)
        fastaPath = testutil.datafile('dnaseq.fasta')
        nlmsaPath = testutil.tempdatafile('tryannot')

        # Each row is (sequence id, start, stop, name), matching sliceLayout.
        sliceRows = {
            1: ('seq1', 5, 10, 'fred'),
            2: ('seq1', -60, -50, 'bob'),
            3: ('seq2', -20, -10, 'mary'),
        }
        sliceLayout = {'id': 0, 'start': 1, 'stop': 2, 'name': 3}

        db = seqdb.BlastDB(fastaPath)
        try:
            db.__doc__ = 'little dna'
            worldbase.Bio.Test.dna = db

            annoDB = seqdb.AnnotationDB(sliceRows, db,
                                        sliceAttrDict=sliceLayout)
            annoDB.__doc__ = 'trivial annotation'
            worldbase.Bio.Test.annoDB = annoDB

            nlmsa = cnestedlist.NLMSA(nlmsaPath, 'w', pairwiseMode=True,
                                      bidirectional=False)
            try:
                for key in annoDB:
                    annotation = annoDB[key]
                    nlmsa.addAnnotation(annotation)

                nlmsa.build(verbose=False)
                nlmsa.__doc__ = 'trivial map'
                worldbase.Bio.Test.map = nlmsa
                relation = metabase.ManyToManyRelation(db, annoDB,
                                                       bindAttrs=('exons',))
                worldbaseSchema.Bio.Test.map = relation
                worldbase.commit()
                worldbase.clear_cache()
            finally:
                nlmsa.close()
        finally:
            db.close()
Beispiel #4
0
    def setUp(self):
        """Commit the Swissprot test data and create the test XMLRPC server."""
        TestBase.setUp(self)

        # Save some data, write everything to the metabase, then drop the
        # cache so that later requests have to reload.
        populate_swissprot()
        worldbase.commit()
        worldbase.clear_cache()

        # Resource IDs handed to the test server.
        servedIDs = [
            'Bio.Seq.Swissprot.sp42',
            'Bio.Seq.frag',
            'Bio.Seq.spmap',
            'Bio.Annotation.annoDB',
            'Bio.Annotation.map',
        ]
        self.server = testutil.TestXMLRPCServer(servedIDs, self.tempdir.path)
Beispiel #5
0
    def setUp(self):
        """Commit the Swissprot test data and create the test XMLRPC server."""
        TestBase.setUp(self)

        # Save some data, write everything to the metabase, then drop the
        # cache so that later requests have to reload.
        populate_swissprot()
        worldbase.commit()
        worldbase.clear_cache()

        # Resource IDs handed to the test server.
        sequenceIDs = ['Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag',
                       'Bio.Seq.spmap']
        annotationIDs = ['Bio.Annotation.annoDB', 'Bio.Annotation.map']
        self.server = testutil.TestXMLRPCServer(sequenceIDs + annotationIDs,
                                                self.tempdir.path)
Beispiel #6
0
    def close(self, commitData=True):
        """Register the mapping resources and schemas; optionally commit and close.

        Updates the docstrings of the forward mapping (``self.Mf``) and, when
        present, the reverse mapping (``self.Mr``), adds each mapping to
        worldbase under ``self.resourceString`` + "_forward" / "_reverse",
        and adds a matching OneToManyRelation schema for each.

        commitData -- when true (the default) worldbase is committed, the
            mapping objects are closed and this object is marked closed.
            When false, nothing is committed, the mappings stay open and the
            closed flag is left untouched.

        Calling close() on an already closed object does nothing.
        """
        # Check the flag first so a repeated call is silent instead of
        # announcing work that is never done.
        if self._closed:
            return
        print("# Finalizing mapping schema(s)...")

        # Forward mapping: docstring, resource and schema.
        forwardSource = self.Mf.sourceDB._persistent_id
        forwardTarget = self.Mf.targetDB._persistent_id
        self.Mf.__doc__ = ("Mapping resource (forward) between annotations %s and %s"
                           % (forwardSource, forwardTarget))
        worldbase.add_resource(self.resourceString + "_forward", self.Mf)

        # self.inverseAttr is either None or set to an appropriate inverse
        # attribute.
        forward_bindAttrs = (self.forwardAttr, self.inverseAttr)
        relationF = metabase.OneToManyRelation(self.Mf.sourceDB,
                                               self.Mf.targetDB,
                                               bindAttrs=forward_bindAttrs)
        relationF.__doc__ = ("Mapping schema (forward) between annotations %s and %s"
                             % (forwardSource, forwardTarget))
        worldbase.add_schema(self.resourceString + "_forward", relationF)

        # Reverse mapping is optional.  Test identity against None everywhere:
        # a truthiness test could treat an empty mapping as absent, leaving it
        # registered here but never closed below.
        hasReverse = self.Mr is not None
        reverseDoc = None
        if hasReverse:
            reverseSource = self.Mr.sourceDB._persistent_id
            reverseTarget = self.Mr.targetDB._persistent_id
            self.Mr.__doc__ = ("Mapping resource (reverse) between annotations %s and %s"
                               % (reverseSource, reverseTarget))
            worldbase.add_resource(self.resourceString + "_reverse", self.Mr)

            reverse_bindAttrs = (self.reverseAttr, None)
            relationR = metabase.OneToManyRelation(self.Mr.sourceDB,
                                                   self.Mr.targetDB,
                                                   bindAttrs=reverse_bindAttrs)
            relationR.__doc__ = ("Mapping schema (reverse) between annotations %s and %s"
                                 % (reverseSource, reverseTarget))
            worldbase.add_schema(self.resourceString + "_reverse", relationR)
            reverseDoc = self.Mr.__doc__

        if commitData:
            print("# Committing to worldbase: (1) %s (2) %s"
                  % (str(self.Mf.__doc__), str(reverseDoc)))
            worldbase.commit()

            print("# Closing mapping object(s)")
            # Flush mapping(s) to persistent storage.
            self.Mf.close()
            if hasReverse:
                self.Mr.close()

            # Set closed flag
            self._closed = True
Beispiel #7
0
    def test_download(self):
        """Downloading of gzipped file using worldbase

        Saves a SourceURL resource, requests it back (which performs the
        download) and compares the MD5 digest of the resulting file with a
        known value.  The downloaded file is removed even if the check fails.
        """
        url = SourceURL('http://www.doe-mbi.ucla.edu/~leec/test.gz')
        url.__doc__ = 'test download'

        worldbase.add_resource('Bio.Test.Download1', url)
        worldbase.commit()

        # performs the download
        fpath = worldbase.Bio.Test.Download1()
        try:
            h = testutil.get_file_md5(fpath)
            self.assertEqual(h.hexdigest(),
                             'f95656496c5182d6cff9a56153c9db73')
        finally:
            # Do not leave the downloaded file behind when the assertion or
            # the digest computation fails.
            os.remove(fpath)
Beispiel #8
0
    def test_download(self):
        """Downloading of gzipped file using worldbase

        Saves a SourceURL resource, requests it back (which performs the
        download) and compares the MD5 digest of the resulting file with a
        known value.  The downloaded file is removed even if the check fails.
        """
        url = SourceURL('http://www.doe-mbi.ucla.edu/~leec/test.gz')
        url.__doc__ = 'test download'

        worldbase.add_resource('Bio.Test.Download1', url)
        worldbase.commit()

        # performs the download
        fpath = worldbase.Bio.Test.Download1()
        try:
            h = testutil.get_file_md5(fpath)
            self.assertEqual(h.hexdigest(),
                             'f95656496c5182d6cff9a56153c9db73')
        finally:
            # Do not leave the downloaded file behind when the assertion or
            # the digest computation fails.
            os.remove(fpath)
Beispiel #9
0
def save_NLMSA_downloaders(url, fileFilter=lambda x: x.endswith(".txt.gz"),
                           resourceStem='Bio.MSA.UCSC.',
                           fileDocumenter=None, fileNamer=None):
    """Save NLMSA downloader / builder objects for a set of downloadable
    textdump files.

    url -- location passed to catalog_downloads().
    fileFilter -- predicate passed to catalog_downloads(); the default
        accepts names ending in ".txt.gz".
    resourceStem -- prefix used by the default fileNamer.
    fileDocumenter -- callable producing a docstring from a name; defaults
        to prefixing 'NLMSA alignment '.
    fileNamer -- callable producing a resource name from a file name;
        defaults to resourceStem + the name without its '.gz' suffix.

    For every entry returned by catalog_downloads() an NLMSABuilder wrapping
    that entry is added under the same name minus its last four characters
    (the '.txt' suffix).  The combined dictionary is added to worldbase,
    committed, and returned.
    """
    if fileDocumenter is None:
        fileDocumenter = lambda x: 'NLMSA alignment ' + x
    if fileNamer is None:  # a default resource naming function
        fileNamer = lambda x: resourceStem + x[:-3]  # remove .gz suffix
    from pygr.nlmsa_utils import NLMSABuilder
    from pygr.downloader import SourceURL
    d = catalog_downloads(url, fileFilter, fileNamer,
                          fileDocumenter, SourceURL)
    # Iterate over a snapshot: new keys are inserted into d inside the loop,
    # which is only safe when items() is not a live view of the dictionary.
    for resID, o in list(d.items()):
        nlmsa = NLMSABuilder(o)
        nlmsa.__doc__ = fileDocumenter(resID)
        d[resID[:-4]] = nlmsa  # remove .txt suffix
    from pygr import worldbase
    worldbase.add_resource(d)
    worldbase.commit()
    return d  # just in case the user wants to see what was saved
def all_vs_all_blast_save():
    """
    Creates the blast files used during testing.
    Must be called before running the tests

    Blasts every sequence of the 'sp_hbb1' database against the database
    itself, stores the hits in a pairwise NLMSA, and saves the sorted,
    de-duplicated edges of each sequence in worldbase as Bio.Blast.
    """

    tempdir = testutil.TempDir("blast-test")
    testutil.change_pygrdatapath(tempdir.path)

    sp_hbb1 = testutil.datafile("sp_hbb1")
    all_vs_all = testutil.tempdatafile("all_vs_all")

    sp = seqdb.BlastDB(sp_hbb1)
    msa = cnestedlist.NLMSA(all_vs_all, mode="w", pairwiseMode=True,
                            bidirectional=False)

    # get strong homologs, save alignment in msa for every sequence
    for _seqID, s in sp.iteritems():
        sp.blast(s, msa, expmax=1e-10, verbose=False)

    # done constructing the alignment, so build the alignment db indexes
    msa.build(saveSeqDict=True)

    # Take the first database registered in the saved sequence dictionary.
    db = list(msa.seqDict.dicts.keys())[0]
    result = {}
    for k in db.values():
        edges = msa[k].edges(minAlignSize=12, pIdentityMin=0.5)
        for t in edges:
            assert len(t[0]) >= 12
        # A set drops duplicate (source, target, identity) triples before
        # they are sorted.
        uniqueHits = set([(str(t[0]), str(t[1]),
                           t[2].pIdentity(trapOverflow=False))
                          for t in edges])
        result[repr(k)] = sorted(uniqueHits)

    # save it into worldbase
    data = testutil.TestData()
    data.__doc__ = "sp_allvall"
    data.result = result
    worldbase.Bio.Blast = data
    worldbase.commit()
Beispiel #11
0
    def setUp(self, **kwargs):
        """Save a small DNA database, its annotations and their map.

        Stores the sequence database, a three-entry annotation database and
        an NLMSA built from those annotations in worldbase, binds them with a
        many-to-many schema relation under the 'exons' attribute, commits,
        and clears the cache.  Both the NLMSA and the sequence database are
        closed afterwards, even on failure.
        """
        TestBase.setUp(self)
        fastaPath = testutil.datafile('dnaseq.fasta')
        nlmsaPath = testutil.tempdatafile('tryannot')

        # Each row is (sequence id, start, stop, name), matching sliceLayout.
        sliceRows = {}
        sliceRows[1] = ('seq1', 5, 10, 'fred')
        sliceRows[2] = ('seq1', -60, -50, 'bob')
        sliceRows[3] = ('seq2', -20, -10, 'mary')
        sliceLayout = {'id': 0, 'start': 1, 'stop': 2, 'name': 3}

        db = seqdb.BlastDB(fastaPath)
        try:
            db.__doc__ = 'little dna'
            worldbase.Bio.Test.dna = db

            annoDB = seqdb.AnnotationDB(sliceRows, db,
                                        sliceAttrDict=sliceLayout)
            annoDB.__doc__ = 'trivial annotation'
            worldbase.Bio.Test.annoDB = annoDB

            nlmsa = cnestedlist.NLMSA(nlmsaPath, 'w', pairwiseMode=True,
                                      bidirectional=False)
            try:
                for key in annoDB:
                    annotation = annoDB[key]
                    nlmsa.addAnnotation(annotation)

                nlmsa.build()
                nlmsa.__doc__ = 'trivial map'
                worldbase.Bio.Test.map = nlmsa
                relation = metabase.ManyToManyRelation(db, annoDB,
                                                       bindAttrs=('exons', ))
                worldbase.schema.Bio.Test.map = relation
                worldbase.commit()
                worldbase.clear_cache()
            finally:
                nlmsa.close()
        finally:
            db.close()
Beispiel #12
0
 def test_xmlrpc(self):
     """Test XMLRPC

     Points worldbase at the local test server, runs the shared checks, and
     verifies that an attempt to save a resource raises ValueError.
     """
     # Force all future requests to reload, then read from XMLRPC.
     worldbase.clear_cache()
     serverURL = "http://localhost:%s" % self.server.port
     worldbase.update(serverURL)

     # run all our tests
     for check in (check_match, check_dir, check_dir_noargs,
                   check_dir_download, check_dir_re, check_bind,
                   check_bind2):
         check(self)

     # test readonly checks
     seqPath = testutil.datafile('sp_hbb1')
     sp2 = seqdb.BlastDB(seqPath)
     sp2.__doc__ = 'another sp'
     try:
         worldbase.Bio.Seq.sp2 = sp2
         worldbase.commit()
     except ValueError:
         pass
     else:
         # The write went through although it should have been rejected.
         msg = 'failed to catch bad attempt to write to XMLRPC server'
         raise KeyError(msg)
Beispiel #13
0
def save_NLMSA_downloaders(url,
                           fileFilter=lambda x: x.endswith(".txt.gz"),
                           resourceStem='Bio.MSA.UCSC.',
                           fileDocumenter=None,
                           fileNamer=None):
    '''Save NLMSA downloader / builder objects for a set
    of downloadable textdump files.

    url -- location passed to catalog_downloads().
    fileFilter -- predicate passed to catalog_downloads(); the default
        accepts names ending in ".txt.gz".
    resourceStem -- prefix used by the default fileNamer.
    fileDocumenter -- callable producing a docstring from a name; defaults
        to prefixing 'NLMSA alignment '.
    fileNamer -- callable producing a resource name from a file name;
        defaults to resourceStem + the name without its '.gz' suffix.

    For every entry returned by catalog_downloads() an NLMSABuilder wrapping
    that entry is added under the same name minus its last four characters
    (the '.txt' suffix).  The combined dictionary is added to worldbase,
    committed, and returned.
    '''
    if fileDocumenter is None:
        fileDocumenter = lambda x: 'NLMSA alignment ' + x
    if fileNamer is None:  # a default resource naming function
        fileNamer = lambda x: resourceStem + x[:-3]  # remove .gz suffix
    from pygr.nlmsa_utils import NLMSABuilder
    from pygr.downloader import SourceURL
    d = catalog_downloads(url, fileFilter, fileNamer, fileDocumenter,
                          SourceURL)
    # Iterate over a snapshot: new keys are inserted into d inside the loop,
    # which is only safe when items() is not a live view of the dictionary.
    for resID, o in list(d.items()):
        nlmsa = NLMSABuilder(o)
        nlmsa.__doc__ = fileDocumenter(resID)
        d[resID[:-4]] = nlmsa  # remove .txt suffix
    from pygr import worldbase
    worldbase.add_resource(d)
    worldbase.commit()
    return d  # just in case the user wants to see what was saved
Beispiel #14
0
    def test_xmlrpc(self):
        """Test XMLRPC

        Points worldbase at the local test server, runs the shared checks,
        and verifies that an attempt to save a resource raises ValueError.
        """
        # Force all future requests to reload.
        worldbase.clear_cache()
        # Add our test XMLRPC resource.
        serverURL = "http://localhost:%s" % self.server.port
        worldbase.update(serverURL)

        # run all our tests
        for check in (check_match, check_dir, check_dir_noargs,
                      check_dir_download, check_dir_re, check_bind,
                      check_bind2):
            check(self)

        # test readonly checks
        seqPath = testutil.datafile('sp_hbb1')
        sp2 = seqdb.BlastDB(seqPath)
        sp2.__doc__ = 'another sp'
        try:
            worldbase.Bio.Seq.sp2 = sp2
            worldbase.commit()
        except ValueError:
            pass
        else:
            # The write went through although it should have been rejected.
            msg = 'failed to catch bad attempt to write to XMLRPC server'
            raise KeyError(msg)
Beispiel #15
0
# Report how many records were added; n is set earlier in the script
# (outside this excerpt).
print 'added', n, 'records'

### create

# Wrap the 'annotations' table of the SQLite file as a pygr table ...
slicedb = sqlgraph.SQLTable('annotations',
                            serverInfo=SQLiteServerInfo(sql_file))

# ... and use its rows as annotation slices on the genome; the sequence ID
# of each slice is read from the 'seq_id' attribute.
annodb = annotation.AnnotationDB(slicedb, genome, annotationType='sql:',
                                 sliceAttrDict=dict(id='seq_id'))

### save

from pygr import worldbase

# Each saved object is given a docstring before it is assigned to a
# worldbase name.
genome.__doc__ = 'Campy genome'
worldbase.Bio.campy.genome = genome

annodb.__doc__ = 'Campy gene annotations from NCBI (PTT)'
worldbase.Bio.campy.genes = annodb

# Build a pairwise NLMSA (path stem 'genes_map') holding every annotation.
nlmsa = cnestedlist.NLMSA('genes_map', 'w', pairwiseMode=True)
for v in annodb.itervalues():
    nlmsa.addAnnotation(v)
nlmsa.build(saveSeqDict=False)

nlmsa.__doc__ = 'Campy gene mapping from NCBI'
worldbase.Bio.campy.gene_map = nlmsa

# Write all pending worldbase assignments.
worldbase.commit()
def main():
    """Build an annotationDB and an NLMSA mapping from the given gff files.

    Parses the command line, loads the genome resource from worldbase,
    creates one annotation per row yielded by read_for_pygr() for every gff
    file, builds an NLMSA over all annotations, and saves the annotationDB
    and the NLMSA to worldbase.  When --bind_attribute is given, a
    many-to-many schema relation binding that attribute is saved as well.

    Exits with status -1 when no gff file or a required option is missing.
    """

    usage = """Build and save the annotations defined in the given gff files
    Saves an annotationDB (representing the file itself) and creates a mapping 
    in the form genome[chromosome][100:200].officialGenes"""
    parser = optparse.OptionParser("%prog [options] data1.gff [data2.gff ...]\n"+usage)
    parser.add_option("--genome_resource", '-g', dest="genome_resource", type="string",
                      help="""The pygr resource for the genome, eg, 'Bio.Seq.Genome.TRICA.triCas3'""")
    parser.add_option("--annotationDB_resource", '-a', dest="annotationDB_resource", type="string",
                      help="""Where to save the created annotationDB. eg, 
                      Bio.Annotation.TRICA.triCas3.officialGenes""")
    parser.add_option("--save_pathstem", '-p', dest="pathstem", type="string",
                      help="""The file to save the exon resource to, eg,
                    '/home/baldig/projects/genomics/pygrdata/annotations/fly/triCas3_official_genes'""")
    parser.add_option("--map_resource", '-m', dest="map_resource", type="string",
                      help="""the resource to save the annotationDB->Genome map,
                      saved both to worldbase and to worldbase.schema, eg,
                      'Bio.Annotation.TRICA.triCas3.BeetleBase.officialGenesMap""")
    parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string",
                      help="""The attribute to access annotationDB from genome region, eg, 
                      'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes 
                      Default is not to bind an attribute to genome""")

    (opts, args) = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        print('Please specify at least one gff file to read')
        sys.exit(-1)
    if None in [opts.genome_resource, opts.annotationDB_resource, opts.pathstem, opts.map_resource]:
        parser.print_help()
        print('Required options: genome_resource, annotationDB_resource, pathstem, map_resource')
        sys.exit(-1)

    print('# Loading original genome db')
    genome = worldbase(opts.genome_resource)
    annotDB = annotation.AnnotationDB(None, genome, opts.bind_attribute,
                                      filename=opts.pathstem + '_annotDB', mode='c', verbose=False)
    nlmsa = cnestedlist.NLMSA(opts.pathstem, 'w', pairwiseMode=True, bidirectional=False)

    index = 0  # unique ID used in annotationDB, shared across all files
    for filename in args:
        print('# adding to annotationDB from %s' % filename)
        fileIn = open(filename)
        try:
            for row in read_for_pygr(fileIn):
                curAnnot = annotDB.new_annotation(index, row)
                nlmsa.addAnnotation(curAnnot)
                index += 1
        finally:
            # Release the file handle before moving on to the next file.
            fileIn.close()
    annotDB.close()  # Flush annotation data to disk

    print('# building NLMSA from all gff files')
    nlmsa.build(saveSeqDict=True)
    print('# saving annotationDB and NLMSA to worldbase as %s and %s'
          % (opts.annotationDB_resource, opts.map_resource))
    fileList = ', '.join(args)
    annotDB.__doc__ = ('Combined gff annotationDB from files %s on genome %s'
                       % (fileList, opts.genome_resource))
    nlmsa.__doc__ = ('Mapping of %s, from gff files %s onto genome %s'
                     % (opts.annotationDB_resource, fileList,
                        opts.genome_resource))
    worldbase.add_resource(opts.annotationDB_resource, annotDB)
    worldbase.add_resource(opts.map_resource, nlmsa)

    if opts.bind_attribute:
        print('# saving worldbase schema with bindAttrs=(%s)' % opts.bind_attribute)
        genome_annotDB_relation = metabase.ManyToManyRelation(genome, annotDB, bindAttrs=(opts.bind_attribute,))
        genome_annotDB_relation.__doc__ = ('GFF based mapping from %s to genome %s'
                                           % (opts.annotationDB_resource,
                                              opts.genome_resource))
        worldbase.add_schema(opts.map_resource, genome_annotDB_relation)

    print('# committing worldbase resources')
    worldbase.commit()
Beispiel #17
0
 def setUp(self, *args, **kwargs):
     """Run the base setUp, then store and commit the Swissprot test data."""
     TestBase.setUp(self, *args, **kwargs)
     populate_swissprot()
     # Finally save everything ...
     worldbase.commit()
     # ... and force all requests to reload.
     worldbase.clear_cache()
Beispiel #18
0
def main():
    """ Load the given csv file into an sqlite table, saving an
        annotationDB and an NLMSA version of the original file """
    # NOTE: the docstring above doubles as the usage text (main.__doc__ is
    # appended to the OptionParser usage string below), so its wording and
    # layout are part of the program output.
    #
    # Steps: parse options, read the tab-delimited input (optionally in
    # header-less BED form), convert it with convertBedToSQLite(), wrap the
    # table in an AnnotationDB on the chosen genome, build an NLMSA with
    # makeNLMSA(), and save both to worldbase.  Exits with status -1 when no
    # input file or a required option is missing.

    parser = optparse.OptionParser("%prog [options] infile.csv\n"+main.__doc__)
    parser.add_option("--datapath", '-p', dest="datapath", type="string",
                      default='/home/shared/pygrdata/annotations/HUMAN/hg18',
                      help="""Sets the datafile path.  Default=%default""")
    parser.add_option("--table_name", '-t', dest="table_name", type="string",
                      help="""The resource table's name and data stem, e.g.,
                      refGene => datapath/refGene.sqlite """)
    parser.add_option("--genome", '-g', dest="genome_resource", type="string", default='hg18',
                      help="""The pygr resource for the genome, default=%default""")
    parser.add_option("--save_resource", '-r', dest="save_resource", type="string",
                      help="""Where to save the created annotationDB and NLMSA. eg, 
                      Bio.Annotation.HUMAN.hg18.MotifMap.M0001""")
    parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string",
                      help="""The attribute to access annotationDB from genome region, eg, 
                      'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes 
                      Default is not to bind an attribute to genome""")
    parser.add_option("--slice_attrs", '-s', dest="slice_attrs", type="string",
                      default='dict(id="chromosome", start="start", stop="stop", orientation="orientation")',
                      help="""dictionary providing aliases in csv file for id, start, stop, etc. 
                      default=%default'""")
    parser.add_option("--bed_format", dest="bed_format", action='store_true',
                      help="""csv file is in BED file format, without headers.""")
    opts, args = parser.parse_args()
    if len(args) < 1:
        parser.print_help()
        print('Please specify at least one csv file to read')
        sys.exit(-1)
    if None in [opts.save_resource, opts.table_name]:
        parser.print_help()
        print('Required options: save_resource, table_name')
        sys.exit(-1)

    # Keep a handle on the real file object so it can be closed at the end;
    # in BED mode the reader is fed through a filter instead of the file.
    csvFile = open(args[0])
    try:
        if not opts.bed_format:
            reader = csv.DictReader(csvFile, delimiter='\t')
        else:
            dataLines = itertools.ifilter(bedCommentFilter, csvFile)
            reader = csv.DictReader(dataLines, delimiter='\t',
                                    fieldnames=['chromosome', 'start', 'stop'],
                                    restkey='junkData')
        fieldnames = reader.fieldnames
        print(fieldnames)

        print('# Loading genome %s' % opts.genome_resource)
        genome = getGenome(opts.genome_resource)

        opts.table_name = opts.table_name.replace('.', '_')  # SQL interprets . as membership
        # NOTE(review): tablePath is only reported, it is not passed to
        # convertBedToSQLite() -- confirm where the table is actually written.
        tablePath = os.path.join(opts.datapath, opts.table_name + '.sqlite')
        print('# Creating sqlite table for %s at %s' % (opts.table_name, tablePath))
        dataTable = convertBedToSQLite(reader, opts.table_name, fieldNames=fieldnames)

        print('# Making AnnotationDB and NLMSA...')
        # NOTE(review): eval() runs whatever expression is given on the
        # command line via --slice_attrs; only use with trusted input.
        sliceAttrs = eval(opts.slice_attrs)
        annotDB = annotation.AnnotationDB(dataTable, genome,
                                          annotationType=opts.table_name+':',
                                          sliceAttrDict=sliceAttrs)
        annotDB.__doc__ = 'AnnotationDB for %s on %s' % (opts.table_name, opts.genome_resource)

        msaName = os.path.join(opts.datapath, opts.table_name + '_')
        annotMap = makeNLMSA([annotDB], dataPath=msaName)

        print('# Saving results to worldbase as %s and %s...'
              % (opts.save_resource, opts.save_resource+'_db'))
        worldbase.add_resource(opts.save_resource, annotMap)
        worldbase.add_resource(opts.save_resource+'_db', annotDB)
        worldbase.commit()
    finally:
        # The file stays open until everything derived from it has been
        # saved, since it is not visible here when the reader is consumed.
        csvFile.close()
Beispiel #19
0
def main():
    """Load gff files into an SQLite database and save a mapping (unfinished).

    Parses the command line, checks that the requested sqlDB resource name
    is not yet present in worldbase, loads the genome, and feeds every gff
    file to a simpleGFF2PygrSQLite loader.

    NOTE(review): this function is work in progress.  The per-table step
    that should create the annotations is still missing, and the saving code
    at the end refers to ``annotDB`` and ``opts.annotationDB_resource``,
    neither of which is defined any more.  It will fail at run time once it
    reaches that point until those parts are written.

    Exits with status -1 on missing or malformed options, or when the sqlDB
    resource name is already in use.
    """

    usage = """Build and save the annotations defined in the given gff files
    Saves an annotationDB (representing the file itself) and creates a mapping 
    in the form genome[chromosome][100:200].officialGenes"""
    parser = optparse.OptionParser("%prog [options] data1.gff [data2.gff ...]\n"+usage)
    parser.add_option("--genome_resource", '-g', dest="genome_resource", type="string",
                      help="""The pygr resource for the genome, eg, 'Bio.Seq.Genome.TRICA.triCas3'""")
    #parser.add_option("--annotationDB_resource", '-a', dest="annotationDB_resource", type="string",
                      #help="""Where to save the created annotationDB. eg, 
                      #Bio.Annotation.TRICA.triCas3.officialGenes""")
    parser.add_option("--sqlDB_resource", '-s', dest="sqlDB_resource", type="string",
                      help="""Where to save the created sqlDB and a unique file name eg, 
                      Bio.Annotation.TRICA.triCas3.features_sqlDB,gffDB_v1""")
    parser.add_option("--save_pathstem", '-p', dest="pathstem", type="string",
                      help="""The file to save the resource to, eg,
                    '/home/baldig/projects/genomics/pygrdata/annotations/fly/triCas3_official_genes'""")
    parser.add_option("--map_resource", '-m', dest="map_resource", type="string",
                      help="""the resource to save the annotationDB->Genome map,
                      saved both to worldbase and to worldbase.schema, eg,
                      'Bio.Annotation.TRICA.triCas3.BeetleBase.officialGenesMap""")
    parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string",
                      help="""The attribute to access annotationDB from genome region, eg, 
                      'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes 
                      Default is not to bind an attribute to genome""")

    (opts, args) = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        print('Please specify at least one gff file to read')
        sys.exit(-1)
    # sqlDB_resource is announced as required and is dereferenced right
    # below, so it has to be part of this check.
    if None in [opts.genome_resource, opts.sqlDB_resource, opts.pathstem, opts.map_resource]:
        parser.print_help()
        print('Required options: genome_resource, sqlDB_resource, pathstem, map_resource')
        sys.exit(-1)
    if opts.sqlDB_resource.count(',') != 1:
        parser.print_help()
        print('Error: sqlDB_resource must be comma separated string with exactly one comma.')
        # Stop here: continuing would index into the unsplit string.
        sys.exit(-1)
    # [0] is the worldbase resource name, [1] the unique file name.
    opts.sqlDB_resource = opts.sqlDB_resource.split(',')
    try:
        worldbase(opts.sqlDB_resource[0])
    except WorldbaseNotFoundError:
        pass  # the name is still free
    else:
        parser.print_help()
        print("Warning: sqlDB_resource already exists.  Please select a new name.")
        sys.exit(-1)

    print('# Loading original genome db')
    genome = worldbase(opts.genome_resource)
    #annotDB = annotation.AnnotationDB(None, genome, opts.bind_attribute,
                                        #filename=opts.pathstem + '_annotDB', mode='c', verbose=False)
    sqlDB = sqlgraph.SQLiteServerInfo('%s/%s.sqlite' % (opts.pathstem, opts.sqlDB_resource[1]))
    gff2lite = simpleGFF2PygrSQLite(sqlDB)
    nlmsa = cnestedlist.NLMSA(opts.pathstem, 'w', pairwiseMode=True, bidirectional=False)

    for filename in args:
        print('# adding to sqlDB from %s' % filename)
        gff2lite.update(filename)

    tableNames = gff2lite.getTableNames()
    for table in tableNames:
        # TODO: create the annotations for this table and add them to nlmsa;
        # the previous file-based version is kept below for reference.
        pass

    #for row in read_for_pygr(fileIn):
        #curAnnot = annotDB.new_annotation(index, row)
        #nlmsa.addAnnotation(curAnnot)
        #index += 1
    #annotDB.close() # Flush annotation data to disk

    print('# building NLMSA from all gff files')
    nlmsa.build(saveSeqDict=True)
    # NOTE(review): annotDB and opts.annotationDB_resource are not defined in
    # this version (their definitions are commented out above).
    print('# saving annotationDB and NLMSA to worldbase as %s and %s'
          % (opts.annotationDB_resource, opts.map_resource))
    fileList = ', '.join(args)
    annotDB.__doc__ = ('Combined gff annotationDB from files %s on genome %s'
                       % (fileList, opts.genome_resource))
    nlmsa.__doc__ = ('Mapping of %s, from gff files %s onto genome %s'
                     % (opts.annotationDB_resource, fileList,
                        opts.genome_resource))
    worldbase.add_resource(opts.annotationDB_resource, annotDB)
    worldbase.add_resource(opts.map_resource, nlmsa)

    if opts.bind_attribute:
        print('# saving worldbase schema with bindAttrs=(%s)' % opts.bind_attribute)
        genome_annotDB_relation = metabase.ManyToManyRelation(genome, annotDB, bindAttrs=(opts.bind_attribute,))
        genome_annotDB_relation.__doc__ = ('GFF based mapping from %s to genome %s'
                                           % (opts.annotationDB_resource,
                                              opts.genome_resource))
        worldbase.add_schema(opts.map_resource, genome_annotDB_relation)

    print('# committing worldbase resources')
    worldbase.commit()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
    def setUp(self):
        """Create two small annotation databases on hg18 and save them.

        Each database receives five annotations on a single chromosome
        ("chr1" and "chr2" respectively) and is indexed by its own pairwise
        NLMSA.  All four objects are added to worldbase and committed.
        """
        print("# Setting annotation databases, nlmsa and committing to worldbase")

        # Positions of the fields inside each annotation tuple.
        sliceLayout = dict(id=0, start=1, stop=2, orientation=3)
        self.genome = worldbase("Bio.Seq.Genome.HUMAN.hg18")

        # (start, stop, orientation) of the five test slices, shared by both
        # databases; names get the suffix '1' or '2'.
        sliceSpecs = (('A', 200, 300, 1),
                      ('B', 100, 150, 1),
                      ('C', 50, 75, -1),
                      ('D', 400, 500, 1),
                      ('E', 600, 700, 1))

        # annotation db1 with its test slices
        self.annodb1 = annotation.AnnotationDB({}, self.genome,
                                               sliceAttrDict=sliceLayout)
        self.annodb1._persistent_id = 'foo1_db'
        self.seq_id = "chr1"
        (self.annot1, self.annot2, self.annot3,
         self.annot4, self.annot5) = [
            self.annodb1.new_annotation(letter + '1',
                                        (self.seq_id, start, stop, ori))
            for letter, start, stop, ori in sliceSpecs]

        # nested list holding the db1 annotations: these are our "features"
        self.nlmsa1 = cnestedlist.NLMSA(pathstem='test.mapping.foo1',
                                        mode='w', pairwiseMode=True)
        for key in self.annodb1:
            self.nlmsa1.addAnnotation(self.annodb1[key])
        self.nlmsa1.build()

        # annotation db2 with its test slices
        self.annodb2 = annotation.AnnotationDB({}, self.genome,
                                               sliceAttrDict=sliceLayout)
        self.annodb2._persistent_id = 'foo2_db'
        self.seq_id2 = "chr2"
        (self.annot6, self.annot7, self.annot8,
         self.annot9, self.annot10) = [
            self.annodb2.new_annotation(letter + '2',
                                        (self.seq_id2, start, stop, ori))
            for letter, start, stop, ori in sliceSpecs]

        # nested list holding the db2 annotations
        self.nlmsa2 = cnestedlist.NLMSA(pathstem='test.mapping.foo2',
                                        mode='w', pairwiseMode=True)
        for key in self.annodb2:
            self.nlmsa2.addAnnotation(self.annodb2[key])
        self.nlmsa2.build()

        # docstrings are set before the objects are added to worldbase
        self.annodb1.__doc__ = 'annodb1 db'
        self.nlmsa1.__doc__ = 'annodb1 nlmsa'
        self.annodb2.__doc__ = 'annodb2 db'
        self.nlmsa2.__doc__ = 'annodb2 nlmsa'

        # databases first, then their nlmsa mappings
        for resID, obj in (('Test.Annotations.annodb1_db', self.annodb1),
                           ('Test.Annotations.annodb2_db', self.annodb2),
                           ('Test.Annotations.annodb1', self.nlmsa1),
                           ('Test.Annotations.annodb2', self.nlmsa2)):
            worldbase.add_resource(resID, obj)

        worldbase.commit()
Beispiel #21
0
 def setUp(self, *args, **kwargs):
     """Run the base setUp, then store and commit the Swissprot test data."""
     TestBase.setUp(self, *args, **kwargs)
     populate_swissprot()
     # Finally save everything ...
     worldbase.commit()
     # ... and force all requests to reload.
     worldbase.clear_cache()