Exemple #1
0
    def close(self, commitData=True):
        """Register the mapping(s) and their schema in worldbase, optionally committing.

        The forward mapping ``self.Mf`` is given a docstring and added to
        worldbase under ``self.resourceString + "_forward"``, followed by a
        ``OneToManyRelation`` schema bound through
        ``(self.forwardAttr, self.inverseAttr)``.  When a reverse mapping
        ``self.Mr`` is present (i.e. not None) the same is done under
        ``self.resourceString + "_reverse"`` with ``(self.reverseAttr, None)``.

        commitData -- when true (the default) worldbase is committed, the
            mapping object(s) are closed and this object is marked closed.
            When false the resources and schema are only added: nothing is
            committed or closed and the closed flag is left unset, so a later
            call performs the registration again.

        Calling this on an object that is already marked closed does nothing.
        """
        if self._closed:
            return
        print("# Finalizing mapping schema(s)...")

        def publish(mapping, direction, bindAttrs):
            # Document one mapping, add it to worldbase, then add its schema
            # under the same resource name.
            endpoints = (mapping.sourceDB._persistent_id,
                         mapping.targetDB._persistent_id)
            resourceName = self.resourceString + "_" + direction

            mapping.__doc__ = ("Mapping resource (%s) between annotations %s and %s"
                               % ((direction,) + endpoints))
            worldbase.add_resource(resourceName, mapping)

            relation = metabase.OneToManyRelation(mapping.sourceDB, mapping.targetDB,
                                                  bindAttrs=bindAttrs)
            relation.__doc__ = ("Mapping schema (%s) between annotations %s and %s"
                                % ((direction,) + endpoints))
            worldbase.add_schema(resourceName, relation)

        # Decide once, by identity, whether a reverse mapping exists so that
        # registration and closing below always agree with each other (a plain
        # truth test could treat an empty mapping as absent).
        hasReverse = self.Mr is not None

        # self.inverseAttr is either None or set to an appropriate inverse attribute
        publish(self.Mf, "forward", (self.forwardAttr, self.inverseAttr))
        if hasReverse:
            # The reverse doc strings describe self.Mr's own source/target.
            publish(self.Mr, "reverse", (self.reverseAttr, None))

        if commitData:
            if hasReverse:
                reverseDoc = self.Mr.__doc__
            else:
                reverseDoc = None
            print("# Committing to worldbase: (1) %s (2) %s"
                  % (str(self.Mf.__doc__), str(reverseDoc)))
            worldbase.commit()

            print("# Closing mapping object(s)")
            # FLUSH MAPPING(S) TO PERSISTENT STORAGE
            self.Mf.close()
            if hasReverse:
                self.Mr.close()

            # Set closed flag
            self._closed = True
def main():
    """Load the given gff files into a SQLite database and map them onto a genome.

    Driven entirely by the command line: parses the options with optparse,
    checks that the required ones are present and that the requested sqlDB
    resource name is not already in worldbase, feeds every gff file named on
    the command line to simpleGFF2PygrSQLite, then builds an NLMSA and saves
    the results to worldbase.

    Exits with status -1 (after printing the help text) when no gff file is
    given, a required option is missing, --sqlDB_resource is malformed, or
    the sqlDB resource name already exists.

    NOTE(review): this is an unfinished port of the annotationDB-based
    workflow.  The per-table step is still a TODO, and the final "save to
    worldbase" section still uses ``annotDB`` and
    ``opts.annotationDB_resource``, neither of which is defined in this
    function (see the FIXME in the body).
    """

    usage = """Build and save the annotations defined in the given gff files
    Saves an annotationDB (representing the file itself) and creates a mapping 
    in the form genome[chromosome][100:200].officialGenes"""
    parser = optparse.OptionParser("%prog [options] data1.gff [data2.gff ...]\n"+usage)
    parser.add_option("--genome_resource", '-g', dest="genome_resource", type="string",
                      help="""The pygr resource for the genome, eg, 'Bio.Seq.Genome.TRICA.triCas3'""")
    #parser.add_option("--annotationDB_resource", '-a', dest="annotationDB_resource", type="string",
                      #help="""Where to save the created annotationDB. eg, 
                      #Bio.Annotation.TRICA.triCas3.officialGenes""")
    parser.add_option("--sqlDB_resource", '-s', dest="sqlDB_resource", type="string",
                      help="""Where to save the created sqlDB and a unique file name eg, 
                      Bio.Annotation.TRICA.triCas3.features_sqlDB,gffDB_v1""")
    parser.add_option("--save_pathstem", '-p', dest="pathstem", type="string",
                      help="""The file to save the resource to, eg,
                    '/home/baldig/projects/genomics/pygrdata/annotations/fly/triCas3_official_genes'""")
    parser.add_option("--map_resource", '-m', dest="map_resource", type="string",
                      help="""the resource to save the annotationDB->Genome map,
                      saved both to worldbase and to worldbase.schema, eg,
                      'Bio.Annotation.TRICA.triCas3.BeetleBase.officialGenesMap""")
    parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string",
                      help="""The attribute to access annotationDB from genome region, eg, 
                      'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes 
                      Default is not to bind an attribute to genome""")

    (opts, args) = parser.parse_args()

    if not args:
        parser.print_help()
        print('Please specify at least one gff file to read')
        sys.exit(-1)
    # sqlDB_resource must be checked here as well: it is split on ',' below
    # and the message already lists it as required.
    if None in [opts.genome_resource, opts.sqlDB_resource, opts.pathstem, opts.map_resource]:
        parser.print_help()
        print('Required options: genome_resource, sqlDB_resource, pathstem, map_resource')
        sys.exit(-1)
    if opts.sqlDB_resource.count(',') != 1:
        parser.print_help()
        print('Error: sqlDB_resource must be comma separated string with exactly one comma.')
        # Stop here; continuing would index into the unsplit string.
        sys.exit(-1)
    # Exactly one comma, so this always yields two parts:
    # "<worldbase resource name>,<sqlite file name>".
    sqlDB_name, sqlDB_file = opts.sqlDB_resource.split(',')

    # Refuse to reuse a resource name that worldbase can already resolve.
    try:
        worldbase(sqlDB_name)
    except WorldbaseNotFoundError:
        pass
    else:
        parser.print_help()
        print("Warning: sqlDB_resource already exists.  Please select a new name.")
        sys.exit(-1)

    print('# Loading original genome db')
    genome = worldbase(opts.genome_resource)
    #annotDB = annotation.AnnotationDB(None, genome, opts.bind_attribute,
                                        #filename=opts.pathstem + '_annotDB', mode='c', verbose=False)
    sqlDB = sqlgraph.SQLiteServerInfo('%s/%s.sqlite' % (opts.pathstem, sqlDB_file))
    gff2lite = simpleGFF2PygrSQLite(sqlDB)
    nlmsa = cnestedlist.NLMSA(opts.pathstem, 'w', pairwiseMode=True, bidirectional=False)

    for filename in args:
        print('# adding to sqlDB from %s' % filename)
        gff2lite.update(filename)

    tableNames = gff2lite.getTableNames()
    for table in tableNames:
        # TODO: the per-table processing was never written (the loop had no
        # body).  The annotationDB-based version did the following per row:
        #     for row in read_for_pygr(fileIn):
        #         curAnnot = annotDB.new_annotation(index, row)
        #         nlmsa.addAnnotation(curAnnot)
        #         index += 1
        #     annotDB.close() # Flush annotation data to disk
        pass

    print('# building NLMSA from all gff files')
    nlmsa.build(saveSeqDict=True)

    # FIXME: everything below is carried over unchanged from the
    # annotationDB-based workflow.  ``annotDB`` is never created in this
    # function and the --annotationDB_resource option is commented out above,
    # so these statements fail at runtime until they are rewritten for the
    # SQLite-backed data.
    print('# saving annotationDB and NLMSA to worldbase as %s and %s'
          % (opts.annotationDB_resource, opts.map_resource))
    annotDB.__doc__ = 'Combined gff annotationDB from files %s on genome %s' % (', '.join(args),
                                                                                opts.genome_resource)
    nlmsa.__doc__ = 'Mapping of %s, from gff files %s onto genome %s' % (opts.annotationDB_resource,
                                                                            ', '.join(args),
                                                                            opts.genome_resource)
    worldbase.add_resource(opts.annotationDB_resource, annotDB)
    worldbase.add_resource(opts.map_resource, nlmsa)

    if opts.bind_attribute:
        print('# saving worldbase schema with bindAttrs=(%s)' % opts.bind_attribute)
        genome_annotDB_relation = metabase.ManyToManyRelation(genome, annotDB, bindAttrs=(opts.bind_attribute,))
        genome_annotDB_relation.__doc__ = 'GFF based mapping from %s to genome %s' % (opts.annotationDB_resource,
                                                                                        opts.genome_resource)
        worldbase.add_schema(opts.map_resource, genome_annotDB_relation)

    print('# committing worldbase resources')
    worldbase.commit()


# Script entry point: run main() only when this file is executed directly.
# NOTE(review): at this point ``main`` is the definition above this guard; a
# second ``def main()`` follows it and rebinds the name only afterwards --
# confirm which of the two is meant to run.
if __name__ == "__main__":
    main()
def main():
    """Build an annotationDB and an NLMSA map from the given gff files.

    Driven entirely by the command line: parses the options with optparse,
    creates an AnnotationDB on the genome loaded from worldbase, adds one
    annotation per row produced by read_for_pygr() for every gff file named
    on the command line, builds the NLMSA, then adds both objects (and, when
    --bind_attribute is given, a ManyToManyRelation schema) to worldbase and
    commits.

    Exits with status -1 (after printing the help text) when no gff file is
    given or a required option is missing.
    """

    usage = """Build and save the annotations defined in the given gff files
    Saves an annotationDB (representing the file itself) and creates a mapping 
    in the form genome[chromosome][100:200].officialGenes"""
    parser = optparse.OptionParser("%prog [options] data1.gff [data2.gff ...]\n"+usage)
    parser.add_option("--genome_resource", '-g', dest="genome_resource", type="string",
                      help="""The pygr resource for the genome, eg, 'Bio.Seq.Genome.TRICA.triCas3'""")
    parser.add_option("--annotationDB_resource", '-a', dest="annotationDB_resource", type="string",
                      help="""Where to save the created annotationDB. eg, 
                      Bio.Annotation.TRICA.triCas3.officialGenes""")
    parser.add_option("--save_pathstem", '-p', dest="pathstem", type="string",
                      help="""The file to save the exon resource to, eg,
                    '/home/baldig/projects/genomics/pygrdata/annotations/fly/triCas3_official_genes'""")
    parser.add_option("--map_resource", '-m', dest="map_resource", type="string",
                      help="""the resource to save the annotationDB->Genome map,
                      saved both to worldbase and to worldbase.schema, eg,
                      'Bio.Annotation.TRICA.triCas3.BeetleBase.officialGenesMap""")
    parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string",
                      help="""The attribute to access annotationDB from genome region, eg, 
                      'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes 
                      Default is not to bind an attribute to genome""")

    (opts, args) = parser.parse_args()

    if not args:
        parser.print_help()
        print('Please specify at least one gff file to read')
        sys.exit(-1)
    if None in [opts.genome_resource, opts.annotationDB_resource, opts.pathstem, opts.map_resource]:
        parser.print_help()
        print('Required options: genome_resource, annotationDB_resource, pathstem, map_resource')
        sys.exit(-1)

    print('# Loading original genome db')
    genome = worldbase(opts.genome_resource)
    annotDB = annotation.AnnotationDB(None, genome, opts.bind_attribute,
                                      filename=opts.pathstem + '_annotDB', mode='c', verbose=False)
    nlmsa = cnestedlist.NLMSA(opts.pathstem, 'w', pairwiseMode=True, bidirectional=False)

    index = 0  # unique ID used in annotationDB; keeps counting across files
    for filename in args:
        print('# adding to annotationDB from %s' % filename)
        fileIn = open(filename)
        # try/finally so the gff file is closed even if a row fails to load.
        try:
            for row in read_for_pygr(fileIn):
                curAnnot = annotDB.new_annotation(index, row)
                nlmsa.addAnnotation(curAnnot)
                index += 1
        finally:
            fileIn.close()
    annotDB.close() # Flush annotation data to disk

    print('# building NLMSA from all gff files')
    nlmsa.build(saveSeqDict=True)
    print('# saving annotationDB and NLMSA to worldbase as %s and %s'
          % (opts.annotationDB_resource, opts.map_resource))
    annotDB.__doc__ = 'Combined gff annotationDB from files %s on genome %s' % (', '.join(args),
                                                                                opts.genome_resource)
    nlmsa.__doc__ = 'Mapping of %s, from gff files %s onto genome %s' % (opts.annotationDB_resource,
                                                                            ', '.join(args),
                                                                            opts.genome_resource)
    worldbase.add_resource(opts.annotationDB_resource, annotDB)
    worldbase.add_resource(opts.map_resource, nlmsa)

    if opts.bind_attribute:
        print('# saving worldbase schema with bindAttrs=(%s)' % opts.bind_attribute)
        genome_annotDB_relation = metabase.ManyToManyRelation(genome, annotDB, bindAttrs=(opts.bind_attribute,))
        genome_annotDB_relation.__doc__ = 'GFF based mapping from %s to genome %s' % (opts.annotationDB_resource,
                                                                                        opts.genome_resource)
        # The schema is stored under the same name as the NLMSA resource.
        worldbase.add_schema(opts.map_resource, genome_annotDB_relation)

    print('# committing worldbase resources')
    worldbase.commit()