예제 #1
0
def main(argv=None):
    """Parse the command line and build a bacterial BLAST or MetaCV database.

    Args:
        argv: Optional list of command-line arguments *without* the program
            name. ``None`` (the default) lets argparse read ``sys.argv[1:]``.

    Returns:
        None. Invalid arguments make argparse exit via ``SystemExit``.

    The download and database-creation steps only run when this module is
    executed as a script (``__name__ == '__main__'``); when the module is
    imported, the function returns right after argument parsing.
    """
    program = os.path.basename(sys.argv[0])

    # Setup argument parser
    parser = ArgumentParser(
        description='%s -- create actual bacteria databases from NCBI Sources' % program,
        epilog='created by Philipp Sehnert',
        add_help=True)
    parser.add_argument('--version', action='version',
                        version='%s 1.0' % program)
    # A tuple (not a set) keeps the order of the choices stable in help and
    # error messages.
    parser.add_argument("-type", dest="type", default='nucl',
                        choices=('nucl', 'prot'), help="set type of blastdb")
    parser.add_argument('-metacv', dest='metacv', action='store_true',
                        default=False, help='create metacv database')
    parser.add_argument('-exe', dest='exe',
                        help="if not installed, specify path to executable of 'makeblastdb' or 'metacv'")
    # NOTE(review): 'default' is never used because the option is required.
    parser.add_argument('-name', dest='name', default='bacterial', required=True,
                        help='outname for the databases')
    # NOTE(review): passing -parse_seqids stores False (the default is True),
    # which looks inverted relative to the help text -- confirm the intent.
    parser.add_argument('-parse_seqids', dest='parse_seqids', action='store_false', default=True,
                        help='Remove duplicated GI numbers from downloaded files and run "makeblastdb" with -parse_seqids statement ')

    # Process arguments; honour an explicitly supplied argument list.
    args = parser.parse_args(argv)
    metacv = args.metacv

    # Everything below is only meant to run in script mode.
    if __name__ != '__main__':
        return

    # check for protein or nucleotide database
    db_type = check_db_type(metacv, args.type)
    # verify executable for external scripts
    executable = check_executable(args.exe, metacv)
    # create dir for sources
    create_folder(DOWNLOAD_FOLDER)

    # init FTP functions and connect to Blast FTP Server
    ftp = ftp_functions(FTP_SERVER, FTP_ROOT, DOWNLOAD_FOLDER, DEBUG)
    ftp.connect()
    try:
        ftp.go_to_root()
        # start Downloading
        for ftp_folder in SOURCES:
            sys.stdout.write("Downloading files from %s \n" % (ftp_folder))
            ftp.download_folder(ftp_folder, db_type)
    finally:
        # close ftp connection even when a download fails
        ftp.close()

    # run external database creation scripts
    db_create = DBCreation(DB_OUT, DOWNLOAD_FOLDER, db_type,
                           args.parse_seqids, DEBUG, executable)
    db_create.set_METACV(metacv)
    if metacv:
        # select the subfolder for MetaCV database
        db_create.createMetaCVDB(args.name, ['Bacteria', 'Bacteria_DRAFT'])
    else:
        db_create.createBlastDB(args.name)
예제 #2
0
 def get_functional_annotation(self):
     '''Fetch the UniProt id-mapping file and return the helper's result.

     Connects to the UniProt FTP server, changes into the id-mapping
     directory and delegates to ``get_idmapping`` of the FTP helper.
     According to the original author, the file is only downloaded when
     the local copy is missing or outdated; that check is not visible in
     this method.

     Returns whatever ``get_idmapping`` returns for 'idmapping.dat.gz'.
     '''
     # FTP Server information
     uniprot_ftp = 'ftp.uniprot.org'
     functional = 'pub/databases/uniprot/current_release/knowledgebase/idmapping'
     mapping_file = 'idmapping.dat.gz'
     # establish connection
     # NOTE(review): uses the module-level DOWNLOAD_FOLDER, not an attribute
     # of self -- confirm this is intended.
     uniprot = ftp_functions(uniprot_ftp, functional, DOWNLOAD_FOLDER, self.DEBUG)
     uniprot.connect()
     try:
         # go to functional dir
         uniprot.go_down(functional)
         # download file and extract it
         return uniprot.get_idmapping(mapping_file)
     finally:
         # close connection, also when the download raises
         uniprot.close()
예제 #3
0
 def get_taxonomy(self):
     '''Fetch the NCBI taxonomy files used for the MetaCV database.

     Connects to the NCBI FTP server, changes into the taxonomy directory
     and delegates to the FTP helper: ``get_gi_map`` for the GI-to-taxid
     archive and ``get_taxdump`` for the taxonomy dump archive. According
     to the original author, files are only downloaded when the local
     copies are missing or outdated; that check is not visible here.

     Returns a list holding the result of ``get_gi_map`` followed by every
     item yielded by ``get_taxdump``.
     '''
     # FTP Server information
     ncbi_ftp = 'ftp.ncbi.nih.gov'
     taxonomy = '/pub/taxonomy/'
     gi_map_archive = 'gi_taxid_prot.dmp.gz'
     taxdump_archive = 'taxdump.tar.gz'
     files = []
     # establish connection
     ncbi = ftp_functions(ncbi_ftp, taxonomy, self.DOWNLOAD_FOLDER, self.DEBUG)
     ncbi.connect()
     try:
         # go to taxonomy dir
         ncbi.go_down(taxonomy)
         # download actual files and extract needed files
         files.append(ncbi.get_gi_map(gi_map_archive))
         files.extend(ncbi.get_taxdump(taxdump_archive))
     finally:
         # close connection, also when a download raises
         ncbi.close()
     return files