def download(self):
    """Fetch the SCOP description and release files into the local directory.

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError('A local SCOP database directory must be defined.')
    Path.mkdir(self.local)
    # Retrieve both SCOP files to their pre-computed local targets.
    for source, target in ((SCOPftp['desc'], self._desc),
                           (SCOPftp['rel'], self._rel)):
        urllib.urlretrieve(source, target)
    return True
def download(self):
    """Download the GO source file into the local directory and process it.

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError('A local GO database directory must be defined.')
    Path.mkdir(self.local)
    target = os.path.join(self.local, self._gfile)
    urllib.urlretrieve(GOftp['source'], target)
    self._process()
    return True
def download(self):
    """Export the PDBTM database via ``svn export`` into the local directory.

    Fixes two defects in the previous version: the error message wrongly
    named "drugBank" although this downloader uses ``PDBTMftp``, and the
    working directory was changed to ``self.local`` but never restored
    (``here`` was captured and then ignored).

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError('A local PDBTM database directory must be defined.')
    Path.mkdir(self.local)
    here = os.getcwd()
    os.chdir(self.local)
    try:
        # svn must be on PATH; export lands in self.local (current dir).
        os.system("svn export {0}".format(PDBTMftp['svn']))
        # _process() is intentionally run while cwd is still self.local,
        # matching the original execution order.
        self._process()
    finally:
        # Restore the caller's working directory (previously leaked).
        os.chdir(here)
    return True
def download(self):
    """Download the Enzyme data and class files, then parse them.

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError(
            'A local Enzyme database directory must be defined.')
    Path.mkdir(self.local)
    # Fetch the two Enzyme files in the same order as before.
    for remote, local_target in ((Enzymeftp['dat'], self._dfile),
                                 (Enzymeftp['cls'], self._cfile)):
        urllib.urlretrieve(remote, local_target)
    self._process()
    return True
def download(self):
    """Download the drugBank targets and main files, then parse them.

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError(
            'A local drugBank database directory must be defined.')
    Path.mkdir(self.local)
    # Fetch both drugBank files in the same order as before.
    for remote, local_target in ((drugBankftp['targets'], self._target),
                                 (drugBankftp['main'], self._main)):
        urllib.urlretrieve(remote, local_target)
    self._process()
    return True
def download(self):
    """Download the SwissProt and TrEMBL dumps, then process them.

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError('A local Uniprot database directory must be defined.')
    Path.mkdir(self.local)
    # Both archives are stored under fixed names in the local directory.
    downloads = (('swissprot', 'uniprot_sprot.dat.gz'),
                 ('trembl', 'uniprot_trembl.dat.gz'))
    for key, filename in downloads:
        urllib.urlretrieve(Uniprotftp[key],
                           os.path.join(self.local, filename))
    self._process()
    return True
def make_PDBseq(self, log_file, resolution_threshold=None):
    """Build a FASTA sequence database (plus index) from all local PDBs.

    Parameters:
        log_file:             path for the progress/error log.
        resolution_threshold: when given, an additional filtered FASTA file
                              is written containing only entries whose PDB
                              id passes the resolution filter and that are
                              not CA-only models.

    Raises NameError when no local PDB database is configured.

    Bug fixed: the creation of the filtered file and the resolution lookup
    had been commented out, yet the loop below still referenced
    ``filtered_fd`` and ``resolutions`` — any call with a
    resolution_threshold raised NameError. The setup is reinstated, and the
    filtered file is now closed through its File wrapper, consistent with
    the other output files.
    """
    if not self.has_local:
        raise NameError(
            'A local PDB database must be defined to do create a PDBseq database.'
        )
    outdir = self.PDBseq if self.PDBseq is not None else os.curdir
    Path.mkdir(self.PDBseq)
    fasta_file = File(file_name=os.path.join(outdir, 'PDBseq.fa'),
                      action='w', overwrite=True)
    fasta_fd = fasta_file.descriptor
    idx_file = File(file_name=os.path.join(outdir, 'PDBseq.fa.idx'),
                    action='w', overwrite=True)
    idx_fd = idx_file.descriptor
    filtered_file = None
    if resolution_threshold is not None:
        filtered_file = File(
            file_name=self.get_PDBseq_filtered(resolution_threshold),
            action='w', overwrite=True)
        filtered_fd = filtered_file.descriptor
        resolutions = self.get_resolutions(
            resolution_threshold=resolution_threshold)
    log_file = File(file_name=log_file, action='w', overwrite=True)
    log_idx = log_file.descriptor
    for pdb_file in self.localPDBs:
        log_idx.write("Reading File: {0}\n".format(pdb_file))
        # Dehydrated parse: water molecules are irrelevant for sequences.
        newPDB = PDB(pdb_file=pdb_file, dehydrate=True)
        fasta_idx = newPDB.FASTA_IDX(nucleotide=False)
        if len(fasta_idx['FASTA']) != len(fasta_idx['IDX']):
            log_idx.write(
                'ERROR!!!!! Number of fastas and indexes are different for pdb {0}!!\n'
                .format(newPDB.id))
        if len(fasta_idx['FASTA']) > 0:
            log_idx.write('\tPrinting FASTA and IDX...\n')
        else:
            log_idx.write('\tProblably just a nucleotide PDB...\n')
        for c in range(len(fasta_idx['FASTA'])):
            # Second line of the FASTA record is the raw sequence; strip
            # unknown-residue placeholders before deciding to keep it.
            sequence = fasta_idx['FASTA'][c].split('\n')[1]
            sequence = sequence.replace('X', '').replace('x', '')
            if len(sequence) > 0:
                fasta_fd.write(fasta_idx['FASTA'][c] + "\n")
                if (resolution_threshold is not None
                        and newPDB.id in resolutions
                        and not newPDB.is_all_ca):
                    filtered_fd.write(fasta_idx['FASTA'][c] + "\n")
                idx_fd.write(fasta_idx['IDX'][c] + "\n")
        # Release the parsed structure before loading the next one.
        del newPDB
    # CLOSE & END
    fasta_file.close()
    idx_file.close()
    if filtered_file is not None:
        filtered_file.close()
def _process(self):
    """Parse every PDBTM XML file under the local mirror into one summary file.

    For each ``*.xml`` under ``<local>/pdbtm/database/`` a TM record is
    built from selected XML lines (matched by their exact leading-space
    prefixes) and, if it gathered at least one chain, written to the
    output file.

    NOTE(review): the prefix strings include significant leading spaces
    that mirror the XML indentation — do not reformat them.
    """
    tmoFile = File(self._pdbtmfile, 'w', True)
    for xmlfile in Path.list_files(
            os.path.join(self._local, 'pdbtm/database/'), '*.xml'):
        # PDB id is derived from the file name, upper-cased.
        xmldata = TM(
            pdb=os.path.splitext(os.path.split(xmlfile)[1])[0].upper())
        skip_chains = set()
        # `read` is True only while inside a <CHAIN> element whose
        # REGION lines should be attached to `cdata`.
        read = False
        fdxml = open(xmlfile)
        for line in fdxml:
            if line.startswith('    <TMRES>'):
                xmldata.tmres = line
            elif line.startswith('    <TMTYPE'):
                xmldata.tmtype = line
            elif line.startswith('    <PDBKWRES'):
                xmldata.kwres = line
            elif line.startswith('  <SIDEDEFINITION'):
                m = re.search('Side1="(\S+)"', line)
                xmldata.side = m.group(1)
            elif line.startswith('      <APPLY_TO_CHAIN'):
                # Chains re-labelled via APPLY_TO_CHAIN are duplicates;
                # remember their new ids so the <CHAIN> pass skips them.
                m = re.search('NEW_CHAINID=\"(\S{1})\"', line)
                if m:
                    skip_chains.add(m.group(1))
            elif line.startswith('  <CHAIN '):
                m = re.search(
                    'CHAINID=\"(\S{1})\" NUM_TM=\"(\d{1})\" TYPE=\"(\S+)\"',
                    line)
                if m:
                    chain, num, tmtype = m.group(1), m.group(2), m.group(3)
                    if not chain in skip_chains:
                        cdata = tuple([chain, num, tmtype])
                        xmldata.set_chain(cdata)
                        # Only now do REGION lines belong to this chain.
                        read = True
            elif line.startswith('    <REGION ') and read:
                m = re.search(
                    'pdb_beg=\"(\-*\d+\w*)\"[\s\S]+pdb_end=\"(\-*\d+\w*)\"\s+type=\"(\w{1})\"',
                    line)
                ini, end, tmtype = m.group(1), m.group(2), m.group(3)
                xmldata.set_chain(cdata, tuple([ini, end, tmtype]))
            elif line.startswith('  </CHAIN>'):
                read = False
        fdxml.close()
        # Skip structures that contributed no usable chains.
        if len(xmldata.chains) > 0:
            tmoFile.write(str(xmldata) + "\n")
    tmoFile.close()
def download(self):
    """Download the NCBI taxonomy dump, unzip it locally and process it.

    Raises NameError when no local directory has been configured.
    Returns True on completion.
    """
    if not self.has_local:
        raise NameError(
            'A local TaxID database directory must be defined.')
    Path.mkdir(self.local)
    zipped = os.path.join(self.local, 'taxdmp.zip')
    urllib.urlretrieve(taxIDftp['global'], zipped)
    # Unpack in place; -o overwrites without prompting.
    unzip = subprocess.Popen(['unzip', '-o', zipped, '-d', self.local],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    unzip.communicate()
    self._process()
    return True
def download(self):
    """Download the PDBeChem mmCIF archive and unpack it locally.

    Raises NameError when no local directory has been configured.
    Returns False when the download fails, True otherwise.

    Fix: the previous bare ``except:`` also swallowed KeyboardInterrupt
    and SystemExit; only I/O failures from the retrieval are expected.
    """
    if not self.has_local:
        raise NameError(
            'A local PDBeChem database directory must be defined.')
    Path.mkdir(self.local)
    destination = os.path.join(self.local, 'mmcif.tar.gz')
    try:
        urllib.urlretrieve(PDBeChemftp['global'], destination)
    except (IOError, OSError):
        # Network/file failure: keep the original best-effort contract.
        return False
    command = ['tar', 'zxvf', destination, '-C', self.local]
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate()
    return True
def sync_PDB(self, log_file=None):
    """Mirror the remote PDB into the local directory via rsync.

    Parameters:
        log_file: optional path; when given, rsync's stdout is written
                  there instead of being captured.

    Raises NameError when no local directory is configured and
    SystemError when rsync reports anything on stderr.

    Fix: the log file handle was opened inline and never closed
    (resource leak); it is now closed after rsync finishes.
    """
    if not self.has_local:
        raise NameError(
            'A local PDB database must be defined to sync with.')
    Path.mkdir(self.local)
    command = [
        'rsync', '-rlpt', '-v', '-z', '--port=' + PDBrsync['port'],
        PDBrsync['address'], self.local
    ]
    log_fd = open(log_file, 'w') if log_file is not None else subprocess.PIPE
    try:
        p = subprocess.Popen(command, stdout=log_fd,
                             stderr=subprocess.PIPE)
        SBIglobals.alert('verbose', self,
                         'Executing: {0}'.format(" ".join(command)))
        out, err = p.communicate()
    finally:
        if log_fd is not subprocess.PIPE:
            log_fd.close()
    if err.strip() != '':
        raise SystemError('{0}'.format(err))
def get_PDB(self, pdbID):
    """Return the path to the gzipped PDB file for *pdbID*.

    Searches the local mirror first (files are sharded by the middle two
    characters of the id); when absent, downloads from the PDB FTP into
    the current directory.

    Returns the file path, or False when the FTP retrieval fails.

    Fix: the bare ``except:`` around the download also swallowed
    KeyboardInterrupt/SystemExit; narrowed to I/O errors.
    """
    if self.has_local:
        rootdir = os.path.join(self.local, pdbID.lower()[1:3])
        for pdb_file in Path.list_files(root=rootdir, pattern='*.ent.gz'):
            newfile = File(file_name=pdb_file, action='r')
            # NOTE(review): lstrip('pdb') strips a leading run of the
            # characters p/d/b, not the literal prefix — safe only while
            # PDB ids start with a digit; confirm if ids ever change.
            if newfile.prefix.lstrip('pdb').upper() == pdbID.upper():
                return pdb_file
    # If we do not find it in local (or we do not have a local)
    # we search it on the FTP.
    pdb_file = 'pdb' + pdbID.lower() + '.ent.gz'
    source = 'ftp://' + PDBftp['address'] + os.path.join(
        PDBftp['structures'], pdbID[1:3].lower(), pdb_file)
    try:
        urllib.urlretrieve(source, pdb_file)
    except (IOError, OSError):
        return False
    return os.path.abspath(pdb_file)
def localPDBeChems(self):
    """Yield the path of every ``.cif`` file stored in the local mirror."""
    cif_pattern = '*.cif'
    for path in Path.list_files(root=self.local, pattern=cif_pattern):
        yield path
def localPDBs(self):
    """Yield the path of every gzipped PDB entry in the local mirror."""
    pdb_pattern = '*.ent.gz'
    for path in Path.list_files(root=self.local, pattern=pdb_pattern):
        yield path