Ejemplo n.º 1
0
    def download(self):
        """Retrieve the SCOP source files into the local database.

        Creates the local directory through Path.mkdir, then fetches the
        'desc' and 'rel' entries of SCOPftp into self._desc and self._rel.

        Returns True after both retrievals.
        Raises NameError when self.has_local is false.
        """
        if not self.has_local:
            message = 'A local SCOP database directory must be defined.'
            raise NameError(message)

        Path.mkdir(self.local)

        # Description file first, release file second.
        retrieve = urllib.urlretrieve
        retrieve(SCOPftp['desc'], self._desc)
        retrieve(SCOPftp['rel'], self._rel)
        return True
Ejemplo n.º 2
0
    def download(self):
        """Retrieve the GO source file and process it.

        Creates the local directory through Path.mkdir, fetches
        GOftp['source'] into <local>/<self._gfile> and then runs
        self._process().

        Returns True after processing.
        Raises NameError when self.has_local is false.
        """
        if not self.has_local:
            message = 'A local GO database directory must be defined.'
            raise NameError(message)

        Path.mkdir(self.local)
        target = os.path.join(self.local, self._gfile)
        urllib.urlretrieve(GOftp['source'], target)

        self._process()
        return True
Ejemplo n.º 3
0
    def download(self):
        """Export the PDBTM repository into the local directory and process it.

        The local directory is created through Path.mkdir, the process
        changes into it, runs ``svn export`` on PDBTMftp['svn'] and then calls
        self._process(). The working directory the caller had on entry is
        restored before returning, also when one of the steps raises.

        Returns:
            True once the export and processing steps have run.

        Raises:
            NameError: if self.has_local is false.
        """
        if not self.has_local:
            # The message used to name drugBank, a copy-paste leftover.
            raise NameError('A local PDBTM database directory must be defined.')

        Path.mkdir(self.local)
        here = os.getcwd()
        os.chdir(self.local)
        try:
            os.system("svn export {0}".format(PDBTMftp['svn']))
            # Processing still runs from inside self.local, as it did before.
            self._process()
        finally:
            # `here` was saved but never used, which left the whole process
            # stranded in self.local after the call.
            os.chdir(here)

        return True
Ejemplo n.º 4
0
    def download(self):
        """Retrieve the Enzyme source files and process them.

        Creates the local directory through Path.mkdir, fetches the 'dat'
        and 'cls' entries of Enzymeftp into self._dfile and self._cfile, and
        then runs self._process().

        Returns True after processing.
        Raises NameError when self.has_local is false.
        """
        if not self.has_local:
            message = 'A local Enzyme database directory must be defined.'
            raise NameError(message)

        Path.mkdir(self.local)

        # Data file first, classification file second.
        retrieve = urllib.urlretrieve
        retrieve(Enzymeftp['dat'], self._dfile)
        retrieve(Enzymeftp['cls'], self._cfile)

        self._process()
        return True
Ejemplo n.º 5
0
    def download(self):
        """Retrieve the drugBank source files and process them.

        Creates the local directory through Path.mkdir, fetches the
        'targets' and 'main' entries of drugBankftp into self._target and
        self._main, and then runs self._process().

        Returns True after processing.
        Raises NameError when self.has_local is false.
        """
        if not self.has_local:
            message = 'A local drugBank database directory must be defined.'
            raise NameError(message)

        Path.mkdir(self.local)

        # Targets file first, main file second.
        retrieve = urllib.urlretrieve
        retrieve(drugBankftp['targets'], self._target)
        retrieve(drugBankftp['main'], self._main)

        self._process()
        return True
Ejemplo n.º 6
0
    def download(self):
        """Retrieve the Uniprot source archives and process them.

        Creates the local directory through Path.mkdir, fetches the
        'swissprot' and 'trembl' entries of Uniprotftp into the local
        directory under fixed file names, and then runs self._process().

        Returns True after processing.
        Raises NameError when self.has_local is false.
        """
        if not self.has_local:
            message = 'A local Uniprot database directory must be defined.'
            raise NameError(message)

        Path.mkdir(self.local)

        # (key in Uniprotftp, file name inside the local directory)
        archives = (
            ('swissprot', 'uniprot_sprot.dat.gz'),
            ('trembl', 'uniprot_trembl.dat.gz'),
        )
        for source_key, file_name in archives:
            target = os.path.join(self.local, file_name)
            urllib.urlretrieve(Uniprotftp[source_key], target)

        self._process()
        return True
Ejemplo n.º 7
0
    def make_PDBseq(self, log_file, resolution_threshold=None):
        """Build the PDBseq FASTA file and its index from the local PDB files.

        For every file yielded by self.localPDBs a PDB object is created and
        its FASTA_IDX(nucleotide=False) result is written to 'PDBseq.fa' and
        'PDBseq.fa.idx'. Both files go into self.PDBseq, or into the current
        directory when self.PDBseq is None. Sequences that are empty once
        every 'X'/'x' is removed are skipped. Progress is written to log_file.

        Args:
            log_file: name of the log file to (over)write.
            resolution_threshold: must be None. The resolution-filtered
                output is not available: the code that set it up is disabled,
                so any other value used to end in an accidental NameError
                after the output files had already been truncated.

        Raises:
            NameError: if self.has_local is false, or if resolution_threshold
                is not None.
        """
        if not self.has_local:
            raise NameError(
                'A local PDB database must be defined to do create a PDBseq database.'
            )
        if resolution_threshold is not None:
            # Fail before any output file is touched. NameError is kept
            # because that is what this call raised before (from the
            # undefined `resolutions` / `filtered_fd` names).
            raise NameError(
                'Filtering PDBseq by resolution_threshold is currently disabled.'
            )
        outdir = self.PDBseq if self.PDBseq is not None else os.curdir

        # Create the directory that is actually written to; self.PDBseq may
        # be None, in which case outdir falls back to the current directory.
        Path.mkdir(outdir)

        opened = []
        try:
            fasta_file = File(file_name=os.path.join(outdir, 'PDBseq.fa'),
                              action='w',
                              overwrite=True)
            opened.append(fasta_file)
            idx_file = File(file_name=os.path.join(outdir, 'PDBseq.fa.idx'),
                            action='w',
                            overwrite=True)
            opened.append(idx_file)
            log_handle = File(file_name=log_file, action='w', overwrite=True)
            opened.append(log_handle)

            fasta_fd = fasta_file.descriptor
            idx_fd = idx_file.descriptor
            log_fd = log_handle.descriptor

            for pdb_file in self.localPDBs:
                log_fd.write("Reading File: {0}\n".format(pdb_file))
                newPDB = PDB(pdb_file=pdb_file, dehydrate=True)
                fasta_idx = newPDB.FASTA_IDX(nucleotide=False)
                fastas = fasta_idx['FASTA']
                indexes = fasta_idx['IDX']
                if len(fastas) != len(indexes):
                    log_fd.write(
                        'ERROR!!!!! Number of fastas and indexes are different for pdb {0}!!\n'
                        .format(newPDB.id))
                if len(fastas) > 0:
                    log_fd.write('\tPrinting FASTA and IDX...\n')
                else:
                    log_fd.write('\tProblably just a nucleotide PDB...\n')
                for position, fasta in enumerate(fastas):
                    # The second line of the FASTA record is the sequence.
                    sequence = fasta.split('\n')[1]
                    sequence = sequence.replace('X', '').replace('x', '')
                    if len(sequence) > 0:
                        fasta_fd.write(fasta + "\n")
                        idx_fd.write(indexes[position] + "\n")
                # Drop the structure before the next one is loaded.
                del newPDB
        finally:
            # Close every file that was opened, the log included, even when
            # a PDB file fails to parse.
            for handle in opened:
                handle.close()
Ejemplo n.º 8
0
 def _process(self):
     """Condense the exported PDBTM XML files into self._pdbtmfile.

     Every '*.xml' file listed under <self._local>/pdbtm/database/ is read
     line by line into a TM object named after the upper-cased file name
     without extension. Chains named by an APPLY_TO_CHAIN NEW_CHAINID are
     skipped. A TM object is written, as str(xmldata) plus a newline, only
     when it ends up with at least one chain.
     """
     # Raw strings keep the regex escapes literal; compiled once, outside
     # the per-line loop.
     side_re   = re.compile(r'Side1="(\S+)"')
     apply_re  = re.compile(r'NEW_CHAINID="(\S{1})"')
     # NOTE(review): NUM_TM only matches a single digit, so a chain with ten
     # or more segments does not match and is skipped - confirm intent.
     chain_re  = re.compile(r'CHAINID="(\S{1})" NUM_TM="(\d{1})" TYPE="(\S+)"')
     region_re = re.compile(r'pdb_beg="(\-*\d+\w*)"[\s\S]+pdb_end="(\-*\d+\w*)"\s+type="(\w{1})"')

     tmoFile = File(self._pdbtmfile,'w', True)
     try:
         for xmlfile in Path.list_files(os.path.join(self._local,'pdbtm/database/'), '*.xml'):
             xmldata = TM(pdb = os.path.splitext(os.path.split(xmlfile)[1])[0].upper())
             skip_chains = set()
             read = False   # True while inside a CHAIN block that was accepted
             cdata = None
             # The context manager closes the XML file even if parsing fails.
             with open(xmlfile) as fdxml:
                 for line in fdxml:
                     if line.startswith('    <TMRES>'):     xmldata.tmres  = line
                     elif line.startswith('    <TMTYPE'):   xmldata.tmtype = line
                     elif line.startswith('    <PDBKWRES'): xmldata.kwres  = line
                     elif line.startswith('  <SIDEDEFINITION'):
                         m = side_re.search(line)
                         # Guarded like the other branches: a line without
                         # the attribute is ignored instead of aborting.
                         if m: xmldata.side = m.group(1)
                     elif line.startswith('      <APPLY_TO_CHAIN'):
                         m = apply_re.search(line)
                         if m: skip_chains.add(m.group(1))
                     elif line.startswith('  <CHAIN '):
                         m = chain_re.search(line)
                         if m:
                             chain, num, tmtype = m.group(1), m.group(2), m.group(3)
                             if chain not in skip_chains:
                                 cdata = (chain, num, tmtype)
                                 xmldata.set_chain(cdata)
                                 read = True
                     elif line.startswith('    <REGION ') and read:
                         m = region_re.search(line)
                         if m:
                             ini, end, tmtype = m.group(1), m.group(2), m.group(3)
                             xmldata.set_chain(cdata, (ini, end, tmtype))
                     elif line.startswith('  </CHAIN>'): read = False
             if len(xmldata.chains) > 0:
                 tmoFile.write(str(xmldata)+"\n")
     finally:
         # Always release the output file, also on a parsing error.
         tmoFile.close()
Ejemplo n.º 9
0
    def download(self):
        """Retrieve the TaxID archive, unpack it and process the result.

        Creates the local directory through Path.mkdir, fetches
        taxIDftp['global'] into <local>/taxdmp.zip, runs ``unzip -o`` on it
        with the local directory as target, and then runs self._process().
        The output of unzip is captured and discarded.

        Returns True after processing.
        Raises NameError when self.has_local is false.
        """
        if not self.has_local:
            message = 'A local TaxID database directory must be defined.'
            raise NameError(message)

        Path.mkdir(self.local)
        archive = os.path.join(self.local, 'taxdmp.zip')
        urllib.urlretrieve(taxIDftp['global'], archive)

        unzip = subprocess.Popen(['unzip', '-o', archive, '-d', self.local],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # Wait for unzip to finish; its stdout/stderr are not used.
        unzip.communicate()

        self._process()
        return True
Ejemplo n.º 10
0
    def download(self):
        """Retrieve the PDBeChem archive and unpack it in the local directory.

        Creates the local directory through Path.mkdir, fetches
        PDBeChemftp['global'] into <local>/mmcif.tar.gz and extracts it with
        ``tar zxvf`` into the local directory. The output of tar is captured
        and discarded.

        Returns:
            True after the archive has been handed to tar, False when the
            retrieval raised an error.

        Raises:
            NameError: if self.has_local is false.
        """
        if not self.has_local:
            raise NameError(
                'A local PDBeChem database directory must be defined.')

        Path.mkdir(self.local)
        destination = os.path.join(self.local, 'mmcif.tar.gz')
        try:
            urllib.urlretrieve(PDBeChemftp['global'], destination)
        except Exception:
            # Still best-effort for download errors, but no longer swallows
            # KeyboardInterrupt / SystemExit as the bare except did.
            return False
        command = ['tar', 'zxvf', destination, '-C', self.local]
        p = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # Wait for tar to finish; its stdout/stderr are not used.
        p.communicate()

        return True
Ejemplo n.º 11
0
    def sync_PDB(self, log_file=None):
        """Synchronise the local PDB directory through rsync.

        Creates the local directory through Path.mkdir and runs rsync from
        PDBrsync['address'] (port PDBrsync['port']) into it.

        Args:
            log_file: optional file name. When given, rsync's standard output
                is written to it; otherwise it is captured and discarded.

        Raises:
            NameError: if self.has_local is false.
            SystemError: if rsync wrote anything other than whitespace to
                its standard error; the message carries that output.
        """
        if not self.has_local:
            raise NameError(
                'A local PDB database must be defined to sync with.')

        Path.mkdir(self.local)

        command = [
            'rsync', '-rlpt', '-v', '-z', '--port=' + PDBrsync['port'],
            PDBrsync['address'], self.local
        ]

        # Keep a reference to the log handle so it can be closed; it used to
        # be opened inline in the Popen call and was never released.
        log_handle = open(log_file, 'w') if log_file is not None else None
        try:
            p = subprocess.Popen(command,
                                 stdout=log_handle
                                 if log_handle is not None else subprocess.PIPE,
                                 stderr=subprocess.PIPE)

            SBIglobals.alert('verbose', self,
                             'Executing: {0}'.format(" ".join(command)))

            out, err = p.communicate()
        finally:
            if log_handle is not None:
                log_handle.close()

        # Truthiness works for both text and bytes output; comparing against
        # '' is always unequal when stderr comes back as bytes.
        if err.strip():
            raise SystemError('{0}'.format(err))
Ejemplo n.º 12
0
    def get_PDB(self, pdbID):
        """Locate the compressed PDB file for pdbID, downloading it if needed.

        When a local database is defined, the sub-directory named after
        characters 2-3 of the lower-cased identifier is searched for a
        '*.ent.gz' file whose prefix matches pdbID. If none is found, or
        there is no local database, the file is fetched from the PDB FTP
        server into the current working directory.

        Args:
            pdbID: PDB identifier; compared case-insensitively.

        Returns:
            The path of the local file as listed by Path.list_files, the
            absolute path of the downloaded file, or False when the
            download raised an error.
        """
        if self.has_local:
            rootdir = os.path.join(self.local, pdbID.lower()[1:3])
            for pdb_file in Path.list_files(root=rootdir, pattern='*.ent.gz'):
                newfile = File(file_name=pdb_file, action='r')
                # lstrip removes any leading 'p'/'d'/'b' characters, not the
                # literal 'pdb' prefix. NOTE(review): this relies on the
                # identifier not starting with one of those letters - confirm.
                if newfile.prefix.lstrip('pdb').upper() == pdbID.upper():
                    return pdb_file

        # Not found locally (or no local database): fetch it from the FTP.
        pdb_file = 'pdb' + pdbID.lower() + '.ent.gz'
        source = 'ftp://' + PDBftp['address'] + os.path.join(
            PDBftp['structures'], pdbID[1:3].lower(), pdb_file)
        try:
            urllib.urlretrieve(source, pdb_file)
        except Exception:
            # Still best-effort for download errors, but no longer swallows
            # KeyboardInterrupt / SystemExit as the bare except did.
            return False
        return os.path.abspath(pdb_file)
Ejemplo n.º 13
0
 def localPDBeChems(self):
     """Yield every '*.cif' entry that Path.list_files reports under self.local."""
     cif_entries = Path.list_files(root=self.local, pattern='*.cif')
     for entry in cif_entries:
         yield entry
Ejemplo n.º 14
0
 def localPDBs(self):
     """Yield every '*.ent.gz' entry that Path.list_files reports under self.local."""
     pdb_entries = Path.list_files(root=self.local, pattern='*.ent.gz')
     for entry in pdb_entries:
         yield entry