Example #1
0
    def make_PDBseq(self, log_file, resolution_threshold=None):
        """
        Builds the PDBseq FASTA file and its index from the local PDB files.

        Every file in self.localPDBs is read (dehydrated) and its non-nucleotide
        FASTA/IDX entries are written to 'PDBseq.fa' and 'PDBseq.fa.idx' inside
        self.PDBseq (or the current directory when self.PDBseq is None).
        Sequences that are empty after removing 'X'/'x' are skipped.

        @type  log_file: String
        @param log_file: Name of the log file; it is overwritten

        @type  resolution_threshold: Number
        @param resolution_threshold: Must be None. The resolution-filtered
                                     output was never wired in (its set-up code
                                     was commented out), so any other value
                                     used to end in a NameError after the
                                     output files had already been truncated.

        @raise NameError if there is no local PDB database
        @raise NotImplementedError if resolution_threshold is not None
        """
        if not self.has_local:
            raise NameError(
                'A local PDB database must be defined to do create a PDBseq database.'
            )
        # Fail before touching any file instead of crashing half-way through.
        if resolution_threshold is not None:
            raise NotImplementedError(
                'Filtering PDBseq by resolution is not available.')

        if self.PDBseq is not None:
            outdir = self.PDBseq
            Path.mkdir(outdir)
        else:
            # Nothing to create: the current directory already exists.
            outdir = os.curdir

        opened = []
        try:
            fasta_file = File(file_name=os.path.join(outdir, 'PDBseq.fa'),
                              action='w',
                              overwrite=True)
            opened.append(fasta_file)
            fasta_fd = fasta_file.descriptor
            idx_file = File(file_name=os.path.join(outdir, 'PDBseq.fa.idx'),
                            action='w',
                            overwrite=True)
            opened.append(idx_file)
            idx_fd = idx_file.descriptor
            log = File(file_name=log_file, action='w', overwrite=True)
            opened.append(log)
            log_fd = log.descriptor

            for pdb_file in self.localPDBs:
                log_fd.write("Reading File: {0}\n".format(pdb_file))
                newPDB = PDB(pdb_file=pdb_file, dehydrate=True)
                fasta_idx = newPDB.FASTA_IDX(nucleotide=False)
                fastas, indexes = fasta_idx['FASTA'], fasta_idx['IDX']
                if len(fastas) != len(indexes):
                    log_fd.write(
                        'ERROR!!!!! Number of fastas and indexes are different for pdb {0}!!\n'
                        .format(newPDB.id))
                if len(fastas) > 0:
                    log_fd.write('\tPrinting FASTA and IDX...\n')
                else:
                    log_fd.write('\tProblably just a nucleotide PDB...\n')
                for c, fasta in enumerate(fastas):
                    # The second line of the FASTA entry is the sequence itself.
                    sequence = fasta.split('\n')[1]
                    sequence = sequence.replace('X', '').replace('x', '')
                    if len(sequence) > 0:
                        fasta_fd.write(fasta + "\n")
                        idx_fd.write(indexes[c] + "\n")
                del (newPDB)
        finally:
            # Close everything that was opened, the log file included.
            for handle in opened:
                handle.close()
Example #2
0
 def format2file(self, filename, extension='pdb', center=False):
     """
     Writes the object to '<filename>.<extension>'.

     @type  filename: String
     @param filename: Output file name without the extension

     @type  extension: String
     @param extension: Either 'pdb' or 'js'; selects pdb_format or js_format

     @param center: Passed through to the selected formatter

     @raise AttributeError if the extension is not 'pdb' or 'js'
     """
     if extension != 'pdb' and extension != 'js':
         raise AttributeError('Not accepted extension')

     target = File(filename + '.' + extension, 'w')
     if extension == 'pdb':
         content = self.pdb_format(center=center)
     else:
         content = self.js_format(center=center)
     target.write(content)
     target.close()
Example #3
0
 def build_multifasta(file_name, sequenceList, force=False):
     """
     Writes every sequence of sequenceList in FASTA format to file_name and
     returns a Fasta object built on the resulting file.

     @param file_name:    Name of the file to write
     @param sequenceList: Iterable of objects providing format('FASTA')
     @param force:        Passed to File as its overwrite flag
     """
     fasta_out = File(file_name, 'w', overwrite=force)
     writer = fasta_out.descriptor
     for record in sequenceList:
         # One FASTA entry per sequence, each ended by a newline
         entry = record.format('FASTA')
         writer.write(entry + "\n")
     fasta_out.close()
     return Fasta(fasta_file=fasta_out.full)
Example #4
0
 def build(file_name, sequenceID, sequence, force=False):
     """
     Writes a single sequence in FASTA format to file_name and returns a
     Fasta object built on the resulting file.

     @param file_name:  Name of the file to write
     @param sequenceID: Identifier given to the new Sequence
     @param sequence:   Sequence content given to the new Sequence
     @param force:      Passed to File as its overwrite flag
     """
     fasta_out = File(file_name, 'w', overwrite=force)
     record = Sequence(seqID=sequenceID, sequence=sequence)
     # No trailing newline is added after the single entry
     fasta_out.descriptor.write(record.format('FASTA'))
     fasta_out.close()
     return Fasta(fasta_file=fasta_out.full)
Example #5
0
    def __init__(self, database, search_type = 'prot'):
        """
        Prepares a blast executable bound to the given database.

        @param database:    Path of the database; it is made absolute and
                            passed to self._check_database
        @param search_type: Either 'prot' or 'nucl'

        @raise BE(-10) if search_type is neither 'prot' nor 'nucl'
        """
        # Only the two known search types are accepted
        if search_type not in {'prot', 'nucl'}:
            raise BE(-10)
        self._search_type = search_type

        # The executable is described in the 'blast' section of the config file
        config = ConfigParser.RawConfigParser(allow_no_value=True)
        self._configurator = config
        config.read(os.getenv('SBI_CONFIG_FILE', default_configuration_file))
        self._exe = Executable(
            executable    = config.get('blast', 'executable'),
            path          = config.get('blast', 'path'),
            variable_path = config.get('blast', 'variable_path'))

        # Database and, when it exists next to it, its '.idx' companion
        self._database = self._check_database(os.path.abspath(database))
        db_full  = self._database.file.full
        idx_name = db_full + ".idx"
        if not os.path.isfile(idx_name):
            self._idx = None
        else:
            self._idx = File(file_name = idx_name, action = 'r')

        # Parameters shared by every run
        self._exe.add_attribute(db_full, '-db')
        self._exe.add_attribute('5', '-outfmt')
        self._exe.add_parameter('-lcase_masking')

        SBIglobals.alert('debug', self, 'New Blast Executable created.\nBlast executable at {0}\n'.format(self._exe.full_executable))

        # Default values of the run options
        self._selfHit     = False
        self._hitIDformat = 'single'
        self._overwritte  = False
        self._clean_files = True
Example #6
0
 def _process(self):
     """
     Parses every '*.xml' file under '<self._local>/pdbtm/database/' and writes
     one line per entry that has at least one chain to self._pdbtmfile.

     Lines are recognised by their exact leading indentation and tag. Chains
     listed as NEW_CHAINID in an APPLY_TO_CHAIN line are skipped, and REGION
     lines are only read while inside an accepted CHAIN.

     @raise AttributeError if a SIDEDEFINITION or an accepted REGION line does
            not match the expected attribute layout
     """
     # Raw strings: the former plain literals relied on invalid escape
     # sequences such as '\S' and '\d'. Compiled once instead of per line.
     side_re   = re.compile(r'Side1="(\S+)"')
     newid_re  = re.compile(r'NEW_CHAINID="(\S{1})"')
     # NOTE(review): NUM_TM is matched as exactly one digit, so a chain line
     # with a two-digit NUM_TM would not match and is ignored - confirm intended.
     chain_re  = re.compile(r'CHAINID="(\S{1})" NUM_TM="(\d{1})" TYPE="(\S+)"')
     region_re = re.compile(r'pdb_beg="(\-*\d+\w*)"[\s\S]+pdb_end="(\-*\d+\w*)"\s+type="(\w{1})"')

     tmoFile = File(self._pdbtmfile, 'w', True)
     try:
         for xmlfile in Path.list_files(os.path.join(self._local, 'pdbtm/database/'), '*.xml'):
             # The entry is named after the file, upper-cased and without extension
             xmldata = TM(pdb = os.path.splitext(os.path.split(xmlfile)[1])[0].upper())
             skip_chains = set()
             read = False
             # 'with' closes the XML file even when parsing fails
             with open(xmlfile) as fdxml:
                 for line in fdxml:
                     if line.startswith('    <TMRES>'):     xmldata.tmres  = line
                     elif line.startswith('    <TMTYPE'):   xmldata.tmtype = line
                     elif line.startswith('    <PDBKWRES'): xmldata.kwres  = line
                     elif line.startswith('  <SIDEDEFINITION'):
                         xmldata.side = side_re.search(line).group(1)
                     elif line.startswith('      <APPLY_TO_CHAIN'):
                         m = newid_re.search(line)
                         if m: skip_chains.add(m.group(1))
                     elif line.startswith('  <CHAIN '):
                         m = chain_re.search(line)
                         if m:
                             chain, num, tmtype = m.group(1), m.group(2), m.group(3)
                             if chain not in skip_chains:
                                 cdata = (chain, num, tmtype)
                                 xmldata.set_chain(cdata)
                                 read = True
                     elif line.startswith('    <REGION ') and read:
                         # 'read' is only True after a chain was accepted, so
                         # cdata is the chain this region belongs to.
                         m = region_re.search(line)
                         ini, end, tmtype = m.group(1), m.group(2), m.group(3)
                         xmldata.set_chain(cdata, (ini, end, tmtype))
                     elif line.startswith('  </CHAIN>'): read = False
             if len(xmldata.chains) > 0:
                 tmoFile.write(str(xmldata) + "\n")
     finally:
         tmoFile.close()
Example #7
0
    def _process(self):
        """
        Writes the repr of every parsed enzyme, in sorted order and one per
        line, to self._enzfile.

        The enzymes come from self._parse_enzclass() followed by
        self._parse_enzymedat().
        """
        parsed = self._parse_enzclass() + self._parse_enzymedat()

        out = File(self._enzfile, 'w', True)
        for enzyme in sorted(parsed):
            out.write(repr(enzyme) + "\n")
        out.close()
Example #8
0
    def get_FASTA_IDX_by_names_to_file(self, names, outfile):
        """
        Copies the entries named in names from the PDBseq database into
        outfile (FASTA) and outfile + '.idx' (index).

        @param names:   Names to select; Fasta.retrieve receives a deep copy
        @param outfile: Name of the FASTA file to write
        """
        # FASTA part
        chosen = Fasta(self.PDBseq).retrieve(copy.deepcopy(names))
        fasta_out = File(outfile, 'w')
        for entry in chosen:
            fasta_out.write(entry.format('FASTA') + "\n")
        fasta_out.close()

        # Index part: keep the lines whose first field, minus its leading
        # character, is one of the requested names
        idx_out = File(outfile + '.idx', 'w')
        idx_in = File(self.PDBseq + '.idx', 'r')
        for idx_line in idx_in.descriptor:
            entry_name = idx_line.split()[0][1:]
            if entry_name in names:
                idx_out.write(idx_line)
        idx_in.close()
        idx_out.close()
    def _process(self):
        """
        Writes the repr of every processed drug, one per line, to
        self._drugfile.

        Targets are processed first because self._process_drugs needs them.
        """
        processed = self._process_drugs(self._process_targets())

        out = File(self._drugfile, 'w', True)
        for drug in processed:
            out.write(repr(drug) + "\n")
        out.close()
Example #10
0
    def _process(self):
        """
        Parses the GO term file and writes one line per term to self._gofile.

        The input is '<self.local>/<self._gfile>'. Terms are read up to the
        first '[Typedef]' line, or to the end of the file when there is none.
        Afterwards the parents of each term are replaced by the result of
        self._search_parents, and each term is added to its own parents
        before being written.
        """
        go_dic = {}
        go = None
        parseFile = File(os.path.join(self.local, self._gfile), 'r')
        try:
            for line in parseFile.descriptor:
                # Single quotes are backslash-escaped before any field is read
                line = re.sub('\'', '\\\'', line)
                if line.startswith('[Term]'):
                    # A new term starts: store the one read so far
                    if go is not None:
                        go_dic[go.id] = go
                elif line.startswith('id:'):
                    go = GOterm(id=line.split()[1].strip())
                elif line.startswith('name:'):
                    go.name = " ".join(line.split()[1:]).strip()
                elif line.startswith('namespace:'):
                    go.namespace = line.split()[1].strip()
                elif line.startswith('alt_id:'):
                    go.alt_id.append(line.split()[1].strip())
                elif line.startswith('is_obsolete:'):
                    go.obsolete = True
                elif line.startswith('is_a:'):
                    go.parents.add(line.split()[1].strip())
                elif line.startswith('relationship:'):
                    fields = line.split()
                    go.relations.append((fields[1].strip(), fields[2].strip()))
                elif line.startswith('[Typedef]'):
                    break
        finally:
            parseFile.close()

        # Store the last term here rather than only on '[Typedef]': it is then
        # kept when the file has no such section, and a '[Typedef]' line
        # before any term no longer fails on a missing term.
        if go is not None:
            go_dic[go.id] = go

        for go_id in go_dic:
            go_dic[go_id].parents = self._search_parents(go_dic, go_id)

        goFile = File(self._gofile, 'w', True)
        try:
            for go_id in go_dic:
                go_dic[go_id].parents.add(go_id)
                goFile.write(str(go_dic[go_id]) + "\n")
        finally:
            goFile.close()
Example #11
0
    def write(self, output_file=None, format='PDB', force=False, clean=False):
        """
        Writes the object to a file in the requested format.

        Only the 'PDB' format triggers any writing; for any other value the
        File object is still created but nothing else is done.

        @type  output_file: String
        @param output_file: File to write

        @type  format: String
        @param format: Format of the file to print

        @type  force: Boolean
        @param force: Passed to File as its overwrite flag

        @type  clean: Boolean
        @param clean: Passed through to _write_PDB_file
        """
        target = File(file_name=output_file, action='w', overwrite=force)
        if format != 'PDB':
            return
        self._write_PDB_file(pdb_file=target, clean=clean)
Example #12
0
    def _process(self):
        """
        Builds the taxonomy table from the nodes, names, deleted and merged
        files and writes one line per TaxID to self._taxid.

        All input files are read as '|'-separated fields. Entries from the
        deleted and merged files replace any entry with the same identifier
        and are flagged as old; merged ones also record the new identifier.
        """
        taxa = {}

        # Nodes: identifier | parent | rank
        nodes_in = File(file_name=self._nodes, action='r')
        for raw in nodes_in.descriptor:
            fields = re.sub('\'', '\\\'', raw).split('|')
            key = fields[0].strip()
            taxa[key] = TaxID(key)
            taxa[key].parent = fields[1].strip()
            taxa[key].rank = fields[2].strip()
        nodes_in.close()

        # Names: only the 'scientific name' rows are used
        names_in = File(file_name=self._names, action='r')
        for raw in names_in.descriptor:
            fields = re.sub('\'', '\\\'', raw).split('|')
            if fields[3].strip() != 'scientific name':
                continue
            taxa[fields[0].strip()].name = fields[1].strip()
        names_in.close()

        # Deleted identifiers
        deleted_in = File(file_name=self._delet, action='r')
        for raw in deleted_in.descriptor:
            key = raw.split('|')[0].strip()
            taxa[key] = TaxID(key)
            taxa[key].old = True
        deleted_in.close()

        # Merged identifiers: old | new
        merged_in = File(file_name=self._merged, action='r')
        for raw in merged_in.descriptor:
            fields = raw.split('|')
            key = fields[0].strip()
            taxa[key] = TaxID(key)
            taxa[key].old = True
            taxa[key].new = fields[1].strip()
        merged_in.close()

        out = File(self._taxid, 'w', True)
        for key in taxa:
            out.write(str(taxa[key]) + "\n")
        out.close()
Example #13
0
    def pdb_file(self, value):
        """
        Sets a PDB file if none has been given

        @type  value: File or String
        @param value: File object, or file name from which a read-mode File
                      is created

        @raise AttributeError if a PDB file is already set
        """
        if self._pdb_file is not None:
            raise AttributeError(
                "The PDB object is loaded from file {0}. To load the new file {1} create a new PDB object"
                .format(self._pdb_file.full, value))

        if isinstance(value, File):
            self._pdb_file = value
        else:
            # 'action' is the keyword every other File(...) call uses for the
            # open mode; the former 'type' keyword did not match it.
            self._pdb_file = File(file_name=value, action='r')
Example #14
0
    def __init__(self, fasta_file):
        """
        @type  fasta_file: String or File
        @param fasta_file: Name of the FASTA file, or a File object (whose
                           action is set to 'r')

        @raise AttributeError if fasta_file is neither a string nor a File
        """
        if isinstance(fasta_file, File):
            self._file = fasta_file
            self._file.action = 'r'
        elif isinstance(fasta_file, basestring):
            self._file = File(file_name=fasta_file, action='r')
        else:
            raise AttributeError('Check the input of the Fasta object')

        self._is_multifasta = self._check_multifasta()

        # Sequence containers start empty
        self._sequences, self._seqfinder = [], {}
Example #15
0
 def _parse_uniprot_file(self, source, destination, fasta, code):
     """
     Reads the entries of the source file and writes each of them to two files.

     An entry starts at an 'ID' line and ends at a '//' line. On '//' the
     str() of the entry goes to destination and its repr() goes to fasta.

     @param source:      Name of the file to read
     @param destination: Name of the file receiving str(entry); overwritten
     @param fasta:       Name of the file receiving repr(entry); overwritten
     @param code:        Second argument given to every Uniprot entry
     """
     sourceFile = File(source, 'r')
     destinFile = File(destination, 'w', True)
     fastaFile = File(fasta, 'w', True)
     try:
         protein = None
         for line in sourceFile.descriptor:
             if line.startswith('ID'):
                 protein = Uniprot(line.split()[1].strip(), code)
             if line.startswith('AC'):
                 protein.accession = line.split()[1:]
             if line.startswith('OX'):
                 protein.taxid = line.split()[1]
             if line.startswith('OH'):
                 protein.hosts = line.split()[1]
             if line.startswith('DR'):
                 protein.databases = line.split()[1:3]
             if line.startswith('  '):
                 # Sequence lines are indented; inner blanks are removed
                 protein.sequence = line.strip().replace(' ', '')
             if line.startswith('//'):
                 destinFile.write(str(protein) + "\n")
                 fastaFile.write(repr(protein) + "\n")
     finally:
         sourceFile.close()
         destinFile.close()
         # The fasta output was previously left open
         fastaFile.close()
Example #16
0
    def get_PDBeChem(self, chemID):
        """
        Returns the path of the file for the given chemical identifier.

        The local files are searched first (case-insensitive match on the file
        prefix). If nothing is found, '<CHEMID>.cif' is downloaded into the
        current directory.

        @type  chemID: String
        @param chemID: Identifier to look for

        @return: The local file as listed in self.localPDBeChems, the absolute
                 path of the downloaded file, or False if the download failed
        """
        if self.has_local:
            wanted = chemID.upper()
            for chem_file in self.localPDBeChems:
                newfile = File(file_name=chem_file, action='r')
                if newfile.prefix.upper() == wanted:
                    return chem_file

        #If we do not find it in local (or we do not have a local) we search it on the FTP
        chem_file = chemID.upper() + '.cif'
        source = PDBeChemftp['single'] + chem_file
        try:
            urllib.urlretrieve(source, chem_file)
        except Exception:
            # A failed download is reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except did.
            return False
        return os.path.abspath(chem_file)
Example #17
0
    def __init__(self, cif_file):
        """
        @type  cif_file: String
        @param cif_file: Name of the cif file to read

        The file is parsed and the formula decomposed during construction.
        """
        self._file = File(file_name=cif_file, action='r')
        self.__name__ = 'databases.PDBeChem'  # This must be included in every class for the SBIglobals.alert()

        # Every descriptive field starts unset
        self._id = self._name = self._type = None
        self._formula = self._parent = self._weight = None
        self._fcharge = self._code1l = None
        self._flformula = {}

        self._parse()
        self._decompose_formula()
Example #18
0
    def get_PDBs(self, pdbIDset):
        """
        Generator over the PDB files of the requested identifiers.

        A single string is accepted, with a warning, and yields the result of
        self.get_PDB for it. Otherwise the identifiers are upper-cased; with a
        local database the matching local files are yielded, without one
        self.get_PDB is called for each identifier.

        @type  pdbIDset: Iterable of Strings (or a single String)
        @param pdbIDset: Identifiers to look for
        """
        if isinstance(pdbIDset, str):
            warnings.warn(
                'For single PDB search the get_PDB function is recomended.')
            yield self.get_PDB(pdbIDset)
            # Stop here: the set-based search below must not run on a string.
            return

        pdbIDset = set([x.upper() for x in pdbIDset])

        if self.has_local:
            for pdb_file in self.localPDBs:
                newfile = File(file_name=pdb_file, action='r')
                # lstrip removes any leading 'p', 'd' or 'b' characters
                if newfile.prefix.lstrip('pdb').upper() in pdbIDset:
                    yield pdb_file
        else:
            for pdbID in pdbIDset:
                yield self.get_PDB(pdbID)
Example #19
0
    def get_PDB(self, pdbID):
        """
        Returns the path of the PDB file for the given identifier.

        With a local database, the '*.ent.gz' files of the sub-directory named
        after characters 2-3 of the lower-cased identifier are searched first.
        If nothing is found, 'pdb<id>.ent.gz' is downloaded into the current
        directory.

        @type  pdbID: String
        @param pdbID: Identifier to look for

        @return: The local file as listed, the absolute path of the downloaded
                 file, or False if the download failed
        """
        if self.has_local:
            wanted = pdbID.upper()
            rootdir = os.path.join(self.local, pdbID.lower()[1:3])
            for pdb_file in Path.list_files(root=rootdir, pattern='*.ent.gz'):
                newfile = File(file_name=pdb_file, action='r')
                # lstrip removes any leading 'p', 'd' or 'b' characters
                if newfile.prefix.lstrip('pdb').upper() == wanted:
                    return pdb_file

        #If we do not find it in local (or we do not have a local) we search it on the FTP
        pdb_file = 'pdb' + pdbID.lower() + '.ent.gz'
        source = 'ftp://' + PDBftp['address'] + os.path.join(
            PDBftp['structures'], pdbID[1:3].lower(), pdb_file)
        try:
            urllib.urlretrieve(source, pdb_file)
        except Exception:
            # A failed download is reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except did.
            return False
        return os.path.abspath(pdb_file)
Example #20
0
    def get_PDBeChems(self, chemIDset):
        """
        Generator over the files of the requested chemical identifiers.

        A single string is accepted, with a warning, and yields the result of
        self.get_PDBeChem for it. Otherwise the identifiers are upper-cased;
        with a local database the matching local files are yielded, without
        one self.get_PDBeChem is called for each identifier.

        @type  chemIDset: Iterable of Strings (or a single String)
        @param chemIDset: Identifiers to look for
        """
        if isinstance(chemIDset, str):
            warnings.warn(
                'For single PDBeChem search the get_PDBeChem function is recomended.'
            )
            yield self.get_PDBeChem(chemIDset)
            # Stop here: the set-based search below must not run on a string.
            return

        chemIDset = set([x.upper() for x in chemIDset])

        if self.has_local:
            for chem_file in self.localPDBeChems:
                newfile = File(file_name=chem_file, action='r')
                # Compare the bare upper-cased prefix, as get_PDBeChem does;
                # stripping leading 'p'/'d'/'b' characters is PDB-file logic
                # and would mangle lower-case chemical names.
                if newfile.prefix.upper() in chemIDset:
                    yield chem_file
        else:
            for chemID in chemIDset:
                yield self.get_PDBeChem(chemID)
Example #21
0
    def get_resolutions(self):
        """
        Returns a dictionary mapping PDB identifiers to their resolution.

        The resolution index is downloaded from the PDB FTP first. Local PDB
        files that are missing from that index get their resolution from
        their own header.
        """
        # resolutions (-1) are for methods that do not define resolution
        resolutions = {}

        index_lines = []
        ftp = ftplib.FTP(PDBftp['address'])
        ftp.login()
        ftp.cwd(PDBftp['derived'])
        ftp.retrlines('RETR ' + PDBftp['resolution'], index_lines.append)
        ftp.quit()

        SBIglobals.alert('debug', self,
                         'Retrieving resolution data from PDB FTP...')

        # Data lines start after the first line beginning with '-'
        in_data = False
        for entry in index_lines:
            if entry.startswith('-'):
                in_data = True
                continue
            if not in_data or len(entry.strip()) == 0:
                continue
            data = [field.strip() for field in entry.split(';')]
            if len(data[1]) == 0:
                continue
            SBIglobals.alert(
                'debug', self,
                '\tResolution for {0[0]} is {0[1]}...'.format(data))
            resolutions[data[0]] = data[1]

        #rsync is accumulative, we might have structures that are not in the residu.idx anymore.. must check
        for pdb_file in self.localPDBs:
            local = File(file_name=pdb_file, action='r')
            pdbid = local.prefix.lstrip('pdb').upper()
            if pdbid in resolutions:
                continue
            pdbobj = PDB(pdb_file=pdb_file, header=True, onlyheader=True)
            resolution = pdbobj.header.resolution
            SBIglobals.alert(
                'debug', self,
                '\tGrabbing Resolution for {0} is {1}...'.format(
                    pdbid, resolution))
            resolutions[pdbid] = resolution

        return resolutions
Example #22
0
    def __init__(self,
                 pdb_file=None,
                 dehydrate=False,
                 header=False,
                 onlyheader=False,
                 biomolecule=False):
        """
        @type  pdb_file: String
        @param pdb_file: PDB formated file to read

        @type  dehydrate: Boolean
        @param dehydrate: Call self.dehydrate() once the object is built

        @type  header: Boolean
        @param header: Passed to the file reader; forced to True when
                       onlyheader or biomolecule is set

        @type  onlyheader: Boolean
        @param onlyheader: Passed to the file reader

        @type  biomolecule: Boolean
        @param biomolecule: Passed to the file reader

        @raise IOError if pdb_file does not exist and it is not an empty object
        """
        # Both options imply reading the header
        header = True if (biomolecule or onlyheader) else header

        self._pdb_file = pdb_file
        self._chains = []
        self._NMR = False
        self._NMR_chains = []
        self._chain_id = set()

        # -1 -> original
        #  0 -> symmetry
        # >0 -> biomolecule
        self._biomol_id = -1

        self._header = None

        self._has_prot, self._has_nucl = False, False

        self._COMPND = None

        # With a file given, wrap it in a File and read it right away
        if self.pdb_file is not None:
            self._pdb_file = File(file_name=self._pdb_file, action='r')
            self._read_PDB_file(header=header,
                                onlyheader=onlyheader,
                                biomolecule=biomolecule)

        if dehydrate:
            self.dehydrate()
Example #23
0
 def localTrembls(self):
     """
     Generator over the lines of the file named by self._trbfile.

     The file is closed once the generator is exhausted or closed.
     """
     tblFile = File(self._trbfile, 'r')
     try:
         for uni_line in tblFile.descriptor:
             yield uni_line
     finally:
         # Previously the file was never closed
         tblFile.close()
Example #24
0
 def localTM(self):
     """
     Generator over the lines of the file named by self._pdbtmfile.

     The file is closed once the generator is exhausted or closed.
     """
     tmoFile = File(self._pdbtmfile, 'r')
     try:
         for tm_line in tmoFile.descriptor:
             yield tm_line
     finally:
         # Previously the file was never closed
         tmoFile.close()
Example #25
0
 def localEnzymes(self):
     """
     Generator over the lines of the file named by self._enzfile.

     The file is closed once the generator is exhausted or closed.
     """
     enzFile = File(self._enzfile, 'r')
     try:
         for enz_line in enzFile.descriptor:
             yield enz_line
     finally:
         # Previously the file was never closed
         enzFile.close()
Example #26
0
 def relations(self):
     """
     Generator over the lines of the file named by self._rel, skipping the
     lines that start with '#'.

     The file is closed once the generator is exhausted or closed.
     """
     relFile = File(self._rel, 'r')
     try:
         for rel_line in relFile.descriptor:
             if not rel_line.startswith('#'):
                 yield rel_line
     finally:
         # Previously the file was never closed
         relFile.close()
Example #27
0
 def localSwissprots(self):
     """
     Generator over the lines of the file named by self._swsfile.

     The file is closed once the generator is exhausted or closed.
     """
     swsFile = File(self._swsfile, 'r')
     try:
         for uni_line in swsFile.descriptor:
             yield uni_line
     finally:
         # Previously the file was never closed
         swsFile.close()
Example #28
0
 def localGOs(self):
     """
     Generator over the lines of the file named by self._gofile.

     The file is closed once the generator is exhausted or closed.
     """
     goFile = File(self._gofile, 'r')
     try:
         for go_line in goFile.descriptor:
             yield go_line
     finally:
         # Previously the file was never closed
         goFile.close()
Example #29
0
    def __init__(self, cdhitfile):
        """
        @type  cdhitfile: String
        @param cdhitfile: Name of the file to parse; it is read during
                          construction through self._parse_file()
        """
        # Containers start empty
        self._clusters, self._allseqids = [], {}
        self._file = File(file_name=cdhitfile)

        self._parse_file()
Example #30
0
 def descriptions(self):
     """
     Generator over the lines of the file named by self._desc, skipping the
     lines that start with '#'.

     The file is closed once the generator is exhausted or closed.
     """
     dscFile = File(self._desc, 'r')
     try:
         for dsc_line in dscFile.descriptor:
             if not dsc_line.startswith('#'):
                 yield dsc_line
     finally:
         # Previously the file was never closed
         dscFile.close()