Exemple #1
0
 def testCenterOfMass(self):
     """Check the centre of mass reported for the CIF test structure.

     The structure read from ``self.CIFFileName`` is passed to
     ``centerOfMass`` and each returned coordinate is compared with its
     reference value to two decimal places.
     """
     handler = AtomicStructHandler()
     loaded = handler.read(self.CIFFileName)
     comX, comY, comZ = handler.centerOfMass(loaded)
     expected = (8.1593891597, 21.1833304818, 20.0177924407)
     for value, reference in zip((comX, comY, comZ), expected):
         self.assertAlmostEqual(value, reference, 2)
Exemple #2
0
    def testReadFromPDBDatabase(self):
        """Fetch entry 6CUD in PDB format and check that a file was written.

        The file is requested into /tmp and deleted once its existence has
        been asserted.
        """
        handler = AtomicStructHandler()
        # EMD-7620
        downloaded = handler.readFromPDBDatabase('6CUD',
                                                 type='pdb',
                                                 dir='/tmp')
        self.assertTrue(os.path.exists(downloaded))

        os.unlink(downloaded)
Exemple #3
0
 def pdbDownloadStep(self):
     """Download the atomic structure identified by ``self.pdbId``.

     The entry is requested in mmCIF format into the protocol's extra
     directory and the path returned by the handler is passed on to
     ``createOutputStep``.
     """
     aSH = AtomicStructHandler()
     pdbId = self.pdbId.get()
     # Parenthesised single-argument form: prints the same text with the
     # Python 2 print statement and with the Python 3 print function.
     print("retriving PDB file %s" % pdbId)
     pdbPath = aSH.readFromPDBDatabase(pdbId,
                                       type='mmCif',
                                       dir=self._getExtraPath())
     self.createOutputStep(pdbPath)
Exemple #4
0
    def testRenameToChains(self):
        """Check ``renameChains`` after giving a chain a two-character id.

        Chain 'B' of the first model is renamed to 'CC'; once
        ``renameChains`` has run, every chain id is expected to be 'C'.
        """
        handler = AtomicStructHandler(self.PDBFileName)
        structure = handler.getStructure()

        firstModel = structure[0]
        firstModel['B'].id = 'CC'
        handler.renameChains(structure)
        for renamed in structure.get_chains():
            self.assertEqual(renamed.id, 'C')
Exemple #5
0
 def testIntToChain(self):
     """Check ``_intToChain`` for the indices 0 to 62.

     The expected ids are the upper-case letters, the digits, the
     lower-case letters and finally "AA".
     """
     handler = AtomicStructHandler(self.PDBFileName)
     expectedIds = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "0123456789"
                        "abcdefghijklmnopqrstuvwxyz")
     expectedIds.append("AA")
     for index, chainId in enumerate(expectedIds):
         self.assertEqual(chainId, handler._intToChain(index))
Exemple #6
0
    def getModelsChainsStep(cls, protocol):
        """Return the models/chains of the structure selected in *protocol*.

        The structure file is resolved in this order:

        * *protocol* has a ``pdbId`` attribute holding a value: the id is
          first checked against the RCSB web site and the entry is then
          downloaded into /tmp/.
        * *protocol* has a ``pdbId`` attribute holding None:
          ``protocol.pdbFile`` is used.
        * otherwise: the file of ``protocol.pdbFileToBeRefined`` is used
          when that attribute holds a value.

        :param protocol: protocol object providing the attributes above.
        :return: the value of ``getModelsChains()`` for the structure read.
        :raises Exception: if the RCSB server cannot be reached or answers
            with a 4xx status for the given id.
        """
        structureHandler = AtomicStructHandler()
        fileName = ""
        if hasattr(protocol, 'pdbId'):
            pdbID = protocol.pdbId.get()
            if pdbID is not None:
                URL = "https://www.rcsb.org/structure/" + ("%s" % pdbID)
                try:
                    response = requests.get(URL)
                except requests.exceptions.RequestException:
                    # Only network/HTTP-layer failures are reported as a
                    # connection problem; anything else (KeyboardInterrupt,
                    # programming errors) is left to propagate untouched.
                    raise Exception("Cannot connect to PDB server")
                if 400 <= response.status_code < 500:
                    raise Exception("%s is a wrong PDB ID" % pdbID)
                fileName = structureHandler.readFromPDBDatabase(
                    os.path.basename(pdbID), dir="/tmp/")
            else:
                fileName = protocol.pdbFile.get()
        else:
            if protocol.pdbFileToBeRefined.get() is not None:
                fileName = os.path.abspath(
                    protocol.pdbFileToBeRefined.get().getFileName())

        structureHandler.read(fileName)
        structureHandler.getStructure()
        models = structureHandler.getModelsChains()
        return models
Exemple #7
0
 def testTransformRotation(self):
     """Apply a pure rotation through the handler and verify coordinates.

     A 'szyz' Euler rotation of 10, 20 and 30 degrees (with a zero
     translation) is passed to ``transform``; every atom of the handler's
     structure is then compared, to two decimals, with the rotation block
     of the matrix applied to the matching atom of an untouched copy.
     """
     handler = AtomicStructHandler(self.PDBFileName)
     structure = handler.getStructure()
     reference = deepcopy(structure)
     rot, theta, psi = (numpy.deg2rad(10),
                        numpy.deg2rad(20.),
                        numpy.deg2rad(30.))
     rotation = euler_matrix(rot, theta, psi, 'szyz')
     noShift = translation_matrix([0., 0., 0.])
     matrix = concatenate_matrices(rotation, noShift)
     handler.transform(matrix)
     rotationBlock = matrix[:3, :3]
     atomPairs = zip(structure.get_atoms(), reference.get_atoms())
     for movedAtom, originalAtom in atomPairs:
         actual = movedAtom.get_coord()
         expected = rotationBlock.dot(originalAtom.get_coord())
         for axis in range(3):
             self.assertAlmostEqual(actual[axis], expected[axis], 2)
Exemple #8
0
    def getSequenceOfChainStep(self, chainId):
        """Extract the sequence of the selected chain of an atomic structure.

        The model/chain selection is parsed from ``self.inputStructureChain``
        as JSON with the keys ``model`` and ``chain``. The structure is read
        from /tmp/<pdbId>.cif when that file exists, downloaded from the PDB
        database when ``self.pdbId`` is set but the file is missing, or read
        from ``self.pdbFile`` when no id is set.

        Sets ``self.structureHandler``, ``self.sequence``, ``self.alphabet``
        and ``self.id`` (the user-provided sequence ID when there is one,
        otherwise the full ID reported by the structure handler).

        :param chainId: not used by this method; the selection is re-read
            from ``self.inputStructureChain``.
        """
        # sequence is obtained from the atomic structure file

        # the form wizard stores the selection as a JSON string holding,
        # among others, the "model" and "chain" keys
        import json
        chainIdDict = json.loads(self.inputStructureChain.get())

        selectedModel = chainIdDict['model']
        selectedChain = chainIdDict['chain']
        self.structureHandler = AtomicStructHandler()

        if self.pdbId.get() is not None:
            # PDB from remote database
            pdbID = self.pdbId.get()
            tmpFilePath = os.path.join("/tmp", pdbID + ".cif")
            if exists(tmpFilePath):
                # wizard already downloaded the file
                self.structureHandler.read(tmpFilePath)
            else:
                # wizard was not used, so the file has not been downloaded yet
                self.structureHandler.readFromPDBDatabase(pdbID, dir="/tmp")
        else:
            # PDB from file
            self.structureHandler.read(self.pdbFile.get())

        _sequence = self.structureHandler.getSequenceFromChain(
            selectedModel, selectedChain)
        self.sequence = str(_sequence)
        self.alphabet = alphabetToIndex(
            self.inputSequence == SEQ_TYPE_AMINOACIDS, _sequence.alphabet)

        # Assignment of the sequence ID: an ID provided by the user takes
        # precedence; otherwise an ID derived from the starting structure
        # is used.
        if self.inputSequenceID.get() is not None:
            self.id = self.inputSequenceID.get()
        else:
            self.id = self.structureHandler.getFullID(selectedModel,
                                                      selectedChain)

        # Parenthesised single-argument form: same output with the Python 2
        # print statement and with the Python 3 print function.
        print("Selected chain: %s from model: %s from structure: %s"
              % (selectedChain, selectedModel,
                 self.structureHandler.structure.get_id()))
Exemple #9
0
 def testCIFToPDB(self):
     """Write the CIF test structure as PDB and compare atom names.

     The structure is written to a "_2.pdb" sibling of
     ``self.PDBFileName``, read back with the same handler and compared
     atom by atom; the temporary file is removed at the end.
     """
     handler = AtomicStructHandler(self.CIFFileName)
     cifStructure = handler.getStructure()
     convertedName = self.PDBFileName.replace(".pdb", "_2.pdb")
     handler.write(convertedName)
     handler.read(convertedName)
     pdbStructure = handler.getStructure()
     atomPairs = zip(cifStructure.get_atoms(), pdbStructure.get_atoms())
     for cifAtom, pdbAtom in atomPairs:
         self.assertEqual(cifAtom.get_name(), pdbAtom.get_name())
     os.unlink(convertedName)
Exemple #10
0
    def testPDBToCIF(self):
        """Write the PDB test structure as CIF and compare atom names.

        The structure is written to a "_2.cif" sibling of
        ``self.CIFFileName``, loaded with a second handler and compared
        atom by atom; the temporary file is removed at the end.
        """
        pdbHandler = AtomicStructHandler(self.PDBFileName)
        convertedName = self.CIFFileName.replace(".cif", "_2.cif")
        pdbHandler.write(convertedName)
        cifHandler = AtomicStructHandler(convertedName)

        pdbStructure = pdbHandler.getStructure()
        cifStructure = cifHandler.getStructure()
        atomPairs = zip(pdbStructure.get_atoms(), cifStructure.get_atoms())
        for pdbAtom, cifAtom in atomPairs:
            self.assertEqual(pdbAtom.get_name(), cifAtom.get_name())
        os.unlink(convertedName)
Exemple #11
0
    def testReadCIF(self):
        """Read the CIF test file and check atom names and header fields.

        Atom names are compared positionally with a reference list, and
        three entries of the low-level dictionary returned by
        ``readLowLevel`` are compared case-insensitively with their
        expected text.
        """
        handler = AtomicStructHandler(self.CIFFileName)
        structure = handler.getStructure()

        expectedNames = ['N', 'CA', 'C', 'O', 'CB']
        for position, atom in enumerate(structure.get_atoms()):
            self.assertEqual(expectedNames[position], atom.get_name())

        expectedFields = {
            '_exptl.method': 'x-ray diffraction',
            '_struct_keywords.pdbx_keywords': 'extracellular matrix',
            '_struct.title': 'x-ray crystallographic determination '
                             'of a collagen-like peptide with the '
                             'repeating sequence (pro-pro-gly)',
        }
        lowLevel = handler.readLowLevel(self.CIFFileName)

        for key, expected in expectedFields.items():
            self.assertEqual(lowLevel[key].strip().lower(), expected.lower())
Exemple #12
0
    def testReadPDB(self):
        """Read the PDB test file and check header fields and atom names.

        Five entries of ``structure.header`` are compared (after stripping)
        with their expected text, then atom names are compared positionally
        with a reference list.
        """
        handler = AtomicStructHandler(self.PDBFileName)
        structure = handler.getStructure()

        expectedHeader = {
            'structure_method': 'x-ray diffraction',
            'head': 'extracellular matrix',
            'name': 'x-ray crystallographic determination '
                    'of a collagen-like peptide with the '
                    'repeating sequence (pro-pro-gly)',
            'author': 'R.Z.Kramer,L.Vitagliano,J.Bella,R.Berisio,'
                      'L.Mazzarella,B.Brodsky,A.Zagari,H.M.Berman',
            'deposition_date': '1998-01-22',
        }
        for key, expected in expectedHeader.items():
            self.assertEqual(structure.header[key].strip(), expected)

        expectedNames = ['N', 'CA', 'C', 'O', 'CB']
        for position, atom in enumerate(structure.get_atoms()):
            self.assertEqual(expectedNames[position], atom.get_name())
Exemple #13
0
    def createOutputStep(self, atomStructPath):
        """Copy the atomic structure into the protocol and register it.

        Unless *atomStructPath* already equals the target path inside the
        protocol's extra directory, the structure is re-written there through
        ``AtomicStructHandler`` (``.pdb``/``.ent`` inputs get a ``.cif`` file
        name). An ``AtomStruct`` pointing to the local file is then defined
        as ``outputPdb``; when ``self.inputVolume`` holds a volume it is
        attached to the output and a source relation is created.

        :param atomStructPath: path of the atomic structure file to register.
        :raises Exception: if *atomStructPath* does not exist.
        """
        if not exists(atomStructPath):
            raise Exception("Atomic structure not found at *%s*" %
                            atomStructPath)

        baseName = basename(atomStructPath)
        localPath = abspath(self._getExtraPath(baseName))

        if str(atomStructPath) != str(localPath):  # from local file
            if atomStructPath.endswith((".pdb", ".ent")):
                # Swap only the trailing extension: a str.replace over the
                # whole absolute path would also rewrite ".pdb"/".ent"
                # occurring in directory names or inside the file name.
                for extension in (".pdb", ".ent"):
                    if localPath.endswith(extension):
                        localPath = localPath[:-len(extension)] + ".cif"
                        break
            # normalize input format
            aSH = AtomicStructHandler()
            aSH.read(atomStructPath)
            aSH.write(localPath)
        pdb = AtomStruct()
        volume = self.inputVolume.get()

        # If a volume exists, assign it to the pdb object.
        # IMPORTANT: the volume check is done twice on purpose, because the
        # pdb object has to be persisted (defined as output) before the
        # source relation can be created.
        if volume is not None:
            pdb.setVolume(volume)

        pdb.setFileName(localPath)
        self._defineOutputs(outputPdb=pdb)

        if volume is not None:
            self._defineSourceRelation(volume, pdb)
Exemple #14
0
 def exportAtomStructStep(self):
     """Export the selected atomic structure into ``self.filesPath``.

     The destination is ``self.COORDINATEFILENAME`` inside that directory.
     Sources ending in ".cif" or ".mmcif" are read and re-written with
     ``AtomicStructHandler``; any other source goes through ``toCIF``.
     """
     sourcePath = self.exportAtomStruct.get().getFileName()
     targetPath = os.path.join(self.filesPath.get(),
                               self.COORDINATEFILENAME)
     if not sourcePath.endswith((".cif", ".mmcif")):
         toCIF(sourcePath, targetPath)
         return
     handler = AtomicStructHandler()
     handler.read(sourcePath)
     handler.write(targetPath)
Exemple #15
0
 def testTransformTranslation(self):
     """Apply a pure translation through the handler and verify coordinates.

     An identity rotation combined with a shift of (100, 50, 25) is passed
     to ``transform``; every atom of the handler's structure is then
     compared, to two decimals, with the matching atom of an untouched
     copy displaced by the same shift.
     """
     handler = AtomicStructHandler(self.PDBFileName)
     structure = handler.getStructure()
     reference = deepcopy(handler.getStructure())
     shift = [100., 50., 25.]
     identity = euler_matrix(0., 0., 0., 'szyz')
     matrix = concatenate_matrices(identity, translation_matrix(shift))
     handler.transform(matrix)
     atomPairs = zip(structure.get_atoms(), reference.get_atoms())
     for movedAtom, originalAtom in atomPairs:
         actual = movedAtom.get_coord()
         expected = [sum(pair)
                     for pair in zip(originalAtom.get_coord(), shift)]
         for axis in range(3):
             self.assertAlmostEqual(actual[axis], expected[axis], 2)
Exemple #16
0
    def testFunctionAddStructNoNewModelAddTwice(self):
        """Add a structure with overlapping chain ids twice, without models.

        Entry 1P30 receives entry 1CJD two times through
        ``addStruct(..., useModel=False)``. The resulting chain ids are
        compared (ignoring order) with the expected ones and the total atom
        count with the sum of the parts. Downloaded and generated files are
        removed at the end.
        """
        firstId = '1P30'  # A,
        secondId = '1CJD'  # A, B, C
        mergedFile = "/tmp/nomodel.cif"  # A, A002, B, C, A003, B002, C002
        target = AtomicStructHandler()
        donor = AtomicStructHandler()

        targetFile = target.readFromPDBDatabase(firstId, type='mmCif',
                                                dir='/tmp')
        donorFile = donor.readFromPDBDatabase(secondId, type='mmCif',
                                              dir='/tmp')
        targetAtoms = len(list(target.getStructure().get_atoms()))
        donorAtoms = len(list(donor.getStructure().get_atoms()))

        # the second structure is appended two times
        for _ in range(2):
            target.addStruct(donorFile, outPDBfileName=mergedFile,
                             useModel=False)
        chains = [chain.id for chain in target.getStructure().get_chains()]
        # compare unordered lists of chains
        goal = ['A', 'A002', 'B', 'C', 'A003', 'B002', 'C002']
        self.assertTrue(Counter(chains) == Counter(goal),
                        "{} != {}".format(chains, goal))

        totalAtoms = len(list(target.getStructure().get_atoms()))
        self.assertEqual(targetAtoms + donorAtoms + donorAtoms, totalAtoms)
        for leftover in (targetFile, donorFile, mergedFile):
            os.unlink(leftover)
Exemple #17
0
class ProtImportSequence(ProtImportFiles):
    """ Protocol to import an aminoacid/nucleotide sequence file to the
    project"""
    _label = 'import sequence'
    #SEQUENCEFILENAME = '_sequence.fasta'
    IMPORT_FROM_PLAIN_TEXT = 0
    IMPORT_FROM_STRUCTURE = 1
    IMPORT_FROM_FILES = 2
    IMPORT_FROM_UNIPROT = 3
    IMPORT_FROM_NUCLEOTIDE_PLAIN_TEXT = 0
    IMPORT_FROM_NUCLEOTIDE_STRUCTURE = 1
    IMPORT_FROM_NUCLEOTIDE_FILES = 2
    IMPORT_FROM_GENEBANK = 3
    IMPORT_STRUCTURE_FROM_ID = 0
    IMPORT_STRUCTURE_FROM_FILES = 1

    url = "http://www.uniprot.org/uniprot/"

    def __init__(self, **kwargs):
        """Forward every keyword argument to ``ProtImportFiles.__init__``."""
        ProtImportFiles.__init__(self, **kwargs)

    def _defineParams(self, form):
        """Declare the parameters of the 'Input' section on *form*.

        Parameters are added in this order: sequence ID, name and
        description; the sequence type selector; the protein origin
        selector with its alphabet and UniProt fields; the nucleotide
        origin selector with its alphabet; the raw-sequence text field;
        the atomic-structure fields (origin, id, file, chain); the sequence
        file path; and the GeneBank accession. Most parameters carry a
        ``condition`` string, built from the class constants, that refers
        to the values of the selectors.

        :param form: form object on which ``addSection``/``addParam`` are
            called.
        """

        form.addSection(label='Input')
        # --- identification of the output sequence ---
        form.addParam('inputSequenceID',
                      params.StringParam,
                      label="Sequence ID",
                      allowsNull=True,
                      help="Write a sequence ID. Otherwise, if the "
                      "sequence derives from GeneBank/UniProt/PDB "
                      "databases, the respective database ID will be "
                      "selected as starting sequence ID; examples: if "
                      "you select GeneBank accession AJ520101, SCIPION "
                      "will assign AJ520101 as sequence ID; if "
                      "you select UniProt accession P12345, SCIPION will "
                      "assign P12345 as sequence ID; if you "
                      "select atomic structure 3lqd.cif, chain B, "
                      "SCIPION will assign 3lqd_B as sequence ID. In "
                      "the rest of cases, the Sequence name "
                      "will be selected as starting Sequence ID.")
        form.addParam('inputSequenceName',
                      params.StringParam,
                      important=True,
                      label="Sequence name",
                      allowsNull=False,
                      help="Write a sequence name.")
        form.addParam('inputSequenceDescription',
                      params.StringParam,
                      label="Sequence description",
                      allowsNull=True,
                      help="Write a description for your sequence. Otherwise, "
                      "if the "
                      "sequence derives from GeneBank/UniProt/PDB "
                      "databases, the respective database description "
                      "will be "
                      "selected as starting sequence description. In "
                      "the rest of cases, no sequence description will "
                      "be added.")
        # --- sequence type selector (choices come from SEQ_TYPE) ---
        form.addParam('inputSequence',
                      params.EnumParam,
                      pointerClass='Sequence',
                      choices=SEQ_TYPE,
                      display=params.EnumParam.DISPLAY_HLIST,
                      label="Import sequence of ",
                      default=SEQ_TYPE_AMINOACIDS,
                      help='Select the type of sequence to import.')
        # --- aminoacid sequences: origin selector ---
        # The position of each choice matches the IMPORT_FROM_* class
        # constants (0: plain text, 1: structure, 2: file, 3: UniProt).
        form.addParam(
            'inputProteinSequence',
            params.EnumParam,
            choices=['plain text', 'atomic structure', 'file', 'UniProt ID'],
            display=params.EnumParam.DISPLAY_HLIST,
            condition='inputSequence == %d' % SEQ_TYPE_AMINOACIDS,
            label="From ",
            default=self.IMPORT_FROM_PLAIN_TEXT,
            help='Select one of the four options: write the '
            'aminoacid sequence or import it '
            'from a previously loaded atomic structure, a local '
            'file or an online server.')
        # Alphabet used to clean a protein sequence typed as plain text.
        form.addParam('proteinIUPACalphabet',
                      params.EnumParam,
                      choices=IUPAC_PROTEIN_ALPHABET,
                      display=params.EnumParam.DISPLAY_HLIST,
                      condition='inputSequence == %d and '
                      'inputProteinSequence == %d' %
                      (SEQ_TYPE_AMINOACIDS, self.IMPORT_FROM_PLAIN_TEXT),
                      label="IUPAC Protein alphabet: ",
                      default=EXTENDED_PROTEIN_ALPHABET,
                      help='Your raw sequence will be cleaned according '
                      'a certain alphabet, i.e., only the letters '
                      'contained in the alphabet will be maintained in '
                      'the sequence. Select thus the type of protein '
                      'alphabet in order to accomplish the '
                      'cleaning:\n\nProtein alphabet: IUPAC protein '
                      'alphabet of the 20 standard amino acids; uppercase'
                      ' and single letter: *ACDEFGHIKLMNPQRSTVWY*.\n\n'
                      'Extended Protein alphabet: Extended uppercase '
                      'IUPAC '
                      'protein single letter alphabet including X etc.\n'
                      'In addition to the standard 20 single letter '
                      'protein codes, this includes:\n*B = Asx*; '
                      'Aspartic acid (R) or Asparagine (N)\n*X = Xxx*"; '
                      'Unknown or other amino acid\n*Z = Glx*; Glutamic '
                      'acid (E) or Glutamine (Q)\n*J = Xle*; Leucine ('
                      'L) or Isoleucine (I), used in mass-spec (NMR)\n'
                      '*U = Sec*; Selenocysteine\n*O = Pyl*; '
                      'Pyrrolysine\nThis alphabet is not intended to be '
                      'used with X for Selenocysteine (an ad-hoc standard'
                      ' prior to the IUPAC adoption of U instead).\n')
        form.addParam('uniProtSequence',
                      params.StringParam,
                      condition='inputSequence == %d and '
                      'inputProteinSequence == %d' %
                      (SEQ_TYPE_AMINOACIDS, self.IMPORT_FROM_UNIPROT),
                      label="UniProt name/ID ",
                      allowsNull=True,
                      help='Write a UniProt ID (six or ten alphanumeric '
                      'characters; examples: A2BC19, P12345, '
                      'A0A022YWF9, DGAL_ECOLI).\n You can convert other '
                      'database identifiers to UniProt accession codes '
                      'by using the "ID Mapping" tab on '
                      'https://www.uniprot.org/')
        # --- nucleotide sequences: origin selector ---
        # The position of each choice matches the IMPORT_FROM_NUCLEOTIDE_*
        # / IMPORT_FROM_GENEBANK class constants.
        # NOTE(review): the help text announces four options but does not
        # mention the 'atomic structure' choice - confirm and reword.
        form.addParam(
            'inputNucleotideSequence',
            params.EnumParam,
            choices=['plain text', 'atomic structure', 'file', 'GeneBank'],
            display=params.EnumParam.DISPLAY_HLIST,
            condition='inputSequence == %d' % SEQ_TYPE_NUCLEOTIDES,
            label="From ",
            default=self.IMPORT_FROM_NUCLEOTIDE_PLAIN_TEXT,
            help='Select one of the four options: write the '
            'nucleic acid sequence or import it '
            'from a local file or an online server.')
        # Alphabet used to clean a nucleotide sequence typed as plain text.
        form.addParam(
            'nucleotideIUPACalphabet',
            params.EnumParam,
            choices=IUPAC_NUCLEOTIDE_ALPHABET,
            display=params.EnumParam.DISPLAY_HLIST,
            condition='inputSequence == %d and '
            'inputNucleotideSequence == %d' %
            (SEQ_TYPE_NUCLEOTIDES, self.IMPORT_FROM_NUCLEOTIDE_PLAIN_TEXT),
            label="IUPAC Nucleic acid alphabet: ",
            default=EXTENDED_DNA_ALPHABET,
            help='Your raw sequence will be cleaned according '
            'a certain alphabet, i.e., only the letters '
            'contained in the alphabet will be maintained in '
            'the sequence. Select thus the type of nucleic '
            'acid alphabet in order to accomplish the '
            'cleaning:\n\n Ambiguous DNA alphabet: Uppercase '
            'IUPAC ambiguous DNA: *GATCRYWSMKHBVDN*.\n\n'
            'Unambiguous DNA alphabet: Uppercase IUPAC unambiguous DNA '
            '(letters *GATC* only).\n\nExtended DNA: Extended '
            'IUPAC DNA alphabet.\nIn addition to the standard letter '
            'codes GATC, this includes:\n*B* = 5-bromouridine\n'
            '*D* = 5,6-dihydrouridine\n*S* = thiouridine\n*W* '
            '= wyosine\n\nAmbiguous RNA: Uppercase IUPAC '
            'ambiguous RNA; *GAUCRYWSMKHBVDN*\n\nUnambigous '
            'RNA alphabet: Generic single letter RNA '
            'alphabet.\n\n')
        # --- plain text input, shared by both sequence types ---
        form.addParam(
            'inputRawSequence',
            params.StringParam,
            condition='(inputSequence == %d and '
            'inputProteinSequence == %d) or '
            '(inputSequence == %d and '
            'inputNucleotideSequence == %d) ' %
            (SEQ_TYPE_AMINOACIDS, self.IMPORT_FROM_PLAIN_TEXT,
             SEQ_TYPE_NUCLEOTIDES, self.IMPORT_FROM_NUCLEOTIDE_PLAIN_TEXT),
            label="Write your sequence here:",
            important=True,
            help="Write the aminoacid or nucleotide raw sequence.\n")
        # --- atomic structure input, shared by both sequence types ---
        # NOTE(review): the conditions of this group test only the two
        # origin selectors, not inputSequence - confirm this is intended.
        form.addParam('inputStructureSequence',
                      params.EnumParam,
                      choices=['id', 'file'],
                      condition='inputProteinSequence == %d or '
                      'inputNucleotideSequence == %d' %
                      (self.IMPORT_FROM_STRUCTURE,
                       self.IMPORT_FROM_NUCLEOTIDE_STRUCTURE),
                      label="Atomic structure from",
                      default=self.IMPORT_STRUCTURE_FROM_ID,
                      display=params.EnumParam.DISPLAY_HLIST,
                      help='Import structure data from online server or local '
                      'file',
                      pointerClass='AtomStruct',
                      allowsNull=True)
        form.addParam(
            'pdbId',
            params.StringParam,
            condition='(inputProteinSequence == %d or '
            'inputNucleotideSequence == %d) and '
            'inputStructureSequence == %d' %
            (self.IMPORT_FROM_STRUCTURE, self.IMPORT_FROM_NUCLEOTIDE_STRUCTURE,
             self.IMPORT_STRUCTURE_FROM_ID),
            label="Atomic structure ID ",
            allowsNull=True,
            help='Type a structure ID (four alphanumeric '
            'characters).')
        form.addParam(
            'pdbFile',
            params.PathParam,
            label="File path",
            condition='(inputProteinSequence == %d or '
            'inputNucleotideSequence == %d) and '
            'inputStructureSequence == %d' %
            (self.IMPORT_FROM_STRUCTURE, self.IMPORT_FROM_NUCLEOTIDE_STRUCTURE,
             self.IMPORT_STRUCTURE_FROM_FILES),
            allowsNull=True,
            help='Specify a path to desired atomic structure.')
        form.addParam('inputStructureChain',
                      params.StringParam,
                      condition='inputProteinSequence == %d or '
                      'inputNucleotideSequence == %d' %
                      (self.IMPORT_FROM_STRUCTURE,
                       self.IMPORT_FROM_NUCLEOTIDE_STRUCTURE),
                      label="Chain ",
                      allowsNull=True,
                      help="Select a particular chain of the atomic "
                      "structure.")
        # --- sequence file input, shared by both sequence types ---
        form.addParam(
            'fileSequence',
            params.PathParam,
            label="File path",
            condition='inputProteinSequence == %d or '
            'inputNucleotideSequence == %d' %
            (self.IMPORT_FROM_FILES, self.IMPORT_FROM_NUCLEOTIDE_FILES),
            allowsNull=True,
            help='Specify a path to desired aminoacid or '
            'nucleic acid sequence '
            'file.\nIf your file contains more than one '
            'sequence, only the first one will be considered.')
        # --- GeneBank accession (nucleotide sequences only) ---
        form.addParam('geneBankSequence',
                      params.StringParam,
                      condition='inputSequence == %d and '
                      'inputNucleotideSequence == %d' %
                      (SEQ_TYPE_NUCLEOTIDES, self.IMPORT_FROM_GENEBANK),
                      label="GeneBank accession ",
                      allowsNull=True,
                      help='Write a GeneBank accession.\n')

    def _insertAllSteps(self):
        """Insert the step matching the selected origin, then the output step.

        ``self.name`` is taken from the sequence-name parameter. Depending on
        the sequence type and on the origin selector of that type, one of
        ``getRawSequenceStep``, ``getSequenceOfChainStep``,
        ``sequenceDatabaseDownloadStep`` or ``fileDownloadStep`` is inserted
        with its argument; ``createOutputStep`` is always inserted last.
        """
        self.name = self.inputSequenceName.get()

        if self.inputSequence == SEQ_TYPE_AMINOACIDS:
            origin = self.inputProteinSequence
            if origin == self.IMPORT_FROM_PLAIN_TEXT:
                self._insertFunctionStep('getRawSequenceStep',
                                         self.inputRawSequence.get())
            elif origin == self.IMPORT_FROM_STRUCTURE:
                self._insertFunctionStep('getSequenceOfChainStep',
                                         self.inputStructureChain.get())
            elif origin == self.IMPORT_FROM_UNIPROT:
                self._insertFunctionStep('sequenceDatabaseDownloadStep',
                                         self._getUniProtID())
            elif origin == self.IMPORT_FROM_FILES:
                # the file path is also kept on the protocol instance
                self.sequenceFile = self.fileSequence.get()
                self._insertFunctionStep('fileDownloadStep',
                                         self.sequenceFile)
        else:
            origin = self.inputNucleotideSequence
            if origin == self.IMPORT_FROM_NUCLEOTIDE_PLAIN_TEXT:
                self._insertFunctionStep('getRawSequenceStep',
                                         self.inputRawSequence.get())
            elif origin == self.IMPORT_FROM_NUCLEOTIDE_STRUCTURE:
                self._insertFunctionStep('getSequenceOfChainStep',
                                         self.inputStructureChain.get())
            elif origin == self.IMPORT_FROM_GENEBANK:
                self._insertFunctionStep('sequenceDatabaseDownloadStep',
                                         self._getGeneBankID())
            elif origin == self.IMPORT_FROM_NUCLEOTIDE_FILES:
                # the file path is also kept on the protocol instance
                self.sequenceFile = self.fileSequence.get()
                self._insertFunctionStep('fileDownloadStep',
                                         self.sequenceFile)

        self._insertFunctionStep('createOutputStep')

    def getRawSequenceStep(self, rawSequence):
        """Store the sequence typed by the user after cleaning it.

        Sets ``self.id`` (the user-provided sequence ID, or ``self.name``
        when none was given), ``self.alphabet`` (index returned by
        ``_getAlphabet``) and ``self.sequence`` (result of
        ``cleanSequenceScipion``).

        :param rawSequence: sequence text as typed in the form.
        """
        userId = self.inputSequenceID.get()
        self.id = userId if userId is not None else self.name
        self.alphabet = self._getAlphabet()  # index number
        isAminoacid = self.inputSequence == SEQ_TYPE_AMINOACIDS
        self.sequence = cleanSequenceScipion(isAminoacid, self.alphabet,
                                             rawSequence)

    def getSequenceOfChainStep(self, chainId):
        """Extract the sequence of the selected chain of an atomic structure.

        The model/chain selection is parsed from ``self.inputStructureChain``
        as JSON with the keys ``model`` and ``chain``. The structure is read
        from /tmp/<pdbId>.cif when that file exists, downloaded from the PDB
        database when ``self.pdbId`` is set but the file is missing, or read
        from ``self.pdbFile`` when no id is set.

        Sets ``self.structureHandler``, ``self.sequence``, ``self.alphabet``
        and ``self.id`` (the user-provided sequence ID when there is one,
        otherwise the full ID reported by the structure handler).

        :param chainId: not used by this method; the selection is re-read
            from ``self.inputStructureChain``.
        """
        # sequence is obtained from the atomic structure file

        # the form wizard stores the selection as a JSON string holding,
        # among others, the "model" and "chain" keys
        import json
        chainIdDict = json.loads(self.inputStructureChain.get())

        selectedModel = chainIdDict['model']
        selectedChain = chainIdDict['chain']
        self.structureHandler = AtomicStructHandler()

        if self.pdbId.get() is not None:
            # PDB from remote database
            pdbID = self.pdbId.get()
            tmpFilePath = os.path.join("/tmp", pdbID + ".cif")
            if exists(tmpFilePath):
                # wizard already downloaded the file
                self.structureHandler.read(tmpFilePath)
            else:
                # wizard was not used, so the file has not been downloaded yet
                self.structureHandler.readFromPDBDatabase(pdbID, dir="/tmp")
        else:
            # PDB from file
            self.structureHandler.read(self.pdbFile.get())

        _sequence = self.structureHandler.getSequenceFromChain(
            selectedModel, selectedChain)
        self.sequence = str(_sequence)
        self.alphabet = alphabetToIndex(
            self.inputSequence == SEQ_TYPE_AMINOACIDS, _sequence.alphabet)

        # Assignment of the sequence ID: an ID provided by the user takes
        # precedence; otherwise an ID derived from the starting structure
        # is used.
        if self.inputSequenceID.get() is not None:
            self.id = self.inputSequenceID.get()
        else:
            self.id = self.structureHandler.getFullID(selectedModel,
                                                      selectedChain)

        # Parenthesised single-argument form: same output with the Python 2
        # print statement and with the Python 3 print function.
        print("Selected chain: %s from model: %s from structure: %s"
              % (selectedChain, selectedModel,
                 self.structureHandler.structure.get_id()))

    def sequenceDatabaseDownloadStep(self, sequenceDB):
        """Download a UniProt/GeneBank sequence from its respective database.

        An aminoacid ``SequenceHandler`` is used when a UniProt ID is set,
        a nucleotide one when a GeneBank ID is available. When the download
        yields no record the error is printed, the protocol is marked as
        aborted and the process exits.

        Sets ``self.id`` (user-provided ID, else *sequenceDB*, else
        ``self.name``), ``self.description`` (only when the record has a
        non-empty one), ``self.sequence`` and ``self.alphabet``.

        :param sequenceDB: database identifier of the sequence to download.
        :raises Exception: if neither a UniProt nor a GeneBank identifier
            is available.
        """
        if self.uniProtSequence.get() is not None:
            seqHandler = SequenceHandler()
        elif self._getGeneBankID() is not None:
            seqHandler = SequenceHandler(isAminoacid=False)
        else:
            # Without this branch the code below failed with an
            # UnboundLocalError on seqHandler, hiding the real cause.
            raise Exception("Neither a UniProt nor a GeneBank identifier "
                            "has been provided")

        record, error = seqHandler.downloadSeqFromDatabase(sequenceDB)
        if record is None:
            # Single formatted argument: with the Python 2 print statement
            # used elsewhere in this class, print("Error: ", error) would
            # print a tuple instead of the message.
            print("Error: %s" % (error,))
            self.setAborted()
            # NOTE(review): exits with status 0 although the step failed -
            # confirm that callers rely on this before changing it.
            exit(0)

        if self.inputSequenceID.get() is not None:
            self.id = self.inputSequenceID.get()
        elif sequenceDB != '':
            self.id = sequenceDB
        else:
            self.id = self.name
        if record.description != '':
            self.description = record.description

        self.sequence = str(record.seq)
        self.alphabet = alphabetToIndex(
            self.inputSequence == SEQ_TYPE_AMINOACIDS, record.seq.alphabet)

    def fileDownloadStep(self, sequenceFile):
        """Load a sequence record from a fasta file.

        Sets ``self.id`` (user-provided ID, else the record id, else
        ``self.name``), ``self.description`` (only when the record has a
        non-empty one), ``self.sequence`` and ``self.alphabet``.

        :param sequenceFile: path of the sequence file to read.
        """
        # If the file contains more than one sequence, only
        # the first one will be considered
        record = SequenceHandler().downloadSeqFromFile(sequenceFile,
                                                       type="fasta")
        userId = self.inputSequenceID.get()
        if userId is not None:
            self.id = userId
        elif record.id != '':
            self.id = record.id
        else:
            self.id = self.name

        if record.description != '':
            self.description = record.description

        recordSeq = record.seq
        self.sequence = str(recordSeq)
        isAminoacid = self.inputSequence == SEQ_TYPE_AMINOACIDS
        self.alphabet = alphabetToIndex(isAminoacid, recordSeq.alphabet)

    def createOutputStep(self):
        """Build the ``Sequence`` object and register it as output.

        The description typed by the user wins; otherwise a description
        already stored on the instance is kept, and an empty string is used
        when there is none. The sequence is defined as ``outputSequence``.
        """
        userDescription = self.inputSequenceDescription.get()
        if userDescription is not None:
            self.description = userDescription
        elif not hasattr(self, 'description'):
            self.description = ''

        isAminoacids = (self.inputSequence == SEQ_TYPE_AMINOACIDS)
        seq = Sequence(name=self.name,
                       sequence=self.sequence,
                       alphabet=self.alphabet,
                       isAminoacids=isAminoacids,
                       id=self.id,
                       description=self.description)
        self._defineOutputs(outputSequence=seq)

    def _summary(self):
        """Return the summary lines describing where the sequence comes from.

        The list starts with a header telling whether the sequence is made of
        aminoacids or nucleotides and, when the selected import source is one
        of the known options, continues with one line naming that source.
        As a side effect ``self.name`` is refreshed from
        ``inputSequenceName``.
        """
        self.name = self.inputSequenceName.get()
        uniProtId = self._getUniProtID()
        geneBankID = self._getGeneBankID()
        lines = []

        def fromPlainText():
            lines.append("Sequence *%s* imported from plain text\n" %
                         self.name)

        def fromStructure():
            # Atomic structures are handled identically for aminoacids
            # and nucleotides.
            origin = self.inputStructureSequence
            if origin == self.IMPORT_STRUCTURE_FROM_ID:
                lines.append("Sequence *%s* imported from atomic "
                             "structure *%s.cif*\n" %
                             (self.name, self.pdbId.get()))
            elif origin == self.IMPORT_STRUCTURE_FROM_FILES:
                lines.append("Sequence *%s* imported from file *%s*\n" %
                             (self.name, self.pdbFile.get()))

        def fromFile():
            lines.append("Sequence *%s* imported from file name: "
                         "*%s*\n" % (self.name, self.fileSequence.get()))

        if self.inputSequence == SEQ_TYPE_AMINOACIDS:
            lines.append('Sequence of aminoacids:\n')
            source = self.inputProteinSequence
            if source == self.IMPORT_FROM_PLAIN_TEXT:
                fromPlainText()
            elif source == self.IMPORT_FROM_STRUCTURE:
                fromStructure()
            elif source == self.IMPORT_FROM_UNIPROT:
                lines.append("Sequence *%s* imported from UniProt ID "
                             "*%s*\n" % (self.name, uniProtId))
            elif source == self.IMPORT_FROM_FILES:
                fromFile()
        else:
            lines.append('Sequence of nucleotides:\n')
            source = self.inputNucleotideSequence
            if source == self.IMPORT_FROM_NUCLEOTIDE_PLAIN_TEXT:
                fromPlainText()
            elif source == self.IMPORT_FROM_NUCLEOTIDE_STRUCTURE:
                fromStructure()
            elif source == self.IMPORT_FROM_GENEBANK:
                lines.append("Sequence *%s* imported from geneBank ID "
                             "*%s*\n" % (self.name, geneBankID))
            elif source == self.IMPORT_FROM_NUCLEOTIDE_FILES:
                fromFile()
        return lines

    def _validate(self):
        errors = []
        return errors

    def _getSequenceName(self):
        pass

    def _getUniProtID(self):
        return self.uniProtSequence.get()

    def _getGeneBankID(self):
        return self.geneBankSequence

    def _getAlphabet(self):
        """Return the value of the IUPAC alphabet parameter that matches the
        sequence type: the protein one for aminoacids, the nucleotide one
        otherwise."""
        isAminoacid = self.inputSequence == SEQ_TYPE_AMINOACIDS
        alphabetParam = (self.proteinIUPACalphabet if isAminoacid
                         else self.nucleotideIUPACalphabet)
        return alphabetParam.get()
Exemple #18
0
 def testFunctionSelectChain(self):
     """Extract the first residues of one chain and check the result.

     Structure 1P30 is downloaded, ``extractChain`` is asked to keep chain
     'A' (model '0', ``end=20``) and to write it to a file. The test then
     checks that the handler only holds the requested chain and that the
     written file, read back with a second handler, contains 122 atoms.
     """
     pdbID1 = '1P30'  # A,B,C
     chainID = 'A'
     outFileName = "/tmp/output.mmcif"
     aSH1 = AtomicStructHandler()
     aSH2 = AtomicStructHandler()
     # Only the side effect matters here: the structure is queried from
     # aSH1 right after this call (presumably loaded by it -- confirm).
     aSH1.readFromPDBDatabase(pdbID1, type='mmCif', dir='/tmp')
     aSH1.extractChain(chainID=chainID, modelID='0', end=20,
                       filename=outFileName)
     chains = [chain.id for chain in aSH1.getStructure().get_chains()]
     # compare unordered lists of chains; goal must be a list because
     # Counter applied to a bare string would count its characters
     goal = [chainID]
     self.assertTrue(Counter(chains) == Counter(goal),
                     "{} != {}".format(chains, goal))
     aSH2.read(outFileName)
     atomsNumT = len([atom.id for atom in aSH2.getStructure().get_atoms()])
     self.assertEqual(122, atomsNumT)
Exemple #19
0
        def __applyTransform(suffix, pdbFileName, shift, angles, sampling):
            """Auxiliary function: transform both the PDB and the 3D map.

            ``angles`` (degrees, 'szyz' convention) and ``shift`` define the
            matrix applied to the atomic structure; the equivalent xmipp
            parameters are applied to the map. Output files are prefixed
            with ``suffix`` and everything is finally opened in Chimera.
            """
            from numpy import deg2rad

            def pureTranslation(offset):
                # identity rotation combined with the given translation
                return concatenate_matrices(
                    euler_matrix(0., 0., 0., 'szyz'),
                    translation_matrix(offset))

            inputMap = "emd_%s.map" % EMDBID
            transformedPdb = "%s_%s_transformed.ent" % (suffix, PDBID.lower())
            transformedMap = "%s_emd_%s_transform.map" % (suffix, EMDBID)

            # Scipion transformation matrix built from the Euler rotation
            # and the translation
            M = concatenate_matrices(
                euler_matrix(deg2rad(angles[0]),
                             deg2rad(angles[1]),
                             deg2rad(angles[2]), 'szyz'),
                translation_matrix(shift))

            rotates = any(angle != 0. for angle in angles[:3])

            # apply it to the pdb file; when there is a rotation the
            # structure is first shifted by minus half the map dimensions
            aSH = AtomicStructHandler(pdbFileName)
            if rotates:
                from pyworkflow.em.convert import ImageHandler
                x, y, z, n = ImageHandler().getDimensions(inputMap)
                halfX, halfY, halfZ = x / 2., y / 2., z / 2.
                aSH.transform(pureTranslation([-halfX, -halfY, -halfZ]),
                              sampling=sampling)

            aSH.transform(M, sampling=sampling)

            if rotates:
                # undo the centering shift
                aSH.transform(pureTranslation([halfX, halfY, halfZ]),
                              sampling=sampling)

            aSH.write(transformedPdb)

            # get equivalent xmipp transformation
            xmippShift, xmippAngles = __getXmippEulerAngles(M)
            # shift 3D map and set sampling
            xmippArgs = ('-i emd_%s.map '
                         '-o %s_emd_%s_transform.map '
                         '--interp linear '
                         '--shift %f %f %f '
                         '--rotate_volume euler %f %f %f ' % (
                             EMDBID,
                             suffix,
                             EMDBID,
                             xmippShift[0], xmippShift[1], xmippShift[2],
                             xmippAngles[0], xmippAngles[1], xmippAngles[2]))
            __runXmippProgram("xmipp_transform_geometry", xmippArgs)

            # put the sampling back, xmipp_transform_geometry erased it
            header = Ccp4Header(transformedMap, readHeader=True)
            header.setSampling(sampling)
            header.writeHeader()

            # view the results with chimera
            from pyworkflow.em.viewers import Chimera
            Chimera.runProgram("%s %s %s %s" % (pdbFileName, inputMap,
                                                transformedPdb,
                                                transformedMap))
Exemple #20
0
    def testTransformTranslationCoherence(self):
        """Manual check: do a PDB and its 3D map move together?

        The same transformation is applied to the atomic structure 6CUD and
        to the map EMD-7620, and the original and transformed files are
        opened in Chimera. There is no automatic assertion: whether the
        structure and the map still match has to be checked by eye.

        The test returns immediately unless ``doTest`` (below) is changed
        to True. The ``if False or doAll`` guards select which individual
        steps are executed.
        """
        doTest = False

        if not doTest:
            # print(...) with a single argument behaves the same in
            # Python 2 and Python 3
            print("This test is to be tested manually since it opens chimera afterwards")
            print("For testing this, edit this file and set doTest = True")
            return

        # "Structure of the human TRPC3": both the 3D map and the PDB
        PDBID = '6CUD'
        EMDBID = '7620'

        doAll = False

        if False or doAll:  # set to False if you already have the 3D map file
            url = 'ftp://ftp.ebi.ac.uk/pub/databases/emdb/structures/EMD-%s/map/emd_%s.map.gz' % \
                  (EMDBID, EMDBID)
            try:
                from urllib.request import urlretrieve  # Python 3
            except ImportError:
                from urllib import urlretrieve  # Python 2
            urlretrieve(url, 'emd_%s.map.gz' % EMDBID)
            os.system("gunzip emd_%s.map.gz" % EMDBID)  # file is gzipped
        if False or doAll:  # set to False if you already have the PDB file
            aSH = AtomicStructHandler()
            pdbFileName = aSH.readFromPDBDatabase(PDBID, type='pdb',
                                                  dir=os.getcwd())
        else:
            pdbFileName = 'pdb%s.ent' % PDBID.lower()

        # get 3D map sampling (only the first returned value is used)
        from pyworkflow.em.headers import Ccp4Header
        header = Ccp4Header("emd_%s.map" % EMDBID, readHeader=True)
        sampling, _, _ = header.getSampling()

        def __runXmippProgram(program, args):
            """ Internal function to launch a Xmipp program. """
            xmipp3 = pwutils.importFromPlugin('xmipp3')
            xmipp3.runXmippProgram(program, args)

        def __getXmippEulerAngles(matrix):
            """ Internal function to convert scipion to xmipp angles"""
            geometryFromMatrix = importFromPlugin('xmipp3.convert',
                                                  'geometryFromMatrix')

            return geometryFromMatrix(matrix, False)

        def __applyTransform(suffix, pdbFileName, shift, angles, sampling):
            """Auxiliary function: transform both the PDB and the 3D map.

            ``angles`` (degrees, 'szyz' convention) and ``shift`` define the
            matrix applied to the atomic structure; the equivalent xmipp
            parameters are applied to the map. Output files are prefixed
            with ``suffix`` and everything is finally opened in Chimera.
            """
            from numpy import deg2rad

            def pureTranslation(offset):
                # identity rotation combined with the given translation
                return concatenate_matrices(
                    euler_matrix(0., 0., 0., 'szyz'),
                    translation_matrix(offset))

            inputMap = "emd_%s.map" % EMDBID
            transformedPdb = "%s_%s_transformed.ent" % (suffix, PDBID.lower())
            transformedMap = "%s_emd_%s_transform.map" % (suffix, EMDBID)

            # Scipion transformation matrix built from the Euler rotation
            # and the translation
            M = concatenate_matrices(
                euler_matrix(deg2rad(angles[0]),
                             deg2rad(angles[1]),
                             deg2rad(angles[2]), 'szyz'),
                translation_matrix(shift))

            rotates = any(angle != 0. for angle in angles[:3])

            # apply it to the pdb file; when there is a rotation the
            # structure is first shifted by minus half the map dimensions
            aSH = AtomicStructHandler(pdbFileName)
            if rotates:
                from pyworkflow.em.convert import ImageHandler
                x, y, z, n = ImageHandler().getDimensions(inputMap)
                halfX, halfY, halfZ = x / 2., y / 2., z / 2.
                aSH.transform(pureTranslation([-halfX, -halfY, -halfZ]),
                              sampling=sampling)

            aSH.transform(M, sampling=sampling)

            if rotates:
                # undo the centering shift
                aSH.transform(pureTranslation([halfX, halfY, halfZ]),
                              sampling=sampling)

            aSH.write(transformedPdb)

            # get equivalent xmipp transformation
            xmippShift, xmippAngles = __getXmippEulerAngles(M)
            # shift 3D map and set sampling
            xmippArgs = ('-i emd_%s.map '
                         '-o %s_emd_%s_transform.map '
                         '--interp linear '
                         '--shift %f %f %f '
                         '--rotate_volume euler %f %f %f ' % (
                             EMDBID,
                             suffix,
                             EMDBID,
                             xmippShift[0], xmippShift[1], xmippShift[2],
                             xmippAngles[0], xmippAngles[1], xmippAngles[2]))
            __runXmippProgram("xmipp_transform_geometry", xmippArgs)

            # put the sampling back, xmipp_transform_geometry erased it
            header = Ccp4Header(transformedMap, readHeader=True)
            header.setSampling(sampling)
            header.writeHeader()

            # view the results with chimera
            from pyworkflow.em.viewers import Chimera
            Chimera.runProgram("%s %s %s %s" % (pdbFileName, inputMap,
                                                transformedPdb,
                                                transformedMap))

        # shift atomic structure
        doAll = True
        if False or doAll:
            shift = [20., 0., 0.]
            angles = [0., 0., 0.]
            __applyTransform("Xshift", pdbFileName, shift, angles, sampling)

        # repeat the test, this time rotating one angle.
        # Problem: xmipp rotates with respect to the volume center while
        # the pdb is rotated with respect to the origin of coordinates
        # (much better convention). In order to compare both, the pdb is
        # moved to the origin, rotated and put back in its position.
        if False or doAll:
            shift = [0., 0., 0.]
            angles = [30., 0., 0.]
            __applyTransform("Rot2D", pdbFileName, shift, angles, sampling)

        # repeat the test, this time rotating the three angles
        # (same center-of-rotation problem as above)
        if False or doAll:
            shift = [0., 0., 0.]
            angles = [10., 20., 30.]
            __applyTransform("Rot3D", pdbFileName, shift, angles, sampling)

        # repeat the test, this time rotating the three angles and shifting
        # (same center-of-rotation problem as above)
        if False or doAll:
            shift = [5., 10., 15.]
            angles = [10., 20., 30.]
            __applyTransform("Rot3DShift", pdbFileName,
                             shift, angles, sampling)
Exemple #21
0
    def testFunctionAddStructNewModel(self):
        """Merge two downloaded structures with ``addStruct(useModel=True)``.

        Structures 1P30 and 1CJD are downloaded, the second one is added to
        the first one and the result is written to ``outFile``. The test
        checks that the merged structure holds the chains of both inputs and
        that its number of atoms is the sum of both atom counts.

        Every file created in /tmp is removed in the ``finally`` clause, so
        a failing assertion does not leave files behind.
        """
        pdbID1 = '1P30'  # A
        pdbID2 = '1CJD'  # A, B,C
        outFile = "/tmp/model.cif"
        aSH1 = AtomicStructHandler()
        aSH2 = AtomicStructHandler()
        tmpFiles = []
        try:
            fileName1 = aSH1.readFromPDBDatabase(pdbID1, type='mmCif',
                                                 dir='/tmp')
            tmpFiles.append(fileName1)
            fileName2 = aSH2.readFromPDBDatabase(pdbID2, type='mmCif',
                                                 dir='/tmp')
            tmpFiles.append(fileName2)
            atomsNum1 = len(
                [atom.id for atom in aSH1.getStructure().get_atoms()])
            atomsNum2 = len(
                [atom.id for atom in aSH2.getStructure().get_atoms()])

            tmpFiles.append(outFile)
            aSH1.addStruct(fileName2, outPDBfileName=outFile, useModel=True)
            # the merged structure must have been written to disk
            self.assertTrue(os.path.exists(outFile))

            chains = [chain.id for chain in aSH1.getStructure().get_chains()]
            # compare unordered lists of chains
            goal = ['A', 'A', 'B', 'C']
            self.assertTrue(Counter(chains) == Counter(goal),
                            "{} != {}".format(chains, goal))
            atomsNumT = len(
                [atom.id for atom in aSH1.getStructure().get_atoms()])
            self.assertEqual(atomsNum1 + atomsNum2, atomsNumT)
        finally:
            # best-effort cleanup: only remove what actually exists so a
            # cleanup error never hides the real test failure
            for tmpFile in tmpFiles:
                if os.path.exists(tmpFile):
                    os.unlink(tmpFile)
Exemple #22
0
    def powerfitWrapper(self):
        # Horrible hack to release this plugin before scipion next version.
        # TODO: remove when possible
        # keep this import protected inside the function otherwise it fails
        # if uploaded before chimera plugin is read
        # This import should be removed for scipio greater than 2.0
        from pyworkflow import LAST_VERSION, VERSION_2_0
        if LAST_VERSION == VERSION_2_0:
            from pyworkflow.utils import importFromPlugin
            AtomicStructHandler = importFromPlugin('chimera.atom_struct',
                                                   'AtomicStructHandler')
        else:
            from pyworkflow.em.convert.atom_struct import AtomicStructHandler

        _localInputVol = "volume.mrc"
        localInputVol = self._getExtraPath(_localInputVol)
        if self.inputVol.get() is None:
            volume = self.inputPDB.get().getVolume()
            print "Volume: Volume associated to atomic structure %s\n" % volume
        else:
            volume = self.inputVol.get()
            print "Volume: Input volume %s\n" % volume
        sampling = volume.getSamplingRate()
        origin = volume.getOrigin(force=True).getShifts()

        # provide PDB file instead of CIF
        # power fit does not process some types of CIF files
        atomStructPath = self.inputPDB.get().getFileName()
        if atomStructPath.endswith(".cif"):
            baseName = basename(atomStructPath)
            localPath = os.path.abspath(self._getExtraPath(baseName))
            localPath = localPath.replace(".cif", ".pdb")

            # normalize input format
            aSH = AtomicStructHandler()
            aSH.read(atomStructPath)
            aSH.write(localPath)
            atomStructPath = localPath

        # powerfit needs offset in origin
        Ccp4Header.fixFile(volume.getFileName(), localInputVol, origin,
                           sampling, Ccp4Header.ORIGIN)
        args = "%s %f %s -d %s -p %d -a %f -n %d" % (
            localInputVol, self.resolution, atomStructPath,
            self._getExtraPath(), self.numberOfThreads, self.angleStep,
            self.nModels)

        if self.doLaplacian:
            args += " -l"
        if self.doCoreWeight:
            args += " -cw"
        self.runJob('powerfit', args)

        # Construct the chimera viewers
        dim = volume.getDim()[0]
        bildFileName = os.path.abspath(self._getExtraPath("axis.bild"))
        Chimera.createCoordinateAxisFile(dim,
                                         bildFileName=bildFileName,
                                         sampling=sampling)

        for n in range(1, self.nModels.get() + 1):
            fnPdb = self._getExtraPath("fit_%d.pdb" % n)
            if exists(fnPdb):
                fnCmd = self._getExtraPath("chimera_%d.cmd" % n)
                fhCmd = open(fnCmd, 'w')
                fhCmd.write("open %s\n" % _localInputVol)
                fhCmd.write("open lcc.mrc\n")
                fhCmd.write("open fit_%d.pdb\n" % n)
                fhCmd.write("vol #1 hide\n")
                fhCmd.write("scolor #0 volume #1 cmap rainbow\n")
                fhCmd.write("open %s\n" % bildFileName)
                fhCmd.close()