def searchStep(self, structFileName):
    """Save the input structure as PDB and upload it with ``curl``.

    The structure read from *structFileName* is written to
    ``atomStruct.pdb`` in the extra folder and posted as a multipart form
    to the ``dali/dump.cgi`` URL below, together with the selected method,
    the job title and the contact address taken from the form.
    """
    pdbPath = self._getExtraPath("atomStruct.pdb")
    AtomicStructHandler(structFileName).write(pdbPath)

    formValues = (pdbPath,
                  self.methodsDict[self.method.get()],
                  self.title.get(),
                  self.email.get())
    curlTemplate = (
        '-F "file1=@%s" -F "method=%s" -F "title=%s" -F "address=%s" '
        'http://ekhidna.biocenter.helsinki.fi/cgi-bin/dali/dump.cgi')
    self.runJob("curl", curlTemplate % formValues)
def reNameChainStep(self, structFileName):
    """Rename one chain of the input structure and register the output.

    The chain and model to modify are read from the JSON text stored in
    ``inputStructureChain`` (keys ``chain`` and ``model``); the new chain
    name is taken from ``chainName``. The result is written to
    ``atomStruct_reNamedChain.cif`` in the extra folder.
    """
    import json

    renamedPath = self._getExtraPath("atomStruct_reNamedChain.cif")
    handler = AtomicStructHandler(structFileName)
    selection = json.loads(self.inputStructureChain.get())
    handler.renameChain(
        chainID=selection['chain'],
        newChainName=self.chainName.get(),
        modelID=selection['model'],
        filename=renamedPath,
    )
    self.createOutputStep(renamedPath)
def addChainStep(self, structFileName, listStructFileName):
    """Add every structure in *listStructFileName* to the base structure.

    Each file is passed to ``addStruct`` together with the output path
    ``atomStruct_addChain.cif`` (extra folder); progress is printed and
    flushed per file. The output is then registered with
    ``twoRelations=True``.
    """
    mergedPath = self._getExtraPath("atomStruct_addChain.cif")
    handler = AtomicStructHandler(structFileName)

    print("Adding to Atomic Struct {}".format(structFileName))
    for extraStructFn in listStructFileName:
        print("AddingStruct {}".format(extraStructFn))
        sys.stdout.flush()
        handler.addStruct(extraStructFn, mergedPath)

    self.createOutputStep(mergedPath, twoRelations=True)
def preparationStep(self):
    """Stage the input in the extra folder and run maestro on it.

    An ``AtomStruct`` input is rewritten as ``atomStructIn.pdb``; any other
    input is linked as ``atomStructIn.mae``. Maestro is then launched with
    ``-b <file>`` from the protocol path, so the file is referenced
    relative to that folder.
    """
    inputObj = self.inputStructure.get()
    if isinstance(inputObj, AtomStruct):
        stagedPath = self._getExtraPath("atomStructIn.pdb")
        AtomicStructHandler(inputObj.getFileName()).write(stagedPath)
        relativeFn = 'extra/atomStructIn.pdb'
    else:
        stagedPath = self._getExtraPath("atomStructIn.mae")
        createLink(inputObj.getFileName(), stagedPath)
        relativeFn = 'extra/atomStructIn.mae'

    self.runJob(schrodinger_plugin.getHome('maestro'),
                "-b %s" % relativeFn,
                cwd=self._getPath())
def preparationStep(self):
    """Run ``prepare_receptor4`` on the input structure.

    A ``.cif`` input is first rewritten as ``atomStructIn.pdb`` in the tmp
    folder; any other file is passed as is. The output is written to
    ``atomStruct.pdbqt`` in the extra folder.
    """
    inputPath = self.inputStructure.get().getFileName()
    if inputPath.endswith('.cif'):
        receptorPath = self._getTmpPath("atomStructIn.pdb")
        AtomicStructHandler(inputPath).write(receptorPath)
    else:
        receptorPath = inputPath

    pdbqtPath = self._getExtraPath('atomStruct.pdbqt')
    ProtBioinformaticsADTPrepare.callPrepare(
        self, "prepare_receptor4",
        ' -v -r %s -o %s' % (receptorPath, pdbqtPath))
def extractChainStep(self, structFileName):
    """Extract a residue range of one chain and register the output.

    The chain and model come from the JSON text in ``inputStructureChain``
    (keys ``chain`` and ``model``); the range comes from ``start`` and
    ``end``, where an ``end`` of -1 is replaced by ``sys.maxsize``. The
    result is written to ``atomStruct_extractChain.cif`` (extra folder).
    """
    import json

    extractedPath = self._getExtraPath("atomStruct_extractChain.cif")
    handler = AtomicStructHandler(structFileName)
    selection = json.loads(self.inputStructureChain.get())

    requestedEnd = self.end.get()
    lastResidue = sys.maxsize if requestedEnd == -1 else requestedEnd

    handler.extractChain(
        chainID=selection['chain'],
        start=self.start.get(),
        end=lastResidue,
        modelID=selection['model'],
        filename=extractedPath,
    )
    self.createOutputStep(extractedPath)
def testExtractAllChains(self):
    """Run 'extractAllChains' on 6vsb and check the atom count of chain C."""
    importedPdb = self._importStructurePDB('6vsb')  # A, B, C
    operations = ProtAtomStrucOperate.operationsDictInv
    protocol = self.newProtocol(
        ProtAtomStrucOperate,
        pdbFileToBeRefined=importedPdb,
        Operation=operations['extractAllChains'],
    )
    self.launchProtocol(protocol)

    handler = AtomicStructHandler()
    handler.read(protocol.outputPdb_chainC.getFileName())
    atomCount = sum(1 for _ in handler.getStructure().get_atoms())
    self.assertEqual(7327, atomCount)
def preparationStep(self):
    """Stage the input in the extra folder and open it with maestro.

    An ``AtomStruct`` input is rewritten as ``atomStructIn.pdb`` and passed
    to maestro with ``-b``; any other input is linked as ``atomStructIn``
    plus its own extension and passed with ``-m``. Maestro runs from the
    protocol path, so the file is referenced relative to that folder.
    """
    inputObj = self.inputStructure.get()
    if isinstance(inputObj, AtomStruct):
        stagedPath = self._getExtraPath("atomStructIn.pdb")
        AtomicStructHandler(inputObj.getFileName()).write(stagedPath)
        maestroFlag = "-b"
        relativeFn = 'extra/atomStructIn.pdb'
    else:
        stagedPath = (self._getExtraPath("atomStructIn")
                      + inputObj.getExtension())
        createLink(inputObj.getFileName(), stagedPath)
        maestroFlag = "-m"
        relativeFn = os.path.join('extra', os.path.basename(stagedPath))

    self.runJob(schrodinger_plugin.getHome('maestro'),
                "%s %s" % (maestroFlag, relativeFn),
                cwd=self._getPath())
def extractAllChainsStep(self, structFileName):
    """Extract every chain of every model into its own mmCIF file.

    For each chain reported by ``getModelsChains`` the whole chain
    (``start=-1``, ``end=sys.maxsize``) is written to
    ``atomStruct_extractChain_<chainID>.cif`` in the extra folder and
    registered through ``createOutputStep`` with the chain ID as suffix.

    :param structFileName: path of the atomic structure to split.
    """
    outFileTemplate = self._getExtraPath("atomStruct_extractChain_%s.cif")
    aStruct1 = AtomicStructHandler(structFileName)
    listOfChains, _ = aStruct1.getModelsChains()
    for model, chainDic in listOfChains.items():
        for chainID in chainDic:
            # Use the IDs directly instead of formatting them into a JSON
            # string and parsing it back: that round trip produced the
            # same int/str values but failed for chain IDs containing
            # characters such as '"' or '\'.
            modelID = int(model)
            chainIDStr = str(chainID)
            # NOTE(review): the file name and suffix depend only on the
            # chain ID, so equal chain IDs in different models share the
            # same output file.
            outFileName = outFileTemplate % chainIDStr
            aStruct1.extractChain(modelID=modelID,
                                  chainID=chainIDStr,
                                  start=-1,
                                  end=sys.maxsize,
                                  filename=outFileName)
            self.createOutputStep(outFileName, suffix=chainIDStr)
def testRenumberChain(self):
    """Run 'reNumberChain' on chain A of 1P30 and check the atom count."""
    importedPdb = self._importStructurePDB('1P30')  # A
    operations = ProtAtomStrucOperate.operationsDictInv
    protocol = self.newProtocol(
        ProtAtomStrucOperate,
        pdbFileToBeRefined=importedPdb,
        Operation=operations['reNumberChain'],
        inputStructureChain='{"model": 0, "chain": "A", "residues": 891}',
        offset=5,
    )
    self.launchProtocol(protocol)

    handler = AtomicStructHandler()
    handler.read(protocol.outputPdb.getFileName())
    atomCount = sum(1 for _ in handler.getStructure().get_atoms())
    self.assertEqual(7304, atomCount)
def testRenameChain(self):
    """Rename chain B of 5NI1 to 'AA' and check atoms and chain names."""
    importedPdb = self._importStructurePDB('5NI1')  # A, B, C
    operations = ProtAtomStrucOperate.operationsDictInv
    protocol = self.newProtocol(
        ProtAtomStrucOperate,
        pdbFileToBeRefined=importedPdb,
        Operation=operations['reNameChain'],
        inputStructureChain='{"model": 0, "chain": "B", "residues": 146}',
        chainName='AA',
    )
    self.launchProtocol(protocol)

    handler = AtomicStructHandler()
    handler.read(protocol.outputPdb.getFileName())

    # check size
    atomCount = sum(1 for _ in handler.getStructure().get_atoms())
    self.assertEqual(4579, atomCount)

    # check chain names: 'B' must be gone and 'AA' present
    modelsChains, _ = handler.getModelsChains()
    chainNames = modelsChains[0].keys()
    for expectedChain in ('A', 'AA', 'C', 'D'):
        self.assertTrue(expectedChain in chainNames)
    self.assertFalse('B' in chainNames)
def exportAtomStructStep(self):
    """Export the selected atomic structure to the ``filesPath`` folder.

    Three files are produced:

    * an unchanged copy of the input file (same base name);
    * a copy rewritten through ``AtomicStructHandler`` and saved as
      ``SYMPLIFIED_STRUCT``;
    * for ``.pdb`` / ``.cif`` inputs, an mmCIF file saved as
      ``COORDINATEFILENAME``.

    The final mmCIF upgrade is best effort: a failure does not abort the
    step, but it is now reported with a warning instead of being silently
    discarded.
    """
    import logging

    logger = logging.getLogger(__name__)

    exportAtomStruct = self.exportAtomStruct.get()
    originStructPath = exportAtomStruct.getFileName()
    dirName = self.filesPath.get()
    destinyStructPath = os.path.join(dirName, self.COORDINATEFILENAME)
    destinySympleStructPath = os.path.join(dirName, self.SYMPLIFIED_STRUCT)

    # save input atom struct with no change
    baseName = os.path.basename(originStructPath)
    localPath = os.path.abspath(os.path.join(dirName, baseName))
    copyFile(originStructPath, localPath)

    # call biopython to simplify atom struct and save it
    aSH = AtomicStructHandler()
    aSH.read(originStructPath)
    aSH.write(destinySympleStructPath)

    def upgradeToMmCIF(cifPath, log):
        # convert cif to mmCIF by using maxit program (best effort)
        try:
            fromCIFTommCIF(cifPath, destinyStructPath, log)
        except Exception as exc:
            logger.warning("Could not convert %s to mmCIF (%s): %s",
                           cifPath, destinyStructPath, exc)

    if originStructPath.endswith(".pdb"):
        # if pdb convert to mmcif calling maxit twice:
        # first pdb -> cif, then cif -> mmCIF in place
        log = self._log
        fromPDBToCIF(originStructPath, destinyStructPath, log)
        upgradeToMmCIF(destinyStructPath, log)
    elif originStructPath.endswith(".cif"):
        # if cif convert to mmcif using maxit
        upgradeToMmCIF(originStructPath, self._log)
def _insertAllSteps(self):
    """Register the protocol steps.

    Inserts ``convertInputStep``, then ``mutateStep`` (which depends on
    it), then one ``refineStep2`` per ``numberOfMpi`` value (each depending
    on the mutation) and finally ``createOutputStep``, which waits for all
    refinement steps.
    """
    def residueNumber(selection):
        # integer found between the first ':' and the following ','
        return int(selection.split(":")[1].split(",")[0].strip())

    # compute alanine atom struct len
    structFn = self.inputStructure.get().getFileName()
    handler = AtomicStructHandler(structFn)
    # we assume that there is a single model and a single chain
    residueCount = sum(1 for _ in handler.getStructure().get_residues())
    chainId = next(handler.getStructure().get_chains()).get_id()
    firstResidueInStruct = next(handler.getStructure().get_residues()).id[1]

    # starting and ending residue
    firstResidue = residueNumber(self.firstaa.get())
    lastResidue = residueNumber(self.lastaa.get())
    # compute number of steps according to the sequence size
    stepCount = lastResidue - firstResidue + 1

    # steps
    prepareId = self._insertFunctionStep('convertInputStep', self.FITTEDFILE)
    # mutateChain
    mutateId = self._insertFunctionStep(
        'mutateStep',
        firstResidue,  # in seq
        firstResidueInStruct,  # in struct
        residueCount,
        chainId,
        stepCount,
        prerequisites=[prepareId])

    refineIds = [
        self._insertFunctionStep('refineStep2', prerequisites=[mutateId])
        for _ in range(self.numberOfMpi.get())
    ]
    self._insertFunctionStep('createOutputStep', prerequisites=refineIds)
def compare(self, pdb, sym):
    """Convert *pdb* from I222r to *sym* and compare atoms with vertices.

    Runs ``ProtAtomStrucConvertSymmetry`` and asserts that each atom
    coordinate of the rotated structure matches, to 2 decimal places, the
    corresponding vertex of an ``Icosahedron`` with circumscribed radius
    100 built in the target orientation.
    """
    protocol = self.newProtocol(
        ProtAtomStrucConvertSymmetry,
        pdbFileToBeRefined=pdb,
        originSymmetryGroup=SYM_I222r - SYM_I222,
        targetSymmetryGroup=sym - SYM_I222,
    )
    protocol.setObjLabel(
        'rotate atom structs, to %s' % SCIPION_SYM_NAME[sym])
    self.launchProtocol(protocol)

    handler = AtomicStructHandler(protocol.rotatedAtomStruct.getFileName())
    atomCoords = [atom.coord
                  for atom in handler.getStructure().get_atoms()]

    reference = Icosahedron(circumscribed_radius=100,
                            orientation=SCIPION_SYM_NAME[sym][1:])
    vertices = reference.getVertices()

    for coord, vertex in zip(atomCoords, vertices):
        for got, expected in zip(coord, vertex):
            self.assertAlmostEqual(got, expected, places=2)
def rotateAtomStruct(self, inAtomStructFn, outAtomStructFn, matrix):
    """Apply *matrix* to an atomic structure file.

    The structure is read from *inAtomStructFn*, transformed with
    *matrix* and written to *outAtomStructFn*.
    """
    handler = AtomicStructHandler(inAtomStructFn)
    handler.transform(matrix)
    handler.write(outAtomStructFn)
def preparationStep(self):
    """Build the ``prepwizard`` command line from the form and run it.

    The input is staged in the extra folder (an ``AtomStruct`` is rewritten
    as ``atomStructIn.pdb``, anything else is linked as
    ``atomStructIn.mae``). Flags are then collected stage by stage from
    the form parameters; a disabled stage 2, 3 or 4 adds its ``-no...``
    flag instead. The program runs from the protocol path and writes
    ``atomStructOut.maegz``.
    """
    prog = Plugin.getHome('utilities/prepwizard')

    inputObj = self.inputStructure.get()
    if isinstance(inputObj, AtomStruct):
        stagedPath = self._getExtraPath("atomStructIn.pdb")
        AtomicStructHandler(inputObj.getFileName()).write(stagedPath)
        relativeFn = 'extra/atomStructIn.pdb'
    else:
        stagedPath = self._getExtraPath("atomStructIn.mae")
        createLink(inputObj.getFileName(), stagedPath)
        relativeFn = 'extra/atomStructIn.mae'

    # Flags are collected in a list and joined with single spaces.
    flags = ['-WAIT']

    if self.stage1.get():
        if self.fillSideChains.get():
            flags.append('-fillsidechains')
        if self.fillLoops.get():
            flags.append('-fillloops')
        if self.disulfides.get():
            flags.append('-disulfides')
        if self.mse.get():
            flags.append('-mse')
        hydrogenMode = self.hydrogens.get()
        if hydrogenMode == 0:
            flags.append("-nohtreat")
        elif hydrogenMode == 1:
            flags.append("-rehtreat")
        if self.glycosylation.get():
            flags.append("-glycosylation")
        if self.palmitoylation.get():
            flags.append("-palmitoylation")
        if self.captermini.get():
            flags.append("-captermini")
        if self.keepFarWat.get():
            flags.append("-keepfarwat -watdist %f" % self.watdist.get())
        if not self.treatMetals.get():
            flags.append("-nometaltreat")

    if not self.stage2.get():
        flags.append("-noprotassign")
    else:
        if self.sampleWaters.get():
            flags.append("-samplewater")
        if self.xtal.get():
            flags.append("-xtal")
        if self.propKa.get():
            flags.append("-propka_pH %f" % self.propKapH.get())
        else:
            flags.append("-nopropka")
        if self.minadjh.get():
            flags.append("-minimize_adj_h")

    if not self.stage3.get():
        flags.append("-noimpref")
    else:
        flags.append("-rmsd %f" % self.rmsdD.get())
        if self.fix.get():
            flags.append("-fix")
        flags.append("-f 2005" if self.force.get() == 0 else "-f 3")

    if not self.stage4.get():
        flags.append("-noepik")
    else:
        if self.ms.get():
            flags.append("-ms %d" % self.msN.get())
        flags.append("-epik_pH %f" % self.epikPh.get())
        flags.append("-epik_pHt %f" % self.epikPht.get())

    flags.append('%s atomStructOut.maegz' % relativeFn)
    self.runJob(prog, ' '.join(flags), cwd=self._getPath())
def testAddChain(self):
    """Add 5NI1 and 1J77 to 1P30 and check file, chain IDs and atoms."""
    def countAtoms(fileName):
        structure = AtomicStructHandler(fileName).getStructure()
        return sum(1 for _ in structure.get_atoms())

    pdb1 = self._importStructurePDB('1P30')  # A
    pdb2 = self._importStructurePDB('5NI1')  # A, B, C, D
    pdb3 = self._importStructurePDB('1J77')  # A
    operations = ProtAtomStrucOperate.operationsDictInv
    protocol = self.newProtocol(
        ProtAtomStrucOperate,
        pdbFileToBeRefined=pdb1,
        InputAtomStruct2=[pdb2, pdb3],
        Operation=operations['addChain'],
    )
    protocol.setObjLabel('add atom structs')
    self.launchProtocol(protocol)

    # check file exists
    mergedFn = protocol.outputPdb.getFileName()
    self.assertTrue(exists(protocol.outputPdb.getFileName()),
                    "Filename {} does not exists".format(mergedFn))

    # chain IDs must match regardless of order
    mergedHandler = AtomicStructHandler(mergedFn)
    chains = [chain.id
              for chain in mergedHandler.getStructure().get_chains()]
    goal = ['A', 'A002', 'B', 'C', 'D', 'A003']
    self.assertTrue(Counter(chains) == Counter(goal),
                    "{} != {}".format(chains, goal))

    # atoms are OK: the merged structure holds all atoms of the inputs
    expectedAtoms = sum(countAtoms(pdb.getFileName())
                        for pdb in (pdb1, pdb2, pdb3))
    mergedAtoms = sum(
        1 for _ in mergedHandler.getStructure().get_atoms())
    self.assertEqual(expectedAtoms, mergedAtoms)
class ChimeraModelFromTemplate(ChimeraProtBase):
    """Protocol to model three-dimensional structures of proteins using
    Modeller.
    Execute command *scipionwrite [model #n]* from command line in order
    to transfer the selected pdb to scipion. Default value is model=#0,
    model refers to the pdb file."""
    _label = 'model from template'
    _program = ""
    _version = VERSION_1_2

    INFILE1 = "unaligned_1.fasta"
    OUTFILE1 = "aligned_1.fasta"
    INFILE2 = "unaligned_2.fasta"
    OUTFILE2 = "aligned_2.fasta"
    TWOSEQUENCES = 0
    MULTIPLESEQUENCES = 1
    ProgramToAlign1 = ['Bio.pairwise2', 'Clustal Omega', 'MUSCLE']
    ProgramToAlign2 = ['Clustal Omega', 'MUSCLE']
    OptionForAligning = [
        'None', 'Additional sequences to align',
        'Provide your own sequence alignment'
    ]

    # Help texts shared by the parameters of the first and of the
    # additional target sequence (they were duplicated literally before).
    _CHAIN_HELP = ("Select a particular chain of the atomic "
                   "structure.")
    _TARGET_SEQUENCE_HELP = (
        "Input the aminoacid sequence to align with the "
        "structure template sequence.")
    _OPTION_FOR_ALIGNING_HELP = (
        "None: Option by default. Only the template and the "
        "target sequences will be included in the alignment. "
        "This option is recommendable when these two sequences "
        "are very similar. Otherwise, select any of the two "
        "additional options:\n"
        "Additional sequences to align: Select this option "
        "if you want to add some more sequences to accomplish "
        "the alignment.\n"
        "Provide your own sequence alignment: Your alignment "
        "should include both the target and the template "
        "sequences.\n")
    _OWN_ALIGNMENT_HELP = (
        "Input your own sequence alignment.\n"
        "ChimeraX allowed formats accessible here: "
        "https://www.cgl.ucsf.edu/chimerax/docs/user/commands/"
        "open.html#sequence ")
    _OTHER_SEQUENCES_HELP = (
        "In case you need to load more sequences to "
        "align, you can load them here.")
    _CLUSTAL_MUSCLE_HELP = (
        "Clustal Omega "
        "program (http://www.clustal.org/omega/, "
        "https://doi.org/10.1038/msb.2011.75): "
        "Multiple sequence alignment tool. Install "
        "clustalo if you choose this option for "
        "the first time by 'sudo apt-get install "
        "clustalo'.\n\nMUSCLE program stands for "
        "MUltiple Sequence Comparison by "
        "Log- Expectation("
        "http://www.drive5.com/muscle/muscle.html, "
        "https://doi.org/10.1093/nar/gkh340). "
        "Install muscle if you choose this option "
        "for the first time by 'sudo apt install "
        "muscle'.")
    _PAIRWISE_TOOL_HELP = (
        "Select a program to accomplish the sequence "
        "alignment:\n\nBiopython module "
        "Bio.pairwise2 ("
        "http://biopython.org/DIST/docs/api/"
        "Bio.pairwise2-module.html). Built-in "
        "program to align two "
        "sequences. The global "
        "alignment algorithm from the EMBOSS suite "
        "has been implemented with match/mismatch "
        "scores of 3/-1 and gap penalties "
        "(open/extend) of "
        "3/2.\n\n" + _CLUSTAL_MUSCLE_HELP)
    _MULTIPLE_TOOL_HELP = (
        "Select a program to accomplish the sequence "
        "alignment:\n\n" + _CLUSTAL_MUSCLE_HELP)

    # --------------------------- DEFINE param functions --------------------
    def _defineParams(self, form, doHelp=False):
        """Extend the base form with the template chain, the target
        sequence(s) and the alignment options, and append usage help."""
        formBase = super(ChimeraModelFromTemplate,
                         self)._defineParams(form, doHelp=True)
        param = form.getParam('pdbFileToBeRefined')
        param.label.set('Atomic structure used as template')
        param.help.set("PDBx/mmCIF file template used as basic atomic "
                       "structure to model your specific sequence.")
        # hide the inherited volume inputs
        param = form.getParam('inputVolume')
        param.condition.set('False')
        param = form.getParam('inputVolumes')
        param.condition.set('False')
        # hide inputPdbFiles
        param = form.getParam('inputPdbFiles')
        param.condition.set('False')
        param.allowsNull.set('True')

        section = formBase.getSection('Input')

        # ---- first target sequence ----
        section.addParam('inputStructureChain', StringParam,
                         label="Chain ", allowsNull=True, important=True,
                         help=self._CHAIN_HELP)
        section.addParam('inputSequence1', PointerParam,
                         pointerClass="Sequence",
                         label='Target sequence',
                         allowsNull=True, important=True,
                         help=self._TARGET_SEQUENCE_HELP)
        section.addParam('optionForAligning1', EnumParam,
                         choices=self.OptionForAligning,
                         label="Options to improve the alignment:",
                         default=0,
                         help=self._OPTION_FOR_ALIGNING_HELP)
        section.addParam('inputYourOwnSequenceAlignment1', PathParam,
                         pointerClass="File", allowsNull=False,
                         condition='optionForAligning1 == 2',
                         label='Sequence alignment input',
                         help=self._OWN_ALIGNMENT_HELP)
        section.addParam('inputSequencesToAlign1', MultiPointerParam,
                         pointerClass="Sequence", allowsNull=True,
                         condition='optionForAligning1 == 1',
                         label='Other sequences to align',
                         help=self._OTHER_SEQUENCES_HELP)
        section.addParam('inputProgramToAlign1_1', EnumParam,
                         choices=self.ProgramToAlign1,
                         label="Alignment tool for two sequences:",
                         default=0,
                         condition='optionForAligning1 == 0',
                         help=self._PAIRWISE_TOOL_HELP)
        section.addParam('inputProgramToAlign2_1', EnumParam,
                         choices=self.ProgramToAlign2,
                         label="Multiple alignment tool:", default=0,
                         condition='optionForAligning1 == 1',
                         help=self._MULTIPLE_TOOL_HELP)

        # ---- optional additional target sequence ----
        section.addParam('additionalTargetSequence', BooleanParam,
                         default=False,
                         label='Additional target sequence to include?',
                         help='Select YES if you want to add an additional '
                              'target sequence to model according a '
                              'different '
                              'chain of the structure template. This '
                              'option is recommendable when you want to '
                              'model '
                              'the two interacting elements of a particular '
                              'complex'
                              ' at the same time.')
        section.addParam('selectStructureChain', StringParam,
                         condition='additionalTargetSequence == True',
                         label="Chain ", allowsNull=True, important=True,
                         help=self._CHAIN_HELP)
        section.addParam('inputSequence2', PointerParam,
                         pointerClass="Sequence",
                         condition='additionalTargetSequence == True',
                         label='Target sequence',
                         allowsNull=True, important=True,
                         help=self._TARGET_SEQUENCE_HELP)
        section.addParam('optionForAligning2', EnumParam,
                         choices=self.OptionForAligning,
                         condition='additionalTargetSequence == True',
                         label="Options to improve the alignment:",
                         default=0,
                         help=self._OPTION_FOR_ALIGNING_HELP)
        section.addParam('inputYourOwnSequenceAlignment2', PathParam,
                         pointerClass="File", allowsNull=False,
                         condition='optionForAligning2 == 2 and '
                                   'additionalTargetSequence == True',
                         label='Sequence alignment input',
                         help=self._OWN_ALIGNMENT_HELP)
        section.addParam('inputSequencesToAlign2', MultiPointerParam,
                         pointerClass="Sequence", allowsNull=True,
                         condition='optionForAligning2 == 1 and '
                                   'additionalTargetSequence == True',
                         label='Other sequences to align',
                         help=self._OTHER_SEQUENCES_HELP)
        section.addParam('inputProgramToAlign1_2', EnumParam,
                         choices=self.ProgramToAlign1,
                         label="Alignment tool for two sequences:",
                         default=0,
                         condition='optionForAligning2 == 0 and '
                                   'additionalTargetSequence == True',
                         help=self._PAIRWISE_TOOL_HELP)
        section.addParam('inputProgramToAlign2_2', EnumParam,
                         choices=self.ProgramToAlign2,
                         label="Multiple alignment tool:", default=0,
                         condition='optionForAligning2 == 1 and '
                                   'additionalTargetSequence == True',
                         help=self._MULTIPLE_TOOL_HELP)

        formBase.addLine(
            "Step 1:\nIn the sequence window your target "
            "sequence (and other additional sequences that you "
            "want to use in the alignment) will appear aligned to "
            "the template's sequence. Select in the sequence window "
            "menu:\nStructure -> Modeller (homology)...;\nA new "
            "window for Comparative Modeling with Modeller will "
            "appear. Select your specific sequence as the sequence "
            "to be modeled (target), and the input atomic structure"
            " used as template for modeling. Select Run Modeller via "
            "web service and write the Modeller license key supplied "
            "(Academic user can register free of charge to receive a "
            "license key). Finally, press OK. \nWAITING TIME: (you may "
            "see the status of your job in chimera main window, lower "
            "left corner.)\n\nStep 2:\nWhen the process finished, 5 "
            "models will be automatically superimposed onto the "
            "template and model scores will appear in Modeller Results "
            "window. In Chimera main menu -> Favorites -> Model panel "
            "will show you: #0 (coordinate axes); #1 ( template); #2.1 "
            "to 2.5 (models).Choose the one you like the best, for "
            "example model #2.1. To save it in Scipion, we need to "
            "change the model ID. In Chimera main menu: Favorites -> "
            "Command Line, write *combine #2.1 model #3 close t*. Then, "
            "you will see in Model panel that selected model #2.1 "
            "renamed to combination with ID #3. Save it as first guess "
            "in Scipion by executing the Chimera command *scipionwrite "
            "[model #n]*. In our example *scipionwrite model #3*.\n "
            "When you use the command line scipionwrite, the Chimera "
            "session will be saved by default. Additionally, you can "
            "save the Chimera session whenever you want by executing "
            "the command *scipionss*. You will be able to restore the "
            "saved session by using the protocol chimera restore "
            "session (SCIPION menu: Tools/Calculators/chimera restore "
            "session). Once you have save your favorite model you can "
            "press Quit in the Modeller Results window.")

    # --------------------------- INSERT steps functions --------------------
    def prerequisitesStep(self):
        """Prepare the alignment for the first target sequence and, when
        requested, for the additional one.

        After each call to ``prePreRequisites`` the chain it selected is
        stored as ``selectedChain1`` / ``selectedChain2``.
        """
        import json

        # read PDB
        fileName = self._readPDB()

        # first target sequence
        self.prePreRequisites(
            fileName,
            json.loads(self.inputStructureChain.get()),
            self.inputSequence1.get(),  # SEQ object from Scipion
            self.INFILE1,
            self.OUTFILE1,
            self.optionForAligning1.get(),
            self.inputYourOwnSequenceAlignment1.get(),
            self.inputSequencesToAlign1,
            self.inputProgramToAlign1_1,
            self.inputProgramToAlign2_1)
        self.selectedChain1 = self.selectedChain

        # additional target sequence
        if self.additionalTargetSequence.get() is True:
            self.prePreRequisites(
                fileName,
                json.loads(self.selectStructureChain.get()),
                self.inputSequence2.get(),  # SEQ object from Scipion
                self.INFILE2,
                self.OUTFILE2,
                self.optionForAligning2.get(),
                self.inputYourOwnSequenceAlignment2.get(),
                self.inputSequencesToAlign2,
                self.inputProgramToAlign1_2,
                self.inputProgramToAlign2_2)
            self.selectedChain2 = self.selectedChain

    def prePreRequisites(self, fileName, chainIdDict, userSeq, inFile,
                         outFile, addSeq, yourAlignment, inputSeqAlign,
                         programToAlign1, programToAlign2):
        """Produce the sequence alignment file for one target sequence.

        :param fileName: template structure path (only used in the log).
        :param chainIdDict: dict with the ``model`` and ``chain`` to use.
        :param userSeq: target sequence object.
        :param inFile: base name of the unaligned fasta file (tmp folder).
        :param outFile: base name of the aligned fasta file (extra folder).
        :param addSeq: index into ``OptionForAligning``: 0 aligns template
            and target only, 1 also includes *inputSeqAlign*, any other
            value copies *yourAlignment* to the extra folder.
        :param yourAlignment: path of a user supplied alignment.
        :param inputSeqAlign: iterable of pointers to extra sequences.
        :param programToAlign1: param selecting the tool for two sequences.
        :param programToAlign2: param selecting the multiple alignment tool.

        Sets ``selectedModel`` and ``selectedChain``; ``alignment`` is set
        only when Bio.pairwise2 is used.
        """
        # get sequence of structure chain with id chainId (selected by the
        # user)
        self.selectedModel = chainIdDict['model']
        self.selectedChain = chainIdDict['chain']
        print("Selected chain: %s from model: %s from structure: %s"
              % (self.selectedChain, self.selectedModel,
                 os.path.basename(fileName)))

        # Bio.Seq.Seq object
        structureSeq = self.structureHandler.getSequenceFromChain(
            self.selectedModel, self.selectedChain)
        # obtain a seqID for our PDB sequence
        structSeqID = self.structureHandler.getFullID(self.selectedModel,
                                                      self.selectedChain)

        # target sequence imported by the user
        targetSeqID = userSeq.getId()  # ID associated to SEQ object (str)
        userSequence = userSeq.getSequence()  # sequence (str)
        # transformation of this sequence (str) in a Bio.Seq.Seq object:
        seqHandler = SequenceHandler(userSequence,
                                     isAminoacid=userSeq.getIsAminoacids())
        targetSeq = seqHandler._sequence  # Bio.Seq.Seq object

        # creation of Dic of IDs and sequences
        SeqDic = OrderedDict()
        SeqDic[structSeqID] = structureSeq
        SeqDic[targetSeqID] = targetSeq

        # align sequences and save them to disk, -this will be chimera
        # input-
        inFile = self._getInFastaSequencesFile(inFile)
        outFile = self._getOutFastaSequencesFile(outFile)

        if addSeq == 0:
            saveFileSequencesToAlign(SeqDic, inFile)
            tool = programToAlign1.get()
            if tool == self.ProgramToAlign1.index('Bio.pairwise2'):
                # Only the two first sequences will be included in the
                # alignment
                self.alignment = alignBioPairwise2Sequences(
                    structSeqID, structureSeq,
                    targetSeqID, targetSeq,
                    outFile)
            else:
                # All the sequences will be included in the alignment
                if tool == self.ProgramToAlign1.index('Clustal Omega'):
                    cline = alignClustalSequences(inFile, outFile)
                else:
                    cline = alignMuscleSequences(inFile, outFile)
                self.runJob(cline, '')
        elif addSeq == 1:
            # if there are additional sequences imported by the user
            if inputSeqAlign is not None:
                for seqPointer in inputSeqAlign:
                    seq = seqPointer.get()
                    otherHandler = SequenceHandler(
                        seq.getSequence(),
                        isAminoacid=seq.getIsAminoacids())
                    # Bio.Seq.Seq object
                    SeqDic[seq.getId()] = otherHandler._sequence
            saveFileSequencesToAlign(SeqDic, inFile)
            # All the sequences will be included in the alignment.
            # The param value is read with .get(), as done above for
            # programToAlign1, instead of comparing the param object
            # itself with the index.
            if programToAlign2.get() == \
                    self.ProgramToAlign2.index('Clustal Omega'):
                cline = alignClustalSequences(inFile, outFile)
            else:
                cline = alignMuscleSequences(inFile, outFile)
            self.runJob(cline, '')
        else:
            # the user provides the alignment: copy it to the extra folder
            aligmentFile = os.path.basename(yourAlignment)
            outFile = os.path.join(self._getExtraPath(), aligmentFile)
            copyFile(yourAlignment, outFile)

    def _readPDB(self):
        """Read the template structure into ``self.structureHandler`` and
        return its absolute path."""
        self.structureHandler = AtomicStructHandler()
        fileName = os.path.abspath(
            self.pdbFileToBeRefined.get().getFileName())
        self.structureHandler.read(fileName)
        return fileName

    def _getInFastaSequencesFile(self, inFile):
        """Return the absolute path of *inFile* inside the tmp folder."""
        return os.path.abspath(self._getTmpPath(inFile))

    def _getOutFastaSequencesFile(self, outFile):
        """Return the absolute path of *outFile* inside the extra folder."""
        return os.path.abspath(self._getExtraPath(outFile))

    # TODO: change help to installation pages instead of apt-get
    def _validate(self):
        """Return the base validation errors plus one more when neither
        the CLUSTALO nor the MUSCLE tool is found by ``is_tool``."""
        # Check that CLUSTALO or MUSCLE program exists
        errors = super(ChimeraModelFromTemplate, self)._validate()
        if not (self.is_tool(CLUSTALO) or self.is_tool(MUSCLE)):
            errors.append(
                "Clustal-omega and MUSCLE programs missing.\n "
                "You need at least one of them to run this program.\n"
                "Please install Clustal-omega and/or MUSCLE:\n"
                " sudo apt-get install clustalo\n"
                " sudo apt-get install muscle")
        return errors
def _runChangingCifFormatSuperpose(self, list_args):
    """Run the superpose program, trying several file format conversions.

    Depending on the extensions of the two files in *list_args*:

    * ``.cif`` + ``.cif``: upgrade both to mmCIF; on failure convert both
      to PDB.
    * one ``.cif`` and one ``.pdb`` (either order): convert the cif to
      PDB; on failure convert the pdb to cif; if superposing those still
      fails, upgrade both cifs to mmCIF.
    * any other combination: nothing is run.

    If the selected strategy fails completely, both files are rewritten
    in place through ``AtomicStructHandler`` and superposed once; a
    failure there is only reported with a printed message.

    :param list_args: sequence whose first two items are the file names
        passed, in that order, to the superpose program.
    """
    cwd = os.getcwd() + "/" + self._getExtraPath()

    def runSuperpose(firstFn, secondFn):
        # single place where the external program is launched
        Plugin.runPhenixProgram(Plugin.getProgram(SUPERPOSE),
                                firstFn + " " + secondFn,
                                extraEnvDict=None, cwd=cwd)

    def upgradeCif(cifFn):
        return fromCIFTommCIF(cifFn, cifFn)

    def cifToPdb(cifFn):
        return fromCIFToPDB(cifFn, cifFn.replace('.cif', '.pdb'))

    def pdbToCif(pdbFn):
        return fromPDBToCIF(pdbFn, pdbFn.replace('.pdb', '.cif'))

    def superposeMixed(cifIndex):
        # One file is cif (position cifIndex) and the other is pdb; the
        # original argument order is always kept.
        pdbIndex = 1 - cifIndex
        try:
            # pdbs: convert cif to pdb
            pdbPair = [list_args[0], list_args[1]]
            pdbPair[cifIndex] = cifToPdb(list_args[cifIndex])
            runSuperpose(*pdbPair)
        except Exception:
            cifPair = None
            try:
                # cifs: convert pdb to cif
                converted = [list_args[0], list_args[1]]
                converted[pdbIndex] = pdbToCif(list_args[pdbIndex])
                cifPair = converted
                runSuperpose(*cifPair)
            except Exception:
                if cifPair is None:
                    # the pdb -> cif conversion itself failed: there is
                    # nothing to upgrade, let the caller fall back
                    raise
                # upgrade cifs. The file names are kept as a list
                # (the previous code re-split a joined string and, in
                # one branch, appended to an element of an empty list,
                # which always raised IndexError).
                runSuperpose(*[upgradeCif(fn) for fn in cifPair])

    try:
        if list_args[0].endswith(".cif") and list_args[1].endswith(".cif"):
            try:
                # upgrade cifs
                runSuperpose(upgradeCif(list_args[0]),
                             upgradeCif(list_args[1]))
            except Exception:
                # convert cifs to pdbs
                runSuperpose(cifToPdb(list_args[0]),
                             cifToPdb(list_args[1]))
        elif list_args[0].endswith(".cif") and \
                list_args[1].endswith(".pdb"):
            superposeMixed(cifIndex=0)
        elif list_args[0].endswith(".pdb") and \
                list_args[1].endswith(".cif"):
            superposeMixed(cifIndex=1)
    except Exception:
        # biopython conversion: rewrite both files in place, then
        # superpose them once
        aSH = AtomicStructHandler()
        try:
            for i in range(0, 2):
                aSH.read(list_args[i])
                aSH.write(list_args[i])
            runSuperpose(list_args[0], list_args[1])
        except Exception:
            print("CIF file standarization failed.")
def _readPDB(self):
    """Load the template atomic structure into a new structure handler.

    A fresh AtomicStructHandler is stored in ``self.structureHandler`` and
    used to read the file referenced by ``self.pdbFileToBeRefined``.

    :return: absolute path of the file that was read.
    """
    handler = AtomicStructHandler()
    self.structureHandler = handler
    atomStruct = self.pdbFileToBeRefined.get()
    absolutePath = os.path.abspath(atomStruct.getFileName())
    handler.read(absolutePath)
    return absolutePath
class ChimeraModelFromTemplate(ChimeraProtBase):
    """Protocol to model three-dimensional structures of proteins using
    Modeller.
    Execute command *scipionwrite #n [prefix stringAddedToFilename]* from
    command line in order to transfer the selected pdb to scipion.
    Default value is model=#0, model refers to the pdb file."""
    _label = 'model from template'
    _program = ""
    _version = VERSION_1_2

    # Names of the fasta files written before / after the alignment, for
    # the first and the (optional) second target sequence.
    INFILE1 = "unaligned_1.fasta"
    OUTFILE1 = "aligned_1.fasta"
    INFILE2 = "unaligned_2.fasta"
    OUTFILE2 = "aligned_2.fasta"
    TWOSEQUENCES = 0
    MULTIPLESEQUENCES = 1
    # Choices offered by the enum params defined in _defineParams.
    ProgramToAlign1 = ['Bio.pairwise2', 'Clustal Omega', 'MUSCLE']
    ProgramToAlign2 = ['Clustal Omega', 'MUSCLE']
    OptionForAligning = [
        'None',
        'Additional sequences to align',
        'Provide your own sequence alignment',
    ]
    OptionForDataBase = ['PDB', 'NR']
    OptionForMatrix = [
        'BLOSUM45', 'BLOSUM50', 'BLOSUM62', 'BLOSUM80', 'BLOSUM90',
        'PAM30', 'PAM70', 'PAM250', 'IDENTITY',
    ]

    # --------------------------- DEFINE param functions --------------------
    def _defineParams(self, form, doHelp=False):
        """Declare the form: an 'Input' section with the template, target
        sequence(s) and alignment options, and a 'Help' section with usage
        instructions.

        :param form: form object the sections and params are added to.
        :param doHelp: accepted for interface compatibility; not used here.
        """
        # Help texts shared by the params of the first and second target.
        alignOptionsHelp = (
            "None: Option by default. Only the template and the "
            "target sequences will be included in the alignment. "
            "This option is recommendable when these two sequences "
            "are very similar. Otherwise, select any of the two "
            "additional options:\n"
            "Additional sequences to align: Select this option "
            "if you want to add some more sequences to accomplish "
            "the alignment.\n"
            "Provide your own sequence alignment: Your alignment "
            "should include both the target and the template "
            "sequences.\n")
        ownAlignmentHelp = (
            "Input your own sequence alignment.\n"
            "ChimeraX allowed formats accessible here: "
            "https://www.cgl.ucsf.edu/chimerax/docs/user/commands/open.html#sequence ")
        otherSequencesHelp = ("In case you need to load more sequences to "
                              "align, you can load them here.")
        chainHelp = ("Select a particular chain of the atomic "
                     "structure.")
        targetSequenceHelp = ("Input the aminoacid sequence to align with the "
                              "structure template sequence.")
        toolIntroHelp = ("Select a program to accomplish the sequence "
                         "alignment:\n\n")
        pairwiseHelp = (
            "Biophyton module "
            "Bio.pairwise2 ("
            "http://biopython.org/DIST/docs/api/"
            "Bio.pairwise2-module.html). Built-in "
            "program to align two "
            "sequences. The global "
            "alignment algorithm from the EMBOSS suite "
            "has been implemented with match/mismatch "
            "scores of 3/-1 and gap penalties "
            "(open/extend) of "
            "3/2.\n\n")
        clustalMuscleHelp = (
            "Clustal Omega "
            "program (http://www.clustal.org/omega/, "
            "https://doi.org/10.1038/msb.2011.75): "
            "Multiple sequence alignment tool. Install "
            "clustalo if you choose this option for "
            "the first time by 'sudo apt-get install "
            "clustalo'.\n\nMUSCLE program stands for "
            "MUltiple Sequence Comparison by "
            "Log- Expectation("
            "http://www.drive5.com/muscle/muscle.html, "
            "https://doi.org/10.1093/nar/gkh340). "
            "Install muscle if you choose this option "
            "for the first time by 'sudo apt install "
            "muscle'.")
        twoSequencesToolHelp = toolIntroHelp + pairwiseHelp + clustalMuscleHelp
        multipleToolHelp = toolIntroHelp + clustalMuscleHelp

        form.addSection(label='Input')
        section = form.getSection('Input')
        section.addParam(
            'addTemplate', BooleanParam, default=True,
            label='Do you already have a template?',
            help='"Yes": Option by default. Select this option in case '
                 'you already have a template to model your target '
                 'sequence.\n"No": Select this option if you want to '
                 'search for a template with which model your target '
                 'sequence. Generation of multimeric models is not '
                 'allowed selecting this option.\n')
        section.addParam(
            'pdbFileToBeRefined', PointerParam,
            pointerClass="AtomStruct", allowsNull=True, important=True,
            condition='addTemplate == True',
            label='Atomic structure used as template',
            help="PDBx/mmCIF file template used as basic atomic "
                 "structure to model your specific sequence.")
        section.addParam(
            'inputStructureChain', StringParam,
            label="Chain ", allowsNull=True, important=True,
            condition='addTemplate == True',
            help=chainHelp)
        section.addParam(
            'inputSequence1', PointerParam, pointerClass="Sequence",
            label='Target sequence', allowsNull=True, important=True,
            help=targetSequenceHelp)
        section.addParam(
            'dataBase', EnumParam, choices=self.OptionForDataBase,
            condition='addTemplate == False',
            label="Protein sequence database:", default=0,
            help="Select a protein sequence database to search "
                 "for templates:\nPDB: Experimentally determined structures "
                 "in the "
                 "Protein Data Bank.\nNR: NCBI 'non-redundant'database. "
                 "It contains GenBank translation proteins, PDB sequences, "
                 "SwissProt proteins + PIR + PRF. Since NR is much larger "
                 "than PDB, it takes longer to search.\n")
        section.addParam(
            'similarityMatrix', EnumParam, choices=self.OptionForMatrix,
            condition='addTemplate == False',
            label="Similarity matrix:", default=2,
            help="Select a similarity matrix to use for alignment "
                 "scoring.\n")
        section.addParam(
            'cutoffValue', FloatParam,
            condition='addTemplate == False',
            label="cutoff evalue:", default=1e-3,
            help="Least significant expectation value needed to "
                 "qualify the retrieved element as a hit.\n")
        section.addParam(
            'maxSeqs', IntParam,
            condition='addTemplate == False',
            label="Maximum number of sequences:", default=100,
            help="Maximum number of sequences to retrieve "
                 "from the database.\n")
        section.addParam(
            'optionForAligning1', EnumParam,
            choices=self.OptionForAligning,
            condition='addTemplate == True',
            label="Options to improve the alignment:", default=0,
            help=alignOptionsHelp)
        section.addParam(
            'inputYourOwnSequenceAlignment1', PathParam,
            pointerClass="File", allowsNull=False,
            condition='addTemplate == True and optionForAligning1 == 2',
            label='Sequence alignment input',
            help=ownAlignmentHelp)
        section.addParam(
            'inputSequencesToAlign1', MultiPointerParam,
            pointerClass="Sequence", allowsNull=True,
            condition='addTemplate == True and optionForAligning1 == 1',
            label='Other sequences to align',
            help=otherSequencesHelp)
        section.addParam(
            'inputProgramToAlign1_1', EnumParam,
            choices=self.ProgramToAlign1,
            label="Alignment tool for two sequences:", default=0,
            condition='addTemplate == True and optionForAligning1 == 0',
            help=twoSequencesToolHelp)
        section.addParam(
            'inputProgramToAlign2_1', EnumParam,
            choices=self.ProgramToAlign2,
            label="Multiple alignment tool:", default=0,
            condition='addTemplate == True and optionForAligning1 == 1',
            help=multipleToolHelp)
        section.addParam(
            'additionalTargetSequence', BooleanParam, default=False,
            condition='addTemplate == True',
            label='Additional target sequence to include?',
            help='Select YES if you want to add an additional '
                 'target sequence to model according a different '
                 'chain of the structure template. This '
                 'option is recommendable when you want to model '
                 'the two interacting elements of a particular complex'
                 ' at the same time.')
        section.addParam(
            'selectStructureChain', StringParam,
            condition='addTemplate == True and '
                      'additionalTargetSequence == True',
            label="Chain ", allowsNull=True, important=True,
            help=chainHelp)
        section.addParam(
            'inputSequence2', PointerParam, pointerClass="Sequence",
            condition='addTemplate == True and '
                      'additionalTargetSequence == True',
            label='Target sequence', allowsNull=True, important=True,
            help=targetSequenceHelp)
        section.addParam(
            'optionForAligning2', EnumParam,
            choices=self.OptionForAligning,
            condition='addTemplate == True and '
                      'additionalTargetSequence == True',
            label="Options to improve the alignment:", default=0,
            help=alignOptionsHelp)
        section.addParam(
            'inputYourOwnSequenceAlignment2', PathParam,
            pointerClass="File", allowsNull=False,
            condition='addTemplate == True and '
                      'optionForAligning2 == 2 and '
                      'additionalTargetSequence == True',
            label='Sequence alignment input',
            help=ownAlignmentHelp)
        section.addParam(
            'inputSequencesToAlign2', MultiPointerParam,
            pointerClass="Sequence", allowsNull=True,
            condition='addTemplate == True and '
                      'optionForAligning2 == 1 and '
                      'additionalTargetSequence == True',
            label='Other sequences to align',
            help=otherSequencesHelp)
        section.addParam(
            'inputProgramToAlign1_2', EnumParam,
            choices=self.ProgramToAlign1,
            label="Alignment tool for two sequences:", default=0,
            condition='addTemplate == True and '
                      'optionForAligning2 == 0 and '
                      'additionalTargetSequence == True',
            help=twoSequencesToolHelp)
        section.addParam(
            'inputProgramToAlign2_2', EnumParam,
            choices=self.ProgramToAlign2,
            label="Multiple alignment tool:", default=0,
            condition='addTemplate == True and '
                      'optionForAligning2 == 1 and '
                      'additionalTargetSequence == True',
            help=multipleToolHelp)
        section.addParam(
            'extraCommands', StringParam, default='',
            condition='False',
            label='Extra commands for chimera viewer',
            help="Add extra commands in cmd file. Use for testing")

        form.addSection(label='Help')
        form.addLine(
            "Step 1:\nIn the sequence window your target "
            "sequence (and other additional sequences that you "
            "want to use in the alignment) will appear aligned to "
            "the template's sequence. Select in the sequence window "
            "menu:\nTools -> Sequence -> Modeller Comparative;\nA new "
            "window for Comparative Modeling with Modeller will "
            "appear. Select your specific template(s) as the Sequence "
            "alignments and the target(s)sequence as the sequence "
            "to be modeled"
            " . To run Modeller via web service write the Modeller license "
            "key supplied (Academic user can register free of charge to "
            "receive a license key). Finally, press OK. \nWAITING TIME: "
            "(you may see the status of your job in chimera main window, "
            "lower left corner.)\n\nStep 2:\nWhen the process finished, "
            "5 models will be automatically superimposed onto the template "
            "and model scores will appear in Modeller Results window. "
            "In Chimera Model panel you will have: #1 (coordinate axes); "
            "#2 ( template); #3.1 to 3.5 (models).Choose the one you like "
            "the best, for example model #3.1. To save it in Scipion, we "
            "need to change the model ID. In Chimera main menu: Favorites "
            "-> Command Line, write *rename #3.1 id #4*. Then, you will see "
            "in Model panel that selected model #3.1 renamed to #3. Save it "
            "as first guess in Scipion by executing the Chimera command "
            "*scipionwrite [model] #n [prefix XX]*. In our example "
            "*scipionwrite #4 prefix model_3_1_*.\n When you use the "
            "command line scipionwrite, the Chimera session will be saved "
            "by default. Additionally, you can save the Chimera session "
            "whenever you want by executing the command *scipionss*. You "
            "will be able to restore the saved session by using the "
            "protocol chimera restore session (SCIPION menu: "
            "Tools/Calculators/chimera restore session). Once you have "
            "save your favorite model you can press Quit in the Modeller "
            "Results window.")

    # --------------------------- INSERT steps functions --------------------
    def prerequisitesStep(self):
        """Prepare the sequence files that runChimeraStep opens.

        With a template: read the structure and build the alignment of the
        selected chain with the first target sequence and, when requested,
        of a second chain with the second target sequence. The selected
        chains are kept in ``self.selectedChain1`` / ``self.selectedChain2``.
        Without a template: write the target sequence to a fasta file and
        keep its id in ``self.targetSeqID1``.
        """
        import json

        if self.addTemplate:
            # read PDB
            fileName = self._readPDB()

            # first target sequence against the first selected chain
            self.prePreRequisites(
                fileName,
                json.loads(self.inputStructureChain.get()),
                self.inputSequence1.get(),  # SEQ object from Scipion
                self.INFILE1,
                self.OUTFILE1,
                self.optionForAligning1.get(),
                self.inputYourOwnSequenceAlignment1.get(),
                self.inputSequencesToAlign1,
                self.inputProgramToAlign1_1,
                self.inputProgramToAlign2_1)
            self.selectedChain1 = self.selectedChain

            if self.additionalTargetSequence.get() is True:
                # second target sequence against the second selected chain
                self.prePreRequisites(
                    fileName,
                    json.loads(self.selectStructureChain.get()),
                    self.inputSequence2.get(),  # SEQ object from Scipion
                    self.INFILE2,
                    self.OUTFILE2,
                    self.optionForAligning2.get(),
                    self.inputYourOwnSequenceAlignment2.get(),
                    self.inputSequencesToAlign2,
                    self.inputProgramToAlign1_2,
                    self.inputProgramToAlign2_2)
                self.selectedChain2 = self.selectedChain
        else:
            # get target sequence imported by the user
            userSeq = self.inputSequence1.get()  # SEQ object from Scipion
            self.targetSeqID1 = self.preTemplate(userSeq, self.OUTFILE1)

    def prePreRequisites(self, fileName, chainIdDict, userSeq, inFile,
                         outFile, addSeq, yourAlignment, inputSeqAlign,
                         programToAlign1, programToAlign2):
        """Align one template chain with one target sequence.

        Sets ``self.selectedModel`` and ``self.selectedChain`` from
        *chainIdDict* and, depending on *addSeq*, runs a pairwise
        alignment (0), a multiple alignment including the extra sequences
        (1) or copies the alignment supplied by the user into the extra
        folder (any other value).

        :param fileName: name of the template file (only used in the log
            message printed here).
        :param chainIdDict: dict with the keys 'model' and 'chain'.
        :param userSeq: target sequence object (getId, getSequence and
            getIsAminoacids are used).
        :param inFile: base name of the fasta file with unaligned sequences.
        :param outFile: base name of the fasta file with the alignment.
        :param addSeq: selected index of OptionForAligning.
        :param yourAlignment: path of the user alignment (used when
            *addSeq* is neither 0 nor 1).
        :param inputSeqAlign: iterable of pointers to additional sequences.
        :param programToAlign1: param holding an index of ProgramToAlign1.
        :param programToAlign2: param holding an index of ProgramToAlign2.
        """
        # get sequence of structure chain with id chainId (selected by the
        # user)
        self.selectedModel = chainIdDict['model']
        self.selectedChain = chainIdDict['chain']
        print("Selected chain: %s from model: %s from structure: %s"
              % (self.selectedChain, self.selectedModel,
                 os.path.basename(fileName)))

        # Bio.Seq.Seq object
        structureSeq = self.structureHandler.getSequenceFromChain(
            self.selectedModel, self.selectedChain)
        # obtain a seqID for our PDB sequence
        structSeqID = self.structureHandler.getFullID(self.selectedModel,
                                                      self.selectedChain)

        # target sequence imported by the user
        targetSeqID = userSeq.getId()  # ID associated to SEQ object (str)
        userSequence = userSeq.getSequence()  # sequence of that object (str)
        # transformation of this sequence (str) in a Bio.Seq.Seq object:
        seqHandler = SequenceHandler(userSequence,
                                     isAminoacid=userSeq.getIsAminoacids())
        targetSeq = seqHandler._sequence  # Bio.Seq.Seq object

        # creation of Dic of IDs and sequences
        SeqDic = OrderedDict()
        SeqDic[structSeqID] = structureSeq
        SeqDic[targetSeqID] = targetSeq

        # align sequences and save them to disk, -this will be chimera input-
        inFile = self._getInFastaSequencesFile(inFile)
        outFile = self._getOutFastaSequencesFile(outFile)

        if addSeq == 0:
            saveFileSequencesToAlign(SeqDic, inFile)
            if programToAlign1.get() == \
                    self.ProgramToAlign1.index('Bio.pairwise2'):
                # Only the two first sequences will be included in the
                # alignment
                self.alignment = alignBioPairwise2Sequences(
                    structSeqID, structureSeq,
                    targetSeqID, targetSeq, outFile)
            else:
                # All the sequences will be included in the alignment
                if programToAlign1.get() == \
                        self.ProgramToAlign1.index('Clustal Omega'):
                    cline = alignClustalSequences(inFile, outFile)
                else:
                    cline = alignMuscleSequences(inFile, outFile)
                args = ''
                self.runJob(cline, args)
        elif addSeq == 1:
            # if there are additional sequences imported by the user
            if inputSeqAlign is not None:
                for seq in inputSeqAlign:
                    seq = seq.get()
                    ID = seq.getId()
                    sequence = seq.getSequence()
                    seqHandler = SequenceHandler(
                        sequence, isAminoacid=seq.getIsAminoacids())
                    SeqDic[ID] = seqHandler._sequence  # Bio.Seq.Seq object

            # get all sequences in a fasta file
            saveFileSequencesToAlign(SeqDic, inFile)

            # All the sequences will be included in the alignment.
            # The value is read with .get(), as done for programToAlign1.
            if programToAlign2.get() == \
                    self.ProgramToAlign2.index('Clustal Omega'):
                cline = alignClustalSequences(inFile, outFile)
            else:
                cline = alignMuscleSequences(inFile, outFile)
            args = ''
            self.runJob(cline, args)
        else:
            # NOTE(review): the copy keeps the user's file name, whereas
            # runChimeraStep opens OUTFILE1/OUTFILE2 - confirm this is
            # the intended behavior.
            aligmentFile = os.path.basename(yourAlignment)
            outFile = os.path.join(self._getExtraPath(), aligmentFile)
            copyFile(yourAlignment, outFile)

    def preTemplate(self, userSeq, outFile):
        """Write the target sequence alone to a fasta file.

        :param userSeq: target sequence object.
        :param outFile: base name of the fasta file in the extra folder.
        :return: id of the target sequence.
        """
        userSequence = userSeq.getSequence()  # sequence of that object (str)
        targetSeqID = userSeq.getId()  # ID associated to SEQ object (str)
        # transformation of this sequence (str) in a Bio.Seq.Seq object:
        seqHandler = SequenceHandler(userSequence,
                                     isAminoacid=userSeq.getIsAminoacids())
        targetSeq = seqHandler._sequence  # Bio.Seq.Seq object

        # creation of Dic of IDs and sequences
        SeqDic = OrderedDict()
        SeqDic[targetSeqID] = targetSeq
        outFile = self._getOutFastaSequencesFile(outFile)
        saveFileSequencesToAlign(SeqDic, outFile)
        return targetSeqID

    def _readPDB(self):
        """Read the template structure into ``self.structureHandler`` and
        return the absolute path of the file read."""
        self.structureHandler = AtomicStructHandler()
        fileName = os.path.abspath(self.pdbFileToBeRefined.get().getFileName())
        self.structureHandler.read(fileName)
        return fileName

    def _getInFastaSequencesFile(self, inFile):
        """Return the absolute path of *inFile* inside the tmp folder."""
        return os.path.abspath(self._getTmpPath(inFile))

    def _getOutFastaSequencesFile(self, outFile):
        """Return the absolute path of *outFile* inside the extra folder."""
        return os.path.abspath(self._getExtraPath(outFile))

    def runChimeraStep(self):
        """Write the chimera script (coordinate axes, template, sequence
        alignments or template search) and launch chimera with it."""

        def associate(script, modelId, chain, outFileName):
            # Open an alignment file and associate its first sequence with
            # the given chain of the template.
            alignmentFile = self._getOutFastaSequencesFile(outFileName)
            alignmentName = alignmentFile.split("/")[-1]
            script.write("open %s\n" % alignmentFile)
            script.write("sequence disassociate #%s %s\n"
                         % (modelId, alignmentName))
            if str(self.selectedModel) != '0':
                script.write("sequence associate #%s.%s/%s %s:1\n"
                             % (modelId, str(self.selectedModel + 1),
                                str(chain), alignmentName))
            else:
                script.write("sequence associate #%s/%s %s:1\n"
                             % (modelId, str(chain), alignmentName))

        # The context manager closes the script even if a write fails.
        with open(self._getTmpPath(chimeraScriptFileName), "w") as script:
            # building coordinate axes
            dim = 150  # eventually we will create a PDB library that
            # computes PDB dim
            sampling = 1.
            tmpFileName = os.path.abspath(
                self._getTmpPath("axis_input.bild"))
            Chimera.createCoordinateAxisFile(dim,
                                             bildFileName=tmpFileName,
                                             sampling=sampling)
            script.write("open %s\n" % tmpFileName)
            script.write("cofr 0,0,0\n")  # set center of coordinates

            pdbModelCounter = 1
            # No template: search for one with the target sequence
            if not self.addTemplate and self.inputSequence1.get() is not None:
                alignmentFile1 = self._getOutFastaSequencesFile(self.OUTFILE1)
                script.write("open %s\n" % alignmentFile1)
                script.write(
                    "blastprotein %s:%s database %s matrix %s "
                    "cutoff %.3f maxSeqs %d log true\n"
                    % (alignmentFile1.split("/")[-1],
                       self.targetSeqID1,
                       self.OptionForDataBase[int(self.dataBase)],
                       self.OptionForMatrix[int(self.similarityMatrix)],
                       self.cutoffValue,
                       self.maxSeqs))

            # Template structure, moved to its origin when it has one
            if (hasattr(self, 'pdbFileToBeRefined') and
                    self.pdbFileToBeRefined.get() is not None):
                pdbModelCounter += 1
                pdbFileToBeRefined = self.pdbFileToBeRefined.get()
                script.write("open %s\n" % os.path.abspath(
                    pdbFileToBeRefined.getFileName()))
                if pdbFileToBeRefined.hasOrigin():
                    x, y, z = pdbFileToBeRefined.getOrigin().getShifts()
                    script.write("move %0.2f,%0.2f,%0.2f model #%d "
                                 "coord #0\n" % (x, y, z, pdbModelCounter))

            # Alignment of sequence and structure
            if (hasattr(self, 'inputSequence1') and
                    hasattr(self, 'inputStructureChain')):
                if (self.inputSequence1.get() is not None and
                        self.inputStructureChain.get() is not None):
                    pdbModelCounter = 2
                    if str(self.selectedModel) != '0':
                        script.write("select #%s.%s/%s\n"
                                     % (pdbModelCounter,
                                        str(self.selectedModel + 1),
                                        str(self.selectedChain1)))
                    else:
                        script.write("select #%s/%s\n"
                                     % (pdbModelCounter,
                                        str(self.selectedChain1)))
                    associate(script, pdbModelCounter,
                              self.selectedChain1, self.OUTFILE1)

                    if (self.additionalTargetSequence.get() is True and
                            self.inputSequence2.get() is not None and
                            self.inputStructureChain.get() is not None):
                        script.write("select clear\n")
                        script.write("select #%s/%s,%s\n"
                                     % (pdbModelCounter,
                                        str(self.selectedChain1),
                                        str(self.selectedChain2)))
                        associate(script, pdbModelCounter,
                                  self.selectedChain2, self.OUTFILE2)

            # run the text:
            _chimeraScriptFileName = os.path.abspath(
                self._getTmpPath(chimeraScriptFileName))
            extraCommands = self.extraCommands.get()
            if len(extraCommands) > 2:
                script.write(extraCommands)
                args = " --nogui " + _chimeraScriptFileName
            else:
                args = " " + _chimeraScriptFileName

        self._log.info('Launching: ' + Plugin.getProgram() + ' ' + args)

        # run in the background
        cwd = os.path.abspath(self._getExtraPath())
        Plugin.runChimeraProgram(Plugin.getProgram(), args, cwd=cwd,
                                 extraEnv=getEnvDictionary(self))

    def _validate(self):
        """Return the validation errors of the base class plus one more
        when neither the Clustal Omega nor the MUSCLE program is found."""
        # Check that CLUSTALO or MUSCLE program exists
        errors = super(ChimeraModelFromTemplate, self)._validate()
        if not (self.is_tool(CLUSTALO) or self.is_tool(MUSCLE)):
            errors.append(
                "Clustal-omega and MUSCLE programs missing.\n "
                "You need at least one of them to run this program.\n"
                "Please install Clustal-omega and/or MUSCLE:\n"
                " sudo apt-get install clustalo\n"
                " sudo apt-get install muscle")
        return errors