Beispiel #1
0
 def searchStep(self, structFileName):
     """Rewrite the input structure as PDB and post it with curl.

     The structure read from ``structFileName`` is written to
     ``atomStruct.pdb`` in the extra folder and sent, together with the
     selected method, the job title and the e-mail address, to the
     ``dali/dump.cgi`` URL through ``self.runJob("curl", ...)``.
     """
     pdbFileName = self._getExtraPath("atomStruct.pdb")
     AtomicStructHandler(structFileName).write(pdbFileName)
     # values for the four -F form fields, in the order used below
     formValues = (pdbFileName,
                   self.methodsDict[self.method.get()],
                   self.title.get(),
                   self.email.get())
     curlArgs = ('-F "file1=@%s" -F "method=%s" -F "title=%s"  -F "address=%s" '
                 'http://ekhidna.biocenter.helsinki.fi/cgi-bin/dali/dump.cgi'
                 % formValues)
     self.runJob("curl", curlArgs)
 def reNameChainStep(self, structFileName):
     """Rename one chain of the input structure and register the output.

     The chain and model to modify come from the JSON text stored in
     ``inputStructureChain`` (keys ``chain`` and ``model``); the new name
     comes from ``chainName``. ``renameChain`` receives the output file
     name, and that file is then passed to ``createOutputStep``.
     """
     import json
     renamedFileName = self._getExtraPath("atomStruct_reNamedChain.cif")
     handler = AtomicStructHandler(structFileName)
     selection = json.loads(self.inputStructureChain.get())
     handler.renameChain(chainID=selection['chain'],
                         newChainName=self.chainName.get(),
                         modelID=selection['model'],
                         filename=renamedFileName)
     self.createOutputStep(renamedFileName)
    def addChainStep(self, structFileName, listStructFileName):
        """Add every structure in ``listStructFileName`` to the base one.

        Each ``addStruct`` call is given the same output file name; the
        resulting file is registered through ``createOutputStep`` with
        ``twoRelations=True``.
        """
        mergedFileName = self._getExtraPath("atomStruct_addChain.cif")
        handler = AtomicStructHandler(structFileName)
        print("Adding to Atomic Struct {}".format(structFileName))
        for extraStructFileName in listStructFileName:
            print("AddingStruct {}".format(extraStructFileName))
            # flush so the progress message shows up before the merge runs
            sys.stdout.flush()
            handler.addStruct(extraStructFileName, mergedFileName)
        self.createOutputStep(mergedFileName, twoRelations=True)
Beispiel #4
0
    def preparationStep(self):
        """Stage the input structure in the extra folder and open it in maestro.

        An ``AtomStruct`` input is rewritten as ``atomStructIn.pdb``; any
        other input is linked as ``atomStructIn.mae``. Maestro is then run
        with ``-b`` on the path relative to the protocol folder, which is
        used as working directory.
        """
        inputObj = self.inputStructure.get()
        if isinstance(inputObj, AtomStruct):
            stagedName = "atomStructIn.pdb"
            stagedPath = self._getExtraPath(stagedName)
            AtomicStructHandler(inputObj.getFileName()).write(stagedPath)
        else:
            stagedName = "atomStructIn.mae"
            stagedPath = self._getExtraPath(stagedName)
            createLink(inputObj.getFileName(), stagedPath)

        # maestro runs with cwd=self._getPath(), hence the relative path
        relativeFn = 'extra/' + stagedName
        self.runJob(schrodinger_plugin.getHome('maestro'),
                    "-b %s" % relativeFn,
                    cwd=self._getPath())
    def preparationStep(self):
        """Run ``prepare_receptor4`` on the input structure.

        A ``.cif`` input is first rewritten as ``atomStructIn.pdb`` in the
        tmp folder; any other input file is used as is. The result is
        requested as ``atomStruct.pdbqt`` in the extra folder.
        """
        sourceFn = self.inputStructure.get().getFileName()
        if sourceFn.endswith('.cif'):
            receptorFn = self._getTmpPath("atomStructIn.pdb")
            AtomicStructHandler(sourceFn).write(receptorFn)
        else:
            receptorFn = sourceFn
        pdbqtFn = self._getExtraPath('atomStruct.pdbqt')

        prepareArgs = ' -v -r %s -o %s' % (receptorFn, pdbqtFn)
        ProtBioinformaticsADTPrepare.callPrepare(self, "prepare_receptor4",
                                                 prepareArgs)
    def extractChainStep(self, structFileName):
        """Extract a residue range of one chain and register the output.

        The chain and model come from the JSON text stored in
        ``inputStructureChain``. An ``end`` value of -1 is replaced by
        ``sys.maxsize`` before calling ``extractChain``.
        """
        import json
        extractedFileName = self._getExtraPath("atomStruct_extractChain.cif")
        handler = AtomicStructHandler(structFileName)
        selection = json.loads(self.inputStructureChain.get())
        requestedEnd = self.end.get()
        lastResidue = sys.maxsize if requestedEnd == -1 else requestedEnd

        handler.extractChain(chainID=selection['chain'],
                             start=self.start.get(),
                             end=lastResidue,
                             modelID=selection['model'],
                             filename=extractedFileName)
        self.createOutputStep(extractedFileName)
    def testExtractAllChains(self):
        """Run 'extractAllChains' on 6vsb and check the atom count of chain C."""
        importedStruct = self._importStructurePDB('6vsb')  # A, B, C
        operationCodes = ProtAtomStrucOperate.operationsDictInv

        protocol = self.newProtocol(
            ProtAtomStrucOperate,
            pdbFileToBeRefined=importedStruct,
            Operation=operationCodes['extractAllChains'])
        self.launchProtocol(protocol)

        handler = AtomicStructHandler()
        handler.read(protocol.outputPdb_chainC.getFileName())
        atomCount = sum(1 for _ in handler.getStructure().get_atoms())
        self.assertEqual(7327, atomCount)
Beispiel #8
0
 def preparationStep(self):
     """Stage the input structure in the extra folder and open it in maestro.

     An ``AtomStruct`` input is rewritten as ``atomStructIn.pdb`` and
     passed to maestro with ``-b``. Any other input is linked as
     ``atomStructIn`` plus its own extension and passed with ``-m``.
     Maestro runs with the protocol folder as working directory, so the
     file is referenced relative to it.
     """
     inputObj = self.inputStructure.get()
     if isinstance(inputObj, AtomStruct):
         stagedPath = self._getExtraPath("atomStructIn.pdb")
         AtomicStructHandler(inputObj.getFileName()).write(stagedPath)
         maestroArgs = "-b %s" % 'extra/atomStructIn.pdb'
     else:
         stagedPath = (self._getExtraPath("atomStructIn")
                       + inputObj.getExtension())
         createLink(inputObj.getFileName(), stagedPath)
         maestroArgs = "-m %s" % os.path.join('extra',
                                              os.path.basename(stagedPath))
     self.runJob(schrodinger_plugin.getHome('maestro'),
                 maestroArgs,
                 cwd=self._getPath())
 def extractAllChainsStep(self, structFileName):
     """Extract every chain of every model into its own file and output.

     For each model/chain pair reported by ``getModelsChains`` the whole
     chain (``start=-1`` to ``end=sys.maxsize``) is written to
     ``atomStruct_extractChain_<chain>.cif`` in the extra folder and
     registered through ``createOutputStep`` with the chain ID as suffix.

     The model and chain values are used directly instead of being
     formatted into a JSON string and parsed back, which failed for
     chain IDs containing a quote or a backslash.
     """
     aStruct1 = AtomicStructHandler(structFileName)
     listOfChains, _ = aStruct1.getModelsChains()
     for model, chainDic in listOfChains.items():
         # only the chain IDs are needed; the per-chain residue counts
         # stored as values are not used here
         for chainID in chainDic:
             chainIDStr = str(chainID)
             # Format the base name before building the path, so a '%'
             # in the project path cannot break the substitution.
             # NOTE(review): the file name and suffix depend only on the
             # chain ID, so equal chain IDs in different models map to
             # the same file/output -- confirm this is intended.
             chainFileName = self._getExtraPath(
                 "atomStruct_extractChain_%s.cif" % chainIDStr)
             aStruct1.extractChain(modelID=int(model),
                                   chainID=chainIDStr,
                                   start=-1,
                                   end=sys.maxsize,
                                   filename=chainFileName)
             self.createOutputStep(chainFileName, suffix=chainIDStr)
    def testRenumberChain(self):
        """Run 'reNumberChain' on chain A of 1P30 and check the atom count."""
        importedStruct = self._importStructurePDB('1P30')  # A
        operationCodes = ProtAtomStrucOperate.operationsDictInv

        protocol = self.newProtocol(
            ProtAtomStrucOperate,
            pdbFileToBeRefined=importedStruct,
            Operation=operationCodes['reNumberChain'],
            inputStructureChain='{"model": 0, "chain": "A", "residues": 891}',
            offset=5)
        self.launchProtocol(protocol)

        handler = AtomicStructHandler()
        handler.read(protocol.outputPdb.getFileName())
        atomCount = sum(1 for _ in handler.getStructure().get_atoms())
        self.assertEqual(7304, atomCount)
    def testRenameChain(self):
        """Rename chain B of 5NI1 to 'AA' and check atoms and chain names.

        The membership checks use ``assertIn``/``assertNotIn`` so that a
        failure reports the actual list of chains instead of only
        "False is not true".
        """
        pdb1 = self._importStructurePDB('5NI1')  # A, B, C
        _dictOperations = ProtAtomStrucOperate.operationsDictInv
        args = {
            'pdbFileToBeRefined': pdb1,
            'Operation': _dictOperations['reNameChain'],
            'inputStructureChain':
            '{"model": 0, "chain": "B", "residues": 146}',
            'chainName': 'AA'
        }

        protAtomStrucExtractChain = self.newProtocol(ProtAtomStrucOperate,
                                                     **args)
        self.launchProtocol(protAtomStrucExtractChain)
        aSH2 = AtomicStructHandler()
        outFileName = protAtomStrucExtractChain.outputPdb.getFileName()
        aSH2.read(outFileName)
        atomsNumT = len([atom.id for atom in aSH2.getStructure().get_atoms()])
        # check size
        self.assertEqual(4579, atomsNumT)
        # check chain names of the first model (index 0)
        modelsChains, _ = aSH2.getModelsChains()
        chains = modelsChains[0].keys()
        for expectedChain in ('A', 'AA', 'C', 'D'):
            self.assertIn(expectedChain, chains)
        # the old name must be gone after renaming
        self.assertNotIn('B', chains)
Beispiel #12
0
    def exportAtomStructStep(self):
        """Export the atomic structure into the folder given by ``filesPath``.

        Three files are produced:
          * an unchanged copy of the input file (same base name),
          * a copy read and rewritten by ``AtomicStructHandler``
            (``SYMPLIFIED_STRUCT``),
          * for ``.pdb`` / ``.cif`` inputs, a converted file
            (``COORDINATEFILENAME``) obtained with ``fromPDBToCIF`` and/or
            ``fromCIFTommCIF``.

        The cif -> mmCIF conversion stays best-effort: a failure does not
        abort the step, but it is now logged as a warning instead of being
        silently discarded.
        """
        import logging
        logger = logging.getLogger(__name__)

        exportAtomStruct = self.exportAtomStruct.get()
        originStructPath = exportAtomStruct.getFileName()
        dirName = self.filesPath.get()
        destinyStructPath = os.path.join(dirName, self.COORDINATEFILENAME)
        destinySympleStructPath = os.path.join(dirName, self.SYMPLIFIED_STRUCT)

        # save input atom struct with no change
        baseName = os.path.basename(originStructPath)
        localPath = os.path.abspath(os.path.join(dirName, baseName))
        copyFile(originStructPath, localPath)

        # call biopython to simplify atom struct and save it
        aSH = AtomicStructHandler()
        aSH.read(originStructPath)
        aSH.write(destinySympleStructPath)

        if originStructPath.endswith(".pdb"):
            # pdb: convert to cif first, then convert that cif in place
            fromPDBToCIF(originStructPath, destinyStructPath, self._log)
            cifSourcePath = destinyStructPath
        elif originStructPath.endswith(".cif"):
            # cif: convert the input file directly
            cifSourcePath = originStructPath
        else:
            # any other extension: nothing to convert
            return

        try:
            # convert cif to mmCIF by using maxit program
            fromCIFTommCIF(cifSourcePath, destinyStructPath, self._log)
        except Exception as e:
            # best-effort conversion: keep going, but leave a trace
            logger.warning("Could not convert %s to mmCIF (%s): %s",
                           cifSourcePath, destinyStructPath, e)
Beispiel #13
0
    def _insertAllSteps(self):
        """Insert the convert, mutate, refine and output steps.

        One 'refineStep2' step is inserted per unit of ``numberOfMpi``;
        all of them depend on the mutate step, and the output step
        depends on all of them.
        """
        def residueNumber(selection):
            # number between the first ':' and the following ','
            return int(selection.split(":")[1].split(",")[0].strip())

        # the code assumes a single model and a single chain
        handler = AtomicStructHandler(self.inputStructure.get().getFileName())
        residueCount = sum(1 for _ in handler.getStructure().get_residues())
        chainName = next(handler.getStructure().get_chains()).get_id()
        firstResidueInStruct = next(
            handler.getStructure().get_residues()).id[1]

        # starting and ending residue selected by the user
        firstaa = residueNumber(self.firstaa.get())
        lastaa = residueNumber(self.lastaa.get())
        # the range is inclusive on both ends
        stepCount = lastaa - firstaa + 1

        prepareId = self._insertFunctionStep('convertInputStep',
                                             self.FITTEDFILE)
        mutateId = self._insertFunctionStep(
            'mutateStep',
            firstaa,  # in seq
            firstResidueInStruct,  # in struct
            residueCount,
            chainName,
            stepCount,
            prerequisites=[prepareId])

        refineIds = [
            self._insertFunctionStep('refineStep2', prerequisites=[mutateId])
            for _ in range(self.numberOfMpi.get())
        ]

        self._insertFunctionStep('createOutputStep',
                                 prerequisites=refineIds)
    def compare(self, pdb, sym):
        """Convert ``pdb`` from I222r to ``sym`` and compare with an icosahedron.

        The atom coordinates of the rotated structure are compared, pair
        by pair and component by component (2 decimal places), with the
        vertices of an ``Icosahedron`` of circumscribed radius 100 built
        in the target orientation.
        """
        protocol = self.newProtocol(
            ProtAtomStrucConvertSymmetry,
            pdbFileToBeRefined=pdb,
            originSymmetryGroup=SYM_I222r - SYM_I222,
            targetSymmetryGroup=sym - SYM_I222)
        protocol.setObjLabel('rotate atom structs, to %s' %
                             SCIPION_SYM_NAME[sym])
        self.launchProtocol(protocol)

        rotated = AtomicStructHandler(protocol.rotatedAtomStruct.getFileName())
        atomCoords = [atom.coord for atom in rotated.getStructure().get_atoms()]
        # the orientation is the symmetry name without its first character
        reference = Icosahedron(circumscribed_radius=100,
                                orientation=SCIPION_SYM_NAME[sym][1:])
        vertices = reference.getVertices()
        for coord, vertex in zip(atomCoords, vertices):
            for component, expected in zip(coord, vertex):
                self.assertAlmostEqual(component, expected, places=2)
    def rotateAtomStruct(self, inAtomStructFn, outAtomStructFn, matrix):
        """Transform an atomic structure file with ``matrix`` and save it.

        The structure is read from ``inAtomStructFn``, ``matrix`` is passed
        to the handler's ``transform`` method and the result is written to
        ``outAtomStructFn``.
        """
        handler = AtomicStructHandler(inAtomStructFn)
        handler.transform(matrix)
        handler.write(outAtomStructFn)
Beispiel #16
0
    def preparationStep(self):
        """Build the prepwizard command line from the form and run it.

        The input structure is staged in the extra folder (rewritten as
        PDB for an ``AtomStruct``, linked as ``.mae`` otherwise). The
        options of each enabled stage are appended in form order; stages
        2, 3 and 4 add their "disable" flag when switched off. The job
        runs in the protocol folder and writes ``atomStructOut.maegz``.
        """
        prog = Plugin.getHome('utilities/prepwizard')

        inputObj = self.inputStructure.get()
        if isinstance(inputObj, AtomStruct):
            stagedPath = self._getExtraPath("atomStructIn.pdb")
            AtomicStructHandler(inputObj.getFileName()).write(stagedPath)
            relativeFn = 'extra/atomStructIn.pdb'
        else:
            stagedPath = self._getExtraPath("atomStructIn.mae")
            createLink(inputObj.getFileName(), stagedPath)
            relativeFn = 'extra/atomStructIn.mae'

        # options are collected here and joined with single spaces at the end
        options = ['-WAIT']

        if self.stage1.get():
            for param, flag in ((self.fillSideChains, '-fillsidechains'),
                                (self.fillLoops, '-fillloops'),
                                (self.disulfides, '-disulfides'),
                                (self.mse, '-mse')):
                if param.get():
                    options.append(flag)
            hydrogenChoice = self.hydrogens.get()
            if hydrogenChoice == 0:
                options.append("-nohtreat")
            elif hydrogenChoice == 1:
                options.append("-rehtreat")
            for param, flag in ((self.glycosylation, "-glycosylation"),
                                (self.palmitoylation, "-palmitoylation"),
                                (self.captermini, "-captermini")):
                if param.get():
                    options.append(flag)
            if self.keepFarWat.get():
                options.append("-keepfarwat -watdist %f" % self.watdist.get())
            if not self.treatMetals.get():
                options.append("-nometaltreat")

        if self.stage2.get():
            if self.sampleWaters.get():
                options.append("-samplewater")
            if self.xtal.get():
                options.append("-xtal")
            if self.propKa.get():
                options.append("-propka_pH %f" % self.propKapH.get())
            else:
                options.append("-nopropka")
            if self.minadjh.get():
                options.append("-minimize_adj_h")
        else:
            options.append("-noprotassign")

        if self.stage3.get():
            options.append("-rmsd %f" % self.rmsdD.get())
            if self.fix.get():
                options.append("-fix")
            options.append("-f 2005" if self.force.get() == 0 else "-f 3")
        else:
            options.append("-noimpref")

        if self.stage4.get():
            if self.ms.get():
                options.append("-ms %d" % self.msN.get())
            options.append("-epik_pH %f" % self.epikPh.get())
            options.append("-epik_pHt %f" % self.epikPht.get())
        else:
            options.append("-noepik")

        # input file (relative to cwd) followed by the output file name
        options.append('%s atomStructOut.maegz' % relativeFn)
        self.runJob(prog, ' '.join(options), cwd=self._getPath())
    def testAddChain(self):
        """Add 5NI1 and 1J77 to 1P30 and check chains and total atom count."""
        def countAtoms(handler):
            return len([atom.id
                        for atom in handler.getStructure().get_atoms()])

        baseStruct = self._importStructurePDB('1P30')  # A
        secondStruct = self._importStructurePDB('5NI1')  # A, B, C, D
        thirdStruct = self._importStructurePDB('1J77')  # A
        operationCodes = ProtAtomStrucOperate.operationsDictInv

        protocol = self.newProtocol(
            ProtAtomStrucOperate,
            pdbFileToBeRefined=baseStruct,
            InputAtomStruct2=[secondStruct, thirdStruct],
            Operation=operationCodes['addChain'])
        protocol.setObjLabel('add atom structs')
        self.launchProtocol(protocol)
        mergedFileName = protocol.outputPdb.getFileName()

        # the output file must exist
        self.assertTrue(exists(protocol.outputPdb.getFileName()),
                        "Filename {} does not exists".format(mergedFileName))

        # chain names, compared regardless of order
        merged = AtomicStructHandler(mergedFileName)
        chainIds = [chain.id for chain in merged.getStructure().get_chains()]
        expectedIds = ['A', 'A002', 'B', 'C', 'D', 'A003']
        self.assertTrue(
            Counter(chainIds) == Counter(expectedIds),
            "{} != {}".format(chainIds, expectedIds))

        # the merged structure holds the atoms of the three inputs
        inputAtomCounts = [
            countAtoms(AtomicStructHandler(struct.getFileName()))
            for struct in (baseStruct, secondStruct, thirdStruct)
        ]
        self.assertEqual(sum(inputAtomCounts), countAtoms(merged))
Beispiel #18
0
class ChimeraModelFromTemplate(ChimeraProtBase):
    """Protocol to model three-dimensional structures of proteins using Modeller.
        Execute command *scipionwrite [model #n]* from command line in order
        to transfer the selected
        pdb to scipion. Default value is model=#0,
        model refers to the pdb file."""
    _label = 'model from template'
    _program = ""
    _version = VERSION_1_2

    # File names handed to prePreRequisites as inFile/outFile for the
    # first target sequence (see prerequisitesStep).
    INFILE1 = "unaligned_1.fasta"
    OUTFILE1 = "aligned_1.fasta"
    # NOTE(review): presumably the counterparts for the optional second
    # target sequence -- their use is not visible in this part of the file.
    INFILE2 = "unaligned_2.fasta"
    OUTFILE2 = "aligned_2.fasta"
    # NOTE(review): not referenced in the visible code; the names suggest
    # codes for pairwise vs. multiple alignment -- confirm against callers.
    TWOSEQUENCES = 0
    MULTIPLESEQUENCES = 1
    # Choices of the "Alignment tool for two sequences" enum params.
    ProgramToAlign1 = ['Bio.pairwise2', 'Clustal Omega', 'MUSCLE']
    # Choices of the "Multiple alignment tool" enum params.
    ProgramToAlign2 = ['Clustal Omega', 'MUSCLE']
    # Choices of optionForAligning1/2; the form conditions in _defineParams
    # compare against the position in this list (0, 1, 2).
    OptionForAligning = [
        'None', 'Additional sequences to align',
        'Provide your own sequence alignment'
    ]

    # --------------------------- DEFINE param functions --------------------
    def _defineParams(self, form, doHelp=False):
        """Extend the base Chimera form with the template-modelling inputs.

        The base form is always built with ``doHelp=True`` (the ``doHelp``
        argument is not forwarded). The volume and extra-PDB inputs of the
        base form are hidden, and two parallel groups of parameters are
        added to the 'Input' section: one for the first target sequence and
        one, shown only when ``additionalTargetSequence`` is set, for a
        second target sequence. Finally the step-by-step usage text is
        appended with ``addLine``.

        The help texts used by more than one parameter are defined once
        below, so both copies stay identical. Compared with the previous
        literals, the missing spaces between concatenated fragments
        ("sequencealignment", "alignmentshould") and the "Biophyton"
        misspelling are fixed.
        """
        # ---- help texts shared by the two parameter groups ----
        chainHelp = ("Select a particular chain of the atomic "
                     "structure.")
        targetSequenceHelp = ("Input the aminoacid sequence to align with "
                              "the structure template sequence.")
        optionsHelp = (
            "None: Option by default. Only the template and the "
            "target sequences will be included in the alignment. "
            "This option is recommendable when these two sequences "
            "are very similar. Otherwise, select any of the two "
            "additional options:\n"
            "Additional sequences to align: Select this option "
            "if you want to add some more sequences to accomplish "
            "the alignment.\n"
            "Provide your own sequence alignment: Your alignment "
            "should include both the target and the template "
            "sequences.\n")
        ownAlignmentHelp = (
            "Input your own sequence alignment.\n"
            "ChimeraX allowed formats accessible here: "
            "https://www.cgl.ucsf.edu/chimerax/docs/user/commands/open.html#sequence "
        )
        otherSequencesHelp = ("In case you need to load more sequences to "
                              "align, you can load them here.")
        selectProgramHelp = ("Select a program to accomplish the sequence "
                             "alignment:\n\n")
        pairwiseToolHelp = (
            "Biopython module "
            "Bio.pairwise2 ("
            "http://biopython.org/DIST/docs/api/"
            "Bio.pairwise2-module.html). Built-in "
            "program to align two "
            "sequences. The global "
            "alignment algorithm from the EMBOSS suite "
            "has been implemented with match/mismatch "
            "scores of 3/-1 and gap penalties "
            "(open/extend) of "
            "3/2.\n\n")
        multipleToolsHelp = (
            "Clustal Omega "
            "program (http://www.clustal.org/omega/, "
            "https://doi.org/10.1038/msb.2011.75): "
            "Multiple sequence alignment tool. Install "
            "clustalo if you choose this option for "
            "the first time by 'sudo apt-get install "
            "clustalo'.\n\nMUSCLE program stands for "
            "MUltiple Sequence Comparison by "
            "Log- Expectation("
            "http://www.drive5.com/muscle/muscle.html, "
            "https://doi.org/10.1093/nar/gkh340). "
            "Install muscle if you choose this option "
            "for the first time by 'sudo apt install "
            "muscle'.")
        twoSequencesToolHelp = (selectProgramHelp + pairwiseToolHelp
                                + multipleToolsHelp)
        multipleSequencesToolHelp = selectProgramHelp + multipleToolsHelp

        # ---- adapt the parameters inherited from the base form ----
        formBase = super(ChimeraModelFromTemplate,
                         self)._defineParams(form, doHelp=True)
        param = form.getParam('pdbFileToBeRefined')
        param.label.set('Atomic structure used as template')
        param.help.set("PDBx/mmCIF file template used as basic atomic "
                       "structure to model your specific sequence.")
        # a condition of 'False' hides the parameter
        param = form.getParam('inputVolume')
        param.condition.set('False')
        param = form.getParam('inputVolumes')
        param.condition.set('False')
        # hide inputPdbFiles
        param = form.getParam('inputPdbFiles')
        param.condition.set('False')
        param.allowsNull.set('True')

        # ---- first target sequence ----
        section = formBase.getSection('Input')
        section.addParam('inputStructureChain',
                         StringParam,
                         label="Chain ",
                         allowsNull=True,
                         important=True,
                         help=chainHelp)
        section.addParam('inputSequence1',
                         PointerParam,
                         pointerClass="Sequence",
                         label='Target sequence',
                         allowsNull=True,
                         important=True,
                         help=targetSequenceHelp)
        section.addParam(
            'optionForAligning1',
            EnumParam,
            choices=self.OptionForAligning,
            label="Options to improve the alignment:",
            default=0,
            help=optionsHelp)
        section.addParam(
            'inputYourOwnSequenceAlignment1',
            PathParam,
            pointerClass="File",
            allowsNull=False,
            condition='optionForAligning1 == 2',
            label='Sequence alignment input',
            help=ownAlignmentHelp)
        section.addParam('inputSequencesToAlign1',
                         MultiPointerParam,
                         pointerClass="Sequence",
                         allowsNull=True,
                         condition='optionForAligning1 == 1',
                         label='Other sequences to align',
                         help=otherSequencesHelp)
        section.addParam('inputProgramToAlign1_1',
                         EnumParam,
                         choices=self.ProgramToAlign1,
                         label="Alignment tool for two sequences:",
                         default=0,
                         condition='optionForAligning1 == 0',
                         help=twoSequencesToolHelp)
        section.addParam('inputProgramToAlign2_1',
                         EnumParam,
                         choices=self.ProgramToAlign2,
                         label="Multiple alignment tool:",
                         default=0,
                         condition='optionForAligning1 == 1',
                         help=multipleSequencesToolHelp)

        # ---- optional second target sequence ----
        section.addParam('additionalTargetSequence',
                         BooleanParam,
                         default=False,
                         label='Additional target sequence to include?',
                         help='Select YES if you want to add an additional '
                         'target sequence to model according a different '
                         'chain of the structure template. This '
                         'option is recommendable when you want to model '
                         'the two interacting elements of a particular complex'
                         ' at the same time.')
        section.addParam('selectStructureChain',
                         StringParam,
                         condition='additionalTargetSequence == True',
                         label="Chain ",
                         allowsNull=True,
                         important=True,
                         help=chainHelp)
        section.addParam('inputSequence2',
                         PointerParam,
                         pointerClass="Sequence",
                         condition='additionalTargetSequence == True',
                         label='Target sequence',
                         allowsNull=True,
                         important=True,
                         help=targetSequenceHelp)
        section.addParam(
            'optionForAligning2',
            EnumParam,
            choices=self.OptionForAligning,
            condition='additionalTargetSequence == True',
            label="Options to improve the alignment:",
            default=0,
            help=optionsHelp)
        section.addParam(
            'inputYourOwnSequenceAlignment2',
            PathParam,
            pointerClass="File",
            allowsNull=False,
            condition='optionForAligning2 == 2 and '
            'additionalTargetSequence == True',
            label='Sequence alignment input',
            help=ownAlignmentHelp)
        section.addParam('inputSequencesToAlign2',
                         MultiPointerParam,
                         pointerClass="Sequence",
                         allowsNull=True,
                         condition='optionForAligning2 == 1 and '
                         'additionalTargetSequence == True',
                         label='Other sequences to align',
                         help=otherSequencesHelp)
        section.addParam('inputProgramToAlign1_2',
                         EnumParam,
                         choices=self.ProgramToAlign1,
                         label="Alignment tool for two sequences:",
                         default=0,
                         condition='optionForAligning2 == 0 and '
                         'additionalTargetSequence == True',
                         help=twoSequencesToolHelp)
        section.addParam('inputProgramToAlign2_2',
                         EnumParam,
                         choices=self.ProgramToAlign2,
                         label="Multiple alignment tool:",
                         default=0,
                         condition='optionForAligning2 == 1 and '
                         'additionalTargetSequence == True',
                         help=multipleSequencesToolHelp)

        # ---- usage instructions shown at the end of the form ----
        formBase.addLine(
            "Step 1:\nIn the sequence window your target "
            "sequence (and other additional sequences that you "
            "want to use in  the alignment) will appear aligned to "
            "the template's sequence. Select in the sequence window "
            "menu:\nStructure -> Modeller (homology)...;\nA new  "
            "window for Comparative Modeling with Modeller will "
            "appear. Select your specific sequence as the sequence "
            "to be modeled (target), and the input atomic structure" + '''
        used as template for modeling. Select Run Modeller via web service 
        and write the Modeller license key supplied (Academic user can 
        register free of charge to receive a license key). Finally, press OK.
        \nWAITING TIME: (you may see the status of your job in chimera main 
        window, lower left corner.)\n\nStep 2:\nWhen the process finished, 
        5 models will 
        be automatically superimposed onto the template and model scores
        will appear in Modeller Results window. In Chimera main menu -> 
        Favorites -> Model panel will show you: #0 (coordinate axes); #1 (
        template); #2.1 to 2.5 (models).Choose the one you like the best, 
        for example model #2.1. To save it in Scipion, we need to change the 
        model ID. In Chimera main menu: Favorites -> Command Line, write 
        *combine #2.1 model #3 close t*. Then, you will see in Model panel 
        that selected model #2.1 renamed to combination with ID #3. Save it 
        as first guess in Scipion by executing the Chimera command 
        *scipionwrite [model #n]*. In our example *scipionwrite model #3*.\n 
        When you use the command line scipionwrite, the Chimera session will 
        be saved by default. Additionally, you can save the Chimera session 
        whenever you want by executing the command *scipionss*. You will be 
        able to restore the saved session by using the protocol chimera 
        restore session (SCIPION menu: Tools/Calculators/chimera restore 
        session). Once you have save your favorite model you can press 
        Quit in the Modeller Results window.''')

    # --------------------------- INSERT steps functions --------------------

    def prerequisitesStep(self):
        """Prepare the alignment of the first target sequence and, when an
        additional target sequence was requested, of the second one too.

        The template structure is read once with _readPDB(); each target
        is then handed to prePreRequisites() together with its own chain
        selection, file names and alignment settings. The chain selected
        by each call is remembered in self.selectedChain1 and
        self.selectedChain2.
        """
        import json

        # read PDB
        templateFile = self._readPDB()

        # first target sequence: chain, SEQ object, file names and options
        self.prePreRequisites(
            templateFile,
            json.loads(self.inputStructureChain.get()),
            self.inputSequence1.get(),  # SEQ object from Scipion
            self.INFILE1,
            self.OUTFILE1,
            self.optionForAligning1.get(),
            self.inputYourOwnSequenceAlignment1.get(),
            self.inputSequencesToAlign1,
            self.inputProgramToAlign1_1,
            self.inputProgramToAlign2_1)
        self.selectedChain1 = self.selectedChain

        # optional second target sequence, modeled on another chain
        wantsSecondTarget = self.additionalTargetSequence.get()
        if wantsSecondTarget is True:
            self.prePreRequisites(
                templateFile,
                json.loads(self.selectStructureChain.get()),
                self.inputSequence2.get(),  # SEQ object from Scipion
                self.INFILE2,
                self.OUTFILE2,
                self.optionForAligning2.get(),
                self.inputYourOwnSequenceAlignment2.get(),
                self.inputSequencesToAlign2,
                self.inputProgramToAlign1_2,
                self.inputProgramToAlign2_2)

        # without a second target this repeats the first chain
        self.selectedChain2 = self.selectedChain

    def prePreRequisites(self, fileName, chainIdDict, userSeq, inFile,
                         outFile, addSeq, yourAlignment, inputSeqAlign,
                         programToAlign1, programToAlign2):
        """Produce the sequence alignment for one template chain and one
        target sequence.

        The sequence of the selected template chain and the target
        sequence are collected and, depending on *addSeq*, aligned
        pairwise, aligned together with extra sequences, or replaced by an
        alignment file supplied by the user.

        Sets self.selectedModel and self.selectedChain; the Bio.pairwise2
        branch also stores its result in self.alignment.

        :param fileName: path of the template structure; only its base
            name is used, for the log message.
        :param chainIdDict: dict with the keys 'model' and 'chain' that
            identify the template chain.
        :param userSeq: target sequence object providing getId(),
            getSequence() and getIsAminoacids().
        :param inFile: name of the FASTA file holding the unaligned
            sequences (resolved with _getInFastaSequencesFile).
        :param outFile: name of the FASTA file holding the alignment
            (resolved with _getOutFastaSequencesFile).
        :param addSeq: alignment option: 0 aligns only template and
            target, 1 also includes the sequences of *inputSeqAlign*, any
            other value copies *yourAlignment* next to the outputs.
        :param yourAlignment: path of the user supplied alignment file.
        :param inputSeqAlign: iterable of pointers to additional
            sequences, or None.
        :param programToAlign1: enum param whose value indexes
            self.ProgramToAlign1; used when addSeq == 0.
        :param programToAlign2: enum param (a plain int is accepted too)
            whose value indexes self.ProgramToAlign2; used when
            addSeq == 1.
        """
        # get sequence of structure chain with id chainId (selected by the
        # user)
        self.selectedModel = chainIdDict['model']
        self.selectedChain = chainIdDict['chain']
        print("Selected chain: %s from model: %s from structure: %s"
              % (self.selectedChain, self.selectedModel,
                 os.path.basename(fileName)))

        # Bio.Seq.Seq object
        structureSeq = self.structureHandler.getSequenceFromChain(
            self.selectedModel, self.selectedChain)

        # obtain a seqID for our PDB sequence
        structSeqID = self.structureHandler.getFullID(self.selectedModel,
                                                      self.selectedChain)

        # target sequence imported by the user: ID (str) and sequence (str),
        # the latter transformed into a Bio.Seq.Seq object
        targetSeqID = userSeq.getId()
        seqHandler = SequenceHandler(userSeq.getSequence(),
                                     isAminoacid=userSeq.getIsAminoacids())
        targetSeq = seqHandler._sequence

        # creation of Dic of IDs and sequences
        SeqDic = OrderedDict()
        SeqDic[structSeqID] = structureSeq
        SeqDic[targetSeqID] = targetSeq

        # align sequences and save them to disk, -this will be chimera input-
        # get all sequences in a fasta file
        inFile = self._getInFastaSequencesFile(inFile)
        outFile = self._getOutFastaSequencesFile(outFile)

        def runMultipleAligner(useClustal):
            # Build the command line of the chosen external aligner and
            # run it; every sequence of inFile takes part in the alignment.
            if useClustal:
                cline = alignClustalSequences(inFile, outFile)
            else:
                cline = alignMuscleSequences(inFile, outFile)
            self.runJob(cline, '')

        # get the alignment of sequences
        if addSeq == 0:
            saveFileSequencesToAlign(SeqDic, inFile)
            chosenTool = programToAlign1.get()
            if chosenTool == self.ProgramToAlign1.index('Bio.pairwise2'):
                # Only the two first sequences will be included in the
                # alignment
                self.alignment = alignBioPairwise2Sequences(
                    structSeqID, structureSeq, targetSeqID, targetSeq,
                    outFile)
            else:
                runMultipleAligner(
                    chosenTool == self.ProgramToAlign1.index('Clustal Omega'))
        elif addSeq == 1:
            # if there are additional sequences imported by the user
            if inputSeqAlign is not None:
                for seqPointer in inputSeqAlign:
                    seq = seqPointer.get()
                    seqHandler = SequenceHandler(
                        seq.getSequence(), isAminoacid=seq.getIsAminoacids())
                    # Bio.Seq.Seq object
                    SeqDic[seq.getId()] = seqHandler._sequence

            saveFileSequencesToAlign(SeqDic, inFile)

            # Read the enum value explicitly, as done for programToAlign1,
            # instead of comparing the param object itself with an int.
            if hasattr(programToAlign2, 'get'):
                chosenTool = programToAlign2.get()
            else:
                chosenTool = programToAlign2
            runMultipleAligner(
                chosenTool == self.ProgramToAlign2.index('Clustal Omega'))
        else:
            # user supplied alignment: copy it under the path returned by
            # _getExtraPath(), keeping its base name
            aligmentFile = os.path.basename(yourAlignment)
            outFile = os.path.join(self._getExtraPath(), aligmentFile)
            copyFile(yourAlignment, outFile)

    def _readPDB(self):
        """Read the template structure into a new AtomicStructHandler.

        The handler is stored in self.structureHandler. Returns the
        absolute path of the file referenced by self.pdbFileToBeRefined.
        """
        handler = self.structureHandler = AtomicStructHandler()
        templateStruct = self.pdbFileToBeRefined.get()
        absolutePath = os.path.abspath(templateStruct.getFileName())
        handler.read(absolutePath)
        return absolutePath

    def _getInFastaSequencesFile(self, inFile):
        """Return the absolute form of the path that _getTmpPath() gives
        for *inFile* (the file with the sequences still to be aligned)."""
        return os.path.abspath(self._getTmpPath(inFile))

    def _getOutFastaSequencesFile(self, outFile):
        """Return the absolute form of the path that _getExtraPath() gives
        for *outFile* (the file that will hold the alignment)."""
        return os.path.abspath(self._getExtraPath(outFile))

    # def validate(self):
    #    super(ChimeraModelFromTemplate, self).validate()
    #    # TODO check if clustal/muscle exists
    #    # TODO: change help to installation pages instead of apt-get

    def _validate(self):
        """Extend the parent validation with a check for the external
        alignment programs.

        Returns the parent's list of error messages, with one more message
        appended when neither CLUSTALO nor MUSCLE is reported by
        self.is_tool().
        """
        errors = super(ChimeraModelFromTemplate, self)._validate()

        # One of the two programs is enough, nothing to report.
        if self.is_tool(CLUSTALO) or self.is_tool(MUSCLE):
            return errors

        missingToolsMessage = (
            "Clustal-omega and MUSCLE programs missing.\n "
            "You need at least one of them to run this program.\n"
            "Please install Clustal-omega and/or MUSCLE:\n"
            "     sudo apt-get install clustalo\n"
            "     sudo apt-get install muscle")
        errors.append(missingToolsMessage)
        return errors
Beispiel #19
0
 def _runChangingCifFormatSuperpose(self, list_args):
     """Run the Phenix superpose program on two structure files, trying
     several file format conversions until one of them is accepted.

     Depending on the extensions of the two files the attempts are:

     * cif + cif: upgrade both with fromCIFTommCIF, else convert both to
       pdb.
     * cif + pdb (either order): convert the cif to pdb, else convert the
       pdb to cif, else upgrade both resulting cifs with fromCIFTommCIF.

     If the chosen sequence of attempts fails, both files are read and
     rewritten in place with AtomicStructHandler and superpose is run one
     last time. A failure of this last attempt is only reported on
     standard output. Nothing is run when neither file ends with ".cif".

     :param list_args: sequence whose first two items are the file names
         to superpose.
     """
     cwd = os.path.join(os.getcwd(), self._getExtraPath())

     def superpose(fileA, fileB):
         # The program receives both file names as one space separated
         # argument string.
         Plugin.runPhenixProgram(Plugin.getProgram(SUPERPOSE),
                                 fileA + " " + fileB,
                                 extraEnvDict=None, cwd=cwd)

     # NOTE(review): the three converters are assumed to return the name of
     # the file they produce, since their result is passed on to superpose
     # -- confirm against their definitions.
     def toMmCif(cifName):
         return fromCIFTommCIF(cifName, cifName)

     def toPdb(cifName):
         return fromCIFToPDB(cifName, cifName.replace('.cif', '.pdb'))

     def toCif(pdbName):
         return fromPDBToCIF(pdbName, pdbName.replace('.pdb', '.cif'))

     try:
         first, second = list_args[0], list_args[1]
         if first.endswith(".cif") and second.endswith(".cif"):
             try:
                 # upgrade cifs
                 superpose(toMmCif(first), toMmCif(second))
             except Exception:
                 # convert cifs to pdbs
                 superpose(toPdb(first), toPdb(second))
         elif first.endswith(".cif") and second.endswith(".pdb"):
             try:
                 # pdbs: convert cif to pdb
                 superpose(toPdb(first), second)
             except Exception:
                 # cifs: convert pdb to cif. A failing conversion leaves
                 # nothing to upgrade, so it goes to the last resort below.
                 secondCif = toCif(second)
                 try:
                     superpose(first, secondCif)
                 except Exception:
                     # upgrade cifs
                     superpose(toMmCif(first), toMmCif(secondCif))
         elif first.endswith(".pdb") and second.endswith(".cif"):
             try:
                 # pdbs: convert cif to pdb
                 superpose(first, toPdb(second))
             except Exception:
                 # cifs: convert pdb to cif (see the note in the branch
                 # above about a failing conversion)
                 firstCif = toCif(first)
                 try:
                     superpose(firstCif, second)
                 except Exception:
                     # upgrade cifs
                     superpose(toMmCif(firstCif), toMmCif(second))
     except Exception:
         # biopython conversion: rewrite BOTH files before superposing
         aSH = AtomicStructHandler()
         try:
             for fileName in list_args[:2]:
                 aSH.read(fileName)
                 aSH.write(fileName)
             superpose(list_args[0], list_args[1])
         except Exception:
             # best effort: the failure is reported, not raised
             print("CIF file standarization failed.")
Beispiel #20
0
 def _readPDB(self):
     """Create a fresh AtomicStructHandler in self.structureHandler, load
     the structure referenced by self.pdbFileToBeRefined into it and
     return the absolute path of that file."""
     structHandler = self.structureHandler = AtomicStructHandler()
     relativeName = self.pdbFileToBeRefined.get().getFileName()
     fullPath = os.path.abspath(relativeName)
     structHandler.read(fullPath)
     return fullPath
Beispiel #21
0
class ChimeraModelFromTemplate(ChimeraProtBase):
    """Protocol to model three-dimensional structures of proteins using
    Modeller.

    Execute command *scipionwrite #n [prefix stringAddedToFilename]* from
    command line in order to transfer the selected pdb to scipion.
    Default value is model=#0, model refers to the pdb file."""
    _label = 'model from template'
    _program = ""
    _version = VERSION_1_2

    # Names of the FASTA files with the unaligned (IN) and aligned (OUT)
    # sequences of the first and of the additional target sequence.
    INFILE1 = "unaligned_1.fasta"
    OUTFILE1 = "aligned_1.fasta"
    INFILE2 = "unaligned_2.fasta"
    OUTFILE2 = "aligned_2.fasta"
    # NOTE(review): not referenced in the visible part of the class;
    # presumably labels for the two kinds of alignment -- confirm.
    TWOSEQUENCES = 0
    MULTIPLESEQUENCES = 1
    # Choices of the enum params inputProgramToAlign1_1 / _2
    # ("Alignment tool for two sequences").
    ProgramToAlign1 = ['Bio.pairwise2', 'Clustal Omega', 'MUSCLE']
    # Choices of the enum params inputProgramToAlign2_1 / _2
    # ("Multiple alignment tool").
    ProgramToAlign2 = ['Clustal Omega', 'MUSCLE']
    # Choices of the enum params optionForAligning1 / 2; the conditions in
    # _defineParams refer to them by index (0, 1, 2).
    OptionForAligning = [
        'None', 'Additional sequences to align',
        'Provide your own sequence alignment'
    ]
    # Choices of the enum param dataBase.
    OptionForDataBase = ['PDB', 'NR']
    # Choices of the enum param similarityMatrix (its default, index 2, is
    # BLOSUM62).
    OptionForMatrix = [
        'BLOSUM45', 'BLOSUM50', 'BLOSUM62', 'BLOSUM80', 'BLOSUM90', 'PAM30',
        'PAM70', 'PAM250', 'IDENTITY'
    ]

    # --------------------------- DEFINE param functions --------------------
    def _defineParams(self, form, doHelp=False):
        """Declare the protocol form.

        The 'Input' section holds, in this order: the choice between
        using an existing template and searching for one, the template
        structure/chain/target sequence, the template search settings
        (database, similarity matrix, e-value cutoff, maximum number of
        sequences), the alignment settings of the first target sequence,
        the same group of params for an optional second target sequence,
        and a hidden 'extraCommands' param. The 'Help' section contains
        the step by step instructions shown to the user.

        :param form: form object to which sections and params are added.
        :param doHelp: not used in this method.
        """
        # ---- help texts shared by the params of both target sequences ----
        chainHelp = ("Select a particular chain of the atomic "
                     "structure.")
        targetSeqHelp = ("Input the aminoacid sequence to align with the "
                         "structure template sequence.")
        aligningOptionsHelp = (
            "None: Option by default. Only the template and the "
            "target sequences will be included in the alignment. "
            "This option is recommendable when these two sequences "
            "are very similar. Otherwise, select any of the two "
            "additional options:\n"
            "Additional sequences to align: Select this option "
            "if you want to add some more sequences to accomplish "
            "the alignment.\n"
            "Provide your own sequence alignment: Your alignment "
            "should include both the target and the template "
            "sequences.\n")
        ownAlignmentHelp = (
            "Input your own sequence alignment.\n"
            "ChimeraX allowed formats accessible here: "
            "https://www.cgl.ucsf.edu/chimerax/docs/user/commands/"
            "open.html#sequence ")
        otherSeqsHelp = ("In case you need to load more sequences to "
                         "align, you can load them here.")
        externalToolsHelp = (
            "Clustal Omega "
            "program (http://www.clustal.org/omega/, "
            "https://doi.org/10.1038/msb.2011.75): "
            "Multiple sequence alignment tool. Install "
            "clustalo if you choose this option for "
            "the first time by 'sudo apt-get install "
            "clustalo'.\n\nMUSCLE program stands for "
            "MUltiple Sequence Comparison by "
            "Log- Expectation("
            "http://www.drive5.com/muscle/muscle.html, "
            "https://doi.org/10.1093/nar/gkh340). "
            "Install muscle if you choose this option "
            "for the first time by 'sudo apt install "
            "muscle'.")
        pairwiseToolHelp = (
            "Select a program to accomplish the sequence "
            "alignment:\n\nBiopython module "
            "Bio.pairwise2 ("
            "http://biopython.org/DIST/docs/api/"
            "Bio.pairwise2-module.html). Built-in "
            "program to align two "
            "sequences. The global "
            "alignment algorithm from the EMBOSS suite "
            "has been implemented with match/mismatch "
            "scores of 3/-1 and gap penalties "
            "(open/extend) of "
            "3/2.\n\n" + externalToolsHelp)
        multipleToolHelp = (
            "Select a program to accomplish the sequence "
            "alignment:\n\n" + externalToolsHelp)

        form.addSection(label='Input')
        section = form.getSection('Input')

        # ---- template: already available or to be searched for ----
        section.addParam(
            'addTemplate',
            BooleanParam,
            default=True,
            label='Do you already have a template?',
            help='"Yes": Option by default. Select this option in case '
            'you already have a template to model your target '
            'sequence.\n"No": Select this option if you want to '
            'search for a template with which model your target '
            'sequence. Generation of multimeric models is not '
            'allowed selecting this option.\n')
        section.addParam('pdbFileToBeRefined',
                         PointerParam,
                         pointerClass="AtomStruct",
                         allowsNull=True,
                         important=True,
                         condition='addTemplate == True',
                         label='Atomic structure used as template',
                         help="PDBx/mmCIF file template used as basic atomic "
                         "structure to model your specific sequence.")
        section.addParam('inputStructureChain',
                         StringParam,
                         label="Chain ",
                         allowsNull=True,
                         important=True,
                         condition='addTemplate == True',
                         help=chainHelp)
        section.addParam('inputSequence1',
                         PointerParam,
                         pointerClass="Sequence",
                         label='Target sequence',
                         allowsNull=True,
                         important=True,
                         help=targetSeqHelp)

        # ---- template search settings (only without a template) ----
        section.addParam(
            'dataBase',
            EnumParam,
            choices=self.OptionForDataBase,
            condition='addTemplate == False',
            label="Protein sequence database:",
            default=0,
            help="Select a protein sequence database to search "
            "for templates:\nPDB: Experimentally determined structures "
            "in the "
            "Protein Data Bank.\nNR: NCBI 'non-redundant' database. "
            "It contains GenBank translation proteins, PDB sequences, "
            "SwissProt proteins + PIR + PRF. Since NR is much larger "
            "than PDB, it takes longer to search.\n")
        section.addParam(
            'similarityMatrix',
            EnumParam,
            choices=self.OptionForMatrix,
            condition='addTemplate == False',
            label="Similarity matrix:",
            default=2,
            help="Select a similarity matrix to use for alignment "
            "scoring.\n")
        section.addParam('cutoffValue',
                         FloatParam,
                         condition='addTemplate == False',
                         label="cutoff evalue:",
                         default=1e-3,
                         help="Least significant expectation value needed to "
                         "qualify the retrieved element as a hit.\n")
        section.addParam('maxSeqs',
                         IntParam,
                         condition='addTemplate == False',
                         label="Maximum number of sequences:",
                         default=100,
                         help="Maximum number of sequences to retrieve "
                         "from the database.\n")

        # ---- alignment settings of the first target sequence ----
        section.addParam(
            'optionForAligning1',
            EnumParam,
            choices=self.OptionForAligning,
            condition='addTemplate == True',
            label="Options to improve the alignment:",
            default=0,
            help=aligningOptionsHelp)
        section.addParam(
            'inputYourOwnSequenceAlignment1',
            PathParam,
            pointerClass="File",
            allowsNull=False,
            condition='addTemplate == True and optionForAligning1 == 2',
            label='Sequence alignment input',
            help=ownAlignmentHelp)
        section.addParam(
            'inputSequencesToAlign1',
            MultiPointerParam,
            pointerClass="Sequence",
            allowsNull=True,
            condition='addTemplate == True and optionForAligning1 == 1',
            label='Other sequences to align',
            help=otherSeqsHelp)
        section.addParam(
            'inputProgramToAlign1_1',
            EnumParam,
            choices=self.ProgramToAlign1,
            label="Alignment tool for two sequences:",
            default=0,
            condition='addTemplate == True and optionForAligning1 == 0',
            help=pairwiseToolHelp)
        section.addParam(
            'inputProgramToAlign2_1',
            EnumParam,
            choices=self.ProgramToAlign2,
            label="Multiple alignment tool:",
            default=0,
            condition='addTemplate == True and optionForAligning1 == 1',
            help=multipleToolHelp)

        # ---- optional second target sequence and its settings ----
        section.addParam('additionalTargetSequence',
                         BooleanParam,
                         default=False,
                         condition='addTemplate == True',
                         label='Additional target sequence to include?',
                         help='Select YES if you want to add an additional '
                         'target sequence to model according a different '
                         'chain of the structure template. This '
                         'option is recommendable when you want to model '
                         'the two interacting elements of a particular complex'
                         ' at the same time.')
        section.addParam('selectStructureChain',
                         StringParam,
                         condition='addTemplate == True and '
                         'additionalTargetSequence == True',
                         label="Chain ",
                         allowsNull=True,
                         important=True,
                         help=chainHelp)
        section.addParam('inputSequence2',
                         PointerParam,
                         pointerClass="Sequence",
                         condition='addTemplate == True and '
                         'additionalTargetSequence == True',
                         label='Target sequence',
                         allowsNull=True,
                         important=True,
                         help=targetSeqHelp)
        section.addParam(
            'optionForAligning2',
            EnumParam,
            choices=self.OptionForAligning,
            condition='addTemplate == True and '
            'additionalTargetSequence == True',
            label="Options to improve the alignment:",
            default=0,
            help=aligningOptionsHelp)
        section.addParam(
            'inputYourOwnSequenceAlignment2',
            PathParam,
            pointerClass="File",
            allowsNull=False,
            condition='addTemplate == True and '
            'optionForAligning2 == 2 and '
            'additionalTargetSequence == True',
            label='Sequence alignment input',
            help=ownAlignmentHelp)
        section.addParam('inputSequencesToAlign2',
                         MultiPointerParam,
                         pointerClass="Sequence",
                         allowsNull=True,
                         condition='addTemplate == True and '
                         'optionForAligning2 == 1 and '
                         'additionalTargetSequence == True',
                         label='Other sequences to align',
                         help=otherSeqsHelp)
        section.addParam('inputProgramToAlign1_2',
                         EnumParam,
                         choices=self.ProgramToAlign1,
                         label="Alignment tool for two sequences:",
                         default=0,
                         condition='addTemplate == True and '
                         'optionForAligning2 == 0 and '
                         'additionalTargetSequence == True',
                         help=pairwiseToolHelp)
        section.addParam('inputProgramToAlign2_2',
                         EnumParam,
                         choices=self.ProgramToAlign2,
                         label="Multiple alignment tool:",
                         default=0,
                         condition='addTemplate == True and '
                         'optionForAligning2 == 1 and '
                         'additionalTargetSequence == True',
                         help=multipleToolHelp)

        # The condition 'False' keeps this param out of the form.
        section.addParam(
            'extraCommands',
            StringParam,
            default='',
            condition='False',
            label='Extra commands for chimera viewer',
            help="Add extra commands in cmd file. Use for testing")

        # ---- step by step instructions ----
        form.addSection(label='Help')
        form.addLine(
            "Step 1:\nIn the sequence window your target "
            "sequence (and other additional sequences that you "
            "want to use in  the alignment) will appear aligned to "
            "the template's sequence. Select in the sequence window "
            "menu:\nTools -> Sequence -> Modeller Comparative;\nA new  "
            "window for Comparative Modeling with Modeller will "
            "appear. Select your specific template(s) as the Sequence "
            "alignments and the target(s) sequence as the sequence "
            "to be modeled" + '''
        . To run Modeller via web service 
        write the Modeller license key supplied (Academic user can 
        register free of charge to receive a license key). Finally, press OK.
        \nWAITING TIME: (you may see the status of your job in chimera main 
        window, lower left corner.)\n\nStep 2:\nWhen the process finished, 
        5 models will 
        be automatically superimposed onto the template and model scores
        will appear in Modeller Results window. In Chimera Model panel 
        you will have: #1 (coordinate axes); #2 (
        template); #3.1 to 3.5 (models).Choose the one you like the best, 
        for example model #3.1. To save it in Scipion, we need to change the 
        model ID. In Chimera main menu: Favorites -> Command Line, write 
        *rename #3.1 id #4*. Then, you will see in Model panel 
        that selected model #3.1 renamed to #4. Save it 
        as first guess in Scipion by executing the Chimera command 
        *scipionwrite [model] #n [prefix XX]*. In our example 
        *scipionwrite #4 prefix model_3_1_*.\n 
        When you use the command line scipionwrite, the Chimera session will 
        be saved by default. Additionally, you can save the Chimera session 
        whenever you want by executing the command *scipionss*. You will be 
        able to restore the saved session by using the protocol chimera 
        restore session (SCIPION menu: Tools/Calculators/chimera restore 
        session). Once you have save your favorite model you can press 
        Quit in the Modeller Results window.''')

    # --------------------------- INSERT steps functions --------------------

    def prerequisitesStep(self):
        """Prepare the sequence files that the Chimera step will open.

        Without a template (``addTemplate`` false) only the target sequence
        is written, through ``preTemplate``, and its ID is kept in
        ``self.targetSeqID1``.

        With a template, the structure is read and ``prePreRequisites`` is
        run for the first chain/sequence pair and, when
        ``additionalTargetSequence`` is set, for the second pair as well.
        The chain selected by each call is stored in ``self.selectedChain1``
        and ``self.selectedChain2`` respectively.
        """
        if not self.addTemplate:
            # only the target sequence imported by the user is needed
            self.targetSeqID1 = self.preTemplate(self.inputSequence1.get(),
                                                 self.OUTFILE1)
            return

        # read the atomic structure that provides the template chain(s)
        pdbFileName = self._readPDB()

        import json

        # first structure chain / target sequence pair
        self.prePreRequisites(pdbFileName,
                              json.loads(self.inputStructureChain.get()),
                              self.inputSequence1.get(),
                              self.INFILE1,
                              self.OUTFILE1,
                              self.optionForAligning1.get(),
                              self.inputYourOwnSequenceAlignment1.get(),
                              self.inputSequencesToAlign1,
                              self.inputProgramToAlign1_1,
                              self.inputProgramToAlign2_1)
        self.selectedChain1 = self.selectedChain

        if self.additionalTargetSequence.get() is True:
            # second structure chain / target sequence pair
            self.prePreRequisites(pdbFileName,
                                  json.loads(self.selectStructureChain.get()),
                                  self.inputSequence2.get(),
                                  self.INFILE2,
                                  self.OUTFILE2,
                                  self.optionForAligning2.get(),
                                  self.inputYourOwnSequenceAlignment2.get(),
                                  self.inputSequencesToAlign2,
                                  self.inputProgramToAlign1_2,
                                  self.inputProgramToAlign2_2)

        # assigned even when no second pair was processed; in that case it
        # holds the same chain as selectedChain1
        self.selectedChain2 = self.selectedChain

    def prePreRequisites(self, fileName, chainIdDict, userSeq, inFile, outFile,
                         addSeq, yourAlignment, inputSeqAlign, programToAlign1,
                         programToAlign2):
        """Build the alignment file for one structure chain / target pair.

        Stores the selected model and chain in ``self.selectedModel`` and
        ``self.selectedChain``, gets the chain sequence from
        ``self.structureHandler`` and then, depending on ``addSeq``:

        * ``0``: aligns the chain sequence with the target sequence, using
          the program selected by ``programToAlign1``;
        * ``1``: also includes the sequences in ``inputSeqAlign`` and aligns
          all of them with the program selected by ``programToAlign2``;
        * any other value: copies ``yourAlignment`` into the extra path.

        :param fileName: structure file name; only its basename is used, in
            the log message.
        :param chainIdDict: dict with the keys ``'model'`` and ``'chain'``.
        :param userSeq: target sequence object; must provide ``getId()``,
            ``getSequence()`` and ``getIsAminoacids()``.
        :param inFile: name of the fasta file with the unaligned sequences
            (resolved under the tmp path).
        :param outFile: name of the alignment file (resolved under the
            extra path).
        :param addSeq: alignment option, see above.
        :param yourAlignment: path of a user-supplied alignment file.
        :param inputSeqAlign: iterable of pointers to extra sequence
            objects, or None.
        :param programToAlign1: parameter whose ``get()`` is an index into
            ``self.ProgramToAlign1``.
        :param programToAlign2: parameter whose ``get()`` is an index into
            ``self.ProgramToAlign2``.
        """
        # get sequence of structure chain with id chainId (selected by the user)
        self.selectedModel = chainIdDict['model']
        self.selectedChain = chainIdDict['chain']
        print("Selected chain: %s from model: %s from structure: %s"
              % (self.selectedChain, self.selectedModel,
                 os.path.basename(fileName)))

        # Bio.Seq.Seq object
        structureSeq = self.structureHandler.getSequenceFromChain(
            self.selectedModel, self.selectedChain)

        # obtain a seqID for our PDB sequence
        structSeqID = self.structureHandler.getFullID(self.selectedModel,
                                                      self.selectedChain)

        # target sequence imported by the user
        targetSeqID = userSeq.getId()  # ID associated to SEQ object (str)
        userSequence = userSeq.getSequence()  # sequence as str
        # transformation of this sequence (str) in a Bio.Seq.Seq object:
        seqHandler = SequenceHandler(userSequence,
                                     isAminoacid=userSeq.getIsAminoacids())
        targetSeq = seqHandler._sequence  # Bio.Seq.Seq object

        # creation of Dic of IDs and sequences
        SeqDic = OrderedDict()
        SeqDic[structSeqID] = structureSeq
        SeqDic[targetSeqID] = targetSeq

        # align sequences and save them to disk, -this will be chimera input-
        inFile = self._getInFastaSequencesFile(inFile)
        outFile = self._getOutFastaSequencesFile(outFile)

        if addSeq == 0:
            saveFileSequencesToAlign(SeqDic, inFile)
            selectedProgram = programToAlign1.get()
            if selectedProgram == self.ProgramToAlign1.index('Bio.pairwise2'):
                # Only the two first sequences will be included in the alignment
                self.alignment = alignBioPairwise2Sequences(
                    structSeqID, structureSeq, targetSeqID, targetSeq, outFile)
            else:
                # All the sequences will be included in the alignment
                if selectedProgram == \
                        self.ProgramToAlign1.index('Clustal Omega'):
                    cline = alignClustalSequences(inFile, outFile)
                else:
                    cline = alignMuscleSequences(inFile, outFile)
                self.runJob(cline, '')
        elif addSeq == 1:
            # if there are additional sequences imported by the user
            if inputSeqAlign is not None:
                for seqPointer in inputSeqAlign:
                    seq = seqPointer.get()
                    seqHandler = SequenceHandler(
                        seq.getSequence(), isAminoacid=seq.getIsAminoacids())
                    SeqDic[seq.getId()] = seqHandler._sequence  # Bio.Seq.Seq

            # all sequences go to the fasta file that is the aligner's input
            saveFileSequencesToAlign(SeqDic, inFile)

            # All the sequences will be included in the alignment.
            # The parameter value is read with get(), as is done for
            # programToAlign1, instead of comparing the parameter object
            # itself with the index.
            if programToAlign2.get() == \
                    self.ProgramToAlign2.index('Clustal Omega'):
                cline = alignClustalSequences(inFile, outFile)
            else:
                cline = alignMuscleSequences(inFile, outFile)
            self.runJob(cline, '')

        else:
            # the user supplies the alignment: just copy it to the extra path
            aligmentFile = os.path.basename(yourAlignment)
            outFile = os.path.join(self._getExtraPath(), aligmentFile)
            copyFile(yourAlignment, outFile)

    def preTemplate(self, userSeq, outFile):
        """Write the target sequence alone to a fasta file.

        :param userSeq: sequence object; must provide ``getSequence()``,
            ``getId()`` and ``getIsAminoacids()``.
        :param outFile: file name, resolved under the extra path.
        :return: the ID of ``userSeq``.
        """
        rawSequence = userSeq.getSequence()  # sequence as str
        seqId = userSeq.getId()  # ID as str
        # SequenceHandler turns the str into a Bio.Seq.Seq object
        handler = SequenceHandler(rawSequence,
                                  isAminoacid=userSeq.getIsAminoacids())
        bioSeq = handler._sequence

        # single-entry dictionary ID -> sequence
        sequences = OrderedDict()
        sequences[seqId] = bioSeq

        fastaFileName = self._getOutFastaSequencesFile(outFile)
        saveFileSequencesToAlign(sequences, fastaFileName)
        return seqId

    def _readPDB(self):
        """Load the structure to be refined into ``self.structureHandler``.

        :return: absolute path of the structure file that was read.
        """
        handler = AtomicStructHandler()
        self.structureHandler = handler
        structObject = self.pdbFileToBeRefined.get()
        absFileName = os.path.abspath(structObject.getFileName())
        handler.read(absFileName)
        return absFileName

    def _getInFastaSequencesFile(self, inFile):
        """Return the absolute path of ``inFile`` inside the tmp path."""
        return os.path.abspath(self._getTmpPath(inFile))

    def _getOutFastaSequencesFile(self, outFile):
        """Return the absolute path of ``outFile`` inside the extra path."""
        return os.path.abspath(self._getExtraPath(outFile))

    def runChimeraStep(self):
        """Write the Chimera command script for this run and launch Chimera.

        The script is written to the tmp path under
        ``chimeraScriptFileName`` and contains, in order:

        * the coordinate axes (a generated ``.bild`` file) and
          ``cofr 0,0,0``;
        * when no template is added and a target sequence is set, the
          target sequence file followed by a ``blastprotein`` command;
        * the atomic structure, moved to its origin when it has one;
        * for each selected chain, the chain selection and the
          re-association of that chain with its alignment file;
        * the user's extra commands when they are longer than two
          characters, in which case Chimera is started with ``--nogui``.

        Chimera is then run with the extra path as working directory.
        """
        # eventually we will create a PDB library that computes PDB dim
        dim = 150
        sampling = 1.

        bildFileName = os.path.abspath(self._getTmpPath("axis_input.bild"))
        _chimeraScriptFileName = os.path.abspath(
            self._getTmpPath(chimeraScriptFileName))

        # the context manager closes the script even if a step below raises
        with open(_chimeraScriptFileName, "w") as f:
            # building coordinate axes
            Chimera.createCoordinateAxisFile(dim,
                                             bildFileName=bildFileName,
                                             sampling=sampling)
            f.write("open %s\n" % bildFileName)
            f.write("cofr 0,0,0\n")  # set center of coordinates

            pdbModelCounter = 1
            # no template: search one with blastprotein from the target
            if (not self.addTemplate
                    and self.inputSequence1.get() is not None):
                alignmentFile1 = self._getOutFastaSequencesFile(self.OUTFILE1)
                f.write("open %s\n" % alignmentFile1)
                f.write("blastprotein %s:%s database %s matrix %s "
                        "cutoff %.3f maxSeqs %d log true\n" %
                        (os.path.basename(alignmentFile1), self.targetSeqID1,
                         self.OptionForDataBase[int(self.dataBase)],
                         self.OptionForMatrix[int(self.similarityMatrix)],
                         self.cutoffValue, self.maxSeqs))

            # atomic structure with its origin coordinates
            if (hasattr(self, 'pdbFileToBeRefined')
                    and self.pdbFileToBeRefined.get() is not None):
                pdbModelCounter += 1
                pdbFileToBeRefined = self.pdbFileToBeRefined.get()
                f.write("open %s\n" %
                        os.path.abspath(pdbFileToBeRefined.getFileName()))
                if pdbFileToBeRefined.hasOrigin():
                    x, y, z = (pdbFileToBeRefined.getOrigin().getShifts())
                    f.write("move %0.2f,%0.2f,%0.2f model #%d "
                            "coord #0\n" % (x, y, z, pdbModelCounter))

            # Alignment of sequence and structure
            if (hasattr(self, 'inputSequence1')
                    and hasattr(self, 'inputStructureChain')):
                if (self.inputSequence1.get() is not None
                        and self.inputStructureChain.get() is not None):
                    pdbModelCounter = 2
                    f.write("select %s\n" % self._getChimeraChainSpec(
                        pdbModelCounter, self.selectedChain1))
                    self._writeSequenceAssociation(
                        f, pdbModelCounter, self.selectedChain1,
                        self._getOutFastaSequencesFile(self.OUTFILE1))

                if (self.additionalTargetSequence.get() is True
                        and self.inputSequence2.get() is not None
                        and self.inputStructureChain.get() is not None):
                    f.write("select clear\n")
                    f.write("select #%s/%s,%s\n" %
                            (pdbModelCounter, str(self.selectedChain1),
                             str(self.selectedChain2)))
                    self._writeSequenceAssociation(
                        f, pdbModelCounter, self.selectedChain2,
                        self._getOutFastaSequencesFile(self.OUTFILE2))

            # extra commands supplied by the user switch Chimera to --nogui
            if len(self.extraCommands.get()) > 2:
                f.write(self.extraCommands.get())
                args = " --nogui " + _chimeraScriptFileName
            else:
                args = " " + _chimeraScriptFileName

        self._log.info('Launching: ' + Plugin.getProgram() + ' ' + args)

        # run in the background
        cwd = os.path.abspath(self._getExtraPath())
        Plugin.runChimeraProgram(Plugin.getProgram(),
                                 args,
                                 cwd=cwd,
                                 extraEnv=getEnvDictionary(self))

    def _getChimeraChainSpec(self, modelId, chain):
        """Return the Chimera specifier of ``chain`` in model ``modelId``.

        A submodel number (``self.selectedModel + 1``) is included unless
        ``self.selectedModel`` prints as ``'0'``.
        """
        if str(self.selectedModel) != '0':
            return "#%s.%s/%s" % (modelId, str(self.selectedModel + 1),
                                  str(chain))
        return "#%s/%s" % (modelId, str(chain))

    def _writeSequenceAssociation(self, f, modelId, chain, alignmentFile):
        """Write the commands that open ``alignmentFile`` and associate its
        first sequence with ``chain`` of model ``modelId``.
        """
        alignmentName = os.path.basename(alignmentFile)
        f.write("open %s\n" % alignmentFile)
        f.write("sequence disassociate #%s %s\n" % (modelId, alignmentName))
        f.write("sequence associate %s %s:1\n" %
                (self._getChimeraChainSpec(modelId, chain), alignmentName))

    def _validate(self):
        """Extend the parent validation with a check for an aligner.

        :return: the parent's list of errors, plus one message when neither
            the CLUSTALO nor the MUSCLE program is found by ``is_tool``.
        """
        errors = super(ChimeraModelFromTemplate, self)._validate()

        # one of the two alignment programs is enough
        if self.is_tool(CLUSTALO) or self.is_tool(MUSCLE):
            return errors

        missingAlignerMsg = ("Clustal-omega and MUSCLE programs missing.\n "
                             "You need at least one of them to run this "
                             "program.\n"
                             "Please install Clustal-omega and/or MUSCLE:\n"
                             "     sudo apt-get install clustalo\n"
                             "     sudo apt-get install muscle")
        errors.append(missingAlignerMsg)
        return errors