Example #1
0
 def pose(self):
   """ Loads the PDBMapStructure as a Rosetta::Pose object.

   The structure is written to a temporary PDB file with PDBIO, read back
   with rosetta.pose_from_pdb, and the temporary file is removed before
   returning, including when writing or loading raises.
   """
   import_rosetta()
   io = PDBIO()
   io.set_structure(self.structure)
   # Only the file name is needed, so reserve it and close the handle at once.
   # The previous mode string 'wrb' mixed write and read flags, which the
   # Python 3 open() rejects with a ValueError.
   tf = tempfile.NamedTemporaryFile(suffix='.pdb',delete=False)
   tf.close()
   try:
     io.save(tf.name)
     pose = rosetta.Pose()
     rosetta.pose_from_pdb(pose,tf.name)
   finally:
     # delete=False means nothing else cleans this file up
     os.remove(tf.name)
   return pose
def generate_resfile_from_pdb( pdbfilename , resfilename ,
        pack = True , design = False , input_sc = True ,
        freeze = None , specific = None ):
    """
    Writes a resfile for the PDB file <pdbfilename>  named  <resfilename>
       <pack> = True allows packing by default
       <design> = True allows design using all amino acids by default
       <input_sc> = True allows usage of the original side chain conformation
       <freeze> is an optional list of (pose) residue numbers to exclude
            (preserve the side chain conformations of these residues);
            omitted or None means an empty list
       <specific> is an optional dictionary with (pose) residue numbers as keys
            and resfile keywords as corresponding values
            (for setting individual residue options, it may be easier to add
            these numbers to freeze and edit the resfile manually);
            omitted or None means an empty dictionary

    example:
        generate_resfile_from_pdb('1YY8.pdb','1YY8.resfile')
    See also:
        generate_resfile_from_pose
        Pose
        PackRotamersMover
        TaskFactory
    """
    # Build fresh containers per call: a literal [] / {} default is created
    # once at definition time and would be shared by every call.
    if freeze is None:
        freeze = []
    if specific is None:
        specific = {}
    p = pose_from_pdb(pdbfilename)
    generate_resfile_from_pose(p,resfilename,pack,design,input_sc,freeze,specific)
    def __init__(self, scaffold_pdb, gap_descriptions, chain, protein_only = True):
        """Store the scaffold pose, gap descriptions and bookkeeping state.

        scaffold_pdb     -- path passed to rosetta.pose_from_pdb
        gap_descriptions -- list of lists of the form
                            [pre_anchor, post_anchor, pre_flank, gap, post_flank];
                            sorted in place by first element, descending
        chain            -- stored unchanged as self.chain
        protein_only     -- stored unchanged as self.protein_only
        """
        self.logger = logging.getLogger(__name__)

        # Score function used to determine the probability of moves
        self.scorefxn = rosetta.core.scoring.get_score_function()
        self.res_type = 'fa_standard'

        # Pose built from the input PDB file
        self.scaffold_pose = rosetta.pose_from_pdb(scaffold_pdb)

        # The caller's list object is kept (not copied) and reordered in place
        gap_descriptions.sort(key=lambda description: description[0], reverse=True)
        self.gap_descriptions = gap_descriptions

        self.Loops = rosetta.Loops()
        self.loop_list = []

        self.chain = chain
        self.protein_only = protein_only
    def setUp(self):
        """Load the reference fragments and the two test structures.

        Reads the "source_fragments_4_mer" table from a fixed HDF5 path, builds
        a FragmentSpecification from that table's attributes, then extracts
        residues and fragments from foldit_17_0001.pdb (stored as pass_test_*)
        and foldit_18_0001.pdb (stored as fail_test_*), both located next to
        this file.
        """
        import rosetta

        table_name = "source_fragments_4_mer"
        fragment_db = FragmentDatabase("/work/fordas/workspace/fragment_fitting/threshold_test_fragments/test_sets.h5")
        self.test_fragments = fragment_db.fragments[table_name].read()

        # Fragment length and comma-separated atom names come from table attributes
        spec_length = fragment_db.fragments[table_name].attrs.fragment_length
        spec_atoms = fragment_db.fragments[table_name].attrs.fragment_atoms.split(",")
        self.test_fragment_spec = FragmentSpecification(spec_length, tuple(spec_atoms))

        passing_pdb = path.join(path.dirname(__file__), "foldit_17_0001.pdb" )
        passing_pose = rosetta.pose_from_pdb(passing_pdb)
        self.pass_test_residues = StructureDatabase.extract_residue_entries_from_pose(passing_pose)
        _, self.pass_test_fragments = self.test_fragment_spec.fragments_from_source_residues(self.pass_test_residues)

        failing_pdb = path.join(path.dirname(__file__), "foldit_18_0001.pdb" )
        failing_pose = rosetta.pose_from_pdb(failing_pdb)
        self.fail_test_residues = StructureDatabase.extract_residue_entries_from_pose(failing_pose)
        _, self.fail_test_fragments = self.test_fragment_spec.fragments_from_source_residues(self.fail_test_residues)
Example #5
0
def pose_from_pubchem(cid, name, temporary=True):
    """Return a Pose filled from the file <name>_0001.pdb for PubChem <cid>.

    With <temporary> true, params_from_pubchem is called and a nonstandard
    residue set generated from [name] is passed to pose_from_pdb. Otherwise
    add_cid_to_database is called and the pose is loaded without an explicit
    residue set.
    """
    pose = Pose()

    if not temporary:
        # permanent route: register the compound, then load the pose
        add_cid_to_database(cid, name)
        pose_from_pdb(pose, name + '_0001.pdb')
        return pose

    # temporary route: create params and an ephemeral ResidueSet for this load
    params_from_pubchem(cid, name)
    res_set = generate_nonstandard_residue_set([name])
    pose_from_pdb(pose, res_set, name + '_0001.pdb')
    return pose
Example #6
0
    def getRMSD(self, pdbName1, pdbName2, pathfile1="", pathfile2=""):
        """Return rosetta.all_atom_rmsd between two PDB files.

        Each file name is joined onto its pathfile argument; an empty
        pathfile means the 'results' directory under ROOTPATH.
        """
        def resolve(pdb_name, directory):
            # Empty string selects the default results folder
            if directory == "":
                return os.path.join(ROOTPATH, 'results', pdb_name)
            return os.path.join(directory, pdb_name)

        # Resolve both paths before loading anything
        first_path = resolve(pdbName1, pathfile1)
        second_path = resolve(pdbName2, pathfile2)

        first_pose = rosetta.pose_from_pdb(first_path)
        second_pose = rosetta.pose_from_pdb(second_path)
        return rosetta.all_atom_rmsd(first_pose, second_pose)
Example #7
0
def main():
    opts, args = getopt.getopt(sys.argv[3:], 'i')
    show_index = 0

    for o in opts:
        if '-i' in o:
            show_index = 1

    rosetta.init()
    wtName = sys.argv[1]
    compareName = sys.argv[2]

    outputName = wtName.split('.')[0] + '_vs_' + compareName.split('.')[0] + ".txt"

    pose1 = rosetta.pose_from_pdb(wtName)
    pose2 = rosetta.pose_from_pdb(compareName)

    use_me = True
    if pose1.total_residue() != pose2.total_residue():
        print "Residue number not equal", pose1.total_residue(), \
                                        pose2.total_residue()
        use_me = False
    else:
        output = open(outputName, 'w')
        total_residue = pose1.total_residue()

        kabsch_alignment(pose1, pose2, range(1, total_residue + 1), range(1, total_residue + 1))
        # RMSD calculated by my own function
        for i in range(1, total_residue + 1):
            calculateRMS(pose1, pose2, i, output, show_index)

    # RMSD calculated by PyRosetta
    ro_rmsd = rosetta.CA_rmsd(pose1, pose2)
    print "rosetta generated rmsd: " + str(ro_rmsd)

    if use_me:
        global total_square
        me_rmsd = math.sqrt(total_square / total_residue)
        print "me generated rmsd: " + str(me_rmsd)
        output.write(outputName.split('.')[0] + ":\t" + str(ro_rmsd))
        output.close()

    print "Done"
def main():
  """Split the starting structure by chain and tag each variant with a chain.

  Command line: sys.argv[1] is the comma-separated input file, sys.argv[2]
  an output file name and sys.argv[3] a distance cutoff. The last two are
  parsed here but not used in this part of the function.

  The pose loaded from 'burn1ABC_renumb.pdb' is split by chain, chains 1-3
  are dumped to Ans_A.pdb / Ans_B.pdb / Ans_C.pdb and passed to
  capture_pdb_one. Every input row whose first field is neither 'Variant'
  nor 'WT' is appended to all_data together with a running index, the
  position extracted from the first field, and the chain letter ('A', 'B'
  or 'C') chosen by comparing that position with cumulative chain lengths.
  """
  args  =  sys.argv
  in_file = args[1]
  out_file = args[2]
  distance_cutoff = float(sys.argv[3])
  init(extra_options='-mute basic -mute core -mute protocols -mute Warning')

  # Read everything up front and release the file handle immediately
  with open(in_file, 'r') as handle:
    all_lines = handle.readlines()
  print(len(all_lines))

  # get the protein used to initialize the forward simulation
  initial_pose = pose_from_pdb(str('burn1ABC_renumb.pdb'))

  # save each one of its chains
  chains=initial_pose.split_by_chain()

  ancestral1 = chains[1]
  ancestral2 = chains[2]
  ancestral3 = chains[3]

  ancestral1.dump_pdb("Ans_A.pdb")
  ancestral2.dump_pdb("Ans_B.pdb")
  ancestral3.dump_pdb("Ans_C.pdb")

  ancestral_structure1=capture_pdb_one("Ans_A_cap.pdb","Ans_A.pdb")
  ancestral_structure2=capture_pdb_one("Ans_B_cap.pdb","Ans_B.pdb")
  ancestral_structure3=capture_pdb_one("Ans_C_cap.pdb","Ans_C.pdb")

  # Chain boundaries in pose numbering; they do not change inside the loop
  last_of_a = ancestral1.total_residue()
  last_of_b = last_of_a + ancestral2.total_residue()

  all_data = []
  i=0
  for a_line in all_lines:
    split = a_line.split(',')
    # skip header and wild-type rows
    if split[0] in ('Variant', 'WT'):
      continue

    print(split[0])
    pos=re.sub("[^0-9^.]", "", split[0])
    print(pos)

    # figure out if a position is in A, B or C
    residue_number = int(pos)
    if residue_number <= last_of_a:
      chain_id = 'A'
    elif residue_number <= last_of_b:
      chain_id = 'B'
    else:
      chain_id = 'C'

    # Index each of the nine fields explicitly so a short row still raises
    # IndexError instead of being silently truncated
    all_data.append([i,pos] + [split[k] for k in range(9)] + [chain_id])
    i=i+1
Example #9
0
def generate_resfile_from_pdb(pdbfilename, resfilename, input_sc = True ):
    """
    Loads PDB file <pdbfilename> into a pose and writes a resfile named
    <resfilename> for it via generate_resfile_from_pose. <input_sc> is
    forwarded unchanged and controls whether input side chains may be used
    in packing.

    Example:
        generate_resfile_from_pdb("1YY8.pdb", "1YY8.resfile")
    See also:
        Pose
        PackRotamersMover
        TaskFactory
    """
    loaded_pose = rosetta.pose_from_pdb(pdbfilename)
    generate_resfile_from_pose(loaded_pose, resfilename, input_sc)
Example #10
0
def cleanCRYS(pdb_file, olig=2, out_file=''):
    """
    Writes a PDB file for a monomer of  <pdb_file>  if it is a  <olig>-mer
    to  <out_file>  (defaults to  <pdb_file>.mono.pdb)
    note: this is a simple sequence comparison

    example:
        cleanCRYS('1YY8.pdb',2)
    See also:
        Pose
        Pose.dump_pdb
        pose_from_pdb
        pose_from_rcsb
        cleanATOM
    """
    # if the file exists
    if os.path.exists(os.getcwd() + '/' + pdb_file):
        # load in the PDB...this is really just to get the sequence
        pose = pose_from_pdb(pdb_file)
        tot = pose.total_residue()
        seq = pose.sequence()
        # generate sequence fragments until
        frags = [''] * olig
        match = [False] * (olig - 1)
        olig = float(olig)
        frac = int(round(tot / olig))
        for f in range(int(olig)):
            frags[f] = seq[:frac]
            seq = seq[frac:]
        # determine if sequence fragments are identical
        for f in range(int(olig - 1)):
            match[f] = (frags[0] == frags[f + 1])
        # if the protein has repeats, delete all other residues
        if sum(match) == (olig - 1):
            for i in range(frac * int(olig - 1)):
                pose.delete_polymer_residue(frac + 1)  # I hope this works!
            # write the new pdb file
            if not out_file:
                out_file = pdb_file[:-4] + '.mono.pdb'
            print 'if the file', out_file, ' already exists, it will be overwritten'
            pose.dump_pdb(out_file)
            print 'PDB', pdb_file, 'successfully cleaned, redundant monomers removed\nmonomer data written to', out_file
            return True
        else:
            print pdb_file, 'is not a ' + str(int(olig)) + '-mer'
            return False
    else:
        print 'No such file or directory named ' + pdb_file
        return False
Example #11
0
def check_sasa(Pdb, ResidueSubsets, StartingResidue, LastResidue,
               SasaProbeRadius):
    ''' Uses Alex's AtomicSasaCalculator to calculate average SASA (surface area solvent accessibility) for residue sets input '''
    PdbfullPath = ''.join(
        ['/lab/databases/pdb_clean/', Pdb[1:3].lower(), '/', Pdb[0:4], '.pdb'])
    # Load pdb into a rosetta pose object
    PdbPose = rosetta.pose_from_pdb(PdbfullPath)

    if len(Pdb) > 4:
        TargetChainIndex = ChainAlphabetIndices[Pdb[4]]
        PdbChains = PdbPose.split_by_chain()
        # Silly loop to get pose with only the desired chain, there probably is a better way to do this
        for i, Chain in enumerate(PdbChains):
            if i == TargetChainIndex:
                PdbPose = Chain
                break

    # print LastResidue - StartingResidue, PdbPose.n_residue()
    if (LastResidue - StartingResidue + 1) != PdbPose.n_residue():
        print 'Pose werid, returning bogus SASA values'
        return [999.999 for Set in ResidueSubsets]

    # initalize Alex's AtomicSasaCalculator
    SasaCalculator = AtomicSasaCalculator(probe_radius=SasaProbeRadius)
    # get array of residue sasa's
    ResidueSasa = SasaCalculator.calculate_per_residue_sasa(PdbPose)

    SubsetAverageSasas = []
    # RepeatMinimumSasas = []
    # RepeatMaximumSasas = []

    # print PdbPose.n_residue()
    # print len(ResidueSasa)

    count = 0
    for Residues in ResidueSubsets:
        # Converts residue number from pdb to appropriate index for sasa array
        ResidueIndices = [ResNum - StartingResidue for ResNum in Residues]
        # print ResidueIndices
        # print Residues

        SubsetAverageSasas.append(
            np.mean([ResidueSasa[ResIndex] for ResIndex in ResidueIndices]))
        # RepeatMinimumSasas.append( min( [ ResidueSasa[ResIndex] for ResIndex in ResidueIndices ] ) )
        # RepeatMaximumSasas.append( max( [ ResidueSasa[ResIndex] for ResIndex in ResidueIndices ] ) )

        count += 1

    return SubsetAverageSasas
 def load_pdb(self, pdb_name):
     # store list of ligand params files
     lig_params = []
     for f in os.listdir( self.working_dir ):
         if f.endswith( ".params" ):
             lig_params.append( f )
     
     # extra options string creation
     ext_opts = "-mute basic -mute core -ignore_waters True"
     for param in lig_params:
         ext_opts = ext_opts + " -in:file:extra_res_fa %s" %param
     
     print "Initializing Rosetta with the following options:", ext_opts    
     from rosetta import init
     init(extra_options=ext_opts)
     
     self.pose = pose_from_pdb( pdb_name )
Example #13
0
def check_sasa(Pdb, ResidueSubsets, StartingResidue, LastResidue, SasaProbeRadius):
	''' Uses Alex's AtomicSasaCalculator to calculate average SASA (surface area solvent accessibility) for residue sets input '''
	PdbfullPath = ''.join( ['/lab/databases/pdb_clean/', Pdb[1:3].lower(), '/', Pdb[0:4], '.pdb'] )
	# Load pdb into a rosetta pose object 
	PdbPose = rosetta.pose_from_pdb(PdbfullPath)

	if len(Pdb) > 4:
		TargetChainIndex = ChainAlphabetIndices[Pdb[4]]
		PdbChains = PdbPose.split_by_chain()
		# Silly loop to get pose with only the desired chain, there probably is a better way to do this
		for i, Chain in enumerate(PdbChains):
			if i == TargetChainIndex:
				PdbPose = Chain
				break

	# print LastResidue - StartingResidue, PdbPose.n_residue()
	if (LastResidue - StartingResidue + 1) != PdbPose.n_residue():
		print 'Pose werid, returning bogus SASA values'
		return [999.999 for Set in ResidueSubsets]

	# initalize Alex's AtomicSasaCalculator
	SasaCalculator = AtomicSasaCalculator(probe_radius=SasaProbeRadius)
	# get array of residue sasa's
	ResidueSasa = SasaCalculator.calculate_per_residue_sasa(PdbPose)

	SubsetAverageSasas = []
	# RepeatMinimumSasas = []
	# RepeatMaximumSasas = []

	# print PdbPose.n_residue()
	# print len(ResidueSasa)

	count = 0
	for Residues in ResidueSubsets:
		# Converts residue number from pdb to appropriate index for sasa array
		ResidueIndices = [ResNum - StartingResidue for ResNum in Residues]
		# print ResidueIndices
		# print Residues

		SubsetAverageSasas.append( np.mean( [ ResidueSasa[ResIndex] for ResIndex in ResidueIndices ] ) )
		# RepeatMinimumSasas.append( min( [ ResidueSasa[ResIndex] for ResIndex in ResidueIndices ] ) )
		# RepeatMaximumSasas.append( max( [ ResidueSasa[ResIndex] for ResIndex in ResidueIndices ] ) )

		count += 1
	
	return SubsetAverageSasas
Example #14
0
    def load_pdb(self, pdb_name):
        # store list of ligand params files
        lig_params = []
        for f in os.listdir(self.working_dir):
            if f.endswith(".params"):
                lig_params.append(f)

        # extra options string creation
        ext_opts = "-mute basic -mute core -ignore_waters True"
        for param in lig_params:
            ext_opts = ext_opts + " -in:file:extra_res_fa %s" % param

        print "Initializing Rosetta with the following options:", ext_opts
        from rosetta import init
        init(extra_options=ext_opts)

        self.pose = pose_from_pdb(pdb_name)
Example #15
0
def cleanCRYS(pdb_file, olig = 2):
    """
    Removes redundant crystal contacts and isolates a monomer by writing a PDB
    file for a monomer of <pdb_file>, if it is an <olig>-mer, to
    <pdb_file>.mono.

    Note: This is by simple sequence comparison.

    Example:
        cleanCRYS("1YY8.pdb", 2)
    See also:
        Pose
        Pose.dump_pdb
        pose_from_pdb
        pose_from_rcsb
        cleanATOM
    """
    if os.path.exists(os.getcwd() + '/' + pdb_file):
        print "If the file", pdb_file[:-4] + ".mono.pdb already exists, " + \
              "it will be overwritten."
        pose = rosetta.pose_from_pdb(pdb_file)
        tot = pose.total_residue()
        seq = pose.sequence()
        frags = [''] * olig
        match = [False] * (olig - 1)
        olig = float(olig)
        frac = int(round(tot / olig))
        for f in range(int(olig)):
            frags[f] = seq[:frac]
            seq = seq[frac:]
        for f in range(int(olig-1)):
            match[f] = (frags[0] == frags[f + 1])
        if sum(match) == (olig - 1):
           for i in range(frac * int(olig - 1)):
               pose.delete_polymer_residue(frac + 1)
           pose.dump_pdb(pdb_file[:-4] + ".mono.pdb")
           print "PDB", pdb_file, "successfully cleaned, redundant " + \
                 "monomers removed."
           print "Monomer data written to", pdb_file[:-4] + ".mono.pdb."
        else:
            print pdb_file, "is not a " + str(int(olig)) + "-mer."
    else:
        raise IOError("No such file or directory named " + pdb_file)
Example #16
0
    def __init__(self,
                 User,
                 Key,
                 RefPdb,
                 ScoreFxns=None,
                 FxnNames=None,
                 PerResidue=True):
        ''' Track scores of design trajectories for plotly plots

        User, Key  -- stored as self.User and self.ApiKey
        RefPdb     -- path of the reference pdb; also loaded into self.RefPose
        ScoreFxns  -- list of score functions (omitted or None: new empty list)
        FxnNames   -- list of names, same length as ScoreFxns
                      (omitted or None: new empty list)
        PerResidue -- stored as self.PerRes

        Raises AssertionError when ScoreFxns and FxnNames differ in length.
        '''

        self.User = User
        self.ApiKey = Key

        # plotly is imported here and kept on the instance
        import plotly.graph_objs as Graph
        import plotly.plotly as py

        self.Graph = Graph
        self.py = py

        # A literal [] default is created once and would be shared by every
        # instance built without these arguments; make a fresh list instead.
        # Lists passed in by the caller are still stored as-is.
        self.ScoreFxns = [] if ScoreFxns is None else ScoreFxns
        self.FxnNames = [] if FxnNames is None else FxnNames
        assert len(self.ScoreFxns) == len(self.FxnNames)

        self.PerRes = PerResidue

        self.RefPdb = RefPdb

        self.RefPose = rosetta.pose_from_pdb(RefPdb)

        self.Score2dComboTraces = {}

        # One running maximum / minimum per score function
        self.MaxScores = [0 for Fxn in self.ScoreFxns]
        self.MinScores = [999 for Fxn in self.ScoreFxns]

        # Scores ordered in all score lists in same order for plotting
        self.TaggedPoseScores = {}
        self.PoseTags = []
        # Later keyed with index of self.ScoreFxns
        self.ScoreFunctionScoredPdbs = {}

        self.CstDict = {}
Example #17
0
def idealize_and_relax_pdb_set( PdbCstPairs ):
  
  for PdbName, CstName in PdbCstPairs:
    print '(PdbName, CstName) ', (PdbName, CstName) 
    '''  idealize peptide bonds with command line subprocess  '''
    subprocess.check_output([ 'idealize_jd2.default.linuxgccrelease', '-s', PdbName ])
    IdealizedPdbOldName = re.sub( r'(.*).pdb$', r'\1_0001.pdb', PdbName )
    IdealizedPdbNewName = re.sub( r'(.*).pdb$', r'\1_Ideal.pdb', PdbName )
    
    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    time.sleep(0.2)

    IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName )

    # make constraint mover
    Constrainer = rosetta.ConstraintSetMover()
    # get constraints from file
    Constrainer.constraint_file(CstName)
    Constrainer.apply(IdealizedCappedPose)


    ''' SET UP WEIGHTS '''
    Talaris = rosetta.getScoreFunction()
    TalarisPlusCst = rosetta.getScoreFunction()

    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 2.5)
    print 'relaxing %s with %s'%(IdealizedPdbNewName, CstName) 

    # relax w/ cst
    rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
    # relax w/o cst
    rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

    RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1_Relax.pdb', IdealizedPdbNewName)
    rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
  def __init__(self, User, Key, RefPdb, ScoreFxns=None, FxnNames=None, PerResidue=True):
    ''' Track scores of design trajectories for plotly plots

    User, Key  -- stored as self.User and self.ApiKey
    RefPdb     -- path of the reference pdb; also loaded into self.RefPose
    ScoreFxns  -- list of score functions (omitted or None: new empty list)
    FxnNames   -- list of names, same length as ScoreFxns
                  (omitted or None: new empty list)
    PerResidue -- stored as self.PerRes

    Raises AssertionError when ScoreFxns and FxnNames differ in length.
    '''

    self.User = User
    self.ApiKey = Key

    # plotly is imported here and kept on the instance
    import plotly.graph_objs as Graph
    import plotly.plotly as py

    self.Graph = Graph
    self.py = py

    # A literal [] default is created once and would be shared by every
    # instance built without these arguments; make a fresh list instead.
    # Lists passed in by the caller are still stored as-is.
    self.ScoreFxns = [] if ScoreFxns is None else ScoreFxns
    self.FxnNames = [] if FxnNames is None else FxnNames
    assert len(self.ScoreFxns) == len(self.FxnNames)

    self.PerRes = PerResidue

    self.RefPdb = RefPdb

    self.RefPose = rosetta.pose_from_pdb( RefPdb )

    self.Score2dComboTraces = {}

    # One running maximum / minimum per score function
    self.MaxScores = [ 0 for Fxn in self.ScoreFxns ]
    self.MinScores = [ 999 for Fxn in self.ScoreFxns ]

    # Scores ordered in all score lists in same order for plotting
    self.TaggedPoseScores = {}
    self.PoseTags = []
    # Later keyed with index of self.ScoreFxns
    self.ScoreFunctionScoredPdbs = {}

    self.CstDict = {}
Example #19
0
def pose_from_rcsb(pdb_code, ATOM = True, CRYS = False):
    """
    Fetches RCSB PDB <pdb_code> with load_from_rcsb and returns a pose
    loaded from the resulting file. With <ATOM> true, cleanATOM is run and
    the ".clean" file is used; with <CRYS> true, cleanCRYS is run on the
    current file and the ".mono" file is used.

    example:
        pose = pose_from_rcsb("1YY8")
    See also:
        Pose
        pose_from_pdb
        pose_from_sequence
        load_from_rcsb
        cleanATOM
        cleanCRYS
    """
    load_from_rcsb(pdb_code)

    # file stem, extended by each cleaning step that ran
    stem = pdb_code
    if ATOM:
        cleanATOM(stem + ".pdb")
        stem += ".clean"
    if CRYS:
        cleanCRYS(stem + ".pdb")
        stem += ".mono"

    return rosetta.pose_from_pdb(stem + ".pdb")
def main(ExtraResidues=0, ipython=0):
  """ Score globbed pdbs, plot the scores with plotly and interactively copy
  the pdbs that pass user-entered score cutoffs.

  Steps, in order:
    1. parse command line arguments and glob the pdbs
    2. optionally build a nonstandard residue set from -param (returned
       immediately when <ipython> is true)
    3. derive a repeat length and a process tag from every pdb file name
    4. load and score every pdb, sort, and group by repeat length (and by
       tag when -multi is set)
    5. build the score functions and the plotly_plotter, score the groups
       and render the plots
    6. loop: ask for one cutoff per score function and an output folder,
       then copy the passing pdbs (and their cst files) there

  ExtraResidues -- 0, or an existing residue set; only rebuilt when it is 0
                   and -param was given
  ipython       -- when true and a residue set was built, return it
  """
  ### Required args
  ArgParser = argparse.ArgumentParser(description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ")
  ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True )    
  ArgParser.add_argument('-native', type=str, help=" pdb to compare designs against ", required=True )    
  ### Default args
  ArgParser.add_argument('-cst', type=str, help=" to provide cst manually, will apply to all globed pdbs!!! ", default=False )
  ArgParser.add_argument('-param', type=str, nargs='+', help=" params ", default=[] )
  ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1 )

  ### following args are for plotly:
  ### change if you use this script!!!
  # NOTE(review): the two defaults below look like a real account id and API
  # key committed to source; confirm and move them out of the code.
  ArgParser.add_argument('-plotly_id', type=str, help=" ", default="pylesharley") # required=True )    
  ArgParser.add_argument('-plotly_key', type=str, help="  ", default="cc5z4a8kst") # required=True )    
  ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1 )
  ArgParser.add_argument('-name', type=str, help=" plot tag ", default='' )
  ArgParser.add_argument('-and_or', type=str, help=" And/Or logic for score cutoffs. Default = 'and'  ", default='and' )
  ArgParser.add_argument('-multi', type=int, help=" 0|(1) plot different methods together on same plot ", default=1 )
  
  Args = ArgParser.parse_args()
  Pdbs = glob.glob( Args.pdb_glob )
  print 'globed %d pdbs'%len(Pdbs)

  if ExtraResidues == 0 and len(Args.param) > 0:
    # NOTE(review): the except branch repeats the try body verbatim, so this
    # is a single blind retry and a second failure propagates.
    try: 
      ExtraParams = rosetta.Vector1( Args.param )
      ExtraResidues = rosetta.generate_nonstandard_residue_set( ExtraParams )
    except:
      ExtraParams = rosetta.Vector1( Args.param )
      ExtraResidues = rosetta.generate_nonstandard_residue_set( ExtraParams )
    ### for ipython mode
    if ipython: 
      return ExtraResidues

  Args.and_or = Args.and_or.lower()
  assert Args.and_or == 'and' or Args.and_or == 'or', " -and_or must equal 'and' or 'or' "

  RepeatLengths = []
  ProcessTags = {}
  TagList = []
  TagByPdbName = {}

  # better to find out of native pdb is wrong before waiting for pdb scoring
  # NOTE(review): the handle is only opened as an existence check; it is never
  # read or closed in this function.
  Check = open(Args.native, 'r')

  # print ' first loop '
  # Collect the common leading substring of every ordered pair of distinct pdb names
  OverlapStarts = []
  for Pdb in Pdbs:
    Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
    for OtherPdb in Pdbs:
      # NOTE(review): OtherTag is computed from Pdb (not OtherPdb) and is not
      # used afterwards.
      OtherTag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
      i = 0
      if Pdb != OtherPdb:
        while Pdb[:i] == OtherPdb[:i]:
          i+=1
        Overlap = OtherPdb[:i-1]
        OverlapStarts.append( ( len(Overlap), Overlap ) )

  OverlapStarts.sort()
  # NOTE(review): OverlapStarts is empty when fewer than two distinct pdbs
  # were globbed, in which case this index raises IndexError.
  ShortestOverlap = OverlapStarts[0][1]

  # print 'OverlapStarts', OverlapStarts
  # print 'ShortestOverlap', ShortestOverlap
  
  for Pdb in Pdbs:
    # Repeat length is the number following 'rep' in the file name; 0 when
    # the substitution result is not an integer
    try:
      RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
    except ValueError:
      RepeatLength = 0
    # SourceStart = int(re.sub(r'^.*src(\d+).*pdb$', r'\1', Pdb))
    # NOTE(review): RepeatLength is an int here and Pdb a string, so this
    # assertion cannot fail.
    assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb 
    # assert SourceStart != Pdb and RepeatLength != Pdb, ' regular expression extraction of rep or src value failed on %s '%Pdb 
    RepeatLengths.append(RepeatLength)    

    #### re.sub out tag from design process
    Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
    Tag = re.sub(r'^%s(.*)\.pdb$'%(ShortestOverlap), r'\1', Tag)
    
    TagByPdbName[Pdb] = Tag
    # Number the distinct tags 1, 2, ... in order of first appearance
    try:
      TagNumber = ProcessTags[Tag] 
    except:
      TagNumber = len(ProcessTags) + 1
      ProcessTags[Tag] = TagNumber
    TagList.append(TagNumber)

  # Scoring is redundant, once for sorting outside plotter, then again in plotter
  # making not redundant not a priority. 
  # Scoring in the plotter object is so multiple score functions can be plotted easily

  # Sort by repeat length, then score
  if Args.multi:
    # Sort by repeat length, then method tag, then score
    MultiPoseSortingTuples = []
  else:
    PoseSortingTuples = []

  Talaris = rosetta.getScoreFunction()
  for i, Pdb in enumerate(Pdbs):
    RepeatLength = RepeatLengths[i]
    ProcessNumber = TagList[i]
    Pose = rosetta.pose_from_pdb(Pdb)
    if Args.norm:
      Score = Talaris(Pose) / Pose.n_residue()
    else:
      Score = Talaris(Pose) 
    
    # print 'Pdb', Pdb
    if Args.multi:
      MultiPoseSortingTuples.append( (RepeatLength, ProcessNumber, Score, Pose) )
    else:
      PoseSortingTuples.append( (RepeatLength, Score, Pose) )


  if Args.multi:
    # Sort by repeat length, then method tag, then score
    MultiPoseSortingTuples.sort()
  else:
    # sorts by repeat length (shortest to longest) then score (best to worst)
    PoseSortingTuples.sort()

  if Args.multi:
    # print 'MultiPoseSortingTuples', MultiPoseSortingTuples
    SortedTuples = MultiPoseSortingTuples
  else:
    # print 'PoseSortingTuples', PoseSortingTuples
    SortedTuples = PoseSortingTuples

  # Split the sorted tuples into groups: a new group starts whenever the
  # repeat length changes and, with -multi, whenever the tag number changes
  LastLength = 0
  LastTag = 0
  AllGroups = []
  CurrentGroup = []

  for PoseTuple in SortedTuples:
    Length = PoseTuple[0]
    if Args.multi:
      Tag = PoseTuple[1]
    
    if LastLength and Length != LastLength:
      AllGroups.append(CurrentGroup)
      CurrentGroup = []
    
    if Args.multi:
      if LastTag and Tag != LastTag:
        AllGroups.append(CurrentGroup)
        CurrentGroup = [] 
    
    CurrentGroup.append(PoseTuple)
    LastLength = Length
    if Args.multi: 
      LastTag = Tag

  # for last repeat length
  AllGroups.append(CurrentGroup)

  ''' Build score functions here: '''

  Talaris = rosetta.getScoreFunction()

  # This line returns a talaris function with all default weights set to 0
  CstScore = set_all_weights_zero( rosetta.getScoreFunction() )
  CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
  CstScore.set_weight(rosetta.angle_constraint, 5.0)
  CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

  HbondScore = set_all_weights_zero( rosetta.getScoreFunction() )
  HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
  HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
  HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
  HbondScore.set_weight(rosetta.hbond_sc, 1.100)

  Disulfide = set_all_weights_zero( rosetta.getScoreFunction() )
  Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

  # NOTE(review): Plotter is only created when -plot is non-zero, but it is
  # used unconditionally after the grouping loop below.
  if Args.plot:
    if Args.norm:
      PerRes = True
    else:
      PerRes = False
    ''' Add and remove score functions here '''
    Plotter = plotly_plotter( Args.plotly_id, Args.plotly_key, Args.native,
                              ScoreFxns=[ CstScore, Talaris, HbondScore, Disulfide ],
                              FxnNames=[ 'ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide' ],
                              PerResidue=PerRes )

  XaxisSortingTuples = []

  for PoseGroup in AllGroups:
  # for PoseGroup in [SortedTuples]:
    if len(PoseGroup):
      # print 
      # print 'Group:', PoseGroup
      # The pose is the last element of every sorting tuple
      Poses = [ PoseTuple[-1] for PoseTuple in PoseGroup ]
      # print PoseGroup
      RepeatLength = PoseGroup[0][0]
      # print '\n'.join( [ Pose.pdb_info().name() for Pose in Poses ] ) 
      # print 'Zero index pose tuple:'
      # print PoseGroup[0]
     
      if Args.plot:
        GroupPdbName = PoseGroup[0][-1].pdb_info().name()
        # Groups are only handed to the plotter when -multi is set
        if Args.multi:
          Tag = TagByPdbName[GroupPdbName] 
          
          if Args.cst:
            Plotter.score_poses( Poses, Args.cst, Tag )
          else:
            Plotter.score_poses( Poses, 1, Tag )
  
  # return Plotter
  Plotter.plot_2d_score_combinations()
  print 'Plotter.Score2dComboTraces', 3, Plotter.Score2dComboTraces

  Plotter.draw_comparisons()

  print 'plotting...'
  if len(Args.name):
    Name = Args.name
  else:
    # RepeatLength here is whatever the last non-empty group left behind
    Name = '%s based %d res '%( Args.native, RepeatLength )
  Plotter.render_scatter_plot( PlotName=Name )
  
  # Interactive selection loop: repeats until the user answers anything but Y
  while 1:

    ScoreFunctionScoreCutoffs = []
    for i, Name in enumerate( Plotter.FxnNames ):
      # keep asking until the input parses as a float
      while 1:
        try:
          Cutoff = float( raw_input('\tEnter cutoff value (maximum) for %s function: '%Name) ) 
          break
        except ValueError:
          pass  
      ScoreFunctionScoreCutoffs.append(Cutoff)

    print 'Cutoff values set at:'
    for i, Name in enumerate( Plotter.FxnNames ):
      # print Name, ScoreFunctionScoreCutoffs[i]
      Plotter.ScoreFunctionScoredPdbs[i].sort()

    PassingPdbs = []
    for i, Name in enumerate( Plotter.FxnNames ):
      PassThisFxn = []
      Cutoff = ScoreFunctionScoreCutoffs[i]
      # print Plotter.ScoreFunctionScoredPdbs[i]
      # The list was sorted above, so the first score over the cutoff ends the scan
      for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]:
        if Score <= Cutoff:
          PassThisFxn.append(Pdb)
        else:
          break
      PassingPdbs.append( PassThisFxn )

    # 'and' keeps pdbs passing every function, 'or' keeps pdbs passing any
    PdbsPassingAll = PassingPdbs[0]
    if Args.and_or == 'and':
      for OtherSet in PassingPdbs[1:]:
        PdbsPassingAll = list( set(PdbsPassingAll) & set(OtherSet) )
    else:
      for OtherSet in PassingPdbs[1:]:
        PdbsPassingAll = list( set(PdbsPassingAll + OtherSet) )
    
    Outdir = raw_input( '\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: '%Args.and_or ) 

    if not os.path.isdir(Outdir):
      subprocess.check_output(['mkdir', Outdir])
    if Outdir [-1] != '/':
      Outdir = Outdir + '/'

    for Pdb in PdbsPassingAll:
      subprocess.check_output([ 'cp', Pdb, Outdir ])
      if Plotter.CstDict[Pdb] != None:
        subprocess.check_output([ 'cp', Plotter.CstDict[Pdb], Outdir ])

    Continue = str( raw_input( '\tEnter Y to add another set of selection threshold, or anything else to quit: ') ).upper()
    if Continue == 'Y':
      pass
    else:
      break
Example #21
0
def main(argv=None):
  # if argv is None:
  #   argv = sys.argv
  if argv != None:                                                             
    sys.argv =[ sys.argv[0] ]+[ arg for arg in argv ]                          
  # print 'sys.argv', sys.argv
  
  # Arg block
  ArgParser = argparse.ArgumentParser(description=' expand_cst.py ( -help ) %s'%InfoString)
  # Required args
  ArgParser.add_argument('-ref_pdb', type=str, help=' reference pdb ', required=True)
  ArgParser.add_argument('-ref_cst', type=str, help=' corresponding to reference pdb ', required=True)
  ArgParser.add_argument('-repeat_pdb_tag', type=str, help=' input pdb tag ', required=True)
  # Optional args
  ArgParser.add_argument('-out', type=str, help=' Output directory ', default='./')
  Args = ArgParser.parse_args()
  if Args.out [-1] != '/':
    Args.out = Args.out + '/'


  # default talaris 2013 score function
  ScoreFunction = rosetta.getScoreFunction()
  # turning on constraint weights
  ScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
  ScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
  ScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

  RefPdb = Args.ref_pdb
  # print RefPdb
  ReferencePose = rosetta.pose_from_pdb( RefPdb )
  print 'ReferencePose', ReferencePose

  # modify rosetta cst w/o rosetta
  Constrainer = constraint_extrapolator(Args.ref_cst)

  # RefCst = Args.ref_cst
  # # make constraint mover
  # Constrainer = rosetta.ConstraintSetMover()
  # # get constraints from file
  # Constrainer.constraint_file(RefCst)  
  # # Apply constraints to pose
  # Constrainer.apply(ReferencePose)

  # return Constrainer

  Pdbs = glob.glob( '*%s*.pdb'%Args.repeat_pdb_tag ) 
  assert len(Pdbs), r"No pdbs found with glob: \n %s \n '* % s *.pdb' % Args.repeat_pdb_tag "%Args.repeat_pdb_tag
  
  for Pdb in Pdbs:
    ## For debug put pdb of interest here:
    # if Pdb == 'src15_38__22_45_rep24_1EZG.pdb':

    print 'Pdb:', Pdb 
    Pose = rosetta.pose_from_pdb(Pdb)

    try: 
      SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', Pdb)
      SourceRanges = [ [ int(Number) for Number in Range.split('_') ]   for Range in SourceRangeString.split('__') ]
    except ValueError:
      print 'No src range tag, skipping: %s '%Pdb
      continue

    print 'SourceRanges:', SourceRanges
    RepeatLength = int( re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb) )
    print 'RepeatLength', RepeatLength
    print
    
    # print [Pdb]
    PdbTag = (Pdb+'!').replace('.pdb!', '').replace('!', '')
    CstName = PdbTag+'.cst'
    ExtrapolatedConstraints = Constrainer.extrapolate_from_repeat_unit(SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose, CstName, PdbTag)
        '-mute core -mute protocols '
        #'-run:constant_seed '
        #'-run:jran 618450550 '
        #'-out:levels protocols.simple_moves.MinMover:500 '
        #'-out:levels core.optimization.AtomTreeMinimizer:500 '
        #'-out:levels core.optimization.Minimizer:500 '
        #'-out:levels protocols.moves.RigidBodyMover:200 '
        #'-out:levels core.optimize:500 '
        #'-out:levels core.optimization.LineMinimizer:500 '
        #'-out:levels protocols.simple_moves.PackRotamersMover:500 '
        #'-out:levels core.pose:500 -out:levels core.io.pdb.file_data:500 -out:levels core.import_pose.import_pose:500'
    )

    # Create pose.
    print '\nGenerating starting pose...'
    starting_pose = pose_from_pdb(new_filename)

    # Display stating pose.
    if args.pm:
        starting_pose.pdb_info().name(args.pdb_filename1[:-4] + "-" + \
                                                            args.pdb_filename2)
        pm = PyMOL_Mover()
    visualize(starting_pose)

    # Prepare the foldtree.
    upstream_chains, downstream_chains = \
                                      determine_docking_partners(starting_pose)
    partners = upstream_chains + "_" + downstream_chains
    # TODO: Modify C++ so that chemical edges are not removed.
    setup_foldtree(starting_pose, partners, Vector1([JUMP_NUM]))
Example #23
0
def main(argv=None):
  if argv != None:                                                             
    sys.argv =[ sys.argv[0] ]+[ arg for arg in argv ]                          
  print 'sys.argv', sys.argv

  ArgParser = argparse.ArgumentParser(description=' nc_cst_gen.py arguments ( -help ) %s'%InfoString)
  # Required arguments:
  ArgParser.add_argument('-pdbs', type=str, nargs='+', help=' input pdbs ', required=True)
  # Optional arguments:
  ArgParser.add_argument('-out', type=str, help=' output directory ', default='./')
  ArgParser.add_argument('-max_dist', type=float, default=3.4, help=' distance between the oxygens and downstreams ')
  ArgParser.add_argument('-min_seq_sep', type=int, default=3, help=' minimum seperation in primary sequece ')
  ArgParser.add_argument('-upstream_atom', type=str, default='[ON]\w?\d?', help=' grep for upstream atoms ')
  ArgParser.add_argument('-downstream_atom', type=str, default='[ON]\w?\d?', help=' grep for downstream atoms ')
  ArgParser.add_argument('-num_repeats', type=int, default=5, help=' number of repeats to extrapolate contacts for ')
  ArgParser.add_argument('-min_sasa',  type=float, default=0.0,  help=' floor for weighting downstream oxygen contacts ')
  ArgParser.add_argument('-min_sasa_weight',  type=float, default=1.0,  help=' weight of floor for downstream oxygen contacts ')
  ArgParser.add_argument('-max_sasa',  type=float, default=5.0,  help=' ceiling for cst weighting downstream oxygen contacts ')
  ArgParser.add_argument('-max_sasa_weight',  type=float, default=0.1,  help=' weight of ceiling for downstream oxygen contacts ')
  ArgParser.add_argument('-sasa_probe_radius', type=float, default=0.8,  help=' probe radius for sasa calculations ')
  ArgParser.add_argument('-renumber_pose', type=bool, default=True, help='True|False renumber pdb residues ' )
  
  ArgParser.add_argument('-disulfide', type=bool, default=True, help='True|False include disulfide constraints ' )  

  Args = ArgParser.parse_args()
  
  # if len(Args.pdbs[0]) == 1:
  #   Args.pdbs = [''.join(Args.pdbs)]

  if Args.out [-1] != '/':
    Args.out = Args.out + '/'

  import rosetta
  rosetta.init(extra_options = "-mute basic -mute core -mute protocols")

  ReportedRepeatCount = 0
  TotalPdbs = len(Args.pdbs)

  # Instance of class to convert sasas to cst weight
  SasaScale = sasa_scale( Args.min_sasa, Args.min_sasa_weight, Args.max_sasa, Args.max_sasa_weight )
  
  for iPdb, Pdb in enumerate(Args.pdbs):
    print ' Working with %s; %d of %d total pdbs '%(Pdb, iPdb+1, TotalPdbs)
    # Starting rosetta  
    Pose = rosetta.pose_from_pdb(Pdb)
    OutputPdb = Args.out+Pdb

    # Sets pdb info so residues in dumped pdbs are same as index 
    Pose.pdb_info(rosetta.core.pose.PDBInfo( Pose ))
    if Args.renumber_pose:
      rosetta.dump_pdb(Pose, OutputPdb)
    else:
      rosetta.dump_pdb(Pose, OutputPdb.replace('.pdb', '_renumbered.pdb'))

    AllConstraints, SortedConstraints = get_pose_constraints(Pose, Args.max_dist, Args.min_seq_sep, Args.sasa_probe_radius, SasaScale, Args.upstream_atom, Args.downstream_atom, True)
    
    if Args.disulfide:
      DisulfAllConstraints, DisulfSortedConstraints = get_pose_constraints(Pose, 3.5, 2, Args.sasa_probe_radius, SasaScale, 'SG', 'SG', False)
      AllConstraints.extend(DisulfAllConstraints)

    # print AllConstraints
    # print SortedConstraints
    # print 
    # print
    # print DisulfAllConstraints
    # print DisulfSortedConstraints
    # sys.exit()

    CstName = OutputPdb.replace('.pdb', '_All.cst')
    with open(CstName, 'w') as CstFile:
      print>>CstFile, '\n'.join(AllConstraints) 

    BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = SortedConstraints

    CstName = OutputPdb.replace('.pdb', '_BBBB.cst')
    with open(CstName, 'w') as CstFile:
      print>>CstFile, '\n'.join(BackboneBackboneCst) 
    CstName = OutputPdb.replace('.pdb', '_BBSC.cst')
    with open(CstName, 'w') as CstFile:
      print>>CstFile, '\n'.join(BackboneSidechainCst) 
    CstName = OutputPdb.replace('.pdb', '_SCSC.cst')
    with open(CstName, 'w') as CstFile:
      print>>CstFile, '\n'.join(SidechainSidechainCst) 
    CstName = OutputPdb.replace('.pdb', '_Disulf.cst')
    with open(CstName, 'w') as CstFile:
      print>>CstFile, '\n'.join(DisulfAllConstraints)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_filename', action="store", type=str)
    parser.add_argument('replicate_number', action="store", type=int)

    inputs = parser.parse_args()
    #takes name of pdb file without the extention
    pdb_file = inputs.pdb_filename
    prot_name = pdb_file.split('/')[-1].split('.')[0]
    #set up timer to figure out how long the code took to run
    t0 = time()
    fasta_file = pdb_file.replace('/structures/',
                                  '/fastas/').replace('.pdb', '.fasta')
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    assert len(records) == 1
    wt_seq = str(records[0].seq)

    # Initialize Rosetta.
    #init(extra_options='-mute basic -mute core')
    init(extra_options=
         '-mute basic -mute core -rebuild_disulf false -detect_disulf false')

    ########################
    # Constants
    ########################
    PACK_RADIUS = 12.0
    #Amino acids
    AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P",
           "Q", "R", "S", "T", "V", "W", "Y")
    AAs_choice_dict = {}
    for aa in AAs:
        AAs_choice_dict[aa] = [other_aa for other_aa in AAs if other_aa != aa]
    #Number of mutations to accept
    max_accept_mut = 10 * len(wt_seq)
    #max_accept_mut = 2048

    #Population size
    N = 1000
    #Beta (temp term)
    beta = 1
    #Fraction of the WT stability value to shoot for
    threshold_fraction = 0.5
    ########################
    ########################

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load a clean pdb file
    initial_pose = pose_from_pdb(pdb_file)
    if '.clean' in pdb_file:
        pdb_file = ''.join(pdb_file.split('.clean'))

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    #Threshold for selection
    threshold = post_pre_packing_score * threshold_fraction
    print 'threshold:', threshold

    data.append('WT,' + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    #number of residues to select from
    n_res = initial_pose.total_residue()

    #start evolution
    i = 0
    gen = 0
    while i < max_accept_mut:

        #update the number of generations that have pased
        gen += 1

        #print 'accepts:', i

        #pick a place to mutate
        mut_location = random.randint(1, n_res)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #choose the amino acid to mutate to
        #new_mut_key = random.randint(0,len(AAs)-1)
        #proposed_res = AAs[new_mut_key]
        proposed_res = random.choice(AAs_choice_dict[res.name1()])

        #make the mutation
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        #score mutant
        variant_score = sf(mutant_pose)

        #get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        #test to see if mutation is accepted
        if random.random() < probability:

            #create a name for the mutant if its going to be kept
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            #save name and energy change
            data.append(variant_name + "," + str(variant_score) + "," +
                        str(variant_score - post_pre_packing_score) + "," +
                        str(probability) + "," + str(gen) + "\n")

            #            if i == (max_accept_mut - 1):
            #                final_pdb_name=pdb_file.replace('.pdb', '_thresh={}_Neff={}_beta={}_i={}_nmut={}.pdb'.format(threshold_fraction, N, beta, inputs.replicate_number, i))
            #                mutant_pose.dump_pdb(final_pdb_name)

            #update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            #update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()
    # Output results.
    output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format(
        prot_name, prot_name, threshold_fraction, N, beta,
        inputs.replicate_number)
    with open(output_filename, "w") as outfile:
        outfile.writelines(data)

    print 'Data written to:', output_filename
    print 'program takes %f' % (t1 - t0)
def main(argv=None):
  # if argv is None:
  #   argv = sys.argv
  if argv != None:                                                             
    sys.argv =[ sys.argv[0] ]+[ arg for arg in argv ]                          
  # print 'sys.argv', sys.argv
  
  ArgParser = argparse.ArgumentParser(description=" args for optimize_repeat_structures ")
  ArgParser.add_argument('-pdb_stem', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True )    
  Args = ArgParser.parse_args()
  
  Pdbs = glob.glob('*%s.pdb'%Args.pdb_stem)
  
  PdbSortTuples = []
  Skipped = []

  for Pdb in Pdbs:
    RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
    SourceStart = int(re.sub(r'.*src(\d+).*pdb', r'\1', Pdb))
    
    try:
      assert SourceStart != Pdb and RepeatLength != Pdb, 'regular expression substitution failed' 
    except AssertionError:
      Skipped.append(Pdb)
      continue

    PdbSortTuples.append( (RepeatLength, SourceStart, Pdb) )
  
  print 'Skipped:'
  print Skipped
  print
  
  PdbSortTuples.sort()

  LastPdb = PdbSortTuples[0][2]
  Pose = rosetta.pose_from_pdb(LastPdb)
  LastArray = np.array([ list( Pose.residue(P).xyz('CA') ) for P in range(1, Pose.n_residue()+1) ])

  subprocess.check_output(['mkdir', 'Redundant'])

  for PdbTup in PdbSortTuples[1:]:
    Pdb = PdbTup[2]
    Pose = rosetta.pose_from_pdb(Pdb)

    CA_Array = np.array([ list( Pose.residue(P).xyz('CA') ) for P in range(1, Pose.n_residue()+1) ])
  
    if len(CA_Array) == len(LastArray):

      RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays(CA_Array, LastArray)
      print
      print 'LastPdb, Pdb'
      print LastPdb
      print Pdb
      print 'RMSD:', RMSD

      if RMSD < 0.001:
        PdbStem = re.sub(r'(.*).pdb$', r'\1', Pdb)
        GlobString = '%s*'%PdbStem

        PdbAssociatedFiles = glob.glob(GlobString)
        # print PdbAssociatedFiles

        for File in PdbAssociatedFiles:
          subprocess.check_output(['mv', File, 'Redundant/'])


    LastArray = copy.deepcopy(CA_Array)
    LastPdb = copy.deepcopy(Pdb)
def detect_and_expand_repeats(InputTuple):
  Args, Pdb = InputTuple
  print 'Pdb:', Pdb
  # get name base for output pdbs 
  InputPdbStem = Pdb.split('/')[-1].replace('.pdb', '')
  print 'StemName:', InputPdbStem

  # load Pdb into rosetta pose
  Pose = rosetta.pose_from_pdb(Pdb)
  Pose.pdb_info(rosetta.core.pose.PDBInfo( Pose ))
  
  # Get repeat unit poses from function above
  if Args.repeat_residues == False:
    TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(Pose)
  
  else:
    RepeatChains = Args.repeat_residues.split('__')
    RepeatChains = [ [ int(Number) for Number in Chain.split('_') ] for Chain in RepeatChains]
    # print 'RepeatChains', RepeatChains
    # sys.exit()
    TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(Pose, RepeatChains)
    
  # RepeatStretchesByLengthHash[12] = [[14, 15, 16, 17, 18, 21]]
  # ConsolidatedRepeatStarts.extend([45,46,47])    
  # print 'ConsolidatedRepeatStarts', ConsolidatedRepeatStarts
  # print 'RepeatStretchesByLengthHash', RepeatStretchesByLengthHash
  # print 'TandemRepeats', TandemRepeats
  # InputPoseRepeatNumber = len(TandemRepeats[ConsolidatedRepeatStarts[0]])

  AllExtrapolationsByRepeatLength = {}
  print 'TandemRepeats:', TandemRepeats
  print 'RepeatStretchesByLengthHash:', RepeatStretchesByLengthHash
  # print 
  # MaxTurns = Args.max_turns_per_repeat   
  count = 1
  for RepeatUnitLength in RepeatStretchesByLengthHash:
    # UniformLength = Args.repeat * RepeatUnitLength
    
    ExtrapolationList = []
    MinLength = 9000000000 # will break if pose has more than 9 billion residues
    
    print 'RepeatUnitLength', RepeatUnitLength
    for RepeatStretch in RepeatStretchesByLengthHash[RepeatUnitLength]:
      print 'RepeatStretch', RepeatStretch

      # gets all pairwise combinations of repeat combinations, second arg should ALWAYS be 2, unless manger overhaul is performed
      for RepeatUnitCombo in itertools.combinations(RepeatStretch, 2):
        # print 'RepeatUnitCombo', RepeatUnitCombo
        RepeatUnit1Start, RepeatUnit2Start = RepeatUnitCombo
        assert RepeatUnit1Start <= RepeatUnit2Start, ' RepeatUnit1 must begin before RepeatUnit2 '

        TandemRepeats1 = TandemRepeats[RepeatUnit1Start]
        TandemRepeats2 = TandemRepeats[RepeatUnit2Start]

        # Whichever position starts the fewest tandem repeats dicates how far to shift
        Shifts = min(len(TandemRepeats1), len(TandemRepeats2))
        # How max number of turns to include per repeat depends on available repeats, and uner input max 
        MaxTurns = min( Args.max_turns_per_repeat, Shifts)

        if (RepeatUnit1Start + Args.min_overlap) <= RepeatUnit2Start <= (RepeatUnit1Start + RepeatUnitLength - Args.min_overlap):  
          # print 
          # print 'Selected RepeatUnitCombo:', RepeatUnitCombo
          # print 'RepeatUnit1Start, repeats ', RepeatUnit1Start, TandemRepeats[RepeatUnit1Start]
          # print 'RepeatUnit2Start, repeats ', RepeatUnit2Start, TandemRepeats[RepeatUnit2Start]
                        
          for NumTurns in range(1, MaxTurns+1):
            
            # print '\n'*5
            # print 'NumTurns', NumTurns
            ModLength = NumTurns * RepeatUnitLength
            
            # print 'ModLength', ModLength
            ModUniformLength = Args.repeat * ModLength
            # print 'ModUniformLength1', ModUniformLength
          
            for Shift in range((Shifts/NumTurns)):
              # print 'Shift', Shift
              ModRep1Start = RepeatUnit1Start + (Shift*ModLength)
              ModRep2Start = RepeatUnit2Start + (Shift*ModLength)
              Overlap = ModRep2Start - ModRep1Start
              ModRep1End = ModRep1Start + ModLength - 1 
              ModRep2End = ModRep2Start + ModLength - 1 

              # print 'ModRep1Start, ModRep1End', ModRep1Start, ModRep1End
              # print 'ModRep2Start, ModRep2End', ModRep2Start, ModRep2End

              Repeat1Unit = grafting.return_region(Pose, ModRep1Start, ModRep1End)
              Repeat2Unit = grafting.return_region(Pose, ModRep2Start, ModRep2End)
              # print 'Repeat1Unit', Repeat1Unit
              # print 'Repeat2Unit', Repeat2Unit
              # use function to extrapolate from a partial repeat 

              try:
                Extrapolation = extrapolate_repeat_pose(Repeat1Unit, Repeat2Unit, Args.repeat - 1)
              except AssertionError:
                'Extrapolation failed'
                continue

              # hacky check finds things that went wrong in extrapolation, sometimes
              if Extrapolation.n_residue() == ModUniformLength + Overlap:

                # trim down to uniform length 
                Extrapolation = grafting.return_region(Extrapolation, 1, ModUniformLength)

                # add extrapolated pose to list
                Repeat1Range = (ModRep1Start, ModRep1End)
                Repeat2Range = (ModRep2Start, ModRep2End)
                ExtrapolationList.append(( Extrapolation, Repeat1Range, Repeat2Range, NumTurns ))

              else:
                print 'fail'

    AllExtrapolationsByRepeatLength[RepeatUnitLength] = ExtrapolationList


  with open('%s_RepExtra.log'%InputPdbStem, 'w') as LogFile:

    for BaseUnitLength in AllExtrapolationsByRepeatLength:
      print 'Extrapolated %d poses with base unit length %d'%(len(AllExtrapolationsByRepeatLength[BaseUnitLength]), BaseUnitLength)
      print>>LogFile, 'Extrapolated %d poses with base unit length %d'%(len(AllExtrapolationsByRepeatLength[BaseUnitLength]), BaseUnitLength)
      print>>LogFile, 'Number\tUnit1 range\tUnit2 range'

      for i, ExtrapolationTuple in enumerate( AllExtrapolationsByRepeatLength[BaseUnitLength] ):
        # print 'Extrapolation',Extrapolation
        ### print>>LogFile, '\t\t'.join([ str(i+1), ','.join([str(Number) for Number in ExtrapolationTuple[1]]), ','.join([str(Number) for Number in ExtrapolationTuple[2]]) ])
        Repeat1Range = ExtrapolationTuple[1]
        Repeat2Range = ExtrapolationTuple[2]
        
        RepeatUnitLength = BaseUnitLength * ExtrapolationTuple[3]
        rosetta.dump_pdb( ExtrapolationTuple[0], '%ssrc%d_%d__%d_%d_rep%d_%s.pdb'%(Args.out, Repeat1Range[0], Repeat1Range[1], Repeat2Range[0], Repeat2Range[1], RepeatUnitLength, InputPdbStem) )
Example #27
0
def main():
    #read in the file made by the forward sim
    args = sys.argv
    inputfile = args[1]
    data = open(inputfile)
    first_line = data.readlines()[1]
    var_line=first_line.split(',')
    start_stab=var_line[1]

    #the first entry in the file is the wild type structure, calc the threshold using this
    threshold=float(start_stab)+10
    print(threshold)
    
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 0
    #Population size
    N = 100
    #Beta (temp term)
    beta = .6
  
    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    #Prepare data headers
    data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file the score_mutant_pdb has made
    variant_scores=open(inputfile)

    #get just the mutation we want to revert to
    lines= variant_scores.readlines()
    var_line=lines[500] #gets the Nth line how ever long you want the burn to be
    print "staring here", var_line  
    var_line=var_line.split(',')[0]
  
    var_loc=int(filter(str.isdigit, var_line))
    var_rev=var_line[:1]

    gen=1
    #get all the pdb files
    sort_list=sorted(glob.glob('*[0-9].pdb'), key=numericalSort)
    sort_list=sort_list[-1016:] #include the last 1000 and some pdbs, the 16 is because we want the ones that happened before the 500th mutation too. 

  
    for i in range(1,len(sort_list)-30):
      step=-15
      #calc reversion for next 15 moves
      for infile in sort_list[i:i+31]:

	#for each mutation	
        var_line=lines[gen+500] #gets the Nth line how ever long you want the burn to be
        var_line=var_line.split(',')[0]
	print(var_line)
        var_loc=int(filter(str.isdigit, var_line))
	var_rev=""
	old=""
	if(step<0):
        	var_rev=var_line[len(var_line)-1:len(var_line)]
		old=var_line[:1]
	
	else:
		var_rev=var_line[:1]
		old=var_line[len(var_line)-1:len(var_line)]

      	print "Current File Being Processed is: " + infile
        print "revering to:", var_rev
        print "at:", var_loc

	#get the pdb you want to revert and make the reversion
        initial_pose = pose_from_pdb(infile)
        mutant_pose = mutate_residue(initial_pose, var_loc , var_rev, PACK_RADIUS, sf)

	#repack mut
        task1 = standard_packer_task(mutant_pose)
	task1.restrict_to_repacking()
        task1.or_include_current(True)
        packer_rotamers_mover1 = RotamerTrialsMover(sf,task1)
	packer_rotamers_mover1.apply(mutant_pose)

	#repack init
        task2 = standard_packer_task(initial_pose)
	task2.restrict_to_repacking()
	task2.or_include_current(True)
	pack_rotamers_mover2 = RotamerTrialsMover(sf, task2)
	pack_rotamers_mover2.apply(initial_pose)

	#apply min mover
	min_mover.apply(mutant_pose)
	min_mover.apply(initial_pose)
	
	#get scores    
	variant_score = sf(mutant_pose)
        initial_score = sf(initial_pose)

	#get prob
        probability = calc_prob_mh(variant_score, initial_score, N, beta, threshold)

	print(str(gen+499)+".pdb"+","+str(infile)+","+str(step)+","+ str(var_line) + ","+str(var_rev)+","+str(var_loc)+","+str(old)+"," +str(initial_score) + "," + str(variant
_score) + "," + str(variant_score - initial_score)+ ","+ str(probability)+ "\n")
      	data.append(str(gen+499)+".pdb"+","+str(infile)+","+str(step)+","+ str(var_line) + ","+str(var_rev)+","+str(var_loc)+","+str(old)+"," +str(initial_score) + "," + str(v
ariant_score) + "," + str(variant_score - initial_score)+ ","+ str(probability)+ "\n")
	step=step+1
      gen+=1

    print '\nDONE'

    data_filename = 'premutate_rep1_bb_T_ch_T.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
Example #28
0
def detect_and_expand_repeats(InputTuple):
    Args, Pdb = InputTuple
    print 'Pdb:', Pdb
    # get name base for output pdbs
    InputPdbStem = Pdb.split('/')[-1].replace('.pdb', '')
    print 'StemName:', InputPdbStem

    # load Pdb into rosetta pose
    Pose = rosetta.pose_from_pdb(Pdb)
    Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))

    # Get repeat unit poses from function above
    if Args.repeat_residues == False:
        TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(
            Pose)

    else:
        RepeatChains = Args.repeat_residues.split('__')
        RepeatChains = [[int(Number) for Number in Chain.split('_')]
                        for Chain in RepeatChains]
        # print 'RepeatChains', RepeatChains
        # sys.exit()
        TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(
            Pose, RepeatChains)

    # RepeatStretchesByLengthHash[12] = [[14, 15, 16, 17, 18, 21]]
    # ConsolidatedRepeatStarts.extend([45,46,47])
    # print 'ConsolidatedRepeatStarts', ConsolidatedRepeatStarts
    # print 'RepeatStretchesByLengthHash', RepeatStretchesByLengthHash
    # print 'TandemRepeats', TandemRepeats
    # InputPoseRepeatNumber = len(TandemRepeats[ConsolidatedRepeatStarts[0]])

    AllExtrapolationsByRepeatLength = {}
    print 'TandemRepeats:', TandemRepeats
    print 'RepeatStretchesByLengthHash:', RepeatStretchesByLengthHash
    # print
    # MaxTurns = Args.max_turns_per_repeat
    count = 1
    for RepeatUnitLength in RepeatStretchesByLengthHash:
        # UniformLength = Args.repeat * RepeatUnitLength

        ExtrapolationList = []
        MinLength = 9000000000  # will break if pose has more than 9 billion residues

        print 'RepeatUnitLength', RepeatUnitLength
        for RepeatStretch in RepeatStretchesByLengthHash[RepeatUnitLength]:
            print 'RepeatStretch', RepeatStretch

            # gets all pairwise combinations of repeat combinations, second arg should ALWAYS be 2, unless manger overhaul is performed
            for RepeatUnitCombo in itertools.combinations(RepeatStretch, 2):
                # print 'RepeatUnitCombo', RepeatUnitCombo
                RepeatUnit1Start, RepeatUnit2Start = RepeatUnitCombo
                assert RepeatUnit1Start <= RepeatUnit2Start, ' RepeatUnit1 must begin before RepeatUnit2 '

                TandemRepeats1 = TandemRepeats[RepeatUnit1Start]
                TandemRepeats2 = TandemRepeats[RepeatUnit2Start]

                # Whichever position starts the fewest tandem repeats dicates how far to shift
                Shifts = min(len(TandemRepeats1), len(TandemRepeats2))
                # How max number of turns to include per repeat depends on available repeats, and uner input max
                MaxTurns = min(Args.max_turns_per_repeat, Shifts)

                if (RepeatUnit1Start +
                        Args.min_overlap) <= RepeatUnit2Start <= (
                            RepeatUnit1Start + RepeatUnitLength -
                            Args.min_overlap):
                    # print
                    # print 'Selected RepeatUnitCombo:', RepeatUnitCombo
                    # print 'RepeatUnit1Start, repeats ', RepeatUnit1Start, TandemRepeats[RepeatUnit1Start]
                    # print 'RepeatUnit2Start, repeats ', RepeatUnit2Start, TandemRepeats[RepeatUnit2Start]

                    for NumTurns in range(1, MaxTurns + 1):

                        # print '\n'*5
                        # print 'NumTurns', NumTurns
                        ModLength = NumTurns * RepeatUnitLength

                        # print 'ModLength', ModLength
                        ModUniformLength = Args.repeat * ModLength
                        # print 'ModUniformLength1', ModUniformLength

                        for Shift in range((Shifts / NumTurns)):
                            # print 'Shift', Shift
                            ModRep1Start = RepeatUnit1Start + (Shift *
                                                               ModLength)
                            ModRep2Start = RepeatUnit2Start + (Shift *
                                                               ModLength)
                            Overlap = ModRep2Start - ModRep1Start
                            ModRep1End = ModRep1Start + ModLength - 1
                            ModRep2End = ModRep2Start + ModLength - 1

                            # print 'ModRep1Start, ModRep1End', ModRep1Start, ModRep1End
                            # print 'ModRep2Start, ModRep2End', ModRep2Start, ModRep2End

                            Repeat1Unit = grafting.return_region(
                                Pose, ModRep1Start, ModRep1End)
                            Repeat2Unit = grafting.return_region(
                                Pose, ModRep2Start, ModRep2End)
                            # print 'Repeat1Unit', Repeat1Unit
                            # print 'Repeat2Unit', Repeat2Unit
                            # use function to extrapolate from a partial repeat

                            try:
                                Extrapolation = extrapolate_repeat_pose(
                                    Repeat1Unit, Repeat2Unit, Args.repeat - 1)
                            except AssertionError:
                                'Extrapolation failed'
                                continue

                            # hacky check finds things that went wrong in extrapolation, sometimes
                            if Extrapolation.n_residue(
                            ) == ModUniformLength + Overlap:

                                # trim down to uniform length
                                Extrapolation = grafting.return_region(
                                    Extrapolation, 1, ModUniformLength)

                                # add extrapolated pose to list
                                Repeat1Range = (ModRep1Start, ModRep1End)
                                Repeat2Range = (ModRep2Start, ModRep2End)
                                ExtrapolationList.append(
                                    (Extrapolation, Repeat1Range, Repeat2Range,
                                     NumTurns))

                            else:
                                print 'fail'

        AllExtrapolationsByRepeatLength[RepeatUnitLength] = ExtrapolationList

    with open('%s_RepExtra.log' % InputPdbStem, 'w') as LogFile:

        for BaseUnitLength in AllExtrapolationsByRepeatLength:
            print 'Extrapolated %d poses with base unit length %d' % (
                len(AllExtrapolationsByRepeatLength[BaseUnitLength]),
                BaseUnitLength)
            print >> LogFile, 'Extrapolated %d poses with base unit length %d' % (
                len(AllExtrapolationsByRepeatLength[BaseUnitLength]),
                BaseUnitLength)
            print >> LogFile, 'Number\tUnit1 range\tUnit2 range'

            for i, ExtrapolationTuple in enumerate(
                    AllExtrapolationsByRepeatLength[BaseUnitLength]):
                # print 'Extrapolation',Extrapolation
                ### print>>LogFile, '\t\t'.join([ str(i+1), ','.join([str(Number) for Number in ExtrapolationTuple[1]]), ','.join([str(Number) for Number in ExtrapolationTuple[2]]) ])
                Repeat1Range = ExtrapolationTuple[1]
                Repeat2Range = ExtrapolationTuple[2]

                RepeatUnitLength = BaseUnitLength * ExtrapolationTuple[3]
                rosetta.dump_pdb(
                    ExtrapolationTuple[0], '%ssrc%d_%d__%d_%d_rep%d_%s.pdb' %
                    (Args.out, Repeat1Range[0], Repeat1Range[1],
                     Repeat2Range[0], Repeat2Range[1], RepeatUnitLength,
                     InputPdbStem))
Example #29
0
            #print r, N, C
            pymol.send_colors(pose,
                              C,
                              default_color=rosetta.protocols.moves.XC_blue)

            #pymol.send_energy( pose_s )

            time.sleep(.1)


# Start Rosetta before any pose is created.
rosetta.init()

# Two poses: the full test structure (given a custom name) and a short one.
pose = rosetta.Pose()
pose.name = 'CustomNamedPose'
pose_s = rosetta.Pose()
for _pose, _pdb_path in ((pose, "test/data/test_in.pdb"),
                         (pose_s, "test/data/test_in_short.pdb")):
    rosetta.pose_from_pdb(_pose, _pdb_path)

# Score the full pose with the 'standard' score function.
scorefxn = rosetta.create_score_function('standard')
scorefxn(pose)

# Apply the PyMOL mover to the short pose, then run the coloring demo on it.
pymol = rosetta.PyMOL_Mover()

pymol.apply(pose_s)
coloring_demo(pose_s)

# The same mover wrapped in a SequenceMover, applied to both poses in turn.
seq = rosetta.protocols.moves.SequenceMover()
seq.add_mover(pymol)

for _pose in (pose, pose_s):
    seq.apply(_pose)
Example #30
0
def _iter_unique_csts( CstsByPosition, Positions ):
  ''' Yields every constraint stored under the given positions exactly once.

  CstsByPosition is indexed as CstsByPosition[Position][AtomName] and holds
  lists of ( AtomResidueCoords, CstParameters, CstLineNumber, CstType ) tuples.
  Positions without an entry are skipped. A constraint can be reached through
  more than one position / atom, so CstLineNumber is used to emit it only once. '''
  SeenLineNumbers = set()
  for Position in Positions:
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstsByPosition[Position]
    except KeyError:
      continue
    for AtomName in PositionCstDict:
      for Cst in PositionCstDict[AtomName]:
        AtomResidueCoords, CstParameters, CstLineNumber, CstType = Cst
        if CstLineNumber in SeenLineNumbers:
          continue
        SeenLineNumbers.add( CstLineNumber )
        yield Cst


def _shift_cst( Cst, Shift ):
  ''' Returns a copy of the constraint tuple with every residue number offset by Shift. '''
  AtomResidueCoords, CstParameters, CstLineNumber, CstType = Cst
  ShiftedCoords = [ ( AtomResiduePair[0], AtomResiduePair[1] + Shift ) for AtomResiduePair in AtomResidueCoords ]
  return ShiftedCoords, CstParameters, CstLineNumber, CstType


def cap_pdb_make_cst( RepeatPdbFileName, RepeatCstFileName, ReferencePdb, ReferenceCst, Ntrim=0, Ctrim=0, Step=0 ):
  ''' Fuses N- and C-terminal caps cut from a reference pdb onto a repeat pdb
  and writes a matching constraint file for every capped pose.

  RepeatPdbFileName  repeat pose pdb; the name must contain 'rep<N>' and
                     'src<a>_<b>__<c>_<d>' (the first src range gives the cap boundaries)
  RepeatCstFileName  constraint file numbered like the repeat pdb
  ReferencePdb       pdb the caps are cut from
  ReferenceCst       constraint file numbered like the reference pdb
  Ntrim, Ctrim       how far (in residues) to walk the N-cap end back / the
                     C-cap start forward; each must be a multiple of Step
  Step               walk increment. With Step=0 (the default) only the
                     untrimmed cap boundaries are tried and Ntrim / Ctrim
                     are ignored (previously this divided by zero)

  Every N-cap / C-cap combination that can be fused is dumped as
  <repeat stem>_<NcapLastRes>Cap<CcapFirstRes>.pdb with a .cst of the same stem.
  Scratch files NcapPlusRepeat.pdb and CcapPose.pdb are overwritten in the
  working directory along the way.

  Returns a list of ( capped pdb name, capped cst name ) tuples. '''
  if Step:
    if Ntrim:
      assert Ntrim % Step == 0
    if Ctrim:
      assert Ctrim % Step == 0
    NtrimSteps = Ntrim // Step
    CtrimSteps = Ctrim // Step
  else:
    # No step size means there is nothing to walk over: use the src boundaries only
    NtrimSteps = 0
    CtrimSteps = 0

  # Grep out repeat length and src ranges
  # The repeat length itself is not needed below; the conversion is kept
  # because it rejects (ValueError) file names that carry no 'rep<N>' tag
  int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdbFileName))
  SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdbFileName)
  assert SourceRanges != RepeatPdbFileName, 'src string not found in pdb name '
  SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges.split('__') ]
  SourceStart = SourceRanges[0][0]
  SourceEnd = SourceRanges[0][1]

  # Load repeat pose
  RepeatPose = rosetta.pose_from_pdb( RepeatPdbFileName )
  # Trim off floppy end residues
  TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 )
  TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) )
  # Load reference (native) pose
  ReferencePose = rosetta.pose_from_pdb( ReferencePdb )
  ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) )

  PdbCstPairs = []

  # Loop through N terminal caps
  for NcapTrimBackSteps in range( NtrimSteps + 1 ):
    NcapLastRes = SourceStart - (NcapTrimBackSteps * Step)

    # Get pose for n-terminal cap with overhang for superimpositions
    try:
      NcapPose = grafting.return_region( ReferencePose, 1, NcapLastRes+5 )
    except (RuntimeError, OverflowError):
      print('Requested end of n-terminal cap, %d, beyond range of reference protein. '%NcapLastRes)
      continue

    NcapLength = NcapPose.n_residue()
    if NcapLength <= 4:
      print('Too few residues to attach n-terminal cap ending at %d; skipping '%NcapLastRes)
      continue

    # Superimpose the first four repeat residues onto the last four cap residues
    NcapOverhangPositions = list(range( NcapLength-3, NcapLength+1 ))
    NcapOverhangArray = generate_backbones.get_residue_array( NcapPose, NcapOverhangPositions )

    RepStartOverhangPositions = [1, 2, 3, 4]
    RepStartOverhangArray = generate_backbones.get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions )

    RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray )
    rosetta.Pose.apply_transform_Rx_plus_v( TrimmedRepeatPose, rMtx, tVec )

    try:
      NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = generate_backbones.fuse( NcapPose, TrimmedRepeatPose )
    except AssertionError:
      print(' Not enough structural similarity to attach n-terminal cap ending at %d; skipping '%NcapLastRes)
      continue

    rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' )
    NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) )

    RepeatCstExtrapolator = expand_cst.constraint_extrapolator(RepeatCstFileName)

    # Shift repeat unit constraints to accommodate numbering with n-cap length;
    # constraints that touch atoms missing from the fused pose are dropped
    RepeatCsts = []
    for Cst in _iter_unique_csts( RepeatCstExtrapolator.Cst, range(1, RepeatPose.n_residue()+1 ) ):
      ShiftedCst = _shift_cst( Cst, NcapLastRes - 1 )
      if expand_cst.pose_has( NcapPlusRepeatPose, ShiftedCst[0] ):
        RepeatCsts.append( ShiftedCst )

    # Loop through C terminal caps
    for CcapTrimForwardSteps in range( CtrimSteps + 1 ):
      CcapFirstRes = SourceEnd + ( CcapTrimForwardSteps * Step )
      Cshift = CcapFirstRes-6
      print('Cshift %s' % Cshift)
      print('ReferencePose.n_residue() %s' % ReferencePose.n_residue())

      try:
        CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() )
      except (RuntimeError, OverflowError):
        print('Requested start of c-terminal, %d, beyond range of reference protein. '%CcapFirstRes)
        continue

      if CcapPose.n_residue() <= 4:
        print('Too few residues to attach c-terminal cap starting at %d; skipping '%CcapFirstRes)
        continue

      # Superimpose the first four cap residues onto the last four residues of the N-capped repeat
      CcapOverhangPositions = [1, 2, 3, 4]
      CcapOverhangArray = generate_backbones.get_residue_array( CcapPose, CcapOverhangPositions )

      NcapPlusRepeatLength = NcapPlusRepeatPose.n_residue()
      RepEndOverhangPositions = list(range( NcapPlusRepeatLength-3, NcapPlusRepeatLength+1 ))
      RepEndOverhangArray = generate_backbones.get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions )

      RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray )
      rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
      rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' )

      try:
        CappedRepeatPose, RMSD, CcapCorrespondingResidues = generate_backbones.fuse(NcapPlusRepeatPose, CcapPose)
      except AssertionError:
        print('Not enough structural similarity to attach c-terminal cap starting at %d; skipping '%CcapFirstRes)
        continue

      CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_%dCap%d.pdb'%(NcapLastRes, CcapFirstRes), RepeatPdbFileName)
      assert CappedNamePdb != RepeatPdbFileName, 'regular expression substitution failed!'

      rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb )

      # Generate csts for cap/repeat edges
      CapCstExtrapolator = expand_cst.constraint_extrapolator(ReferenceCst)

      # N cap constraints are easy; no shifts are needed
      CapCsts = list( _iter_unique_csts( CapCstExtrapolator.Cst, range(1, NcapLastRes) ) )

      # C cap constraints are harder; need to shift due to pose expansion.
      # Duplicates are tracked separately from the N cap pass, as before.
      CcapCstShift = CappedRepeatPose.n_residue() - ReferencePose.n_residue()
      for Constraint in _iter_unique_csts( CapCstExtrapolator.Cst, range( CcapFirstRes, ReferencePose.n_residue()+1 ) ):
        CapCsts.append( _shift_cst( Constraint, CcapCstShift ) )

      CappedCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb)

      # Truncate any existing file of that name before the constraints are written
      with open(CappedCstName, 'w'):
        pass

      # Cap constraints first, then repeat constraints; keep only those the capped pose can satisfy
      FinalCstSet = [ Cst for Cst in CapCsts + RepeatCsts if expand_cst.pose_has(CappedRepeatPose, Cst[0]) ]

      CapCstExtrapolator.output_cst(FinalCstSet, CappedCstName)
      PdbCstPairs.append((CappedNamePdb, CappedCstName))

  return PdbCstPairs
Example #31
0
def pose_from_params(filename, params_list):
    """Return a new Pose read from `filename` with a custom residue set.

    The residue set is built from `params_list` by
    generate_nonstandard_residue_set and handed to pose_from_pdb together
    with a freshly constructed, empty Pose that is filled in place.
    """
    residue_types = generate_nonstandard_residue_set(params_list)
    loaded = Pose()
    pose_from_pdb(loaded, residue_types, filename)
    return loaded
def optimize_repeat_pdb( PdbCstSetsRepeatLength ):
  ''' parallelizable

  Idealizes a repeat pdb, then relaxes it once per constraint file and once
  with only the repeat-to-repeat (NCS) dihedral restraints.

  Takes ONE argument, a ( Pdb, CstSets, RepeatLength ) tuple (presumably so
  the function can be handed straight to a map-style worker pool):
    Pdb           path of the input pdb; its residue count must be an integer
                  multiple of RepeatLength
    CstSets       iterable of constraint file names, may be empty
    RepeatLength  number of residues in one repeat

  Files written:
    <Pdb stem>_ideal.pdb
    <cst stem>_TalCst.pdb, <cst stem>_TalCst_Relax.pdb,
    <cst stem>_LowFaRep.pdb, <cst stem>_LowFaRep_Relax.pdb   for every cst file
    <stem>_JustRelax.pdb   where <stem> is the stem of the LAST cst file, or
                           <Pdb stem>_ideal when CstSets is empty

  Returns None. Raises AssertionError if the pose length is not a multiple
  of RepeatLength. '''
  Pdb, CstSets, RepeatLength = PdbCstSetsRepeatLength

  # idealize peptide bonds with command line subprocess
  subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', Pdb])
  IdealizedPdbOldName = Pdb.replace('.pdb', '_0001.pdb')
  IdealizedPdbNewName = Pdb.replace('.pdb', '_ideal.pdb')
  subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
  time.sleep(0.5)

  Pose = rosetta.pose_from_pdb(IdealizedPdbNewName)
  PoseLength = Pose.n_residue()

  assert PoseLength % RepeatLength == 0, 'pdb input into optimize_repeat_pdb must have integer multiple of repeat_length number of residues'
  NumberRepeats = PoseLength // RepeatLength

  # 1-based, inclusive ( first, last ) residue of every repeat
  RepeatRanges = [ ( Start, Start + RepeatLength - 1 ) for Start in range( 1, PoseLength + 1, RepeatLength ) ]
  assert len(RepeatRanges) == NumberRepeats

  # All other repeats are tied to a repeat near the middle of the pose
  MidRepeat = ( NumberRepeats // 2 ) - 1
  ReferenceRange = RepeatRanges[MidRepeat]

  SetupNCS = symmetry.SetupNCSMover()

  for TargetRange in RepeatRanges:
    if TargetRange != ReferenceRange:
      # skip first three residues (not enough atoms for torsion), and amino acid types allowed to vary
      if TargetRange[0] == 1:
        SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0]+3, ReferenceRange[1]), "%dA-%dA"%(TargetRange[0]+3, TargetRange[1]) )
      # skip last three residues (not enough atoms for torsion)
      elif TargetRange[1] == PoseLength:
        SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0], ReferenceRange[1]-3), "%dA-%dA"%(TargetRange[0], TargetRange[1]-3) )
      else:
        SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0], ReferenceRange[1]), "%dA-%dA"%(TargetRange[0], TargetRange[1]) )

  SetupNCS.apply(Pose)

  # default score function plus dihedral weight for symmetry ncs minimization
  SymmTalaris = rosetta.getScoreFunction()
  SymmTalaris.set_weight(rosetta.dihedral_constraint, 1.0)

  TalarisPlusCst = rosetta.getScoreFunction()
  TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
  TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
  TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 3.0)

  # same constraint weights, with fa_rep set to 0.25
  TalarisPlusCstLowerFaRep = rosetta.getScoreFunction()
  TalarisPlusCstLowerFaRep.set_weight(rosetta.atom_pair_constraint, 10.0)
  TalarisPlusCstLowerFaRep.set_weight(rosetta.angle_constraint, 5.0)
  TalarisPlusCstLowerFaRep.set_weight(rosetta.dihedral_constraint, 3.0)
  TalarisPlusCstLowerFaRep.set_weight(rosetta.fa_rep, 0.25)
  print('Pdb: %s' % Pdb)

  TaggedScoreFunctions = [ ( 'TalCst', TalarisPlusCst ), ( 'LowFaRep', TalarisPlusCstLowerFaRep ) ]

  # Fallback stem for the _JustRelax output. Without it an empty CstSets left
  # CstStemName unbound and the final dump raised NameError. When CstSets is
  # not empty the loop overwrites it, so the output name is the same as before.
  CstStemName = re.sub(r'^(.*)\.pdb$', r'\1', IdealizedPdbNewName)

  for Cst in CstSets:
    print('Cst: %s' % Cst)
    CstPose = Pose.clone()
    CstStemName = re.sub(r'^(.*)\.cst$', r'\1', Cst)

    # make constraint mover and load the constraints from file
    Constrainer = rosetta.ConstraintSetMover()
    Constrainer.constraint_file(Cst)
    Constrainer.apply(CstPose)

    for FxnTag, ScoreFunction in TaggedScoreFunctions:
      # first relax with the file constraints in place
      RelaxPose = CstPose.clone()
      rosetta.relax_pose(RelaxPose, ScoreFunction, 'tag')
      rosetta.dump_pdb( RelaxPose, CstStemName+'_%s.pdb'%FxnTag )
      # remove all constraints
      RelaxPose.remove_constraints()
      # reapply ncs constraints
      SetupNCS.apply(RelaxPose)

      # second relax with only the ncs restraints
      rosetta.relax_pose(RelaxPose, SymmTalaris, 'tag')
      rosetta.dump_pdb( RelaxPose, CstStemName+'_%s_Relax.pdb'%FxnTag )

  # Control: relax with ncs restraints only, no file constraints
  JustRelaxPose = Pose.clone()
  SetupNCS.apply( JustRelaxPose )

  rosetta.relax_pose( JustRelaxPose, SymmTalaris, 'tag' )
  rosetta.dump_pdb( JustRelaxPose, CstStemName+'_JustRelax.pdb' )
Example #33
0
    def __init__(self, pdb, centroid=False, pdb_file='', frag=False, nine_mer=False, local=False, local_size=3,
                 full=False, rosetta_refinement=False):
        """Set up the search state, the Rosetta poses and the score function for one protein.

        :param pdb: pdb ID of the protein to be folded; the native pose is fetched with pose_from_rcsb
        :param centroid: if true, switch the working pose to centroid and score with 'score3'
        :param pdb_file: optional path of a pdb file to start the search from
        :param frag: if true, build a ClassicFragmentMover (stored as self.mover_3mer)
        :param nine_mer: with frag, read the 9-residue fragment file instead of the 3-residue one
        :param local: if true, the novelty archive is a list with one deque per local_size residues
        :param local_size: for the local mover, size of fragment to move
        :param full: if true use full mover
        :param rosetta_refinement: if true and no pdb_file is given, FastRelax the sequence-built pose first
        """
        self.loops = 0                                    # Stores generation for which energy score was last calculated
        self.scores = {}                                  # Dictionary container for current gen genomes/scores
        self.scores_list = []                             # List container of current gen scores for search
        self.gen_added = 0                                # Last gen in which a point was added to novelty archive
        self.threshold = 10                               # Novelty threshold for which point is added to archive
        self.acceptance_threshold = 100                   # Novelty threshold for which move is accepted automatically
        self.num_added = 0                                # Number of points added to novelty archive
        self.switch = False                               # All atom switch
        self.temperature = 5                              # Monte Carlo temperature
        self.mover_range = 10                             # +-range of the angle in degrees in which mover moves residue
        self.local_size = local_size                      # For local mover, size of fragment to move
        self.local = local                                # Whether to use local mover
        self.novelty_archive = deque()                    # Initialize novelty archive
        self.centroid = centroid                          # If true use centroid scoring
        self.last_lowest = 0                              # For use in novelty loop
        self.last_lowest_10 = 0                           # For use in clear main loop
        self.frag = frag                                  # If true use frag mover
        self.rosetta_refinement = rosetta_refinement      # If true refine rosetta fold

        # Rosetta inits
        rosetta.init()                                    # Initialize rosetta libraries
        pose_native = pose_from_rcsb(pdb)                 # Create rosetta pose of natively folded protein from pdb file
        sequence = pose_native.sequence()                 # Get sequence of protein
        self.scorefxn = rosetta.get_fa_scorefxn()         # Create the rosetta energy score function for all atom

        if pdb_file != '':
            self.pose = rosetta.pose_from_pdb(pdb_file)   # If a starting pdb is given search from this pose
        elif rosetta_refinement:                          # If rosetta refinement, start from fastrelax structure
            self.pose = rosetta.pose_from_sequence(sequence)
            relax = rosetta.FastRelax()
            relax.set_scorefxn(self.scorefxn)
            relax.apply(self.pose)
        else:
            self.pose = rosetta.pose_from_sequence(sequence)  # Create the rosetta pose that will be manipulated

        if centroid:                                      # Switch pose to centroid if centroid option is true
            switch = rosetta.SwitchResidueTypeSetMover("centroid")
            switch.apply(self.pose)
        self.c_size = len(sequence)*2                     # Number of residues * 2 (phi and psi for each residue)
        # The native energy is taken with the all-atom function, before any centroid switch below
        self.native_energy = self.scorefxn(pose_native)   # Energy of the natively folded protein

        if centroid:                                      # Switch rosetta score function if centroid
            self.scorefxn = rosetta.create_score_function('score3')

        # Flat list of backbone angles: phi then psi for residue 1, 2, ... (pose numbering is 1-based)
        self.conformation = []
        for residue in range(1, len(sequence) + 1):
            self.conformation.append(self.pose.phi(residue))
            self.conformation.append(self.pose.psi(residue))

        self.mc_energy = self.scorefxn(self.pose) + 500   # Energy to be used as minimal criteria
        self.lowest = self.scorefxn(self.pose)            # Lowest energy in archive

        if frag:
            if nine_mer:
                fragset = rosetta.ConstantLengthFragSet(9)
                fragset.read_fragment_file("aat000_09_05-1.200_v1_3")
            else:
                fragset = rosetta.ConstantLengthFragSet(3)
                fragset.read_fragment_file("aat000_03_05-1.200_v1_3")
            movemap = rosetta.MoveMap()
            movemap.set_bb(True)
            self.mover_3mer = rosetta.ClassicFragmentMover(fragset, movemap)

        if local:                                         # For local, initialize na with appropriate number of deques
            # Floor division keeps the count an int, which range() requires
            self.novelty_archive = [deque() for _ in range(self.c_size // 2 // self.local_size)]

        self.full = full                                  # If true use full mover
Example #34
0
def main():
    """Score single-residue reversions over a sliding window of pdb files.

    Reads the mutation list from 'mh_rep_3_37.csv' (first comma-separated
    field of each line: the one-letter residue to revert to, with the
    position given by the digits in that field) and all '*.pdb' files of the
    working directory, ordered with numericalSort. For generation gen = 1, 2,
    ... the mutation on line gen + 1 of the csv is applied to each of the 15
    structures sort_list[gen:gen + 15]; original score, mutant score, their
    difference and the calc_prob_mh probability are collected.

    The rows are printed as they are produced and finally written to
    'rep_3_mh_37_rev_15_score.csv'. Returns None.
    """
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Population size
    N = 37
    #Beta (temp term)
    beta = 1
    #look up what the first stored value was in the files to get the threshold
    threshold = float(-534.687360627 / 2)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Prepare data headers
    data = ['Generation,RevertTo,OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file the score_mutant_pdb has made
    with open('mh_rep_3_37.csv') as variant_scores:
        lines = variant_scores.readlines()

    gen = 1
    #get all the pdb files (listed and sorted once; the window below slices this list)
    sort_list = sorted(glob.glob('*.pdb'), key=numericalSort)

    for i in range(1, len(sort_list) - 15):

        #get just the mutation we want to revert to; it only depends on gen,
        #so it is parsed once per generation rather than once per file
        var_line = lines[gen + 1].split(',')[0]
        var_loc = int(''.join(ch for ch in var_line if ch.isdigit()))
        var_rev = var_line[:1]

        #calc reversion for next 15 moves
        for infile in sort_list[i:i + 15]:
            print("Current File Being Processed is: " + infile)
            initial_pose = pose_from_pdb(infile)
            initial_score = sf(initial_pose)
            print("init scored")
            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)
            variant_score = sf(mutant_pose)
            probability = calc_prob_mh(variant_score, initial_score, N, beta,
                                       threshold)
            row = ",".join([
                str(gen),
                var_line,
                str(initial_score),
                str(variant_score),
                str(variant_score - initial_score),
                str(probability),
            ]) + "\n"
            print(row)
            data.append(row)
        gen += 1

    print('\nDONE')

    data_filename = 'rep_3_mh_37_rev_15_score.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
Example #35
0
# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
# (c) addressed to University of Washington UW TechTransfer, email: [email protected].

## @author Sergey Lyskov

# Demo driver: route Rosetta tracer output into a PyTracer buffer and print
# what was collected after initialising Rosetta and loading a test structure.
print '-------- Test/Demo for capturing Tracers output in PyRosetta --------'

import rosetta

# The hook is registered before rosetta.init() is called (presumably so that
# output produced during init and pdb loading is captured too - TODO confirm).
# It is installed with the string returned by get_all_channels_string() and False.
T = rosetta.basic.PyTracer()
rosetta.basic.Tracer.set_ios_hook(
    T, rosetta.basic.Tracer.get_all_channels_string(), False)

rosetta.init()
pose = rosetta.pose_from_pdb("test/data/test_in.pdb")

# Print whatever T.buf() returns after the calls above.
print '\nCaptured IO:'
print T.buf()

# A fancier example, using an output callback:


class MyPyTracer(rosetta.basic.PyTracer):
    """PyTracer subclass whose output_callback prints what it is given."""

    def __init__(self):
        # Explicit base-class initialisation, nothing else to set up.
        rosetta.basic.PyTracer.__init__(self)

    def output_callback(self, s):
        """Print a fixed banner line, then the argument `s` on its own line."""
        print('MyPyTracer.output_callback with argument:')
        print(s)
Example #36
0
def main(argv=None):
  ''' Scores globbed pdbs, optionally plots them per repeat length and copies
  the ones scoring at or below a cutoff into an output folder.

  argv  full argument vector including the program name; defaults to sys.argv.

  Every globbed pdb name must contain 'rep<N>' (the repeat length). Poses are
  scored with the default score function (per residue unless -norm 0),
  grouped by repeat length and ordered best score first within a group.
  With -plot, each group is plotted against the native pose. With -score,
  every pdb whose score is not above the cutoff is copied into -out and
  listed in <out>/Scores.log (appended).

  Raises ValueError when a globbed name has no 'rep<N>' tag. Returns None. '''
  if argv is None:
    argv = sys.argv
  ArgParser = argparse.ArgumentParser(description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ")
  ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True )
  ArgParser.add_argument('-native_pdb', type=str, help=" pdb to compare designs against ", required=True )
  ArgParser.add_argument('-out', type=str, help=" folder to move files to ", required=True )
  ArgParser.add_argument('-score', type=float, help=" select all structures with less than this REU / residue ", default=None )
  ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1 )
  ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1 )
  ArgParser.add_argument('-name', type=str, help=" plot tag ", default='' )

  # Parse the vector that was passed in (it used to be ignored); argv[0] is the program name
  Args = ArgParser.parse_args(argv[1:])
  print(Args)
  Pdbs = glob.glob( Args.pdb_glob )

  print('globed %d pdbs'%len(Pdbs))

  if not os.path.isdir(Args.out):
    subprocess.check_output(['mkdir', Args.out])
  if Args.out [-1] != '/':
    Args.out = Args.out + '/'

  # The plot tag is prepended directly to the plot title, so it gets the
  # separating space. (The space used to be appended to Args.out, which broke
  # the copy destination and the log path whenever -name was given.)
  if Args.name != '':
    Args.name = Args.name + ' '

  NativePose = rosetta.pose_from_pdb( Args.native_pdb )

  RepeatLengths = []
  for Pdb in Pdbs:
    RepMatch = re.match(r'^.*rep(\d+).*pdb$', Pdb)
    if RepMatch is None:
      raise ValueError(" regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb)
    RepeatLengths.append( int(RepMatch.group(1)) )

  PoseSortingTuples = []
  # Scoring is redundant, once for sorting outside plotter, then again in plotter
  # making not redundant not a priority.
  # Scoring in the plotter object is so multiple score functions can be plotted easily
  Talaris = rosetta.getScoreFunction()
  for RepeatLength, Pdb in zip(RepeatLengths, Pdbs):
    Pose = rosetta.pose_from_pdb(Pdb)
    Score = Talaris(Pose)
    if Args.norm:
      Score = Score / Pose.n_residue()
    PoseSortingTuples.append( (RepeatLength, Score, Pose) )
  # sorts by repeat length (shortest to longest) then score (best to worst);
  # the key leaves the Pose objects out of the comparison
  PoseSortingTuples.sort( key=lambda PoseTuple: PoseTuple[:2] )

  # Split the sorted tuples into runs of equal repeat length
  AllRepeatLengthGroups = []
  for PoseTuple in PoseSortingTuples:
    if AllRepeatLengthGroups and AllRepeatLengthGroups[-1][0][0] == PoseTuple[0]:
      AllRepeatLengthGroups[-1].append(PoseTuple)
    else:
      AllRepeatLengthGroups.append([PoseTuple])

  # Add more score functions as wanted
  if Args.plot:
    Plotter = plotly_plotter(ScoreFxns=[ Talaris ], FxnNames=[ 'Talaris' ], EnergyPerResidue=True )

  for RepeatLengthGroup in AllRepeatLengthGroups:
    print('RepeatLengthGroup %s' % (RepeatLengthGroup,))
    Poses = [ PoseTuple[2] for PoseTuple in RepeatLengthGroup ]
    RepeatLength = RepeatLengthGroup[0][0]
    if Args.plot:
      Plotter.clear_traces()
      Xaxis = Plotter.score_poses( Poses )
      Plotter.add_comparsion_threshold( NativePose, Xaxis )
      Plotter.plot_traces( PlotName='%s%s based %d res repeats globed with %s'%(Args.name, Args.native_pdb, RepeatLength, Args.pdb_glob) )

    if Args.score is not None:
      with open('%sScores.log'%Args.out, 'a') as Log:
        for RepLen, Score, Pose in RepeatLengthGroup:
          # group is sorted best first, so everything after the first miss is worse
          if Score > Args.score:
            break
          PdbName = Pose.pdb_info().name()
          subprocess.check_output([ 'cp', PdbName, Args.out ])
          Log.write('%s\t%.3f\n'%(PdbName, Score))
Example #37
0
def main(ExtraResidues=0, ipython=0):
    ### Required args
    ArgParser = argparse.ArgumentParser(
        description=
        " for plotting pdb scores and selecting subsets based on absolute or per residue scores "
    )
    ArgParser.add_argument('-pdb_glob',
                           type=str,
                           help=" pdb stem, start of globs for pdbs and csts ",
                           required=True)
    ArgParser.add_argument('-native',
                           type=str,
                           help=" pdb to compare designs against ",
                           required=True)
    ### Default args
    ArgParser.add_argument(
        '-cst',
        type=str,
        help=" to provide cst manually, will apply to all globed pdbs!!! ",
        default=False)
    ArgParser.add_argument('-param',
                           type=str,
                           nargs='+',
                           help=" params ",
                           default=[])
    ArgParser.add_argument('-norm',
                           type=int,
                           help=" 0|(1) normalize scores by residue ",
                           default=1)

    ### following args are for plotly:
    ### change if you use this script!!!
    ArgParser.add_argument('-plotly_id',
                           type=str,
                           help=" ",
                           default="pylesharley")  # required=True )
    ArgParser.add_argument('-plotly_key',
                           type=str,
                           help="  ",
                           default="cc5z4a8kst")  # required=True )
    ArgParser.add_argument('-plot',
                           type=int,
                           help=" 0|(1) plot scores with plotly ",
                           default=1)
    ArgParser.add_argument('-name', type=str, help=" plot tag ", default='')
    ArgParser.add_argument(
        '-and_or',
        type=str,
        help=" And/Or logic for score cutoffs. Default = 'and'  ",
        default='and')
    ArgParser.add_argument(
        '-multi',
        type=int,
        help=" 0|(1) plot different methods together on same plot ",
        default=1)

    Args = ArgParser.parse_args()
    Pdbs = glob.glob(Args.pdb_glob)
    print 'globed %d pdbs' % len(Pdbs)

    if ExtraResidues == 0 and len(Args.param) > 0:
        try:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(
                ExtraParams)
        except:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(
                ExtraParams)
        ### for ipython mode
        if ipython:
            return ExtraResidues

    Args.and_or = Args.and_or.lower()
    assert Args.and_or == 'and' or Args.and_or == 'or', " -and_or must equal 'and' or 'or' "

    RepeatLengths = []
    ProcessTags = {}
    TagList = []
    TagByPdbName = {}

    # better to find out of native pdb is wrong before waiting for pdb scoring
    Check = open(Args.native, 'r')

    # print ' first loop '
    OverlapStarts = []
    for Pdb in Pdbs:
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        for OtherPdb in Pdbs:
            OtherTag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
            i = 0
            if Pdb != OtherPdb:
                while Pdb[:i] == OtherPdb[:i]:
                    i += 1
                Overlap = OtherPdb[:i - 1]
                OverlapStarts.append((len(Overlap), Overlap))

    OverlapStarts.sort()
    ShortestOverlap = OverlapStarts[0][1]

    # print 'OverlapStarts', OverlapStarts
    # print 'ShortestOverlap', ShortestOverlap

    for Pdb in Pdbs:
        try:
            RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
        except ValueError:
            RepeatLength = 0
        # SourceStart = int(re.sub(r'^.*src(\d+).*pdb$', r'\1', Pdb))
        assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s " % Pdb
        # assert SourceStart != Pdb and RepeatLength != Pdb, ' regular expression extraction of rep or src value failed on %s '%Pdb
        RepeatLengths.append(RepeatLength)

        #### re.sub out tag from design process
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        Tag = re.sub(r'^%s(.*)\.pdb$' % (ShortestOverlap), r'\1', Tag)

        TagByPdbName[Pdb] = Tag
        try:
            TagNumber = ProcessTags[Tag]
        except:
            TagNumber = len(ProcessTags) + 1
            ProcessTags[Tag] = TagNumber
        TagList.append(TagNumber)

    # Scoring is redundant, once for sorting outside plotter, then again in plotter
    # making not redundant not a priority.
    # Scoring in the plotter object is so multiple score functions can be plotted easily

    # Sort by repeat length, then score
    if Args.multi:
        # Sort by repeat length, then method tag, then score
        MultiPoseSortingTuples = []
    else:
        PoseSortingTuples = []

    Talaris = rosetta.getScoreFunction()
    for i, Pdb in enumerate(Pdbs):
        RepeatLength = RepeatLengths[i]
        ProcessNumber = TagList[i]
        Pose = rosetta.pose_from_pdb(Pdb)
        if Args.norm:
            Score = Talaris(Pose) / Pose.n_residue()
        else:
            Score = Talaris(Pose)

        # print 'Pdb', Pdb
        if Args.multi:
            MultiPoseSortingTuples.append(
                (RepeatLength, ProcessNumber, Score, Pose))
        else:
            PoseSortingTuples.append((RepeatLength, Score, Pose))

    if Args.multi:
        # Sort by repeat length, then method tag, then score
        MultiPoseSortingTuples.sort()
    else:
        # sorts by repeat length (shortest to longest) then score (best to worst)
        PoseSortingTuples.sort()

    if Args.multi:
        # print 'MultiPoseSortingTuples', MultiPoseSortingTuples
        SortedTuples = MultiPoseSortingTuples
    else:
        # print 'PoseSortingTuples', PoseSortingTuples
        SortedTuples = PoseSortingTuples

    LastLength = 0
    LastTag = 0
    AllGroups = []
    CurrentGroup = []

    for PoseTuple in SortedTuples:
        Length = PoseTuple[0]
        if Args.multi:
            Tag = PoseTuple[1]

        if LastLength and Length != LastLength:
            AllGroups.append(CurrentGroup)
            CurrentGroup = []

        if Args.multi:
            if LastTag and Tag != LastTag:
                AllGroups.append(CurrentGroup)
                CurrentGroup = []

        CurrentGroup.append(PoseTuple)
        LastLength = Length
        if Args.multi:
            LastTag = Tag

    # for last repeat length
    AllGroups.append(CurrentGroup)
    ''' Build score functions here: '''

    Talaris = rosetta.getScoreFunction()

    # This line returns a talaris function with all default weights set to 0
    CstScore = set_all_weights_zero(rosetta.getScoreFunction())
    CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
    CstScore.set_weight(rosetta.angle_constraint, 5.0)
    CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

    HbondScore = set_all_weights_zero(rosetta.getScoreFunction())
    HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
    HbondScore.set_weight(rosetta.hbond_sc, 1.100)

    Disulfide = set_all_weights_zero(rosetta.getScoreFunction())
    Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

    if Args.plot:
        if Args.norm:
            PerRes = True
        else:
            PerRes = False
        ''' Add and remove score functions here '''
        Plotter = plotly_plotter(
            Args.plotly_id,
            Args.plotly_key,
            Args.native,
            ScoreFxns=[CstScore, Talaris, HbondScore, Disulfide],
            FxnNames=['ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide'],
            PerResidue=PerRes)

    XaxisSortingTuples = []

    for PoseGroup in AllGroups:
        # for PoseGroup in [SortedTuples]:
        if len(PoseGroup):
            # print
            # print 'Group:', PoseGroup
            Poses = [PoseTuple[-1] for PoseTuple in PoseGroup]
            # print PoseGroup
            RepeatLength = PoseGroup[0][0]
            # print '\n'.join( [ Pose.pdb_info().name() for Pose in Poses ] )
            # print 'Zero index pose tuple:'
            # print PoseGroup[0]

            if Args.plot:
                GroupPdbName = PoseGroup[0][-1].pdb_info().name()
                if Args.multi:
                    Tag = TagByPdbName[GroupPdbName]

                    if Args.cst:
                        Plotter.score_poses(Poses, Args.cst, Tag)
                    else:
                        Plotter.score_poses(Poses, 1, Tag)

    # return Plotter
    Plotter.plot_2d_score_combinations()
    print 'Plotter.Score2dComboTraces', 3, Plotter.Score2dComboTraces

    Plotter.draw_comparisons()

    print 'plotting...'
    if len(Args.name):
        Name = Args.name
    else:
        Name = '%s based %d res ' % (Args.native, RepeatLength)
    Plotter.render_scatter_plot(PlotName=Name)

    while 1:

        ScoreFunctionScoreCutoffs = []
        for i, Name in enumerate(Plotter.FxnNames):
            while 1:
                try:
                    Cutoff = float(
                        raw_input(
                            '\tEnter cutoff value (maximum) for %s function: '
                            % Name))
                    break
                except ValueError:
                    pass
            ScoreFunctionScoreCutoffs.append(Cutoff)

        print 'Cutoff values set at:'
        for i, Name in enumerate(Plotter.FxnNames):
            # print Name, ScoreFunctionScoreCutoffs[i]
            Plotter.ScoreFunctionScoredPdbs[i].sort()

        PassingPdbs = []
        for i, Name in enumerate(Plotter.FxnNames):
            PassThisFxn = []
            Cutoff = ScoreFunctionScoreCutoffs[i]
            # print Plotter.ScoreFunctionScoredPdbs[i]
            for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]:
                if Score <= Cutoff:
                    PassThisFxn.append(Pdb)
                else:
                    break
            PassingPdbs.append(PassThisFxn)

        PdbsPassingAll = PassingPdbs[0]
        if Args.and_or == 'and':
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll) & set(OtherSet))
        else:
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll + OtherSet))

        Outdir = raw_input(
            '\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: '
            % Args.and_or)

        if not os.path.isdir(Outdir):
            subprocess.check_output(['mkdir', Outdir])
        if Outdir[-1] != '/':
            Outdir = Outdir + '/'

        for Pdb in PdbsPassingAll:
            subprocess.check_output(['cp', Pdb, Outdir])
            if Plotter.CstDict[Pdb] != None:
                subprocess.check_output(['cp', Plotter.CstDict[Pdb], Outdir])

        Continue = str(
            raw_input(
                '\tEnter Y to add another set of selection threshold, or anything else to quit: '
            )).upper()
        if Continue == 'Y':
            pass
        else:
            break
Example #38
0
def sequence_mapping(pdb_file, sequence_file, score_file, relax, jobs):
    if os.path.exists( os.getcwd() + '/' + pdb_file ) and pdb_file:
        init()
        pose = Pose()
        score_fxn = create_score_function('talaris2014')
        if (relax):
            refinement = FastRelax(score_fxn)
        pose_from_pdb(pose, pdb_file)
        if os.path.exists( os.getcwd() + '/' + sequence_file ) and sequence_file:
            fid = open(sequence_file,'r')
            fod = open(score_file,'w')
            data = fid.readlines()
            fid.close()
            sequences = []
            read_seq = False
            for i in data:
                if not len(i):
                    continue
                elif i[0] == '>':
                    read_seq = True
                    fasta_line = re.split(':|\s+|\||\\n',i[1:])
                    name_cpt=0
                    while (name_cpt<len(fasta_line) and not fasta_line[name_cpt]):
                        name_cpt+=1
                    if name_cpt<len(fasta_line):
                        job_output = fasta_line[name_cpt]
                    else:
                        print 'Error: Please enter an identifier for sequences in your fasta file'
                        exit(1)
                elif read_seq:
                    seq=list(i)
                    resn=1
                    for j in i:
                        if j!='\n' and resn<=pose.total_residue():
                            mutator = MutateResidue( resn , one_to_three[j] )
                            mutator.apply( pose )
                            resn+=1
                        elif resn>pose.total_residue():
                            print 'WARNING: couldn\'t mutate residue number '+str(resn)+', sequence too long for backbone...'
                            resn+=1
                    if (relax):
                        jd = PyJobDistributor(job_output, jobs, score_fxn)
                        jd.native_pose = pose
                        scores = [0]*(jobs)
                        counter = 0
                        decoy=Pose()
                        while not jd.job_complete:
                            decoy.assign(pose)
                            resn=1
                            refinement.apply(decoy)
                            jd.output_decoy(decoy)
                            scores[counter]=score_fxn(decoy)
                            counter+=1
                        for i in range(0, len(scores)):
                            fod.writelines(job_output + '_' + str(i+1) + ' : '+str(scores[i])+'\n')
                    else:
                        pose_packer = standard_packer_task(pose)
                        pose_packer.restrict_to_repacking()
                        packmover = PackRotamersMover(score_fxn, pose_packer)
                        packmover.apply(pose)
                        fod.writelines(job_output+' : '+str(score_fxn(pose))+'\n')
                        pose.dump_pdb(job_output+'_1.pdb')
                else:
                    print 'Bad fasta format'
                    exit(1)
            fod.close()
        else:
            print 'Please provide a valid sequence file, '+sequence_file+' doesn\'t exist'
    else:
       print 'Please provide a valid backbone file, '+pdb_file+' doesn\'t exist' 
Example #39
0
def main():
    #takes name of pdb file without the extention
    args =  sys.argv	
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])
    #set up timer to figure out how long the code took to run
    t0=time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    #Amino acids, notice there is no C
    AAs = ("A","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y")
    #Number of mutations to accept
    max_accept_mut = 2000
    #Population size
    N = 1
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()
       
    #Set up MoveMap This is where you turn the bb and side chain flexibility on and off
    mm = MoveMap()
    mm.set_bb(False)

    #Get the init score of the struct to calc the threshold
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains=cp_init_pdb.split_by_chain()

    #split up AB inter and AC inter 
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)

    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    #score the inital stabs of each chain
    wt_a=sf(chains[1])

    wt_b=sf(chains[2])

    wt_c=sf(chains[3])

    #score the intial interfaces 
    inter_AB=InterfaceEnergy_split(initial_poseAB)

    inter_AC=InterfaceEnergy_split(initial_poseAC)

    #init thresholds set to half of the init stabilities, if you want to do a different protein change these
    threshold_a=-138.41754752
    threshold_b=-61.378619136
    threshold_c=-61.378619136
    threshold_inter_ab=-10.3726691079
    threshold_inter_ac=-10.3726691079

    data.append('WT,' + str(wt_a)+','+str(wt_b)+','+str(wt_c)+','+str(inter_AB)+','+str(inter_AC)+',0.0,0.0,0\n')

	#check the inital starting score
    init_score=score_all(initial_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type)
    print(init_score)

    #number of residues to select from
    n_res = initial_pose.total_residue()
    print(n_res)
  
    #start sim
    i=0
    gen=0
    while i < max_accept_mut:
            #update the number of generations that have pased
            gen+=1

	    print 'accepts:', i 

	    #pick a place to mutate
	    mut_location = random.randint(1, n_res)
	    #mut_location = random.randint(1, 10)

	    #get the amino acid at that position
	    res = initial_pose.residue(mut_location)

	    #don't mess with C, just choose again
	    while(res.name1() == 'C'):
			mut_location = random.randint(1, n_res)
	    	#get the amino acid at that position
	    	res = initial_pose.residue(mut_location)


	    #choose the amino acid to mutate to
	    toname = res.name1()
	    new_mut_key = random.randint(0,len(AAs)-1)
	    proposed_res = AAs[new_mut_key]
	  
	    #don't bother mutating to the same amino acid it just takes more time
	    while(proposed_res == res.name1()):
			new_mut_key = random.randint(0,len(AAs)-1)
	        proposed_res = AAs[new_mut_key]

	    #init mutant with current 
	    mutant_pose = Pose()
	    mutant_pose.assign(initial_pose)
		
		#mutate 
	    mutant_pose=mutate_residue_chain(mutant_pose, mut_location, proposed_res, PACK_RADIUS, sf)
		
	    #score mutant
	     mut_score=score_all(mutant_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type)

	    #get the probability that the mutation will be accepted
	    probability = calc_prob_scores(mut_score['score'], init_score['score'], N)
		
	    rand = random.random()

	    #test to see if mutation is accepted
	    if float(rand) < float(probability):
			print "accepted" 	
		
			#make a name for the new mutant
			variant_name = str(toname) + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res)


			# Assuming some burn in phase, make this zero if you want to store everything
			if i>=0:
				#save name and energy change
				data.append(variant_name +',' + str(mut_score['a'])+','+str(mut_score['b'])+','+str(mut_score['c'])+','+str(mut_score['ab'])+','+str(mut_score['ac'])+',' + str(mut_score['score'] - init_score['score']) + "," + str(probability) + "," + str(gen) + "\n")

				#save the new accepted mutation	
				pdb_name=str(i)+".pdb"	
				mutant_pose.dump_pdb(pdb_name)

			#update the wildtype 
			initial_pose = mutant_pose
			init_score = mut_score

			#update number of accepts
	    	i+=1
Example #40
0
def main():
    #takes name of pdb file without the extention
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 1500
    #Population size
    N = 100
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    #change these for more or less flexability
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    #Set threshold for selection
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(post_pre_packing_score) + ',0.0 ,0.0,0\n')

    #number of residues to select from
    n_res = initial_pose.total_residue()

    #start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        #update the number of generations that have pased
        gen += 1

        print 'accepts:', i

        #pick a place to mutate
        mut_location = random.randint(1, n_res)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #don't mess with C, just choose again
        while (res.name1() == 'C'):
            mut_location = random.randint(1, n_res)
            #get the amino acid at that position
            res = initial_pose.residue(mut_location)

#choose the amino acid to mutate to
        new_mut_key = random.randint(0, len(AAs) - 1)

        proposed_res = AAs[new_mut_key]

        #don't bother mutating to the same amino acid it just takes more time
        while (proposed_res == res.name1()):
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

#make the mutation
#this is actually a really bad model, and probably shouldnt be used. In new version is repack the whole thing, then reminimize, I should also backrub it.
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        #score mutant
        variant_score = sf(mutant_pose)

        #get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        #test to see if mutation is accepted
        if random.random() < probability:

            #create a name for the mutant if its going to be kept
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            # Assuming 1000 burn in phase, take this if out if you want to store everything
            if i > 1000:
                #save name and energy change
                data.append(variant_name + "," + str(variant_score) + "," +
                            str(variant_score - post_pre_packing_score) + "," +
                            str(probability) + "," + str(gen) + "\n")

                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            #update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            #update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()
    # Output results.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    print 'program takes %f' % (t1 - t0)
Example #41
0
def main(argv=None):
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]
    print 'sys.argv', sys.argv

    ArgParser = argparse.ArgumentParser(
        description=' nc_cst_gen.py arguments ( -help ) %s' % InfoString)
    # Required arguments:
    ArgParser.add_argument('-pdbs',
                           type=str,
                           nargs='+',
                           help=' input pdbs ',
                           required=True)
    # Optional arguments:
    ArgParser.add_argument('-out',
                           type=str,
                           help=' output directory ',
                           default='./')
    ArgParser.add_argument(
        '-max_dist',
        type=float,
        default=3.4,
        help=' distance between the oxygens and downstreams ')
    ArgParser.add_argument('-min_seq_sep',
                           type=int,
                           default=3,
                           help=' minimum seperation in primary sequece ')
    ArgParser.add_argument('-upstream_atom',
                           type=str,
                           default='[ON]\w?\d?',
                           help=' grep for upstream atoms ')
    ArgParser.add_argument('-downstream_atom',
                           type=str,
                           default='[ON]\w?\d?',
                           help=' grep for downstream atoms ')
    ArgParser.add_argument(
        '-num_repeats',
        type=int,
        default=5,
        help=' number of repeats to extrapolate contacts for ')
    ArgParser.add_argument(
        '-min_sasa',
        type=float,
        default=0.0,
        help=' floor for weighting downstream oxygen contacts ')
    ArgParser.add_argument(
        '-min_sasa_weight',
        type=float,
        default=1.0,
        help=' weight of floor for downstream oxygen contacts ')
    ArgParser.add_argument(
        '-max_sasa',
        type=float,
        default=5.0,
        help=' ceiling for cst weighting downstream oxygen contacts ')
    ArgParser.add_argument(
        '-max_sasa_weight',
        type=float,
        default=0.1,
        help=' weight of ceiling for downstream oxygen contacts ')
    ArgParser.add_argument('-sasa_probe_radius',
                           type=float,
                           default=0.8,
                           help=' probe radius for sasa calculations ')
    ArgParser.add_argument('-renumber_pose',
                           type=bool,
                           default=True,
                           help='True|False renumber pdb residues ')

    ArgParser.add_argument('-disulfide',
                           type=bool,
                           default=True,
                           help='True|False include disulfide constraints ')

    Args = ArgParser.parse_args()

    # if len(Args.pdbs[0]) == 1:
    #   Args.pdbs = [''.join(Args.pdbs)]

    if Args.out[-1] != '/':
        Args.out = Args.out + '/'

    import rosetta
    rosetta.init(extra_options="-mute basic -mute core -mute protocols")

    ReportedRepeatCount = 0
    TotalPdbs = len(Args.pdbs)

    # Instance of class to convert sasas to cst weight
    SasaScale = sasa_scale(Args.min_sasa, Args.min_sasa_weight, Args.max_sasa,
                           Args.max_sasa_weight)

    for iPdb, Pdb in enumerate(Args.pdbs):
        print ' Working with %s; %d of %d total pdbs ' % (Pdb, iPdb + 1,
                                                          TotalPdbs)
        # Starting rosetta
        Pose = rosetta.pose_from_pdb(Pdb)
        OutputPdb = Args.out + Pdb

        # Sets pdb info so residues in dumped pdbs are same as index
        Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))
        if Args.renumber_pose:
            rosetta.dump_pdb(Pose, OutputPdb)
        else:
            rosetta.dump_pdb(Pose, OutputPdb.replace('.pdb',
                                                     '_renumbered.pdb'))

        AllConstraints, SortedConstraints = get_pose_constraints(
            Pose, Args.max_dist, Args.min_seq_sep, Args.sasa_probe_radius,
            SasaScale, Args.upstream_atom, Args.downstream_atom, True)

        if Args.disulfide:
            DisulfAllConstraints, DisulfSortedConstraints = get_pose_constraints(
                Pose, 3.5, 2, Args.sasa_probe_radius, SasaScale, 'SG', 'SG',
                False)
            AllConstraints.extend(DisulfAllConstraints)

        # print AllConstraints
        # print SortedConstraints
        # print
        # print
        # print DisulfAllConstraints
        # print DisulfSortedConstraints
        # sys.exit()

        CstName = OutputPdb.replace('.pdb', '_All.cst')
        with open(CstName, 'w') as CstFile:
            print >> CstFile, '\n'.join(AllConstraints)

        BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = SortedConstraints

        CstName = OutputPdb.replace('.pdb', '_BBBB.cst')
        with open(CstName, 'w') as CstFile:
            print >> CstFile, '\n'.join(BackboneBackboneCst)
        CstName = OutputPdb.replace('.pdb', '_BBSC.cst')
        with open(CstName, 'w') as CstFile:
            print >> CstFile, '\n'.join(BackboneSidechainCst)
        CstName = OutputPdb.replace('.pdb', '_SCSC.cst')
        with open(CstName, 'w') as CstFile:
            print >> CstFile, '\n'.join(SidechainSidechainCst)
        CstName = OutputPdb.replace('.pdb', '_Disulf.cst')
        with open(CstName, 'w') as CstFile:
            print >> CstFile, '\n'.join(DisulfAllConstraints)
Example #42
0
def cap_and_relax_pdb( (RepeatPdb, ReferencePdb, ReferenceCst) ):
  """
  Cap a repeat pose with the N and C termini of a reference pose, then
  idealize and relax the capped structure.

  The single argument is a 3-tuple (Python 2 tuple-parameter unpacking):
    RepeatPdb    -- path of the repeat pdb; its name must contain a
                    'rep<N>' tag and a 'src<a>_<b>__<c>_<d>' tag and end
                    in '.pdb' (both tags are parsed with regexes below)
    ReferencePdb -- path of the reference pdb the caps are cut from
    ReferenceCst -- constraint file handed to constraint_extrapolator

  Files written, all derived from the RepeatPdb name:
    <stem>_Cap.pdb          fused N cap + trimmed repeat + C cap
    <stem>_Cap.cst          reference constraints kept for the capped pose
    <stem>_Cap_Ideal.pdb    renamed output of the external idealize run
    <stem>_Cap__Relax.pdb   after relax with the constraint weights on
    <stem>_Cap__Relax2.pdb  after a second relax with the plain score function

  Returns None; results are only written to disk and printed.
  """

  RepeatPose = rosetta.pose_from_pdb(RepeatPdb)
  # Keep residues 3 .. n_residue-3 of the repeat pose and give the trimmed
  # pose a fresh PDBInfo built from itself
  TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 )
  TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) )

  ReferencePose = rosetta.pose_from_pdb( ReferencePdb )
  ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) )

  # rosetta.dump_pdb(TrimmedRepeatPose, 'Trimmed.pdb')

  # NOTE: re.sub returns its input unchanged when the pattern does not match,
  # so a file name without the expected tags makes the int() calls below
  # raise ValueError.
  # RepeatLength is parsed here but not used again in this function.
  RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdb))
  # 'src<a>_<b>__<c>_<d>' -> [[a, b], [c, d]]; only the first range is used
  SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdb)
  SourceRanges = SourceRanges.split('__')
  SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges ]
  SourceStart = SourceRanges[0][0]
  SourceEnd = SourceRanges[0][1]


  '''
   Add N terminal cap 
   '''
  # N cap = reference residues 1 .. SourceStart+5
  NcapPose = grafting.return_region( ReferencePose, 1, SourceStart+5 )
  # rosetta.dump_pdb(NcapPose, 'Ncap.pdb')
  NcapLength = NcapPose.n_residue()
  
  # Last four residues of the N cap ...
  NcapOverhangPositions = [ Position for Position in range(NcapLength-3, NcapLength+1) ]
  # print NcapOverhangPositions
  NcapOverhangArray = get_residue_array( NcapPose, NcapOverhangPositions )
  
  # ... are matched against the first four residues of the trimmed repeat
  RepStartOverhangPositions = [1,2,3,4]
  RepStartOverhangArray = get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions )
  # print RepStartOverhangArray

  # The returned transform is applied to the pose that supplied the second
  # array (the trimmed repeat), moving it in place before fusing.
  # NOTE(review): presumably rMtx / tVec are the rotation matrix and
  # translation vector of the superposition -- confirm in solenoid_tools.
  RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray )
  rosetta.Pose.apply_transform_Rx_plus_v(TrimmedRepeatPose, rMtx, tVec)
  # rosetta.dump_pdb( TrimmedRepeatPose, 'TrimmedShifted.pdb' )
  # fuse returns (fused pose, RMSD, corresponding residues); RMSD is rebound here
  NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = fuse(NcapPose, TrimmedRepeatPose)
  print 'Ncap attachment RMSD %f'%RMSD
  # rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' )
  NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) )    
  

  '''
   Add C terminal cap 
  '''
  # C cap = reference residues SourceEnd-6 .. last residue. Cshift is reused
  # further down to re-index the C cap constraints.
  Cshift = SourceEnd-6
  CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() )
  # rosetta.dump_pdb(CcapPose, 'Ccap.pdb')
  # First four residues of the C cap ...
  CcapOverhangPositions = [1,2,3,4]
  CcapOverhangArray = get_residue_array( CcapPose, CcapOverhangPositions )

  # ... are matched against the last four residues of the N cap + repeat pose
  RepEndOverhangPositions = [ Position for Position in range( NcapPlusRepeatPose.n_residue()-3, NcapPlusRepeatPose.n_residue()+1 ) ]
  # print 'RepEndOverhangPositions', RepEndOverhangPositions
  RepEndOverhangArray = get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions )
  
  # As above, the pose behind the second array (here the C cap) is the one moved
  RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray )
  rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
  # rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' )
  CappedRepeatPose, RMSD, CcapCorrespondingResidues = fuse(NcapPlusRepeatPose, CcapPose)
  print 'Ccap attachment RMSD %f'%RMSD

  # '<stem>.pdb' -> '<stem>_Cap.pdb'; the assert catches a name that does not
  # end in 'pdb' (re.sub would then return it unchanged)
  CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_Cap.pdb', RepeatPdb)
  assert CappedNamePdb != RepeatPdb, 'regular expression substitution failed!'
  rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb )



  '''
   Generate csts for cap/repeat edges 
  '''
  # CstExtrapolator.Cst is indexed as Cst[position][atom name] -> iterable of
  # 4-tuples (AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType)
  CstExtrapolator = constraint_extrapolator(ReferenceCst)
  ConstraintSet = []
  
  ' N cap constraints are easy; no shifts are needed '

  # For catching when individual constraints have been considered already  
  Redundict = {} 
  # Same residue range as the N cap: reference positions 1 .. SourceStart+5
  for Position in range(1, SourceStart+6):
    # print 'Position', Position
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstExtrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        # unpack tuple values 
        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
        
        # Redundancy check with redundict 
        # (keyed on the constraint's line number, so a constraint reached
        # through several positions/atoms is only considered once)
        try:
          Check = Redundict[CstLineNumber]
          # if cst considered already, skip it! 
          continue
        except KeyError:
          Redundict[CstLineNumber] = 1
        
        # Keep the constraint only if pose_has accepts it for the capped pose
        if pose_has(CappedRepeatPose, AtomResidueCoords):
          ConstraintSet.append(Constraint)

  ' C cap constraints are harder; need to shift due to pose expansion '

  # CstExtrapolator.output_cst(ConstraintSet, 'NcapConstraints.cst')\
  # The redundancy record is reset, so a constraint already kept in the N cap
  # pass can be considered again in the C cap pass.
  Redundict = {} 

  # print 'CcapCorrespondingResidues', CcapCorrespondingResidues
  # NOTE(review): presumably [0][0] is the capped-pose position matching the
  # first C cap residue -- confirm against the return value of fuse.
  RepeatCcapPositionStart = CcapCorrespondingResidues[0][0]
  # print 'RepeatCcapPositionStart', RepeatCcapPositionStart

  # Offset added to reference residue numbers to get capped-pose numbers
  ShiftToRepeatPose = RepeatCcapPositionStart - Cshift
  # print 'ShiftToRepeatPose', ShiftToRepeatPose

  # Same residue range as the C cap: reference positions Cshift .. last residue
  for Position in range( Cshift, ReferencePose.n_residue()+1 ):
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstExtrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        # unpack tuple values 
        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
        
        # Redundancy check with redundict 
        try:
          Check = Redundict[CstLineNumber]
          # if cst considered already, skip it! 
          continue
        except KeyError:
          Redundict[CstLineNumber] = 1

        ExpandedPoseAtomResidueCoords = []
        # iterate through atom residue pairs
        # (element [1] is the residue number that gets shifted; element [0]
        # is carried over unchanged)
        for AtomResiduePair in AtomResidueCoords:
          # print 'AtomResiduePair', AtomResiduePair
          ExpandedPosePosition = (AtomResiduePair[1]) + ShiftToRepeatPose
          # print 'ExpandedPosePosition', ExpandedPosePosition
          ExpandedPoseAtomResidueCoords.append( ( AtomResiduePair[0], ExpandedPosePosition ) )

        # Rebuild the constraint tuple with the shifted residue numbers
        ShiftedConstraint = ExpandedPoseAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType       

        if pose_has(CappedRepeatPose, ExpandedPoseAtomResidueCoords):
          ConstraintSet.append(ShiftedConstraint)  

  # '<stem>_Cap.pdb' -> '<stem>_Cap.cst'
  CapCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb)
  CstExtrapolator.output_cst(ConstraintSet, CapCstName)

  '''
  idealize peptide bonds with command line subprocess
  '''
  # The executable is invoked by bare name; check_output raises
  # CalledProcessError on a non-zero exit status.
  subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', CappedNamePdb])
  # The idealize output is expected at '<stem>_Cap_0001.pdb' and is renamed
  # to '<stem>_Cap_Ideal.pdb'
  IdealizedPdbOldName = re.sub(r'(.*).pdb$', r'\1_0001.pdb', CappedNamePdb)
  IdealizedPdbNewName = re.sub(r'(.*).pdb$', r'\1_Ideal.pdb', CappedNamePdb)
  
  subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
  # NOTE(review): presumably a short pause so the renamed file is visible
  # before it is read back -- confirm whether it is still needed.
  time.sleep(0.2)

  IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName )

  # make constraint mover
  Constrainer = rosetta.ConstraintSetMover()
  # get constraints from file
  Constrainer.constraint_file(CapCstName)
  Constrainer.apply(IdealizedCappedPose)

  ''' SET UP WEIGHTS AS decided '''

  # RelativeWeight = 0.1

  # Talaris: default score function, left unmodified here.
  # TalarisPlusCst: default score function with the three constraint weights
  # set below.
  # AtomPairCst: only referenced by the commented-out weighting code below.
  Talaris = rosetta.getScoreFunction()
  TalarisPlusCst = rosetta.getScoreFunction()
  AtomPairCst = set_all_weights_zero( rosetta.getScoreFunction() )
  AtomPairCst.set_weight(rosetta.atom_pair_constraint, 1.0)

  # RosettaScore = Talaris(IdealizedCappedPose) 
  # AtomPairCstScore = AtomPairCst(IdealizedCappedPose)
  
  # Weight = ( RosettaScore * RelativeWeight ) / AtomPairCstScore  
  Weight = 1.0
  TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, Weight)
  TalarisPlusCst.set_weight(rosetta.angle_constraint, Weight)
  TalarisPlusCst.set_weight(rosetta.dihedral_constraint, Weight)

  print 'relaxing %s with %s'%(IdealizedPdbNewName, CapCstName) 
  # NOTE: %d formats the float weight as an integer (1.0 prints as 1)
  print ' Weight %d '%Weight
  # First relax: constraint weights on; result dumped as '<stem>_Cap__Relax.pdb'
  rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
  RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax.pdb', IdealizedPdbNewName)
  rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)

  # Second relax continues from the already relaxed pose, now scored with the
  # unmodified score function; result dumped as '<stem>_Cap__Relax2.pdb'
  rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

  RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax2.pdb', IdealizedPdbNewName)
  rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
Example #43
0
def _ca_coordinate_array(Pose):
    """Return a numpy array with one row of CA coordinates per residue of Pose."""
    return np.array([
        list(Pose.residue(P).xyz('CA'))
        for P in range(1, Pose.n_residue() + 1)
    ])


def main(argv=None):
    """
    Move redundant repeat structures (and their associated files) aside.

    Globs '*<pdb_stem>.pdb' in the current directory, sorts the hits by the
    'rep<N>' and 'src<N>' numbers found in each file name, and compares the
    CA coordinates of each structure with the previous one in that order.
    When two neighbours have the same number of residues and an RMSD below
    0.001, every file whose name starts with the later pdb's stem is moved
    into 'Redundant/'.

    argv -- optional list of command line arguments (without the program
            name); when given it replaces sys.argv[1:] before parsing.

    File names lacking a 'rep<N>' or 'src<N>' tag are reported under
    'Skipped:' and ignored. If no usable pdb is found the function returns
    without creating 'Redundant/'.
    """
    # Local import: only this entry point needs os, and the file's
    # top-level import block is not touched.
    import os

    if argv is not None:
        sys.argv = [sys.argv[0]] + list(argv)

    ArgParser = argparse.ArgumentParser(
        description=" args for optimize_repeat_structures ")
    ArgParser.add_argument('-pdb_stem',
                           type=str,
                           help=" pdb stem, start of globs for pdbs and csts ",
                           required=True)
    Args = ArgParser.parse_args()

    Pdbs = glob.glob('*%s.pdb' % Args.pdb_stem)

    PdbSortTuples = []
    Skipped = []

    for Pdb in Pdbs:
        # re.match yields None when a tag is missing, so untagged files can
        # be skipped. The previous int(re.sub(...)) form raised ValueError
        # on such names before its assert-based skip could run.
        RepeatMatch = re.match(r'.*rep(\d+).*pdb', Pdb)
        SourceMatch = re.match(r'.*src(\d+).*pdb', Pdb)
        if RepeatMatch is None or SourceMatch is None:
            Skipped.append(Pdb)
            continue

        RepeatLength = int(RepeatMatch.group(1))
        SourceStart = int(SourceMatch.group(1))
        PdbSortTuples.append((RepeatLength, SourceStart, Pdb))

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as a Python 3 function call.
    print('Skipped:')
    print(Skipped)
    print('')

    # Nothing to compare: avoid the IndexError on PdbSortTuples[0]
    if not PdbSortTuples:
        return

    PdbSortTuples.sort()

    LastPdb = PdbSortTuples[0][2]
    LastArray = _ca_coordinate_array(rosetta.pose_from_pdb(LastPdb))

    # 'mkdir Redundant' failed when the directory was left over from an
    # earlier run; only create it when it is missing.
    if not os.path.isdir('Redundant'):
        os.makedirs('Redundant')

    for PdbTup in PdbSortTuples[1:]:
        Pdb = PdbTup[2]
        CA_Array = _ca_coordinate_array(rosetta.pose_from_pdb(Pdb))

        # Only structures with the same residue count can be superimposed
        if len(CA_Array) == len(LastArray):

            RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays(
                CA_Array, LastArray)
            print('')
            print('LastPdb, Pdb')
            print(LastPdb)
            print(Pdb)
            print('RMSD: %s' % (RMSD,))

            if RMSD < 0.001:
                # Every glob hit ends in '.pdb', so splitext strips exactly
                # that suffix.
                PdbStem = os.path.splitext(Pdb)[0]

                # Move the pdb together with every file sharing its stem
                for File in glob.glob('%s*' % PdbStem):
                    subprocess.check_output(['mv', File, 'Redundant/'])

        # Both names are rebound every iteration, so no copy is required
        LastArray = CA_Array
        LastPdb = Pdb
Example #44
0
def main(argv=None):
    # if argv is None:
    #   argv = sys.argv
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]
    # print 'sys.argv', sys.argv

    # Arg block
    ArgParser = argparse.ArgumentParser(
        description=' expand_cst.py ( -help ) %s' % InfoString)
    # Required args
    ArgParser.add_argument('-ref_pdb',
                           type=str,
                           help=' reference pdb ',
                           required=True)
    ArgParser.add_argument('-ref_cst',
                           type=str,
                           help=' corresponding to reference pdb ',
                           required=True)
    ArgParser.add_argument('-repeat_pdb_tag',
                           type=str,
                           help=' input pdb tag ',
                           required=True)
    # Optional args
    ArgParser.add_argument('-out',
                           type=str,
                           help=' Output directory ',
                           default='./')
    Args = ArgParser.parse_args()
    if Args.out[-1] != '/':
        Args.out = Args.out + '/'

    # default talaris 2013 score function
    ScoreFunction = rosetta.getScoreFunction()
    # turning on constraint weights
    ScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

    RefPdb = Args.ref_pdb
    # print RefPdb
    ReferencePose = rosetta.pose_from_pdb(RefPdb)
    print 'ReferencePose', ReferencePose

    # modify rosetta cst w/o rosetta
    Constrainer = constraint_extrapolator(Args.ref_cst)

    # RefCst = Args.ref_cst
    # # make constraint mover
    # Constrainer = rosetta.ConstraintSetMover()
    # # get constraints from file
    # Constrainer.constraint_file(RefCst)
    # # Apply constraints to pose
    # Constrainer.apply(ReferencePose)

    # return Constrainer

    Pdbs = glob.glob('*%s*.pdb' % Args.repeat_pdb_tag)
    assert len(
        Pdbs
    ), r"No pdbs found with glob: \n %s \n '* % s *.pdb' % Args.repeat_pdb_tag " % Args.repeat_pdb_tag

    for Pdb in Pdbs:
        ## For debug put pdb of interest here:
        # if Pdb == 'src15_38__22_45_rep24_1EZG.pdb':

        print 'Pdb:', Pdb
        Pose = rosetta.pose_from_pdb(Pdb)

        try:
            SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1',
                                       Pdb)
            SourceRanges = [[int(Number) for Number in Range.split('_')]
                            for Range in SourceRangeString.split('__')]
        except ValueError:
            print 'No src range tag, skipping: %s ' % Pdb
            continue

        print 'SourceRanges:', SourceRanges
        RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
        print 'RepeatLength', RepeatLength
        print

        # print [Pdb]
        PdbTag = (Pdb + '!').replace('.pdb!', '').replace('!', '')
        CstName = PdbTag + '.cst'
        ExtrapolatedConstraints = Constrainer.extrapolate_from_repeat_unit(
            SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose,
            CstName, PdbTag)