예제 #1
0
def get_E_per_residue( pose ):
    from rosetta import get_fa_scorefxn
    
    sf = get_fa_scorefxn()
    sf( pose )
    
    for residue in pose:
        energy = pose.energies().residue_total_energy( residue.seqpos() )
        if energy > 4:
            print residue.name(), '\t', pose.pdb_info().pose2pdb( residue.seqpos() ), '\t', energy
예제 #2
0
def get_E_per_residue_from_file( pdb_filename ):
    from rosetta import get_fa_scorefxn
    from antibody_functions import load_pose
    
    pose = load_pose( pdb_filename )
    sf = get_fa_scorefxn()
    sf( pose )
    
    for residue in pose:
        energy = pose.energies().residue_total_energy( residue.seqpos() )
        if energy > 1.5:
            print residue.name(), '\t', pose.pdb_info().pose2pdb( residue.seqpos() ), '\t', energy
def get_fa_scorefxn_with_given_weights( weights_dict, verbose = False ):
    '''
    Return an sf from get_fa_scoretype but with adjusted weights <scoretypes> with given <weights>
    If <input_scoretype> is not already part of the <sf>, this function will add it to <sf> with a weight of <weight>, and then get the score
    Will exit if the string( <input_scoretype> ) is not a valid ScoreType
    :param weights_dict: dict( ScoreType or str of ScoreType name : int( or float( weight ) ) )
    :param verbose: bool( print the final weights of the returned ScoreFunction? ) Default = False
    "return: ScoreFunction( fa_scorefxn with adjusted weights of given scoretypes )
    '''
    # imports
    import sys
    from rosetta import get_fa_scorefxn, score_type_from_name
    from rosetta.core.scoring import fa_atr


    # argument check - check the passed argument is a dict
    if not isinstance( weights_dict, dict ):
        print "You didn't give me a dictionary for your input. I need a dict of ScoreType (or name) : weight. Exiting."
        sys.exit()
        
    # get a standard fa_scorefxn to start with
    sf = get_fa_scorefxn()
    
    # for each entry of the dictionary, change the weight
    for scoretype_name, scoretype_weight in weights_dict.items():
        # if the key is a string
        if isinstance( scoretype_name, str ):
            try:
                scoretype = score_type_from_name( scoretype_name )
            except:
                print "\nThe string name: '%s' does not appear to be a valid ScoreType. Exiting" %scoretype_name
                sys.exit()
            
            # set the weight
            sf.set_weight( scoretype, scoretype_weight )

        # if the argument is a ScoreType object
        elif isinstance( scoretype_name, type( fa_atr ) ):
            # adjust the weight in the scorefxn using the corresponding weight given
            sf.set_weight( scoretype_name, scoretype_weight )

        # else, I don't know what they gave me as a scoretype
        else: 
            print "I'm not sure what '%s' is from your ScoreType key in your <weights_dict> argument. Exiting" %scoretype_name
            sys.exit()

    # if verbose, print out the weights of the new ScoreFunction
    if verbose:
        print "\nNew score weights sf:\n%s\n" %( "\n".join( [ "%s: %s" %( str( name ), sf.get_weight( name ) ) for name in sf.get_nonzero_weighted_scoretypes() ] ) )

    # return the newly weighted fa_scorefxn
    return sf
예제 #4
0
def main():
    #takes name of pdb file without the extention
    args =  sys.argv	
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])
    #set up timer to figure out how long the code took to run
    t0=time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    #Amino acids, notice there is no C
    AAs = ("A","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y")
    #Number of mutations to accept
    max_accept_mut = 2000
    #Population size
    N = 1
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()
       
    #Set up MoveMap This is where you turn the bb and side chain flexibility on and off
    mm = MoveMap()
    mm.set_bb(False)

    #Get the init score of the struct to calc the threshold
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains=cp_init_pdb.split_by_chain()

    #split up AB inter and AC inter 
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)

    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    #score the inital stabs of each chain
    wt_a=sf(chains[1])

    wt_b=sf(chains[2])

    wt_c=sf(chains[3])

    #score the intial interfaces 
    inter_AB=InterfaceEnergy_split(initial_poseAB)

    inter_AC=InterfaceEnergy_split(initial_poseAC)

    #init thresholds set to half of the init stabilities, if you want to do a different protein change these
    threshold_a=-138.41754752
    threshold_b=-61.378619136
    threshold_c=-61.378619136
    threshold_inter_ab=-10.3726691079
    threshold_inter_ac=-10.3726691079

    data.append('WT,' + str(wt_a)+','+str(wt_b)+','+str(wt_c)+','+str(inter_AB)+','+str(inter_AC)+',0.0,0.0,0\n')

	#check the inital starting score
    init_score=score_all(initial_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type)
    print(init_score)

    #number of residues to select from
    n_res = initial_pose.total_residue()
    print(n_res)
  
    #start sim
    i=0
    gen=0
    while i < max_accept_mut:
            #update the number of generations that have pased
            gen+=1

	    print 'accepts:', i 

	    #pick a place to mutate
	    mut_location = random.randint(1, n_res)
	    #mut_location = random.randint(1, 10)

	    #get the amino acid at that position
	    res = initial_pose.residue(mut_location)

	    #don't mess with C, just choose again
	    while(res.name1() == 'C'):
			mut_location = random.randint(1, n_res)
	    	#get the amino acid at that position
	    	res = initial_pose.residue(mut_location)


	    #choose the amino acid to mutate to
	    toname = res.name1()
	    new_mut_key = random.randint(0,len(AAs)-1)
	    proposed_res = AAs[new_mut_key]
	  
	    #don't bother mutating to the same amino acid it just takes more time
	    while(proposed_res == res.name1()):
			new_mut_key = random.randint(0,len(AAs)-1)
	        proposed_res = AAs[new_mut_key]

	    #init mutant with current 
	    mutant_pose = Pose()
	    mutant_pose.assign(initial_pose)
		
		#mutate 
	    mutant_pose=mutate_residue_chain(mutant_pose, mut_location, proposed_res, PACK_RADIUS, sf)
		
	    #score mutant
	     mut_score=score_all(mutant_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type)

	    #get the probability that the mutation will be accepted
	    probability = calc_prob_scores(mut_score['score'], init_score['score'], N)
		
	    rand = random.random()

	    #test to see if mutation is accepted
	    if float(rand) < float(probability):
			print "accepted" 	
		
			#make a name for the new mutant
			variant_name = str(toname) + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res)


			# Assuming some burn in phase, make this zero if you want to store everything
			if i>=0:
				#save name and energy change
				data.append(variant_name +',' + str(mut_score['a'])+','+str(mut_score['b'])+','+str(mut_score['c'])+','+str(mut_score['ab'])+','+str(mut_score['ac'])+',' + str(mut_score['score'] - init_score['score']) + "," + str(probability) + "," + str(gen) + "\n")

				#save the new accepted mutation	
				pdb_name=str(i)+".pdb"	
				mutant_pose.dump_pdb(pdb_name)

			#update the wildtype 
			initial_pose = mutant_pose
			init_score = mut_score

			#update number of accepts
	    	i+=1
# make the directory for the working PDBs in the base_structs_dir
structure_dir = base_structs_dir + native_pdb_name
if not os.path.isdir( structure_dir ):
    try:
        os.mkdir( structure_dir )
    except:
        pass
working_pose_decoy_name = structure_dir + '/' + native_pdb_name + "_just_%s_sugar_small_moves" %input_args.num_sugar_small_move_trials


# use the scorefxn_file to set up additional weights
if input_args.scorefxn_file is not None:
    main_sf = make_fa_scorefxn_from_file( input_args.scorefxn_file )
# else create a fa_scorefxn
else:
    main_sf = get_fa_scorefxn()

# fa_intra_rep should always be 0.440 since that's what I've been using
main_sf.set_weight( score_type_from_name( "fa_intra_rep" ), 0.440 )

# set up constraints from the passed constraint file
if input_args.native_constraint_file is not None:
    # add an appropriate weight to the main_sf if atom_pair_constraint and dihedral_constraint are 0
    if main_sf.get_weight( score_type_from_name( "atom_pair_constraint" ) ) == 0:
        main_sf.set_weight( score_type_from_name( "atom_pair_constraint" ), 1.0 )
    if main_sf.get_weight( score_type_from_name( "dihedral_constraint" ) ) == 0:
        main_sf.set_weight( score_type_from_name( "dihedral_constraint" ), 1.0 )

    if input_args.verbose:
        print "Setting up a ConstraintSetMover"
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_filename', action="store", type=str)
    parser.add_argument('replicate_number', action="store", type=int)

    inputs = parser.parse_args()
    #takes name of pdb file without the extention
    pdb_file = inputs.pdb_filename
    prot_name = pdb_file.split('/')[-1].split('.')[0]
    #set up timer to figure out how long the code took to run
    t0 = time()
    fasta_file = pdb_file.replace('/structures/',
                                  '/fastas/').replace('.pdb', '.fasta')
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    assert len(records) == 1
    wt_seq = str(records[0].seq)

    # Initialize Rosetta.
    #init(extra_options='-mute basic -mute core')
    init(extra_options=
         '-mute basic -mute core -rebuild_disulf false -detect_disulf false')

    ########################
    # Constants
    ########################
    PACK_RADIUS = 12.0
    #Amino acids
    AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P",
           "Q", "R", "S", "T", "V", "W", "Y")
    AAs_choice_dict = {}
    for aa in AAs:
        AAs_choice_dict[aa] = [other_aa for other_aa in AAs if other_aa != aa]
    #Number of mutations to accept
    max_accept_mut = 10 * len(wt_seq)
    #max_accept_mut = 2048

    #Population size
    N = 1000
    #Beta (temp term)
    beta = 1
    #Fraction of the WT stability value to shoot for
    threshold_fraction = 0.5
    ########################
    ########################

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load a clean pdb file
    initial_pose = pose_from_pdb(pdb_file)
    if '.clean' in pdb_file:
        pdb_file = ''.join(pdb_file.split('.clean'))

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    #Threshold for selection
    threshold = post_pre_packing_score * threshold_fraction
    print 'threshold:', threshold

    data.append('WT,' + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    #number of residues to select from
    n_res = initial_pose.total_residue()

    #start evolution
    i = 0
    gen = 0
    while i < max_accept_mut:

        #update the number of generations that have pased
        gen += 1

        #print 'accepts:', i

        #pick a place to mutate
        mut_location = random.randint(1, n_res)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #choose the amino acid to mutate to
        #new_mut_key = random.randint(0,len(AAs)-1)
        #proposed_res = AAs[new_mut_key]
        proposed_res = random.choice(AAs_choice_dict[res.name1()])

        #make the mutation
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        #score mutant
        variant_score = sf(mutant_pose)

        #get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        #test to see if mutation is accepted
        if random.random() < probability:

            #create a name for the mutant if its going to be kept
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            #save name and energy change
            data.append(variant_name + "," + str(variant_score) + "," +
                        str(variant_score - post_pre_packing_score) + "," +
                        str(probability) + "," + str(gen) + "\n")

            #            if i == (max_accept_mut - 1):
            #                final_pdb_name=pdb_file.replace('.pdb', '_thresh={}_Neff={}_beta={}_i={}_nmut={}.pdb'.format(threshold_fraction, N, beta, inputs.replicate_number, i))
            #                mutant_pose.dump_pdb(final_pdb_name)

            #update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            #update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()
    # Output results.
    output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format(
        prot_name, prot_name, threshold_fraction, N, beta,
        inputs.replicate_number)
    with open(output_filename, "w") as outfile:
        outfile.writelines(data)

    print 'Data written to:', output_filename
    print 'program takes %f' % (t1 - t0)
예제 #7
0
    def go(self):
        """ :param candidates: Current population being evaluated

            Scores the conformation by novelty.
        """

        time.clock()
        while (self.loops < 1000000) or (self.lowest > self.native_energy):

            # Do MCNS on each residue in protein
            if self.frag:
                n = 1
                while n <= self.c_size/2:
                    self.frag_mover()
                    n += 1
            elif self.local:
                n = 1
                while n < self.c_size/2/self.local_size:
                    self.local_mover(n, self.local_size)
                    n += 5
                r = self.c_size/2 % self.local_size
                self.local_mover(n, r)
            elif self.full:
                self.full_mover()
            else:
                n = 1
                while n <= self.c_size/2:
                    self.mover(n)
                    n += 1

            # Options for dynamic parameter adjustment

            # If no new points have been added in 4 loops decrease novelty threshold by 5%
            # if self.loops % 4 == 0:
            #     if self.num_added == 0:
            #         self.acceptance_threshold *= .95

            # If more than 20 points has been added in 4 loops increase novelty threshold by 20%
            # if self.loops % 4 == 0:
            #     if self.num_added > 20:
            #         self.acceptance_threshold *= 1.2

            # If no new points have been added in 1 loop increase mover range by 10
            # if self.num_added == 0:
            #     if self.mover_range < 350:
            #         self.mover_range += 10

            # If more than 1 point has been added in 1 loop decrease mover range by 5
            # if self.num_added > 1:
            #     if self.mover_range > 5:
            #         self.mover_range -= 5

            # Do a individual residue mover round if no improvement in 20 moves
            # if (self.loops % 20 == 0) and (self.loops != 0):
            #     if self.lowest == self.last_lowest_10:
            #         n = 1
            #         while n <= self.c_size/2:
            #             self.mover(n)
            #             n += 1
            #     self.last_lowest_10 = self.lowest

            # Print lowest e_score in archive compared to native
            if len(self.novelty_archive) > 0:
                print "Lowest energy: " + str(self.lowest) + "\nNative energy: " + str(self.native_energy) + \
                    "\nNovelty points added: " + str(self.num_added) + "\n"

                # If lowest e_score is lower then 10000 convert to all-atom, switch energy function, and change minimal
                # criteria to new all-atom energy (for centroid)
                if (self.lowest < 10000) and not self.switch and self.centroid:
                    # self.threshold = 10
                    switch_type = rosetta.SwitchResidueTypeSetMover("fa_standard")
                    switch_type.apply(self.pose)
                    self.scorefxn = rosetta.get_fa_scorefxn()
                    self.mc_energy = self.scorefxn(self.pose)
                    self.switch = True

            # Clear novelty archive after 10 loops and set mc energy to lowest if no improvement
            # if (self.loops % 10 == 0) and (self.loops != 0):
            #     if self.lowest == self.last_lowest_10:
            #         print("Reloading...")
            #         self.pose = rosetta.pose_from_pdb('lowest.pdb')
            #         self.novelty_archive = deque()
            #         self.mc_energy = self.scorefxn(self.pose) + 500
            #         self.acceptance_threshold = 100
            #     self.last_lowest_10 = self.lowest

            # Decrease temp by 5 if no progress after 20 loops
            # if self.loops % 10 == 0:
            #     if self.lowest == self.last_lowest:
            #         self.temperature += 5
            #     self.last_lowest = self.lowest

            self.loops += 1
            self.num_added = 0
            print str(self.loops) + " iterations."
            # print "Threshold: " + str(self.threshold)
        print ("Time elapsed: " + str(time.clock()))
예제 #8
0
    def __init__(self, pdb, centroid=False, pdb_file='', frag=False, nine_mer=False, local=False, local_size=3,
                 full=False, rosetta_refinement=False):
        """ :param pdb: :type string: pdb ID of the protein to be folded
            :param centroid: :type boolean: Option for use of centroid model
        """
        self.loops = 0                                    # Stores generation for which energy score was last calculated
        self.scores = {}                                  # Dictionary container for current gen genomes/scores
        self.scores_list = []                             # List container of current gen scores for search
        self.gen_added = 0                                # Last gen in which a point was added to novelty archive
        self.threshold = 10                               # Novelty threshold for which point is added to archive
        self.acceptance_threshold = 100                   # Novelty threshold for which move is accepted automatically
        self.num_added = 0                                # Number of points added to novelty archive
        self.switch = False                               # All atom switch
        self.temperature = 5                              # Monte Carlo temperature
        self.mover_range = 10                             # +-range of the angle in degrees in which mover moves residue
        self.local_size = local_size                      # For local mover, size of fragment to move
        self.local = local                                # Whether to use local mover
        self.novelty_archive = deque()                    # Initialize novelty archive
        self.centroid = centroid                          # If true use centroid scoring
        self.last_lowest = 0                              # For use in novelty loop
        self.last_lowest_10 = 0                           # For use in clear main loop
        self.frag = frag                                  # If true use frag mover
        self.rosetta_refinement = rosetta_refinement      # If true refine rosetta fold

        # Rosetta inits
        rosetta.init()                                    # Initialize rosetta libraries
        pose_native = pose_from_rcsb(pdb)                 # Create rosetta pose of natively folded protein from pdb file
        sequence = pose_native.sequence()                 # Get sequence of protein
        self.scorefxn = rosetta.get_fa_scorefxn()         # Create the rosetta energy score function for all atom
        
        if pdb_file != '':
            self.pose = rosetta.pose_from_pdb(pdb_file)   # If a starting pdb is given search from this pose
        elif rosetta_refinement:                          # If rosetta refinement, start from fastrelax structure
            self.pose = rosetta.pose_from_sequence(sequence)
            relax = rosetta.FastRelax()
            relax.set_scorefxn(self.scorefxn)
            relax.apply(self.pose)
        else:
            self.pose = rosetta.pose_from_sequence(sequence)  # Create the rosetta pose that will be manipulated
            
        if centroid:                                      # Switch pose to centroid if centroid option is true
            switch = rosetta.SwitchResidueTypeSetMover("centroid")
            switch.apply(self.pose)
        self.c_size = len(sequence)*2                     # Number of residues * 2 (phi and psi for each residue)
        self.native_energy = self.scorefxn(pose_native)   # Energy of the natively folded protein
        
        if centroid:                                      # Switch rosetta score function if centroid
            self.scorefxn = rosetta.create_score_function('score3')
        self.conformation = []
        
        i = 1
        while i <= len(sequence):
            self.conformation.append(self.pose.phi(i))
            self.conformation.append(self.pose.psi(i))
            i += 1

        self.mc_energy = self.scorefxn(self.pose) + 500   # Energy to be used as minimal criteria
        self.lowest = self.scorefxn(self.pose)            # Lowest energy in archive
        
        if frag:
            if nine_mer:
                fragset = rosetta.ConstantLengthFragSet(9)
                fragset.read_fragment_file("aat000_09_05-1.200_v1_3")
            else:
                fragset = rosetta.ConstantLengthFragSet(3)
                fragset.read_fragment_file("aat000_03_05-1.200_v1_3")
            movemap = rosetta.MoveMap()
            movemap.set_bb(True)
            self.mover_3mer = rosetta.ClassicFragmentMover(fragset, movemap)

        if local:                                         # For local, initialize na with appropriate number of deques
            self.novelty_archive = [deque() for i in range(self.c_size/2/self.local_size)]

        self.full = full                                  # If true use full mover
예제 #9
0
def main():
    #takes name of pdb file without the extention
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 5000
    #Population size
    N = 100
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    pdb_name = str(pdb_file) + "_min.pdb"
    initial_pose.dump_pdb(pdb_name)

    #Set threshold for selection
    #threshold = post_pre_packing_score/2
    #threshold = post_pre_packing_score

    data.append(str(pdb_file) + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    data_filename = pdb_file + '.score'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    '''
예제 #10
0
def main():
    #read in the file made by the forward sim
    args = sys.argv
    inputfile = args[1]
    data = open(inputfile)
    first_line = data.readlines()[1]
    var_line=first_line.split(',')
    start_stab=var_line[1]

    #the first entry in the file is the wild type structure, calc the threshold using this
    threshold=float(start_stab)+10
    print(threshold)
    
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 0
    #Population size
    N = 100
    #Beta (temp term)
    beta = .6
  
    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    #Prepare data headers
    data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file the score_mutant_pdb has made
    variant_scores=open(inputfile)

    #get just the mutation we want to revert to
    lines= variant_scores.readlines()
    var_line=lines[500] #gets the Nth line how ever long you want the burn to be
    print "staring here", var_line  
    var_line=var_line.split(',')[0]
  
    var_loc=int(filter(str.isdigit, var_line))
    var_rev=var_line[:1]

    gen=1
    #get all the pdb files
    sort_list=sorted(glob.glob('*[0-9].pdb'), key=numericalSort)
    sort_list=sort_list[-1016:] #include the last 1000 and some pdbs, the 16 is because we want the ones that happened before the 500th mutation too. 

  
    for i in range(1,len(sort_list)-30):
      step=-15
      #calc reversion for next 15 moves
      for infile in sort_list[i:i+31]:

	#for each mutation	
        var_line=lines[gen+500] #gets the Nth line how ever long you want the burn to be
        var_line=var_line.split(',')[0]
	print(var_line)
        var_loc=int(filter(str.isdigit, var_line))
	var_rev=""
	old=""
	if(step<0):
        	var_rev=var_line[len(var_line)-1:len(var_line)]
		old=var_line[:1]
	
	else:
		var_rev=var_line[:1]
		old=var_line[len(var_line)-1:len(var_line)]

      	print "Current File Being Processed is: " + infile
        print "revering to:", var_rev
        print "at:", var_loc

	#get the pdb you want to revert and make the reversion
        initial_pose = pose_from_pdb(infile)
        mutant_pose = mutate_residue(initial_pose, var_loc , var_rev, PACK_RADIUS, sf)

	#repack mut
        task1 = standard_packer_task(mutant_pose)
	task1.restrict_to_repacking()
        task1.or_include_current(True)
        packer_rotamers_mover1 = RotamerTrialsMover(sf,task1)
	packer_rotamers_mover1.apply(mutant_pose)

	#repack init
        task2 = standard_packer_task(initial_pose)
	task2.restrict_to_repacking()
	task2.or_include_current(True)
	pack_rotamers_mover2 = RotamerTrialsMover(sf, task2)
	pack_rotamers_mover2.apply(initial_pose)

	#apply min mover
	min_mover.apply(mutant_pose)
	min_mover.apply(initial_pose)
	
	#get scores    
	variant_score = sf(mutant_pose)
        initial_score = sf(initial_pose)

	#get prob
        probability = calc_prob_mh(variant_score, initial_score, N, beta, threshold)

	print(str(gen+499)+".pdb"+","+str(infile)+","+str(step)+","+ str(var_line) + ","+str(var_rev)+","+str(var_loc)+","+str(old)+"," +str(initial_score) + "," + str(variant
_score) + "," + str(variant_score - initial_score)+ ","+ str(probability)+ "\n")
      	data.append(str(gen+499)+".pdb"+","+str(infile)+","+str(step)+","+ str(var_line) + ","+str(var_rev)+","+str(var_loc)+","+str(old)+"," +str(initial_score) + "," + str(v
ariant_score) + "," + str(variant_score - initial_score)+ ","+ str(probability)+ "\n")
	step=step+1
      gen+=1

    print '\nDONE'

    data_filename = 'premutate_rep1_bb_T_ch_T.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
예제 #11
0
def main():
    #takes name of pdb file without the extention
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 1500
    #Population size
    N = 100
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    #change these for more or less flexability
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    #Set threshold for selection
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(post_pre_packing_score) + ',0.0 ,0.0,0\n')

    #number of residues to select from
    n_res = initial_pose.total_residue()

    #start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        #update the number of generations that have pased
        gen += 1

        print 'accepts:', i

        #pick a place to mutate
        mut_location = random.randint(1, n_res)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #don't mess with C, just choose again
        while (res.name1() == 'C'):
            mut_location = random.randint(1, n_res)
            #get the amino acid at that position
            res = initial_pose.residue(mut_location)

#choose the amino acid to mutate to
        new_mut_key = random.randint(0, len(AAs) - 1)

        proposed_res = AAs[new_mut_key]

        #don't bother mutating to the same amino acid it just takes more time
        while (proposed_res == res.name1()):
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

#make the mutation
#this is actually a really bad model, and probably shouldnt be used. In new version is repack the whole thing, then reminimize, I should also backrub it.
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        #score mutant
        variant_score = sf(mutant_pose)

        #get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        #test to see if mutation is accepted
        if random.random() < probability:

            #create a name for the mutant if its going to be kept
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            # Assuming 1000 burn in phase, take this if out if you want to store everything
            if i > 1000:
                #save name and energy change
                data.append(variant_name + "," + str(variant_score) + "," +
                            str(variant_score - post_pre_packing_score) + "," +
                            str(probability) + "," + str(gen) + "\n")

                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            #update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            #update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()
    # Output results.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    print 'program takes %f' % (t1 - t0)
#!/usr/bin/env python
# :noTabs=true:

import rosetta

rosetta.init()

pose = rosetta.pose_from_sequence('EVAAAVAT')

pymol = rosetta.PyMOL_Mover()

pymol.apply(pose)

scorefxn = rosetta.get_fa_scorefxn(
)  #  rosetta.create_score_function('standard')
scorefxn(pose)

pymol.send_energy(pose)
pymol.send_energy(pose, label=True)

pymol.send_colors(pose, {}, default_color="orange")
colors = {2: "red", 5: "white"}
pymol.send_colors(pose, colors, default_color="blue")

pymol.label_energy(pose, "fa_atr")

pymol.send_hbonds(pose)
pymol.send_ss(pose)
pymol.send_polars(pose)

mm = rosetta.MoveMap()
예제 #13
0
parser.add_argument('-m', '--minimize', action='store_true',
                    help='flag to perform minimization after each mutation')
args = parser.parse_args()


# Initialize Rosetta.
init(extra_options='-mute basic -mute core')

# Prepare data headers.
data = ['Variant,Rosetta Score,"delta-delta-G"\n']

# Load pdb file.
initial_pose = pose_from_pdb(args.pdb_filename)

# Set up ScoreFunction.
sf = get_fa_scorefxn()

# Set up MoveMap.
mm = MoveMap()
mm.set_bb(True)
mm.set_chi(True)

# Pack and minimize initial pose to remove clashes.
pre_pre_packing_score = sf(initial_pose)

task = standard_packer_task(initial_pose)
task.restrict_to_repacking()
task.or_include_current(True)
pack_rotamers_mover = RotamerTrialsMover(sf, task)
pack_rotamers_mover.apply(initial_pose)
예제 #14
0
def InterfaceEnergy_split(pdb):

  sf = get_fa_scorefxn()
  interface_mover = InterfaceAnalyzerMover(1, False, sf, False, True, True, False )
  interface_mover.apply(pdb)
  return(interface_mover.get_interface_dG())
예제 #15
0
def main():
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Population size
    N = 37
    #Beta (temp term)
    beta = 1
    #look up what the first stored value was in the files to get the threshold
    threshold = float(-534.687360627 / 2)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    #Prepare data headers
    data = ['Generation,RevertTo,OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file the score_mutant_pdb has made
    variant_scores = open('mh_rep_3_37.csv')

    #get just the mutation we want to revert to
    lines = variant_scores.readlines()
    var_line = lines[
        2]  #gets the Nth line how ever long you want the burn to be
    var_line = var_line.split(',')[0]

    var_loc = int(filter(str.isdigit, var_line))
    var_rev = var_line[:1]

    gen = 1
    #get all the pdb files
    sort_list = sorted(glob.glob('*.pdb'), key=numericalSort)

    for i in range(1, len(sort_list) - 15):

        #calc reversion for next 15 moves
        for infile in sorted(glob.glob('*.pdb'), key=numericalSort)[i:i + 15]:

            #for each mutation
            var_line = lines[
                gen +
                1]  #gets the Nth line how ever long you want the burn to be
            var_line = var_line.split(',')[0]
            var_loc = int(filter(str.isdigit, var_line))
            var_rev = var_line[:1]

            print "Current File Being Processed is: " + infile
            initial_pose = pose_from_pdb(infile)
            initial_score = sf(initial_pose)
            print("init scored")
            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)
            variant_score = sf(mutant_pose)
            probability = calc_prob_mh(variant_score, initial_score, N, beta,
                                       threshold)
            print(
                str(gen) + "," + var_line + "," + str(initial_score) + "," +
                str(variant_score) + "," + str(variant_score - initial_score) +
                "," + str(probability) + "\n")
            data.append(
                str(gen) + "," + var_line + "," + str(initial_score) + "," +
                str(variant_score) + "," + str(variant_score - initial_score) +
                "," + str(probability) + "\n")
        gen += 1

    print '\nDONE'

    data_filename = 'rep_3_mh_37_rev_15_score.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
예제 #16
0
def mutate_residue_chain(pose, mutant_position, mutant_aa,
        pack_radius = 0.0, pack_scorefxn = '' ):
    """
    Replaces the residue at  <mutant_position>  in  <pose>  with  <mutant_aa>
        and repack any residues within  <pack_radius>  Angstroms of the mutating
        residue's center (nbr_atom) using  <pack_scorefxn>
    note: <mutant_aa>  is the single letter name for the desired ResidueType

    example:
        mutate_residue(pose, 30, A)
    See also:
        Pose
        PackRotamersMover
        MutateResidue
        pose_from_sequence
    """
    #### a MutateResidue Mover exists similar to this except it does not pack
    ####    the area around the mutant residue (no pack_radius feature)
    #mutator = MutateResidue(mutant_position, mutant_aa)
    #mutator.apply(test_pose)
    #test_pose = Pose()
    #test_pose.assign( pose )

    if pose.is_fullatom() == False:
        IOError( 'mutate_residue only works with fullatom poses' )


    # create a standard scorefxn by default
    if not pack_scorefxn:
        pack_scorefxn = get_fa_scorefxn() #  create_score_function('standard')

    task = standard_packer_task(pose)

    # the Vector1 of booleans (a specific object) is needed for specifying the
    #    mutation, this demonstrates another more direct method of setting
    #    PackerTask options for design
    aa_bool = rosetta.utility.vector1_bool()
    # PyRosetta uses several ways of tracking amino acids (ResidueTypes)
    # the numbers 1-20 correspond individually to the 20 proteogenic amino acids
    # aa_from_oneletter returns the integer representation of an amino acid
    #    from its one letter code
    # convert mutant_aa to its integer representation
    mutant_aa = aa_from_oneletter_code(mutant_aa)

    # mutation is performed by using a PackerTask with only the mutant
    #    amino acid available during design
    # to do this, construct a Vector1 of booleans indicating which amino acid
    #    (by its numerical designation, see above) to allow
    for i in range(1, 21):
        # in Python, logical expression are evaluated with priority, thus the
        #    line below appends to aa_bool the truth (True or False) of the
        #    statement i == mutant_aa
        aa_bool.append( i == mutant_aa )

    # modify the mutating residue's assignment in the PackerTask using the
    #    Vector1 of booleans across the proteogenic amino acids
   
    # prevent residues from packing if they are in a different chain then the mutant
    task2=restrict_non_nbrs_from_repacking_chain(pose, mutant_position, task, pack_radius)
    task2.nonconst_residue_task(mutant_position
        ).restrict_absent_canonical_aas(aa_bool)

    # apply the mutation and pack nearby residues
    packer = PackRotamersMover(pack_scorefxn, task2)
    packer.apply(pose)
   
    return(pose)
예제 #17
0
def mutate_residue(pose, mutant_position, mutant_aa, pack_radius=0.0,
                                                             pack_scorefxn=''):
    """Replace the residue at <mutant_position> in <pose> with <mutant_aa> and
    repack any residues within <pack_radius> angstroms of the mutating
    residue's center (nbr_atom) using <pack_scorefxn>

    Note: <mutant_aa> is the single letter name for the desired ResidueType

    Example:
        mutate_residue(pose, 30, "A")
    See also:
        Pose
        PackRotamersMover
    """
    if not pose.is_fullatom():
        IOError('mutate_residue() only works with full-atom poses.')

    test_pose = rosetta.Pose()
    test_pose.assign(pose)

    # create a standard scorefxn by default
    if not pack_scorefxn:
        pack_scorefxn = rosetta.get_fa_scorefxn()

    task = rosetta.standard_packer_task(test_pose)
    task.or_include_current(True)

    # A vector1 of booleans (a specific object) is needed for specifying the
    # mutation.  This demonstrates another more direct method of setting
    # PackerTask options for design.
    aa_bool = rosetta.utility.vector1_bool()

    # PyRosetta uses several ways of tracking amino acids (ResidueTypes).
    # The numbers 1-20 correspond individually to the 20 proteogenic amino
    # acids.  aa_from_oneletter_code() returns the integer representation of an
    # amino acid from its one letter code

    # Convert mutant_aa to its integer representation.
    mutant_aa = rosetta.aa_from_oneletter_code(mutant_aa)

    # The mutation is performed by using a PackerTask with only the mutant
    # amino acid available during design.  To do this, we construct a vector1
    # of booleans indicating which amino acid (by its numerical designation;
    # see above) to allow.
    for i in range(1, 20 + 1):
        # In Python, logical expression are evaluated with priority, thus the
        # line below appends to aa_bool the truth (True or False) of the
        # statement i == mutant_aa.
        aa_bool.append(i == mutant_aa)

    # Modify the mutating residue's assignment in the PackerTask using the
    # vector1 of booleans across the proteogenic amino acids.
    task.nonconst_residue_task(mutant_position).restrict_absent_canonical_aas(
                                                                       aa_bool)

    # Prevent residues from packing by setting the per-residue "options" of
    # the PackerTask.
    center = pose.residue(mutant_position).nbr_atom_xyz()
    for i in range(1, pose.total_residue() + 1):
        # Only pack the mutating residue and any within the pack_radius
        if not i == mutant_position or center.distance_squared(
                         test_pose.residue(i).nbr_atom_xyz()) > pack_radius**2:
            task.nonconst_residue_task(i).prevent_repacking()

    # Apply the mutation and pack nearby residues.
    packer = rosetta.PackRotamersMover(pack_scorefxn, task)
    packer.apply(test_pose)

    return test_pose
# sets up the input native PDB as being the base pose
native_pose = Pose()
native_pose.assign( load_pose( orig_pdb_filename_full_path ) )
native_pose.pdb_info().name( "native" )

# automatically populates chain and residue information into holder for native 3ay4
native_pose_info = hold_chain_and_res_designations_3ay4()
native_pose_info.native()


# use the scorefxn_file to set up additional weights
if input_args.scorefxn_file is not None:
    sf = make_fa_scorefxn_from_file( input_args.scorefxn_file )
# else create a fa_scorefxn
else:
    sf = get_fa_scorefxn()

# fa_intra_rep should always 0.440 since that's what I've been using
if input_args.fa_intra_rep:
    sf.set_weight( score_type_from_name( "fa_intra_rep" ), 0.440 )


# pymol stuff
pmm = PyMOL_Mover()
pmm.keep_history( True )
pmm.apply( native_pose )


# relay information to user
info_file_details = []
info_file_details.append( "Native PDB filename:\t\t\t%s\n" %input_args.native_pdb_file.split( '/' )[-1] )
예제 #19
0
def rescore_sol(folder, args):
	"""
	This function takes an input folder and scores all the solution state
	decoys in the folder. It returns a sorted list of names and scores.
	This requires the importation of PyRosetta. There is a limitation that
	this function will only use the default Rosetta score function. In this
	case, it is using talaris2014.
	"""
	import rosetta 
	import rosetta.core.scoring.solid_surface
	opts = '-include_surfaces -mute basic -mute core -mute protocols'
	rosetta.init(extra_options = opts)
	if not args.silence:
		from time import time

	# Getting list of all files in the folder
	f_name = os.path.basename(folder).replace('_output', '')
	if not args.silence:
		print '\n\nFolder:\t{}'.format(f_name)

	folder_list = os.listdir(folder) 	# Full folder

	# Narrowing list to only solution models
	sol_pdbs = []
	for i in folder_list:
		if 'Sol' in i:
			sol_pdbs.append(i)
	sol_pdbs.sort()
	count = len(sol_pdbs)

	if not args.silence:
		print "Scoring {} PDBs".format(count)

	# Writing unsorted scores file
	scoresc = os.path.join(folder, 'sol_score.sc')
	header = ('\t' * 6).join(['Description', 'Score'])
	with open(scoresc, 'w') as s:
		s.write(header)

	# Scoring solution PDBs and listing scores
	score_erors = {}
	sf = rosetta.get_fa_scorefxn()
	sol_scores = []
	start = time()
	for i in range(len(sol_pdbs)):
		try:
			pdb = sol_pdbs[i]
			p = rosetta.pose_from_pdb(os.path.join(folder, pdb))
			score = sf(p)
			sol_scores.append(score)

			# Adding score to unsorted list
			with open(scoresc, 'a') as s:
				s.write('\n{}\t{}'.format(pdb, score))

			if not args.silence:
				elapsed = time() - start
				display_time(start, elapsed, i, count)

		except RuntimeError:
			print "Unable to read PDB: {}".format(pdb)
			if score_erors.has_key(f_name):
				score_erors[f_name].append(pdb)
			else:
				score_erors.update({f_name: [pdb]})

	# Combining files names and scores, sorting by scores
	s_name_scores = sorted(zip(sol_pdbs, sol_scores), key=lambda x:x[1])

	return s_name_scores, header, score_erors