Ejemplo n.º 1
0
def idealize_and_relax_pdb_set( PdbCstPairs ):
  ''' Idealizes, constrains and relaxes every pdb listed in PdbCstPairs.

  PdbCstPairs: iterable of (PdbName, CstName) pairs. PdbName must end in '.pdb';
               CstName is the constraint file applied to that pdb.

  For each pair this:
    1. runs the idealize_jd2 command line app on PdbName,
    2. renames the expected <stem>_0001.pdb output to <stem>_Ideal.pdb,
    3. applies the constraints read from CstName,
    4. relaxes with constraint weights switched on, then again without them,
    5. dumps the result as <stem>_Relax.pdb.

  Raises ValueError for a PdbName without the '.pdb' suffix; the check happens
  before any subprocess is launched. Returns None. '''
  PdbSuffix = '.pdb'

  for PdbName, CstName in PdbCstPairs:
    print('(PdbName, CstName)  %s' % ((PdbName, CstName),))

    # Fail early: without the suffix the derived file names below would be wrong
    # and the rename would only blow up after the (slow) idealize run.
    if not PdbName.endswith(PdbSuffix):
      raise ValueError("pdb name must end with '.pdb': %s" % PdbName)
    PdbStem = PdbName[:-len(PdbSuffix)]

    # idealize peptide bonds with command line subprocess
    subprocess.check_output([ 'idealize_jd2.default.linuxgccrelease', '-s', PdbName ])
    IdealizedPdbOldName = PdbStem + '_0001.pdb'
    IdealizedPdbNewName = PdbStem + '_Ideal.pdb'

    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    # short pause kept from the original before reading the renamed file back
    time.sleep(0.2)

    IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName )

    # make constraint mover, get constraints from file and attach them to the pose
    Constrainer = rosetta.ConstraintSetMover()
    Constrainer.constraint_file(CstName)
    Constrainer.apply(IdealizedCappedPose)

    # SET UP WEIGHTS: default score function, and a copy with constraint terms turned on
    Talaris = rosetta.getScoreFunction()
    TalarisPlusCst = rosetta.getScoreFunction()
    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 2.5)
    print('relaxing %s with %s' % (IdealizedPdbNewName, CstName))

    # relax w/ cst
    rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
    # relax w/o cst
    rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

    RelaxedPdbName = PdbStem + '_Relax.pdb'
    rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
def optimize_repeat_pdb( PdbCstSetsRepeatLength ):
  ''' Idealizes a repeat protein pdb and relaxes it under each constraint set. Parallelizable.

  PdbCstSetsRepeatLength: a single (Pdb, CstSets, RepeatLength) tuple, so the function
  can be handed to a map-style worker pool.
    Pdb          : pdb file name containing '.pdb'
    CstSets      : iterable of constraint file names ('.cst')
    RepeatLength : number of residues per repeat; the pose length must be an
                   integer multiple of it (asserted)

  Files written:
    <Pdb stem>_ideal.pdb              idealized input
    <cst stem>_<FxnTag>.pdb           relaxed with constraints, FxnTag in TalCst / LowFaRep
    <cst stem>_<FxnTag>_Relax.pdb     the above, re-relaxed with only the NCS constraints
    <stem>_JustRelax.pdb              idealized pose relaxed with only the NCS constraints;
                                      <stem> is the stem of the last cst file, or the pdb
                                      stem when CstSets is empty
  Returns None. '''
  # Explicit unpacking replaces the Python-2-only tuple parameter; callers still pass one tuple.
  Pdb, CstSets, RepeatLength = PdbCstSetsRepeatLength

  # idealize peptide bonds with command line subprocess
  subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', Pdb])
  IdealizedPdbOldName = Pdb.replace('.pdb', '_0001.pdb')
  IdealizedPdbNewName = Pdb.replace('.pdb', '_ideal.pdb')
  subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
  time.sleep(0.5)

  Pose = rosetta.pose_from_pdb(IdealizedPdbNewName)
  PoseLength = Pose.n_residue()

  assert PoseLength % RepeatLength == 0, 'pdb input into optimize_repeat_pdb must have integer multiple of repeat_length number of residues'
  NumberRepeats = PoseLength // RepeatLength

  # 1-indexed, inclusive (Start, End) residue range of every repeat
  RepeatRanges = []
  Start = 1
  for Repeat in range(NumberRepeats):
    RepeatRanges.append((Start, Start + RepeatLength - 1))
    Start += RepeatLength

  assert len(RepeatRanges) == NumberRepeats

  # every other repeat is tied to this reference repeat
  MidRepeat = ( NumberRepeats // 2 ) - 1
  ReferenceRange = RepeatRanges[MidRepeat]

  SetupNCS = symmetry.SetupNCSMover()

  for TargetRange in RepeatRanges:
    if TargetRange == ReferenceRange:
      continue
    if TargetRange[0] == 1:
      # first repeat: skip first three residues (not enough atoms for torsion)
      StartTrim, EndTrim = 3, 0
    elif TargetRange[1] == PoseLength:
      # last repeat: skip the final three residues (not enough atoms for torsion)
      StartTrim, EndTrim = 0, 3
    else:
      StartTrim, EndTrim = 0, 0
    SetupNCS.add_group(
      "%dA-%dA"%(ReferenceRange[0]+StartTrim, ReferenceRange[1]-EndTrim),
      "%dA-%dA"%(TargetRange[0]+StartTrim, TargetRange[1]-EndTrim) )

  SetupNCS.apply(Pose)

  # default score function plus dihedral weight for symmetry ncs minimization
  SymmTalaris = rosetta.getScoreFunction()
  SymmTalaris.set_weight(rosetta.dihedral_constraint, 1.0)

  TalarisPlusCst = rosetta.getScoreFunction()
  TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
  TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
  TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 3.0)

  # same constraint weights, with the fa_rep weight set to 0.25
  TalarisPlusCstLowerFaRep = rosetta.getScoreFunction()
  TalarisPlusCstLowerFaRep.set_weight(rosetta.atom_pair_constraint, 10.0)
  TalarisPlusCstLowerFaRep.set_weight(rosetta.angle_constraint, 5.0)
  TalarisPlusCstLowerFaRep.set_weight(rosetta.dihedral_constraint, 3.0)
  TalarisPlusCstLowerFaRep.set_weight(rosetta.fa_rep, 0.25)
  print('Pdb: %s' % (Pdb,))

  TaggedScoreFunctions = [ ('TalCst', TalarisPlusCst), ('LowFaRep', TalarisPlusCstLowerFaRep) ]

  # Fallback stem for the JustRelax output; without it an empty CstSets raised NameError
  # below. When CstSets is non-empty the last cst stem is used, as before.
  CstStemName = Pdb.replace('.pdb', '')

  for Cst in CstSets:
    print('Cst: %s' % (Cst,))
    CstPose = Pose.clone()
    CstStemName = re.sub(r'^(.*)\.cst$', r'\1', Cst)

    # make constraint mover, get constraints from file and attach them to the clone
    Constrainer = rosetta.ConstraintSetMover()
    Constrainer.constraint_file(Cst)
    Constrainer.apply(CstPose)

    for FxnTag, ScoreFunction in TaggedScoreFunctions:
      RelaxPose = CstPose.clone()
      rosetta.relax_pose(RelaxPose, ScoreFunction, 'tag')
      rosetta.dump_pdb( RelaxPose, CstStemName+'_%s.pdb'%FxnTag )
      # remove all constraints
      RelaxPose.remove_constraints()
      # reapply ncs constraints
      SetupNCS.apply(RelaxPose)

      rosetta.relax_pose(RelaxPose, SymmTalaris, 'tag')
      rosetta.dump_pdb( RelaxPose, CstStemName+'_%s_Relax.pdb'%FxnTag )

  # control: relax the idealized pose with the NCS constraints only
  JustRelaxPose = Pose.clone()
  SetupNCS.apply( JustRelaxPose )

  rosetta.relax_pose( JustRelaxPose, SymmTalaris, 'tag' )
  rosetta.dump_pdb( JustRelaxPose, CstStemName+'_JustRelax.pdb' )
Ejemplo n.º 3
0
def main(argv=None):
  ''' Extrapolates a reference constraint file onto every matching repeat pdb.

  argv: optional list of command line arguments (without the program name). When
        given, it replaces sys.argv[1:] before parsing; otherwise sys.argv is used.

  Command line: -ref_pdb, -ref_cst and -repeat_pdb_tag are required, -out is optional.
  Every pdb in the working directory matching '*<repeat_pdb_tag>*.pdb' is processed.
  Its file name must carry a 'src<a>_<b>__<c>_<d>' tag (source ranges) and a 'rep<n>'
  tag (repeat length); files missing either tag are reported and skipped.

  Raises AssertionError when the glob matches no pdb. Returns None. '''
  if argv is not None:
    sys.argv = [ sys.argv[0] ] + list(argv)

  # Arg block
  ArgParser = argparse.ArgumentParser(description=' expand_cst.py ( -help ) %s'%InfoString)
  # Required args
  ArgParser.add_argument('-ref_pdb', type=str, help=' reference pdb ', required=True)
  ArgParser.add_argument('-ref_cst', type=str, help=' corresponding to reference pdb ', required=True)
  ArgParser.add_argument('-repeat_pdb_tag', type=str, help=' input pdb tag ', required=True)
  # Optional args
  ArgParser.add_argument('-out', type=str, help=' Output directory ', default='./')
  Args = ArgParser.parse_args()
  # normalise the output directory so it always ends with a slash
  if Args.out [-1] != '/':
    Args.out = Args.out + '/'

  # default score function with constraint weights turned on
  # (not used further in this function; kept from the original)
  ScoreFunction = rosetta.getScoreFunction()
  ScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
  ScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
  ScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

  RefPdb = Args.ref_pdb
  ReferencePose = rosetta.pose_from_pdb( RefPdb )
  print('ReferencePose %s' % (ReferencePose,))

  # modify rosetta cst w/o rosetta
  Constrainer = constraint_extrapolator(Args.ref_cst)

  Pdbs = glob.glob( '*%s*.pdb'%Args.repeat_pdb_tag )
  # The original message was a malformed format string, so a failed check raised
  # TypeError instead of reporting the glob.
  assert len(Pdbs), "No pdbs found with glob: '*%s*.pdb'" % Args.repeat_pdb_tag

  for Pdb in Pdbs:
    print('Pdb: %s' % (Pdb,))

    # re.sub returns the name unchanged when the tag is absent, which makes int() raise
    try:
      SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', Pdb)
      SourceRanges = [ [ int(Number) for Number in Range.split('_') ]   for Range in SourceRangeString.split('__') ]
    except ValueError:
      print('No src range tag, skipping: %s ' % Pdb)
      continue

    # same skip policy for a missing repeat length tag (previously an uncaught ValueError)
    try:
      RepeatLength = int( re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb) )
    except ValueError:
      print('No rep tag, skipping: %s ' % Pdb)
      continue

    # load the pose only once the file name is known to be usable
    Pose = rosetta.pose_from_pdb(Pdb)

    print('SourceRanges: %s' % (SourceRanges,))
    print('RepeatLength %s' % (RepeatLength,))
    print('')

    # the glob pattern guarantees the '.pdb' suffix
    PdbTag = Pdb[:-len('.pdb')]
    CstName = PdbTag+'.cst'
    ExtrapolatedConstraints = Constrainer.extrapolate_from_repeat_unit(SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose, CstName, PdbTag)
Ejemplo n.º 4
0
  def extrapolate_from_repeat_unit(self, ReferenceStart, ReferenceEnd, RepeatUnitLength, NewPose, FinalCstName, PdbTag):
    ''' Renumbers the reference constraints onto every repeat unit of NewPose.

    ReferenceStart, ReferenceEnd : residue range of the repeat unit in the reference,
                                   passed on to self.shift_and_sort_constraints
    RepeatUnitLength             : residues per repeat unit
    NewPose                      : repeat pose receiving the constraints
    FinalCstName                 : output constraint file name; the files actually written
                                   are <stem>_e1.cst and <stem>_e2.cst
    PdbTag                       : prefix for the temporary '<PdbTag>_*Temp.cst' files

    The constraints come back from shift_and_sort_constraints in four classes
    (Edge1, Edge2, BothEdge, Middle). Each constraint is copied once per repeat shift
    and kept only if pose_has() accepts the shifted atoms for NewPose. Edge1 constraints
    skip the first shift and Edge2 constraints skip the last one.

    Side effects: sets self.Range and self.NewPoseStartShift; writes 'AllRepeatCst.cst',
    '<stem>_e1.cst' (middle + edge 1) and '<stem>_e2.cst' (middle + edge 2); writes, scores
    and then deletes the temporary cst files; prints constraint scores.
    Returns None. '''
    NewLength = NewPose.n_residue()
    self.Range = (1, NewLength)
    self.NewPoseStartShift = ReferenceStart - 1 # for 1 indexing

    # residue offsets of every repeat unit relative to the first one
    UnitShiftMultiples = NewLength // RepeatUnitLength
    UnitShiftList = [ RepeatUnitLength * Multiple for Multiple in range( UnitShiftMultiples ) ]

    Edge1Cst, Edge2Cst, BothEdgeCst, MiddleCst = self.shift_and_sort_constraints(ReferenceStart, ReferenceEnd, RepeatUnitLength)

    print('UnitShiftList: %s' % (UnitShiftList,))
    print('RepeatUnitLength: %s' % (RepeatUnitLength,))

    def repeat_constraints(Constraints, Shifts):
      ''' copies each constraint to every shift whose atoms pose_has() finds in NewPose '''
      RepeatCstList = []
      for AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType in Constraints:
        for Shift in Shifts:
          ShiftedAtomResidueCoords = [ (AtomName, ResidueNumber+Shift) for AtomName, ResidueNumber in AtomResidueCoords ]
          if pose_has(NewPose, ShiftedAtomResidueCoords):
            RepeatCstList.append( ( ShiftedAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType ) )
      return RepeatCstList

    MiddleRepeatCstList = repeat_constraints(MiddleCst, UnitShiftList)
    Edge1RepeatCstList = repeat_constraints(Edge1Cst, UnitShiftList[1:])
    Edge2RepeatCstList = repeat_constraints(Edge2Cst, UnitShiftList[:-1])
    BothEdgeRepeatCstList = repeat_constraints(BothEdgeCst, UnitShiftList)

    MiddleTempCst = '%s_MidRepTemp.cst'%PdbTag
    Edge1TempCst = '%s_Edge1RepTemp.cst'%PdbTag
    Edge2TempCst = '%s_Edge2RepTemp.cst'%PdbTag
    BothEdgeTempCst = '%s_BothEdgeRepTemp.cst'%PdbTag

    self.output_cst(MiddleRepeatCstList, MiddleTempCst)
    self.output_cst(Edge1RepeatCstList, Edge1TempCst)
    self.output_cst(Edge2RepeatCstList, Edge2TempCst)
    self.output_cst(BothEdgeRepeatCstList, BothEdgeTempCst)

    # Union of all four classes. The original seeded this list with the Edge1
    # constraints, which duplicated them and left the middle constraints out.
    AllRepeatCst = MiddleRepeatCstList[:]
    AllRepeatCst.extend(Edge1RepeatCstList)
    AllRepeatCst.extend(Edge2RepeatCstList)
    AllRepeatCst.extend(BothEdgeRepeatCstList)
    self.output_cst(AllRepeatCst, 'AllRepeatCst.cst')

    # trying out constraints to pick between edge 1 and edge 2 (and filter?)
    NumberMiddleRepeatCst = len(MiddleRepeatCstList)
    NumberEdge1RepeatCst = len(Edge1RepeatCstList)
    NumberEdge2RepeatCst = len(Edge2RepeatCstList)

    # score function with every weight zeroed, then only constraint terms turned on
    CstScoreFunction = set_all_weights_zero( rosetta.getScoreFunction() )
    CstScoreFunction.set_weight( rosetta.atom_pair_constraint, 1.0 )
    CstScoreFunction.set_weight( rosetta.angle_constraint, 1.0 )
    CstScoreFunction.set_weight( rosetta.dihedral_constraint, 1.0 )

    def score_constraint_file(PoseLabel, TempCstFile, NumberCst):
      ''' applies TempCstFile to a clone of NewPose, shows its constraint score and
          returns the score per constraint (None when there are no constraints) '''
      print('%s should have %d constraints !!! ' % (PoseLabel, NumberCst))
      if not NumberCst:
        return None
      CstPose = NewPose.clone()
      ConstraintSetter = rosetta.ConstraintSetMover()
      ConstraintSetter.constraint_file(TempCstFile)
      ConstraintSetter.apply(CstPose)
      CstScoreFunction.show(CstPose)
      Score = CstScoreFunction(CstPose)
      print('')
      return Score / NumberCst

    # The middle set is now gated on its own count; the original tested the Edge1 count here.
    score_constraint_file('MiddlePose', MiddleTempCst, NumberMiddleRepeatCst)
    Edge1ScoreNorm = score_constraint_file('Edge1Pose', Edge1TempCst, NumberEdge1RepeatCst)
    Edge2ScoreNorm = score_constraint_file('Edge2Pose', Edge2TempCst, NumberEdge2RepeatCst)

    # Curated set: middle + both-edge constraints plus whichever edge class scores
    # better per constraint. Whether the both-edge constraints belong here is undecided.
    # NOTE: the curated set is computed but not written out at present.
    CuratedRepeatCst = MiddleRepeatCstList[:]
    CuratedRepeatCst.extend(BothEdgeRepeatCstList)

    if NumberEdge1RepeatCst and NumberEdge2RepeatCst:
      if Edge1ScoreNorm <= Edge2ScoreNorm:
        CuratedRepeatCst.extend(Edge1RepeatCstList)
      else:
        CuratedRepeatCst.extend(Edge2RepeatCstList)
    elif NumberEdge1RepeatCst:
      CuratedRepeatCst.extend(Edge1RepeatCstList)
    elif NumberEdge2RepeatCst:
      CuratedRepeatCst.extend(Edge2RepeatCstList)

    # the two files actually written: middle constraints with each edge class
    AllWithEdge1RepeatCst = MiddleRepeatCstList[:]
    AllWithEdge1RepeatCst.extend(Edge1RepeatCstList)

    AllWithEdge2RepeatCst = MiddleRepeatCstList[:]
    AllWithEdge2RepeatCst.extend(Edge2RepeatCstList)

    # Strip a trailing '.cst' only; the old '!' marker trick left a stray '!' in the
    # output names whenever FinalCstName lacked the suffix.
    if FinalCstName.endswith('.cst'):
      ModFinalCstName = FinalCstName[:-len('.cst')]
    else:
      ModFinalCstName = FinalCstName
    self.output_cst(AllWithEdge1RepeatCst, ModFinalCstName+'_e1.cst')
    self.output_cst(AllWithEdge2RepeatCst, ModFinalCstName+'_e2.cst')

    # clean up the temporary constraint files written above
    RemainingTempFiles = glob.glob( '%s_*Temp.cst'%PdbTag )
    for File in RemainingTempFiles:
      subprocess.check_output(['rm', File])
Ejemplo n.º 5
0
def main(ExtraResidues=0, ipython=0):
    ''' Scores globbed pdbs, plots the scores and interactively copies out subsets.

    ExtraResidues : 0 (default) builds a nonstandard residue set from -param when any
                    params are given; any other value is left untouched
    ipython       : when truthy and a residue set was just built from -param, return
                    that residue set immediately instead of continuing

    Flow: parse arguments, glob the pdbs, derive a repeat length ('rep<n>' in the file
    name, 0 when absent) and a process tag for every pdb, score and group the poses,
    hand the groups to plotly_plotter, render the plots, then loop on stdin asking for
    per-score-function cutoffs and a folder to copy the passing pdbs (and their cst
    files) into.

    Exits through ArgParser.error when -pdb_glob matches nothing. Returns None, except
    for the ipython shortcut described above. '''
    ### Required args
    ArgParser = argparse.ArgumentParser(
        description=
        " for plotting pdb scores and selecting subsets based on absolute or per residue scores "
    )
    ArgParser.add_argument('-pdb_glob',
                           type=str,
                           help=" pdb stem, start of globs for pdbs and csts ",
                           required=True)
    ArgParser.add_argument('-native',
                           type=str,
                           help=" pdb to compare designs against ",
                           required=True)
    ### Default args
    ArgParser.add_argument(
        '-cst',
        type=str,
        help=" to provide cst manually, will apply to all globed pdbs!!! ",
        default=False)
    ArgParser.add_argument('-param',
                           type=str,
                           nargs='+',
                           help=" params ",
                           default=[])
    ArgParser.add_argument('-norm',
                           type=int,
                           help=" 0|(1) normalize scores by residue ",
                           default=1)

    ### following args are for plotly:
    ### change if you use this script!!!
    # NOTE(review): account id and API key are hard-coded as defaults; credentials in
    # source are a leak risk -- consider making both arguments required.
    ArgParser.add_argument('-plotly_id',
                           type=str,
                           help=" ",
                           default="pylesharley")  # required=True )
    ArgParser.add_argument('-plotly_key',
                           type=str,
                           help="  ",
                           default="cc5z4a8kst")  # required=True )
    ArgParser.add_argument('-plot',
                           type=int,
                           help=" 0|(1) plot scores with plotly ",
                           default=1)
    ArgParser.add_argument('-name', type=str, help=" plot tag ", default='')
    ArgParser.add_argument(
        '-and_or',
        type=str,
        help=" And/Or logic for score cutoffs. Default = 'and'  ",
        default='and')
    ArgParser.add_argument(
        '-multi',
        type=int,
        help=" 0|(1) plot different methods together on same plot ",
        default=1)

    Args = ArgParser.parse_args()
    Pdbs = glob.glob(Args.pdb_glob)
    print('globed %d pdbs' % len(Pdbs))
    if not Pdbs:
        # previously this surfaced later as an IndexError
        ArgParser.error('-pdb_glob %r matched no pdbs' % Args.pdb_glob)

    if ExtraResidues == 0 and len(Args.param) > 0:
        try:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(
                ExtraParams)
        except Exception:
            # Same call retried once, exactly as the original did.
            # NOTE(review): presumably works around a failure on the first attempt -- confirm.
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(
                ExtraParams)
        ### for ipython mode
        if ipython:
            return ExtraResidues

    Args.and_or = Args.and_or.lower()
    assert Args.and_or == 'and' or Args.and_or == 'or', " -and_or must equal 'and' or 'or' "

    # better to find out if native pdb is wrong before waiting for pdb scoring;
    # the handle is closed again straight away
    with open(Args.native, 'r'):
        pass

    # Longest prefix shared by all pdb names (the shortest pairwise overlap the original
    # searched for with a quadratic loop). A single pdb has nothing to share.
    if len(Pdbs) > 1:
        ShortestOverlap = os.path.commonprefix(Pdbs)
    else:
        ShortestOverlap = ''
    # escaped because file names may contain regex metacharacters
    OverlapPattern = r'^%s(.*)\.pdb$' % re.escape(ShortestOverlap)

    RepeatLengths = []
    ProcessTags = {}  # tag -> 1-based number in order of first appearance
    TagList = []
    TagByPdbName = {}

    for Pdb in Pdbs:
        # re.sub leaves the name untouched when there is no 'rep' tag, so int() raises
        try:
            RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
        except ValueError:
            RepeatLength = 0
        RepeatLengths.append(RepeatLength)

        #### re.sub out tag from design process
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        Tag = re.sub(OverlapPattern, r'\1', Tag)

        TagByPdbName[Pdb] = Tag
        TagList.append(ProcessTags.setdefault(Tag, len(ProcessTags) + 1))

    # Scoring is redundant, once for sorting outside plotter, then again in plotter
    # making not redundant not a priority.
    # Scoring in the plotter object is so multiple score functions can be plotted easily
    Talaris = rosetta.getScoreFunction()
    SortedTuples = []
    for Pdb, RepeatLength, ProcessNumber in zip(Pdbs, RepeatLengths, TagList):
        Pose = rosetta.pose_from_pdb(Pdb)
        Score = Talaris(Pose)
        if Args.norm:
            Score = Score / Pose.n_residue()

        if Args.multi:
            # Sort by repeat length, then method tag, then score
            SortedTuples.append((RepeatLength, ProcessNumber, Score, Pose))
        else:
            # Sort by repeat length (shortest to longest), then score (best to worst)
            SortedTuples.append((RepeatLength, Score, Pose))

    # the pose itself is kept out of the sort key so ties never compare Pose objects
    SortedTuples.sort(key=lambda PoseTuple: PoseTuple[:-1])

    # Consecutive tuples sharing repeat length (and method tag with -multi) form a group.
    # Comparing whole keys also splits groups after a repeat length of 0, which the old
    # zero-as-sentinel logic could not detect.
    AllGroups = []
    PreviousKey = None
    for PoseTuple in SortedTuples:
        GroupKey = PoseTuple[:-2]
        if GroupKey != PreviousKey:
            AllGroups.append([])
            PreviousKey = GroupKey
        AllGroups[-1].append(PoseTuple)

    # Build score functions here:
    Talaris = rosetta.getScoreFunction()

    # set_all_weights_zero returns the score function with all default weights set to 0
    CstScore = set_all_weights_zero(rosetta.getScoreFunction())
    CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
    CstScore.set_weight(rosetta.angle_constraint, 5.0)
    CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

    HbondScore = set_all_weights_zero(rosetta.getScoreFunction())
    HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
    HbondScore.set_weight(rosetta.hbond_sc, 1.100)

    Disulfide = set_all_weights_zero(rosetta.getScoreFunction())
    Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

    # NOTE(review): Plotter only exists when -plot is non-zero, yet everything below
    # uses it unconditionally, so '-plot 0' ends in a NameError. Left unchanged because
    # the intended behaviour without a plotter is not clear from this file.
    if Args.plot:
        # Add and remove score functions here
        Plotter = plotly_plotter(
            Args.plotly_id,
            Args.plotly_key,
            Args.native,
            ScoreFxns=[CstScore, Talaris, HbondScore, Disulfide],
            FxnNames=['ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide'],
            PerResidue=bool(Args.norm))

    for PoseGroup in AllGroups:
        Poses = [PoseTuple[-1] for PoseTuple in PoseGroup]
        # remembered for the default plot name below (last group wins)
        RepeatLength = PoseGroup[0][0]

        # NOTE(review): groups are only scored when -multi is set, as in the original
        if Args.plot and Args.multi:
            GroupPdbName = PoseGroup[0][-1].pdb_info().name()
            Tag = TagByPdbName[GroupPdbName]
            # a manually supplied cst applies to every pose; otherwise pass 1
            Plotter.score_poses(Poses, Args.cst or 1, Tag)

    Plotter.plot_2d_score_combinations()
    print('Plotter.Score2dComboTraces %s %s' % (3, Plotter.Score2dComboTraces))

    Plotter.draw_comparisons()

    print('plotting...')
    if len(Args.name):
        Name = Args.name
    else:
        Name = '%s based %d res ' % (Args.native, RepeatLength)
    Plotter.render_scatter_plot(PlotName=Name)

    # interactive selection: repeat until the user declines another round
    while True:

        ScoreFunctionScoreCutoffs = []
        for FxnName in Plotter.FxnNames:
            # keep asking until the input parses as a number
            while True:
                try:
                    Cutoff = float(
                        raw_input(
                            '\tEnter cutoff value (maximum) for %s function: '
                            % FxnName))
                    break
                except ValueError:
                    pass
            ScoreFunctionScoreCutoffs.append(Cutoff)

        print('Cutoff values set at:')
        for i, FxnName in enumerate(Plotter.FxnNames):
            print('%s %s' % (FxnName, ScoreFunctionScoreCutoffs[i]))
            # ascending order lets the scan below stop at the first failing score
            Plotter.ScoreFunctionScoredPdbs[i].sort()

        PassingPdbs = []
        for i, Cutoff in enumerate(ScoreFunctionScoreCutoffs):
            PassThisFxn = []
            for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]:
                if Score > Cutoff:
                    break
                PassThisFxn.append(Pdb)
            PassingPdbs.append(PassThisFxn)

        # combine the per-function pass lists with the requested logic
        PdbsPassingAll = PassingPdbs[0]
        if Args.and_or == 'and':
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll) & set(OtherSet))
        else:
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll + OtherSet))

        Outdir = raw_input(
            '\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: '
            % Args.and_or)

        if not os.path.isdir(Outdir):
            subprocess.check_output(['mkdir', Outdir])
        if not Outdir.endswith('/'):
            Outdir = Outdir + '/'

        for Pdb in PdbsPassingAll:
            subprocess.check_output(['cp', Pdb, Outdir])
            CstFile = Plotter.CstDict[Pdb]
            if CstFile is not None:
                subprocess.check_output(['cp', CstFile, Outdir])

        Continue = str(
            raw_input(
                '\tEnter Y to add another set of selection threshold, or anything else to quit: '
            )).upper()
        if Continue != 'Y':
            break
Ejemplo n.º 6
0
def main(argv=None):
    # if argv is None:
    #   argv = sys.argv
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]
    # print 'sys.argv', sys.argv

    # Arg block
    ArgParser = argparse.ArgumentParser(
        description=' expand_cst.py ( -help ) %s' % InfoString)
    # Required args
    ArgParser.add_argument('-ref_pdb',
                           type=str,
                           help=' reference pdb ',
                           required=True)
    ArgParser.add_argument('-ref_cst',
                           type=str,
                           help=' corresponding to reference pdb ',
                           required=True)
    ArgParser.add_argument('-repeat_pdb_tag',
                           type=str,
                           help=' input pdb tag ',
                           required=True)
    # Optional args
    ArgParser.add_argument('-out',
                           type=str,
                           help=' Output directory ',
                           default='./')
    Args = ArgParser.parse_args()
    if Args.out[-1] != '/':
        Args.out = Args.out + '/'

    # default talaris 2013 score function
    ScoreFunction = rosetta.getScoreFunction()
    # turning on constraint weights
    ScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

    RefPdb = Args.ref_pdb
    # print RefPdb
    ReferencePose = rosetta.pose_from_pdb(RefPdb)
    print 'ReferencePose', ReferencePose

    # modify rosetta cst w/o rosetta
    Constrainer = constraint_extrapolator(Args.ref_cst)

    # RefCst = Args.ref_cst
    # # make constraint mover
    # Constrainer = rosetta.ConstraintSetMover()
    # # get constraints from file
    # Constrainer.constraint_file(RefCst)
    # # Apply constraints to pose
    # Constrainer.apply(ReferencePose)

    # return Constrainer

    Pdbs = glob.glob('*%s*.pdb' % Args.repeat_pdb_tag)
    assert len(
        Pdbs
    ), r"No pdbs found with glob: \n %s \n '* % s *.pdb' % Args.repeat_pdb_tag " % Args.repeat_pdb_tag

    for Pdb in Pdbs:
        ## For debug put pdb of interest here:
        # if Pdb == 'src15_38__22_45_rep24_1EZG.pdb':

        print 'Pdb:', Pdb
        Pose = rosetta.pose_from_pdb(Pdb)

        try:
            SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1',
                                       Pdb)
            SourceRanges = [[int(Number) for Number in Range.split('_')]
                            for Range in SourceRangeString.split('__')]
        except ValueError:
            print 'No src range tag, skipping: %s ' % Pdb
            continue

        print 'SourceRanges:', SourceRanges
        RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
        print 'RepeatLength', RepeatLength
        print

        # print [Pdb]
        PdbTag = (Pdb + '!').replace('.pdb!', '').replace('!', '')
        CstName = PdbTag + '.cst'
        ExtrapolatedConstraints = Constrainer.extrapolate_from_repeat_unit(
            SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose,
            CstName, PdbTag)
Ejemplo n.º 7
0
    def extrapolate_from_repeat_unit(self, ReferenceStart, ReferenceEnd,
                                     RepeatUnitLength, NewPose, FinalCstName,
                                     PdbTag):
        ''' renumbers based on repeat unit pose '''

        # Loop through positions in range of archetype
        # To avoid double counting first only add constraints from archetype residues to
        # more C-terminal residues
        NewLength = NewPose.n_residue()
        self.Range = (1, NewLength)
        self.NewPoseStartShift = ReferenceStart - 1  # for 1 indexing

        UnitShiftMultiples = (NewLength / RepeatUnitLength)
        UnitShiftList = [
            RepeatUnitLength * Multiple
            for Multiple in range(UnitShiftMultiples)
        ]

        Edge1Cst, Edge2Cst, BothEdgeCst, MiddleCst = self.shift_and_sort_constraints(
            ReferenceStart, ReferenceEnd, RepeatUnitLength)

        # self.output_cst(Edge1Cst, 'Edge1.cst')
        # self.output_cst(Edge2Cst, 'Edge2.cst')
        # self.output_cst(BothEdgeCst, 'BothEdgeCst.cst')
        # self.output_cst(MiddleCst, 'Middle.cst')
        # print 'Edge1Cst:', Edge1Cst, '\n'
        # print 'Edge2Cst:', Edge2Cst, '\n'
        # print 'BothEdgeCst:', BothEdgeCst, '\n'
        # print 'MiddleCst:', MiddleCst, '\n'
        print 'UnitShiftList:', UnitShiftList
        print 'RepeatUnitLength:', RepeatUnitLength

        MiddleRepeatCstList = []
        MiddleSkippedCst = 0
        for Constraint in MiddleCst:
            AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
            # Loops through all repeat positions corresponding to reference position
            for Shift in UnitShiftList:
                # print 'Shift:', Shift
                # print 'AtomResidueCoords:', AtomResidueCoords
                ShiftedAtomResidueCoords = [
                    (AtomName, ResidueNumber + Shift)
                    for AtomName, ResidueNumber in AtomResidueCoords
                ]
                if pose_has(NewPose, ShiftedAtomResidueCoords):
                    MiddleRepeatCstList.append(
                        (ShiftedAtomResidueCoords, ConstraintParameters,
                         CstLineNumber, CstType))
                else:
                    MiddleSkippedCst += 1
                    # print 'Skipping constraint involving:', ShiftedAtomResidueCoords

        Edge1RepeatCstList = []
        Edge1SkippedCst = 0
        for Constraint in Edge1Cst:
            AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
            for Shift in UnitShiftList[1:]:
                ShiftedAtomResidueCoords = [
                    (AtomName, ResidueNumber + Shift)
                    for AtomName, ResidueNumber in AtomResidueCoords
                ]
                if pose_has(NewPose, ShiftedAtomResidueCoords):
                    Edge1RepeatCstList.append(
                        (ShiftedAtomResidueCoords, ConstraintParameters,
                         CstLineNumber, CstType))
                else:
                    Edge1SkippedCst += 1
                    # print 'Skipping constraint involving:', ShiftedAtomResidueCoords

        Edge2RepeatCstList = []
        Edge2SkippedCst = 0
        for Constraint in Edge2Cst:
            AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
            for Shift in UnitShiftList[:-1]:
                ShiftedAtomResidueCoords = [
                    (AtomName, ResidueNumber + Shift)
                    for AtomName, ResidueNumber in AtomResidueCoords
                ]
                if pose_has(NewPose, ShiftedAtomResidueCoords):
                    Edge2RepeatCstList.append(
                        (ShiftedAtomResidueCoords, ConstraintParameters,
                         CstLineNumber, CstType))
                else:
                    Edge2SkippedCst += 1
                    # print 'Skipping constraint involving:', ShiftedAtomResidueCoords

        BothEdgeRepeatCstList = []
        BothEdgeSkippedCst = 0
        for Constraint in BothEdgeCst:
            AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint

            for Shift in UnitShiftList:
                ShiftedAtomResidueCoords = [
                    (AtomName, ResidueNumber + Shift)
                    for AtomName, ResidueNumber in AtomResidueCoords
                ]
                if pose_has(NewPose, ShiftedAtomResidueCoords):
                    BothEdgeRepeatCstList.append(
                        (ShiftedAtomResidueCoords, ConstraintParameters,
                         CstLineNumber, CstType))
                else:
                    BothEdgeSkippedCst += 1
                    # print 'Skipping constraint involving:', ShiftedAtomResidueCoords

        # RepPose.constraint_set().show_definition(ostream(sys.stdout), RepPose )

        self.output_cst(MiddleRepeatCstList, '%s_MidRepTemp.cst' % PdbTag)
        self.output_cst(Edge1RepeatCstList, '%s_Edge1RepTemp.cst' % PdbTag)
        self.output_cst(Edge2RepeatCstList, '%s_Edge2RepTemp.cst' % PdbTag)
        self.output_cst(BothEdgeRepeatCstList,
                        '%s_BothEdgeRepTemp.cst' % PdbTag)

        AllRepeatCst = Edge1RepeatCstList[:]
        AllRepeatCst.extend(Edge1RepeatCstList)
        AllRepeatCst.extend(Edge2RepeatCstList)
        AllRepeatCst.extend(BothEdgeRepeatCstList)
        self.output_cst(AllRepeatCst, 'AllRepeatCst.cst')
        ''' trying out constraints to pick between edge 1 and edge 2 (and filter?) '''
        # print
        # print 'MiddleSkippedCst', MiddleSkippedCst
        # print 'Edge1SkippedCst', Edge1SkippedCst
        # print 'Edge2SkippedCst', Edge2SkippedCst
        # print 'BothEdgeSkippedCst', BothEdgeSkippedCst
        # print
        # print 'MiddleRepeatCst'
        NumberMiddleRepeatCst = len(MiddleRepeatCstList)
        # print 'Edge1RepeatCst'
        NumberEdge1RepeatCst = len(Edge1RepeatCstList)
        # print 'Edge2RepeatCst'
        NumberEdge2RepeatCst = len(Edge2RepeatCstList)
        # print 'BothEdgeRepeatCst'
        NumberBothEdgeRepeatCst = len(BothEdgeRepeatCstList)

        NumberAllRepeatCst = len(AllRepeatCst)

        # # All default talaris 2013 non zero weights set to zero
        CstScoreFunction = set_all_weights_zero(rosetta.getScoreFunction())
        # # turning on constraint weights
        CstScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
        CstScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
        CstScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

        print 'MiddlePose should have %d constraints !!! ' % NumberMiddleRepeatCst
        MiddlePose = NewPose.clone()
        if NumberEdge1RepeatCst:
            ConstraintSetter = rosetta.ConstraintSetMover()
            ConstraintSetter.constraint_file('%s_MidRepTemp.cst' % PdbTag)
            ConstraintSetter.apply(MiddlePose)
            # return ConstraintSetter
            # return MiddlePose
            CstScoreFunction.show(MiddlePose)
            # MiddlePose.constraint_set().show_definition(ostream(sys.stdout), MiddlePose )
            print

        print 'Edge1Pose should have %d constraints !!! ' % NumberEdge1RepeatCst
        Edge1Pose = NewPose.clone()
        if NumberEdge1RepeatCst:
            ConstraintSetter = rosetta.ConstraintSetMover()
            ConstraintSetter.constraint_file('%s_Edge1RepTemp.cst' % PdbTag)
            ConstraintSetter.apply(Edge1Pose)
            CstScoreFunction.show(Edge1Pose)
            Edge1Score = CstScoreFunction(Edge1Pose)
            Edge1ScoreNorm = Edge1Score / NumberEdge1RepeatCst
            # Edge1Pose.constraint_set().show_definition(ostream(sys.stdout), Edge1Pose )
            print

        print 'Edge2Pose should have %d constraints !!! ' % NumberEdge2RepeatCst
        Edge2Pose = NewPose.clone()
        if NumberEdge2RepeatCst:
            ConstraintSetter = rosetta.ConstraintSetMover()
            ConstraintSetter.constraint_file('%s_Edge2RepTemp.cst' % PdbTag)
            ConstraintSetter.apply(Edge2Pose)
            CstScoreFunction.show(Edge2Pose)
            Edge2Score = CstScoreFunction(Edge2Pose)
            Edge2ScoreNorm = Edge2Score / NumberEdge2RepeatCst
            # Edge2Pose.constraint_set().show_definition(ostream(sys.stdout), Edge2Pose )
            print

        # print 'BothEdgePose should have %d constraints !!! '%NumberBothEdgeRepeatCst
        # BothEdgePose = NewPose.clone()
        # if NumberBothEdgeRepeatCst:
        #   ConstraintSetter = rosetta.ConstraintSetMover()
        #   ConstraintSetter.constraint_file('%s_AllRepeatCstTemp.cst'%PdbTag)
        #   ConstraintSetter.apply(BothEdgePose)
        #   CstScoreFunction.show(BothEdgePose)
        #   BothEdgeScore = CstScoreFunction(BothEdgePose)
        #   BothEdgeScoreNorm = BothEdgeScore / NumberBothEdgeRepeatCst
        #   # BothEdgePose.constraint_set().show_definition(ostream(sys.stdout), BothEdgePose )
        #   print

        # print 'AllCstPose should have %d constraints !!! '%NumberAllRepeatCst
        # AllCstPose = NewPose.clone()
        # ConstraintSetter = rosetta.ConstraintSetMover()
        # ConstraintSetter.constraint_file('%s_AllRepeatCstTemp.cst'%PdbTag)
        # ConstraintSetter.apply(AllCstPose)
        # CstScoreFunction.show(AllCstPose)
        # # AllCstPose.constraint_set().show_definition(ostream(sys.stdout), AllCstPose )
        # print

        CuratedRepeatCst = MiddleRepeatCstList[:]
        ## whether these should be included or not is up in the air!!
        CuratedRepeatCst.extend(BothEdgeRepeatCstList)

        if NumberEdge1RepeatCst and NumberEdge2RepeatCst:
            if Edge1ScoreNorm <= Edge2ScoreNorm:
                CuratedRepeatCst.extend(Edge1RepeatCstList)
            else:
                CuratedRepeatCst.extend(Edge2RepeatCstList)

        elif NumberEdge1RepeatCst:
            CuratedRepeatCst.extend(Edge1RepeatCstList)
        elif NumberEdge2RepeatCst:
            CuratedRepeatCst.extend(Edge2RepeatCstList)

        # CuratedRepeatCst
        # print 'Edge1ScoreNorm, Edge2ScoreNorm', Edge1ScoreNorm, Edge2ScoreNorm
        # self.output_cst(CuratedRepeatCst, FinalCstName)

        AllWithEdge1RepeatCst = MiddleRepeatCstList[:]
        ## whether these should be included or not is up in the air!!
        # AllWithEdge1RepeatCst.extend(BothEdgeRepeatCstList)
        AllWithEdge1RepeatCst.extend(Edge1RepeatCstList)

        AllWithEdge2RepeatCst = MiddleRepeatCstList[:]
        ## whether these should be included or not is up in the air!!
        # AllWithEdge2RepeatCst.extend(BothEdgeRepeatCstList)
        AllWithEdge2RepeatCst.extend(Edge2RepeatCstList)

        ModFinalCstName = (FinalCstName + '!').replace('.cst!', '')
        self.output_cst(AllWithEdge1RepeatCst, ModFinalCstName + '_e1.cst')
        self.output_cst(AllWithEdge2RepeatCst, ModFinalCstName + '_e2.cst')

        RemainingTempFiles = glob.glob('%s_*Temp.cst' % PdbTag)
        for File in RemainingTempFiles:
            subprocess.check_output(['rm', File])
Ejemplo n.º 8
0
def cap_and_relax_pdb( (RepeatPdb, ReferencePdb, ReferenceCst) ):
  ''' Caps a repeat pose with the ends of the reference pose, then idealizes and relaxes it.

  Takes ONE tuple argument (RepeatPdb, ReferencePdb, ReferenceCst):
    RepeatPdb:    repeat pdb file name; must contain 'rep<n>' and
                  'src<a>_<b>__<c>_<d>' tags, which are parsed from the name
    ReferencePdb: pdb file the N and C terminal caps are cut from
    ReferenceCst: constraint file given to constraint_extrapolator

  Files written (names derived from RepeatPdb): '<name>_Cap.pdb',
  '<name>_Cap.cst', '<name>_Cap_Ideal.pdb' (renamed idealize output),
  '<name>_Cap__Relax.pdb' (relaxed with constraints) and
  '<name>_Cap__Relax2.pdb' (relaxed again without constraints).

  Requires the 'idealize_jd2.default.linuxgccrelease' executable on the path.
  Returns None.
  '''

  RepeatPose = rosetta.pose_from_pdb(RepeatPdb)
  # drop residues at both ends of the repeat pose
  # NOTE(review): exact inclusiveness of return_region bounds not visible here - confirm
  TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 )
  # give the trimmed pose a fresh PDBInfo built from the pose itself
  TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) )

  ReferencePose = rosetta.pose_from_pdb( ReferencePdb )
  ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) )

  # rosetta.dump_pdb(TrimmedRepeatPose, 'Trimmed.pdb')

  # repeat length and source residue ranges are encoded in the pdb file name
  RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdb))
  SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdb)
  SourceRanges = SourceRanges.split('__')
  SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges ]
  # only the first of the two source ranges is used
  SourceStart = SourceRanges[0][0]
  SourceEnd = SourceRanges[0][1]


  '''
   Add N terminal cap 
   '''
  # N cap: start of the reference pose up to 5 residues past the source start
  NcapPose = grafting.return_region( ReferencePose, 1, SourceStart+5 )
  # rosetta.dump_pdb(NcapPose, 'Ncap.pdb')
  NcapLength = NcapPose.n_residue()
  
  # last four residues of the N cap
  NcapOverhangPositions = [ Position for Position in range(NcapLength-3, NcapLength+1) ]
  # print NcapOverhangPositions
  NcapOverhangArray = get_residue_array( NcapPose, NcapOverhangPositions )
  
  # first four residues of the trimmed repeat pose
  RepStartOverhangPositions = [1,2,3,4]
  RepStartOverhangArray = get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions )
  # print RepStartOverhangArray

  # the fitted rotation / translation is applied to the repeat pose, in place
  # NOTE(review): presumably this superimposes the repeat start onto the N cap end - confirm
  RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray )
  rosetta.Pose.apply_transform_Rx_plus_v(TrimmedRepeatPose, rMtx, tVec)
  # rosetta.dump_pdb( TrimmedRepeatPose, 'TrimmedShifted.pdb' )
  NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = fuse(NcapPose, TrimmedRepeatPose)
  print 'Ncap attachment RMSD %f'%RMSD
  # rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' )
  NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) )    
  

  '''
   Add C terminal cap 
  '''
  # C cap: from 6 residues before the source end to the end of the reference pose
  Cshift = SourceEnd-6
  CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() )
  # rosetta.dump_pdb(CcapPose, 'Ccap.pdb')
  # first four residues of the C cap
  CcapOverhangPositions = [1,2,3,4]
  CcapOverhangArray = get_residue_array( CcapPose, CcapOverhangPositions )

  # last four residues of the N capped repeat pose
  RepEndOverhangPositions = [ Position for Position in range( NcapPlusRepeatPose.n_residue()-3, NcapPlusRepeatPose.n_residue()+1 ) ]
  # print 'RepEndOverhangPositions', RepEndOverhangPositions
  RepEndOverhangArray = get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions )
  
  # this time the fitted transform is applied to the C cap, in place
  RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray )
  rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
  # rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' )
  CappedRepeatPose, RMSD, CcapCorrespondingResidues = fuse(NcapPlusRepeatPose, CcapPose)
  print 'Ccap attachment RMSD %f'%RMSD

  CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_Cap.pdb', RepeatPdb)
  # re.sub returns its input unchanged when the pattern does not match
  assert CappedNamePdb != RepeatPdb, 'regular expression substitution failed!'
  rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb )



  '''
   Generate csts for cap/repeat edges 
  '''
  CstExtrapolator = constraint_extrapolator(ReferenceCst)
  # constraints kept for the capped pose, N cap first, then C cap
  ConstraintSet = []
  
  ' N cap constraints are easy; no shifts are needed '

  # For catching when individual constraints have been considered already  
  # (keyed by the constraint's line number in the cst file)
  Redundict = {} 
  for Position in range(1, SourceStart+6):
    # print 'Position', Position
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstExtrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        # unpack tuple values 
        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
        
        # Redundancy check with redundict 
        try:
          Check = Redundict[CstLineNumber]
          # if cst considered already, skip it! 
          continue
        except KeyError:
          Redundict[CstLineNumber] = 1
        
        # keep the constraint only if the capped pose has every atom it names
        if pose_has(CappedRepeatPose, AtomResidueCoords):
          ConstraintSet.append(Constraint)

  ' C cap constraints are harder; need to shift due to pose expansion '

  # CstExtrapolator.output_cst(ConstraintSet, 'NcapConstraints.cst')
  # start a fresh redundancy record for the C cap pass
  Redundict = {} 

  # print 'CcapCorrespondingResidues', CcapCorrespondingResidues
  # NOTE(review): presumably the capped pose position matching the first C cap residue - confirm against fuse
  RepeatCcapPositionStart = CcapCorrespondingResidues[0][0]
  # print 'RepeatCcapPositionStart', RepeatCcapPositionStart

  # offset from reference numbering to capped pose numbering for C cap residues
  ShiftToRepeatPose = RepeatCcapPositionStart - Cshift
  # print 'ShiftToRepeatPose', ShiftToRepeatPose

  for Position in range( Cshift, ReferencePose.n_residue()+1 ):
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstExtrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        # unpack tuple values 
        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
        
        # Redundancy check with redundict 
        try:
          Check = Redundict[CstLineNumber]
          # if cst considered already, skip it! 
          continue
        except KeyError:
          Redundict[CstLineNumber] = 1

        ExpandedPoseAtomResidueCoords = []
        # iterate through atom residue pairs, shifting every residue number
        for AtomResiduePair in AtomResidueCoords:
          # print 'AtomResiduePair', AtomResiduePair
          ExpandedPosePosition = (AtomResiduePair[1]) + ShiftToRepeatPose
          # print 'ExpandedPosePosition', ExpandedPosePosition
          ExpandedPoseAtomResidueCoords.append( ( AtomResiduePair[0], ExpandedPosePosition ) )

        ShiftedConstraint = ExpandedPoseAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType       

        if pose_has(CappedRepeatPose, ExpandedPoseAtomResidueCoords):
          ConstraintSet.append(ShiftedConstraint)  

  CapCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb)
  CstExtrapolator.output_cst(ConstraintSet, CapCstName)

  '''
  idealize peptide bonds with command line subprocess
  '''
  subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', CappedNamePdb])
  # the output expected from idealize ends in '_0001.pdb'; it is renamed to '_Ideal.pdb'
  IdealizedPdbOldName = re.sub(r'(.*).pdb$', r'\1_0001.pdb', CappedNamePdb)
  IdealizedPdbNewName = re.sub(r'(.*).pdb$', r'\1_Ideal.pdb', CappedNamePdb)
  
  subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
  # short pause before reading the renamed file back
  time.sleep(0.2)

  IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName )

  # make constraint mover
  Constrainer = rosetta.ConstraintSetMover()
  # get constraints from file
  Constrainer.constraint_file(CapCstName)
  Constrainer.apply(IdealizedCappedPose)

  ''' SET UP WEIGHTS AS decided '''

  # RelativeWeight = 0.1

  Talaris = rosetta.getScoreFunction()
  TalarisPlusCst = rosetta.getScoreFunction()
  # NOTE(review): AtomPairCst is only referenced by the commented-out weighting below
  AtomPairCst = set_all_weights_zero( rosetta.getScoreFunction() )
  AtomPairCst.set_weight(rosetta.atom_pair_constraint, 1.0)

  # RosettaScore = Talaris(IdealizedCappedPose) 
  # AtomPairCstScore = AtomPairCst(IdealizedCappedPose)
  
  # Weight = ( RosettaScore * RelativeWeight ) / AtomPairCstScore  
  # all three constraint terms get the same fixed weight
  Weight = 1.0
  TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, Weight)
  TalarisPlusCst.set_weight(rosetta.angle_constraint, Weight)
  TalarisPlusCst.set_weight(rosetta.dihedral_constraint, Weight)

  print 'relaxing %s with %s'%(IdealizedPdbNewName, CapCstName) 
  # %d prints the weight as an integer
  print ' Weight %d '%Weight
  # first relax: score function with constraint terms
  rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
  RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax.pdb', IdealizedPdbNewName)
  rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)

  # second relax: same pose, default score function
  rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

  RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax2.pdb', IdealizedPdbNewName)
  rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
Ejemplo n.º 9
0
def main(argv=None):
  if argv is None:
    argv = sys.argv
  ArgParser = argparse.ArgumentParser(description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ")
  ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True )    
  ArgParser.add_argument('-native_pdb', type=str, help=" pdb to compare designs against ", required=True )    
  ArgParser.add_argument('-out', type=str, help=" folder to move files to ", required=True )    
  ArgParser.add_argument('-score', type=float, help=" select all structures with less than this REU / residue ", default=None )
  ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1 )
  ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1 )
  ArgParser.add_argument('-name', type=str, help=" plot tag ", default='' )

  Args = ArgParser.parse_args()
  print Args
  Pdbs = glob.glob( Args.pdb_glob )

  print 'globed %d pdbs'%len(Pdbs)

  if not os.path.isdir(Args.out):
    subprocess.check_output(['mkdir', Args.out])
  if Args.out [-1] != '/':
    Args.out = Args.out + '/'

  if Args.name != '':
    Args.out = Args.out + ' '

  NativePose = rosetta.pose_from_pdb( Args.native_pdb )

  RepeatLengths = []
  for Pdb in Pdbs:
    RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
    # SourceStart = int(re.sub(r'^.*src(\d+).*pdb$', r'\1', Pdb))
    assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb 
    # assert SourceStart != Pdb and RepeatLength != Pdb, ' regular expression extraction of rep or src value failed on %s '%Pdb 
    RepeatLengths.append(RepeatLength)
    # RepeatLengths.append(SourceStart)


  PoseSortingTuples = []
  # Scoring is redundant, once for sorting outside plotter, then again in plotter
  # making not redundant not a priority. 
  # Scoring in the plotter object is so multiple score functions can be plotted easily
  Talaris = rosetta.getScoreFunction()
  for i, Pdb in enumerate(Pdbs):
    RepeatLength = RepeatLengths[i]
    Pose = rosetta.pose_from_pdb(Pdb)
    if Args.norm:
      Score = Talaris(Pose) / Pose.n_residue()
    else:
      Score = Talaris(Pose) 
    PoseSortingTuples.append( (RepeatLength, Score, Pose) )
  # sorts by repeat length (shortest to longest) then score (best to worst)
  PoseSortingTuples.sort()

  # print 'PoseSortingTuples', PoseSortingTuples

  AllRepeatLengthGroups = []
  RepeatRepeatLengthGroup = []
  LastLength = 0
  for PoseTuple in PoseSortingTuples:
    Length = PoseTuple[0]
    if LastLength and Length != LastLength:
      AllRepeatLengthGroups.append(RepeatRepeatLengthGroup)
      RepeatRepeatLengthGroup = []
    RepeatRepeatLengthGroup.append(PoseTuple)
    LastLength = Length
  # for last repeat length
  AllRepeatLengthGroups.append(RepeatRepeatLengthGroup)

  # print 'AllRepeatLengthGroups', AllRepeatLengthGroups

  # Add more score functions as wanted
  if Args.plot:
    Plotter = plotly_plotter(ScoreFxns=[ Talaris ], FxnNames=[ 'Talaris' ], EnergyPerResidue=True )

  for RepeatLengthGroup in AllRepeatLengthGroups:
    print 'RepeatLengthGroup', RepeatLengthGroup
    Poses = [ PoseTuple[2] for PoseTuple in RepeatLengthGroup ]
    RepeatLength = RepeatLengthGroup[0][0]
    if Args.plot:
      Plotter.clear_traces()
      Xaxis = Plotter.score_poses( Poses )
      Plotter.add_comparsion_threshold( NativePose, Xaxis )
      Plotter.plot_traces( PlotName='%s%s based %d res repeats globed with %s'%(Args.name, Args.native_pdb, RepeatLength, Args.pdb_glob) )

    if Args.score != None:
      with open('%sScores.log'%Args.out, 'a') as Log:  
        for RepLen, Score, Pose in RepeatLengthGroup:
          if Score > Args.score:
            break
          PdbName = Pose.pdb_info().name()
          subprocess.check_output([ 'cp', PdbName, Args.out ])
          print>>Log, '%s\t%.3f'%(PdbName, Score)
Ejemplo n.º 10
0
def main(ExtraResidues=0, ipython=0):
  ### Required args
  ArgParser = argparse.ArgumentParser(description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ")
  ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True )    
  ArgParser.add_argument('-native', type=str, help=" pdb to compare designs against ", required=True )    
  ### Default args
  ArgParser.add_argument('-cst', type=str, help=" to provide cst manually, will apply to all globed pdbs!!! ", default=False )
  ArgParser.add_argument('-param', type=str, nargs='+', help=" params ", default=[] )
  ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1 )

  ### following args are for plotly:
  ### change if you use this script!!!
  ArgParser.add_argument('-plotly_id', type=str, help=" ", default="pylesharley") # required=True )    
  ArgParser.add_argument('-plotly_key', type=str, help="  ", default="cc5z4a8kst") # required=True )    
  ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1 )
  ArgParser.add_argument('-name', type=str, help=" plot tag ", default='' )
  ArgParser.add_argument('-and_or', type=str, help=" And/Or logic for score cutoffs. Default = 'and'  ", default='and' )
  ArgParser.add_argument('-multi', type=int, help=" 0|(1) plot different methods together on same plot ", default=1 )
  
  Args = ArgParser.parse_args()
  Pdbs = glob.glob( Args.pdb_glob )
  print 'globed %d pdbs'%len(Pdbs)

  if ExtraResidues == 0 and len(Args.param) > 0:
    try: 
      ExtraParams = rosetta.Vector1( Args.param )
      ExtraResidues = rosetta.generate_nonstandard_residue_set( ExtraParams )
    except:
      ExtraParams = rosetta.Vector1( Args.param )
      ExtraResidues = rosetta.generate_nonstandard_residue_set( ExtraParams )
    ### for ipython mode
    if ipython: 
      return ExtraResidues

  Args.and_or = Args.and_or.lower()
  assert Args.and_or == 'and' or Args.and_or == 'or', " -and_or must equal 'and' or 'or' "

  RepeatLengths = []
  ProcessTags = {}
  TagList = []
  TagByPdbName = {}

  # better to find out of native pdb is wrong before waiting for pdb scoring
  Check = open(Args.native, 'r')

  # print ' first loop '
  OverlapStarts = []
  for Pdb in Pdbs:
    Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
    for OtherPdb in Pdbs:
      OtherTag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
      i = 0
      if Pdb != OtherPdb:
        while Pdb[:i] == OtherPdb[:i]:
          i+=1
        Overlap = OtherPdb[:i-1]
        OverlapStarts.append( ( len(Overlap), Overlap ) )

  OverlapStarts.sort()
  ShortestOverlap = OverlapStarts[0][1]

  # print 'OverlapStarts', OverlapStarts
  # print 'ShortestOverlap', ShortestOverlap
  
  for Pdb in Pdbs:
    try:
      RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
    except ValueError:
      RepeatLength = 0
    # SourceStart = int(re.sub(r'^.*src(\d+).*pdb$', r'\1', Pdb))
    assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb 
    # assert SourceStart != Pdb and RepeatLength != Pdb, ' regular expression extraction of rep or src value failed on %s '%Pdb 
    RepeatLengths.append(RepeatLength)    

    #### re.sub out tag from design process
    Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
    Tag = re.sub(r'^%s(.*)\.pdb$'%(ShortestOverlap), r'\1', Tag)
    
    TagByPdbName[Pdb] = Tag
    try:
      TagNumber = ProcessTags[Tag] 
    except:
      TagNumber = len(ProcessTags) + 1
      ProcessTags[Tag] = TagNumber
    TagList.append(TagNumber)

  # Scoring is redundant, once for sorting outside plotter, then again in plotter
  # making not redundant not a priority. 
  # Scoring in the plotter object is so multiple score functions can be plotted easily

  # Sort by repeat length, then score
  if Args.multi:
    # Sort by repeat length, then method tag, then score
    MultiPoseSortingTuples = []
  else:
    PoseSortingTuples = []

  Talaris = rosetta.getScoreFunction()
  for i, Pdb in enumerate(Pdbs):
    RepeatLength = RepeatLengths[i]
    ProcessNumber = TagList[i]
    Pose = rosetta.pose_from_pdb(Pdb)
    if Args.norm:
      Score = Talaris(Pose) / Pose.n_residue()
    else:
      Score = Talaris(Pose) 
    
    # print 'Pdb', Pdb
    if Args.multi:
      MultiPoseSortingTuples.append( (RepeatLength, ProcessNumber, Score, Pose) )
    else:
      PoseSortingTuples.append( (RepeatLength, Score, Pose) )


  if Args.multi:
    # Sort by repeat length, then method tag, then score
    MultiPoseSortingTuples.sort()
  else:
    # sorts by repeat length (shortest to longest) then score (best to worst)
    PoseSortingTuples.sort()

  if Args.multi:
    # print 'MultiPoseSortingTuples', MultiPoseSortingTuples
    SortedTuples = MultiPoseSortingTuples
  else:
    # print 'PoseSortingTuples', PoseSortingTuples
    SortedTuples = PoseSortingTuples

  LastLength = 0
  LastTag = 0
  AllGroups = []
  CurrentGroup = []

  for PoseTuple in SortedTuples:
    Length = PoseTuple[0]
    if Args.multi:
      Tag = PoseTuple[1]
    
    if LastLength and Length != LastLength:
      AllGroups.append(CurrentGroup)
      CurrentGroup = []
    
    if Args.multi:
      if LastTag and Tag != LastTag:
        AllGroups.append(CurrentGroup)
        CurrentGroup = [] 
    
    CurrentGroup.append(PoseTuple)
    LastLength = Length
    if Args.multi: 
      LastTag = Tag

  # for last repeat length
  AllGroups.append(CurrentGroup)

  ''' Build score functions here: '''

  Talaris = rosetta.getScoreFunction()

  # This line returns a talaris function with all default weights set to 0
  CstScore = set_all_weights_zero( rosetta.getScoreFunction() )
  CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
  CstScore.set_weight(rosetta.angle_constraint, 5.0)
  CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

  HbondScore = set_all_weights_zero( rosetta.getScoreFunction() )
  HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
  HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
  HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
  HbondScore.set_weight(rosetta.hbond_sc, 1.100)

  Disulfide = set_all_weights_zero( rosetta.getScoreFunction() )
  Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

  if Args.plot:
    if Args.norm:
      PerRes = True
    else:
      PerRes = False
    ''' Add and remove score functions here '''
    Plotter = plotly_plotter( Args.plotly_id, Args.plotly_key, Args.native,
                              ScoreFxns=[ CstScore, Talaris, HbondScore, Disulfide ],
                              FxnNames=[ 'ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide' ],
                              PerResidue=PerRes )

  XaxisSortingTuples = []

  for PoseGroup in AllGroups:
  # for PoseGroup in [SortedTuples]:
    if len(PoseGroup):
      # print 
      # print 'Group:', PoseGroup
      Poses = [ PoseTuple[-1] for PoseTuple in PoseGroup ]
      # print PoseGroup
      RepeatLength = PoseGroup[0][0]
      # print '\n'.join( [ Pose.pdb_info().name() for Pose in Poses ] ) 
      # print 'Zero index pose tuple:'
      # print PoseGroup[0]
     
      if Args.plot:
        GroupPdbName = PoseGroup[0][-1].pdb_info().name()
        if Args.multi:
          Tag = TagByPdbName[GroupPdbName] 
          
          if Args.cst:
            Plotter.score_poses( Poses, Args.cst, Tag )
          else:
            Plotter.score_poses( Poses, 1, Tag )
  
  # return Plotter
  Plotter.plot_2d_score_combinations()
  print 'Plotter.Score2dComboTraces', 3, Plotter.Score2dComboTraces

  Plotter.draw_comparisons()

  print 'plotting...'
  if len(Args.name):
    Name = Args.name
  else:
    Name = '%s based %d res '%( Args.native, RepeatLength )
  Plotter.render_scatter_plot( PlotName=Name )
  
  while 1:

    ScoreFunctionScoreCutoffs = []
    for i, Name in enumerate( Plotter.FxnNames ):
      while 1:
        try:
          Cutoff = float( raw_input('\tEnter cutoff value (maximum) for %s function: '%Name) ) 
          break
        except ValueError:
          pass  
      ScoreFunctionScoreCutoffs.append(Cutoff)

    print 'Cutoff values set at:'
    for i, Name in enumerate( Plotter.FxnNames ):
      # print Name, ScoreFunctionScoreCutoffs[i]
      Plotter.ScoreFunctionScoredPdbs[i].sort()

    PassingPdbs = []
    for i, Name in enumerate( Plotter.FxnNames ):
      PassThisFxn = []
      Cutoff = ScoreFunctionScoreCutoffs[i]
      # print Plotter.ScoreFunctionScoredPdbs[i]
      for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]:
        if Score <= Cutoff:
          PassThisFxn.append(Pdb)
        else:
          break
      PassingPdbs.append( PassThisFxn )

    PdbsPassingAll = PassingPdbs[0]
    if Args.and_or == 'and':
      for OtherSet in PassingPdbs[1:]:
        PdbsPassingAll = list( set(PdbsPassingAll) & set(OtherSet) )
    else:
      for OtherSet in PassingPdbs[1:]:
        PdbsPassingAll = list( set(PdbsPassingAll + OtherSet) )
    
    Outdir = raw_input( '\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: '%Args.and_or ) 

    if not os.path.isdir(Outdir):
      subprocess.check_output(['mkdir', Outdir])
    if Outdir [-1] != '/':
      Outdir = Outdir + '/'

    for Pdb in PdbsPassingAll:
      subprocess.check_output([ 'cp', Pdb, Outdir ])
      if Plotter.CstDict[Pdb] != None:
        subprocess.check_output([ 'cp', Plotter.CstDict[Pdb], Outdir ])

    Continue = str( raw_input( '\tEnter Y to add another set of selection threshold, or anything else to quit: ') ).upper()
    if Continue == 'Y':
      pass
    else:
      break
def optimize_repeat_pdb((Pdb, CstSets, RepeatLength)):
    ''' parallelizable '''

    # idealize peptide bonds with command line subprocess
    subprocess.check_output(
        ['idealize_jd2.default.linuxgccrelease', '-s', Pdb])
    IdealizedPdbOldName = Pdb.replace('.pdb', '_0001.pdb')
    IdealizedPdbNewName = Pdb.replace('.pdb', '_ideal.pdb')
    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    time.sleep(0.5)

    Pose = rosetta.pose_from_pdb(IdealizedPdbNewName)
    PoseLength = Pose.n_residue()

    assert PoseLength % RepeatLength == 0, 'pdb input into optimize_repeat_pdb must have integer multiple of repeat_length number of residues'
    NumberRepeats = PoseLength / RepeatLength

    # print 'NumberRepeats', NumberRepeats
    # print 'RepeatLength', RepeatLength
    Sequence = Pose.sequence()
    # print Sequence

    RepeatRanges = []
    Start = 1
    for Repeat in range(NumberRepeats):
        End = Start + RepeatLength - 1
        RepeatRanges.append((Start, End))
        Start += RepeatLength

    assert len(RepeatRanges) == NumberRepeats
    # print 'RepeatRanges', RepeatRanges

    MidRepeat = (NumberRepeats / 2) - 1
    ReferenceRange = RepeatRanges[MidRepeat]
    # print 'MidRepeat', MidRepeat
    # print 'ReferenceRange', ReferenceRange

    SetupNCS = symmetry.SetupNCSMover()

    for TargetRange in RepeatRanges:
        if TargetRange != ReferenceRange:
            # print 'OtherRange', TargetRange
            # skip first three residue (not enougth atoms for torsion), and amino acid types allowed to vary
            if TargetRange[0] == 1:
                SetupNCS.add_group(
                    "%dA-%dA" % (ReferenceRange[0] + 3, ReferenceRange[1]),
                    "%dA-%dA" % (TargetRange[0] + 3, TargetRange[1]))
            # skip last residue (not enougth atoms for torsion)
            elif TargetRange[1] == PoseLength:
                SetupNCS.add_group(
                    "%dA-%dA" % (ReferenceRange[0], ReferenceRange[1] - 3),
                    "%dA-%dA" % (TargetRange[0], TargetRange[1] - 3))
            else:
                SetupNCS.add_group(
                    "%dA-%dA" % (ReferenceRange[0], ReferenceRange[1]),
                    "%dA-%dA" % (TargetRange[0], TargetRange[1]))

    SetupNCS.apply(Pose)

    # default talaris 2013 score function plus dihedral wieght for symmetry ncs mimization
    SymmTalaris = rosetta.getScoreFunction()
    SymmTalaris.set_weight(rosetta.dihedral_constraint, 1.0)

    TalarisPlusCst = rosetta.getScoreFunction()
    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 3.0)

    TalarisPlusCstLowerFaRep = rosetta.getScoreFunction()
    TalarisPlusCstLowerFaRep.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.dihedral_constraint, 3.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.fa_rep, 0.25)
    print 'Pdb:', Pdb

    OptimizedPoses = []
    PoseIDs = []

    for Cst in CstSets:
        print 'Cst:', Cst
        CstPose = Pose.clone()
        CstStemName = re.sub(r'^(.*)\.cst$', r'\1', Cst)

        # make constraint mover
        Constrainer = rosetta.ConstraintSetMover()
        # get constraints from file
        Constrainer.constraint_file(Cst)
        Constrainer.apply(CstPose)

        FxnTags = ['TalCst', 'LowFaRep']

        for i, ScoreFunction in enumerate(
            [TalarisPlusCst, TalarisPlusCstLowerFaRep]):
            # for AbsoluteWeight in [1, 5, 10, 100]:

            RelaxPose = CstPose.clone()
            rosetta.relax_pose(RelaxPose, ScoreFunction, 'tag')
            rosetta.dump_pdb(RelaxPose, CstStemName + '_%s.pdb' % FxnTags[i])
            # remove all constraints
            RelaxPose.remove_constraints()
            # reapply ncs constraints
            SetupNCS.apply(RelaxPose)

            rosetta.relax_pose(RelaxPose, SymmTalaris, 'tag')
            # Trekker.score(RelaxPose)
            rosetta.dump_pdb(RelaxPose,
                             CstStemName + '_%s_Relax.pdb' % FxnTags[i])

    JustRelaxPose = Pose.clone()
    SetupNCS.apply(JustRelaxPose)

    rosetta.relax_pose(JustRelaxPose, SymmTalaris, 'tag')
    rosetta.dump_pdb(JustRelaxPose, CstStemName + '_JustRelax.pdb')