def run_graft(self):
    """Configure an AnchoredGraftMover from the window's settings and run it.

    The scaffold anchor residues and the insert boundaries are converted from
    PDB numbering to pose numbering with pdb_info().pdb2pose.  When the
    selected graft type contains "Double Loop", the user is prompted for N-
    and C-terminal overhang lengths; the insert is then extracted including
    those overhangs and superimpose_overhangs_heavy is called on self.pose.
    Otherwise the bare insert region is used with zero overhang.

    The configured mover is wrapped in a GraftMover, passed to
    self.run_protocol, and self.main is destroyed afterwards.

    Returns early (without running anything and without destroying self.main)
    when either overhang prompt yields a falsy answer, i.e. the dialog was
    cancelled (None) or 0 was entered.
    """
    anchor_start = self.pose.pdb_info().pdb2pose(self.scaffold_chain.get(), int(self.scaffold_start.get()))
    anchor_end = self.pose.pdb_info().pdb2pose(self.scaffold_chain.get(), int(self.scaffold_end.get()))

    graftmover = graft.AnchoredGraftMover(anchor_start, anchor_end)

    insert_start = self.from_pose.pdb_info().pdb2pose(self.insert_chain.get(), int(self.insert_start.get()))
    insert_end = self.from_pose.pdb_info().pdb2pose(self.insert_chain.get(), int(self.insert_end.get()))

    graft_type = self.graft_type.get()

    if re.search("Double Loop", graft_type):
        nter_overhang = tkSimpleDialog.askinteger(title='N-terminal Overhang', prompt='Please enter the number of residues N-terminal from insert_start to use for superposition', initialvalue=3)
        if not nter_overhang:
            return
        cter_overhang = tkSimpleDialog.askinteger(title='C-terminal Overhang', prompt='Please enter the number of residues C-terminal from insert_end to use for superposition', initialvalue=3)
        # Check the answer BEFORE using it: a cancelled dialog gives None, and
        # insert_end + None would raise TypeError instead of aborting cleanly.
        if not cter_overhang:
            return
        insert = graft.return_region(self.from_pose, insert_start - nter_overhang, insert_end + cter_overhang)
        print("Insert with overhang:")
        print(insert)
        graftmover.set_piece(insert, nter_overhang, cter_overhang)
        graftmover.superimpose_overhangs_heavy(self.pose, False, False)
    else:
        insert = graft.return_region(self.from_pose, insert_start, insert_end)
        print("Insert: ")
        print(insert)
        graftmover.set_piece(insert, 0, 0)

    # Exact-name match selects the CCD arm strategy; anything else keeps the
    # mover's default.
    if graft_type == "Double Arm":
        graftmover.set_use_single_loop_double_CCD_arms(True)
    elif graft_type == "Double Loop Double Arm":
        graftmover.set_use_double_loop_double_CCD_arms(True)
    elif graft_type == "Double Loop Quad Arm":
        graftmover.set_use_double_loop_quad_CCD_arms(True)
    else:
        print("Using default graft type")

    if self.randomize_first.get():
        graftmover.set_test_control_mode(True)

    graftmover.set_cycles(self.cycles.get())
    graftmover.set_scaffold_flexibility(self.scaffold_nter_flex.get(), self.scaffold_cter_flex.get())
    graftmover.set_insert_flexibility(self.insert_nter_flex.get(), self.insert_cter_flex.get())

    new_graftmover = GraftMover(graftmover)
    new_graftmover.set_repack_connection(self.repack_connection.get(), self.score_class.score)
    new_graftmover.set_repack_connection_and_insert(self.repack_connection_and_piece.get(), self.score_class.score)

    self.run_protocol(new_graftmover)
    self.main.destroy()
# Example #2
# 0
def fuse(Pose1, Pose2, SubsetSize=2):
    """Superimpose Pose2 on Pose1 over their best-matching window and splice them.

    Residues of the two poses are paired with
    solenoid_tools.match_superimposed_pose_residues (third argument 1.5,
    presumably a distance cutoff -- earlier values tried were 0.5 and 2.0).
    Every window of SubsetSize consecutive pairs is fitted on its N, C, O and
    CA coordinates; the transformation of the lowest-RMSD window is applied to
    Pose2 IN PLACE.  The result is Pose1 up to the pair just before the window
    midpoint, followed by Pose2 from the midpoint pair to its end.

    Args:
        Pose1: pose supplying the N-terminal part of the fusion.
        Pose2: pose supplying the C-terminal part; it is transformed in place.
        SubsetSize: number of consecutive matched pairs per fitted window.

    Returns:
        (FusionPose, BestRMSD, CorrespondingResidues) where
        CorrespondingResidues is a list of (Pose1 position, Pose2 position).

    Raises:
        AssertionError: if any residue has more than one match, or if
            SubsetSize exceeds the number of matched residues (both poses are
            dumped via dump_many_poses before raising in the latter case).
    """
    MatchingResidueHash = solenoid_tools.match_superimposed_pose_residues(
        Pose1, Pose2, 1.5)

    # There must be a one to one correspondence for all residue matches.
    # Raised explicitly so the check survives `python -O`.
    for MatchRes in MatchingResidueHash:
        if len(MatchingResidueHash[MatchRes]) > 1:
            raise AssertionError

    # Number of positions that have a match at all.
    NofMatchRes = sum(
        1 for Match in MatchingResidueHash if len(MatchingResidueHash[Match]))
    if SubsetSize > NofMatchRes:
        dump_many_poses([Pose1, Pose2], 'FusedFusion')
        raise AssertionError(
            ' Designated subset length should not exceed that of the overlap between poses. Poses dumped for inspection '
        )

    # Positions like [(MatchRes1InPose1, MatchRes1InPose2), ...], ordered by
    # position in Pose1.
    CorrespondingResidues = [
        (P1, MatchingResidueHash[P1][0])
        for P1 in range(1, Pose1.n_residue() + 1)
        if len(MatchingResidueHash[P1]) == 1
    ]

    BestRMSD = 999
    BestSubset = []
    BestTransformation = ()
    BackboneAtoms = ['N', 'C', 'O', 'CA']

    for i in range(len(CorrespondingResidues) - SubsetSize + 1):
        IterationsSubset = CorrespondingResidues[i:i + SubsetSize]
        Pose1Coords = []
        Pose2Coords = []
        for P1, P2 in IterationsSubset:
            for AtomName in BackboneAtoms:
                Pose1Coords.append(list(Pose1.residue(P1).xyz(AtomName)))
                Pose2Coords.append(list(Pose2.residue(P2).xyz(AtomName)))

        # One row of x, y, z per collected backbone atom.
        Pose1Array = np.array(Pose1Coords)
        Pose2Array = np.array(Pose2Coords)

        RMSD, Rotation, Translation = solenoid_tools.rmsd_2_np_arrays_rosetta(
            Pose1Array, Pose2Array)

        if RMSD < BestRMSD:
            BestRMSD = RMSD
            BestSubset = IterationsSubset
            BestTransformation = (Rotation, Translation)

    # Apply the best window's transformation to Pose2.
    Rotation, Translation = BestTransformation
    rosetta.Pose.apply_transform_Rx_plus_v(Pose2, Rotation, Translation)

    # Makes a lot of sense for even overlaps, less sense for odd overlaps.
    # Floor division keeps the cut point an int, so it is a valid list index
    # on Python 3 as well as Python 2.
    # NOTE(review): with SubsetSize == 1 the cut point is 0 and index -1 wraps
    # to the only pair, so the matched residue ends up in both halves.
    Cutpoint = SubsetSize // 2
    EndOfPose1 = BestSubset[Cutpoint - 1][0]
    StartOfPose2 = BestSubset[Cutpoint][1]

    FusionPose = grafting.return_region(Pose1, 1, EndOfPose1)
    for Pose2Position in range(StartOfPose2, Pose2.n_residue() + 1):
        FusionPose.append_residue_by_bond(Pose2.residue(Pose2Position))

    return FusionPose, BestRMSD, CorrespondingResidues
# Example #3
# 0
def detect_and_expand_repeats(InputTuple):
    """Find tandem repeats in a PDB and dump poses extrapolated from them.

    Args:
        InputTuple: (Args, Pdb).  Args must provide repeat_residues,
            max_turns_per_repeat, min_overlap, repeat and out; Pdb is the
            path of the structure to load.

    For every pair of repeat starts in the same stretch whose offset lies
    between min_overlap and (repeat unit length - min_overlap), the two
    units (optionally spanning several turns and shifted along the tandem
    repeats) are passed to extrapolate_repeat_pose.  Extrapolations with the
    expected residue count are trimmed to Args.repeat units and written as
    '<Args.out>src<a>_<b>__<c>_<d>_rep<len>_<stem>.pdb'; a summary is written
    to '<stem>_RepExtra.log'.

    When Args.repeat_residues is falsy the repeat finder runs unrestricted;
    otherwise it is parsed as '__'-separated chains of '_'-separated ints.
    """
    Args, Pdb = InputTuple
    print('Pdb: %s' % (Pdb,))
    # Name base for output pdbs.
    InputPdbStem = Pdb.split('/')[-1].replace('.pdb', '')
    print('StemName: %s' % (InputPdbStem,))

    # Load Pdb into a rosetta pose.
    Pose = rosetta.pose_from_pdb(Pdb)
    Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))

    if not Args.repeat_residues:
        TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(
            Pose)
    else:
        RepeatChains = [[int(Number) for Number in Chain.split('_')]
                        for Chain in Args.repeat_residues.split('__')]
        TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(
            Pose, RepeatChains)

    AllExtrapolationsByRepeatLength = {}
    print('TandemRepeats: %s' % (TandemRepeats,))
    print('RepeatStretchesByLengthHash: %s' % (RepeatStretchesByLengthHash,))

    for RepeatUnitLength in RepeatStretchesByLengthHash:
        ExtrapolationList = []

        print('RepeatUnitLength %s' % (RepeatUnitLength,))
        for RepeatStretch in RepeatStretchesByLengthHash[RepeatUnitLength]:
            print('RepeatStretch %s' % (RepeatStretch,))

            # All pairwise combinations of repeat starts; the second arg
            # should ALWAYS be 2 unless a major overhaul is performed.
            for RepeatUnit1Start, RepeatUnit2Start in itertools.combinations(
                    RepeatStretch, 2):
                assert RepeatUnit1Start <= RepeatUnit2Start, ' RepeatUnit1 must begin before RepeatUnit2 '

                TandemRepeats1 = TandemRepeats[RepeatUnit1Start]
                TandemRepeats2 = TandemRepeats[RepeatUnit2Start]

                # Whichever position starts the fewest tandem repeats
                # dictates how far to shift.
                Shifts = min(len(TandemRepeats1), len(TandemRepeats2))
                # Max number of turns per repeat depends on the available
                # repeats and on the user supplied maximum.
                MaxTurns = min(Args.max_turns_per_repeat, Shifts)

                MinUnit2Start = RepeatUnit1Start + Args.min_overlap
                MaxUnit2Start = (RepeatUnit1Start + RepeatUnitLength -
                                 Args.min_overlap)
                if not (MinUnit2Start <= RepeatUnit2Start <= MaxUnit2Start):
                    continue

                # The same shift is added to both starts, so the offset
                # between the two units never changes.
                Overlap = RepeatUnit2Start - RepeatUnit1Start

                for NumTurns in range(1, MaxTurns + 1):
                    ModLength = NumTurns * RepeatUnitLength
                    ModUniformLength = Args.repeat * ModLength

                    # Floor division: range() needs an int on Python 3 too.
                    for Shift in range(Shifts // NumTurns):
                        ModRep1Start = RepeatUnit1Start + (Shift * ModLength)
                        ModRep2Start = RepeatUnit2Start + (Shift * ModLength)
                        ModRep1End = ModRep1Start + ModLength - 1
                        ModRep2End = ModRep2Start + ModLength - 1

                        Repeat1Unit = grafting.return_region(
                            Pose, ModRep1Start, ModRep1End)
                        Repeat2Unit = grafting.return_region(
                            Pose, ModRep2Start, ModRep2End)

                        # Extrapolate from a partial repeat.
                        try:
                            Extrapolation = extrapolate_repeat_pose(
                                Repeat1Unit, Repeat2Unit, Args.repeat - 1)
                        except AssertionError:
                            # This message used to be a bare string
                            # expression, so it was never shown.
                            print('Extrapolation failed')
                            continue

                        # Hacky check: finds things that went wrong in
                        # extrapolation, sometimes.
                        if Extrapolation.n_residue(
                        ) != ModUniformLength + Overlap:
                            print('fail')
                            continue

                        # Trim down to uniform length and record it.
                        Extrapolation = grafting.return_region(
                            Extrapolation, 1, ModUniformLength)
                        ExtrapolationList.append(
                            (Extrapolation, (ModRep1Start, ModRep1End),
                             (ModRep2Start, ModRep2End), NumTurns))

        AllExtrapolationsByRepeatLength[RepeatUnitLength] = ExtrapolationList

    with open('%s_RepExtra.log' % InputPdbStem, 'w') as LogFile:

        for BaseUnitLength in AllExtrapolationsByRepeatLength:
            Extrapolations = AllExtrapolationsByRepeatLength[BaseUnitLength]
            Summary = 'Extrapolated %d poses with base unit length %d' % (
                len(Extrapolations), BaseUnitLength)
            print(Summary)
            LogFile.write(Summary + '\n')
            # NOTE(review): only this header is logged; the per-pose rows
            # were already disabled in the original code.
            LogFile.write('Number\tUnit1 range\tUnit2 range' + '\n')

            for (Extrapolation, Repeat1Range, Repeat2Range,
                 NumTurns) in Extrapolations:
                ExpandedUnitLength = BaseUnitLength * NumTurns
                rosetta.dump_pdb(
                    Extrapolation, '%ssrc%d_%d__%d_%d_rep%d_%s.pdb' %
                    (Args.out, Repeat1Range[0], Repeat1Range[1],
                     Repeat2Range[0], Repeat2Range[1], ExpandedUnitLength,
                     InputPdbStem))
# Script fragment: cut the regex-matched region out of two structures.
# faseqa, faseqb and pdbcode are expected to be defined earlier in the script.
print("faseqb :  %s" % (faseqb,))

regexa = "[A-Z]{0,23}C[A-Z]([A-Z]{8,12}W)[YF][A-Z]{13}([A-Z]{6,11})[A-Z]{15,30}[DL][A-Z]{2,3}Y[A-Z][CW][A-Z]([A-Z]{7,16}[FW])G[A-Z]G[A-Z]{0,7}[PA]*"
regexb = "[A-Z]{0,23}C[A-Z]([A-Z]{8,12}W)[Y][A-Z]{13}([A-Z]{6,11})[A-Z]{15,40}[YLF][A-Z][CW][A-Z]([A-Z]{7,17}[F])G[A-Z]G[A-Z]{0,7}[E]*"

res = re.search(regexa, str(faseqa))
if res is None:
    print(None)
    # Without a match there is nothing to truncate; fail with a clear message
    # instead of an AttributeError on res.start() further down.
    raise ValueError("regexa did not match faseqa; cannot truncate tmpa.pdb")
print("%s %s %s" % (res.group(), res.start(), res.end()))

protposea = rosetta.Pose()
rosetta.core.import_pose.pose_from_pdb(protposea, 'tmpa.pdb')
# Match offsets are 0-based and end-exclusive; the start is shifted by one,
# presumably because pose residue numbering is 1-based -- TODO confirm.
apose = graft.return_region(protposea, res.start() + 1, res.end())
apose.dump_pdb("apose.pdb")

res = re.search(regexb, str(faseqb))
if res is None:
    print(None)
    raise ValueError("regexb did not match faseqb; cannot truncate tmpb.pdb")
print("%s %s %s" % (res.group(), res.start(), res.end()))

protposeb = rosetta.Pose()
rosetta.core.import_pose.pose_from_pdb(protposeb, 'tmpb.pdb')
apose = graft.return_region(protposeb, res.start() + 1, res.end())
apose.dump_pdb("bpose.pdb")

outfilename = pdbcode + '.trunc.pdb'
def fuse(Pose1, Pose2, SubsetSize=2):
  """Superimpose Pose2 on Pose1 over their best-matching window and splice them.

  Residues of the two poses are paired with
  solenoid_tools.match_superimposed_pose_residues (third argument 1.5,
  presumably a distance cutoff -- earlier values tried were 0.5 and 2.0).
  Every window of SubsetSize consecutive pairs is fitted on its N, C, O and
  CA coordinates; the transformation of the lowest-RMSD window is applied to
  Pose2 IN PLACE.  The result is Pose1 up to the pair just before the window
  midpoint, followed by Pose2 from the midpoint pair to its end.

  Args:
    Pose1: pose supplying the N-terminal part of the fusion.
    Pose2: pose supplying the C-terminal part; it is transformed in place.
    SubsetSize: number of consecutive matched pairs per fitted window.

  Returns:
    (FusionPose, BestRMSD, CorrespondingResidues) where
    CorrespondingResidues is a list of (Pose1 position, Pose2 position).

  Raises:
    AssertionError: if any residue has more than one match, or if SubsetSize
      exceeds the number of matched residues (both poses are dumped via
      dump_many_poses before raising in the latter case).
  """
  MatchingResidueHash = solenoid_tools.match_superimposed_pose_residues(Pose1, Pose2, 1.5)

  # There must be a one to one correspondence for all residue matches.
  # Raised explicitly so the check survives `python -O`.
  for MatchRes in MatchingResidueHash:
    if len(MatchingResidueHash[MatchRes]) > 1:
      raise AssertionError

  # Number of positions that have a match at all.
  NofMatchRes = sum(1 for Match in MatchingResidueHash if len(MatchingResidueHash[Match]))
  if SubsetSize > NofMatchRes:
    dump_many_poses([Pose1, Pose2], 'FusedFusion')
    raise AssertionError(' Designated subset length should not exceed that of the overlap between poses. Poses dumped for inspection ')

  # Positions like [(MatchRes1InPose1, MatchRes1InPose2), ...], ordered by
  # position in Pose1.
  CorrespondingResidues = [
    (P1, MatchingResidueHash[P1][0])
    for P1 in range(1, Pose1.n_residue() + 1)
    if len(MatchingResidueHash[P1]) == 1
  ]

  BestRMSD = 999
  BestSubset = []
  BestTransformation = ()
  BackboneAtoms = ['N', 'C', 'O', 'CA']

  for i in range(len(CorrespondingResidues) - SubsetSize + 1):
    IterationsSubset = CorrespondingResidues[i:i + SubsetSize]
    Pose1Coords = []
    Pose2Coords = []
    for P1, P2 in IterationsSubset:
      for AtomName in BackboneAtoms:
        Pose1Coords.append(list(Pose1.residue(P1).xyz(AtomName)))
        Pose2Coords.append(list(Pose2.residue(P2).xyz(AtomName)))

    # One row of x, y, z per collected backbone atom.
    Pose1Array = np.array(Pose1Coords)
    Pose2Array = np.array(Pose2Coords)

    RMSD, Rotation, Translation = solenoid_tools.rmsd_2_np_arrays_rosetta(Pose1Array, Pose2Array)

    if RMSD < BestRMSD:
      BestRMSD = RMSD
      BestSubset = IterationsSubset
      BestTransformation = (Rotation, Translation)

  # Apply the best window's transformation to Pose2.
  Rotation, Translation = BestTransformation
  rosetta.Pose.apply_transform_Rx_plus_v(Pose2, Rotation, Translation)

  # Makes a lot of sense for even overlaps, less sense for odd overlaps.
  # Floor division keeps the cut point an int, so it is a valid list index
  # on Python 3 as well as Python 2.
  # NOTE(review): with SubsetSize == 1 the cut point is 0 and index -1 wraps
  # to the only pair, so the matched residue ends up in both halves.
  Cutpoint = SubsetSize // 2
  EndOfPose1 = BestSubset[Cutpoint - 1][0]
  StartOfPose2 = BestSubset[Cutpoint][1]

  FusionPose = grafting.return_region(Pose1, 1, EndOfPose1)
  for Pose2Position in range(StartOfPose2, Pose2.n_residue() + 1):
    FusionPose.append_residue_by_bond(Pose2.residue(Pose2Position))

  return FusionPose, BestRMSD, CorrespondingResidues
def detect_and_expand_repeats(InputTuple):
  """Find tandem repeats in a PDB and dump poses extrapolated from them.

  Args:
    InputTuple: (Args, Pdb).  Args must provide repeat_residues,
      max_turns_per_repeat, min_overlap, repeat and out; Pdb is the path of
      the structure to load.

  For every pair of repeat starts in the same stretch whose offset lies
  between min_overlap and (repeat unit length - min_overlap), the two units
  (optionally spanning several turns and shifted along the tandem repeats)
  are passed to extrapolate_repeat_pose.  Extrapolations with the expected
  residue count are trimmed to Args.repeat units and written as
  '<Args.out>src<a>_<b>__<c>_<d>_rep<len>_<stem>.pdb'; a summary is written
  to '<stem>_RepExtra.log'.

  When Args.repeat_residues is falsy the repeat finder runs unrestricted;
  otherwise it is parsed as '__'-separated chains of '_'-separated ints.
  """
  Args, Pdb = InputTuple
  print('Pdb: %s' % (Pdb,))
  # Name base for output pdbs.
  InputPdbStem = Pdb.split('/')[-1].replace('.pdb', '')
  print('StemName: %s' % (InputPdbStem,))

  # Load Pdb into a rosetta pose.
  Pose = rosetta.pose_from_pdb(Pdb)
  Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))

  if not Args.repeat_residues:
    TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(Pose)
  else:
    RepeatChains = [[int(Number) for Number in Chain.split('_')]
                    for Chain in Args.repeat_residues.split('__')]
    TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(Pose, RepeatChains)

  AllExtrapolationsByRepeatLength = {}
  print('TandemRepeats: %s' % (TandemRepeats,))
  print('RepeatStretchesByLengthHash: %s' % (RepeatStretchesByLengthHash,))

  for RepeatUnitLength in RepeatStretchesByLengthHash:
    ExtrapolationList = []

    print('RepeatUnitLength %s' % (RepeatUnitLength,))
    for RepeatStretch in RepeatStretchesByLengthHash[RepeatUnitLength]:
      print('RepeatStretch %s' % (RepeatStretch,))

      # All pairwise combinations of repeat starts; the second arg should
      # ALWAYS be 2 unless a major overhaul is performed.
      for RepeatUnit1Start, RepeatUnit2Start in itertools.combinations(RepeatStretch, 2):
        assert RepeatUnit1Start <= RepeatUnit2Start, ' RepeatUnit1 must begin before RepeatUnit2 '

        TandemRepeats1 = TandemRepeats[RepeatUnit1Start]
        TandemRepeats2 = TandemRepeats[RepeatUnit2Start]

        # Whichever position starts the fewest tandem repeats dictates how
        # far to shift.
        Shifts = min(len(TandemRepeats1), len(TandemRepeats2))
        # Max number of turns per repeat depends on the available repeats
        # and on the user supplied maximum.
        MaxTurns = min(Args.max_turns_per_repeat, Shifts)

        MinUnit2Start = RepeatUnit1Start + Args.min_overlap
        MaxUnit2Start = RepeatUnit1Start + RepeatUnitLength - Args.min_overlap
        if not (MinUnit2Start <= RepeatUnit2Start <= MaxUnit2Start):
          continue

        # The same shift is added to both starts, so the offset between the
        # two units never changes.
        Overlap = RepeatUnit2Start - RepeatUnit1Start

        for NumTurns in range(1, MaxTurns + 1):
          ModLength = NumTurns * RepeatUnitLength
          ModUniformLength = Args.repeat * ModLength

          # Floor division: range() needs an int on Python 3 too.
          for Shift in range(Shifts // NumTurns):
            ModRep1Start = RepeatUnit1Start + (Shift * ModLength)
            ModRep2Start = RepeatUnit2Start + (Shift * ModLength)
            ModRep1End = ModRep1Start + ModLength - 1
            ModRep2End = ModRep2Start + ModLength - 1

            Repeat1Unit = grafting.return_region(Pose, ModRep1Start, ModRep1End)
            Repeat2Unit = grafting.return_region(Pose, ModRep2Start, ModRep2End)

            # Extrapolate from a partial repeat.
            try:
              Extrapolation = extrapolate_repeat_pose(Repeat1Unit, Repeat2Unit, Args.repeat - 1)
            except AssertionError:
              # This message used to be a bare string expression, so it was
              # never shown.
              print('Extrapolation failed')
              continue

            # Hacky check: finds things that went wrong in extrapolation,
            # sometimes.
            if Extrapolation.n_residue() != ModUniformLength + Overlap:
              print('fail')
              continue

            # Trim down to uniform length and record it.
            Extrapolation = grafting.return_region(Extrapolation, 1, ModUniformLength)
            ExtrapolationList.append((Extrapolation, (ModRep1Start, ModRep1End), (ModRep2Start, ModRep2End), NumTurns))

    AllExtrapolationsByRepeatLength[RepeatUnitLength] = ExtrapolationList

  with open('%s_RepExtra.log' % InputPdbStem, 'w') as LogFile:

    for BaseUnitLength in AllExtrapolationsByRepeatLength:
      Extrapolations = AllExtrapolationsByRepeatLength[BaseUnitLength]
      Summary = 'Extrapolated %d poses with base unit length %d' % (len(Extrapolations), BaseUnitLength)
      print(Summary)
      LogFile.write(Summary + '\n')
      # NOTE(review): only this header is logged; the per-pose rows were
      # already disabled in the original code.
      LogFile.write('Number\tUnit1 range\tUnit2 range' + '\n')

      for Extrapolation, Repeat1Range, Repeat2Range, NumTurns in Extrapolations:
        ExpandedUnitLength = BaseUnitLength * NumTurns
        rosetta.dump_pdb(Extrapolation, '%ssrc%d_%d__%d_%d_rep%d_%s.pdb' % (Args.out, Repeat1Range[0], Repeat1Range[1], Repeat2Range[0], Repeat2Range[1], ExpandedUnitLength, InputPdbStem))
def cap_and_relax_pdb(InputTuple):
  """Cap an extrapolated repeat pose with reference termini, idealize and relax it.

  Args:
    InputTuple: 3-item sequence (RepeatPdb, ReferencePdb, ReferenceCst).
      RepeatPdb is the repeat structure to cap; its name must carry the
      'src<a>_<b>__<c>_<d>' and 'rep<N>' tags and end in '.pdb'.
      ReferencePdb is the structure the caps are cut from and ReferenceCst
      is the constraint file handed to constraint_extrapolator.
      (A single packed argument is kept so existing one-argument callers
      work unchanged.)

  File names derived from RepeatPdb ('X.pdb'):
    X_Cap.pdb          capped pose
    X_Cap.cst          reference constraints whose atoms exist in the capped pose
    X_Cap_Ideal.pdb    idealize_jd2 output, renamed from X_Cap_0001.pdb
    X_Cap__Relax.pdb   relaxed with atom pair / angle / dihedral constraint
                       weights set to 1.0
    X_Cap__Relax2.pdb  the same pose relaxed again with the plain score function

  Raises:
    ValueError: if the 'rep'/'src' tags cannot be parsed from RepeatPdb.
    AssertionError: if RepeatPdb does not end in '.pdb' (or from fuse()).
    subprocess.CalledProcessError: if idealize_jd2 exits with an error.
  """
  import shutil  # local import: only needed for the rename step below

  RepeatPdb, ReferencePdb, ReferenceCst = InputTuple

  RepeatPose = rosetta.pose_from_pdb(RepeatPdb)
  # Drop residues at both ends of the repeat pose before capping.
  TrimmedRepeatPose = grafting.return_region(RepeatPose, 3, RepeatPose.n_residue() - 3)
  TrimmedRepeatPose.pdb_info(rosetta.core.pose.PDBInfo(TrimmedRepeatPose))

  ReferencePose = rosetta.pose_from_pdb(ReferencePdb)
  ReferencePose.pdb_info(rosetta.core.pose.PDBInfo(ReferencePose))

  # RepeatLength is not used below, but int() fails fast with ValueError
  # when the file name carries no 'rep<N>' tag.
  RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdb))
  SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdb)
  SourceRanges = [[int(Value) for Value in Range.split('_')]
                  for Range in SourceRanges.split('__')]
  SourceStart = SourceRanges[0][0]
  SourceEnd = SourceRanges[0][1]

  # ---- Add N terminal cap ----
  NcapPose = grafting.return_region(ReferencePose, 1, SourceStart + 5)
  NcapLength = NcapPose.n_residue()

  # Last four residues of the cap are superimposed on the first four of the
  # trimmed repeat.
  NcapOverhangPositions = list(range(NcapLength - 3, NcapLength + 1))
  NcapOverhangArray = get_residue_array(NcapPose, NcapOverhangPositions)

  RepStartOverhangPositions = [1, 2, 3, 4]
  RepStartOverhangArray = get_residue_array(TrimmedRepeatPose, RepStartOverhangPositions)

  RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta(NcapOverhangArray, RepStartOverhangArray)
  rosetta.Pose.apply_transform_Rx_plus_v(TrimmedRepeatPose, rMtx, tVec)
  NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = fuse(NcapPose, TrimmedRepeatPose)
  print('Ncap attachment RMSD %f' % RMSD)
  NcapPlusRepeatPose.pdb_info(rosetta.core.pose.PDBInfo(NcapPlusRepeatPose))

  # ---- Add C terminal cap ----
  Cshift = SourceEnd - 6
  CcapPose = grafting.return_region(ReferencePose, Cshift, ReferencePose.n_residue())
  CcapOverhangPositions = [1, 2, 3, 4]
  CcapOverhangArray = get_residue_array(CcapPose, CcapOverhangPositions)

  NcapPlusRepeatLength = NcapPlusRepeatPose.n_residue()
  RepEndOverhangPositions = list(range(NcapPlusRepeatLength - 3, NcapPlusRepeatLength + 1))
  RepEndOverhangArray = get_residue_array(NcapPlusRepeatPose, RepEndOverhangPositions)

  RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta(RepEndOverhangArray, CcapOverhangArray)
  rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
  CappedRepeatPose, RMSD, CcapCorrespondingResidues = fuse(NcapPlusRepeatPose, CcapPose)
  print('Ccap attachment RMSD %f' % RMSD)

  # The dot is escaped so only a real '.pdb' extension is rewritten.
  CappedNamePdb = re.sub(r'(.*)\.pdb$', r'\1_Cap.pdb', RepeatPdb)
  if CappedNamePdb == RepeatPdb:
    raise AssertionError('regular expression substitution failed!')
  rosetta.dump_pdb(CappedRepeatPose, CappedNamePdb)

  # ---- Generate csts for cap/repeat edges ----
  CstExtrapolator = constraint_extrapolator(ReferenceCst)
  ConstraintSet = []

  # N cap constraints are easy; no shifts are needed.
  # Each cst line is considered once per cap, tracked by its line number.
  SeenCstLines = set()
  for Position in range(1, SourceStart + 6):
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstExtrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint

        if CstLineNumber in SeenCstLines:
          continue
        SeenCstLines.add(CstLineNumber)

        if pose_has(CappedRepeatPose, AtomResidueCoords):
          ConstraintSet.append(Constraint)

  # C cap constraints are harder; residue numbers need to be shifted because
  # the pose was expanded.  The seen-set starts over for this pass.
  SeenCstLines = set()

  RepeatCcapPositionStart = CcapCorrespondingResidues[0][0]
  ShiftToRepeatPose = RepeatCcapPositionStart - Cshift

  for Position in range(Cshift, ReferencePose.n_residue() + 1):
    # Skip positions w/out constraints
    try:
      PositionCstDict = CstExtrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint

        if CstLineNumber in SeenCstLines:
          continue
        SeenCstLines.add(CstLineNumber)

        # Re-number every (atom, residue) pair into the capped pose.
        ExpandedPoseAtomResidueCoords = [
          (AtomResiduePair[0], AtomResiduePair[1] + ShiftToRepeatPose)
          for AtomResiduePair in AtomResidueCoords
        ]
        ShiftedConstraint = ExpandedPoseAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType

        if pose_has(CappedRepeatPose, ExpandedPoseAtomResidueCoords):
          ConstraintSet.append(ShiftedConstraint)

  CapCstName = re.sub(r'(.*)\.pdb$', r'\1.cst', CappedNamePdb)
  CstExtrapolator.output_cst(ConstraintSet, CapCstName)

  # ---- Idealize peptide bonds with command line subprocess ----
  subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', CappedNamePdb])
  IdealizedPdbOldName = re.sub(r'(.*)\.pdb$', r'\1_0001.pdb', CappedNamePdb)
  IdealizedPdbNewName = re.sub(r'(.*)\.pdb$', r'\1_Ideal.pdb', CappedNamePdb)

  # Rename in-process instead of spawning `mv`.
  shutil.move(IdealizedPdbOldName, IdealizedPdbNewName)
  time.sleep(0.2)

  IdealizedCappedPose = rosetta.pose_from_pdb(IdealizedPdbNewName)

  # Attach the cap constraints read from file to the idealized pose.
  Constrainer = rosetta.ConstraintSetMover()
  Constrainer.constraint_file(CapCstName)
  Constrainer.apply(IdealizedCappedPose)

  # ---- Set up score function weights ----
  Talaris = rosetta.getScoreFunction()
  TalarisPlusCst = rosetta.getScoreFunction()
  # AtomPairCst is only referenced by the (disabled) relative weighting
  # scheme; it is still built so the calls made here stay the same.
  AtomPairCst = set_all_weights_zero(rosetta.getScoreFunction())
  AtomPairCst.set_weight(rosetta.atom_pair_constraint, 1.0)

  Weight = 1.0
  TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, Weight)
  TalarisPlusCst.set_weight(rosetta.angle_constraint, Weight)
  TalarisPlusCst.set_weight(rosetta.dihedral_constraint, Weight)

  print('relaxing %s with %s' % (IdealizedPdbNewName, CapCstName))
  print(' Weight %d ' % Weight)
  rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
  RelaxedPdbName = re.sub(r'(.*)_Ideal\.pdb$', r'\1__Relax.pdb', IdealizedPdbNewName)
  rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)

  # Second relax of the same pose, this time without the constraint weights.
  rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

  RelaxedPdbName = re.sub(r'(.*)_Ideal\.pdb$', r'\1__Relax2.pdb', IdealizedPdbNewName)
  rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
# Example #8
# 0
def cap_pdb_make_cst( RepeatPdbFileName, RepeatCstFileName, ReferencePdb, ReferenceCst, Ntrim=0, Ctrim=0, Step=0 ):
  '''
  Fuse N- and C-terminal caps taken from a reference pose onto a repeat pose,
  writing one capped pdb and one matching constraint file per cap combination.

  RepeatPdbFileName : pdb of the repeat protein. The name must contain a
                      rep<N> tag and a src<a>_<b>__<c>_<d> tag; <a> and <b>
                      are used as the reference residues where the N cap ends
                      and the C cap starts.
  RepeatCstFileName : constraint file numbered for the repeat pdb.
  ReferencePdb      : pdb the caps are cut from.
  ReferenceCst      : constraint file numbered for the reference pdb.
  Ntrim, Ctrim      : how far (in residues) to scan the N cap end backwards /
                      the C cap start forwards. Each must be a multiple of
                      Step when Step is non-zero (AssertionError otherwise).
  Step              : scan increment. With Step == 0 only the untrimmed
                      boundaries are tried and Ntrim / Ctrim are ignored.

  Returns a list of (capped pdb name, capped cst name) tuples, one for every
  cap combination that could be fused.

  Side effects: writes the capped pdb / cst files next to RepeatPdbFileName and
  overwrites the scratch files 'NcapPlusRepeat.pdb' and 'CcapPose.pdb' in the
  working directory.
  '''
  if Step:
    if Ntrim:
      assert Ntrim % Step == 0
    if Ctrim:
      assert Ctrim % Step == 0

  # Number of extra trim positions to scan on each side. Dividing by the
  # default Step of 0 used to raise ZeroDivisionError; a zero Step now simply
  # means "no scan". Floor division keeps the counts integral.
  NcapStepCount = Ntrim // Step if Step else 0
  CcapStepCount = Ctrim // Step if Step else 0

  # Grep out repeat length and src ranges
  # RepeatLength is not used below; the int() conversion fails with ValueError
  # when the file name carries no rep<N> tag, so it is kept as a name check.
  RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdbFileName))
  SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdbFileName)
  assert SourceRanges != RepeatPdbFileName, 'src string not found in pdb name '
  SourceRanges = SourceRanges.split('__')
  SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges ]
  SourceStart = SourceRanges[0][0]
  SourceEnd = SourceRanges[0][1]

  # Load repeat pose
  RepeatPose = rosetta.pose_from_pdb( RepeatPdbFileName )
  # Trim off floppy end residues
  TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 )
  TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) )
  # Load reference (native) pose
  ReferencePose = rosetta.pose_from_pdb( ReferencePdb )
  ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) )

  PdbCstPairs = []

  ''' Loop through N terminal caps '''
  for NcapTrimBackSteps in range( NcapStepCount + 1 ):
    NcapLastRes = SourceStart - (NcapTrimBackSteps * Step)

    ### Get pose for n-terminal cap with overhang for superimpositions
    try:
      NcapPose = grafting.return_region( ReferencePose, 1, NcapLastRes+5 )
    except (RuntimeError, OverflowError):
      print('Requested end of n-terminal cap, %d, beyond range of reference protein. '%NcapLastRes)
      continue

    # Plain check instead of try/assert so it still runs under python -O
    if NcapPose.n_residue() <= 4:
      print('Too few residues to attach n-terminal cap ending at %d; skipping '%NcapLastRes)
      continue

    NcapLength = NcapPose.n_residue()

    # Last four residues of the cap are superimposed on the first four
    # residues of the trimmed repeat
    NcapOverhangPositions = list( range( NcapLength-3, NcapLength+1 ) )
    NcapOverhangArray = generate_backbones.get_residue_array( NcapPose, NcapOverhangPositions )

    RepStartOverhangPositions = [1, 2, 3, 4]
    RepStartOverhangArray = generate_backbones.get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions )

    RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray )
    # Moves TrimmedRepeatPose in place; it is re-superimposed on every pass
    rosetta.Pose.apply_transform_Rx_plus_v( TrimmedRepeatPose, rMtx, tVec )

    try:
      NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = generate_backbones.fuse( NcapPose, TrimmedRepeatPose )
    except AssertionError:
      print(' Not enough structural similarity to attach n-terminal cap ending at %d; skipping '%NcapLastRes)
      continue

    rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' )
    NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) )

    RepeatCstExtrapolator = expand_cst.constraint_extrapolator(RepeatCstFileName)

    ''' Shift repeat unit constraints to accommodate numbering with n-cap length '''
    ShiftedRepeatCsts = _collect_unique_csts( RepeatCstExtrapolator, range(1, RepeatPose.n_residue()+1 ), NcapLastRes - 1 )
    # Keep only constraints whose shifted atoms exist in the n-capped pose
    RepeatCsts = [ Cst for Cst in ShiftedRepeatCsts if expand_cst.pose_has(NcapPlusRepeatPose, Cst[0]) ]

    ''' Loop through C terminal caps '''
    for CcapTrimForwardSteps in range( CcapStepCount + 1 ):
      CcapFirstRes = SourceEnd + ( CcapTrimForwardSteps * Step )
      # Start the c-terminal region 6 residues early to leave an overhang
      Cshift = CcapFirstRes-6
      print('Cshift %s' % Cshift)
      print('ReferencePose.n_residue() %s' % ReferencePose.n_residue())

      try:
        CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() )
      except (RuntimeError, OverflowError):
        print('Requested start of c-terminal, %d, beyond range of reference protein. '%CcapFirstRes)
        continue

      # Plain check instead of try/assert so it still runs under python -O
      if CcapPose.n_residue() <= 4:
        print('Too few residues to attach c-terminal cap starting at %d; skipping '%CcapFirstRes)
        continue

      CcapOverhangPositions = [1, 2, 3, 4]
      CcapOverhangArray = generate_backbones.get_residue_array( CcapPose, CcapOverhangPositions )

      # Last four residues of the n-capped repeat are the superposition target
      RepEndOverhangPositions = list( range( NcapPlusRepeatPose.n_residue()-3, NcapPlusRepeatPose.n_residue()+1 ) )
      RepEndOverhangArray = generate_backbones.get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions )

      RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray )
      rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
      rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' )

      try:
        CappedRepeatPose, RMSD, CcapCorrespondingResidues = generate_backbones.fuse(NcapPlusRepeatPose, CcapPose)
      except AssertionError:
        print('Not enough structural similarity to attach c-terminal cap starting at %d; skipping '%CcapFirstRes)
        continue

      CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_%dCap%d.pdb'%(NcapLastRes, CcapFirstRes), RepeatPdbFileName)
      assert CappedNamePdb != RepeatPdbFileName, 'regular expression substitution failed!'

      rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb )

      ''' Generate csts for cap/repeat edges '''
      CapCstExtrapolator = expand_cst.constraint_extrapolator(ReferenceCst)

      # N cap constraints are easy; no shifts are needed
      CapCsts = _collect_unique_csts( CapCstExtrapolator, range(1, NcapLastRes) )

      # C cap constraints are harder; need to shift due to pose expansion
      CcapCstShift = CappedRepeatPose.n_residue() - ReferencePose.n_residue()
      CapCsts.extend( _collect_unique_csts( CapCstExtrapolator, range( CcapFirstRes, ReferencePose.n_residue()+1 ), CcapCstShift ) )

      CappedCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb)

      # Truncate any cst file left over from an earlier run before writing
      with open(CappedCstName, 'w') as OverwriteExistingFile:
        pass

      # Cap constraints first, then repeat constraints; drop any whose atoms
      # are missing from the final capped pose
      FinalCstSet = [ Cst for Cst in CapCsts + RepeatCsts if expand_cst.pose_has(CappedRepeatPose, Cst[0]) ]

      CapCstExtrapolator.output_cst(FinalCstSet, CappedCstName)
      PdbCstPairs.append((CappedNamePdb, CappedCstName))

  return PdbCstPairs


def _collect_unique_csts( Extrapolator, Positions, Shift=None ):
  '''
  Gather each constraint stored under Extrapolator.Cst for the given residue
  Positions exactly once (keyed on its constraint-file line number).

  With Shift=None the stored constraint tuples are returned untouched;
  otherwise every residue number in a constraint is offset by Shift and a new
  (atom/residue pairs, parameters, line number, type) tuple is built.
  '''
  SeenLineNumbers = set()
  Collected = []
  for Position in Positions:
    # Skip positions w/out constraints
    try:
      PositionCstDict = Extrapolator.Cst[Position]
    except KeyError:
      continue

    for AtomName in PositionCstDict:
      for Constraint in PositionCstDict[AtomName]:
        AtomResidueCoords, CstParameters, CstLineNumber, CstType = Constraint

        # The same cst line is filed under every atom it touches
        if CstLineNumber in SeenLineNumbers:
          continue
        SeenLineNumbers.add(CstLineNumber)

        if Shift is None:
          Collected.append(Constraint)
        else:
          ShiftedCoords = [ ( Pair[0], Pair[1] + Shift ) for Pair in AtomResidueCoords ]
          Collected.append( ( ShiftedCoords, CstParameters, CstLineNumber, CstType ) )
  return Collected