def __init__(self, path, pdb_id='pdb', cif_id='cif'):
    """Parse a PDB or mmCIF file and keep its header and structure.

    Adapted from ``_read_structure`` in input_output.py.

    Args:
        path: Path to the structure file. The text after the last '.' of the
            base name selects the parser ('pdb' or 'cif').
        pdb_id: Structure id passed to the PDB parser.
        cif_id: Structure id passed to the mmCIF parser.

    Prints an error and exits the process with status 1 when the suffix is
    neither 'pdb' nor 'cif' (including file names without any suffix).
    """
    name_parts = os.path.basename(path).split('.')
    # file_name keeps its historical meaning (text before the first dot).
    self.file_name = name_parts[0]
    # The suffix is the text after the LAST dot, so names such as
    # "1abc.min.pdb" are recognised; a name without a dot yields '' and is
    # reported below instead of raising IndexError.
    self.file_sufix = name_parts[-1] if len(name_parts) > 1 else ''
    self.dir_path = os.path.dirname(path)
    self.params = CP.read_charmm_FF()
    self.chains = []
    self.models = {}
    if self.file_sufix == 'pdb':
        self.header = struct.parse_pdb_header(path)
        self.structure = struct.PDBParser(QUIET=True).get_structure(pdb_id, path)
        self.has_sequence = False
    elif self.file_sufix == 'cif':
        # NOTE(review): MMCIF2Dict is called without a file name here; the
        # Biopython class of that name expects the path -- confirm what
        # `struct.MMCIF2Dict` resolves to before relying on this header.
        self.header = struct.MMCIF2Dict()
        self.structure = struct.MMCIFParser().get_structure(cif_id, path)
        self.has_sequence = True
    else:
        print("ERROR: Unreognized file type " + self.file_sufix + " in " + self.file_name)
        sys.exit(1)
def main():
    """Delete one residue or terminal from a Super Structure CSV file.

    Command-line options:
        --rem: "aa number,entity id,chain id,residue or terminal".
        --inp: path to the input CSV file.
        --out: path/name used for the CSV and PDB outputs.
        --par: path to the CHARMM parameter folder.

    When the deleted residue sits at either end of the chain and terminal
    atoms (components 'ACETY' / 'CTERM') remain under the same residue
    number, those terminal atoms are deleted as well.

    Exits with status 1 on missing/invalid options or unsupported residues.
    """
    usage = "usage: %prog [options] arg"
    d = ("This program reads a CSV file that has been generated by Super_Structure. "
         "One residue or terminal will be deleted at the time.\n "
         "WARNING: Deleting residues will leave a 'hole' in the structure. "
         "Amino Acids will not be renumbered.\n "
         "This program can only delete residues or terminals that are in the parameter file. "
         "The program will create a new and modified CSV file with the name of the input file "
         "plus the entity number.")
    opt_parser = optparse.OptionParser(usage, description=d)
    opt_parser.add_option(
        "--rem", type="str",
        help=("Enter Instruction for removing amino acid or terminal in hard \n "
              "quotes '\"'. Place: Amino Acid Number, Entity ID, Chain ID, "
              "Residue or Terminal to be deleted separated by comas.\n "
              "Example \"1,1,A,ACE\", \"1,1,A,CTER\" or \"20,2,A,LYS\". \n "
              "Chain ID or terminal name are case sensitive and "
              "do not need to go in quotes.\n"
              "For residues all atoms will be deleted. For terminals, only the "
              "atoms that correspond to the terminal will be deleted."))
    opt_parser.add_option("--inp", type="str", help="Path to CSV file for removing residue.")
    opt_parser.add_option("--out", type="str",
                          help="Name of output CSV file after removal of amino acid or terminal.")
    opt_parser.add_option("--par", type="str", help="Path to charmm parameters folder.")
    options, args = opt_parser.parse_args()

    # A missing --inp would otherwise reach os.path.exists(None) and crash.
    if options.inp is None or not os.path.exists(options.inp):
        print("Error: File path for Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.rem is None or options.out is None:
        print("Error: Options --rem and --out are required.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')
    parse_list = options.rem.split(',')
    message = ''
    if len(parse_list) == 4:
        try:
            amino_acid_number = int(parse_list[0])
            entity_number = int(parse_list[1])
        except ValueError:
            print('ERROR: Amino Acid Number and Entity ID in --rem must be integers.')
            sys.exit(1)
        chain = str(parse_list[2]).upper()
        term_res = str(parse_list[3]).upper()
        # So far this only works with natural aminoacids and ACE and CTER
        supported = ('ILE', 'GLN', 'GLY', 'GLU', 'CYS', 'ASP', 'SER', 'HSD', 'HSE', 'PRO',
                     'HSP', 'ASN', 'VAL', 'THR', 'TRP', 'CTER', 'LYS', 'PHE', 'ALA', 'MET',
                     'ACE', 'LEU', 'ARG', 'TYR')
        if term_res not in supported:
            print('ERROR: del_residue.py only works with natural aminoacids and ACE and CTER terminals.')
            sys.exit(1)

        def residue_has_component(component):
            """True when any remaining atom of the target residue has `component`."""
            fs = insulin.Full_Structure  # re-read: delete_aa may replace the frame
            in_residue = ((fs.aaid == amino_acid_number) &
                          (fs.ent_id == entity_number) &
                          (fs.chain == chain))
            return bool((fs.loc[in_residue, 'component'] == component).any())

        message += 'Deleting a ' + term_res + ' from '
        message += ('amino acid number ' + str(amino_acid_number) + ' in entity id ' +
                    str(entity_number) + ' and chain ' + chain + '.\n')
        insulin.delete_aa(amino_acid_number, entity_number, chain, term_res)

        # If amino acid that is at the protein terminal is deleted, the terminal
        # must be deleted too.
        fs = insulin.Full_Structure
        chain_aaids = fs.aaid[(fs.ent_id == entity_number) & (fs.chain == chain)]
        # Nothing left in the chain means there is no terminal to clean up
        # (min()/max() on an empty selection would raise ValueError).
        if len(chain_aaids) > 0:
            min_aa = min(chain_aaids)
            max_aa = max(chain_aaids)
            if amino_acid_number == min_aa and residue_has_component('ACETY'):
                # Left-over acetyl atoms under this residue number: remove them.
                term_res = 'ACE'
                insulin.delete_aa(amino_acid_number, entity_number, chain, term_res)
            if (amino_acid_number == max_aa and term_res != 'CTER'
                    and residue_has_component('CTERM')):
                term_res = 'CTER'
                insulin.delete_aa(amino_acid_number, entity_number, chain, term_res)

        file_name = os.path.basename(options.out).split('.')[0]
        dir_path = os.path.dirname(options.out)
        # Super Structure needs to know about models. The count is derived from
        # the number of DataFrame columns; floor division keeps the Python 2
        # integer semantics under Python 3 as well.
        num_models = max(0, (insulin.Full_Structure.shape[1] - 20) // 5) + 1
        insulin.models = [str(i) for i in range(1, num_models + 1)]
        insulin.write_csv(dir_path, file_name)
        IO.write_pdb(insulin, dir_path, file_name, 'all')
    else:
        message += 'The number of entries in the instruction field, followed by --rem, is not right.\n'
        message += 'Type -h or --help for instructions\n'
    print(message)
def mmgbsa_CA_bindingMatrix(self, options):
    """Write per-component self and pair energy terms for a CRD/PSF pair.

    Reads the CRD and PSF files plus four per-atom column files named by
    ``options.gb``, ``options.gbz``, ``options.sa`` and ``options.saz`` (all
    relative to ``self.dirpath``), builds a Super Structure, and writes:

    * ``<crd base name>_MMGBSA.idx`` -- one line per component index.
    * ``<crd base name>_MMGBSA.out`` (or ``_MMGBSA_verbose.out`` when
      ``options.ver`` is true) -- for every component a self line (GB and
      SA terms) and, for every component pair, electrostatic, van der Waals
      and pairwise GB terms. Non-verbose lines hold the difference between
      the value computed with the ``*_z0`` / ``z1`` columns and the value
      computed with the ``*_z500`` / ``z1_d`` columns.

    Args:
        options: Parsed options providing ``crd``, ``psf``, ``gb``, ``gbz``,
            ``sa``, ``saz`` and ``ver``.
    """
    # ---------------------------- READ FILES ----------------------------
    crd_file = crd(self.dirpath + '/' + options.crd)
    psf_file = psf(self.dirpath + '/' + options.psf)
    out_file = os.path.basename(options.crd).split('.')[0]
    # NOTE(review): param_path is not defined inside this method; presumably
    # a module-level name -- confirm.
    params = CP.read_charmm_FF(param_path)
    insu = SS.Super_Structure(params, self.dirpath, 'charmm_input')
    # At this point, a XPLOR psf could only have been created from a complete
    # structure, so no worries of gaps.
    insu.create_super_structure_df_from_CRD_PSF(crd_file, psf_file)
    for column, data_file in (('gb_z0', options.gb), ('gb_z500', options.gbz),
                              ('sa_z0', options.sa), ('sa_z500', options.saz)):
        insu.add_column_to_super_structure_df_from_CRD_PSF(
            column, self.dirpath + '/' + data_file)
    # presumably shifts "A" by 500 along z into column z1_d -- TODO confirm
    insu.create_column_with_ztranslated('z1_d', 'z1', "A", float(500))

    # ------------------- Index and reorder proline atoms -------------------
    idx_ss = SS.index_super_structure(insu.Full_Structure)
    idx_ss.generate_indexes_from_Full_Structure()
    idx_ss.sort_comp_index_by_aaid_within_chains()
    new_index = []
    for comp in idx_ss.comp_indx:
        rows = list(range(comp[5], comp[6] + 1))
        if comp[3] == 'PRO':
            if comp[4] == 'AMIN':
                rows = [rows[j] for j in [0, 4, 5, 1, 2, 3]]
                for j in rows[3:]:
                    insu.Full_Structure.loc[j, 'component'] = 'SIDE2'
                new_index.extend(rows)
            elif comp[4] in ('SIDE', 'CARB'):
                new_index.extend(rows)
            # any other PRO component is left out, as in the original code
        else:
            new_index.extend(rows)
    full = insu.Full_Structure.loc[new_index]
    # .loc with a mask avoids chained assignment on the re-ordered frame.
    is_pro = full.aa == 'PRO'
    full.loc[is_pro & full.atmtyp1.isin(['CB', 'HB1', 'HB2']), 'component'] = 'SIDE3'
    full.loc[is_pro & full.atmtyp1.isin(['CG', 'HG1', 'HG2']), 'component'] = 'SIDE4'
    insu.Full_Structure = full.reset_index(drop=True)
    idx_ss = SS.index_super_structure(insu.Full_Structure)
    idx_ss.generate_indexes_from_Full_Structure()
    idx_ss.sort_comp_index_by_aaid_within_chains()

    # CHARMM_Test/gbsw_ab_nbxmod5.dat
    # ELECB,ELECU, GBENB, GBENU, VDWAB, VDWAU, ASPB, ASPU
    # -379.654,-332.641,-495.252, -595.916, 5672.62, 2338.65, 37.9186, 48.284
    # Column positions inside ch_AB:
    # 0 charg, 1 epsilon, 2 rmin_half, 3 x1, 4 y1, 5 z1, 6 gb_z0, 7 gb_z500,
    # 8 z1_d, 9 sa_z0, 10 sa_z500, 11 atmtyp1, 12 chain, 13 component,
    # 14 aa, 15 aaid
    ch_AB = insu.Full_Structure.loc[
        idx_ss.chn_indx[0][0]:idx_ss.chn_indx[-1][-1],
        ['charg', 'epsilon', 'rmin_half', 'x1', 'y1', 'z1', 'gb_z0', 'gb_z500', 'z1_d',
         'sa_z0', 'sa_z500', 'atmtyp1', 'chain', 'component', 'aa', 'aaid']].values
    EE_K = -327.90
    EE_K2 = -163.95
    # Surface tension coefficient (sgamma) = 0.010 [kcal/mol/Angs**2]
    SA_K = 0.010
    components = idx_ss.comp_indx
    n_comp = len(components)

    if options.ver:
        out_path = self.dirpath + '/' + out_file + '_MMGBSA_verbose.out'
    else:
        out_path = self.dirpath + '/' + out_file + '_MMGBSA.out'
    # Context managers guarantee both files are flushed and closed.
    with open(self.dirpath + '/' + out_file + '_MMGBSA.idx', 'w') as idxFile, \
            open(out_path, 'w') as outFile:
        for i in range(n_comp):
            comp_i = components[i]
            idxFile.write(str(i) + " " + str(comp_i[2]) + " " + comp_i[0] + " " + comp_i[3] +
                          " " + comp_i[4] + "\n")

            # ---------------- self terms of component i ----------------
            tempGB = 0.0
            tempGB_Z = 0.0
            tempSA = 0.0
            tempSA_Z = 0.0
            last_atom = comp_i[6]
            # Every atom, including the last one, is visited here. The former
            # code handled the last atom through a variable set only inside
            # the inner loop, which was undefined/stale for one-atom
            # components.
            for k in range(comp_i[5], last_atom + 1):
                tempGB += EE_K2 * ch_AB[k][0] * ch_AB[k][0] / ch_AB[k][6]
                tempGB_Z += EE_K2 * ch_AB[k][0] * ch_AB[k][0] / ch_AB[k][7]
                tempSA += SA_K * ch_AB[k][9]
                tempSA_Z += SA_K * ch_AB[k][10]
                for kk in range(k + 1, last_atom + 1):
                    d1 = np.linalg.norm(ch_AB[k][3:6] - ch_AB[kk][3:6])
                    r2 = d1 * d1
                    # The original evaluated the same x1/y1/z1 distance for the
                    # "_Z" variant, so one squared distance serves both.
                    temp3 = ch_AB[k][6] * ch_AB[kk][6]
                    temp4 = (-1 * r2) / (4 * temp3)
                    temp3_Z = ch_AB[k][7] * ch_AB[kk][7]
                    temp4_Z = (-1 * r2) / (4 * temp3_Z)
                    denm = np.sqrt(r2 + (temp3 * np.exp(temp4)))
                    denm_Z = np.sqrt(r2 + (temp3_Z * np.exp(temp4_Z)))
                    Kqq = EE_K * ch_AB[k][0] * ch_AB[kk][0]
                    tempGB += Kqq / denm
                    tempGB_Z += Kqq / denm_Z
            if options.ver:
                # Output for comparison with GB_Comp_Analy.prl
                outFile.write("SELF " + str(i + 1) + " " + str(i + 1) + " " + comp_i[0] + " " +
                              str(comp_i[2]) + " " + str(comp_i[3]) + " " + str(comp_i[4]) +
                              "\n GB " + str(tempGB) + "\n GBZ " + str(tempGB_Z) +
                              "\n SA " + str(tempSA) + "\n SAZ " + str(tempSA_Z) + "\n")
            else:
                outFile.write(str(i) + " " + str(i) + " " + str(tempGB - tempGB_Z) + " " +
                              str(tempSA - tempSA_Z) + "\n")

            # ------------- pair terms between components i and j -------------
            for j in range(i + 1, n_comp):
                comp_j = components[j]
                pairGB = 0.0
                pairGB_Z = 0.0
                tempEE = 0.0
                tempEE_Z = 0.0
                tempVDW = 0.0
                tempVDW_Z = 0.0
                for m in range(comp_i[5], comp_i[6] + 1):
                    for n in range(comp_j[5], comp_j[6] + 1):
                        r = np.linalg.norm(ch_AB[m][3:6] - ch_AB[n][3:6])
                        r2 = r * r
                        # Same x/y, but the z-translated coordinate (column 8).
                        r_Z = np.linalg.norm(
                            np.array((ch_AB[m][3], ch_AB[m][4], ch_AB[m][8])) -
                            np.array((ch_AB[n][3], ch_AB[n][4], ch_AB[n][8])))
                        r2_Z = r_Z * r_Z
                        tempEE += (332.06 * ch_AB[m][0] * ch_AB[n][0]) / r
                        tempEE_Z += (332.06 * ch_AB[m][0] * ch_AB[n][0]) / r_Z
                        Eps = np.sqrt(ch_AB[m][1] * ch_AB[n][1])
                        Rmin = ch_AB[m][2] + ch_AB[n][2]
                        A = Rmin / r
                        A_Z = Rmin / r_Z
                        A2 = A * A
                        A2_Z = A_Z * A_Z
                        A6 = A2 * A2 * A2
                        A6_Z = A2_Z * A2_Z * A2_Z
                        A12 = A6 * A6
                        A12_Z = A6_Z * A6_Z
                        tempVDW += Eps * (A12 - (2 * A6))
                        tempVDW_Z += Eps * (A12_Z - (2 * A6_Z))
                        temp3 = ch_AB[m][6] * ch_AB[n][6]
                        temp4 = (-1 * r2) / (4 * temp3)
                        temp3_Z = ch_AB[m][7] * ch_AB[n][7]
                        temp4_Z = (-1 * r2_Z) / (4 * temp3_Z)
                        denm = np.sqrt(r2 + (temp3 * np.exp(temp4)))
                        denm_Z = np.sqrt(r2_Z + (temp3_Z * np.exp(temp4_Z)))
                        pairGB += (EE_K * ch_AB[m][0] * ch_AB[n][0]) / denm
                        pairGB_Z += (EE_K * ch_AB[m][0] * ch_AB[n][0]) / denm_Z
                if options.ver:
                    # Output for comparison with GB_Comp_Analy.prl
                    outFile.write("PAIR " + str(i + 1) + " " + str(j + 1) + " ")
                    outFile.write(comp_i[0] + " " + str(comp_i[2]) + " " + str(comp_i[3]) +
                                  " " + str(comp_i[4]) + " - " + comp_j[0] + " " +
                                  str(comp_j[2]) + " " + str(comp_j[3]) + " " +
                                  str(comp_j[4]) + "\n EE " + str(tempEE) + "\n EEZ " +
                                  str(tempEE_Z) + "\n VDW " + str(tempVDW) + "\n VDWZ " +
                                  str(tempVDW_Z) + "\n pGB " + str(pairGB) +
                                  "\n pGBZ " + str(pairGB_Z) + "\n")
                else:
                    outFile.write(str(i) + " " + str(j) + " " + str(tempEE - tempEE_Z) + " " +
                                  str(tempVDW - tempVDW_Z) + " " + str(pairGB - pairGB_Z) + "\n")
def main():
    """Place a 'rotate' PDB around a 'center' PDB and save one PDB per pose.

    The center structure is moved so its center of mass is at the origin. The
    rotate structure is oriented along a set of unit vectors (rings at z = 0
    and z = +/-cos(45 deg) sampled every --angle degrees, plus both poles),
    pushed out by --distance, and written together with the center structure.
    Without --join/--link the two structures are only combined (chain ids of
    the rotated structure are changed when they clash); otherwise the chains
    are joined through ``join_chains``.

    Output files are named ``<id>_<angles>_<n><link label>.pdb`` in the
    current directory, where <id> is the --id option (default 's').

    Exits with status 1 on invalid options or multi-model inputs.
    """
    usage = "usage: %prog [options]"
    d = ("It takes two pdbs and aligns the structures in different orientations. "
         "A center-pdb is placed at (0,0,0),and "
         "a rotate-pdb is place around center at given angle intervals and distances. "
         "You can choose to join chains to make "
         "a fusion peptide, or, if no, you can just align proteins for searching "
         "protein-protein interactions.")
    option_parser = optparse.OptionParser(usage, description=d)
    option_parser.add_option(
        "--center", type="str",
        help="Path to pdb structure to place at (0,0,0) according to its center of mass.")
    option_parser.add_option(
        "--rotate", type="str",
        help=("Path to pdb structure to rotated around center structure at X degree intervals. "
              "Angle intervals must be multiple of 360."))
    option_parser.add_option(
        "--angle", type="int",
        help="Angle intervals used to place rotated structured around center one.")
    option_parser.add_option(
        "--distance", type="float",
        help="Center of mass distance between rotated and centered structures.")
    option_parser.add_option(
        '--id', type="str", action="store", default='s',
        help="A prefix id to identify the output structures. Default is s.")
    option_parser.add_option(
        '--link', type="str", action="store", default='x',
        help=("Option only works with a 'yes' value from the --join option. It will join chains "
              "in the following format, for example: 'A.r,B.f:B.f,A.f'. This --link option can "
              "be read as "
              "the centered structure's chain 'A' amino acid sequence numbering is reversed and "
              "joind to "
              "the rotated structure's chain B with amino acid sequenced numbers not reversed, "
              "or kept "
              "forward. The values after the column are read similarly. Default if missing: 'x'"))
    option_parser.add_option(
        '--join', type="str", action="store", default='no',
        help=("Only two possible options 'CR' and 'RC'. In both options C stands for Center, and "
              "R for rotate. CR means that the numbering starts at the first amino acid of "
              "--center "
              "structure up to its end, and it is followed by the first amino acid of --rotate "
              "up to its "
              "end. This option might overide the directions (r or f) of amino acid numbering "
              "for each "
              "chain in --link option. This gives total manipulation flexibility for joining "
              "chains."))
    options, args = option_parser.parse_args()

    if options.center is None or not os.path.exists(options.center):
        print("Error: File path for molecule to be centered does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.rotate is None or not os.path.exists(options.rotate):
        print("Error: File path for molecule to be rotated does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    # The former check read options.map, but no --map option is defined in
    # this parser (AttributeError). Joining is requested through --join, so a
    # join without a --link description is what must be rejected.
    if options.join.lower() != 'no' and options.link.lower() == 'x':
        print("Error: when option join is not equal to no, option link must be")
        print(" a series of chains identifiers for association as described")
        print(" in the link paramater help.")
        sys.exit(1)
    if options.angle is None or options.angle <= 0 or options.distance is None:
        print("Error: --angle must be a positive integer and --distance must be given.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    pdb_parser = PDBParser(QUIET=True)
    Angle = options.angle
    distance = options.distance
    join_o = options.join
    link_o = options.link
    no_join = join_o.lower() == 'no' and link_o.lower() == 'x'

    params = CP.read_charmm_FF()
    cmc = md.CenterOfMassCalculator(params)
    rig = MRM.Molecular_Rigid_Manipulation(params)

    def load_centered(label, path):
        """Parse `path`, require exactly one model, move its center of mass to (0,0,0)."""
        structure = pdb_parser.get_structure(label, path)
        model_ids = [model.id for model in structure.get_models()]
        if len(model_ids) != 1:
            print("ERROR: Number of models cannot be different from 1. Models found:" +
                  str(len(model_ids)))
            print(" Make sure " + label + " PDBs have only one model.")
            sys.exit(1)
        rig.translate_molecule(structure, model_ids[0],
                               rig.center_molecule(cmc.get_center_of_mass(structure)))
        return structure, model_ids[0]

    def rotate_atoms(structure, rotation):
        """Apply the rotation matrix to every atom coordinate of `structure`."""
        for atom in structure.get_atoms():
            xyz = atom.get_coord()
            atom.set_coord(np.dot([xyz[0], xyz[1], xyz[2]], rotation))

    s1, modelS1 = load_centered('Centered', options.center)
    s2, modelS2 = load_centered('Rotated', options.rotate)

    # TODO: This works only for angles between 0 and 90 not including 0, and 90
    # and will generate angles in all 8 quadrants of the cartesian coordinate
    # system. TODO: quaternions might work better.
    # `locations` holds unit vectors relative to (0,0,0) used to place s2
    # around s1; `angles` holds the matching labels for the file names.
    rings = ((0, 0), (np.cos(45 * np.pi / 180), 45), (-1 * np.cos(45 * np.pi / 180), 315))
    locations = []
    angles = []
    for z, z_label in rings:
        for i in range(0, 360, Angle):
            # floor division keeps the Python 2 integer semantics
            for _ in range(0, 90 // Angle - 1):
                angles.append(str(i) + "_" + str(z_label))
                locations.append([np.cos(i * np.pi / 180), np.sin(i * np.pi / 180), z])
    locations.append([0, 0, 1])
    angles.append(str(0) + "_" + str(90))
    locations.append([0, 0, -1])
    angles.append(str(0) + "_" + str(270))
    locations = [list(np.array(v) / np.linalg.norm(v)) for v in locations]

    # Without joining, s2 is simply added next to s1 to probe protein-protein
    # interactions. Chain ids must be unique inside one model, so chains of
    # s2 that clash with s1 receive an unused upper-case letter.
    if no_join:
        s1_ids = set(chain.id for chain in s1[modelS1])
        # Ids already present in s2 are reserved too, so a renamed chain can
        # never collide with another chain of s2.
        taken = s1_ids | set(chain.id for chain in s2[modelS2])
        for chain in list(s2[modelS2]):
            if chain.id in s1_ids:
                free = [c for c in string.ascii_uppercase if c not in taken]
                if not free:
                    print("ERROR: Number of chains in both structures exceeds letters in the "
                          "alphabet. No ID identifiers available. Program will exit without "
                          "output. Fix the code. June 24, 2016")
                    sys.exit(1)
                chain.id = free[0]
                taken.add(free[0])
        s3 = copy.deepcopy(s1)
        s3.id = 'Ensamble'
        for chain in s2.get_chains():
            s3[modelS1].add(chain)

    last_direction = None
    for structure_id, direction in enumerate(locations):
        if structure_id == 0:
            # Only the first time: align the center-of-charge vector of s2
            # with the first direction.
            # TODO This vector needs to be picked by the user from information
            # from the pdb_cif.py --summary
            ccc = md.ChargeCalculator(params)
            cm = ccc.calculate_center_of_charge(s2)
            cm = cm / np.linalg.norm(cm)
            rotate_atoms(s2, rig.alignVectors(direction, cm))
        else:
            # FIX: Something is wrong with rig.alignVectors([0,0,1],[0,0,-1]);
            # it could just be a trigonometry case that gives some sort of
            # singularity. Going through an intermediate direction avoids the
            # problem but does not explain it. Find out.
            if direction == [0.0, 0.0, -1.0] and last_direction == [0.0, 0.0, 1.0]:
                rotate_atoms(s2, rig.alignVectors([0.0, 1.0, 1.0], [0.0, 0.0, 1.0]))
                last_direction = [0.0, 1.0, 1.0]
            rotate_atoms(s2, rig.alignVectors(direction, last_direction))
        last_direction = direction

        # Move s2 out to the requested center-of-mass distance.
        rig.translate_molecule(s2, modelS2, [k * distance for k in direction])

        if no_join:
            # No chains were linked, so the file name carries no link label
            # (the name was previously undefined on this path).
            lnk_label = ''
        else:
            # Join both structures into one model with consistently named chains.
            s3 = struct.StructureBuilder.Structure('newrot')
            s3.add(struct.Model.Model(0))
            lnk_label = join_chains(s1, s2, s3, join_o, link_o)
        io = PDBIO()
        io.set_structure(s3)
        # The --id prefix (default 's') replaces the formerly hard-coded 's'.
        io.save(options.id + '_' + angles[structure_id] + "_" + str(structure_id) +
                lnk_label + '.pdb')
        # Bring s2 back to the origin before the next orientation.
        rig.translate_molecule(s2, modelS2,
                               rig.center_molecule(cmc.get_center_of_mass(s2)))
def main():
    """Insert residues into a Super Structure CSV file.

    Command-line options:
        --apn: "aa number,entity id,chain id,direction" with direction Ndir
            or Cdir (matched case-insensitively).
        -r/--res: residues to add, either comma separated three-letter names
            or a string of one-letter codes.
        --inp: input CSV file; --out: output path/name for CSV and PDB.
        --pep: peptide PDB file; --par: CHARMM parameter folder.

    Only insertion in the Ndir direction is implemented; Cdir prints a
    warning and exits. Residue numbers of the atoms that follow the inserted
    residues in the same entity and chain are shifted by the number of
    residues added.

    Exits with status 1 on missing/invalid options or unknown residues.
    """
    usage = "usage: %prog [options] arg"
    d = ("This program reads a CSV file that has been generated by Super_Structure. "
         "The file corresponds to a Super Structure of a Protein. "
         "Multiple residues can be added at the time, No terminal will be added..\n "
         "This program can only add residues or terminals that are in the parameter file.")
    opt_parser = optparse.OptionParser(usage, description=d)
    opt_parser.add_option(
        "--apn", type="str",
        help=("Enter Instruction for where to append residues in hard '\"'\n "
              "quotes. Place: Amino Acid Number, Entity ID, Chain ID and \n "
              "the direction to add residues separated by comas. Add. The "
              "direction to add residues is either Ndir or Cdir. This means "
              "that if a residue is added in residue 10, it could be toward "
              "the N or C terminal. This is important so that the program "
              "knows if the new residue is placed before or after the residue. "
              "Example \"1,1,A,Ndir\" or \"20,2,A,Cdir\". \n "
              "Chain ID, amino acid or terminal name are not case sensitive "
              "and do not need to go in quotes.\n"))
    opt_parser.add_option(
        "-r", "--res", type="str",
        help=("Enter list of amino acids to be added in hard quotes.'\"'\n"
              "Example: \"ALA,VAL,ASP,ASN,GLU\"."))
    opt_parser.add_option("--inp", type="str", help="Path to CSV file for adding residue.")
    opt_parser.add_option("--out", type="str",
                          help="Path and name to CSV and PDB outputs with added residues.")
    opt_parser.add_option("--pep", type="str", help="Path to peptide file.")
    opt_parser.add_option("--par", type="str", help="Path to Charmm parameter folder.")
    options, args = opt_parser.parse_args()

    if options.inp is None or not os.path.exists(options.inp):
        print("Error: File path Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    # These options are dereferenced unconditionally below.
    if None in (options.apn, options.res, options.out, options.pep):
        print("Error: Options --apn, --res, --out and --pep are required.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    # ------------------------------ Init Setup ------------------------------
    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')
    parse_list = options.apn.split(',')
    if ',' not in options.res:
        # No comma: the value is read as a string of one-letter codes.
        aa_add = [ut.utilities.residueDict1_1[code] for code in options.res]
    else:
        aa_add = options.res.split(',')
    pep_file = PDBParser().get_structure('Peptides', options.pep)
    insulin.build_pep_and_anchers(pep_file)

    # ------------------ Process parse_list and aa_add ------------------
    message = ''
    print(str(parse_list) + ' ' + str(len(parse_list)))
    if len(parse_list) == 4 and len(aa_add) > 0:
        try:
            aaid_add = int(parse_list[0])
            ent_id_add = int(parse_list[1])
        except ValueError:
            print('ERROR: Amino Acid Number and Entity ID in --apn must be integers.')
            sys.exit(1)
        chain_add = str(parse_list[2]).upper()
        # Help and error texts promise case-insensitive directions, so map
        # whatever was typed onto the canonical spelling.
        term_dir = {'ndir': 'Ndir', 'cdir': 'Cdir'}.get(str(parse_list[3]).strip().lower())
        if term_dir is None:
            print("ERROR: only two directions to add residues, Ndir and Cdir.")
            print(" The entries are not case sensitive.")
            sys.exit(1)

        message += ('Adding residues ' + str(aa_add) + ' in th ' + term_dir +
                    ' at amino acid ' + str(aaid_add) + ', ' + 'entity ')
        message += str(ent_id_add) + ' and direction ' + term_dir + '.'
        print(message)

        # Create the link dataframe following the process used in
        # Super_Structures to populate its fields.
        aa = []
        aaid = []
        entity_id = []
        chain_id = []
        atmtyp1 = []
        atmtyp2 = []
        charg = []
        component = []
        snum = 1
        for res in aa_add:
            if res not in insulin.params.AA:
                print('Warning: Amino Acid identifier ' + str(res) +
                      ' is not found in parameters.')
                sys.exit(1)
            residue_params = insulin.params.AA[res]
            comp = 1
            for atom_group in residue_params.atoms:
                for atom_name in atom_group:
                    fixed_name = insulin.corrections(res, atom_name)
                    aa.append(res)
                    aaid.append(snum)
                    entity_id.append(ent_id_add)
                    chain_id.append(chain_add)
                    atmtyp1.append(fixed_name)
                    atmtyp2.append(residue_params.atom_type[fixed_name])
                    charg.append(residue_params.atom_chrg[fixed_name])
                    # First atom group is the amino part; later groups are
                    # carboxyl (C, O) or numbered side-chain groups.
                    if comp == 1:
                        component.append('AMINO')
                    elif atom_name in ['C', 'O']:
                        component.append('CARBO')
                    else:
                        component.append('SIDE' + str(comp))
                comp += 1
            snum += 1
        link = pd.DataFrame()
        link['aa'] = pd.Series(aa)
        link['aaid'] = pd.Series(aaid)
        link['ent_id'] = pd.Series(entity_id)
        link['chain'] = pd.Series(chain_id)
        link['atmtyp1'] = pd.Series(atmtyp1)
        link['atmtyp2'] = pd.Series(atmtyp2)
        link['component'] = pd.Series(component)
        link['charg'] = pd.Series(charg)

        # Add atom type, masses and atom number to each atom type.
        mass = []
        atmNum = []
        atmtyp3 = []
        epsilon = []
        rmin_half = []
        for atom_type in link['atmtyp2']:
            atmNum.append(params.am.MASS[atom_type][0])
            mass.append(params.am.MASS[atom_type][1])
            atmtyp3.append(params.am.MASS[atom_type][2])
            epsilon.append(params.NONBONDED[atom_type][1])
            rmin_half.append(params.NONBONDED[atom_type][2])
        link['epsilon'] = pd.Series(epsilon)
        link['rmin_half'] = pd.Series(rmin_half)
        link['atmtyp3'] = pd.Series(atmtyp3)
        link['mass'] = pd.Series(mass)
        link['atmNum'] = pd.Series(atmNum)

        # DF type correction.
        link['aaid'] = link['aaid'].apply(int)
        link['ent_id'] = link['ent_id'].apply(int)
        link['mass'] = link['mass'].apply(float)
        link['epsilon'] = link['epsilon'].apply(float)
        link['rmin_half'] = link['rmin_half'].apply(float)
        link['atmNum'] = link['atmNum'].apply(int)

        # Fill the columns the link frame lacks: aainfo* False, atminfo* True,
        # everything else (coordinates etc.) NaN.
        n_atoms = len(link)
        for col in insulin.Full_Structure.columns:
            if col not in list(link.columns):
                if col[0:6] == 'aainfo':
                    link[col] = pd.Series([False] * n_atoms)
                elif col[0:7] == 'atminfo':
                    link[col] = pd.Series([True] * n_atoms)
                else:
                    link[col] = pd.Series([float('nan')] * n_atoms)

        if term_dir == 'Cdir':
            print('WARNING: The code has not been design and tested for insertions in the CTER.')
            print('Exiting the program without finishing.')
            sys.exit(1)
        fs = insulin.Full_Structure
        beg_insert = min(fs.index[(fs.aaid == aaid_add) &
                                  (fs.ent_id == ent_id_add) &
                                  (fs.chain == chain_add)])
        end_insert = beg_insert + link.shape[0]

        joint_df = pd.DataFrame(columns=link.columns)
        count = 0
        insert = True
        # When links are added, aaid needs to be fixed to reflect added residues
        aaid_offset = 0
        for i in insulin.Full_Structure.index:
            if (i >= beg_insert) and (i < end_insert):
                if insert:
                    # Copy the whole link in front of the anchor row, once.
                    for j in link.index:
                        joint_df.loc[count] = link.loc[j]
                        joint_df.loc[count, 'aaid'] = joint_df.loc[count, 'aaid'] + aaid_offset
                        current_aaid = link.loc[j, 'aaid']
                        count += 1
                    insert = False
                    aaid_offset = aaid_offset + current_aaid
            joint_df.loc[count] = insulin.Full_Structure.loc[i]
            # Only residues after the added link, in the given ent_id and
            # chain, get their number increased. Any other entity or chain in
            # the molecule is not changed.
            if (joint_df.loc[count, 'ent_id'] == ent_id_add) & (
                    joint_df.loc[count, 'chain'] == chain_add):
                joint_df.loc[count, 'aaid'] = joint_df.loc[count, 'aaid'] + aaid_offset
            count += 1
        # After adding residues, it all gets copied back to original dataframe.
        for i in joint_df.index:
            insulin.Full_Structure.loc[i] = joint_df.loc[i]

        # The number of models is derived from the DataFrame column count. Be
        # careful if the column structure changes. Floor division keeps the
        # Python 2 integer semantics under Python 3.
        # TODO: missing atom coordinates are added manually. It needs to be
        # automated more.
        num_models = max(0, (insulin.Full_Structure.shape[1] - 20) // 5) + 1
        for i in range(1, num_models + 1):
            for j in range(len(aa_add), 0, -1):
                insulin.fit_coordinates(term_dir, j, ent_id_add, chain_add, str(i),
                                        aa_add[j - 1])
        # NOTE: insulin.models is not set by the Super Structure class; it is
        # added here.
        insulin.models = [str(i) for i in range(1, num_models + 1)]

        # --------------------------- Write outputs ---------------------------
        file_name = os.path.basename(options.out).split('.')[0]
        dir_path = os.path.dirname(options.out)
        insulin.write_csv(dir_path, file_name)
        IO.write_pdb(insulin, dir_path, file_name, 'all')
    else:
        message += 'The number of entries in the instruction field, followed by --apn, is not right.\n'
        message += 'Type -h or --help for instructions\n'
        print(message)
def _rotate_structure(structure, rotation_matrix):
    """Rotate every atom of ``structure`` in place.

    Each atom coordinate is taken as a row vector and right-multiplied by
    ``rotation_matrix`` (``coord . rotation_matrix``).
    """
    for atom in structure.get_atoms():
        coord = atom.get_coord()
        atom.set_coord(np.dot([coord[0], coord[1], coord[2]], rotation_matrix))


def _single_model_id(structure, label):
    """Return the id of the only model in ``structure``.

    Prints an error and exits with status 1 when the structure does not hold
    exactly one model. ``label`` ('Centered' or 'Rotated') is only used in the
    error message.
    """
    model_ids = [model.id for model in structure.get_models()]
    if len(model_ids) != 1:
        print(
            "ERROR: Number of models cannot be different from 1. Models found:"
            + str(len(model_ids)))
        print(" Make sure " + label + " PDBs have only one model.")
        sys.exit(1)
    return model_ids[0]


def main():
    """Place a 'rotate' PDB structure around a 'center' PDB structure.

    Command line options are read with optparse (see the option help below).
    For every direction in a fixed set of unit vectors the rotated structure
    is re-oriented, translated ``--distance`` along that direction and saved,
    together with the centered structure, as one PDB file named
    ``<id>_<azimuth>_<elevation>_<index><link label>.pdb`` (relative path, so
    it lands in the current working directory).

    Two modes exist:
      * no-link (``--map no`` and ``--link X``, the defaults): chains of the
        rotated structure that clash with chain ids of the centered one are
        renamed and both structures are written side by side.
      * link (anything else): every ``ROTATE:CENTER`` pair given in ``--link``
        is passed to ``join_chains`` to build the joined structure.

    Exits with status 1 on invalid or missing input.
    """
    usage = "usage: %prog [options]"
    d = ("It takes two pdbs and aligns the structures in different "
         "orientations. A center-pdb is placed at (0,0,0),and a rotate-pdb "
         "is place around at given angle intervals and distances. "
         "The program assumes that the rotated protein's N-terminal will "
         "become the N-Terminal when joined to the center protein. "
         "WARNING: The programs needs to be extended to allow the reading "
         "of the dipole vectors of the two proteins, and align them in any "
         "way the user wants. As is, it centers the center-protein in whatever "
         "orientation it is, and it alings the center of charge vector with "
         "vector <0, 1, 0>. From that starting point, it rotates rotate-protein "
         "by the angle given as input.")
    option_parser = optparse.OptionParser(usage, description=d)
    option_parser.add_option(
        "--center", type="str",
        help="Path to pdb structure to place at (0,0,0) "
             "according to its center of mass.")
    option_parser.add_option(
        "--rotate", type="str",
        help="Path to pdb structure to rotated around "
             "center structure at X degree intervals. Angle "
             "intervals must be multiple of 360.")
    option_parser.add_option(
        "--angle", type="int",
        help="Angle intervals used to place rotated "
             "structured around center one.")
    option_parser.add_option(
        "--distance", type="float",
        help="Center of mass distance between rotated and "
             "centered structures.")
    option_parser.add_option(
        '--id', type="str", action="store", default='s',
        help="A prefix id to identify the "
             "output structures. Default is s.")
    option_parser.add_option(
        '--map', type="str", action="store", default='no',
        help="Optional arg if not present it "
             "is 'no', and it will add rotate with different chain "
             "identifiers. if it is 'yes' then it expects --link "
             "to be something different from 'X' and in the right format.")
    option_parser.add_option(
        '--link', type="str", action="store", default="X",
        help="Optional arg if not present it "
             "is 'X', and it will add 'rotate' structure with different chain "
             "identifiers. If it is 'yes', it will add the 'rotate' structure "
             "with the same identifiers as 'center' structure in the following "
             "format: A:A,B:B or chain A in 'rotate' will link with A in 'center' "
             "and B to B respectively. When both proteins have two chains, the only "
             "other option is A:B,B:A. There can be as many chains linked separated "
             "by commas but be careful because the program does not checks the "
             "physicality of this connections.")
    option_parser.add_option("--par", type="str",
                             help="Path to charmm parameters folder.")
    options, args = option_parser.parse_args()

    # ------------------------------------------------------------------
    # Validate the command line before any structure is read.
    # A missing option is None; os.path.exists(None) would raise TypeError,
    # so "not given" is reported the same way as "path does not exist".
    if not options.center or not os.path.exists(options.center):
        print("Error: File path for molecule to be centered does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if not options.rotate or not os.path.exists(options.rotate):
        print("Error: File path for molecule to be rotated does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.map.lower() == 'yes' and options.link.lower() == 'x':
        print("Error: when option map is equal to yes, option link must be")
        print(
            " a series of chains identifiers for association as described"
        )
        print(" in the link paramater help.")
        sys.exit(1)
    # --angle is used as a range() step and --distance as a scale factor; both
    # previously crashed with an unhelpful traceback when omitted.
    if options.angle is None or options.angle <= 0 or options.distance is None:
        print("Error: options --angle (a positive integer) and --distance")
        print(" are required.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    angle = options.angle
    distance = options.distance
    filepath1 = options.center
    filepath2 = options.rotate
    map_o = options.map
    param_path = options.par
    # Decided once, from the raw option strings. The original re-tested this
    # inside the placement loop with lnk_o.lower(), but lnk_o is a list by
    # then, so every no-link run failed with AttributeError.
    link_mode = not (map_o.lower() == 'no' and options.link.lower() == 'x')

    # ------------------------------------------------------------------
    # Process the string that determines how the centered and rotated
    # structures will be connected: "A:A,B:B" -> [['A', 'A'], ['B', 'B']].
    lnk_o = [pair.split(':') for pair in options.link.split(',')]
    # Suffix for the output file names, e.g. "_aa_bb" (or "_x" by default).
    lnk_label = ''.join(
        '_' + ''.join(chain_id.lower() for chain_id in pair) for pair in lnk_o)
    if link_mode and any(len(pair) != 2 for pair in lnk_o):
        print("Error: option link must be a comma separated list of")
        print(" ROTATE:CENTER chain identifier pairs, for example A:A,B:B.")
        sys.exit(1)

    pdb_parser = PDBParser(QUIET=True)
    params = CP.read_charmm_FF(param_path)
    cmc = md.CenterOfMassCalculator(params)
    rig = MRM.Molecular_Rigid_Manipulation(param_path)

    # ------------------------------------------------------------------
    # Check that the structures only have one model, and place the structures'
    # center of mass at (0,0,0) to give an idea of their location in the
    # cartesian coordinate system.
    s1 = pdb_parser.get_structure('Centered', filepath1)
    modelS1 = _single_model_id(s1, 'Centered')
    rig.translate_molecule(s1, modelS1,
                           rig.center_molecule(cmc.get_center_of_mass(s1)))
    s2 = pdb_parser.get_structure('Rotated', filepath2)
    modelS2 = _single_model_id(s2, 'Rotated')
    rig.translate_molecule(s2, modelS2,
                           rig.center_molecule(cmc.get_center_of_mass(s2)))

    # ------------------------------------------------------------------
    # TODO: This works only for angles between 0 and 90 not including 0 and 90
    # and will generate angles in all 8 quadrants of the cartesian coordinate
    # system (no reason to use an angle other than 45 for now).
    # TODO: quaternions might work better.
    # `locations` holds direction vectors relative to (0,0,0) used to place s2
    # around s1; `angles` holds the matching "<azimuth>_<elevation>" labels
    # used in the output file names.
    tilt = np.cos(45 * np.pi / 180)
    rings = [(0, 0), (tilt, 45), (-1 * tilt, 315)]  # (z component, label)
    # Floor division keeps the Python 2 integer semantics of the original
    # `90 / Angle - 1` and also works under Python 3.
    repeats = 90 // angle - 1
    locations = []
    angles = []
    for z, elevation in rings:
        for azimuth in range(0, 360, angle):
            # NOTE(review): the repeat index is never used, so for angles
            # below 45 every ring direction is emitted more than once --
            # confirm whether that is intended.
            for _ in range(0, repeats):
                angles.append(str(azimuth) + "_" + str(elevation))
                locations.append([np.cos(azimuth * np.pi / 180),
                                  np.sin(azimuth * np.pi / 180), z])
    locations.append([0, 0, 1])
    angles.append(str(0) + "_" + str(90))
    locations.append([0, 0, -1])
    angles.append(str(0) + "_" + str(270))
    # Normalize; kept as plain lists because directions are compared with
    # list literals below.
    locations = [list(np.asarray(v) / np.linalg.norm(v)) for v in locations]

    # ------------------------------------------------------------------
    if not link_mode:
        # The two proteins are not linked: one is placed around the other
        # (e.g. to check protein-protein interaction). Chains of s2 whose id
        # is already used in s1 get a new id, because a repeated chain id in
        # one structure would be treated as a single chain.
        s1_ids = set(chain.id for chain in s1[0])
        # Reserve the ids of s2 as well, so a renamed chain can never take
        # the id of one of its own sibling chains.
        used_ids = s1_ids | set(chain.id for chain in s2[0])
        for chain in list(s2[0]):
            if chain.id not in s1_ids:
                continue
            for letter in string.ascii_uppercase:
                if letter not in used_ids:
                    chain.id = letter
                    used_ids.add(letter)
                    break
            else:
                # No free uppercase letter is left; the code would need a
                # larger id alphabet to continue.
                print(
                    "ERROR: Number of chains in both structures exceeds letters in the alphabet. No ID "
                    "identifiers available. Program will exit without output. Fix the code. June 24, 2016"
                )
                sys.exit(1)
        s3 = copy.deepcopy(s1)
        s3.id = 'Ensamble'
        # The chain objects of s2 themselves are added (not copies), so the
        # rotations/translations applied to s2 below show up in s3.
        for chain in s2.get_chains():
            s3[0].add(chain)
    else:
        chain_info_s1 = get_chains_info(s1)
        chain_info_s2 = get_chains_info(s2)

    # ------------------------------------------------------------------
    last_direction = None
    for structure_id, direction in enumerate(locations):
        if structure_id == 0:
            # Only the first time: orient s2 by aligning its normalized
            # center-of-charge vector with the first direction.
            ccc = md.ChargeCalculator(params)
            cm = ccc.calculate_center_of_charge(s2)
            cm = cm / np.linalg.norm(cm)
            _rotate_structure(s2, rig.alignVectors(direction, cm))
            # NOTE: an extra 180 degree flip about <0, 1, 0> used to follow
            # here (for the leucine zipper case); it was already disabled in
            # the original code as apparently unnecessary.
            # TODO: this vector should be picked by the user from the
            # information given by pdb_cif.py --summary.
        else:
            # FIX: Something is wrong with rig.alignVectors([0,0,1],[0,0,-1]);
            # it could just be a trigonometric singularity. The detour through
            # [0, 1, 1] avoids the problem but does not explain it.
            if direction == [0.0, 0.0, -1.0] and last_direction == [0.0, 0.0, 1.0]:
                _rotate_structure(
                    s2, rig.alignVectors([0.0, 1.0, 1.0], [0.0, 0.0, 1.0]))
                last_direction = [0.0, 1.0, 1.0]
            _rotate_structure(s2, rig.alignVectors(direction, last_direction))
        last_direction = direction

        # Move s2 `distance` away from the origin along the direction.
        rig.translate_molecule(s2, modelS2, [k * distance for k in direction])

        if link_mode:
            # Join the two structures into one structure with one model and
            # the chains of structure 1 and 2 joined and named consistently.
            s3 = struct.StructureBuilder.Structure('newrot')
            s3.add(struct.Model.Model(0))
            # Every ROTATE:CENTER pair is joined (the original handled
            # exactly the first two pairs).
            for pair in lnk_o:
                join_range = [(chain_info_s2[pair[0]]['min_res'],
                               chain_info_s2[pair[0]]['max_res']),
                              (chain_info_s1[pair[1]]['min_res'],
                               chain_info_s1[pair[1]]['max_res'])]
                join_chains(s3, s2, s1, pair, join_range)

        io = PDBIO()
        io.set_structure(s3)
        # options.id defaults to 's', the prefix that used to be hard-coded.
        io.save(options.id + '_' + angles[structure_id] + "_" +
                str(structure_id) + lnk_label + '.pdb')
        # Bring s2 back to the origin before the next placement.
        rig.translate_molecule(s2, modelS2,
                               rig.center_molecule(cmc.get_center_of_mass(s2)))
def main():
    """Build a Super Structure CSV from a CIF file or from PSF/CRD files.

    Command line options are read with optparse:
      * ``--fromcif`` with ``-i/--cif``: reads the CIF file, adds the missing
        residues and writes a CSV and a PDB named after the CIF file (both
        are written with an empty directory argument).
      * ``--frompsfcrd`` with ``-f/--psf`` and ``-d/--crd``: builds the Super
        Structure from the CRD and PSF files and writes a CSV next to the CRD
        file.
    The two flags are mutually exclusive. When neither flag is given nothing
    is done. Exits with status 1 when a required input path is missing or
    does not exist.
    """
    usage = "usage: %prog [options] arg"
    d = ("This program reads a CIF file and checks that all residues in the file "
         "are found in the CHARMM top_27 parameters. Residues found, but missing in "
         "the structure, are added to the structure. The full structure is outputed "
         "to a CSV file where Charmm, CIF and additional information is stored. "
         "Added residues are copied from a peptide structure with all amino acids "
         "present in the local CHARMM parameters files with fixed dihedral angles. "
         "Info in the CSV file should be all there is to explore the conformational "
         "space of added atoms.")
    opt_parser = optparse.OptionParser(usage, description=d)

    group = optparse.OptionGroup(
        opt_parser,
        "Generates CSV and PDB files for each model from a CIF file.")
    group.add_option("--fromcif", action="store_true",
                     help="Flag to generate a CSV frile from a CIF file.")
    group.add_option("-i", "--cif", type="str",
                     help="Path to input cif file.")
    # NOTE(review): --out1 and --pep are accepted but never read below; the
    # outputs are named after the CIF file and the peptide file comes from
    # the package resources. Confirm whether they should be honored.
    group.add_option("-o", "--out1", type="str",
                     help="Path to output csv.")
    group.add_option("-p", "--pep", type="str",
                     help="Path to CHARMM peptide file.")
    opt_parser.add_option_group(group)

    group = optparse.OptionGroup(
        opt_parser, "Generates a CSV file from CRD and PSF files.")
    group.add_option(
        "--frompsfcrd", action="store_true",
        help="Flag to generates a CSV frile from a CRD and PSF file.")
    group.add_option("-f", "--psf", type="str",
                     help="Path to input PSF file in XPLOR format.")
    group.add_option("-d", "--crd", type="str",
                     help="Path to input CRD file.")
    opt_parser.add_option_group(group)
    options, args = opt_parser.parse_args()

    # ---------------------------- Options Entered ----------------------------
    if options.fromcif and options.frompsfcrd:
        # OptionParser.error prints the usage plus the message and exits.
        opt_parser.error(
            "Two option flags can't be selected at the same time. Enter -h for help."
        )

    if options.fromcif:
        # A missing option is None; os.path.exists(None) would raise
        # TypeError, so "not given" is reported like "does not exist".
        if not options.cif or not os.path.exists(options.cif):
            print("Error: File path for input file does not exist.")
            print("Type -h or --help for description and options.")
            sys.exit(1)
        params = CP.read_charmm_FF()
        parser2 = PDBParser(QUIET=True)
        pep_file_path = pkg_resources.resource_filename(
            'em', 'params/' + 'peptides.pdb')
        p1 = parser2.get_structure('Peptides', pep_file_path)
        # The peptide construct is built with charmm, so corrections for some
        # atom names to PDB/Databank atom types are needed: the ILE atom 'CD'
        # is renamed to 'CD1'.
        # TODO: this might not be necessary as the correction and
        # inv_correction dictionary in Super Structure takes care of it.
        # Check before removing the correction here.
        for model in p1.get_models():
            for chain in model.get_chains():
                for residue in chain.get_residues():
                    if residue.get_resname() != 'ILE':
                        continue
                    # NOTE(review): get_atom() is kept from the original;
                    # confirm it exists in the installed Biopython release.
                    for atom in residue.get_atom():
                        if atom.get_id() == 'CD':
                            atom.name = 'CD1'
                            atom.id = 'CD1'
        # Create the Super Structure.
        myCIF = SS.Super_Structure(params, options.cif, 'setup')
        myCIF.build_pep_and_anchers(p1)
        myCIF.read_dict_into_dataframes()
        myCIF.check_models()
        myCIF.create_super_structure_df()
        # Find missing residues to add to the Super Structure. Missing
        # residues are grouped in lists of contiguous residues and added to
        # another list.
        myCIF.build_missing_aa()
        file_name = os.path.basename(options.cif).split('.')[0]
        myCIF.write_csv('', file_name)
        IO.write_pdb(myCIF, '', file_name, 'all')

    if options.frompsfcrd:
        if not options.psf or not os.path.exists(options.psf):
            print("Error: File path for PSF file does not exist.")
            print("Type -h or --help for description and options.")
            sys.exit(1)
        if not options.crd or not os.path.exists(options.crd):
            print("Error: File path for CRD file does not exist.")
            print("Type -h or --help for description and options.")
            sys.exit(1)
        directory, filename = os.path.split(options.crd)
        crd_file = IO.crd(options.crd)
        psf_file = IO.psf(options.psf)
        file_name = filename.split('.')[0]
        # After reading the files, generate and index a Super Structure.
        params = CP.read_charmm_FF()
        myCSV = SS.Super_Structure(params, directory, 'charmm_input')
        # At this point, a XPLOR psf could only have been created from a
        # complete structure, so no worries of gaps.
        myCSV.create_super_structure_df_from_CRD_PSF(crd_file, psf_file)
        myCSV.write_csv(directory, file_name)