def main():
    """Run the bioverse design pipeline once for every code in ``pdb_codes``.

    For each PDB code the stages below run in order. Each stage is a call
    into a project class defined outside this function:

    1. ``PDBPreProcessor.preprocess_check``
    2. ``HeaderParser.get_header_dict``
    3. ``CIFFParser`` gene annotations, collated sequence annotations, FASTA
    4. ``SequenceGetter`` DNA and protein sequences
    5. ``PDBPreProcessor.process`` / ``get_complex_info``
    6. ``localBLAST.align_pdb_seqs`` (yields ``aln_diff``)
    7. ``EditPDB.edit_bfactor_pocket_residues``
    8. ``PDBPreProcessor.count_structures_in_asymmetric_unit``
    9. ``MutantListMaker`` mutant list generation and filtering
    10. ``DDGMonomer.get_targets``
    11. ``MutationListGenerator.write_csv_output``

    The stage order is preserved exactly; several stages are marked
    "THIS WORKS. DO NOT CHANGE" by the original author.

    Returns:
        None. Results are produced as side effects of the stage objects.
    """
    # ------------------------------------------------------------------
    # DISABLED: get pdb_id for each structure in PDB with the respective
    # uniprot_id. Can be many pdb's per target.
    # THIS WORKS. DO NOT CHANGE.
    # Ignore this for now. Need to build a tool that allows selection of
    # a pdb based on header file information.
    # Need to build capability for psiBLAST.
    # ------------------------------------------------------------------
    # pdb_from_uniprot = PDBFromUniprot()
    # pdb_getter = PDBFromUniprot()
    # uniprot_pdb_mapping = {}
    # for uniprot_id in steele_uniprot_codes:
    #     uniprot_pdb_mapping[uniprot_id] = []
    #     pdb_codes = pdb_from_uniprot.get_pdb_id(queryText)
    #     if pdb_codes != []:
    #         for pdb_id in pdb_codes:
    #             uniprot_pdb_mapping[uniprot_id].append(pdb_id)
    #     else:
    #         print "%s returned no pdb codes" % uniprot_id
    # print "steele's mapping: ", uniprot_pdb_mapping
    # uniprot_id = 'P0A9Q1'
    # queryText = ("<orgPdbQuery>" +
    #     "<queryType>org.pdb.query.simple.UpAccessionIdQuery</queryType>" +
    #     "<description>Simple query for a list of Uniprot Accession IDs: " + uniprot_id + " </description>" +
    #     "<accessionIdList>" + uniprot_id + "</accessionIdList>" +
    #     "</orgPdbQuery>")
    # pdb_from_uniprot = PDBFromUniprot()

    # ------------------------------------------------------------------
    # Bookkeeping notes. Apart from ``pdb_codes`` none of these names is
    # read by the pipeline below; they record which structures were run
    # and which ones failed, and why.
    # ------------------------------------------------------------------
    # proteins with no E. coli structures
    uniprot_ids = [
        'P07001', 'P0AB67', 'P27306', 'P0A7B3', 'P0ACS2', 'P0A9E2',
        'P0A9B2', 'P0A6T1', 'P0AFG8', 'P06959', 'P0A9P0', 'P0AFG3',
        'P0AFG6', 'P61889', 'P33940', 'P0AC53', 'P00350', 'P0AB67',
        'P27306', 'P0A7B3', 'P0A9E2', 'P0AFG8', 'P0AFG3', 'P0AFG6',
        'P33940', 'P0AC53', 'H6N162',
    ]
    genbank_ids = [
        '81171066', '11182439', '67470903', '71162387', '84027826',
        '84027822', '84027824', '2506692', '81175321', '170180374',
    ]
    ronming = ['3TCF', '3TCG', '3TCH']
    gene_names = ['pntA']

    # The codes actually processed by this run.
    pdb_codes = ['2BHS']

    queue = ['...', '3BP8', '1LB2', '1DPS', '1BG8', '2XUV', '3UCS']
    # pdb_codes = ['2ZHG', '2CGP', '2H27']
    rerun = ['4JDR', '3NBU', '2CMD', '2ZYA', '4TWZ', '1SRU', '1JTS']
    # pdb_codes = ['1X15', '4N72', '4JDR', '2CMD', '2ZYA', '3NBU', '4TWZ', '1SRU', '1W36']
    dna_bound = ['3JRG', '3JRH', '2ZHG', '2CGP', '2H27']
    key_error = ['1XGE']  # line 1433 in _edit_bfactor, KeyError 103
    list_index_out_of_range = ['4TWZ', '2BUI', '2BHS']  # list index out of range, line 1395
    ligand_is_on_chain_B = ['1X15']
    # Targeted residues are on the wrong chain or on both chains. Need to
    # get residues for both chains (copy B to A).
    homodimer_is_inverted = ['3PFL', '2ZYA', '1NYT', '2EFT', '1DFG']
    hold = ['4OX6']
    two_chains = ['1NYE', '3UCS', '2O97']
    # Need to fix write_csv_output to be robust to gaps in seq; currently
    # just subtracting the first aln_diff.
    gaps_in_structure = ['2G67']
    pops_issue = []
    pocket_finder_issue = ['1IHF', '1OR7', '1SRU']
    ligand_issue = ['2ZHG', '1IHF']
    # The original literal repeated the key '1W36' four times, so only the
    # last value ('1W36C') survived. Values are lists so that every cleaned
    # file name recorded for a structure is kept.
    cleaned_file_mapping = {
        '1W36': ['1W36DBCY', '1W36D', '1W36B', '1W36C'],
        '4B2N': ['4B2NA'],
        '1U60': ['1U60AB'],
        '2XUV': ['2XUVABCD'],
        '1Y00': ['1Y00AB'],
    }
    done = [
        '1X15', '4N72', '4JDR', '2CMD', '2ZYA', '3NBU', '4TWZ', '1SRU',
        '1S7C', '4B2NA', '2J1N', '1U60AB', '1YAC', '2XUVABCD', '1Y00AB',
        '1W36D', '1W36B', '1W36C', '3NR7', '2L15', '1A04', '2GQQ', '1LB2',
        '3I2Z', '3TCH', '3TCH', '1H16', '3TCH',
    ]
    preliminary = []
    final = [
        '4N72', '3NBU', '3TCH', '1DPS', '1NYE', '3NR7', '2GQQ', '3I2Z',
        '1BG8', '1YAC', '2CMD', '1GS5', '3N1C', '2R97', '4LFU', '4JDR',
        '3PFL', '1S7C', '2GFY', '2GFW',
    ]

    # Download .pdb using pdb_id
    # THIS WORKS. DO NOT CHANGE
    # pdb_getter = PDBFromUniprot()

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function.
    print("====================================================================")
    print("Entering bioverse Design Pipeline")
    print("====================================================================")

    # The loop header had been commented out, which left ``pdb_code``
    # unbound for every stage below; it is restored so each stage receives
    # a code taken from ``pdb_codes``.
    for pdb_code in pdb_codes:
        print("\n\nIdentifying residues of interest for %s" % pdb_code)

        # Download .pdb using pdb_id
        # THIS WORKS. DO NOT CHANGE
        preprocessor = PDBPreProcessor(pdb_code)
        preprocessor.preprocess_check()
        # pdb_getter = PDBFromUniprot()
        # pdb_getter.fetch_pdb(pdb_code)
        # cif_getter = CIFFromUniprot(pdb_code)
        # cif_getter.fetch_mmCIF()

        ## Structural pipeline:

        ##
        # Parse PDB header
        ##
        # THIS WORKS. DO NOT CHANGE
        header_parser = HeaderParser(pdb_code)
        header_parser.get_header_dict()

        ##
        # CIF parser
        ##
        # THIS WORKS. DO NOT CHANGE
        cif_parser = CIFFParser(pdb_code)
        chains, genes, organisms, pdb_sequences = \
            cif_parser.get_gene_annotations()
        sequence_annotations = cif_parser.collate_sequence_annotations()
        print(sequence_annotations)
        # What is the gene_name on each chain and what is the sequence on
        # each chain; then save each sequence to a different fasta file.
        # ( chain, gene_name, sequence )
        cif_parser.write_fasta(sequence_annotations)

        ##
        # Sequence Analysis
        ##
        # THIS WORKS. DO NOT CHANGE
        seq_getter = SequenceGetter(genes, organisms)
        dna_sequences = seq_getter.get_DNA_sequence()
        seq_getter.get_protein_sequence()

        ##
        # Preprocessing
        ##
        # THIS WORKS. DO NOT CHANGE
        number_of_structures = preprocessor.process()
        preprocessor.get_complex_info()

        ##
        # Find start site for structure
        ##
        # THIS WORKS. DO NOT CHANGE
        local_blast = localBLAST(pdb_code, sequence_annotations)
        aln_diff = local_blast.align_pdb_seqs()
        print(aln_diff)

        ##
        # Instantiate PDBEditor
        ##
        # THIS WORKS. DO NOT CHANGE
        pdb_editor = EditPDB(pdb_code, server_mode=False)

        ##
        # Get surface residues (DISABLED)
        ##
        # THIS WORKS. DO NOT CHANGE
        # sr_getter = SurfaceResidues(pdb_code, server_mode=False)
        # sr_getter.write_resi_sasa_output()
        # sr_getter.write_frac_sasa_output()
        # pdb_editor.edit_bfactor_sasa()
        # sr_getter.write_surface_resi_output(0.3)
        # pdb_editor.edit_bfactor_surface_residues()

        ##
        # Get ligand (DISABLED)
        ##
        # THIS WORKS. DO NOT CHANGE
        # ligand = LigandBindingSite(pdb_code, chains)
        # ligand.get_residues_within_5A()
        # ligand.write_residue_output()
        # pdb_editor.edit_bfactor_ligand_binding_pocket()

        ##
        # Find pockets (pocket search itself is DISABLED; only the
        # B-factor edit still runs)
        ##
        # THIS WORKS. DO NOT CHANGE
        # rosetta = Rosetta(pdb_code)
        # rosetta.find_pockets()
        pdb_editor.edit_bfactor_pocket_residues()

        ##
        # Create Rosetta minimized structure for non-redundant monomers in
        # the structure. This will be used for ddG monomer.
        ##
        preprocessor = PDBPreProcessor(pdb_code, target_finder_mode=False)
        preprocessor.process()
        number_of_structures, chains, minimum_chains = \
            preprocessor.count_structures_in_asymmetric_unit()

        ##
        # Make 'mutants_list' file for ddg_monomer
        ##
        # THIS WORKS. DO NOT CHANGE
        full_list_maker = MutantListMaker(pdb_code, chains)
        # Alternative selections tried previously:
        # ListMaker.generate_mutant_list(lpocket=True, SurfRes=True)
        # ListMaker.generate_mutant_list(pocketres=True, lpocket=True)
        full_list_maker.generate_mutant_list(
            pocketres=True, lpocket=True, SurfRes=True)
        asymmetric_list_maker = MutantListMaker(
            pdb_code, minimum_chains, asymmetric_unit=True)
        asymmetric_list_maker.filter_mutant_list(minimum_chains)

        ##
        # Calculate ddG
        ##
        # THIS WORKS. DO NOT CHANGE
        ddg_monomer = DDGMonomer(pdb_code, minimum_chains)
        ddg_monomer.get_targets(5.5)

        ##
        # Write output csv
        ##
        # THIS WORKS. DO NOT CHANGE
        mutation_list_generator = MutationListGenerator(
            sequence_annotations, dna_sequences, aln_diff,
            pdb_code=pdb_code)
        mutation_list_generator.write_csv_output(aln_diff)
def post(self, request, *args, **kwargs):
    """Handle a submitted PDB code and render the matching page.

    Four outcomes are possible, all rendered with ``self.template_name``:

    * The form is invalid: the placeholder context plus the bound ``form``
      is rendered. (Previously this path returned ``None``.)
    * ``str(pdb_status) == 'False'``: ``PDBinfoGetter(...).main()`` is
      called and its result is rendered as ``pdb_map`` next to the
      placeholder context.
    * ``str(pdb_status) == 'True'``: the structure files are checked, the
      CIF annotations are parsed, one ``set_pdb_information`` call is made
      per gene/annotation match, and a per-gene summary is rendered.
    * Any other status: the placeholder context is rendered.

    Args:
        request: the incoming request; ``request.POST["pdb_code"]`` is
            read once the form validates.
        *args: unused; accepted for the view signature.
        **kwargs: unused; accepted for the view signature.

    Returns:
        Whatever ``render`` returns for the selected context.
    """
    def placeholder_context(**extra):
        # Built on demand so that the 'True' path keeps reading only the
        # instance attributes it read before (msg, target_residue_files).
        context = {
            'msg': self.msg,
            'spt_ref_file': self.spt_ref_file,
            'uniprot_id': self.uniprot_id,
            'genbank': self.genbank,
            'title': self.title,
            'target_residue_files': self.target_residue_files,
            'sequence_annotations': self.sequence_annotations,
            'length_sa': self.length_sa,
        }
        context.update(extra)
        return context

    form_class = self.get_form_class()
    form = self.get_form(form_class)
    if not form.is_valid():
        # A view must hand back a response object; falling off the end
        # here used to return None for every invalid submission.
        return render(request, self.template_name,
                      placeholder_context(form=form))

    query_text = request.POST["pdb_code"]
    # Single-argument print(...) is output-identical to the Python 2 print
    # statement and also valid Python 3.
    print(query_text)
    pdb_status = PDBstatusGetter(query_text).get_status()
    # The status is compared by its string form, as in the original code.
    status_text = str(pdb_status)

    if status_text == 'False':
        pdb_map = PDBinfoGetter(query_text).main()
        print(pdb_map)
        return render(
            request, self.template_name,
            placeholder_context(pdb_map=pdb_map, code=query_text,
                                pdb_status=pdb_status))

    if status_text != 'True':
        return render(
            request, self.template_name,
            placeholder_context(code=query_text, pdb_status=pdb_status))

    pdb_code = query_text.upper()

    # Check if the pdb and ciff files have been downloaded. If not,
    # then fetch them.
    preprocessor = PDBPreProcessor(pdb_code)
    preprocessor.preprocess_check()
    number_of_structures, chains_in_asymmetric_unit = \
        preprocessor.count_structures_in_asymmetric_unit()
    experiment = preprocessor.get_experiment_type()
    resolution = preprocessor.get_diffraction_resolution()

    # Get organism ID, gene names, pdb_sequences, and paper
    # metadata from .ciff file
    cif_parser = CIFFParser(pdb_code)
    chains, genes, organisms, pdb_sequences, title, authors = \
        cif_parser.get_gene_annotations()
    sequence_annotations = cif_parser.collate_sequence_annotations()
    cif_parser.write_fasta(sequence_annotations)
    gene_info = cif_parser.get_gene_info_from_genbank()

    spt_writer = JmolSPTWriter(pdb_code)
    spt_writer.write_spt()
    spt_ref_file = 'jmol_script_' + pdb_code + '.spt'

    if sequence_annotations:
        for gene in gene_info:
            details = gene_info[gene]
            gene_name = details['GENE'][0]
            product = details['PRODUCT']
            function = details['FUNCTION']
            ref_seq = details['PROTEIN_ID']
            for information in sequence_annotations:
                if gene_name.upper() != information[1]:
                    continue
                # Replace the annotation's gene name with the spelling
                # taken from gene_info (the comparison above is made
                # against its upper-cased form).
                information[1] = gene_name
                chain = information[0]
                organism = information[2]
                aa_sequence = information[3]
                # Add data to database
                _, msg = set_pdb_information(
                    pdb_code, title, authors, genes, organism,
                    number_of_structures, chain,
                    chains_in_asymmetric_unit, ref_seq, product,
                    function, gene_name, spt_ref_file, aa_sequence)
                print(msg)

    colors = ['yellow', 'green', 'red', 'blue', 'orange', 'purple', 'grey']
    recorded_info = {}
    for position, entry in enumerate(sequence_annotations):
        key = entry[1].upper()
        if key not in gene_info:
            continue
        # The gene_info record itself is extended (not a copy), exactly as
        # before, so the 'gene_info' context value carries these keys too.
        record = gene_info[key]
        recorded_info[key] = record
        record['SEQUENCE'] = entry[3]
        record['CHAIN'] = entry[0]
        record['ORGANISM'] = entry[2]
        library_calculator = LibraryCalculator(aa_sequence=entry[3])
        record['LIBRARY_SIZE'] = \
            library_calculator.calculate_library_size(full_saturation=True)
        # Wrap around the palette: indexing by position alone raised
        # IndexError as soon as there were more annotations than colors.
        record['COLOR'] = colors[position % len(colors)]

    tot_library_size = sum(
        record['LIBRARY_SIZE'] for record in recorded_info.values())

    print("recorded_info: %s" % (recorded_info,))
    print("sequence annotations: %s" % (sequence_annotations,))
    print("gene info: %s" % (gene_info,))

    return render(request, self.template_name, {
        'tot_library_size': tot_library_size,
        'recorded_info': recorded_info,
        'resolution': resolution,
        'experiment': experiment,
        'colors': colors,
        'msg': self.msg,
        'target_residue_files': self.target_residue_files,
        'number_of_structures': number_of_structures,
        'str_chains': chains_in_asymmetric_unit,
        'title': title,
        'authors': authors,
        'gene_info': gene_info,
        'spt_ref_file': spt_ref_file,
        'code': pdb_code,
        'chains': chains,
        'genes': genes,
        'organisms': organisms,
        'pdb_sequences': pdb_sequences,
        'sequence_annotations': sequence_annotations,
    })