def runLizsSet(PredictionSet, ProtocolID):
    raise colortext.Exception("Do you really want to run this?")
    colortext.printf("\nAdding Liz's data set to %s prediction set." % PredictionSet, "lightgreen")
    KeepHETATMLines = False
    FilterTester.openDB()

    # Filter by the Liz Kellogg set of experiments
    er1 = ExperimentResultSet(ddGdb)
    ef1 = ExperimentFilter()
    ef1.setSource(ExperimentFilter.LizKellogg)
    er1.addFilter(ef1)
    FilterTester.test(er1)

    experimentIDs = sorted(list(er1.getFilteredIDs()))
    colortext.message("\nThe number of unique experiments is %d.\n" % len(experimentIDs))

    ddG_connection = db.ddG()
    count = 0
    for experimentID in experimentIDs:
        ddG_connection.addPrediction(experimentID, PredictionSet, ProtocolID, KeepHETATMLines, StoreOutput = True)
        count += 1
        if count >= 10:
            colortext.write(".")
            colortext.flush()
            count = 0
    print("")
def addLinsJobs(PredictionSet, ProtocolID):
    raise colortext.Exception("Do you really want to run this?")
    colortext.printf("\nAdding Lin's mutations to %s prediction set." % PredictionSet, "lightgreen")
    KeepHETATMLines = False
    FilterTester.openDB()

    # Filter by the DummySource set of experiments
    er1 = ExperimentResultSet(ddGdb)
    ef1 = ExperimentFilter()
    ef1.setSource(ExperimentFilter.DummySource)
    er1.addFilter(ef1)

    # Filter by the particular PDB
    sr = StructureResultSet(ddGdb, 'WHERE PDB_ID="3K0NB_lin"')
    er1 = ExperimentResultSet.fromIDs(ddGdb, er1.getFilteredIDs()).filterBySet(sr)
    FilterTester.test(er1)

    experimentIDs = sorted(list(er1.getFilteredIDs()))
    colortext.message("\nThe number of unique experiments is %d.\n" % len(experimentIDs))

    ddG_connection = db.ddG()
    count = 0
    for experimentID in experimentIDs:
        ddG_connection.addPrediction(experimentID, PredictionSet, ProtocolID, KeepHETATMLines, StoreOutput = True)
        count += 1
        if count >= 10:
            colortext.write(".")
            colortext.flush()
            count = 0
    print("")
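# Usage sketch (assumption, not part of the original scripts): both helpers above follow the
# same pattern - open the database, build an ExperimentResultSet through filters, then call
# ddG_connection.addPrediction() once per experiment ID. Each raises immediately as a safety
# guard, so that line must be removed deliberately before either function will do anything.
# The prediction set name and protocol ID below are placeholders.
def _example_populate_prediction_set():
    example_prediction_set = 'ExamplePredictionSet'  # hypothetical prediction set name
    example_protocol_id = 'ExampleProtocolID'        # hypothetical protocol identifier
    addLinsJobs(example_prediction_set, example_protocol_id)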
def check_JSON_dataset(dataset_ID):
    # I substitute PDB IDs elsewhere, so this function runs a simple sanity check to make sure
    # that the mutations still look okay. (This is only a shallow check - the mutations may
    # still be incorrect.)
    colortext.message('Reading PDB IDs...')
    PDB_ids = set([record['PDBFileID'] for record in JSON_datasets[dataset_ID]['data']])

    colortext.message('Loading %d PDBs...' % len(PDB_ids))
    for PDB_id in PDB_ids:
        if not(cached_pdbs.get(PDB_id)):
            print('Reading %s' % PDB_id)
            colortext.write('.', 'yellow')
            sys.stdout.flush()
            cached_pdbs[PDB_id] = PDB(ddGdb.execute_select('SELECT Content FROM PDBFile WHERE ID=%s', parameters=(PDB_id,))[0]['Content'])
    print('')

    count = 0
    for record in JSON_datasets[dataset_ID]['data']:
        pdb_id = record['PDBFileID']
        p = cached_pdbs[pdb_id]
        #colortext.printf(pdb_id, color='cyan')
        #pprint.pprint(record)
        #pprint.pprint(record['Mutations'])
        for m in record['Mutations']:
            chain_id = m['Chain']
            residue_id = m['ResidueID']
            residue_aa = m['WildTypeAA']
            padded_id = ChainResidueID2String(chain_id, residue_id)
            if p.atom_sequences[chain_id][padded_id].ResidueAA != residue_aa:
                print(pdb_id, chain_id, residue_id, residue_aa)
                print(p.atom_sequences[chain_id][padded_id].ResidueAA, residue_aa)
            assert(p.atom_sequences[chain_id][padded_id].ResidueAA == residue_aa)
            count += 1
    print('Successfully checked %d datapoints.' % count)
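# Usage sketch (assumption): check_JSON_dataset() relies on the module-level JSON_datasets,
# cached_pdbs and ddGdb objects being initialised elsewhere in the script. It asserts that the
# wild-type residue of every mutation matches the residue in the PDB ATOM sequence. The dataset
# key below is a placeholder.
def _example_check_dataset():
    dataset_id = 'ExampleDatasetID'  # hypothetical key into JSON_datasets
    check_JSON_dataset(dataset_id)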
def showAllEligibleProTherm(PredictionSet, ProtocolID, KeepHETATMLines):
    #inserter = JobInserter()
    colortext.printf("\nAdding ProTherm mutations to %s prediction set." % PredictionSet, "lightgreen")
    #ddGdb = dbi.ddGDatabase()

    MAX_RESOLUTION = 2.1
    MAX_NUMRES_PROTHERM = 350
    MAX_STANDARD_DEVIATION = 1.0

    FilterTester.openDB()

    if False:
        t1 = time.time()
        er1 = ExperimentResultSet(ddGdb)
        er1.addFilter(ExperimentFilter.OnSource(ExperimentFilter.ProTherm))
        er1.addFilter(ExperimentFilter.NumberOfMutations(1, 1))
        er1.addFilter(ExperimentFilter.NumberOfChains(1, 1))
        er1.addFilter(ExperimentFilter.StandardDeviation(None, MAX_STANDARD_DEVIATION))
        er1.addFilter(StructureFilter.Resolution(None, MAX_RESOLUTION))
        er1.addFilter(StructureFilter.Techniques(StructureFilter.XRay))
        FilterTester.test(er1)
        t2 = time.time()
        print(t2 - t1)

    # This method usually takes around 65% of the time of the method above
    t1 = time.time()
    ef1 = ExperimentFilter()
    ef1.setSource(ExperimentFilter.ProTherm)
    er1 = ExperimentResultSet(ddGdb)
    er1.addFilter(ExperimentFilter.OnSource(ExperimentFilter.ProTherm))
    FilterTester.test(er1)
    ef1.setNumberOfMutations(1, 1)
    ef1.setNumberOfChains(1, 1)
    ef1.setStandardDeviation(None, MAX_STANDARD_DEVIATION)
    sf1 = StructureFilter()
    sf1.setResolution(None, MAX_RESOLUTION)
    sf1.setTechniques(StructureFilter.XRay)
    er1 = ExperimentResultSet(ddGdb)
    er1.addFilter(ef1)
    er1.addFilter(sf1)
    FilterTester.test(er1)
    t2 = time.time()
    print(t2 - t1)

    experimentIDs = sorted(list(er1.getFilteredIDs()))
    colortext.message("\nThe number of unique ProTherm experiments with:\n\t- one mutation;\n\t- structures solved by X-ray diffraction and with <= %d residues;\n\t- a maximum standard deviation in experimental results of <= %0.2f;\n\t- and a resolution of <= %0.2f Angstroms.\nis %d.\n" % (MAX_NUMRES_PROTHERM, MAX_STANDARD_DEVIATION, MAX_RESOLUTION, len(experimentIDs)))

    ddG_connection = db.ddG()
    count = 0
    sys.exit(0)
    print("")
    for experimentID in experimentIDs:
        ddG_connection.addPrediction(experimentID, PredictionSet, ProtocolID, KeepHETATMLines, StoreOutput = True)
        count += 1
        if count >= 10:
            colortext.write(".")
            colortext.flush()
            count = 0
    print("")
def __init__(self, UniProtAC, XML = None, cache_dir = None, silent = True):
    if cache_dir and not(os.path.exists(cache_dir)):
        raise Exception("The cache directory %s does not exist." % cache_dir)

    self.UniProtAC = UniProtAC
    self.silent = silent

    # Get XML
    cached_filepath = None
    if XML == None:
        protein_xml = None

        # Read the XML from the cache if it is there; otherwise fetch it from UniProt
        if cache_dir:
            cached_filepath = os.path.join(cache_dir, '%s.xml' % UniProtAC)
        if cached_filepath and os.path.exists(cached_filepath):
            protein_xml = read_file(cached_filepath)
        else:
            if not silent:
                colortext.write("Retrieving %s\n" % UniProtAC, "cyan")
            url = 'http://www.uniprot.org/uniprot/%s.xml' % UniProtAC
            protein_xml = http_get(url)
            if not(protein_xml.strip()):
                raise EmptyUniProtACXMLException('The file %s is empty.' % UniProtAC)
            if cached_filepath:
                write_file(cached_filepath, protein_xml)
        self.XML = protein_xml
    else:
        self.XML = XML

    self.recommended_name = None
    self.submitted_names = []
    self.alternative_names = []

    # Get DOM (parse self.XML so that a caller-supplied XML string is also handled)
    try:
        self._dom = parseString(self.XML)
    except:
        if cached_filepath:
            raise Exception("The UniProtAC XML for '%s' was invalid. The cached file is located at %s. Check this file - if it is not valid XML then delete the file and rerun the script." % (UniProtAC, cached_filepath))
        else:
            raise Exception("The UniProtAC XML for '%s' was invalid." % UniProtAC)
    main_tags = self._dom.getElementsByTagName("uniprot")
    assert(len(main_tags) == 1)
    entry_tags = main_tags[0].getElementsByTagName("entry")
    assert(len(entry_tags) == 1)
    self.entry_tag = entry_tags[0]

    self._parse_evidence_tag()
    self._parse_sequence_tag()
    self._parse_protein_tag()
    self._parse_organism_tag()
    self._parse_subsections()
    self._parse_PDB_mapping()
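# Usage sketch (assumption): constructing a UniProtACEntry downloads the UniProt XML for an
# accession (or reads it from the cache) and parses the name, sequence, organism and PDB
# mapping sections. The accession below is illustrative only; a cache directory, if given,
# must already exist or the constructor raises, so cache_dir = None skips caching.
def _example_uniprotac_entry():
    entry = UniProtACEntry('P0A7Y4', cache_dir = None, silent = False)  # example accession
    print(entry.recommended_name)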
def test_pdbml_speed():

    test_cases = [
        '1WSY',
        '1YGV',
        '487D',
        '1HIO',
        '1H38',
        '3ZKB',
    ]

    for test_case in test_cases:
        print("\n")
        colortext.message("Creating PDBML object for %s" % test_case)
        #PDBML.retrieve(test_case, cache_dir = cache_dir)

        print("")
        colortext.printf("Using the old minidom class", color = 'cyan')
        t1 = time.clock()
        p_minidom = PDBML_slow.retrieve(test_case, cache_dir = cache_dir)
        t2 = time.clock()
        colortext.message("Done in %0.2fs!" % (t2 - t1))

        print("")
        colortext.printf("Using the new sax class", color = 'cyan')
        t1 = time.clock()
        p_sax = PDBML.retrieve(test_case, cache_dir = cache_dir)
        t2 = time.clock()
        colortext.message("Done in %0.2fs!" % (t2 - t1))

        colortext.write("\nEquality test: ", color = 'cyan')
        try:
            assert(p_minidom.atom_to_seqres_sequence_maps.keys() == p_sax.atom_to_seqres_sequence_maps.keys())
            for c, s_1 in p_minidom.atom_to_seqres_sequence_maps.iteritems():
                s_2 = p_sax.atom_to_seqres_sequence_maps[c]
                assert(str(s_1) == str(s_2))
            colortext.message("passed\n")
        except:
            colortext.error("failed\n")
def _get_XML(self):
    uparc_xml = None
    cached_filepath = None
    if self.cache_dir:
        cached_filepath = os.path.join(self.cache_dir, '%s.xml' % self.UniParcID)

    # Read the XML from the cache if it is there; otherwise fetch it from UniProt
    if cached_filepath and os.path.exists(cached_filepath):
        uparc_xml = read_file(cached_filepath)
    else:
        if not self.silent:
            colortext.write("Retrieving %s\n" % self.UniParcID, "cyan")
        url = 'http://www.uniprot.org/uniparc/%s.xml' % self.UniParcID
        uparc_xml = http_get(url)
        if cached_filepath:
            write_file(cached_filepath, uparc_xml)
    self.XML = uparc_xml

    # Get DOM
    self._dom = parseString(uparc_xml)
    main_tags = self._dom.getElementsByTagName("uniparc")
    assert(len(main_tags) == 1)
    entry_tags = main_tags[0].getElementsByTagName("entry")
    assert(len(entry_tags) == 1)
    self.entry_tag = entry_tags[0]
def main(prediction_ids = None, memory_free = '3.0G', cfg = None):
    # This uses the version of Rosetta from your cluster template settings file
    settings = parse_settings.get_dict()
    rosetta_scripts_path = settings['local_rosetta_installation_path'] + '/source/bin/' + 'rosetta_scripts' + settings['local_rosetta_binary_type']
    ppi_api = get_interface_with_config_file(rosetta_scripts_path = rosetta_scripts_path, rosetta_database_path = '/home/kyleb/rosetta/working_branches/alascan/database')

    t1, t2 = None, None

    # Read the keep_hetatm_lines and keep_all_lines optional settings
    keep_hetatm_lines = False
    keep_all_lines = False
    try:
        keep_hetatm_lines = cfg.keep_hetatm_lines
    except:
        colortext.warning('Note: keep_hetatm_lines is not specified in {0}. Defaulting to {1}.'.format(sys.argv[1], keep_hetatm_lines))
    try:
        keep_all_lines = cfg.keep_all_lines
    except:
        colortext.warning('Note: keep_all_lines is not specified in {0}. Defaulting to {1}.'.format(sys.argv[1], keep_all_lines))
    prediction_set_id = cfg.prediction_set_id

    if prediction_ids == None:
        assert(len(sys.argv) > 1)
        cfg = importlib.import_module(sys.argv[1], package = None)
        protocol_name = cfg.protocol_name
        suppress_warnings = True

        if not ppi_api.prediction_set_exists(prediction_set_id):
            print 'Creating new prediction set:', prediction_set_id
            t1 = time.time()
            ppi_api.add_prediction_set(prediction_set_id, halted = True, priority = 7, allow_existing_prediction_set = False, description = cfg.prediction_set_description)

            # Populate the prediction set with jobs from a (tagged subset of a) user dataset
            print 'Created PredictionSet:', prediction_set_id
            ppi_api.add_prediction_run(prediction_set_id, cfg.user_dataset_name, keep_all_lines = keep_all_lines, keep_hetatm_lines = keep_hetatm_lines, tagged_subset = cfg.tagged_subset, extra_rosetta_command_flags = '-ignore_zero_occupancy false -ignore_unrecognized_res', show_full_errors = True, suppress_warnings = suppress_warnings)
            t2 = time.time()

        existing_job = False
        end_job_name = '%s_%s' % (getpass.getuser(), prediction_set_id)
        if not os.path.exists(job_output_directory):
            os.makedirs(job_output_directory)
        for d in os.listdir(job_output_directory):
            if os.path.isdir(os.path.join(job_output_directory, d)) and end_job_name in d:
                print 'Found existing job:', d
                job_name = d
                existing_job = True
        if not existing_job:
            job_name = '%s-%s' % (time.strftime("%y%m%d"), end_job_name)
            ppi_api.add_development_protocol_command_lines(prediction_set_id, protocol_name, 'minimize_with_cst', '')
            # 2x because bugs
            ppi_api.add_development_protocol_command_lines(prediction_set_id, protocol_name, 'minimize_with_cst', '')

        prediction_ids = sorted(ppi_api.get_prediction_ids(prediction_set_id))
        output_dir = os.path.join(job_output_directory, job_name)
    else:
        # Prediction_ids passed in
        job_name = '%s-%s_%s-rerun' % (time.strftime("%y%m%d"), getpass.getuser(), prediction_set_id)
        output_dir = os.path.join(job_output_directory, job_name)
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        existing_job = False

    settings['scriptname'] = prediction_set_id + '_run'
    settings['tasks_per_process'] = 5
    settings['mem_free'] = memory_free
    settings['output_dir'] = output_dir
    settings['rosetta_args_list'] = [
        '-in:file:fullatom',
        '-ignore_zero_occupancy false',
        '-ignore_unrecognized_res',
        '-fa_max_dis 9.0',
        '-ddg::harmonic_ca_tether 0.5',
        '-ddg::constraint_weight 1.0',
        '-ddg::out_pdb_prefix min_cst_0.5',
        '-ddg::sc_min_only false',
    ]
    settings['rosetta_args_list'].extend(cfg.extra_flags)
    print settings['rosetta_args_list']

    # Now get run settings from database and save to pickle file
    job_dict = {}
    output_data_dir = os.path.join(settings['output_dir'], 'data')
    if not os.path.isdir(output_data_dir):
        os.makedirs(output_data_dir)

    if t1 != None and t2 != None and len(prediction_ids) != 0:
        print('Time taken for {0} predictions: {1}s ({2}s per prediction).'.format(len(prediction_ids), t2-t1, (t2-t1)/len(prediction_ids)))
        print('File cache statistics:')
        pprint.pprint(ppi_api.get_file_content_cache_stats())

    settings['numjobs'] = len(prediction_ids)
    app_name = 'minimize_with_cst'
    settings['appname'] = app_name

    print('')
    t1 = time.time()

    # Progress counter setup
    colortext.message('Creating input data for %d predictions.' % (len(prediction_ids)))
    count, records_per_dot = 0, 50
    print("|" + ("*" * (int(len(prediction_ids)/records_per_dot)-2)) + "|")
    for prediction_id in prediction_ids:
        # Progress counter
        count += 1
        if count % records_per_dot == 0:
            colortext.write(".", "cyan", flush = True)

        # Check if job already ran
        prediction_id_dir = os.path.join(output_dir, str(prediction_id))
        if existing_job:
            if os.path.isdir(prediction_id_dir):
                pdb_output_files = [x for x in os.listdir(prediction_id_dir) if '.pdb' in x]
            else:
                pdb_output_files = []
            if len(pdb_output_files) >= 1:
                print 'Skipping', prediction_id
                settings['numjobs'] = settings['numjobs'] - 1
                continue
        if os.path.isdir(prediction_id_dir):
            print 'Job directory %s already exists, deleting' % prediction_id_dir
            shutil.rmtree(prediction_id_dir)
        # else:
        #     print 'Creating new job directory %s' % prediction_id_dir

        job_data_dir = os.path.join(output_data_dir, str(prediction_id))

        # Allow us to resume from an interrupted setup
        truncate_content = None
        all_files_exist = os.path.exists(job_data_dir) and os.path.exists(os.path.join(job_data_dir, '.ready'))
        if all_files_exist:
            truncate_content = 0

        job_details = ppi_api.get_job_details(prediction_id, truncate_content = truncate_content)
        file_tuples = [] # List of (name, contents) pairs
        for file_info in job_details['Files']['Input']:
            file_tuples.append((file_info['Filename'], file_info['Content']))
        substitution_parameters = json.loads(job_details['JSONParameters'])

        # Scrub the folder
        if not all_files_exist:
            if os.path.isdir(job_data_dir):
                shutil.rmtree(job_data_dir)
            os.makedirs(job_data_dir)

        files_dict = {} # Maps file name to path relative to the output directory
        for file_name, file_contents in file_tuples:
            new_file_location = os.path.join(job_data_dir, file_name)
            if not all_files_exist:
                if '.pdb' in file_name:
                    if keep_hetatm_lines or keep_all_lines:
                        write_file(new_file_location, file_contents)
                    else:
                        write_file(new_file_location, '\n'.join([l for l in file_contents.split('\n') if l.startswith('ATOM')]))
                else:
                    with open(new_file_location, 'w') as f:
                        f.write(file_contents)
            files_dict[file_name] = os.path.relpath(new_file_location, settings['output_dir'])
        if not all_files_exist:
            write_file(os.path.join(job_data_dir, '.ready'), '')

        argdict = {
            'input_file_list' : [files_dict[substitution_parameters['%%input_pdb%%']]],
        }
        for file_name, file_location in files_dict.iteritems():
            if 'params' in file_name:
                argdict['-extra_res_fa'] = file_location
        job_dict[prediction_id] = argdict

    t2 = time.time()
    print('')
    if count != 0:
        print('Time taken for {0} predictions: {1}s ({2}s per prediction).'.format(count, t2-t1, (t2-t1)/count))
        print('File cache statistics:')
        pprint.pprint(ppi_api.get_file_content_cache_stats())
    print('')

    if len(job_dict) > 0:
        write_run_file(settings, database_run = False, job_dict = job_dict)
        print 'Job files written to directory:', os.path.abspath(output_dir)
    else:
        print 'No tasks to process, not writing job files'
def __init__(self, UniParcID, UniProtACs = None, UniProtIDs = None, cache_dir = None, silent = False):
    if cache_dir and not(os.path.exists(os.path.abspath(cache_dir))):
        raise Exception("The cache directory %s does not exist." % os.path.abspath(cache_dir))

    self.UniParcID = UniParcID
    self.cache_dir = cache_dir
    self.recommended_name = None
    self.silent = silent

    # Get AC mapping
    if not UniProtACs or UniParcID == 'UPI0000047CA3': # todo: is this UPI0000047CA3 special handling necessary?
        mapping = uniprot_map('UPARC', 'ACC', [UniParcID], cache_dir = cache_dir, silent = silent)[UniParcID]
        self.UniProtACs = mapping
    else:
        self.UniProtACs = UniProtACs

    # Get ID mapping
    if not UniProtIDs:
        mapping = uniprot_map('UPARC', 'ID', [UniParcID], cache_dir = cache_dir, silent = silent)[UniParcID]
        self.UniProtIDs = mapping
    else:
        self.UniProtIDs = UniProtIDs

    # Get FASTA
    cached_filepath = None
    if cache_dir:
        cached_filepath = os.path.join(cache_dir, '%s.fasta' % UniParcID)
    if cached_filepath and os.path.exists(cached_filepath):
        fasta = read_file(cached_filepath)
    else:
        if not silent:
            print("Getting FASTA file")
        url = 'http://www.uniprot.org/uniparc/%s.fasta' % UniParcID
        fasta = http_get(url)
        if cached_filepath:
            write_file(cached_filepath, fasta)

    # Get sequence
    header = fasta.split("\n")[0].split()
    assert(len(header) == 2)
    assert(header[0] == ">%s" % UniParcID)
    assert(header[1].startswith("status="))
    sequence = "".join(map(string.strip, fasta.split("\n")[1:]))
    self.sequence = sequence

    # Get atomic mass (and sequence again)
    self.atomic_mass = None
    self.CRC64Digest = None

    recommended_names = []
    alternative_names = []
    submitted_names = []
    self.AC_entries = {}
    subsections = ProteinSubsectionHolder(len(sequence))
    for UniProtAC in self.UniProtACs:
        #colortext.write("%s\n" % UniProtAC, 'cyan')
        try:
            AC_entry = UniProtACEntry(UniProtAC, cache_dir = self.cache_dir, silent = silent)
        except EmptyUniProtACXMLException:
            continue
        self.AC_entries[UniProtAC] = AC_entry

        # Mass sanity check
        if self.atomic_mass != None:
            assert(self.atomic_mass == AC_entry.atomic_mass)
        self.atomic_mass = AC_entry.atomic_mass

        # Sequence sanity check
        assert(self.sequence == AC_entry.sequence)

        # CRC64 sanity check
        if self.CRC64Digest != None:
            assert(self.CRC64Digest == AC_entry.CRC64Digest)
        self.CRC64Digest = AC_entry.CRC64Digest
        assert(CRC64.CRC64digest(self.sequence) == self.CRC64Digest)

        if AC_entry.recommended_name:
            found = False
            for n in recommended_names:
                if n[0] == AC_entry.recommended_name:
                    n[1] += 1
                    found = True
                    break
            if not found:
                recommended_names.append([AC_entry.recommended_name, 1])

        for alternative_name in AC_entry.alternative_names:
            found = False
            for n in alternative_names:
                if n[0] == alternative_name:
                    n[1] += 1
                    found = True
                    break
            if not found:
                alternative_names.append([alternative_name, 1])

        for submitted_name in AC_entry.submitted_names:
            found = False
            for n in submitted_names:
                if n[0] == submitted_name:
                    n[1] += 1
                    found = True
                    break
            if not found:
                submitted_names.append([submitted_name, 1])

        subsections += AC_entry.subsections
    self.subsections = subsections

    assert(len(set(UniParcMergedRecommendedNamesRemap.keys()).intersection(set(UniParcMergedSubmittedNamesRemap.keys()))) == 0)
    if UniParcID in UniParcMergedRecommendedNamesRemap:
        recommended_names = [[UniParcMergedRecommendedNamesRemap[UniParcID], 1]]
    elif UniParcID in UniParcMergedSubmittedNamesRemap:
        recommended_names = [[UniParcMergedSubmittedNamesRemap[UniParcID], 1]]

    if not silent:
        colortext.write('Subsections\n', 'orange')
        #print(subsections)

    if len(recommended_names) == 0 and len(alternative_names) == 0 and len(submitted_names) == 0:
        raise UniParcEntryStandardizationException("UniParcID %s has no recommended names." % UniParcID)
    elif len(recommended_names) == 0:
        s = ["UniParcID %s has no recommended names.\n" % UniParcID]
        if alternative_names:
            s.append("It has the following alternative names:")
            for tpl in sorted(alternative_names, key = lambda x: -x[1]):
                s.append("\n  count=%s: %s" % (str(tpl[1]).ljust(5), tpl[0]['Name']))
                if tpl[0]['Short names']:
                    s.append(" (short names: %s)" % ",".join(tpl[0]['Short names']))
                if tpl[0]['EC numbers']:
                    s.append(" (EC numbers: %s)" % ",".join(tpl[0]['EC numbers']))
        if submitted_names:
            s.append("It has the following submitted names:")
            for tpl in sorted(submitted_names, key = lambda x: -x[1]):
                s.append("\n  count=%s: %s" % (str(tpl[1]).ljust(5), tpl[0]['Name']))
                if tpl[0]['Short names']:
                    s.append(" (short names: %s)" % ",".join(tpl[0]['Short names']))
                if tpl[0]['EC numbers']:
                    s.append(" (EC numbers: %s)" % ",".join(tpl[0]['EC numbers']))
        #raise UniParcEntryStandardizationException("".join(s))
    elif len(recommended_names) > 1:
        s = ["UniParcID %s has multiple recommended names: " % UniParcID]
        for tpl in sorted(recommended_names, key = lambda x: -x[1]):
            s.append("\n  count=%s: %s" % (str(tpl[1]).ljust(5), tpl[0]['Name']))
            if tpl[0]['Short names']:
                s.append(" (short names: %s)" % ",".join(tpl[0]['Short names']))
            if tpl[0]['EC numbers']:
                s.append(" (EC numbers: %s)" % ",".join(tpl[0]['EC numbers']))
        raise UniParcEntryStandardizationException("".join(s))

    #assert(len(recommended_names) == 1) # todo: this is not always available
    #print(recommended_names)

    self.recommended_name = None
    if len(recommended_names) == 1:
        self.recommended_name = recommended_names[0][0]

    self.get_organisms()
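# Usage sketch (assumption): a UniParcEntry aggregates every UniProtKB AC entry that maps to one
# UniParc record, cross-checking the sequence, atomic mass and CRC64 digest between them. The
# identifier below is the one given special handling in the constructor above; cache_dir, if
# given, must already exist.
def _example_uniparc_entry():
    entry = UniParcEntry('UPI0000047CA3', cache_dir = None, silent = True)
    print(entry.sequence[:60])
    print(entry.recommended_name)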
def pdb_to_uniparc(pdb_ids, silent = True, cache_dir = None, manual_additions = {}):
    '''Returns a mapping {PDB ID -> List(UniParcEntry)}.
       The UniParcEntry objects have a to_dict() method which may be useful.'''

    # Map PDB IDs to UniProtKB ACs
    if not silent:
        colortext.write("Retrieving PDB to UniProtKB AC mapping: ", 'cyan')
    pdb_ac_mapping = uniprot_map('PDB_ID', 'ACC', pdb_ids, cache_dir = cache_dir, silent = silent)
    for k, v in manual_additions.iteritems():
        if k in pdb_ids:
            if pdb_ac_mapping.get(k):
                pdb_ac_mapping[k].extend(v)
                pdb_ac_mapping[k] = list(set(pdb_ac_mapping[k]))
            else:
                pdb_ac_mapping[k] = v
    if not silent:
        colortext.write("done\n", 'green')

    # Get a list of AC_IDs
    if not silent:
        colortext.write("Retrieving UniProtKB AC to UniProtKB ID mapping: ", 'cyan')
    AC_IDs = set()
    for k, v in pdb_ac_mapping.iteritems():
        AC_IDs = AC_IDs.union(set(v))
    AC_IDs = list(AC_IDs)
    if not silent:
        colortext.write("done\n", 'green')

    # Map UniProtKB ACs to UniParc IDs
    if not silent:
        colortext.write("Retrieving UniProtKB AC to UniParc ID mapping: ", 'cyan')
    ac_uniparc_mapping = uniprot_map('ACC', 'UPARC', AC_IDs, cache_dir = cache_dir, silent = silent)
    for k, v in ac_uniparc_mapping.iteritems():
        assert(len(v) == 1)
        ac_uniparc_mapping[k] = v[0]
    if not silent:
        colortext.write("done\n", 'green')

    # Map UniProtKB ACs to UniProtKB IDs
    ac_id_mapping = uniprot_map('ACC', 'ID', AC_IDs, cache_dir = cache_dir, silent = silent)
    for k, v in ac_id_mapping.iteritems():
        assert(len(v) == 1)
        ac_id_mapping[k] = v[0]

    # Create mapping from PDB IDs to UniParcEntry objects
    m = {}
    if not silent:
        colortext.message("\nRetrieving FASTA sequences for the %d PDB IDs." % len(pdb_ids))
    for pdb_id, ACs in pdb_ac_mapping.iteritems():
        if not silent:
            colortext.write("%s: " % pdb_id, "orange")
        m[pdb_id] = []
        for AC in ACs:
            entry = UniParcEntry(ac_uniparc_mapping[AC], [AC], [ac_id_mapping[AC]], cache_dir = cache_dir)
            m[pdb_id].append(entry)
            if not silent:
                colortext.write(".", "green")
        if not silent:
            print("")
    return m
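# Usage sketch (assumption): pdb_to_uniparc() chains three uniprot_map() calls
# (PDB_ID -> ACC, ACC -> UPARC, ACC -> ID) and then builds one UniParcEntry per mapped AC.
# The PDB IDs below are taken from the speed test above and are illustrative only.
def _example_pdb_to_uniparc():
    mapping = pdb_to_uniparc(['1WSY', '1HIO'], silent = False, cache_dir = None)
    for pdb_id, entries in mapping.iteritems():
        print(pdb_id, [entry.UniParcID for entry in entries])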
from klab import colortext

# Test
chars = 'A'
count = 0
for name, data in colortext.colors.iteritems():
    colortext.write(name, name)
    for effect in colortext.EFFECTS_:
        colortext.write(name, color = name, bgcolor = 'lightblue', effect = effect)
    print("")

colortext.rainbowprint("Rainbow test")
colortext.printf("\ntest1", color = 'red')
colortext.printf("test2")
colortext.bar('blue', 9, suffix = "\n")