def __init__(self, motif_type=''):
    """Initialise the base class and the per-run bookkeeping attributes.

    motif_type -- stored upper-cased in `self.motif_type`.

    The releases directory is read from the `locations` section of
    `self.config` (presumably populated by the base-class initialiser).
    """
    MotifAtlasBaseClass.__init__(self)
    self.success = False
    releases_dir = self.config['locations']['releases_dir']
    self.motifs_root = releases_dir
    self.motif_type = motif_type.upper()
    # both work lists start out empty
    self.done, self.folders = [], []
def import_loops(self, Loops, l, pdb_id, loop_type):
    """Merge the loops extracted from one pdb file into the database.

    Loops     -- indexable collection of loop structures, or 0 when
                 nothing was found for this pdb.
    l         -- number of entries in `Loops`.
    pdb_id    -- pdb the loops belong to.
    loop_type -- loop type label stored with every record.

    When `Loops` is 0 the pdb is only marked as analyzed. Otherwise every
    loop gets an id, is merged into the session as an `AllLoops` record,
    the mat files are saved and the pdb is marked as analyzed. Any
    exception is handed to `MotifAtlasBaseClass._crash`.
    """
    try:
        if Loops == 0:
            self.mark_pdb_as_analyzed(pdb_id, loop_type)
            return

        for index in xrange(l):
            entry = Loops[index]
            table = entry.AllLoops_table
            loop_id = self._get_loop_id(table.full_id, pdb_id, loop_type)
            entry.Filename = loop_id
            record = AllLoops(id=loop_id,
                              type=loop_type,
                              pdb=pdb_id,
                              # the last three characters of the loop id
                              sequential_id=loop_id[-3:],
                              length=int(entry.NumNT[0][0]),
                              seq=table.seq,
                              r_seq=table.r_seq,
                              nwc_seq=table.nwc,
                              r_nwc_seq=table.r_nwc,
                              pdb_file=entry.PDBFilename,
                              nt_ids=table.full_id,
                              loop_name=table.loop_name)
            session.merge(record)

        self.save_mat_files(Loops)
        self.mark_pdb_as_analyzed(pdb_id, loop_type)
        logging.info('%s from %s successfully imported', loop_type, pdb_id)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self,e)
def __init__(self):
    """Initialise the base class and the loop-search settings.

    Reads the loop search directory and the precomputed mat-file directory
    from the `locations` section of `self.config`, and defines the
    patterns used to recognise loop ids and pdb ids.
    """
    MotifAtlasBaseClass.__init__(self)
    self.loopSearchDir = self.config['locations']['loops_search_dir']
    self.precomputedData = self.config['locations']['loops_mat_files']
    # Raw strings: `\w` and `\d` are regex escapes, not string escapes.
    # The resulting pattern text is unchanged.
    self.loop_regex = r'(IL|HL)_\w{4}_\d{3}'
    self.pdb_regex = r'^[0-9A-Za-z]{4}$'
    self.update = True  # determines whether to update existing values in the db
def __init__(self):
    """Initialise the base class, make sure the cache directory exists and
    set the base url.

    The cache directory comes from `self.config['locations']['cache']`.
    It is created together with any missing parent directories.

    Raises OSError if the directory cannot be created.
    """
    MotifAtlasBaseClass.__init__(self)
    self.cache_dir = self.config['locations']['cache']
    if not os.path.exists(self.cache_dir):
        try:
            os.makedirs(self.cache_dir)
        except OSError:
            # Another process may have created the directory between the
            # existence check and makedirs; only a real failure is re-raised.
            if not os.path.isdir(self.cache_dir):
                raise
    self.baseurl = 'http://rna.bgsu.edu/rna3dhub'
def __init__(self):
    """Initialise the base class, file settings and the list of known units.

    `self.known` starts with the four standard nucleotides and is extended
    with every `modified_unit` returned by the
    `PdbModifiedCorrespondecies` query.
    """
    MotifAtlasBaseClass.__init__(self)
    self.commit_every = 100
    self.file_types = ['.pdb', '.pdb1']
    fr3d_root = self.config['locations']['fr3d_root']
    self.pdb_files_folder = os.path.join(fr3d_root, 'FR3D', 'PDBFiles')
    self.known = ['A', 'C', 'G', 'U']
    modified_units = session.query(PdbModifiedCorrespondecies.modified_unit)
    self.known.extend(row.modified_unit for row in modified_units.all())
def __init__(self):
    """Initialise the base class and the clustering-run settings.

    The FR3D root comes from `self.config['locations']['fr3d_root']`;
    the matlab input file `loops.txt` is placed directly inside it.
    """
    MotifAtlasBaseClass.__init__(self)

    # run state
    self.success = False
    self.num_jobs = 4
    self.retries_left = 3
    self.script_prefix = 'aAa_script_'

    # work lists
    self.pdb_ids, self.loop_ids = [], []
    self.best_loops = []  # loops to be clustered

    # file locations
    self.fr3d_root = self.config['locations']['fr3d_root']
    self.mlab_input_filename = os.path.join(self.fr3d_root, 'loops.txt')
def save_mat_files(self,Loops):
    """Hand the Loops structure array back to matlab so that it can write
    the .mat files into the configured `loops_mat_files` location.

    A status of 0 is logged as success; any other status is passed, with
    the matlab error message, to `MotifAtlasBaseClass._crash`.
    """
    MotifAtlasBaseClass._setup_matlab(self)
    outcome = self.mlab.aSaveLoops(Loops,
                                   self.config['locations']['loops_mat_files'],
                                   nout=2)
    status, err_msg = outcome
    if status == 0:
        logging.info('mat files saved')
        return
    MotifAtlasBaseClass._crash(self,err_msg)
def __init__(self):
    """Initialise the base class and the pdb download settings.

    locations -- where the pdb files will be placed
    pdbs      -- list of the files to download

    Also sets the email subject in `self.config` to 'Pdb File Sync'.
    """
    MotifAtlasBaseClass.__init__(self)
    # remote endpoints
    self.baseurl = 'http://www.rcsb.org/pdb/files/'
    self.ba_url = 'http://www.pdb.org/pdb/rest/getEntityInfo?structureId='
    self.filetypes = ['.pdb', '.pdb1', '.cif']
    # both lists start out empty
    self.locations, self.pdbs = [], []
    self.config['email']['subject'] = 'Pdb File Sync'
def __init__(self):
    """Initialise the base class and the non-redundant list settings.

    Defines the resolution names and their labels (parallel lists), calls
    `self.list_done()` after resetting `self.done`, and stores the sorted
    directory listing of the configured `nrlists_dir`.
    """
    MotifAtlasBaseClass.__init__(self)
    self.temp_file = 'temp.csv'
    self.nrlists_root = self.config['locations']['nrlists_dir']

    # (resolution name, label) pairs, kept in matching order
    resolution_pairs = [
        ('1,5A', '1.5'),
        ('2A', '2.0'),
        ('2,5A', '2.5'),
        ('3A', '3.0'),
        ('3,5A', '3.5'),
        ('4A', '4.0'),
        ('20A', '20.0'),
        ('All_Resolution', 'all'),
    ]
    self.resolutions = [name for name, _ in resolution_pairs]
    self.resolution_labels = [label for _, label in resolution_pairs]

    self.done = []
    self.list_done()
    self.lists = sorted(os.listdir(self.nrlists_root))
    self.success = False  # status of the current update
def loop_qa(self, pdb_id, release_id):
    """Run the matlab loop quality assurance on one pdb file.

    pdb_id     -- pdb to analyze.
    release_id -- passed on to the csv import.

    A non-empty matlab error message is logged as a warning and nothing
    is imported. Otherwise the file reported by matlab is imported and
    the pdb is marked as analyzed for 'qa'.
    """
    logging.info('QA on %s', pdb_id)
    MotifAtlasBaseClass._setup_matlab(self)

    ifn, err_msg = self.mlab.aLoopQualityAssurance(pdb_id, nout=2)

    if err_msg != '':
        logging.warning('Error %s in pdb %s' % (err_msg, pdb_id))
        return

    self.__import_qa_from_csv(ifn, release_id)
    self.mark_pdb_as_analyzed(pdb_id,'qa')
def main(argv):
    """RNA 3D Hub update entry point.

    Runs every update step in order and sends a report at the end. If any
    step fails, the traceback is logged and a failure report is sent on a
    best-effort basis.

    argv -- accepted for the command-line interface; not used here.
    """
    # Bound before the try block so the failure handler can tell whether
    # the base object was ever created.
    m = None
    try:
        m = MotifAtlasBaseClass()
        m.start_logging()

        pdb_ids = get_pdb_info()

        update_loops(pdb_ids)
        update_pairwise_annotations(pdb_ids)
        update_unit_ids(pdb_ids)
        update_unit_ordering(pdb_ids)
        update_coordinates(pdb_ids)
        update_redundant_nucleotides(pdb_ids)
        update_best_chains_and_models(pdb_ids)

        # must follow best chain and model update
        cluster_motifs('IL')
        import_motifs('IL')

        cluster_motifs('HL')
        import_motifs('HL')

        # must follow motif clustering
        update_loop_annotations()

        # must follow unit id updates
        export_data(pdb_ids)

        # TODO annotate all pdb files with motifs
        # TODO compute new non-redundant lists, import into the database

        update_cache()

        logging.info('Update completed')
        m.send_report()
    except:
        # The bare except is kept on purpose: it preserves the original
        # catch-all behaviour so a failure report is attempted for every
        # kind of failure.
        try:
            logging.critical('Update failed')
            # format_exc() formats the exception being handled; its only
            # optional argument is a traceback depth limit.
            logging.critical(traceback.format_exc())
            if m is not None:
                m.set_email_subject('RNA 3D Hub update failed')
                m.send_report()
        except:
            # Reporting is best effort; a failure here must not mask the
            # original problem.
            pass
def check_loop_quality(self, pdbs):
    """Run loop QA on every pdb and record a new loop release.

    pdbs -- pdb ids to check.

    A `LoopRelease` is created with the mode configured under
    `release_mode/loops`, and its id is passed to `loop_qa` for each pdb.
    The release is added and committed only when `pdbs` is non-empty.
    Any exception is handed to `MotifAtlasBaseClass._crash`.
    """
    try:
        logging.info('Loop Quality Assurance')
        release_mode = self.config['release_mode']['loops']
        release = LoopRelease(mode=release_mode)

        for pdb_id in pdbs:
            self.loop_qa(pdb_id, release.id)

        if pdbs:
            session.add(release)
            session.commit()

        logging.info('Loop QA complete')
        logging.info('%s', '='*40)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self,e)
def __init__(self, ensembles=None, release_mode="", release_description="", upload_mode=""):
    """Initialise the base class and reset all release bookkeeping.

    ensembles           -- stored as `self.c`.
    release_mode        -- stored as given.
    release_description -- stored as given.
    upload_mode         -- stored as given.

    Every collection attribute starts out empty.
    """
    MotifAtlasBaseClass.__init__(self)

    # caller-supplied settings
    self.c = ensembles  # collections, NRCollectionMerger
    self.upload_mode = upload_mode
    self.release_mode = release_mode
    self.release_description = release_description

    # release contents
    self.motifs, self.loops, self.history = [], [], []
    self.final_ids = {}
    self.intersection, self.release_diff = [], []

    # group-level differences
    self.added_groups, self.removed_groups = [], []
    self.updated_groups, self.old_updated_groups = [], []
    self.same_groups = []

    # pdb-level differences
    self.added_pdbs, self.removed_pdbs = [], []
def extract_and_import_loops(self, pdbs, recalculate=None):
    """Loops over `pdbs`, extracts and imports all loops.

    pdbs        -- list of pdb ids.
    recalculate -- True/False forces the same choice for every loop type;
                   None (default) reads the setting of each loop type from
                   `self.config['recalculate'][loop_type]`.

    When recalculating, every pdb in `pdbs` is processed; otherwise only
    those returned by `filter_out_analyzed_pdbs`. Any exception is handed
    to `MotifAtlasBaseClass._crash`.
    """
    try:
        for loop_type in self.loop_types:
            logging.info('Extracting %s' % loop_type)
            # Resolve the setting into a per-iteration local. Overwriting
            # `recalculate` itself would make the first loop type's config
            # value apply to all remaining loop types.
            if recalculate is None:
                redo = self.config['recalculate'][loop_type]
            else:
                redo = recalculate
            if redo:
                pdb_list = pdbs[:]
            else:
                pdb_list = self.filter_out_analyzed_pdbs(pdbs, loop_type)
            for pdb_id in pdb_list:
                logging.info('Extracting %s from %s', loop_type, pdb_id)
                (Loops, l) = self.extract_loops(pdb_id, loop_type)
                self.import_loops(Loops, l, pdb_id, loop_type)
        logging.info('%s', '='*40)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self,e)
def setUp(self):
    """runs the entire pipeline"""
    self.success = False
    self.clean_up_database()

    base = MotifAtlasBaseClass()
    base.start_logging()
    logging.info('Initializing update')

    # get new pdb files
    pdb_loader = PdbInfoLoader()
    pdb_loader.get_all_rna_pdbs()

    # override pdb files with a smaller set
    pdb_loader.pdbs = ['1FG0','1HLX']

    # extract all loops and import into the database
    extractor = LoopExtractor()
    extractor.extract_and_import_loops(pdb_loader.pdbs)

    # do loop QA, import into the database. Create a new loop release.
    checker = LoopQualityChecker()
    checker.check_loop_quality(pdb_loader.pdbs)

    # import pairwise interactions annotated by FR3D
    interactions = PairwiseInteractionsLoader()
    interactions.import_interactions(pdb_loader.pdbs)

    # import coordinates and distances into the database
    geometry = DistancesAndCoordinatesLoader()
    geometry.import_distances(pdb_loader.pdbs)
    geometry.import_coordinates(pdb_loader.pdbs)

    # import info about redundant nucleotides
    redundant = RedundantNucleotidesLoader()
    redundant.import_redundant_nucleotides(pdb_loader.pdbs)

    # import best chains and models
    best = BestChainsAndModelsLoader()
    best.import_best_chains_and_models(pdb_loader.pdbs)

    self.success = True
def load_loop_positions(self):
    """Update the loop_positions table by loading data from the mat files
    stored in the PrecomputedData folder.

    Every sub-folder of `self.precomputedData` whose name matches
    `self.pdb_regex` is passed to matlab, which reports a csv file. Each
    csv row holds (loop_id, position, nt_id, bulge, flanking, border).
    Existing rows are updated only when `self.update` is set; new rows are
    added. After each folder the session is committed and the csv file is
    removed. A non-empty matlab error message is handed to
    `MotifAtlasBaseClass._crash`.
    """
    if not self.mlab:
        self._setup_matlab()

    # loop over directories
    for folder in os.listdir(self.precomputedData):
        if not re.search(self.pdb_regex, folder):
            continue
        logging.info('Importing loop annotations from %s', folder)

        [outputFile, err_msg] = self.mlab.loadLoopPositions(
            os.path.join(self.precomputedData, folder), nout=2)

        if err_msg != '':
            MotifAtlasBaseClass._crash(self, err_msg)
        else:
            # The context manager closes the csv file before it is
            # deleted, also when a row fails to import.
            with open(outputFile) as csv_file:
                reader = csv.reader(csv_file, delimiter=',', quotechar='"')
                for row in reader:
                    (loop_id, position, nt_id, bulge, flanking, border) = row
                    existing = session.query(LoopPositions). \
                                       filter(LoopPositions.loop_id==loop_id). \
                                       filter(LoopPositions.position==position). \
                                       filter(LoopPositions.border==border). \
                                       first()
                    if existing:
                        if self.update:
                            existing.flanking = int(flanking)
                            existing.bulge = int(bulge)
                            existing.nt_id = nt_id
                            existing.border = int(border)
                            session.merge(existing)
                        else:
                            logging.info('Keeping existing annotations')
                    else:
                        session.add(LoopPositions(loop_id=loop_id,
                                                  position=position,
                                                  nt_id=nt_id,
                                                  flanking=int(flanking),
                                                  bulge=int(bulge),
                                                  border=int(border)))
            session.commit()
            os.remove(outputFile)  # delete temporary csv file
def import_redundant_nucleotides(self, pdbs, recalculate=False):
    """Run matlab on each pdb and import its redundant nucleotides.

    pdbs        -- pdb ids to consider.
    recalculate -- when falsy, the value configured under
                   `recalculate/redundant_nts` is used instead.

    When recalculating, old data for `pdbs` is deleted and every pdb is
    processed; otherwise only those returned by
    `filter_out_analyzed_pdbs`. A matlab error message or any exception
    is handed to `MotifAtlasBaseClass._crash`.
    """
    try:
        logging.info("Importing redundant nucleotides")
        recalculate = recalculate or self.config["recalculate"]["redundant_nts"]

        if not recalculate:
            pdb_list = self.filter_out_analyzed_pdbs(pdbs, "redundant_nts")
        else:
            pdb_list = pdbs
            self.__delete_old_data(pdbs)

        if pdb_list:
            MotifAtlasBaseClass._setup_matlab(self)
            for pdb_file in pdb_list:
                logging.info("Running matlab on %s", pdb_file)
                result = self.mlab.loadRedundantNucleotides(pdb_file, nout=2)
                ifn, err_msg = result
                if err_msg != "":
                    MotifAtlasBaseClass._crash(self, err_msg)
                else:
                    self.__import_temporary_file(ifn, pdb_file)
                self.mark_pdb_as_analyzed(pdb_file, "redundant_nts")

        logging.info("%s", "=" * 40)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self, e)
def import_best_chains_and_models(self, pdbs, recalculate=False):
    """Run matlab on each pdb and import its best chains and models.

    pdbs        -- pdb ids to consider.
    recalculate -- when falsy, the value configured under
                   `recalculate/best_chains_and_models` is used instead.

    When recalculating, old data for `pdbs` is deleted and every pdb is
    processed; otherwise only those returned by
    `filter_out_analyzed_pdbs`. The chains reported by matlab are joined
    with commas before the import. A matlab error message or any
    exception is handed to `MotifAtlasBaseClass._crash`.
    """
    try:
        logging.info("Importing best chains and models")
        recalculate = (recalculate or
                       self.config["recalculate"]["best_chains_and_models"])

        if not recalculate:
            pdb_list = self.filter_out_analyzed_pdbs(pdbs, "best_chains_and_models")
        else:
            pdb_list = pdbs
            self.__delete_old_data(pdbs)

        if pdb_list:
            MotifAtlasBaseClass._setup_matlab(self)
            for pdb_file in pdb_list:
                logging.info("Running matlab on %s", pdb_file)
                # example return values: 'ABC', '1,2', ''
                result = self.mlab.loadBestChainsAndModels(pdb_file, nout=3)
                best_chains, best_models, err_msg = result
                best_chains = ",".join(list(best_chains))
                if err_msg != "":
                    MotifAtlasBaseClass._crash(self, err_msg)
                else:
                    self.__import_into_db(pdb_file, best_chains, best_models)
                self.mark_pdb_as_analyzed(pdb_file, "best_chains_and_models")

        logging.info("%s", "=" * 40)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self, e)
def load_loop_searches(self):
    """Import loop search results that are not yet in the database.

    directory structure:
        loopSearchDir filesep IL_1S72_001 filesep IL_1S72_001_IL_1J5E_001.mat

    For every sub-folder of `self.loopSearchDir` matching
    `self.loop_regex`: the no-candidates file is read, the mat files not
    yet imported are passed to matlab, and every row of the csv file
    reported by matlab is stored through `_store_in_database`. The csv
    file is removed afterwards. A non-empty matlab error message is
    handed to `MotifAtlasBaseClass._crash`.
    """
    # loop over directories
    for loop_id in os.listdir(self.loopSearchDir):
        if not re.search(self.loop_regex, loop_id):
            continue
        logging.info('Importing %s searches', loop_id)
        loop_dir = os.path.join(self.loopSearchDir, loop_id)

        # read in No_candidates.txt if it exists
        self._read_no_candidates_file(loop_id)

        # get stored loop searches and list all matfiles
        imported = self._get_imported_loop_searches(loop_id)
        matfiles = self._get_saved_mat_files(os.path.join(loop_dir, '*.mat'))
        toImport = matfiles - imported
        if not toImport:
            continue
        toImport = [os.path.join(loop_dir, x + '.mat') for x in toImport]

        if not self.mlab:
            self._setup_matlab()

        # run matlab to create a temporary csv file with results
        [outputFile, err_msg] = self.mlab.loadLoopSearchFile(','.join(toImport),
                                                             loop_dir,
                                                             nout=2)

        if err_msg != '':
            MotifAtlasBaseClass._crash(self, err_msg)
        else:
            # The context manager closes the csv file before it is
            # deleted, also when storing a row fails.
            with open(outputFile) as csv_file:
                reader = csv.reader(csv_file, delimiter=',', quotechar='"')
                for row in reader:
                    (loop_id1, loop_id2, disc, nt_list1, nt_list2) = row
                    self._store_in_database(loop_id1, loop_id2, disc,
                                            nt_list1, nt_list2)
            os.remove(outputFile)  # delete temporary csv file
def extract_loops(self, pdb_id, loop_type):
    """Ask matlab to extract the loops of `loop_type` from `pdb_id`.

    Returns (Loops, l), where Loops is an array of FR3D File structures
    and l is its length, or (0, 0) when matlab reports no loops. A
    non-empty matlab error message or any exception is handed to
    `MotifAtlasBaseClass._crash`.
    """
    try:
        MotifAtlasBaseClass._setup_matlab(self)
        # Loops - array of FR3D File structures. l - its length
        Loops, l, err_msg = self.mlab.extractLoops(pdb_id, loop_type, nout=3)

        if err_msg != '':
            MotifAtlasBaseClass._crash(self,err_msg)

        if Loops == 0:
            logging.info('No %s in %s', loop_type, pdb_id)
            return (0, 0)

        logging.info('Found %i loops', l)
        return (Loops, l)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self,e)
def import_interactions(self, pdbs, recalculate=False):
    """Determines what files need to be analyzed, deletes stored data if
    necessary, loops over the pdbs, runs matlab on each of them
    independently, matlab generates a temporary csv file, it's imported
    and immediately deleted.

    pdbs        -- pdb ids to consider.
    recalculate -- when falsy, the value configured under
                   `recalculate/interactions` is used instead.

    Matlab status 0 triggers the csv import, status 2 means the pdb file
    has no nucleotides, and any other status is logged and handed to
    `MotifAtlasBaseClass._crash` together with the error message.
    """
    try:
        logging.info('Inside import_interactions')
        recalculate = recalculate or self.config['recalculate']['interactions']

        if not recalculate:
            pdb_list = self.filter_out_analyzed_pdbs(pdbs,'interactions')
        else:
            pdb_list = pdbs
            self.__delete_interactions(pdbs)

        if pdb_list:
            MotifAtlasBaseClass._setup_matlab(self)
            for pdb_file in pdb_list:
                logging.info('Running matlab on %s', pdb_file)
                result = self.mlab.loadInteractions(pdb_file,nout=3)
                ifn, status, err_msg = result
                # the status is read from the first cell of the returned value
                status = status[0][0]
                if status == 0:
                    self.__import_interactions_from_csv(ifn, pdb_file)
                elif status == 2:  # no nucleotides in the pdb file
                    logging.info('Pdb file %s has no nucleotides', pdb_file)
                else:
                    logging.warning('Matlab error code %i when analyzing %s',
                                    status, pdb_file)
                    MotifAtlasBaseClass._crash(self,err_msg)
                self.mark_pdb_as_analyzed(pdb_file,'interactions')

        self.success = True
        logging.info('%s', '='*40)
    except:
        e = sys.exc_info()[1]
        MotifAtlasBaseClass._crash(self,e)
def __init__(self):
    """Initialise the base class; `success` starts out as False."""
    MotifAtlasBaseClass.__init__(self)
    # flipped elsewhere once the work has finished
    self.success = False
def __init__(self):
    """Initialise the base class and the list of loop type labels."""
    MotifAtlasBaseClass.__init__(self)
    # loop type labels handled by this object
    self.loop_types = [
        'IL',
        'HL',
        'J3',
    ]
def __init__(self):
    """Initialise the base class; no extra state is added here."""
    MotifAtlasBaseClass.__init__(self)
def __init__(self):
    """Initialise the base class; `output` starts as an empty string."""
    MotifAtlasBaseClass.__init__(self)
    # starts empty
    self.output = ''
def __init__(self):
    """Initialise the base class and the pdb file settings.

    The pdb files folder is `FR3D/PDBFiles` under the configured
    `fr3d_root` location.
    """
    MotifAtlasBaseClass.__init__(self)
    self.pdb_file_types = ['.pdb', '.pdb1']
    fr3d_root = self.config['locations']['fr3d_root']
    self.pdb_files_folder = os.path.join(fr3d_root, 'FR3D', 'PDBFiles')