예제 #1
0
 def __init__(self, motif_type=''):
     """Prepare loader state for one motif type.

     The base class is initialised first. The releases directory is then
     read from ``self.config['locations']['releases_dir']`` and
     `motif_type` is stored upper-cased.
     """
     MotifAtlasBaseClass.__init__(self)

     # values derived from the configuration and the argument
     self.motifs_root = self.config['locations']['releases_dir']
     self.motif_type = motif_type.upper()

     # bookkeeping starts out empty and unsuccessful
     self.folders = []
     self.done = []
     self.success = False
예제 #2
0
 def import_loops(self, Loops, l, pdb_id, loop_type):
     """Merge the loops extracted from one pdb file into the session.

     If `Loops` compares equal to 0 the pdb is only marked as analyzed.
     Otherwise, for every index below `l`, a loop id is obtained from
     ``_get_loop_id``, written to that entry's ``Filename`` field and used
     as the id of an ``AllLoops`` row handed to ``session.merge``.
     Afterwards the mat files are saved and the pdb is marked as analyzed.
     Any exception raised on the way is passed to ``_crash``.
     """
     try:
         if Loops == 0:
             self.mark_pdb_as_analyzed(pdb_id, loop_type)
             return
         for idx in xrange(l):
             loop_id = self._get_loop_id(Loops[idx].AllLoops_table.full_id,
                                         pdb_id, loop_type)
             Loops[idx].Filename = loop_id
             # column values, read from `Loops` in the order listed here
             columns = {
                 'id': loop_id,
                 'type': loop_type,
                 'pdb': pdb_id,
                 # the sequential id is the last three characters of the id
                 'sequential_id': loop_id[-3:],
                 'length': int(Loops[idx].NumNT[0][0]),
                 'seq': Loops[idx].AllLoops_table.seq,
                 'r_seq': Loops[idx].AllLoops_table.r_seq,
                 'nwc_seq': Loops[idx].AllLoops_table.nwc,
                 'r_nwc_seq': Loops[idx].AllLoops_table.r_nwc,
                 'pdb_file': Loops[idx].PDBFilename,
                 'nt_ids': Loops[idx].AllLoops_table.full_id,
                 'loop_name': Loops[idx].AllLoops_table.loop_name,
             }
             session.merge(AllLoops(**columns))
         self.save_mat_files(Loops)
         self.mark_pdb_as_analyzed(pdb_id, loop_type)
         logging.info('%s from %s successfully imported', loop_type, pdb_id)
     except:
         err = sys.exc_info()[1]
         MotifAtlasBaseClass._crash(self, err)
예제 #3
0
    def __init__(self):
        """Read the loop-search locations from the config and set defaults.

        After the base class is initialised, the loop search directory and
        the mat-file directory are taken from ``self.config['locations']``.
        ``loop_regex`` and ``pdb_regex`` are the patterns used to recognise
        loop ids and pdb ids; ``update`` controls whether existing database
        values are overwritten.
        """
        MotifAtlasBaseClass.__init__(self)

        locations = self.config['locations']
        self.loopSearchDir = locations['loops_search_dir']
        self.precomputedData = locations['loops_mat_files']
        # Raw strings: ``\w`` and ``\d`` are regex escapes, not string
        # escapes, and are flagged as invalid escape sequences otherwise.
        self.loop_regex = r'(IL|HL)_\w{4}_\d{3}'
        self.pdb_regex = r'^[0-9A-Za-z]{4}$'
        self.update = True # determines whether to update existing values in the db
예제 #4
0
 def __init__(self):
     """Initialise the base class and make sure the cache directory exists.

     The directory is read from ``self.config['locations']['cache']`` and
     created, together with any missing parent directories, when absent.

     Raises:
         OSError: if the directory cannot be created and nothing exists at
             that path afterwards.
     """
     MotifAtlasBaseClass.__init__(self)
     self.cache_dir = self.config['locations']['cache']
     try:
         # Create unconditionally instead of checking first: a check
         # followed by mkdir can fail if the path appears in between.
         os.makedirs(self.cache_dir)
     except OSError:
         # An already existing path is fine; anything else is a real error.
         if not os.path.exists(self.cache_dir):
             raise
     self.baseurl = 'http://rna.bgsu.edu/rna3dhub'
예제 #5
0
    def __init__(self):
        """Set import defaults and collect the known unit names.

        The pdb files folder is ``<fr3d_root>/FR3D/PDBFiles`` with
        ``fr3d_root`` taken from the config. ``self.known`` starts with the
        four standard names and is extended with every ``modified_unit``
        returned by the ``PdbModifiedCorrespondecies`` query.
        """
        MotifAtlasBaseClass.__init__(self)
        self.commit_every = 100
        self.file_types = ['.pdb', '.pdb1']
        fr3d_root = self.config['locations']['fr3d_root']
        self.pdb_files_folder = os.path.join(fr3d_root, 'FR3D', 'PDBFiles')

        self.known = ['A', 'C', 'G', 'U']
        modified = session.query(PdbModifiedCorrespondecies.modified_unit)
        self.known.extend(row.modified_unit for row in modified.all())
예제 #6
0
 def __init__(self):
     """Set the initial state and the file locations under ``fr3d_root``.

     ``fr3d_root`` comes from ``self.config['locations']``; the matlab
     input file is ``loops.txt`` inside that directory.
     """
     MotifAtlasBaseClass.__init__(self)

     # run state
     self.success = False
     self.retries_left = 3
     self.num_jobs = 4

     # id collections, all empty to begin with
     self.pdb_ids = []
     self.loop_ids = []
     self.best_loops = [] # loops to be clustered

     # locations and file names
     self.fr3d_root = self.config['locations']['fr3d_root']
     self.mlab_input_filename = os.path.join(self.fr3d_root, 'loops.txt')
     self.script_prefix = 'aAa_script_'
예제 #7
0
 def save_mat_files(self,Loops):
     """Hand `Loops` back to matlab so it can write the .mat files.

     The target directory is ``self.config['locations']['loops_mat_files']``.
     A status of 0 from ``aSaveLoops`` is logged as success; any other
     status passes the returned error message to ``_crash``.
     """
     MotifAtlasBaseClass._setup_matlab(self)
     save_loops = self.mlab.aSaveLoops
     mat_dir = self.config['locations']['loops_mat_files']
     status, err_msg = save_loops(Loops, mat_dir, nout=2)
     if status == 0:
         logging.info('mat files saved')
         return
     MotifAtlasBaseClass._crash(self, err_msg)
예제 #8
0
 def __init__(self):
     """Set download urls, file types and the report subject.

     ``locations`` is where pdbs will be placed and ``pdbs`` is the list
     of files to download; both start out empty. The email subject in
     ``self.config`` is set to 'Pdb File Sync'.
     """
     MotifAtlasBaseClass.__init__(self)

     # work lists, filled in later
     self.pdbs = []
     self.locations = []

     # remote sources and the file extensions handled
     self.filetypes = ['.pdb', '.pdb1', '.cif']
     self.baseurl = 'http://www.rcsb.org/pdb/files/'
     self.ba_url = 'http://www.pdb.org/pdb/rest/getEntityInfo?structureId='

     self.config['email']['subject'] = 'Pdb File Sync'
예제 #9
0
 def __init__(self):
     """Set up state for the non-redundant list update.

     The lists directory is read from
     ``self.config['locations']['nrlists_dir']``. ``list_done`` is called
     once the attributes above it are in place, and ``self.lists`` is the
     sorted directory listing of that location.
     """
     MotifAtlasBaseClass.__init__(self)
     self.temp_file = 'temp.csv'
     self.nrlists_root = self.config['locations']['nrlists_dir']
     # NOTE(review): the two lists have the same length and look parallel
     # (folder name / label) - confirm against their users
     self.resolutions = [
         '1,5A', '2A', '2,5A', '3A', '3,5A', '4A', '20A', 'All_Resolution',
     ]
     self.resolution_labels = [
         '1.5', '2.0', '2.5', '3.0', '3.5', '4.0', '20.0', 'all',
     ]
     self.done = []
     self.list_done()
     listing = os.listdir(self.nrlists_root)
     self.lists = sorted(listing)
     self.success = False # status of the current update
예제 #10
0
    def loop_qa(self, pdb_id, release_id):
        """Run matlab loop quality assurance on one pdb file.

        ``aLoopQualityAssurance`` returns a file name and an error message.
        A non-empty message is logged as a warning and nothing is imported;
        otherwise the file is imported for `release_id` and the pdb is
        marked as analyzed for 'qa'.
        """
        logging.info('QA on %s', pdb_id)
        MotifAtlasBaseClass._setup_matlab(self)

        ifn, err_msg = self.mlab.aLoopQualityAssurance(pdb_id, nout=2)

        if err_msg != '':
            message = 'Error %s in pdb %s' % (err_msg, pdb_id)
            logging.warning(message)
            return

        self.__import_qa_from_csv(ifn, release_id)
        self.mark_pdb_as_analyzed(pdb_id,'qa')
예제 #11
0
def main(argv):
    """RNA 3D Hub update entry point.

    Runs the update steps in the order they depend on each other and sends
    a report at the end. If any step raises, the failure and its traceback
    are logged, the report subject is changed and the report is sent;
    nothing is re-raised.

    `argv` is accepted but not used.
    """

    # Bound before the try block so the failure handler can tell whether the
    # base class was ever constructed.
    m = None

    try:
        m = MotifAtlasBaseClass()
        m.start_logging()

        pdb_ids = get_pdb_info()

        update_loops(pdb_ids)

        update_pairwise_annotations(pdb_ids)

        update_unit_ids(pdb_ids)

        update_unit_ordering(pdb_ids)

        update_coordinates(pdb_ids)

        update_redundant_nucleotides(pdb_ids)

        update_best_chains_and_models(pdb_ids)

        # must follow best chain and model update
        cluster_motifs('IL')
        import_motifs('IL')

        cluster_motifs('HL')
        import_motifs('HL')

        # must follow motif clustering
        update_loop_annotations()

        # must follow unit id updates
        export_data(pdb_ids)

        # TODO annotate all pdb files with motifs

        # TODO compute new non-redundant lists, import into the database

        update_cache()

        logging.info('Update completed')
        m.send_report()

    except:
        # Deliberately catches everything so that a failure report goes out
        # whatever stopped the update.
        try:
            logging.critical('Update failed')
            # format_exc() picks up the active exception by itself; its
            # first parameter is a frame limit, not an exc_info tuple.
            logging.critical(traceback.format_exc())
            if m is not None:
                m.set_email_subject('RNA 3D Hub update failed')
                m.send_report()
        except:
            # Reporting is best effort: record the problem, do not crash.
            logging.exception('Could not send the failure report')
예제 #12
0
 def check_loop_quality(self, pdbs):
     """Run loop QA on every pdb in `pdbs` under one new loop release.

     A ``LoopRelease`` is created with the mode from
     ``self.config['release_mode']['loops']`` and its id is passed to
     ``loop_qa`` for each pdb. The release is added to the session and
     committed only when `pdbs` is non-empty. Any exception is passed to
     ``_crash``.
     """
     try:
         logging.info('Loop Quality Assurance')
         release = LoopRelease(mode=self.config['release_mode']['loops'])
         for pdb_id in pdbs:
             self.loop_qa(pdb_id, release.id)
         if pdbs:
             session.add(release)
             session.commit()
         logging.info('Loop QA complete')
         separator = '=' * 40
         logging.info('%s', separator)
     except:
         err = sys.exc_info()[1]
         MotifAtlasBaseClass._crash(self, err)
예제 #13
0
 def __init__(self, ensembles=None, release_mode="", release_description="", upload_mode=""):
     """Store the constructor arguments and reset all result containers.

     `ensembles` is kept as ``self.c``; the three string arguments are
     stored under their own names. Every other attribute starts empty.
     """
     MotifAtlasBaseClass.__init__(self)

     # constructor arguments
     self.c = ensembles  # collections, NRCollectionMerger
     self.release_description = release_description
     self.release_mode = release_mode
     self.upload_mode = upload_mode

     # loaded data
     self.motifs = []
     self.loops = []
     self.history = []
     self.final_ids = {}

     # release comparison results
     self.intersection = []
     self.release_diff = []
     self.added_groups = []
     self.removed_groups = []
     self.updated_groups = []
     self.old_updated_groups = []
     self.same_groups = []
     self.added_pdbs = []
     self.removed_pdbs = []
예제 #14
0
 def extract_and_import_loops(self, pdbs, recalculate=None):
     """Loop over `pdbs`, extracting and importing all loops.

     For every loop type in ``self.loop_types`` each selected pdb is
     passed to ``extract_loops`` and the result to ``import_loops``.

     Args:
         pdbs: list of pdb ids to process.
         recalculate: if None, the flag is read from
             ``self.config['recalculate'][loop_type]`` separately for
             each loop type; any other value applies to all loop types.
             When the flag is true all of `pdbs` are processed, otherwise
             only those returned by ``filter_out_analyzed_pdbs``.

     Any exception is passed to ``_crash``.
     """
     try:
         for loop_type in self.loop_types:
             logging.info('Extracting %s', loop_type)
             # Resolve the flag per loop type without touching the
             # argument: overwriting `recalculate` here would make the
             # first loop type's config value apply to all later ones.
             if recalculate is None:
                 redo = self.config['recalculate'][loop_type]
             else:
                 redo = recalculate
             if redo:
                 pdb_list = pdbs[:]
             else:
                 pdb_list = self.filter_out_analyzed_pdbs(pdbs, loop_type)
             for pdb_id in pdb_list:
                 logging.info('Extracting %s from %s', loop_type, pdb_id)
                 (Loops,l) = self.extract_loops(pdb_id, loop_type)
                 self.import_loops(Loops, l, pdb_id, loop_type)
                 logging.info('%s', '='*40)
     except:
         e = sys.exc_info()[1]
         MotifAtlasBaseClass._crash(self,e)
예제 #15
0
    def setUp(self):
        """Run the entire pipeline on a fixed two-structure set.

        ``self.success`` is False while the steps run and becomes True only
        after the last one has returned.
        """
        self.success = False

        self.clean_up_database()

        base = MotifAtlasBaseClass()
        base.start_logging()
        logging.info('Initializing update')

        # get new pdb files
        pdb_info = PdbInfoLoader()
        pdb_info.get_all_rna_pdbs()

        # override pdb files with a smaller set
        pdb_info.pdbs = ['1FG0','1HLX']

        # extract all loops and import into the database
        extractor = LoopExtractor()
        extractor.extract_and_import_loops(pdb_info.pdbs)

        # do loop QA, import into the database. Create a new loop release.
        checker = LoopQualityChecker()
        checker.check_loop_quality(pdb_info.pdbs)

        # import pairwise interactions annotated by FR3D
        interactions = PairwiseInteractionsLoader()
        interactions.import_interactions(pdb_info.pdbs)

        # import coordinates and distances into the database
        geometry = DistancesAndCoordinatesLoader()
        geometry.import_distances(pdb_info.pdbs)
        geometry.import_coordinates(pdb_info.pdbs)

        # import info about redundant nucleotides
        redundant = RedundantNucleotidesLoader()
        redundant.import_redundant_nucleotides(pdb_info.pdbs)

        # import best chains and models
        best = BestChainsAndModelsLoader()
        best.import_best_chains_and_models(pdb_info.pdbs)

        self.success = True
예제 #16
0
    def load_loop_positions(self):
        """Update the loop_positions table from the precomputed mat files.

        Every entry of ``self.precomputedData`` whose name matches
        ``self.pdb_regex`` is passed to matlab's ``loadLoopPositions``,
        which returns the name of a csv file and an error message. A
        non-empty message is handed to ``_crash``. Otherwise each csv row
        (loop_id, position, nt_id, bulge, flanking, border) is looked up in
        ``LoopPositions`` by loop id, position and border: a match is
        overwritten when ``self.update`` is true and left alone otherwise;
        rows without a match are added. After each folder the session is
        committed and the csv file is deleted.
        """
        if not self.mlab:
            self._setup_matlab()

        # loop over directories
        for folder in os.listdir(self.precomputedData):
            if re.search(self.pdb_regex, folder):
                logging.info('Importing loop annotations from %s', folder)
            else:
                continue
            [outputFile, err_msg] = self.mlab.loadLoopPositions(os.path.join(self.precomputedData, folder), nout=2)
            if err_msg != '':
                MotifAtlasBaseClass._crash(self, err_msg)
            else:
                # The file is deleted below, so close it explicitly first:
                # a handle left open leaks and can block the deletion.
                csv_file = open(outputFile)
                try:
                    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
                    for row in reader:
                        (loop_id, position, nt_id, bulge, flanking, border) = row
                        existing = session.query(LoopPositions). \
                                           filter(LoopPositions.loop_id==loop_id). \
                                           filter(LoopPositions.position==position). \
                                           filter(LoopPositions.border==border). \
                                           first()
                        if existing:
                            if self.update:
                                existing.flanking = int(flanking)
                                existing.bulge = int(bulge)
                                existing.nt_id = nt_id
                                existing.border = int(border)
                                session.merge(existing)
                            else:
                                logging.info('Keeping existing annotations')
                        else:
                            session.add(LoopPositions(loop_id=loop_id,
                                                      position=position,
                                                      nt_id=nt_id,
                                                      flanking=int(flanking),
                                                      bulge=int(bulge),
                                                      border=int(border)))
                finally:
                    csv_file.close()
                session.commit()
                os.remove(outputFile) # delete temporary csv file
    def import_redundant_nucleotides(self, pdbs, recalculate=False):
        """Run matlab on each selected pdb and import its redundant nucleotides.

        A false `recalculate` is replaced by
        ``self.config["recalculate"]["redundant_nts"]``. When the resulting
        flag is true, old data for `pdbs` is deleted and all of them are
        processed; otherwise only the pdbs returned by
        ``filter_out_analyzed_pdbs`` are. For each pdb the file named by
        ``loadRedundantNucleotides`` is imported when the error message is
        empty, else the message goes to ``_crash``; the pdb is then marked
        as analyzed. Any exception is passed to ``_crash``.
        """
        try:
            logging.info("Importing redundant nucleotides")
            if not recalculate:
                recalculate = self.config["recalculate"]["redundant_nts"]
            if not recalculate:
                pdb_list = self.filter_out_analyzed_pdbs(pdbs, "redundant_nts")
            else:
                pdb_list = pdbs
                self.__delete_old_data(pdbs)

            # matlab is only started when there is something to analyze
            if pdb_list:
                MotifAtlasBaseClass._setup_matlab(self)

            for pdb_file in pdb_list:
                logging.info("Running matlab on %s", pdb_file)
                csv_name, err_msg = self.mlab.loadRedundantNucleotides(pdb_file, nout=2)
                if err_msg == "":
                    self.__import_temporary_file(csv_name, pdb_file)
                else:
                    MotifAtlasBaseClass._crash(self, err_msg)

                self.mark_pdb_as_analyzed(pdb_file, "redundant_nts")

            separator = "=" * 40
            logging.info("%s", separator)
        except:
            err = sys.exc_info()[1]
            MotifAtlasBaseClass._crash(self, err)
    def import_best_chains_and_models(self, pdbs, recalculate=False):
        """Run matlab on each selected pdb and import its best chains and models.

        A false `recalculate` is replaced by
        ``self.config["recalculate"]["best_chains_and_models"]``. When the
        resulting flag is true, old data for `pdbs` is deleted and all of
        them are processed; otherwise only the pdbs returned by
        ``filter_out_analyzed_pdbs`` are. The best chains returned by
        matlab are joined with commas before being imported. A non-empty
        error message goes to ``_crash``; the pdb is then marked as
        analyzed. Any exception is passed to ``_crash``.
        """
        try:
            logging.info("Importing best chains and models")
            if not recalculate:
                recalculate = self.config["recalculate"]["best_chains_and_models"]
            if not recalculate:
                pdb_list = self.filter_out_analyzed_pdbs(pdbs, "best_chains_and_models")
            else:
                pdb_list = pdbs
                self.__delete_old_data(pdbs)

            # matlab is only started when there is something to analyze
            if pdb_list:
                MotifAtlasBaseClass._setup_matlab(self)

            for pdb_file in pdb_list:
                logging.info("Running matlab on %s", pdb_file)
                # matlab returns e.g. 'ABC', '1,2', ''
                chains, best_models, err_msg = self.mlab.loadBestChainsAndModels(pdb_file, nout=3)
                best_chains = ",".join(chains)

                if err_msg == "":
                    self.__import_into_db(pdb_file, best_chains, best_models)
                else:
                    MotifAtlasBaseClass._crash(self, err_msg)

                self.mark_pdb_as_analyzed(pdb_file, "best_chains_and_models")

            separator = "=" * 40
            logging.info("%s", separator)
        except:
            err = sys.exc_info()[1]
            MotifAtlasBaseClass._crash(self, err)
예제 #19
0
    def load_loop_searches(self):
        """Import loop search results that are not yet in the database.

        Directory structure:
        loopSearchDir filesep IL_1S72_001 filesep IL_1S72_001_IL_1J5E_001.mat

        For every entry of ``self.loopSearchDir`` matching
        ``self.loop_regex``, the saved mat files minus the already imported
        searches are passed to matlab's ``loadLoopSearchFile``, which
        returns the name of a csv file and an error message. A non-empty
        message is handed to ``_crash``; otherwise every csv row is stored
        in the database and the csv file is deleted.
        """
        # loop over directories
        for loop_id in os.listdir(self.loopSearchDir):
            if re.search(self.loop_regex, loop_id):
                logging.info('Importing %s searches', loop_id)
            else:
                continue

            # read in No_candidates.txt if it exists
            self._read_no_candidates_file(loop_id)

            # get stored loop searches and list all matfiles
            imported = self._get_imported_loop_searches(loop_id)
            matfiles = self._get_saved_mat_files(os.path.join(self.loopSearchDir, loop_id, '*.mat'))
            toImport = matfiles - imported

            if len(toImport) == 0:
                continue

            toImport = [os.path.join(self.loopSearchDir, loop_id, x + '.mat') for x in toImport]

            if not self.mlab:
                self._setup_matlab()

            # run matlab to create a temporary csv file with results
            [outputFile, err_msg] = self.mlab.loadLoopSearchFile(','.join(toImport), os.path.join(self.loopSearchDir, loop_id), nout=2)

            if err_msg != '':
                MotifAtlasBaseClass._crash(self, err_msg)
            else:
                # The file is deleted below, so close it explicitly first:
                # a handle left open leaks and can block the deletion.
                csv_file = open(outputFile)
                try:
                    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
                    for row in reader:
                        (loop_id1, loop_id2, disc, nt_list1, nt_list2) = row
                        self._store_in_database(loop_id1, loop_id2, disc, nt_list1, nt_list2)
                finally:
                    csv_file.close()
                os.remove(outputFile) # delete temporary csv file
예제 #20
0
    def extract_loops(self, pdb_id, loop_type):
        """Ask matlab for all loops of `loop_type` in `pdb_id`.

        Returns ``(0, 0)`` when matlab reports no loops (``Loops == 0``),
        otherwise ``(Loops, l)`` as returned by ``extractLoops``. A
        non-empty error message and any exception are passed to ``_crash``.
        """
        try:
            MotifAtlasBaseClass._setup_matlab(self)
            # Loops - array of FR3D File structures. l - its length
            Loops, l, err_msg = self.mlab.extractLoops(pdb_id, loop_type, nout=3)

            if err_msg != '':
                MotifAtlasBaseClass._crash(self, err_msg)

            if Loops == 0:
                logging.info('No %s in %s', loop_type, pdb_id)
                return (0, 0)

            logging.info('Found %i loops', l)
            return (Loops, l)
        except:
            err = sys.exc_info()[1]
            MotifAtlasBaseClass._crash(self, err)
예제 #21
0
    def import_interactions(self, pdbs, recalculate=False):
        """Import the pairwise interactions of the selected pdbs.

        Determines which files need to be analyzed, deletes stored data if
        necessary and runs matlab on each pdb independently. Matlab returns
        a csv file name, a status and an error message: status 0 imports
        the csv file, status 2 means the pdb file has no nucleotides, and
        any other status is logged and passed to ``_crash``. Each pdb is
        then marked as analyzed, and ``self.success`` is set once all are
        done. Any exception is passed to ``_crash``.
        """
        try:
            logging.info('Inside import_interactions')
            if not recalculate:
                recalculate = self.config['recalculate']['interactions']
            if not recalculate:
                pdb_list = self.filter_out_analyzed_pdbs(pdbs,'interactions')
            else:
                pdb_list = pdbs
                self.__delete_interactions(pdbs)

            # matlab is only started when there is something to analyze
            if pdb_list:
                MotifAtlasBaseClass._setup_matlab(self)

            for pdb_file in pdb_list:
                logging.info('Running matlab on %s', pdb_file)
                csv_name, raw_status, err_msg = self.mlab.loadInteractions(pdb_file,nout=3)
                # the status is the single element at [0][0]
                code = raw_status[0][0]
                if code == 0:
                    self.__import_interactions_from_csv(csv_name, pdb_file)
                elif code == 2: # no nucleotides in the pdb file
                    logging.info('Pdb file %s has no nucleotides', pdb_file)
                else:
                    logging.warning('Matlab error code %i when analyzing %s',
                                     code, pdb_file)
                    MotifAtlasBaseClass._crash(self, err_msg)

                self.mark_pdb_as_analyzed(pdb_file,'interactions')
            self.success = True
            separator = '=' * 40
            logging.info('%s', separator)
        except:
            err = sys.exc_info()[1]
            MotifAtlasBaseClass._crash(self, err)
예제 #22
0
 def __init__(self):
     """Initialise the base class and set ``self.success`` to False."""
     MotifAtlasBaseClass.__init__(self)
     self.success = False
예제 #23
0
 def __init__(self):
     """Initialise the base class and define the loop types to handle."""
     MotifAtlasBaseClass.__init__(self)
     # loop type codes stored on the instance as ``self.loop_types``
     self.loop_types = ['IL','HL','J3']
예제 #24
0
 def __init__(self):
     """Initialise the base class; no additional state is set here."""
     MotifAtlasBaseClass.__init__(self)
예제 #25
0
 def __init__(self):
     """Initialise the base class and start with an empty ``self.output``."""
     MotifAtlasBaseClass.__init__(self)
     self.output = ''
예제 #26
0
 def __init__(self):
     """Set the pdb file types and the folder that holds the pdb files.

     The folder is ``<fr3d_root>/FR3D/PDBFiles`` with ``fr3d_root`` read
     from ``self.config['locations']``.
     """
     MotifAtlasBaseClass.__init__(self)
     self.pdb_file_types = ['.pdb', '.pdb1']
     fr3d_root = self.config['locations']['fr3d_root']
     self.pdb_files_folder = os.path.join(fr3d_root, 'FR3D', 'PDBFiles')