def removeAll(self, force=False):
    '''
    Remove all bank releases and database records

    :param force: force removal even if some production dirs are frozen
    :type force: bool
    :return: bool
    '''
    if not force:
        has_freeze = False
        for prod in self.bank['production']:
            if 'freeze' in prod and prod['freeze']:
                has_freeze = True
                break
        if has_freeze:
            logging.error('Cannot remove bank, some production directories are frozen, use force if needed')
            return False

    self.banks.remove({'name': self.name})
    BmajIndex.delete_all_bank(self.name)
    bank_data_dir = self.get_data_dir()
    logging.warning('DELETE ' + bank_data_dir)
    if os.path.exists(bank_data_dir):
        shutil.rmtree(bank_data_dir)
    bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name'))
    if os.path.exists(bank_offline_dir):
        shutil.rmtree(bank_offline_dir)
    bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
    if os.path.exists(bank_log_dir) and self.no_log:
        shutil.rmtree(bank_log_dir)
    return True
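# Usage sketch (hypothetical): force removal of a bank even when some
# production directories are frozen. Assumes a configured Bank named
# 'local', as in the tests below.
bank = Bank('local')
bank.removeAll(force=True)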
def load_config(config_file=None, allow_user_config=True):
    '''
    Loads general config

    :param config_file: global.properties file path
    :type config_file: str
    :param allow_user_config: use ~/.biomaj.cfg if present
    :type allow_user_config: bool
    '''
    if config_file is None:
        env_file = os.environ.get('BIOMAJ_CONF')
        if env_file is not None and os.path.exists(env_file):
            config_file = env_file
        else:
            env_file = 'global.properties'
            if os.path.exists(env_file):
                config_file = env_file

    if config_file is None or not os.path.exists(config_file):
        raise Exception('Missing global configuration file')

    BiomajConfig.config_file = os.path.abspath(config_file)
    BiomajConfig.global_config = configparser.ConfigParser()
    if allow_user_config and os.path.exists(os.path.expanduser('~/.biomaj.cfg')):
        BiomajConfig.user_config_file = os.path.expanduser('~/.biomaj.cfg')
        BiomajConfig.user_config = configparser.ConfigParser()
        BiomajConfig.user_config.read([os.path.expanduser('~/.biomaj.cfg')])
    else:
        BiomajConfig.user_config_file = None
    BiomajConfig.global_config.read([config_file])

    # ElasticSearch indexation support (fallback= avoids NoOptionError
    # when the option is absent from global.properties)
    do_index = BiomajConfig.global_config.get('GENERAL', 'use_elastic', fallback=None) == "1"
    if do_index:
        elastic_nodes = BiomajConfig.global_config.get('GENERAL', 'elastic_nodes', fallback=None)
        if elastic_nodes:
            elastic_hosts = elastic_nodes.split(',')
        else:
            elastic_hosts = ['localhost']
        elastic_index = BiomajConfig.global_config.get('GENERAL', 'elastic_index', fallback='biomaj')
        if BiomajConfig.global_config.get('GENERAL', 'test', fallback=None) == "1":
            # Test connection to elasticsearch, if not available skip indexing for tests
            BmajIndex.skip_if_failure = True
        BmajIndex.load(index=elastic_index, hosts=elastic_hosts, do_index=do_index)
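# Usage sketch (hypothetical; the import path and the 'global.properties'
# location are assumptions): load the global configuration once, then
# read per-bank properties, as the test setUp below does.
from biomaj.config import BiomajConfig

BiomajConfig.load_config('global.properties', allow_user_config=False)
config = BiomajConfig('local')
print(config.get('data.dir'))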
def tearDown(self):
    data_dir = self.config.get('data.dir')
    lock_file = os.path.join(data_dir, 'local.lock')
    if os.path.exists(lock_file):
        os.remove(lock_file)
    self.utils.clean()
    BmajIndex.delete_all_bank('test')
def test_index(self):
    BmajIndex.do_index = True
    prod = {
        "data_dir": "/tmp/test/data",
        "formats": {
            "fasta": [{
                "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                "types": ["nucleic"],
                "tags": {"organism": "hg19"}
            }],
            "blast": [{
                "files": ["blast/chr1/chr1db"],
                "types": ["nucleic"],
                "tags": {"chr": "chr1", "organism": "hg19"}
            }]
        },
        "freeze": False,
        "session": 1416229253.930908,
        "prod_dir": "alu-2003-11-26",
        "release": "2003-11-26",
        "types": ["nucleic"]
    }
    BmajIndex.add('test', prod, True)
    query = {'query': {'match': {'bank': 'test'}}}
    res = BmajIndex.search(query)
    self.assertTrue(len(res) == 2)
def test_remove_all(self):
    self.test_index()
    query = {'query': {'match': {'bank': 'test'}}}
    BmajIndex.delete_all_bank('test')
    res = BmajIndex.search(query)
    self.assertTrue(len(res) == 0)
def setUp(self):
    BmajIndex.es = None
    self.utils = UtilsForTest()
    curdir = os.path.dirname(os.path.realpath(__file__))
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
    if not BmajIndex.do_index:
        self.skipTest("Skipping indexing tests due to elasticsearch not available")
    # Delete all banks
    b = Bank('local')
    b.banks.remove({})
    BmajIndex.delete_all_bank('local')
    self.config = BiomajConfig('local')
    data_dir = self.config.get('data.dir')
    lock_file = os.path.join(data_dir, 'local.lock')
    if os.path.exists(lock_file):
        os.remove(lock_file)
def remove_session(self, sid):
    '''
    Delete a session from db

    :param sid: id of the session
    :type sid: long
    :return: bool
    '''
    session_release = None
    _tmpbank = self.banks.find_one({'name': self.name})
    for s in _tmpbank['sessions']:
        if s['id'] == sid:
            session_release = s['release']

    # Remove the cached file lists written for this session by save_session
    cache_dir = self.config.get('cache.dir')
    download_files = os.path.join(cache_dir, 'files_' + str(sid))
    if os.path.exists(download_files):
        os.remove(download_files)
    local_files = os.path.join(cache_dir, 'local_files_' + str(sid))
    if os.path.exists(local_files):
        os.remove(local_files)

    if self.config.get_bool('keep.old.sessions'):
        logging.debug('keep old sessions')
        if session_release is not None:
            self.banks.update({'name': self.name},
                              {'$pull': {'production': {'session': sid}},
                               '$unset': {'pending.' + session_release: ''}})
        else:
            self.banks.update({'name': self.name},
                              {'$pull': {'production': {'session': sid}}})
        # Keep the session document but mark it as deleted
        self.banks.update({'name': self.name, 'sessions.id': sid},
                          {'$set': {'sessions.$.deleted': time.time()}})
    else:
        if session_release is not None:
            self.banks.update({'name': self.name},
                              {'$pull': {'sessions': {'id': sid},
                                         'production': {'session': sid}},
                               '$unset': {'pending.' + session_release: ''}})
        else:
            self.banks.update({'name': self.name},
                              {'$pull': {'sessions': {'id': sid},
                                         'production': {'session': sid}}})
    # Update object
    self.bank = self.banks.find_one({'name': self.name})
    if session_release is not None:
        BmajIndex.remove(self.name, session_release)
    return True
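# Usage sketch (hypothetical): remove the most recent session of a bank.
# Assumes a Bank named 'local' whose document carries a 'sessions' list,
# as remove_session above reads it.
bank = Bank('local')
last_sid = bank.bank['sessions'][-1]['id']
bank.remove_session(last_sid)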
def save_session(self):
    '''
    Save session in database
    '''
    self.session._session['last_update_time'] = time.time()
    self.session._session['log_file'] = self.config.log_file
    if self.use_last_session:
        # Remove last session
        self.banks.update({'name': self.name},
                          {'$pull': {'sessions': {'id': self.session._session['id']}}})
    # Insert session
    if self.session.get('action') == 'update':
        action = 'last_update_session'
    if self.session.get('action') == 'remove':
        action = 'last_remove_session'

    # Cache the downloaded and local file lists on disk, then empty them
    # in the session document before it is pushed to the database
    cache_dir = self.config.get('cache.dir')
    download_files = self.session.get('download_files')
    if download_files is not None:
        with open(os.path.join(cache_dir, 'files_' + str(self.session.get('id'))), 'w') as f_downloaded_files:
            f_downloaded_files.write(json.dumps(download_files))
        self.session.set('download_files', [])
    local_files = self.session.get('files')
    if local_files is not None:
        with open(os.path.join(cache_dir, 'local_files_' + str(self.session.get('id'))), 'w') as f_local_files:
            f_local_files.write(json.dumps(local_files))
        self.session.set('files', [])

    self.banks.update({'name': self.name},
                      {'$set': {
                          action: self.session._session['id'],
                          'properties': self.get_properties()
                      },
                       '$push': {'sessions': self.session._session}})
    BmajIndex.add(self.name, self.session._session)
    if self.session.get('action') == 'update' and not self.session.get_status(Workflow.FLOW_OVER) \
            and self.session.get('release'):
        self.banks.update({'name': self.name},
                          {'$set': {'pending.' + self.session.get('release'): self.session._session['id']}})

    if self.session.get('action') == 'update' and self.session.get_status(Workflow.FLOW_OVER) \
            and self.session.get('update'):
        # We expect that a production release has reached the FLOW_OVER status.
        # If no update is needed (same release etc...), the *update* flag of the session is set to False
        logging.debug('Bank:Save:' + self.name)
        if len(self.bank['production']) > 0:
            # Remove from database
            self.banks.update({'name': self.name},
                              {'$pull': {'production': {'release': self.session._session['release']}}})
            # Update local object
            # index = 0
            # for prod in self.bank['production']:
            #     if prod['release'] == self.session._session['release']:
            #         break
            #     index += 1
            # if index < len(self.bank['production']):
            #     self.bank['production'].pop(index)
        release_types = []
        if self.config.get('db.type'):
            release_types = self.config.get('db.type').split(',')
        release_formats = list(self.session._session['formats'].keys())
        if self.config.get('db.formats'):
            config_formats = self.config.get('db.formats').split(',')
            for config_format in config_formats:
                if config_format not in release_formats:
                    release_formats.append(config_format)
        for release_format in self.session._session['formats']:
            for release_files in self.session._session['formats'][release_format]:
                if release_files['types']:
                    for rtype in release_files['types']:
                        if rtype not in release_types:
                            release_types.append(rtype)
        prod_dir = self.session.get_release_directory()
        if self.session.get('prod_dir'):
            prod_dir = self.session.get('prod_dir')
        production = {'release': self.session.get('release'),
                      'remoterelease': self.session.get('remoterelease'),
                      'session': self.session._session['id'],
                      'formats': release_formats,
                      'types': release_types,
                      'size': self.session.get('fullsize'),
                      'data_dir': self.session._session['data_dir'],
                      'dir_version': self.session._session['dir_version'],
                      'prod_dir': prod_dir,
                      'freeze': False}
        self.bank['production'].append(production)
        self.banks.update({'name': self.name},
                          {'$push': {'production': production},
                           '$unset': {'pending.' + self.session.get('release'): ''}})
        # self.banks.update({'name': self.name},
        #                   {'$unset': 'pending.' + self.session.get('release')})
    self.bank = self.banks.find_one({'name': self.name})
def searchindex(query):
    '''
    Run a search query against the BioMAJ index

    :param query: elasticsearch query
    :type query: dict
    '''
    return BmajIndex.searchq(query)
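# Example (hypothetical): searchindex accepts the same match-query shape
# the tests above pass to BmajIndex.search.
hits = searchindex({'query': {'match': {'bank': 'test'}}})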