def __init__(self, name, options=None, no_log=False):
    """
    Load the bank record from the database, creating it when missing.

    :param name: name of the bank, must match its config file
    :type name: str
    :param options: bank options
    :type options: argparse
    :param no_log: when true, ``no_log`` is set on the options handed to
                   the bank configuration
    :type no_log: bool
    :raises Exception: if the global configuration is not loaded yet
    """
    logging.debug('Initialize ' + name)
    if BiomajConfig.global_config is None:
        raise Exception('Configuration must be loaded first')

    self.name = name
    self.depends = []
    self.no_log = no_log

    # The no_log flag is carried by the options object, which is created
    # on demand when the caller did not supply one.
    if no_log and options is None:
        options = Options()
        options.no_log = True
    elif no_log:
        options.no_log = no_log

    self.config = BiomajConfig(self.name, options)

    nb_threads = self.config.get('bank.num.threads')
    if nb_threads is not None:
        ProcessFactory.NB_THREAD = int(nb_threads)

    log_file = self.config.log_file
    if log_file is not None and log_file != 'none':
        logging.info("Log file: " + log_file)

    self.options = Options() if options is None else options

    # Open the database connection only once for the whole process.
    if MongoConnector.db is None:
        global_config = BiomajConfig.global_config
        MongoConnector(global_config.get('GENERAL', 'db.url'),
                       global_config.get('GENERAL', 'db.name'))

    self.banks = MongoConnector.banks
    self.bank = self.banks.find_one({'name': self.name})
    if self.bank is None:
        # Unknown bank: store a fresh record. self.bank is still None
        # while get_properties() is evaluated for the literal below.
        self.bank = {
            'name': self.name,
            'current': None,
            'sessions': [],
            'production': [],
            'properties': self.get_properties()
        }
        self.bank['_id'] = self.banks.insert(self.bank)

    self.session = None
    self.use_last_session = False
def setUp(self):
    """
    Prepare the fixtures shared by the tests.

    Creates the test utilities, records the directory of this test file
    and of its 'bank' sub-directory, then loads the global configuration
    without any user level configuration.
    """
    self.utils = UtilsForTest()
    self.curdir = os.path.dirname(os.path.realpath(__file__))
    # The examples path is stored with a trailing '/'.
    self.examples = os.path.join(self.curdir, 'bank') + '/'
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
def setUp(self):
    """
    Prepare the fixtures shared by the tests.

    Creates the test utilities, records the directory of this test file
    and of its 'bank' sub-directory, then loads the global configuration
    without any user level configuration.
    """
    self.utils = UtilsForTest()
    self.curdir = os.path.dirname(os.path.realpath(__file__))
    # The examples path is stored with a trailing '/'.
    self.examples = os.path.join(self.curdir, 'bank') + '/'
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
class TestElastic(unittest.TestCase):
    """
    test indexing and search
    """

    def setUp(self):
        """
        Load the configuration, skip when indexing is disabled, then
        delete all banks and any leftover lock file of the 'local' bank.
        """
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        if not BmajIndex.do_index:
            self.skipTest("Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank("local")
        b.banks.remove({})
        self.config = BiomajConfig("local")
        self._remove_lock_file()

    def tearDown(self):
        """
        Remove the lock file, clean the test utilities and drop the
        index entries of the 'test' bank.
        """
        self._remove_lock_file()
        self.utils.clean()
        BmajIndex.delete_all_bank("test")

    def _remove_lock_file(self):
        """
        Delete the lock file of the 'local' bank if it exists.
        """
        data_dir = self.config.get("data.dir")
        lock_file = os.path.join(data_dir, "local.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_index(self):
        """
        Index one production release and check the search result size.
        """
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [
                    {
                        "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                        "types": ["nucleic"],
                        "tags": {"organism": "hg19"},
                    }
                ],
                "blast": [
                    {
                        "files": ["blast/chr1/chr1db"],
                        "types": ["nucleic"],
                        "tags": {"chr": "chr1", "organism": "hg19"},
                    }
                ],
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"],
        }
        BmajIndex.add("test", prod, True)
        query = {"query": {"match": {"bank": "test"}}}
        res = BmajIndex.search(query)
        # assertEqual reports the actual size when the check fails.
        self.assertEqual(len(res), 2)
def setUp(self):
    """
    Load the global configuration, delete all banks and remove any
    leftover lock file of the 'local' bank.
    """
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

    # Delete all banks
    b = Bank('local')
    b.banks.remove({})

    self.config = BiomajConfig('local')
    data_dir = self.config.get('data.dir')
    lock_file = os.path.join(data_dir, 'local.lock')
    if os.path.exists(lock_file):
        os.remove(lock_file)
def setUp(self):
    """
    Load the global configuration, delete all banks and remove any
    leftover lock file of the 'local' bank.
    """
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

    # Delete all banks
    b = Bank('local')
    b.banks.remove({})

    self.config = BiomajConfig('local')
    data_dir = self.config.get('data.dir')
    lock_file = os.path.join(data_dir, 'local.lock')
    if os.path.exists(lock_file):
        os.remove(lock_file)
def setUp(self):
    """
    Load the configuration, skip the test when indexing is disabled,
    then delete all banks and any leftover lock file of the 'local' bank.
    """
    BmajIndex.es = None
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
    if not BmajIndex.do_index:
        self.skipTest("Skipping indexing tests due to elasticsearch not available")

    # Delete all banks
    b = Bank("local")
    b.banks.remove({})

    self.config = BiomajConfig("local")
    data_dir = self.config.get("data.dir")
    lock_file = os.path.join(data_dir, "local.lock")
    if os.path.exists(lock_file):
        os.remove(lock_file)
def __init__(self):
    """
    Read the database settings from the global configuration and store
    them on the Connector class (url, db, driver).

    The global configuration is loaded first when it is not available.

    :raises Exception: if the url or the db name is None, or if no driver
                       can be extracted from the url
    """
    # If connector type not set, try to get it from the global.properties
    if not BiomajConfig.global_config:
        BiomajConfig.load_config()

    url = BiomajConfig.global_config.get('GENERAL', 'db.url')
    db = BiomajConfig.global_config.get('GENERAL', 'db.name')
    if url is None:
        raise Exception("No connection url set!")
    if db is None:
        raise Exception("No connection db set!")

    # The driver is whatever precedes the first ':' of the url. The str
    # method is used because the string.split() function is Python 2 only.
    driver = url.split(':')[0]
    if not driver:
        raise Exception("Can't determine database driver")

    Connector.url = url
    Connector.db = db
    Connector.driver = driver
class TestBiomajHTTPDownload(unittest.TestCase):
    """
    Test HTTP downloader
    """

    def setUp(self):
        """
        Load the global configuration and the 'testhttp' bank configuration.
        """
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        self.config = BiomajConfig('testhttp')

    def tearDown(self):
        """
        Clean what the test utilities created.
        """
        self.utils.clean()

    def _downloader(self, remote_dir):
        """
        Build an HTTP downloader for *remote_dir* on the test server.
        """
        return HTTPDownload('http', 'ftp2.fr.debian.org', remote_dir, self.config)

    def test_http_list(self):
        """
        Listing the remote directory yields exactly one file.
        """
        downloader = self._downloader('/debian/dists/')
        files, dirs = downloader.list()
        downloader.close()
        self.assertTrue(len(files) == 1)

    def test_http_list_dateregexp(self):
        """
        Listing still yields one file with a custom date format set.
        """
        self.config.set('http.parse.file.date.format', "%%d-%%b-%%Y %%H:%%M")
        downloader = self._downloader('/debian/dists/')
        files, dirs = downloader.list()
        downloader.close()
        self.assertTrue(len(files) == 1)

    def test_http_download(self):
        """
        Matching README selects a single file to download.
        """
        downloader = self._downloader('/debian/dists/')
        files, dirs = downloader.list()
        downloader.match([r'^README$'], files, dirs)
        downloader.download(self.utils.data_dir)
        downloader.close()
        self.assertTrue(len(downloader.files_to_download) == 1)

    def test_http_download_in_subdir(self):
        """
        Matching a file below a sub-directory selects a single file.
        """
        downloader = self._downloader('/debian/')
        files, dirs = downloader.list()
        downloader.match([r'^dists/README$'], files, dirs)
        downloader.download(self.utils.data_dir)
        downloader.close()
        self.assertTrue(len(downloader.files_to_download) == 1)
def setUp(self):
    """
    Load the configuration, skip the test when indexing is disabled,
    then delete all banks, the index entries of the 'local' bank and
    any leftover lock file of that bank.
    """
    BmajIndex.es = None
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
    if not BmajIndex.do_index:
        self.skipTest(
            "Skipping indexing tests due to elasticsearch not available")

    # Delete all banks
    b = Bank('local')
    b.banks.remove({})
    BmajIndex.delete_all_bank('local')

    self.config = BiomajConfig('local')
    data_dir = self.config.get('data.dir')
    lock_file = os.path.join(data_dir, 'local.lock')
    if os.path.exists(lock_file):
        os.remove(lock_file)
class TestElastic(unittest.TestCase):
    '''
    test indexing and search
    '''

    def setUp(self):
        '''
        Load the configuration, skip when indexing is disabled, then
        delete all banks, the index entries of the 'local' bank and any
        leftover lock file of that bank.
        '''
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        if not BmajIndex.do_index:
            self.skipTest(
                "Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        BmajIndex.delete_all_bank('local')

        self.config = BiomajConfig('local')
        self._remove_lock_file()

    def tearDown(self):
        '''
        Remove the lock file, clean the test utilities and drop the
        index entries of the 'test' bank.
        '''
        self._remove_lock_file()
        self.utils.clean()
        BmajIndex.delete_all_bank('test')

    def _remove_lock_file(self):
        '''
        Delete the lock file of the 'local' bank if it exists.
        '''
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_index(self):
        '''
        Index one production release and check the search result size.
        '''
        BmajIndex.do_index = True
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [{
                    "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                    "types": ["nucleic"],
                    "tags": {"organism": "hg19"}
                }],
                "blast": [{
                    "files": ["blast/chr1/chr1db"],
                    "types": ["nucleic"],
                    "tags": {"chr": "chr1", "organism": "hg19"}
                }]
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"]
        }
        BmajIndex.add('test', prod, True)
        query = {'query': {'match': {'bank': 'test'}}}
        res = BmajIndex.search(query)
        # assertEqual reports the actual size when the check fails.
        self.assertEqual(len(res), 2)

    def test_remove_all(self):
        '''
        After indexing, deleting the 'test' bank leaves no search result.
        '''
        self.test_index()
        query = {'query': {'match': {'bank': 'test'}}}
        BmajIndex.delete_all_bank('test')
        res = BmajIndex.search(query)
        self.assertEqual(len(res), 0)
class Bank(object): ''' BioMAJ bank ''' def __init__(self, name, options=None, no_log=False): ''' Get a bank from db or creates a new one :param name: name of the bank, must match its config file :type name: str :param options: bank options :type options: argparse :param no_log: create a log file for the bank :type no_log: bool ''' logging.debug('Initialize ' + name) if BiomajConfig.global_config is None: raise Exception('Configuration must be loaded first') self.name = name self.depends = [] self.no_log = no_log if no_log: if options is None: # options = {'no_log': True} options = Options() options.no_log = True else: options.no_log = no_log self.config = BiomajConfig(self.name, options) if self.config.get('bank.num.threads') is not None: ProcessFactory.NB_THREAD = int(self.config.get('bank.num.threads')) if self.config.log_file is not None and self.config.log_file != 'none': logging.info("Log file: " + self.config.log_file) # self.options = Options(options) if options is None: self.options = Options() else: self.options = options # if MongoConnector.db is None: # MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'), # BiomajConfig.global_config.get('GENERAL', 'db.name')) # # self.banks = MongoConnector.banks # self.bank = self.banks.find_one({'name': self.name}) self.connector = Connector().get_connector() #self.banks = self.connector.get_collection('banks') self.banks = self.connector self.bank = self.connector.get({'name': self.name}) if self.bank is None: self.bank = { 'name': self.name, 'current': None, 'sessions': [], 'production': [], 'properties': self.get_properties() } #self.bank['_id'] = self.banks.insert(self.bank) self.bank['_id'] = self.connector.set('banks', self.bank) self.session = None self.use_last_session = False def check(self): ''' Checks bank configuration ''' return self.config.check() def is_locked(self): ''' Checks if bank is locked ie action is in progress ''' data_dir = self.config.get('data.dir') lock_dir = 
self.config.get('lock.dir', default=data_dir) lock_file = os.path.join(lock_dir, self.name + '.lock') if os.path.exists(lock_file): return True else: return False def get_bank(self): ''' Get bank stored in db :return: bank json object ''' return self.bank @staticmethod def get_banks_disk_usage(): ''' Get disk usage per bank and release ''' if MongoConnector.db is None: MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'), BiomajConfig.global_config.get('GENERAL', 'db.name')) bank_list = [] banks = MongoConnector.banks.find({}, {'name': 1, 'production': 1}) for b in banks: bank_elt = {'name': b['name'], 'size': 0, 'releases': []} for p in b['production']: if p['size'] is None: p['size'] = 0 bank_elt['size'] += p['size'] bank_elt['releases'].append({'name': p['release'], 'size': p['size']}) bank_list.append(bank_elt) return bank_list def get_bank_release_info(self, full=False): ''' Get release info for the bank. Used with --status option from biomaj-cly.py :param full: Display full for the bank :type full: Boolean :return: Dict with keys if full=True - info, prod, pend else - info ''' _bank = self.bank info = {} if full: bank_info = [] prod_info = [] pend_info = [] release = None if 'current' in _bank and _bank['current']: for prod in _bank['production']: if _bank['current'] == prod['session']: release = prod['release'] # Bank info header bank_info.append(["Name", "Type(s)", "Last update status", "Published release"]) bank_info.append([_bank['name'], str(','.join(_bank['properties']['type'])), str(datetime.fromtimestamp(_bank['last_update_session']).strftime("%Y-%m-%d %H:%M:%S")), str(release)]) # Bank production info header prod_info.append(["Session", "Remote release", "Release", "Directory", "Freeze"]) for prod in _bank['production']: data_dir = self.config.get('data.dir') dir_version = self.config.get('dir.version') if 'data.dir' in prod: data_dir = prod['data.dir'] if 'dir.version' in prod: dir_version = prod['dir.version'] release_dir = 
os.path.join(data_dir, dir_version, prod['prod_dir']) date = datetime.fromtimestamp(prod['session']).strftime('%Y-%m-%d %H:%M:%S') prod_info.append([date, prod['remoterelease'], prod['release'], release_dir, 'yes' if 'freeze' in prod and prod['freeze'] else 'no']) # Bank pending info header if 'pending' in _bank and len(_bank['pending'].keys()) > 0: pend_info.append(["Pending release", "Last run"]) for pending in _bank['pending'].keys(): run = datetime.fromtimestamp(_bank['pending'][pending]).strftime('%Y-%m-%d %H:%M:%S') pend_info.append([pending, run]) info['info'] = bank_info info['prod'] = prod_info info['pend'] = pend_info return info else: release = 'N/A' if 'current' in _bank and _bank['current']: for prod in _bank['production']: if _bank['current'] == prod['session']: release = prod['remoterelease'] info['info'] = [_bank['name'], ','.join(_bank['properties']['type']), str(release), _bank['properties']['visibility']] return info def update_dependencies(self): ''' Update bank dependencies :return: status of updates ''' self.depends = [] if self.run_depends: depends = self.get_dependencies() else: depends = [] self.session.set('depends', {}) res = True for dep in depends: self.session._session['depends'][dep] = False for dep in depends: if self.session._session['depends'][dep]: logging.debug('Update:Depends:' + dep + ':SKIP') # Bank has been marked as depends multiple times, run only once continue logging.info('Update:Depends:' + dep) b = Bank(dep) res = b.update() self.depends.append(b) self.session._session['depends'][dep] = res logging.info('Update:Depends:' + dep + ':' + str(res)) if not res: break return res def get_bank(self, bank, no_log=False): ''' Gets an other bank ''' return Bank(bank, no_log=no_log) def get_dependencies(self, bank=None): ''' Search all bank dependencies :return: list of bank names to update ''' if bank is None: deps = self.config.get('depends') else: deps = bank.config.get('depends') if deps is None: return [] # Mainn deps deps = 
deps.split(',') # Now search in deps if they themselves depend on other banks for dep in deps: b = Bank(dep, no_log = True) deps = b.get_dependencies() + deps return deps def is_owner(self): ''' Checks if current user is owner or admin ''' admin_config = self.config.get('admin') admin = [] if admin_config is not None: admin = [x.strip() for x in admin_config.split(',')] if admin and os.environ['LOGNAME'] in admin: return True if os.environ['LOGNAME'] == self.bank['properties']['owner']: return True return False def set_owner(self, owner): ''' Update bank owner, only if current owner ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) #self.banks.update({'name': self.name}, {'$set': {'properties.owner': owner}}) self.banks.update({'name': self.name}, {'$set': {'properties.owner': owner}}) def set_visibility(self, visibility): ''' Update bank visibility, only if current owner ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) self.banks.update({'name': self.name}, {'$set': {'properties': {'visibility': visibility}}}) def get_properties(self): ''' Read bank properties from config file :return: properties dict ''' owner = os.environ['LOGNAME'] # If owner not set, use current user, else keep current if self.bank and 'properties' in self.bank and 'owner' in self.bank['properties']: owner = self.bank['properties']['owner'] props = { 'visibility': self.config.get('visibility.default'), 'type': self.config.get('db.type').split(','), 'tags': [], 'owner': owner } return props @staticmethod def searchindex(query): return BmajIndex.searchq(query) @staticmethod def search(formats=None, types=None, with_sessions=True): ''' Search all bank releases matching some formats and types Matches 
production release with at least one of formats and one of types ''' if formats is None: formats = [] if types is None: types = [] if MongoConnector.db is None: MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'), BiomajConfig.global_config.get('GENERAL', 'db.name')) searchfilter = {} if formats: searchfilter['production.formats'] = {'$in': formats} if with_sessions: res = MongoConnector.banks.find(searchfilter) else: res = MongoConnector.banks.find(searchfilter, {'sessions': 0}) # Now search in which production release formats and types apply search_list = [] for r in res: prod_to_delete = [] for p in r['production']: is_format = False if not formats: is_format = True # Are formats present in this production release? for f in formats: if f in p['formats']: is_format = True break # Are types present in this production release? is_type = False if not types: is_type = True if is_format: for t in types: if t in p['types'] or t in r['properties']['type']: is_type = True break if not is_type or not is_format: prod_to_delete.append(p) for prod_del in prod_to_delete: r['production'].remove(prod_del) if len(r['production']) > 0: search_list.append(r) return search_list @staticmethod def list(with_sessions=False): ''' Return a list of banks :param with_sessions: should sessions be returned or not (can be quite big) :type with_sessions: bool :return: list of :class:`biomaj.bank.Bank` ''' if MongoConnector.db is None: MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'), BiomajConfig.global_config.get('GENERAL', 'db.name')) bank_list = [] if with_sessions: res = MongoConnector.banks.find({}) else: res = MongoConnector.banks.find({}, {'sessions': 0}) for r in res: bank_list.append(r) return bank_list def controls(self): ''' Initial controls (create directories etc...) 
''' data_dir = self.config.get('data.dir') bank_dir = self.config.get('dir.version') bank_dir = os.path.join(data_dir, bank_dir) if not os.path.exists(bank_dir): os.makedirs(bank_dir) offline_dir = self.config.get('offline.dir.name') offline_dir = os.path.join(data_dir, offline_dir) if not os.path.exists(offline_dir): os.makedirs(offline_dir) log_dir = self.config.get('log.dir') log_dir = os.path.join(log_dir, self.name) if not os.path.exists(log_dir): os.makedirs(log_dir) def _delete(self): ''' Delete bank from database, not files ''' self.banks.remove({'_id': self.bank['_id']}) def save_session(self): ''' Save session in database ''' self.session._session['last_update_time'] = time.time() self.session._session['log_file'] = self.config.log_file if self.use_last_session: # Remove last session self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': self.session._session['id']}}}) # Insert session if self.session.get('action') == 'update': action = 'last_update_session' if self.session.get('action') == 'remove': action = 'last_remove_session' cache_dir = self.config.get('cache.dir') download_files = self.session.get('download_files') if download_files is not None: f_downloaded_files = open(os.path.join(cache_dir, 'files_'+str(self.session.get('id'))), 'w') f_downloaded_files.write(json.dumps(download_files)) f_downloaded_files.close() self.session.set('download_files',[]) local_files = self.session.get('files') if local_files is not None: f_local_files = open(os.path.join(cache_dir, 'local_files_'+str(self.session.get('id'))), 'w') f_local_files.write(json.dumps(download_files)) f_local_files.close() self.session.set('files',[]) self.banks.update({'name': self.name}, { '$set': { action: self.session._session['id'], 'properties': self.get_properties() }, '$push': {'sessions': self.session._session} }) BmajIndex.add(self.name, self.session._session) if self.session.get('action') == 'update' and not self.session.get_status( Workflow.FLOW_OVER) and 
self.session.get('release'): self.banks.update({'name': self.name}, {'$set': {'pending.' + self.session.get('release'): self.session._session['id']}}) if self.session.get('action') == 'update' and self.session.get_status(Workflow.FLOW_OVER) and self.session.get( 'update'): # We expect that a production release has reached the FLOW_OVER status. # If no update is needed (same release etc...), the *update* session of the session is set to False logging.debug('Bank:Save:' + self.name) if len(self.bank['production']) > 0: # Remove from database self.banks.update({'name': self.name}, {'$pull': {'production': {'release': self.session._session['release']}}}) # Update local object # index = 0 # for prod in self.bank['production']: # if prod['release'] == self.session._session['release']: # break; # index += 1 # if index < len(self.bank['production']): # self.bank['production'].pop(index) release_types = [] if self.config.get('db.type'): release_types = self.config.get('db.type').split(',') release_formats = list(self.session._session['formats'].keys()) if self.config.get('db.formats'): config_formats = self.config.get('db.formats').split(',') for config_format in config_formats: if config_format not in release_formats: release_formats.append(config_format) for release_format in self.session._session['formats']: for release_files in self.session._session['formats'][release_format]: if release_files['types']: for rtype in release_files['types']: if rtype not in release_types: release_types.append(rtype) prod_dir = self.session.get_release_directory() if self.session.get('prod_dir'): prod_dir = self.session.get('prod_dir') production = {'release': self.session.get('release'), 'remoterelease': self.session.get('remoterelease'), 'session': self.session._session['id'], 'formats': release_formats, 'types': release_types, 'size': self.session.get('fullsize'), 'data_dir': self.session._session['data_dir'], 'dir_version': self.session._session['dir_version'], 'prod_dir': prod_dir, 
'freeze': False} self.bank['production'].append(production) self.banks.update({'name': self.name}, {'$push': {'production': production}, '$unset': {'pending.' + self.session.get('release'): ''} }) # self.banks.update({'name': self.name}, # {'$unset': 'pending.'+self.session.get('release') # }) self.bank = self.banks.find_one({'name': self.name}) def clean_old_sessions(self): ''' Delete old sessions, not latest ones nor related to production sessions ''' if self.session is None: return # No previous session if 'sessions' not in self.bank: return if self.config.get_bool('keep.old.sessions'): logging.debug('keep old sessions, skipping...') return # 'last_update_session' in self.bank and self.bank['last_update_session'] old_sessions = [] prod_releases = [] for session in self.bank['sessions']: if session['id'] == self.session.get('id'): # Current session prod_releases.append(session['release']) continue if session['id'] == self.session.get('last_update_session'): prod_releases.append(session['release']) continue if session['id'] == self.session.get('last_remove_session'): continue is_prod_session = False for prod in self.bank['production']: if session['id'] == prod['session']: is_prod_session = True break if is_prod_session: prod_releases.append(session['release']) continue old_sessions.append(session) if len(old_sessions) > 0: for session in old_sessions: session_id = session['id'] self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': session_id}}}) # Check if in pending sessions for rel in list(self.bank['pending'].keys()): rel_session = self.bank['pending'][rel] if rel_session == session_id: self.banks.update({'name': self.name}, {'$unset': {'pending': {str(session['release']): ""}}}) if session['release'] not in prod_releases and session['release'] != self.session.get('release'): # There might be unfinished releases linked to session, delete them # if they are not related to a production directory or latest run session_dir = 
os.path.join(self.config.get('data.dir'), self.config.get('dir.version'), self.name + self.config.get('release.separator', default='_') + str(session['release'])) if os.path.exists(session_dir): logging.info('Bank:DeleteOldSessionDir:' + self.name + self.config.get('release.separator', default='_') + str(session['release'])) shutil.rmtree(session_dir) self.bank = self.banks.find_one({'name': self.name}) def publish(self): ''' Set session release to *current* ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) current_link = os.path.join(self.config.get('data.dir'), self.config.get('dir.version'), 'current') prod_dir = self.session.get_full_release_directory() to_dir = os.path.join(self.config.get('data.dir'), self.config.get('dir.version')) if os.path.lexists(current_link): os.remove(current_link) os.chdir(to_dir) os.symlink(self.session.get_release_directory(), 'current') self.bank['current'] = self.session._session['id'] self.banks.update({'name': self.name}, { '$set': {'current': self.session._session['id']} }) def unpublish(self): ''' Unset *current* ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) current_link = os.path.join(self.config.get('data.dir'), self.config.get('dir.version'), 'current') if os.path.lexists(current_link): os.remove(current_link) self.banks.update({'name': self.name}, { '$set': {'current': None} }) def get_production(self, release): ''' Get production field for release :param release: release name or production dir name :type release: str :return: production field ''' release = str(release) production = None for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: production = prod return production def 
freeze(self, release): ''' Freeze a production release When freezed, a production release cannot be removed (manually or automatically) :param release: release name or production dir name :type release: str :return: bool ''' release = str(release) if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) rel = None for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: # Search session related to this production release rel = prod['release'] if rel is None: logging.error('Release not found: ' + release) self.banks.update({'name': self.name, 'production.release': rel}, {'$set': {'production.$.freeze': True}}) self.bank = self.banks.find_one({'name': self.name}) return True def unfreeze(self, release): ''' Unfreeze a production release to allow removal :param release: release name or production dir name :type release: str :return: bool ''' release = str(release) if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) rel = None for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: # Search session related to this production release rel = prod['release'] if rel is None: logging.error('Release not found: ' + release) self.banks.update({'name': self.name, 'production.release': rel}, {'$set': {'production.$.freeze': False}}) self.bank = self.banks.find_one({'name': self.name}) return True def get_new_session(self, flow=None): ''' Returns an empty session :param flow: kind of workflow :type flow: :func:`biomaj.workflow.Workflow.FLOW` ''' if flow is None: flow = Workflow.FLOW return Session(self.name, self.config, flow) def get_session_from_release(self, release): ''' Loads the session matching a specific release :param 
release: release name oe production dir :type release: str :return: :class:`biomaj.session.Session` ''' release = str(release) oldsession = None # Search production release matching release for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: # Search session related to this production release for s in self.bank['sessions']: if s['id'] == prod['session']: oldsession = s break break if oldsession is None: # No prod session, try to find a session for this release, session may have failed or be stopped for s in self.bank['sessions']: if s['release'] and release.endswith(s['release']): oldsession = s if oldsession is None: logging.error('No production session could be found for this release') return oldsession def load_session(self, flow=None, session=None): ''' Loads last session or, if over or forced, a new session Creates a new session or load last session if not over :param flow: kind of workflow :type flow: :func:`biomaj.workflow.Workflow.FLOW` ''' if flow is None: flow = Workflow.FLOW if session is not None: logging.debug('Load specified session ' + str(session['id'])) self.session = Session(self.name, self.config, flow) self.session.load(session) self.use_last_session = True return if len(self.bank['sessions']) == 0 or self.options.get_option(Options.FROMSCRATCH): self.session = Session(self.name, self.config, flow) logging.debug('Start new session') else: # Take last session self.session = Session(self.name, self.config, flow) session_id = None # Load previous session for updates only if self.session.get('action') == 'update' and 'last_update_session' in self.bank and self.bank[ 'last_update_session']: session_id = self.bank['last_update_session'] load_session = None for session in self.bank['sessions']: if session['id'] == session_id: load_session = session break if load_session is not None: # self.session.load(self.bank['sessions'][len(self.bank['sessions'])-1]) self.session.load(session) # if 
self.config.last_modified > self.session.get('last_modified'): # # Config has changed, need to restart # self.session = Session(self.name, self.config, flow) # logging.info('Configuration file has been modified since last session, restart in any case a new session') if self.session.get_status(Workflow.FLOW_OVER) and self.options.get_option( Options.FROM_TASK) is None: previous_release = self.session.get('remoterelease') self.session = Session(self.name, self.config, flow) self.session.set('previous_release', previous_release) logging.debug('Start new session') else: logging.debug('Load previous session ' + str(self.session.get('id'))) self.use_last_session = True def remove_session(self, sid): ''' Delete a session from db :param sid: id of the session :type sid: long :return: bool ''' session_release = None _tmpbank = self.banks.find_one({'name': self.name}) for s in _tmpbank['sessions']: if s['id'] == sid: session_release = s['release'] cache_dir = self.config.get('cache.dir') download_files = os.path.join(cache_dir, 'files_'+str(sid)) if os.path.exists(download_files): os.remove(download_files) local_files = os.path.join(cache_dir, 'local_files_'+str(sid)) if os.path.exists(local_files): os.remove(local_files) if self.config.get_bool('keep.old.sessions'): logging.debug('keep old sessions') if session_release is not None: self.banks.update({'name': self.name}, {'$pull': { 'production': {'session': sid} }, '$unset': { 'pending.' + session_release: '' } }) else: self.banks.update({'name': self.name}, {'$pull': { 'production': {'session': sid} } }) self.banks.update({'name': self.name, 'sessions.id': sid}, {'$set': {'sessions.$.deleted': time.time()}}) else: if session_release is not None: self.banks.update({'name': self.name}, {'$pull': { 'sessions': {'id': sid}, 'production': {'session': sid} }, '$unset': { 'pending.' 
+ session_release: '' } }) else: self.banks.update({'name': self.name}, {'$pull': { 'sessions': {'id': sid}, 'production': {'session': sid} } }) # Update object self.bank = self.banks.find_one({'name': self.name}) if session_release is not None: BmajIndex.remove(self.name, session_release) return True def get_data_dir(self): ''' Returns bank data directory :return: str ''' return os.path.join(self.config.get('data.dir'), self.config.get('dir.version')) def removeAll(self, force=False): ''' Remove all bank releases and database records :param force: force removal even if some production dirs are freezed :type force: bool :return: bool ''' if not force: has_freeze = False for prod in self.bank['production']: if 'freeze' in prod and prod['freeze']: has_freeze = True break if has_freeze: logging.error('Cannot remove bank, some production directories are freezed, use force if needed') return False self.banks.remove({'name': self.name}) BmajIndex.delete_all_bank(self.name) bank_data_dir = self.get_data_dir() logging.warn('DELETE ' + bank_data_dir) if os.path.exists(bank_data_dir): shutil.rmtree(bank_data_dir) bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name')) if os.path.exists(bank_offline_dir): shutil.rmtree(bank_offline_dir) bank_log_dir = os.path.join(self.config.get('log.dir'), self.name) if os.path.exists(bank_log_dir) and self.no_log: shutil.rmtree(bank_log_dir) return True def get_status(self): ''' Get status of current workflow :return: dict of current workflow status ''' if self.bank['status'] is None: return {} return self.bank['status'] def remove_pending(self, release): ''' Remove pending releases :param release: release or release directory :type release: str :return: bool ''' release = str(release) logging.warning('Bank:' + self.name + ':RemovePending') if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + 
self.bank['properties']['owner']) if not self.bank['pending']: return True pendings = self.bank['pending'] for release in list(pendings.keys()): pending_session_id = pendings[release] pending_session = None for s in self.bank['sessions']: if s['id'] == pending_session_id: pending_session = s break session = Session(self.name, self.config, RemoveWorkflow.FLOW) if pending_session is None: session._session['release'] = release else: session.load(pending_session) if os.path.exists(session.get_full_release_directory()): logging.debug("Remove:Pending:Dir:" + session.get_full_release_directory()) shutil.rmtree(session.get_full_release_directory()) self.remove_session(pendings[release]) self.banks.update({'name': self.name}, {'$set': {'pending': {}}}) return True def remove(self, release): ''' Remove a release (db and files) :param release: release or release directory :type release: str :return: bool ''' release = str(release) logging.warning('Bank:' + self.name + ':Remove') if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) self.session = self.get_new_session(RemoveWorkflow.FLOW) oldsession = None # Search production release matching release for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: if 'freeze' in prod and prod['freeze']: logging.error('Cannot remove release, release is freezed, unfreeze it first') return False # Search session related to this production release for s in self.bank['sessions']: if s['id'] == prod['session']: oldsession = s break break if oldsession is None: logging.error('No production session could be found for this release') return False if 'current' in self.bank and self.bank['current'] == oldsession['id']: logging.error('This release is the release in the main release production, you should first unpublish it') return False # New empty session for removal 
session = Session(self.name, self.config, RemoveWorkflow.FLOW) session.set('action', 'remove') session.set('release', oldsession['release']) session.set('update_session_id', oldsession['id']) self.session = session # Reset status, we take an update session res = self.start_remove(session) self.session.set('workflow_status', res) self.save_session() return res def update(self, depends=False): ''' Launch a bank update :param depends: run update of bank dependencies first :type depends: bool :return: bool ''' logging.warning('Bank:' + self.name + ':Update') if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) self.run_depends = depends self.controls() if self.options.get_option('release'): logging.info('Bank:' + self.name + ':Release:' + self.options.get_option('release')) s = self.get_session_from_release(self.options.get_option('release')) # No session in prod if s is None: logging.error('Release does not exists: ' + self.options.get_option('release')) return False self.load_session(UpdateWorkflow.FLOW, s) else: logging.info('Bank:' + self.name + ':Release:latest') self.load_session(UpdateWorkflow.FLOW) # if from task, reset workflow status in session. 
if self.options.get_option('from_task'): set_to_false = False for task in self.session.flow: # If task was in False status (KO) and we ask to start after this task, exit if not set_to_false and not self.session.get_status(task['name']) and task[ 'name'] != self.options.get_option('from_task'): logging.error( 'Previous task ' + task['name'] + ' was not successful, cannot restart after this task') return False if task['name'] == self.options.get_option('from_task'): set_to_false = True if set_to_false: # After from_task task, tasks must be set to False to be run self.session.set_status(task['name'], False) proc = None if task['name'] in [Workflow.FLOW_POSTPROCESS, Workflow.FLOW_PREPROCESS, Workflow.FLOW_REMOVEPROCESS]: proc = self.options.get_option('process') self.session.reset_proc(task['name'], proc) # if task['name'] == Workflow.FLOW_POSTPROCESS: # self.session.reset_proc(Workflow.FLOW_POSTPROCESS, proc) # elif task['name'] == Workflow.FLOW_PREPROCESS: # self.session.reset_proc(Workflow.FLOW_PREPROCESS, proc) # elif task['name'] == Workflow.FLOW_REMOVEPROCESS: # self.session.reset_proc(Workflow.FLOW_REMOVEPROCESS, proc) self.session.set('action', 'update') res = self.start_update() self.session.set('workflow_status', res) self.save_session() return res def start_remove(self, session): ''' Start a removal workflow :param session: Session to remove :type session: :class:`biomaj.session.Session` :return: bool ''' workflow = RemoveWorkflow(self, session) return workflow.start() def start_update(self): ''' Start an update workflow ''' workflow = UpdateWorkflow(self) return workflow.start()
def migrate_pendings():
    """
    Migrate database

    3.0.18: if the schema version stored in the database is 3.0.17 or older,
    convert the 'pending' key of each bank from a dict ({release: value}) to a
    list of {'release': release, 'id': value} entries sorted by value. An
    empty old-style dict is removed from the bank record. The installed BioMAJ
    version is then saved as the new schema version.

    :return: None
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception:
            print("* SchemaVersion: Can't find config file")
            return None
    if MongoConnector.db is None:
        MongoConnector(
            BiomajConfig.global_config.get('GENERAL', 'db.url'),
            BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks

    schema_version = schema.find_one({'id': 1})
    installed_version = pkg_resources.get_distribution("biomaj").version
    if schema_version is None:
        # No schema record yet, assume the oldest 3.x schema
        schema_version = {'id': 1, 'version': '3.0.0'}
        schema.insert_one(schema_version)

    # Compare the full (major, moderate, minor) version: checking only
    # moderate/minor would wrongly migrate again a later x.0.y release.
    version = tuple(
        int(part) for part in schema_version['version'].split('.')[:3])

    if version <= (3, 0, 17):
        print("Migrate from release: %s" % schema_version['version'])
        # Update pending releases
        updated = 0
        for bank in banks.find():
            old_pendings = bank.get('pending')
            # Only the old dict type needs to be converted
            if not isinstance(old_pendings, dict):
                continue
            updated += 1
            pendings = [
                {'release': str(release), 'id': old_pendings[release]}
                for release in sorted(old_pendings, key=old_pendings.get)
            ]
            if pendings:
                banks.update_one(
                    {'name': bank['name']},
                    {'$set': {'pending': pendings}})
            else:
                # We remove old type for 'pending'
                banks.update_one(
                    {'name': bank['name']},
                    {'$unset': {'pending': ""}})

        print("Migration: %d bank(s) updated" % updated)
    schema.update_one({'id': 1}, {'$set': {'version': installed_version}})
def main():
    '''
    Command line entry point of BioMAJ.

    Parses the command line options, loads the global configuration and runs
    the requested action (maintenance, owner, visibility, move, rename,
    search, show, check, status, update, freeze, unfreeze, remove, publish,
    unpublish). Exits with a non-zero status when an action fails.
    '''
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-c', '--config', dest="config", help="Configuration file")
    parser.add_argument('--check', dest="check", help="Check bank property file", action="store_true", default=False)
    parser.add_argument('-u', '--update', dest="update", help="Update action", action="store_true", default=False)
    parser.add_argument('--fromscratch', dest="fromscratch", help="Force a new cycle update", action="store_true", default=False)
    parser.add_argument('-z', '--from-scratch', dest="fromscratch", help="Force a new cycle update", action="store_true", default=False)
    parser.add_argument('-p', '--publish', dest="publish", help="Publish", action="store_true", default=False)
    parser.add_argument('--unpublish', dest="unpublish", help="Unpublish", action="store_true", default=False)

    parser.add_argument('--release', dest="release", help="release of the bank")
    parser.add_argument('--from-task', dest="from_task", help="Start cycle at a specific task (init always executed)")
    parser.add_argument('--process', dest="process", help="Linked to from-task, optionally specify a block, meta or process name to start from")
    parser.add_argument('-l', '--log', dest="log", help="log level")
    parser.add_argument('-r', '--remove', dest="remove", help="Remove a bank release", action="store_true", default=False)
    parser.add_argument('--remove-all', dest="removeall", help="Remove all bank releases and database records", action="store_true", default=False)
    parser.add_argument('--remove-pending', dest="removepending", help="Remove pending release", action="store_true", default=False)
    parser.add_argument('-s', '--status', dest="status", help="Get status", action="store_true", default=False)
    parser.add_argument('-b', '--bank', dest="bank", help="bank name")
    parser.add_argument('--owner', dest="owner", help="change owner of the bank")
    parser.add_argument('--stop-before', dest="stop_before", help="Stop workflow before task")
    parser.add_argument('--stop-after', dest="stop_after", help="Stop workflow after task")
    parser.add_argument('--freeze', dest="freeze", help="Freeze a bank release", action="store_true", default=False)
    parser.add_argument('--unfreeze', dest="unfreeze", help="Unfreeze a bank release", action="store_true", default=False)
    parser.add_argument('-f', '--force', dest="force", help="Force action", action="store_true", default=False)
    parser.add_argument('-h', '--help', dest="help", help="Show usage", action="store_true", default=False)

    parser.add_argument('--search', dest="search", help="Search by format and types", action="store_true", default=False)
    parser.add_argument('--formats', dest="formats", help="List of formats to search, comma separated")
    parser.add_argument('--types', dest="types", help="List of types to search, comma separated")
    parser.add_argument('--query', dest="query", help="Lucene query syntax to search in index")

    parser.add_argument('--show', dest="show", help="Show format files for selected bank", action="store_true", default=False)

    parser.add_argument('-n', '--change-dbname', dest="newbank", help="Change old bank name to this new bank name")
    parser.add_argument('-e', '--move-production-directories', dest="newdir", help="Change bank production directories location to this new path, path must exists")
    parser.add_argument('--visibility', dest="visibility", help="visibility status of the bank")

    parser.add_argument('--maintenance', dest="maintenance", help="Maintenance mode (on/off/status)")

    parser.add_argument('--version', dest="version", help="Show version", action="store_true", default=False)
    parser.add_argument('--status-ko', dest="statusko", help="Get bank in KO status", action="store_true", default=False)

    # Options are parsed directly into the Options object used by Bank
    options = Options()
    parser.parse_args(namespace=options)

    options.no_log = False

    if options.help:
        print('''
    --config: global.properties file path

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR  [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish

    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --search: basic search in bank production releases, return list of banks
       --formats xx,yy : list of comma separated format
      AND/OR
       --types xx,yy : list of comma separated type

       --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
      [MANDATORY]
      --bank xx: name of the bank to show
      [OPTIONAL]
      --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal

        ''')
        return

    if options.version:
        version = pkg_resources.require('biomaj')[0].version
        print('Version: '+str(version))
        return

    # Step names given on the command line must exist in a known workflow
    if options.stop_after or options.stop_before or options.from_task:
        available_steps = [flow['name'] for flow in UpdateWorkflow.FLOW]
        available_steps.extend(flow['name'] for flow in RemoveWorkflow.FLOW)
        for step in (options.stop_after, options.stop_before, options.from_task):
            if step and step not in available_steps:
                print('Invalid step: '+step)
                sys.exit(1)

    bmaj = None
    try:
        if options.config is not None:
            BiomajConfig.load_config(options.config)
        else:
            BiomajConfig.load_config()
    except Exception as e:
        print(str(e))
        sys.exit(1)

    try:

        if options.maintenance:
            if options.maintenance not in ['on', 'off', 'status']:
                print("Wrong maintenance value [on,off,status]")
                sys.exit(1)
            data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
            if BiomajConfig.global_config.has_option('GENERAL', 'lock.dir'):
                lock_dir = BiomajConfig.global_config.get('GENERAL', 'lock.dir')
            else:
                lock_dir = data_dir
            # Maintenance mode is materialized by the presence of this file
            maintenance_lock_file = os.path.join(lock_dir, 'biomaj.lock')
            if options.maintenance == 'status':
                if os.path.exists(maintenance_lock_file):
                    print("Maintenance: On")
                else:
                    print("Maintenance: Off")
                sys.exit(0)
            if options.maintenance == 'on':
                with open(maintenance_lock_file, 'w') as lock_handle:
                    lock_handle.write('1')
                print("Maintenance set to On")
                sys.exit(0)
            if options.maintenance == 'off':
                if os.path.exists(maintenance_lock_file):
                    os.remove(maintenance_lock_file)
                print("Maintenance set to Off")
                sys.exit(0)

        if options.owner:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_owner(options.owner)
            sys.exit(0)

        if options.visibility:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if options.visibility not in ['public', 'private']:
                print("Valid values are public|private")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_visibility(options.visibility)
            print("Do not forget to update accordingly the visibility.default parameter in the configuration file")
            sys.exit(0)

        if options.newdir:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if not os.path.exists(options.newdir):
                print("Destination directory does not exists")
                # Do not move production directories to a missing location
                sys.exit(1)

            bank = Bank(options.bank, options=options, no_log=True)
            if not bank.bank['production']:
                print("Nothing to move, no production directory")
                sys.exit(0)

            bank.load_session(Workflow.FLOW, None)
            w = Workflow(bank)
            res = w.wf_init()
            if not res:
                sys.exit(1)

            for prod in bank.bank['production']:
                session = bank.get_session_from_release(prod['release'])
                bank.load_session(Workflow.FLOW, session)
                prod_path = bank.session.get_full_release_directory()
                if os.path.exists(prod_path):
                    shutil.move(prod_path, options.newdir)
                prod['data_dir'] = options.newdir
            bank.banks.update({'name': options.bank}, {'$set': {'production': bank.bank['production']}})
            print("Bank production directories moved to " + options.newdir)
            print("WARNING: do not forget to update accordingly the data.dir and dir.version properties")
            w.wf_over()
            sys.exit(0)

        if options.newbank:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
            bank_prop_file = os.path.join(conf_dir, options.bank+'.properties')
            # NOTE(review): SafeConfigParser is a deprecated alias of
            # ConfigParser on Python 3 (removed in 3.12) - confirm the
            # Python versions to support before switching.
            config_bank = configparser.SafeConfigParser()
            config_bank.read([bank_prop_file])
            config_bank.set('GENERAL', 'db.name', options.newbank)
            newbank_prop_path = os.path.join(conf_dir, options.newbank+'.properties')
            with open(newbank_prop_path, 'w') as newbank_prop_file:
                config_bank.write(newbank_prop_file)
            bank.banks.update({'name': options.bank}, {'$set': {'name': options.newbank}})
            os.remove(bank_prop_file)
            print("Bank "+options.bank+" renamed to "+options.newbank)
            sys.exit(0)

        if options.search:
            if options.query:
                res = Bank.searchindex(options.query)
                print("Query matches for :"+options.query)
                results = [["Release", "Format(s)", "Type(s)", "Files"]]
                for match in res:
                    results.append([match['_source']['release'],
                                    str(match['_source']['format']),
                                    str(match['_source']['types']),
                                    ','.join(match['_source']['files'])])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            else:
                formats = []
                if options.formats:
                    formats = options.formats.split(',')
                types = []
                if options.types:
                    types = options.types.split(',')
                print("Search by formats="+str(formats)+", types="+str(types))
                res = Bank.search(formats, types, False)
                results = [["Name", "Release", "Format(s)", "Type(s)", 'Current']]
                for bank in sorted(res, key=lambda bank: (bank['name'])):
                    b = bank['name']
                    bank['production'].sort(key=lambda n: n['release'], reverse=True)
                    for prod in bank['production']:
                        iscurrent = ""
                        if prod['session'] == bank['current']:
                            iscurrent = "yes"
                        results.append([b if b else '', prod['release'], ','.join(prod['formats']),
                                        ','.join(prod['types']), iscurrent])
                        # Bank name is displayed on its first row only
                        b = None
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.show:
            if not options.bank:
                print("Bank option is required")
                sys.exit(1)

            bank = Bank(options.bank, no_log=True)
            results = [["Name", "Release", "Format(s)", "Type(s)", "Tag(s)", "File(s)"]]
            current = None
            fformat = None
            if 'current' in bank.bank and bank.bank['current']:
                current = bank.bank['current']
            for prod in bank.bank['production']:
                include = True
                release = prod['release']
                if current == prod['session']:
                    release += ' (current)'
                if options.release and (prod['release'] != options.release and prod['prod_dir'] != options.release):
                    include = False
                if include:
                    session = bank.get_session_from_release(prod['release'])
                    formats = session['formats']
                    afiles = []
                    atags = []
                    atypes = []
                    for fformat in list(formats.keys()):
                        for elt in formats[fformat]:
                            atypes.append(','.join(elt['types']))
                            for tag in list(elt['tags'].keys()):
                                atags.append(elt['tags'][tag])
                            for eltfile in elt['files']:
                                afiles.append(eltfile)
                    results.append([bank.bank['name'], release, fformat, ','.join(atypes),
                                    ','.join(atags), ','.join(afiles)])
            print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.check:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            print(options.bank+" check: "+str(bank.check())+"\n")
            sys.exit(0)

        if options.status:
            if options.bank:
                bank = Bank(options.bank, no_log=True)
                info = bank.get_bank_release_info(full=True)
                print(tabulate(info['info'], headers='firstrow', tablefmt='psql'))
                print(tabulate(info['prod'], headers='firstrow', tablefmt='psql'))
                # do we have some pending release(s)
                if 'pend' in info and len(info['pend']) > 1:
                    print(tabulate(info['pend'], headers='firstrow', tablefmt='psql'))
            else:
                banks = Bank.list()
                # Headers of output table
                banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
                for bank in sorted(banks, key=lambda k: k['name']):
                    bank = Bank(bank['name'], no_log=True)
                    banks_list.append(bank.get_bank_release_info()['info'])
                print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))
            sys.exit(0)

        if options.statusko:
            banks = Bank.list()
            banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
            for bank in sorted(banks, key=lambda k: k['name']):
                try:
                    bank = Bank(bank['name'], no_log=True)
                    bank.load_session(UpdateWorkflow.FLOW)
                    if bank.session is not None:
                        if bank.use_last_session and not bank.session.get_status(Workflow.FLOW_OVER):
                            wf_status = bank.session.get('workflow_status')
                            if wf_status is None or not wf_status:
                                banks_list.append(bank.get_bank_release_info()['info'])
                except Exception as e:
                    # Best effort: a broken bank must not hide the other ones
                    print(str(e))
            print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))

        if options.update:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            banks = options.bank.split(',')
            gres = True
            for bank in banks:
                options.bank = bank
                bmaj = Bank(bank, options)
                print('Log file: '+bmaj.config.log_file)
                check_status = bmaj.check()
                if not check_status:
                    print('Skip bank ' + options.bank + ': wrong config')
                    gres = False
                    continue
                res = bmaj.update(depends=True)
                if not res:
                    gres = False
                Notify.notifyBankAction(bmaj)
            if not gres:
                sys.exit(1)

        if options.freeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.freeze(options.release)
            if not res:
                sys.exit(1)

        if options.unfreeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.unfreeze(options.release)
            if not res:
                sys.exit(1)

        if options.remove or options.removeall:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if options.remove and not options.release:
                print("Bank release is missing")
                sys.exit(1)
            if options.removeall:
                bmaj = Bank(options.bank, options, no_log=True)
                print('Log file: '+bmaj.config.log_file)
                res = bmaj.removeAll(options.force)
            else:
                bmaj = Bank(options.bank, options)
                print('Log file: '+bmaj.config.log_file)
                res = bmaj.remove(options.release)
                Notify.notifyBankAction(bmaj)
            if not res:
                sys.exit(1)

        if options.removepending:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            print('Log file: '+bmaj.config.log_file)
            res = bmaj.remove_pending(options.release)
            if not res:
                sys.exit(1)

        if options.unpublish:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bmaj.unpublish()
            sys.exit(0)

        if options.publish:
            if not options.bank:
                print("Bank name or release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            print('Log file: '+bmaj.config.log_file)
            bmaj.load_session()
            bank = bmaj.bank
            session = None
            if options.get_option('release') is None:
                # Get latest prod release
                if len(bank['production']) > 0:
                    prod = bank['production'][len(bank['production'])-1]
                    for s in bank['sessions']:
                        if s['id'] == prod['session']:
                            session = s
                            break
            else:
                # Search production release matching release
                for prod in bank['production']:
                    if prod['release'] == options.release or prod['prod_dir'] == options.release:
                        # Search session related to this production release
                        for s in bank['sessions']:
                            if s['id'] == prod['session']:
                                session = s
                                break
                        break
            if session is None:
                print("No production session could be found for this release")
                sys.exit(1)
            bmaj.session._session = session
            bmaj.publish()
    except Exception as e:
        # sys.exit() raises SystemExit, which is not caught here; only
        # unexpected failures reach this point and must not exit with 0.
        print(str(e))
        sys.exit(1)
class TestBiomajSetup(unittest.TestCase):
    '''
    Tests bank creation, session loading/removal and process factories
    '''

    def setUp(self):
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

        # Delete all banks
        b = Bank('alu')
        b.banks.remove({})

        self.config = BiomajConfig('alu')
        self._remove_lock_file()

    def tearDown(self):
        self._remove_lock_file()
        self.utils.clean()

    def _remove_lock_file(self):
        '''
        Deletes the alu bank lock file if present
        '''
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def _save_sessions(self, bank, over=False):
        '''
        Saves four 'alu' sessions with init status set in bank

        :param bank: bank in which sessions are saved
        :type bank: Bank
        :param over: also flag the sessions as over
        :type over: bool
        '''
        for _ in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            if over:
                s._session['status'][Workflow.FLOW_OVER] = True
            bank.session = s
            bank.save_session()

    def test_new_bank(self):
        '''
        Checks bank init
        '''
        Bank('alu')

    def test_new_session(self):
        '''
        Checks an empty session is created
        '''
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        for key in b.session._session['status'].keys():
            self.assertFalse(b.session.get_status(key))

    def test_session_reload_notover(self):
        '''
        Checks a session is used if present
        '''
        b = Bank('alu')
        self._save_sessions(b)

        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        '''
        Checks a single session remains after update and clean_old_sessions
        '''
        b = Bank('local')
        self._save_sessions(b)
        b2 = Bank('local')
        b2.update()
        b2.clean_old_sessions()
        self.assertEqual(len(b2.bank['sessions']), 1)

    def test_session_reload_over(self):
        '''
        Checks a new session is started if the saved sessions are over
        '''
        b = Bank('alu')
        self._save_sessions(b, over=True)

        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        '''
        Checks created banks are listed
        '''
        Bank('alu')
        Bank('local')
        banks = Bank.list()
        self.assertEqual(len(banks), 2)

    @attr('network')
    def test_get_release(self):
        '''
        Get release
        '''
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        res = b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(res)
        self.assertIsNotNone(b.session._session['release'])

    def test_remove_session(self):
        '''
        Checks a removed session is no longer in the bank sessions
        '''
        b = Bank('alu')
        self._save_sessions(b)
        self.assertEqual(len(b.bank['sessions']), 4)
        b.remove_session(b.session.get('id'))
        self.assertEqual(len(b.bank['sessions']), 3)

    @attr('process')
    def test_postprocesses_setup(self):
        '''
        Checks the number of tasks set per thread by the post process factory
        '''
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run(True)
        self.assertEqual(len(pfactory.threads_tasks[0]), 2)
        self.assertEqual(len(pfactory.threads_tasks[1]), 1)

    @attr('process')
    def test_postprocesses_exec_again(self):
        '''
        Execute once, set a status to false, check that False processes are executed
        '''
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.blocks['BLOCK1']['META0']['PROC0'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC1'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC2'])
        blocks = copy.deepcopy(pfactory.blocks)
        blocks['BLOCK2']['META1']['PROC2'] = False
        pfactory2 = PostProcessFactory(b, blocks)
        pfactory2.run()
        self.assertTrue(pfactory2.blocks['BLOCK2']['META1']['PROC2'])

    @attr('process')
    def test_preprocesses(self):
        '''
        Checks pre process status is set after run
        '''
        b = Bank('localprocess')
        pfactory = PreProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    @attr('process')
    def test_removeprocesses(self):
        '''
        Checks remove process status is set after run
        '''
        b = Bank('localprocess')
        pfactory = RemoveProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    def test_dependencies_list(self):
        '''
        Checks the number of dependencies of the computed bank
        '''
        b = Bank('computed')
        deps = b.get_dependencies()
        self.assertEqual(len(deps), 2)
class TestBiomajFunctional(unittest.TestCase):
    """Functional tests running full bank workflows against the test banks."""

    def setUp(self):
        """Load the test configuration, wipe all bank records and stale locks."""
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        self.config = BiomajConfig('local')
        self._remove_lock_file()

    def tearDown(self):
        """Remove the lock file left by a run and clean the test workspace."""
        self._remove_lock_file()
        self.utils.clean()

    def _remove_lock_file(self):
        """Delete the 'local' bank lock file from data.dir if it exists."""
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_extract_release_from_file_name(self):
        """The release is captured from the release file name."""
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100')

    def test_extract_release_from_file_content(self):
        """The release is captured from the release file content."""
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_100\.txt')
        b.session.config.set('release.regexp', r'Release\s*(\d+)')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '103')

    def test_publish(self):
        """Update a bank, then publish it."""
        b = Bank('local')
        b.update()
        current_link = os.path.join(b.config.get('data.dir'),
                                    b.config.get('dir.version'),
                                    'current')
        self.assertFalse(os.path.exists(current_link))
        self.assertIsNone(b.bank['current'])
        b.publish()
        self.assertTrue(os.path.exists(current_link))
        self.assertEqual(b.bank['current'], b.session._session['id'])

    # Should test this on local downloader, changing 1 file to force update,
    # else we would get same bank and there would be no update
    def test_no_update(self):
        """Try updating twice, at second time, bank should not be updated."""
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

    @attr('release')
    def test_release_control(self):
        """Try updating twice, at second time, modify one file (same date),
        bank should update."""
        b = Bank('local')
        b.update()
        b.session.config.set('keep.old.version', '3')
        self.assertTrue(b.session.get('update'))
        remote_file = b.session.config.get('remote.dir') + 'test2.fasta'
        os.utime(remote_file, None)
        # Update test2.fasta and set release.control
        b.session.config.set('release.control', 'true')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        b.session.config.set('remote.files', '^test2.fasta')
        b.update()
        self.assertTrue(b.session.get('update'))

    def test_fromscratch_update(self):
        """Try updating twice, at second time, bank should be updated
        (forced with fromscratch)."""
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        sess = b.session.get('release')
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(b.session.get('release'), sess + '__1')

    def test_fromscratch_update_with_release(self):
        """Try updating twice, at second time, bank should be updated
        (forced with fromscratch).

        Use case with release defined in release file.
        """
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100')
        os.makedirs(b.session.get_full_release_directory())
        w = UpdateWorkflow(b)
        # Reset release
        b.session.set('release', None)
        w.options.fromscratch = True
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100__1')

    def test_mix_stop_from_task(self):
        """Get a first release, then fromscratch --stop-after, then restart
        from-task."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task2(self):
        """Get a first release, then fromscratch --stop-after, then restart
        from-task."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        # NOTE(review): from_task is set on b2 *after* b3.update() has run, so
        # it does not influence this test; possibly b3.options.from_task was
        # meant to be set before the update -- confirm.
        b2.options.from_task = 'download'
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task3(self):
        """Get a first release, then fromscratch --stop-after, then restart
        from-task."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        # NOTE(review): same as test_mix_stop_from_task2, this assignment
        # happens after the update it seems intended to configure -- confirm.
        b2.options.from_task = 'postprocess'
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task4(self):
        """Get a first release, then fromscratch --stop-before, then restart
        from-task; the restart must fail."""
        b = Bank('local')
        b.update()
        b2 = Bank('local')
        b2.options.stop_before = 'download'
        b2.options.fromscratch = True
        b2.update()
        b3 = Bank('local')
        b3.options.from_task = 'postprocess'
        res = b3.update()
        self.assertFalse(res)

    def test_delete_old_dirs(self):
        """Try updating 3 times, oldest dir should be removed."""
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(len(b.bank['production']), 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, but olders must be deleted
        self.assertEqual(len(b.bank['production']), 2)

    def test_delete_old_dirs_with_freeze(self):
        """Try updating 3 times, oldest dir should be removed but not the
        frozen releases."""
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        self.assertEqual(len(b.bank['production']), 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir; the two frozen releases are kept
        self.assertEqual(len(b.bank['production']), 3)

    def test_removeAll(self):
        """removeAll deletes the data directory and the database record."""
        b = Bank('local')
        b.update()
        b.removeAll()
        self.assertFalse(os.path.exists(b.get_data_dir()))
        bdb = b.banks.find_one({'name': b.name})
        self.assertIsNone(bdb)

    def test_remove(self):
        """Test removal of a production dir."""
        b = Bank('local')
        b.update()
        self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
        self.assertEqual(len(b.bank['production']), 1)
        b.remove(b.session.get('release'))
        self.assertFalse(os.path.exists(b.session.get_full_release_directory()))
        b = Bank('local')
        self.assertEqual(len(b.bank['production']), 0)

    def test_update_stop_after(self):
        """--stop-after download runs download but not postprocess."""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_update_stop_before(self):
        """--stop-before postprocess runs download but not postprocess."""
        b = Bank('local')
        b.options.stop_before = 'postprocess'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_reupdate_from_task(self):
        """A stopped update can be resumed from the postprocess task."""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(b2.session.get_status('postprocess'))
        self.assertEqual(b.session.get_full_release_directory(),
                         b2.session.get_full_release_directory())

    def test_reupdate_from_task_error(self):
        """Resuming from postprocess fails when the run stopped at check."""
        b = Bank('local')
        b.options.stop_after = 'check'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        res = b2.update()
        self.assertFalse(res)

    def test_reupdate_from_task_wrong_release(self):
        """Resuming from a task with an unknown release fails."""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = 'wrongrelease'
        res = b2.update()
        self.assertFalse(res)

    @attr('process')
    def test_postprocesses_restart_from_proc(self):
        """Post-processes are re-run when restarting from postprocess."""
        b = Bank('localprocess')
        b.update()
        release_dir = b.session.get_full_release_directory()
        proc1file = os.path.join(release_dir, 'proc1.txt')
        proc2file = os.path.join(release_dir, 'proc2.txt')
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, reexecute all processes
        b2 = Bank('localprocess')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, but at process PROC2 and following
        b3 = Bank('localprocess')
        b3.options.from_task = 'postprocess'
        b3.options.process = 'PROC2'
        b3.options.release = b.session.get('release')
        b3.update()
        # The proc1file absence check was disabled in the original test.
        self.assertTrue(os.path.exists(proc2file))

    def test_computed(self):
        """A computed bank updates once, then not again while its
        dependencies are unchanged."""
        b = Bank('computed')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(os.path.exists(
            b.session.get_full_release_directory() + '/sub1/flat/test_100.txt'))
        self.assertTrue(b.session.get('update'))
        # Check that, with depends non updated, bank is not updated itself
        nextb = Bank('computed')
        nextb.update(True)
        self.assertFalse(nextb.session.get('update'))

    @attr('nofile')
    def test_computed_nofile(self):
        """A computed bank with protocol 'none' moves files from its
        dependency."""
        b = Bank('computed2')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('protocol', 'none')
        b.session.config.set('sub1.files.move', 'flat/test_.*')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(os.path.exists(
            b.session.get_full_release_directory() + '/sub1/flat/test_100.txt'))

    def test_computed_ref_release(self):
        """The computed bank release matches the release of bank 'sub1'."""
        b = Bank('computed2')
        res = b.update(True)
        b2 = Bank('sub1')
        b2release = b2.bank['production'][-1]['release']
        brelease = b.bank['production'][-1]['release']
        self.assertTrue(res)
        self.assertEqual(brelease, b2release)

    # This test used to be a second ``test_computed_ref_release`` definition,
    # which silently replaced the one above so that it never ran.
    @attr('computed')
    def test_computed_no_update(self):
        """Updating a computed bank a second time does not update it."""
        b = Bank('computed2')
        b.update(True)
        self.assertTrue(b.session.get('update'))
        b2 = Bank('computed2')
        b2.update(True)
        self.assertFalse(b2.session.get('update'))

    def test_computederror(self):
        """A failing dependency makes the computed bank update fail."""
        b = Bank('computederror')
        res = b.update(True)
        self.assertFalse(res)
        self.assertTrue(b.session._session['depends']['sub2'])
        self.assertFalse(b.session._session['depends']['error'])

    @attr('directrelease')
    def test_directhttp_release(self):
        """A directhttp bank downloads its file into the release directory."""
        b = Bank('directhttp')
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(os.path.exists(
            b.session.get_full_release_directory() + '/flat/debian/README.html'))

    @attr('network')
    def test_multi(self):
        """The 'multi' bank stores both JSON responses with the sent key."""
        b = Bank('multi')
        b.update()
        release_dir = b.session.get_full_release_directory()
        with open(os.path.join(release_dir, 'flat/test1.json'), 'r') as content_file:
            my_json = json.loads(content_file.read())
        self.assertEqual(my_json['args']['key1'], 'value1')
        with open(os.path.join(release_dir, 'flat/test2.json'), 'r') as content_file:
            my_json = json.loads(content_file.read())
        self.assertEqual(my_json['form']['key1'], 'value1')

    def test_freeze(self):
        """A frozen release cannot be removed until it is unfrozen."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b.freeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod['freeze'], True)
        res = b.remove(rel)
        self.assertEqual(res, False)
        b.unfreeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod['freeze'], False)
        res = b.remove(rel)
        self.assertEqual(res, True)

    def test_stats(self):
        """Disk usage statistics report a positive size after an update."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        stats = Bank.get_banks_disk_usage()
        self.assertGreater(stats[0]['size'], 0)
        for release in stats[0]['releases']:
            if release['name'] == rel:
                self.assertGreater(release['size'], 0)

    @attr('process')
    def test_processes_meta_data(self):
        """Process metadata is recorded in the session formats."""
        b = Bank('localprocess')
        b.update()
        formats = b.session.get('formats')
        self.assertEqual(len(formats['blast']), 2)
        self.assertEqual(len(formats['test'][0]['files']), 3)

    @attr('process')
    def test_search(self):
        """Banks can be searched by format and/or type."""
        b = Bank('localprocess')
        b.update()
        search_res = Bank.search(['blast'], [])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search([], ['nucleic'])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(['blast'], ['nucleic'])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(['blast'], ['proteic'])
        self.assertEqual(len(search_res), 0)

    def test_owner(self):
        """Test ACL with owner: a non-owner must not be allowed to update."""
        b = Bank('local')
        res = b.update()
        self.assertTrue(res)
        b.set_owner('sample')
        b2 = Bank('local')
        # The previous try/self.fail()/except Exception form swallowed the
        # AssertionError raised by fail(), so the test could never fail.
        with self.assertRaises(Exception):
            b2.update()
class TestBiomajSetup(unittest.TestCase):
    """Bank initialisation, session bookkeeping and process factory tests."""

    def setUp(self):
        """Load the test configuration, drop all banks and any stale lock."""
        self.utils = UtilsForTest()
        BiomajConfig.load_config(
            self.utils.global_properties, allow_user_config=False
        )
        # Delete all banks
        Bank("alu").banks.remove({})
        self.config = BiomajConfig("alu")
        self._drop_lock_file()

    def tearDown(self):
        """Drop the lock file and clean the test workspace."""
        self._drop_lock_file()
        self.utils.clean()

    def _drop_lock_file(self):
        """Remove alu.lock from the configured data.dir when it is present."""
        lock_path = os.path.join(self.config.get("data.dir"), "alu.lock")
        if os.path.exists(lock_path):
            os.remove(lock_path)

    def _store_sessions(self, bank, over=False):
        """Save four 'alu' sessions flagged as initialised on *bank*.

        When *over* is true each session is also flagged as finished.
        The last created session stays attached to ``bank.session``.
        """
        for _ in range(4):
            new_session = Session("alu", self.config, UpdateWorkflow.FLOW)
            status = new_session._session["status"]
            status[Workflow.FLOW_INIT] = True
            if over:
                status[Workflow.FLOW_OVER] = True
            bank.session = new_session
            bank.save_session()

    def test_new_bank(self):
        """
        Checks bank init
        """
        Bank("alu")

    def test_new_session(self):
        """
        Checks an empty session is created
        """
        bank = Bank("alu")
        bank.load_session(UpdateWorkflow.FLOW)
        for step in bank.session._session["status"]:
            self.assertFalse(bank.session.get_status(step))

    def test_session_reload_notover(self):
        """
        Checks a saved, unfinished session is reused on reload
        """
        self._store_sessions(Bank("alu"))
        reloaded = Bank("alu")
        reloaded.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(reloaded.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        """
        Checks only one session remains after update and session cleanup
        """
        self._store_sessions(Bank("local"))
        fresh = Bank("local")
        fresh.update()
        fresh.clean_old_sessions()
        remaining = len(fresh.bank["sessions"])
        self.assertTrue(remaining == 1)

    def test_session_reload_over(self):
        """
        Checks a finished session is not reused on reload
        """
        self._store_sessions(Bank("alu"), over=True)
        reloaded = Bank("alu")
        reloaded.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(reloaded.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        """
        Checks both instantiated banks are listed
        """
        # Instantiating a bank registers it in the database.
        for bank_name in ("alu", "local"):
            Bank(bank_name)
        listed = Bank.list()
        self.assertTrue(len(listed) == 2)

    @attr("network")
    def test_get_release(self):
        """
        Get release
        """
        bank = Bank("alu")
        bank.load_session(UpdateWorkflow.FLOW)
        outcome = bank.update()
        self.assertTrue(bank.session.get("update"))
        self.assertTrue(outcome)
        self.assertTrue(bank.session._session["release"] is not None)

    def test_remove_session(self):
        """
        Checks removing a session drops it from the bank record
        """
        bank = Bank("alu")
        self._store_sessions(bank)
        self.assertTrue(len(bank.bank["sessions"]) == 4)
        last_id = bank.session.get("id")
        bank.remove_session(last_id)
        self.assertTrue(len(bank.bank["sessions"]) == 3)

    @attr("process")
    def test_postprocesses_setup(self):
        """
        Checks how post-processes are distributed over thread task lists
        """
        factory = PostProcessFactory(Bank("localprocess"))
        factory.run(True)
        first_tasks, second_tasks = factory.threads_tasks[0], factory.threads_tasks[1]
        self.assertTrue(len(first_tasks) == 2)
        self.assertTrue(len(second_tasks) == 1)

    @attr("process")
    def test_postprocesses_exec_again(self):
        """
        Execute once, set a status to false, check that False processes are executed
        """
        bank = Bank("localprocess")
        first_run = PostProcessFactory(bank)
        first_run.run()
        self.assertTrue(first_run.blocks["BLOCK1"]["META0"]["PROC0"])
        self.assertTrue(first_run.blocks["BLOCK2"]["META1"]["PROC1"])
        self.assertTrue(first_run.blocks["BLOCK2"]["META1"]["PROC2"])
        replay_blocks = copy.deepcopy(first_run.blocks)
        replay_blocks["BLOCK2"]["META1"]["PROC2"] = False
        second_run = PostProcessFactory(bank, replay_blocks)
        second_run.run()
        self.assertTrue(second_run.blocks["BLOCK2"]["META1"]["PROC2"])

    @attr("process")
    def test_preprocesses(self):
        """
        Checks the pre-process status after a run
        """
        factory = PreProcessFactory(Bank("localprocess"))
        factory.run()
        self.assertTrue(factory.meta_status["META0"]["PROC0"])

    @attr("process")
    def test_removeprocesses(self):
        """
        Checks the remove-process status after a run
        """
        factory = RemoveProcessFactory(Bank("localprocess"))
        factory.run()
        self.assertTrue(factory.meta_status["META0"]["PROC0"])

    def test_dependencies_list(self):
        """
        Checks the 'computed' bank reports two dependencies
        """
        dependencies = Bank("computed").get_dependencies()
        self.assertTrue(len(dependencies) == 2)
def setUp(self):
    """Load the global test configuration and the 'testhttp' bank config."""
    utils = UtilsForTest()
    self.utils = utils
    BiomajConfig.load_config(
        utils.global_properties,
        allow_user_config=False,
    )
    self.config = BiomajConfig('testhttp')
def setUp(self):
    """Record this file's directory and load the global test configuration."""
    utils = UtilsForTest()
    self.utils = utils
    this_file = os.path.realpath(__file__)
    self.curdir = os.path.dirname(this_file)
    BiomajConfig.load_config(
        utils.global_properties,
        allow_user_config=False,
    )
def main():
    """Command-line entry point for managing BioMAJ banks.

    Parses the command line into an ``Options`` namespace, loads the global
    configuration, then runs the requested action (status, update, publish,
    remove, search, maintenance, ...). Most actions end the process through
    ``sys.exit``: status 0 on success, 1 on invalid arguments or when the
    action reports a failure.

    Exceptions raised while an action runs are caught at the end of the
    function and only printed; in that case the function returns normally.
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-c', '--config', dest="config",
                        help="Configuration file")
    parser.add_argument('--check', dest="check",
                        help="Check bank property file",
                        action="store_true", default=False)
    parser.add_argument('-u', '--update', dest="update",
                        help="Update action",
                        action="store_true", default=False)
    parser.add_argument('--fromscratch', dest="fromscratch",
                        help="Force a new cycle update",
                        action="store_true", default=False)
    parser.add_argument('-z', '--from-scratch', dest="fromscratch",
                        help="Force a new cycle update",
                        action="store_true", default=False)
    parser.add_argument('-p', '--publish', dest="publish",
                        help="Publish",
                        action="store_true", default=False)
    parser.add_argument('--unpublish', dest="unpublish",
                        help="Unpublish",
                        action="store_true", default=False)
    parser.add_argument('--release', dest="release",
                        help="release of the bank")
    parser.add_argument('--from-task', dest="from_task",
                        help="Start cycle at a specific task (init always executed)")
    parser.add_argument('--process', dest="process",
                        help="Linked to from-task, optionally specify a block, meta or process name to start from")
    parser.add_argument('-l', '--log', dest="log", help="log level")
    parser.add_argument('-r', '--remove', dest="remove",
                        help="Remove a bank release",
                        action="store_true", default=False)
    parser.add_argument('--remove-all', dest="removeall",
                        help="Remove all bank releases and database records",
                        action="store_true", default=False)
    parser.add_argument('--remove-pending', dest="removepending",
                        help="Remove pending release",
                        action="store_true", default=False)
    parser.add_argument('-s', '--status', dest="status",
                        help="Get status",
                        action="store_true", default=False)
    parser.add_argument('-b', '--bank', dest="bank", help="bank name")
    parser.add_argument('--owner', dest="owner",
                        help="change owner of the bank")
    parser.add_argument('--stop-before', dest="stop_before",
                        help="Store workflow before task")
    parser.add_argument('--stop-after', dest="stop_after",
                        help="Store workflow after task")
    parser.add_argument('--freeze', dest="freeze",
                        help="Freeze a bank release",
                        action="store_true", default=False)
    parser.add_argument('--unfreeze', dest="unfreeze",
                        help="Unfreeze a bank release",
                        action="store_true", default=False)
    parser.add_argument('-f', '--force', dest="force",
                        help="Force action",
                        action="store_true", default=False)
    parser.add_argument('-h', '--help', dest="help",
                        help="Show usage",
                        action="store_true", default=False)
    parser.add_argument('--search', dest="search",
                        help="Search by format and types",
                        action="store_true", default=False)
    parser.add_argument('--formats', dest="formats",
                        help="List of formats to search, comma separated")
    parser.add_argument('--types', dest="types",
                        help="List of types to search, comma separated")
    parser.add_argument('--query', dest="query",
                        help="Lucene query syntax to search in index")
    parser.add_argument('--show', dest="show",
                        help="Show format files for selected bank",
                        action="store_true", default=False)
    parser.add_argument('-n', '--change-dbname', dest="newbank",
                        help="Change old bank name to this new bank name")
    parser.add_argument('-e', '--move-production-directories', dest="newdir",
                        help="Change bank production directories location to this new path, path must exists")
    parser.add_argument('--visibility', dest="visibility",
                        help="visibility status of the bank")
    parser.add_argument('--maintenance', dest="maintenance",
                        help="Maintenance mode (on/off/status)")
    parser.add_argument('--version', dest="version",
                        help="Show version",
                        action="store_true", default=False)
    parser.add_argument('--status-ko', dest="statusko",
                        help="Get bank in KO status",
                        action="store_true", default=False)

    # Parsed values are stored directly on the project's Options object.
    options = Options()
    parser.parse_args(namespace=options)
    options.no_log = False

    # argparse's built-in help is disabled (add_help=False); this hand-written
    # usage text is printed instead.
    if options.help:
        print('''
    --config: global.properties file path

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish

    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --search: basic search in bank production releases, return list of banks
        --formats xx,yy : list of comma separated format
        AND/OR
        --types xx,yy : list of comma separated type

        --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
        [MANDATORY]
        --bank xx: name of the bank to show
        [OPTIONAL]
        --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal
        ''')
        return

    if options.version:
        version = pkg_resources.require('biomaj')[0].version
        print('Version: ' + str(version))
        return

    # Step names given on the command line must exist in the update or the
    # remove workflow.
    if options.stop_after or options.stop_before or options.from_task:
        available_steps = [flow['name'] for flow in UpdateWorkflow.FLOW]
        available_steps.extend(flow['name'] for flow in RemoveWorkflow.FLOW)
        for step in (options.stop_after, options.stop_before, options.from_task):
            if step and step not in available_steps:
                print('Invalid step: ' + step)
                sys.exit(1)

    try:
        if options.config is not None:
            BiomajConfig.load_config(options.config)
        else:
            BiomajConfig.load_config()
    except Exception as e:
        print(str(e))
        sys.exit(1)

    try:
        # --- maintenance mode: managed through a biomaj.lock marker file ---
        if options.maintenance:
            if options.maintenance not in ['on', 'off', 'status']:
                print("Wrong maintenance value [on,off,status]")
                sys.exit(1)
            data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
            if BiomajConfig.global_config.has_option('GENERAL', 'lock.dir'):
                lock_dir = BiomajConfig.global_config.get('GENERAL', 'lock.dir')
            else:
                lock_dir = data_dir
            maintenance_lock_file = os.path.join(lock_dir, 'biomaj.lock')
            if options.maintenance == 'status':
                if os.path.exists(maintenance_lock_file):
                    print("Maintenance: On")
                else:
                    print("Maintenance: Off")
                sys.exit(0)
            if options.maintenance == 'on':
                # Context manager closes the handle even if the write fails.
                with open(maintenance_lock_file, 'w') as lock_handle:
                    lock_handle.write('1')
                print("Maintenance set to On")
                sys.exit(0)
            if options.maintenance == 'off':
                if os.path.exists(maintenance_lock_file):
                    os.remove(maintenance_lock_file)
                print("Maintenance set to Off")
                sys.exit(0)

        # --- change bank owner ---
        if options.owner:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_owner(options.owner)
            sys.exit(0)

        # --- change bank visibility ---
        if options.visibility:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if options.visibility not in ['public', 'private']:
                print("Valid values are public|private")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_visibility(options.visibility)
            print("Do not forget to update accordingly the visibility.default parameter in the configuration file")
            sys.exit(0)

        # --- move production directories to a new location ---
        if options.newdir:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if not os.path.exists(options.newdir):
                print("Destination directory does not exists")
                # Stop here: without this exit the move below would still be
                # attempted towards a missing directory.
                sys.exit(1)
            bank = Bank(options.bank, options=options, no_log=True)
            if not bank.bank['production']:
                print("Nothing to move, no production directory")
                sys.exit(0)
            bank.load_session(Workflow.FLOW, None)
            w = Workflow(bank)
            res = w.wf_init()
            if not res:
                sys.exit(1)
            for prod in bank.bank['production']:
                session = bank.get_session_from_release(prod['release'])
                bank.load_session(Workflow.FLOW, session)
                prod_path = bank.session.get_full_release_directory()
                if os.path.exists(prod_path):
                    shutil.move(prod_path, options.newdir)
                prod['data_dir'] = options.newdir
            bank.banks.update({'name': options.bank},
                              {'$set': {'production': bank.bank['production']}})
            print("Bank production directories moved to " + options.newdir)
            print("WARNING: do not forget to update accordingly the data.dir and dir.version properties")
            w.wf_over()
            sys.exit(0)

        # --- rename a bank: new properties file + database record ---
        if options.newbank:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
            bank_prop_file = os.path.join(conf_dir, options.bank + '.properties')
            # SafeConfigParser no longer exists in Python 3.12+; fall back to
            # ConfigParser when it is unavailable.
            parser_class = getattr(configparser, 'SafeConfigParser',
                                   configparser.ConfigParser)
            config_bank = parser_class()
            config_bank.read([bank_prop_file])
            config_bank.set('GENERAL', 'db.name', options.newbank)
            newbank_prop_path = os.path.join(conf_dir, options.newbank + '.properties')
            with open(newbank_prop_path, 'w') as newbank_prop_file:
                config_bank.write(newbank_prop_file)
            bank.banks.update({'name': options.bank},
                              {'$set': {'name': options.newbank}})
            os.remove(bank_prop_file)
            print("Bank " + options.bank + " renamed to " + options.newbank)
            sys.exit(0)

        # --- search: index query, or formats/types lookup ---
        if options.search:
            if options.query:
                res = Bank.searchindex(options.query)
                print("Query matches for :" + options.query)
                results = [["Release", "Format(s)", "Type(s)", "Files"]]
                for match in res:
                    results.append([match['_source']['release'],
                                    str(match['_source']['format']),
                                    str(match['_source']['types']),
                                    ','.join(match['_source']['files'])])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            else:
                formats = []
                if options.formats:
                    formats = options.formats.split(',')
                types = []
                if options.types:
                    types = options.types.split(',')
                print("Search by formats=" + str(formats) + ", types=" + str(types))
                res = Bank.search(formats, types, False)
                results = [["Name", "Release", "Format(s)", "Type(s)", 'Published']]
                for bank in sorted(res, key=lambda bank: (bank['name'])):
                    b = bank['name']
                    bank['production'].sort(key=lambda n: n['release'], reverse=True)
                    for prod in bank['production']:
                        iscurrent = ""
                        if prod['session'] == bank['current']:
                            iscurrent = "yes"
                        results.append([b if b else '',
                                        prod['release'],
                                        ','.join(prod['formats']),
                                        ','.join(prod['types']),
                                        iscurrent])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        # --- show the files of a bank, one row per production release ---
        if options.show:
            if not options.bank:
                print("Bank option is required")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            results = [["Name", "Release", "Format(s)", "Type(s)", "Tag(s)", "File(s)"]]
            current = None
            fformat = None
            if 'current' in bank.bank and bank.bank['current']:
                current = bank.bank['current']
            for prod in bank.bank['production']:
                include = True
                release = prod['release']
                if current == prod['session']:
                    release += ' (current)'
                # --release matches either the release name or its directory.
                if options.release and (prod['release'] != options.release and
                                        prod['prod_dir'] != options.release):
                    include = False
                if include:
                    session = bank.get_session_from_release(prod['release'])
                    formats = session['formats']
                    afiles = []
                    atags = []
                    atypes = []
                    for fformat in list(formats.keys()):
                        for elt in formats[fformat]:
                            atypes.append(','.join(elt['types']))
                            for tag in list(elt['tags'].keys()):
                                atags.append(elt['tags'][tag])
                            for eltfile in elt['files']:
                                afiles.append(eltfile)
                    # The format column holds the last format iterated (None
                    # when the release has no format).
                    results.append([bank.bank['name'], release, fformat,
                                    ','.join(atypes), ','.join(atags),
                                    ','.join(afiles)])
            print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        # --- check a bank property file ---
        if options.check:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            print(options.bank + " check: " + str(bank.check()) + "\n")
            sys.exit(0)

        # --- status of one bank, or of every bank ---
        if options.status:
            if options.bank:
                bank = Bank(options.bank, no_log=True)
                info = bank.get_bank_release_info(full=True)
                print(tabulate(info['info'], headers='firstrow', tablefmt='psql'))
                print(tabulate(info['prod'], headers='firstrow', tablefmt='psql'))
                # do we have some pending release(s)
                if 'pend' in info and len(info['pend']) > 1:
                    print(tabulate(info['pend'], headers='firstrow', tablefmt='psql'))
            else:
                banks = Bank.list()
                # Headers of output table
                banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
                for bank in sorted(banks, key=lambda k: k['name']):
                    bank = Bank(bank['name'], no_log=True)
                    banks_list.append(bank.get_bank_release_info()['info'])
                print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))
            sys.exit(0)

        # --- banks whose last session is not over and not flagged OK ---
        if options.statusko:
            banks = Bank.list()
            banks_list = [["Name", "Type(s)", "Release", "Visibility", "Last run"]]
            for bank in sorted(banks, key=lambda k: k['name']):
                try:
                    bank = Bank(bank['name'], no_log=True)
                    bank.load_session(UpdateWorkflow.FLOW)
                    if bank.session is not None:
                        if bank.use_last_session and not bank.session.get_status(Workflow.FLOW_OVER):
                            wf_status = bank.session.get('workflow_status')
                            if wf_status is None or not wf_status:
                                banks_list.append(bank.get_bank_release_info()['info'])
                except Exception as e:
                    # A failing bank is reported but does not stop the listing.
                    print(str(e))
            print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))

        # --- update one or several comma-separated banks ---
        if options.update:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            banks = options.bank.split(',')
            gres = True
            for bank in banks:
                options.bank = bank
                bmaj = Bank(bank, options)
                # str(): log_file may be None, which would break concatenation.
                print('Log file: ' + str(bmaj.config.log_file))
                check_status = bmaj.check()
                if not check_status:
                    print('Skip bank ' + options.bank + ': wrong config')
                    gres = False
                    continue
                res = bmaj.update(depends=True)
                if not res:
                    gres = False
                Notify.notifyBankAction(bmaj)
            if not gres:
                sys.exit(1)

        # --- freeze a release ---
        if options.freeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.freeze(options.release)
            if not res:
                sys.exit(1)

        # --- unfreeze a release ---
        if options.unfreeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.unfreeze(options.release)
            if not res:
                sys.exit(1)

        # --- remove one release, or the whole bank ---
        if options.remove or options.removeall:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if options.remove and not options.release:
                print("Bank release is missing")
                sys.exit(1)
            if options.removeall:
                bmaj = Bank(options.bank, options, no_log=True)
                # str(): log_file may be None; a TypeError here would be
                # swallowed below and the removal silently skipped.
                print('Log file: ' + str(bmaj.config.log_file))
                res = bmaj.removeAll(options.force)
            else:
                bmaj = Bank(options.bank, options)
                print('Log file: ' + str(bmaj.config.log_file))
                res = bmaj.remove(options.release)
                Notify.notifyBankAction(bmaj)
            if not res:
                sys.exit(1)

        # --- remove pending releases ---
        if options.removepending:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            res = bmaj.remove_pending(options.release)
            if not res:
                sys.exit(1)

        # --- unpublish the current release ---
        if options.unpublish:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bmaj.unpublish()
            sys.exit(0)

        # --- publish a release as current ---
        if options.publish:
            if not options.bank:
                print("Bank name or release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bank = bmaj.bank
            session = None
            if options.get_option('release') is None:
                # Get latest prod release
                if len(bank['production']) > 0:
                    prod = bank['production'][len(bank['production']) - 1]
                    for s in bank['sessions']:
                        if s['id'] == prod['session']:
                            session = s
                            break
            else:
                # Search production release matching release
                for prod in bank['production']:
                    if prod['release'] == options.release or prod['prod_dir'] == options.release:
                        # Search session related to this production release
                        for s in bank['sessions']:
                            if s['id'] == prod['session']:
                                session = s
                                break
                        break
            if session is None:
                print("No production session could be found for this release")
                sys.exit(1)
            bmaj.session._session = session
            bmaj.publish()
    except Exception as e:
        # sys.exit() raises SystemExit, which is not caught here. Any other
        # error is only printed; the function then returns normally.
        print(str(e))
class TestBiomajFunctional(unittest.TestCase):
    '''
    Functional tests driving Bank update / publish / freeze / remove
    workflows against the test banks ('local', 'localprocess', 'computed',
    'computed2', 'computederror', 'directhttp', 'multi').
    '''

    def setUp(self):
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        self.config = BiomajConfig('local')
        self._remove_lock_file()

    def tearDown(self):
        self._remove_lock_file()
        self.utils.clean()

    def _remove_lock_file(self):
        ''' Delete the 'local' bank lock file if a previous run left one '''
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_extract_release_from_file_name(self):
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        # Raw strings: '\d' and '\.' are regex escapes, not string escapes
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100')

    def test_extract_release_from_file_content(self):
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_100\.txt')
        b.session.config.set('release.regexp', r'Release\s*(\d+)')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '103')

    def test_publish(self):
        '''
        Update a bank, then publish it
        '''
        b = Bank('local')
        b.update()
        current_link = os.path.join(b.config.get('data.dir'),
                                    b.config.get('dir.version'),
                                    'current')
        self.assertFalse(os.path.exists(current_link))
        self.assertIsNone(b.bank['current'])
        b.publish()
        self.assertTrue(os.path.exists(current_link))
        self.assertEqual(b.bank['current'], b.session._session['id'])

    # Should test this on local downloader, changing 1 file to force update,
    # else we would get same bank and there would be no update
    def test_no_update(self):
        '''
        Try updating twice, at second time, bank should not be updated
        '''
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

    @attr('release')
    def test_release_control(self):
        '''
        Try updating twice, at second time, modify one file (same date),
        bank should update
        '''
        b = Bank('local')
        b.update()
        b.session.config.set('keep.old.version', '3')
        self.assertTrue(b.session.get('update'))
        remote_file = b.session.config.get('remote.dir') + 'test2.fasta'
        os.utime(remote_file, None)
        # Update test2.fasta and set release.control
        b.session.config.set('release.control', 'true')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        b.session.config.set('remote.files', '^test2.fasta')
        b.update()
        self.assertTrue(b.session.get('update'))

    def test_fromscratch_update(self):
        '''
        Try updating twice, at second time, bank should be updated
        (force with fromscratch)
        '''
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        sess = b.session.get('release')
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(b.session.get('release'), sess + '__1')

    def test_fromscratch_update_with_release(self):
        '''
        Try updating twice, at second time, bank should be updated
        (force with fromscratch)

        Use case with release defined in release file
        '''
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100')
        os.makedirs(b.session.get_full_release_directory())
        w = UpdateWorkflow(b)
        # Reset release
        b.session.set('release', None)
        w.options.fromscratch = True
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100__1')

    def test_mix_stop_from_task(self):
        '''
        Get a first release, then fromscratch --stop-after, then restart
        from-task
        '''
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task2(self):
        '''
        Get a first release, then fromscratch --stop-after, then restart
        from-task
        '''
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        # NOTE(review): set on b2 after b3 has already been updated, so it
        # cannot influence the assertions below; presumably meant to be
        # b3.options.from_task before b3.update() -- confirm.
        b2.options.from_task = 'download'
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task3(self):
        '''
        Get a first release, then fromscratch --stop-after, then restart
        from-task
        '''
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        # NOTE(review): same as test_mix_stop_from_task2, this assignment
        # happens after the update it looks intended for -- confirm.
        b2.options.from_task = 'postprocess'
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task4(self):
        '''
        Get a first release, then fromscratch --stop-before, then restart
        from-task
        '''
        b = Bank('local')
        b.update()
        b2 = Bank('local')
        b2.options.stop_before = 'download'
        b2.options.fromscratch = True
        b2.update()
        b3 = Bank('local')
        b3.options.from_task = 'postprocess'
        res = b3.update()
        self.assertFalse(res)

    def test_delete_old_dirs(self):
        '''
        Try updating 3 times, oldest dir should be removed
        '''
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(len(b.bank['production']), 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, but olders must be deleted
        self.assertEqual(len(b.bank['production']), 2)

    def test_delete_old_dirs_with_freeze(self):
        '''
        Try updating 3 times, oldest dir should be removed but not freezed
        releases
        '''
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        self.assertEqual(len(b.bank['production']), 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, but olders must be deleted
        self.assertEqual(len(b.bank['production']), 3)

    def test_removeAll(self):
        b = Bank('local')
        b.update()
        b.removeAll()
        self.assertFalse(os.path.exists(b.get_data_dir()))
        bdb = b.banks.find_one({'name': b.name})
        self.assertIsNone(bdb)

    def test_remove(self):
        '''
        test removal of a production dir
        '''
        b = Bank('local')
        b.update()
        self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
        self.assertEqual(len(b.bank['production']), 1)
        b.remove(b.session.get('release'))
        self.assertFalse(
            os.path.exists(b.session.get_full_release_directory()))
        b = Bank('local')
        self.assertEqual(len(b.bank['production']), 0)

    def test_update_stop_after(self):
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_update_stop_before(self):
        b = Bank('local')
        b.options.stop_before = 'postprocess'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_reupdate_from_task(self):
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(b2.session.get_status('postprocess'))
        self.assertEqual(b.session.get_full_release_directory(),
                         b2.session.get_full_release_directory())

    def test_reupdate_from_task_error(self):
        b = Bank('local')
        b.options.stop_after = 'check'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        res = b2.update()
        self.assertFalse(res)

    def test_reupdate_from_task_wrong_release(self):
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = 'wrongrelease'
        res = b2.update()
        self.assertFalse(res)

    @attr('process')
    def test_postprocesses_restart_from_proc(self):
        b = Bank('localprocess')
        b.update()
        release_dir = b.session.get_full_release_directory()
        proc1file = os.path.join(release_dir, 'proc1.txt')
        proc2file = os.path.join(release_dir, 'proc2.txt')
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, reexecute all processes
        b2 = Bank('localprocess')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, but at process PROC2 and following
        b3 = Bank('localprocess')
        b3.options.from_task = 'postprocess'
        b3.options.process = 'PROC2'
        b3.options.release = b.session.get('release')
        b3.update()
        self.assertTrue(os.path.exists(proc2file))

    def test_computed(self):
        b = Bank('computed')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(
            os.path.exists(b.session.get_full_release_directory() +
                           '/sub1/flat/test_100.txt'))
        self.assertTrue(b.session.get('update'))
        # Check that, with depends non updated, bank is not updated itself
        nextb = Bank('computed')
        nextb.update(True)
        self.assertFalse(nextb.session.get('update'))

    @attr('nofile')
    def test_computed_nofile(self):
        b = Bank('computed2')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('protocol', 'none')
        b.session.config.set('sub1.files.move', 'flat/test_.*')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(
            os.path.exists(b.session.get_full_release_directory() +
                           '/sub1/flat/test_100.txt'))

    # This test used to share the name test_computed_ref_release with the
    # test below; the later definition replaced it in the class namespace,
    # so it never ran. It is renamed so that both tests are collected.
    def test_computed_ref_release_matches_sub1(self):
        b = Bank('computed2')
        res = b.update(True)
        b2 = Bank('sub1')
        b2release = b2.bank['production'][-1]['release']
        brelease = b.bank['production'][-1]['release']
        self.assertTrue(res)
        self.assertEqual(brelease, b2release)

    @attr('computed')
    def test_computed_ref_release(self):
        b = Bank('computed2')
        b.update(True)
        self.assertTrue(b.session.get('update'))
        b2 = Bank('computed2')
        b2.update(True)
        self.assertFalse(b2.session.get('update'))

    def test_computederror(self):
        b = Bank('computederror')
        res = b.update(True)
        self.assertFalse(res)
        self.assertTrue(b.session._session['depends']['sub2'])
        self.assertFalse(b.session._session['depends']['error'])

    @attr('directrelease')
    def test_directhttp_release(self):
        b = Bank('directhttp')
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(
            os.path.exists(b.session.get_full_release_directory() +
                           '/flat/debian/README.html'))

    @attr('network')
    def test_multi(self):
        b = Bank('multi')
        b.update()
        release_dir = b.session.get_full_release_directory()
        with open(os.path.join(release_dir, 'flat/test1.json'),
                  'r') as content_file:
            my_json = json.load(content_file)
            self.assertEqual(my_json['args']['key1'], 'value1')
        with open(os.path.join(release_dir, 'flat/test2.json'),
                  'r') as content_file:
            my_json = json.load(content_file)
            self.assertEqual(my_json['form']['key1'], 'value1')

    def test_freeze(self):
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b.freeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod['freeze'], True)
        # A frozen release must not be removable
        res = b.remove(rel)
        self.assertEqual(res, False)
        b.unfreeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod['freeze'], False)
        res = b.remove(rel)
        self.assertEqual(res, True)

    def test_stats(self):
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        stats = Bank.get_banks_disk_usage()
        self.assertGreater(stats[0]['size'], 0)
        for release in stats[0]['releases']:
            if release['name'] == rel:
                self.assertGreater(release['size'], 0)

    @attr('process')
    def test_processes_meta_data(self):
        b = Bank('localprocess')
        b.update()
        formats = b.session.get('formats')
        self.assertEqual(len(formats['blast']), 2)
        self.assertEqual(len(formats['test'][0]['files']), 3)

    @attr('process')
    def test_search(self):
        b = Bank('localprocess')
        b.update()
        search_res = Bank.search(['blast'], [])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search([], ['nucleic'])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(['blast'], ['nucleic'])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(['blast'], ['proteic'])
        self.assertEqual(len(search_res), 0)

    def test_owner(self):
        '''
        test ACL with owner
        '''
        b = Bank('local')
        res = b.update()
        self.assertTrue(res)
        b.set_owner('sample')
        b2 = Bank('local')
        # The previous try/self.fail()/except Exception: pass form swallowed
        # the AssertionError raised by self.fail(), so the test could never
        # fail. assertRaises fails when update() does not raise.
        with self.assertRaises(Exception):
            b2.update()
class TestBiomajFunctional(unittest.TestCase):
    """
    Functional tests driving Bank update / publish / freeze / remove
    workflows against the test banks ("local", "localprocess", "computed",
    "computed2", "computederror", "multi").
    """

    def setUp(self):
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        # Delete all banks
        b = Bank("local")
        b.banks.remove({})
        self.config = BiomajConfig("local")
        self._remove_lock_file()

    def tearDown(self):
        self._remove_lock_file()
        self.utils.clean()

    def _remove_lock_file(self):
        """Delete the "local" bank lock file if a previous run left one."""
        lock_file = os.path.join(self.config.get("data.dir"), "local.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_extract_release_from_file_name(self):
        b = Bank("local")
        b.load_session(UpdateWorkflow.FLOW)
        # Raw strings: "\d" and "\." are regex escapes, not string escapes
        b.session.config.set("release.file", r"test_(\d+)\.txt")
        b.session.config.set("release.regexp", "")
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get("release"), "100")

    def test_extract_release_from_file_content(self):
        b = Bank("local")
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set("release.file", r"test_100\.txt")
        b.session.config.set("release.regexp", r"Release\s*(\d+)")
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get("release"), "103")

    def test_publish(self):
        """
        Update a bank, then publish it
        """
        b = Bank("local")
        b.update()
        current_link = os.path.join(b.config.get("data.dir"), b.config.get("dir.version"), "current")
        self.assertFalse(os.path.exists(current_link))
        self.assertIsNone(b.bank["current"])
        b.publish()
        self.assertTrue(os.path.exists(current_link))
        self.assertEqual(b.bank["current"], b.session._session["id"])

    # Should test this on local downloader, changing 1 file to force update,
    # else we would get same bank and there would be no update
    def test_no_update(self):
        """
        Try updating twice, at second time, bank should not be updated
        """
        b = Bank("local")
        b.update()
        self.assertTrue(b.session.get("update"))
        b.update()
        self.assertFalse(b.session.get("update"))
        self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

    def test_fromscratch_update(self):
        """
        Try updating twice, at second time, bank should be updated
        (force with fromscratch)
        """
        b = Bank("local")
        b.update()
        self.assertTrue(b.session.get("update"))
        sess = b.session.get("release")
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get("update"))
        self.assertEqual(b.session.get("release"), sess + "__1")

    def test_fromscratch_update_with_release(self):
        """
        Try updating twice, at second time, bank should be updated
        (force with fromscratch)

        Use case with release defined in release file
        """
        b = Bank("local")
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set("release.file", r"test_(\d+)\.txt")
        b.session.config.set("release.regexp", "")
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get("release"), "100")
        os.makedirs(b.session.get_full_release_directory())
        w = UpdateWorkflow(b)
        # Reset release
        b.session.set("release", None)
        w.options.fromscratch = True
        w.wf_release()
        self.assertEqual(b.session.get("release"), "100__1")

    def test_mix_stop_from_task(self):
        """
        Get a first release, then fromscratch --stop-after, then restart
        from-task
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_after = "download"
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get("release"), rel + "__1")
        b3 = Bank("local")
        res = b3.update()
        self.assertEqual(b3.session.get("release"), rel + "__1")
        self.assertTrue(res)

    def test_mix_stop_from_task2(self):
        """
        Get a first release, then fromscratch --stop-after, then restart
        from-task
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_after = "download"
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get("release"), rel + "__1")
        b3 = Bank("local")
        res = b3.update()
        # NOTE(review): set on b2 after b3 has already been updated, so it
        # cannot influence the assertions below; presumably meant to be
        # b3.options.from_task before b3.update() -- confirm.
        b2.options.from_task = "download"
        self.assertEqual(b3.session.get("release"), rel + "__1")
        self.assertTrue(res)

    def test_mix_stop_from_task3(self):
        """
        Get a first release, then fromscratch --stop-after, then restart
        from-task
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_after = "download"
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get("release"), rel + "__1")
        b3 = Bank("local")
        res = b3.update()
        # NOTE(review): same as test_mix_stop_from_task2, this assignment
        # happens after the update it looks intended for -- confirm.
        b2.options.from_task = "postprocess"
        self.assertEqual(b3.session.get("release"), rel + "__1")
        self.assertTrue(res)

    def test_mix_stop_from_task4(self):
        """
        Get a first release, then fromscratch --stop-before, then restart
        from-task
        """
        b = Bank("local")
        b.update()
        b2 = Bank("local")
        b2.options.stop_before = "download"
        b2.options.fromscratch = True
        b2.update()
        b3 = Bank("local")
        b3.options.from_task = "postprocess"
        res = b3.update()
        self.assertFalse(res)

    def test_delete_old_dirs(self):
        """
        Try updating 3 times, oldest dir should be removed
        """
        b = Bank("local")
        b.removeAll(True)
        b = Bank("local")
        b.update()
        self.assertTrue(b.session.get("update"))
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get("update"))
        self.assertEqual(len(b.bank["production"]), 2)
        b.update()
        self.assertTrue(b.session.get("update"))
        # one new dir, but olders must be deleted
        self.assertEqual(len(b.bank["production"]), 2)

    def test_delete_old_dirs_with_freeze(self):
        """
        Try updating 3 times, oldest dir should be removed but not freezed
        releases
        """
        b = Bank("local")
        b.removeAll(True)
        b = Bank("local")
        b.update()
        b.freeze(b.session.get("release"))
        self.assertTrue(b.session.get("update"))
        b.options.fromscratch = True
        b.update()
        b.freeze(b.session.get("release"))
        self.assertTrue(b.session.get("update"))
        self.assertEqual(len(b.bank["production"]), 2)
        b.update()
        self.assertTrue(b.session.get("update"))
        # one new dir, but olders must be deleted
        self.assertEqual(len(b.bank["production"]), 3)

    def test_removeAll(self):
        b = Bank("local")
        b.update()
        b.removeAll()
        self.assertFalse(os.path.exists(b.get_data_dir()))
        bdb = b.banks.find_one({"name": b.name})
        self.assertIsNone(bdb)

    def test_remove(self):
        """
        test removal of a production dir
        """
        b = Bank("local")
        b.update()
        self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
        self.assertEqual(len(b.bank["production"]), 1)
        b.remove(b.session.get("release"))
        self.assertFalse(os.path.exists(b.session.get_full_release_directory()))
        b = Bank("local")
        self.assertEqual(len(b.bank["production"]), 0)

    def test_update_stop_after(self):
        b = Bank("local")
        b.options.stop_after = "download"
        b.update()
        self.assertTrue(b.session.get_status("download"))
        self.assertFalse(b.session.get_status("postprocess"))

    def test_update_stop_before(self):
        b = Bank("local")
        b.options.stop_before = "postprocess"
        b.update()
        self.assertTrue(b.session.get_status("download"))
        self.assertFalse(b.session.get_status("postprocess"))

    def test_reupdate_from_task(self):
        b = Bank("local")
        b.options.stop_after = "download"
        b.update()
        self.assertFalse(b.session.get_status("postprocess"))
        b2 = Bank("local")
        b2.options.from_task = "postprocess"
        b2.options.release = b.session.get("release")
        b2.update()
        self.assertTrue(b2.session.get_status("postprocess"))
        self.assertEqual(b.session.get_full_release_directory(), b2.session.get_full_release_directory())

    def test_reupdate_from_task_error(self):
        b = Bank("local")
        b.options.stop_after = "check"
        b.update()
        self.assertFalse(b.session.get_status("postprocess"))
        b2 = Bank("local")
        b2.options.from_task = "postprocess"
        b2.options.release = b.session.get("release")
        res = b2.update()
        self.assertFalse(res)

    def test_reupdate_from_task_wrong_release(self):
        b = Bank("local")
        b.options.stop_after = "download"
        b.update()
        self.assertFalse(b.session.get_status("postprocess"))
        b2 = Bank("local")
        b2.options.from_task = "postprocess"
        b2.options.release = "wrongrelease"
        res = b2.update()
        self.assertFalse(res)

    @attr("process")
    def test_postprocesses_restart_from_proc(self):
        b = Bank("localprocess")
        b.update()
        release_dir = b.session.get_full_release_directory()
        proc1file = os.path.join(release_dir, "proc1.txt")
        proc2file = os.path.join(release_dir, "proc2.txt")
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, reexecute all processes
        b2 = Bank("localprocess")
        b2.options.from_task = "postprocess"
        b2.options.release = b.session.get("release")
        b2.update()
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, but at process PROC2 and following
        b3 = Bank("localprocess")
        b3.options.from_task = "postprocess"
        b3.options.process = "PROC2"
        b3.options.release = b.session.get("release")
        b3.update()
        self.assertTrue(os.path.exists(proc2file))

    def test_computed(self):
        b = Bank("computed")
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(os.path.exists(b.session.get_full_release_directory() + "/sub1/flat/test_100.txt"))

    def test_computed_ref_release(self):
        b = Bank("computed2")
        res = b.update(True)
        b2 = Bank("sub1")
        b2release = b2.bank["production"][-1]["release"]
        brelease = b.bank["production"][-1]["release"]
        self.assertTrue(res)
        self.assertEqual(brelease, b2release)

    def test_computederror(self):
        b = Bank("computederror")
        res = b.update(True)
        self.assertFalse(res)
        self.assertTrue(b.session._session["depends"]["sub2"])
        self.assertFalse(b.session._session["depends"]["error"])

    @attr("network")
    def test_multi(self):
        b = Bank("multi")
        b.update()
        release_dir = b.session.get_full_release_directory()
        with open(os.path.join(release_dir, "flat/test1.json"), "r") as content_file:
            my_json = json.load(content_file)
            self.assertEqual(my_json["args"]["key1"], "value1")
        with open(os.path.join(release_dir, "flat/test2.json"), "r") as content_file:
            my_json = json.load(content_file)
            self.assertEqual(my_json["form"]["key1"], "value1")

    def test_freeze(self):
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b.freeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod["freeze"], True)
        # A frozen release must not be removable
        res = b.remove(rel)
        self.assertEqual(res, False)
        b.unfreeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod["freeze"], False)
        res = b.remove(rel)
        self.assertEqual(res, True)

    def test_stats(self):
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        stats = Bank.get_banks_disk_usage()
        self.assertGreater(stats[0]["size"], 0)
        for release in stats[0]["releases"]:
            if release["name"] == rel:
                self.assertGreater(release["size"], 0)

    @attr("process")
    def test_processes_meta_data(self):
        b = Bank("localprocess")
        b.update()
        formats = b.session.get("formats")
        self.assertEqual(len(formats["blast"]), 2)
        self.assertEqual(len(formats["test"][0]["files"]), 3)

    @attr("process")
    def test_search(self):
        b = Bank("localprocess")
        b.update()
        search_res = Bank.search(["blast"], [])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search([], ["nucleic"])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(["blast"], ["nucleic"])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(["blast"], ["proteic"])
        self.assertEqual(len(search_res), 0)

    def test_owner(self):
        """
        test ACL with owner
        """
        b = Bank("local")
        res = b.update()
        self.assertTrue(res)
        b.set_owner("sample")
        b2 = Bank("local")
        # The previous try/self.fail()/except Exception: pass form swallowed
        # the AssertionError raised by self.fail(), so the test could never
        # fail. assertRaises fails when update() does not raise.
        with self.assertRaises(Exception):
            b2.update()
def load_config(request):
    """
    Load the BioMAJ global configuration once per process.

    Does nothing when BiomajConfig.global_config is already set; otherwise
    loads the file named by the 'global_properties' entry of the request's
    registry settings.

    :param request: object exposing ``registry.settings`` as a mapping
    """
    if BiomajConfig.global_config is not None:
        # Already loaded, nothing to do
        return
    properties_path = request.registry.settings['global_properties']
    BiomajConfig.load_config(properties_path)
def main(global_config, **settings):
    """
    Build and return the Pyramid WSGI application.

    Loads the BioMAJ configuration, then registers the authentication and
    authorization policies, static views, routes and a JSON renderer able
    to serialize ObjectId and datetime values.

    :param global_config: first positional argument of the application
                          factory; not read by this function
    :param settings: application settings; 'global_properties' is the path
                     of the BioMAJ global properties file and defaults to
                     '/etc/biomaj/global.properties'
    :return: the WSGI application produced by ``config.make_wsgi_app()``

    Exits the process with status 1 when the properties file is missing.
    """
    global_properties = settings.get(
        'global_properties', '/etc/biomaj/global.properties')
    if not os.path.exists(global_properties):
        # Function-call form: the bare print statement is a SyntaxError on
        # Python 3, and this form prints the same text on Python 2.
        print('global.properties configuration field is not set')
        sys.exit(1)

    BiomajConfig.load_config(global_properties)
    # Store the resolved path so request handlers can read it back
    settings['global_properties'] = global_properties

    config = Configurator(settings=settings)
    config.include('pyramid_chameleon')
    config.add_subscriber(before_render, BeforeRender)

    # NOTE(review): the auth ticket secret is hard-coded; anyone who knows
    # it can forge authentication cookies. Consider reading it from the
    # deployment settings.
    authentication_policy = AuthTktAuthenticationPolicy(
        'seekrit', callback=None, hashalg='sha512')
    authorization_policy = ACLAuthorizationPolicy()
    config.set_authentication_policy(authentication_policy)
    config.set_authorization_policy(authorization_policy)

    config.add_static_view('static', 'static', cache_max_age=3600)
    config.add_static_view('app', 'biomajwatcher:webapp/app')

    # Registered in this exact order, which is the original order.
    routes = (
        ('home', '/'),
        ('user', '/user'),
        ('user_banks', '/user/{id}/banks'),
        ('bank', '/bank'),
        ('bankdetails', '/bank/{id}'),
        ('banklocked', '/bank/{id}/locked'),
        ('bankstatus', '/bank/{id}/status'),
        ('bankconfig', '/bank/{id}/config'),
        ('bankreleaseremove', '/bank/{id}/{release}'),
        ('sessionlog', '/bank/{id}/log/{session}'),
        ('schedulebank', '/schedule'),
        ('updateschedulebank', '/schedule/{name}'),
        ('search', '/search'),
        ('search_format', '/search/format/{format}'),
        ('search_format_type', '/search/format/{format}/type/{type}'),
        ('search_type', '/search/type/{type}'),
        ('stat', '/stat'),
        ('is_auth', '/auth'),
        ('auth', '/auth/{id}'),
        ('logout', '/logout'),
        ('old_api', 'BmajWatcher/GET'),
    )
    for route_name, route_pattern in routes:
        config.add_route(route_name, route_pattern)
    config.scan()

    # automatically serialize bson ObjectId and datetime to Mongo extended JSON
    json_renderer = JSON()

    def pymongo_adapter(obj, request):
        return json_util.default(obj)

    json_renderer.add_adapter(ObjectId, pymongo_adapter)
    json_renderer.add_adapter(datetime.datetime, pymongo_adapter)
    config.add_renderer('json', json_renderer)

    return config.make_wsgi_app()
def _read_properties(path):
    """Parse the ``key=value`` lines of *path* into a dict of stripped strings."""
    properties = {}
    with open(path, 'r') as handle:
        for line in handle:
            # Split on the first '=' only, so values that themselves
            # contain '=' (passwords, URLs) are kept whole.
            key, separator, value = line.partition('=')
            if separator:
                properties[key.strip()] = value.strip()
    return properties


def main():
    """
    Migrate an old BioMAJ installation to the BioMAJ3 layout.

    Steps, driven by the command-line arguments:

    1. read the old global configuration (``--oldconfig``) and load the
       new one (``--config``);
    2. refuse to continue when old and new bank properties share a
       directory, or when the data directories differ;
    3. rewrite every ``*.properties`` file found under the old
       configuration directory into the new ``conf.dir`` (adds a
       ``[GENERAL]`` section, converts ``${var}`` to ``%(var)s`` for known
       variables, renames ``db.source`` to ``depends``) and instantiate
       the matching Bank;
    4. connect to the old MySQL database and call ``migrate_bank`` for
       every row of its ``bank`` table.

    Exits with status 1 on a configuration mismatch, and with an argparse
    usage error when ``--config`` or ``--oldconfig`` is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', dest="config",
                        help="Biomaj3 Configuration file")
    parser.add_argument('-o', '--oldconfig', dest="oldconfig",
                        help="Old configuration file")
    parser.add_argument('-u', '--user', dest="user",
                        help="MySQL user to override global properties")
    parser.add_argument('-p', '--password', dest="password",
                        help="MySQL password to override global properties")
    parser.add_argument('-l', '--host', dest="host",
                        help="MySQL host to override global properties")
    parser.add_argument('-d', '--database', dest="database",
                        help="MySQL database to override global properties")
    parser.add_argument('-H', '--keep_history', dest="history",
                        action="store_true", default=False,
                        help="Keep bank history, not only production")
    args = parser.parse_args()

    # Both paths are used unconditionally below; report a usage error
    # instead of failing later with a TypeError on None.
    if not args.config or not args.oldconfig:
        parser.error("both --config and --oldconfig are required")

    biomajconfig = _read_properties(args.oldconfig)

    BiomajConfig.load_config(args.config, allow_user_config=False)

    db_properties_dir = os.path.dirname(args.oldconfig)
    if db_properties_dir == os.path.dirname(args.config):
        logging.error(
            "Bank properties use the same directory, please use a different conf.dir"
        )
        sys.exit(1)

    data_dir = biomajconfig['data.dir']
    if data_dir.endswith('/'):
        data_dir = data_dir[:-1]
    new_data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
    if os.path.dirname(data_dir) != os.path.dirname(new_data_dir):
        logging.error('Data dirs are different, please use the same data dirs')
        sys.exit(1)

    # Collect every bank properties file of the old installation
    prop_files = []
    for root, _dirnames, filenames in os.walk(db_properties_dir):
        for filename in fnmatch.filter(filenames, '*.properties'):
            if filename != 'global.properties':
                prop_files.append(os.path.join(root, filename))

    conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
    if not os.path.exists(conf_dir):
        os.makedirs(conf_dir)

    # ${xx} references; compiled once instead of once per line
    variable_pattern = re.compile(r"\$\{([a-zA-Z0-9-_.]+)\}")

    for prop_file in prop_files:
        propbankconfig = _read_properties(prop_file)
        newpropfile = os.path.join(conf_dir, os.path.basename(prop_file))
        # Context managers close both files even if a write fails
        with open(newpropfile, 'w') as newprop, open(prop_file, 'r') as props:
            newprop.write("[GENERAL]\n")
            for line in props:
                if line.startswith(('*', '/*')):
                    # Lines starting with '*' or '/*' are not copied
                    continue
                # Replace config variables with new syntax
                # ${xx} => %(xx)s, not other env variables
                for match in variable_pattern.findall(line):
                    if match in biomajconfig or match in propbankconfig:
                        line = line.replace('${' + match + '}',
                                            '%(' + match + ')s')
                newprop.write(
                    line.replace('\\\\', '\\').replace('db.source', 'depends'))
        # Instantiated for its side effect; the instance is not used further
        Bank(os.path.basename(prop_file).replace('.properties', ''),
             no_log=True)

    # database.url=jdbc\:mysql\://genobdd.genouest.org/biomaj_log
    url_tail = biomajconfig['database.url'].split('/')[-2:]
    # Command-line values, when given, override the old configuration
    db_name = args.database or url_tail[-1]
    db_host = args.host or url_tail[0]
    db_user = args.user or biomajconfig['database.login']
    db_password = args.password or biomajconfig['database.password']

    # Stays None when connect() raises, so the finally clause does not fail
    # with a NameError that would hide the reported connection error.
    cnx = None
    try:
        cnx = mysql.connector.connect(host=db_host, database=db_name,
                                      user=db_user, password=db_password)
        cur = cnx.cursor()
        cur.execute("SELECT name FROM bank")
        for row in cur.fetchall():
            migrate_bank(cur, row[0], history=args.history)
    except mysql.connector.Error as error:
        if error.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Wrong username or password: %s" % error.msg)
        elif error.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist: %s" % error.msg)
        else:
            print("Unknown error: %s" % error)
    finally:
        if cnx is not None:
            cnx.close()
class TestElastic(unittest.TestCase):
    '''
    test indexing and search
    '''

    def setUp(self):
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        self.config = BiomajConfig('local')
        self._remove_lock_file()

    def tearDown(self):
        self._remove_lock_file()
        self.utils.clean()
        BmajIndex.delete_all_bank('test')

    def _remove_lock_file(self):
        ''' Delete the 'local' bank lock file if a previous run left one '''
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_index(self):
        ''' Index one production record, then search it by bank name '''
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [
                    {
                        "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                        "types": ["nucleic"],
                        "tags": {"organism": "hg19"},
                    }
                ],
                "blast": [
                    {
                        "files": ["blast/chr1/chr1db"],
                        "types": ["nucleic"],
                        "tags": {"chr": "chr1", "organism": "hg19"},
                    }
                ],
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"],
        }
        BmajIndex.add('test', prod, True)
        query = {'query': {'match': {'bank': 'test'}}}
        res = BmajIndex.search(query)
        # assertEqual reports the actual count on failure
        self.assertEqual(len(res), 2)
class TestBiomajSetup(unittest.TestCase):
    '''
    Test bank initialisation, session handling and process factories
    '''

    def setUp(self):
        '''
        Load the test configuration, empty the banks collection and remove
        any stale lock file of the *alu* bank.
        '''
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)

        # Delete all banks
        b = Bank('alu')
        b.banks.remove({})

        self.config = BiomajConfig('alu')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        '''
        Remove the lock file and clean the test workspace
        '''
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()

    def test_new_bank(self):
        '''
        Checks bank init
        '''
        # Only checks that the constructor does not raise
        Bank('alu')

    def test_new_session(self):
        '''
        Checks an empty session is created
        '''
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        for key in b.session._session['status'].keys():
            self.assertFalse(b.session.get_status(key))

    def test_session_reload_notover(self):
        '''
        Checks a session is used if present
        '''
        b = Bank('alu')
        for _ in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()

        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        '''
        Checks only one session is kept after cleaning old sessions
        '''
        b = Bank('local')
        for _ in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        b2 = Bank('local')
        b2.update()
        b2.clean_old_sessions()
        self.assertEqual(len(b2.bank['sessions']), 1)

    def test_session_reload_over(self):
        '''
        Checks a new session is started if the last one is over
        '''
        b = Bank('alu')
        for _ in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            s._session['status'][Workflow.FLOW_OVER] = True
            b.session = s
            b.save_session()

        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        '''
        Checks every initialised bank is listed
        '''
        # Instantiating a bank registers it in the database
        Bank('alu')
        Bank('local')
        banks = Bank.list()
        self.assertEqual(len(banks), 2)

    @attr('network')
    def test_get_release(self):
        '''
        Get release
        '''
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        res = b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(res)
        self.assertTrue(b.session._session['release'] is not None)

    def test_remove_session(self):
        '''
        Checks a session can be removed from the bank
        '''
        b = Bank('alu')
        for _ in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        self.assertEqual(len(b.bank['sessions']), 4)
        b.remove_session(b.session.get('id'))
        self.assertEqual(len(b.bank['sessions']), 3)

    @attr('process')
    def test_postprocesses_setup(self):
        '''
        Checks post-process tasks are dispatched per thread
        '''
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run(True)
        self.assertEqual(len(pfactory.threads_tasks[0]), 2)
        self.assertEqual(len(pfactory.threads_tasks[1]), 1)

    @attr('process')
    def test_postprocesses_exec_again(self):
        '''
        Execute once, set a status to false, check that False processes are executed
        '''
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.blocks['BLOCK1']['META0']['PROC0'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC1'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC2'])
        blocks = copy.deepcopy(pfactory.blocks)
        blocks['BLOCK2']['META1']['PROC2'] = False
        pfactory2 = PostProcessFactory(b, blocks)
        pfactory2.run()
        self.assertTrue(pfactory2.blocks['BLOCK2']['META1']['PROC2'])

    @attr('process')
    def test_preprocesses(self):
        '''
        Checks pre-processes are executed
        '''
        b = Bank('localprocess')
        pfactory = PreProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    @attr('process')
    def test_removeprocesses(self):
        '''
        Checks remove-processes are executed
        '''
        b = Bank('localprocess')
        pfactory = RemoveProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    def test_dependencies_list(self):
        '''
        Checks the dependencies of the *computed* bank are listed
        '''
        b = Bank('computed')
        deps = b.get_dependencies()
        self.assertEqual(len(deps), 2)
class Bank(object):
    '''
    BioMAJ bank
    '''

    def __init__(self, name, options=None, no_log=False):
        '''
        Get a bank from db or creates a new one

        :param name: name of the bank, must match its config file
        :type name: str
        :param options: bank options
        :type options: argparse
        :param no_log: create a log file for the bank
        :type no_log: bool
        :raises Exception: if the global configuration has not been loaded
        '''
        logging.debug('Initialize ' + name)
        if BiomajConfig.global_config is None:
            raise Exception('Configuration must be loaded first')

        self.name = name
        self.depends = []
        self.no_log = no_log

        if no_log:
            if options is None:
                options = Options()
                options.no_log = True
            else:
                options.no_log = no_log

        self.config = BiomajConfig(self.name, options)

        if self.config.get('bank.num.threads') is not None:
            ProcessFactory.NB_THREAD = int(self.config.get('bank.num.threads'))

        if self.config.log_file is not None and self.config.log_file != 'none':
            logging.info("Log file: " + self.config.log_file)

        if options is None:
            self.options = Options()
        else:
            self.options = options

        self.connector = Connector().get_connector()
        # The connector itself is used wherever the banks collection was used
        self.banks = self.connector
        self.bank = self.connector.get({'name': self.name})

        if self.bank is None:
            # get_properties() is evaluated while self.bank is still None,
            # so the current user becomes the owner of the new bank
            self.bank = {
                'name': self.name,
                'current': None,
                'sessions': [],
                'production': [],
                'properties': self.get_properties()
            }
            self.bank['_id'] = self.connector.set('banks', self.bank)

        self.session = None
        self.use_last_session = False

    @staticmethod
    def _init_mongo_connector():
        '''
        Open the MongoDB connection from the global config if not done yet
        '''
        if MongoConnector.db is None:
            MongoConnector(
                BiomajConfig.global_config.get('GENERAL', 'db.url'),
                BiomajConfig.global_config.get('GENERAL', 'db.name'))

    def _require_owner(self):
        '''
        Log an error and raise if the current user is neither owner nor admin

        :raises Exception: if the current user is not authorized
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

    def check(self):
        '''
        Checks bank configuration
        '''
        return self.config.check()

    def is_locked(self):
        '''
        Checks if bank is locked ie action is in progress

        :return: bool, True if the lock file of the bank exists
        '''
        data_dir = self.config.get('data.dir')
        lock_dir = self.config.get('lock.dir', default=data_dir)
        lock_file = os.path.join(lock_dir, self.name + '.lock')
        return os.path.exists(lock_file)

    def get_bank(self, bank=None, no_log=False):
        '''
        Get bank stored in db, or get an other bank when *bank* is given

        The class used to define get_bank twice, the second definition
        silently replacing the first; both usages are supported here.

        :param bank: name of an other bank to load, None for this bank
        :type bank: str
        :param no_log: do not create a log file for the other bank
        :type no_log: bool
        :return: bank json object if *bank* is None, else a :class:`Bank`
        '''
        if bank is None:
            return self.bank
        return Bank(bank, no_log=no_log)

    @staticmethod
    def get_banks_disk_usage():
        '''
        Get disk usage per bank and release

        :return: list of dict with keys name, size and releases
        '''
        Bank._init_mongo_connector()
        bank_list = []
        banks = MongoConnector.banks.find({}, {'name': 1, 'production': 1})
        for b in banks:
            bank_elt = {'name': b['name'], 'size': 0, 'releases': []}
            for p in b['production']:
                if p['size'] is None:
                    p['size'] = 0
                bank_elt['size'] += p['size']
                bank_elt['releases'].append({
                    'name': p['release'],
                    'size': p['size']
                })
            bank_list.append(bank_elt)
        return bank_list

    def get_bank_release_info(self, full=False):
        '''
        Get release info for the bank. Used with --status option from biomaj-cly.py

        :param full: Display full for the bank
        :type full: Boolean
        :return: Dict with keys
                      if full=True
                           - info, prod, pend
                      else
                           - info
        '''
        _bank = self.bank
        info = {}

        if full:
            bank_info = []
            prod_info = []
            pend_info = []
            release = None
            if 'current' in _bank and _bank['current']:
                for prod in _bank['production']:
                    if _bank['current'] == prod['session']:
                        release = prod['release']
            # Bank info header
            bank_info.append(
                ["Name", "Type(s)", "Last update status", "Published release"])
            bank_info.append([
                _bank['name'],
                str(','.join(_bank['properties']['type'])),
                str(datetime.fromtimestamp(
                    _bank['last_update_session']).strftime(
                        "%Y-%m-%d %H:%M:%S")),
                str(release)
            ])
            # Bank production info header
            prod_info.append([
                "Session", "Remote release", "Release", "Directory", "Freeze"
            ])
            for prod in _bank['production']:
                data_dir = self.config.get('data.dir')
                dir_version = self.config.get('dir.version')
                # NOTE(review): save_session stores these values under the
                # 'data_dir' / 'dir_version' keys, so the dotted keys below
                # look like they never match - confirm
                if 'data.dir' in prod:
                    data_dir = prod['data.dir']
                if 'dir.version' in prod:
                    dir_version = prod['dir.version']
                release_dir = os.path.join(data_dir, dir_version,
                                           prod['prod_dir'])
                date = datetime.fromtimestamp(
                    prod['session']).strftime('%Y-%m-%d %H:%M:%S')
                prod_info.append([
                    date, prod['remoterelease'], prod['release'], release_dir,
                    'yes' if 'freeze' in prod and prod['freeze'] else 'no'
                ])
            # Bank pending info header
            if 'pending' in _bank and len(_bank['pending'].keys()) > 0:
                pend_info.append(["Pending release", "Last run"])
                for pending in _bank['pending'].keys():
                    run = datetime.fromtimestamp(
                        _bank['pending'][pending]).strftime(
                            '%Y-%m-%d %H:%M:%S')
                    pend_info.append([pending, run])

            info['info'] = bank_info
            info['prod'] = prod_info
            info['pend'] = pend_info
            return info

        release = 'N/A'
        if 'current' in _bank and _bank['current']:
            for prod in _bank['production']:
                if _bank['current'] == prod['session']:
                    release = prod['remoterelease']
        info['info'] = [
            _bank['name'], ','.join(_bank['properties']['type']),
            str(release), _bank['properties']['visibility']
        ]
        return info

    def update_dependencies(self):
        '''
        Update bank dependencies

        :return: status of updates
        '''
        self.depends = []
        if self.run_depends:
            depends = self.get_dependencies()
        else:
            depends = []

        self.session.set('depends', {})
        res = True
        for dep in depends:
            self.session._session['depends'][dep] = False
        for dep in depends:
            if self.session._session['depends'][dep]:
                logging.debug('Update:Depends:' + dep + ':SKIP')
                # Bank has been marked as depends multiple times, run only once
                continue
            logging.info('Update:Depends:' + dep)
            b = Bank(dep)
            res = b.update()
            self.depends.append(b)
            self.session._session['depends'][dep] = res
            logging.info('Update:Depends:' + dep + ':' + str(res))
            if not res:
                # Stop at the first failing dependency
                break
        return res

    def get_dependencies(self, bank=None):
        '''
        Search all bank dependencies

        :param bank: bank to read the *depends* property from, None for this bank
        :type bank: :class:`Bank`
        :return: list of bank names to update
        '''
        if bank is None:
            deps = self.config.get('depends')
        else:
            deps = bank.config.get('depends')
        if deps is None:
            return []
        # Main deps
        deps = deps.split(',')
        # Now search in deps if they themselves depend on other banks.
        # The loop iterates over the main deps only: rebinding *deps*
        # below creates a new list and does not affect the iteration.
        for dep in deps:
            b = Bank(dep, no_log=True)
            deps = b.get_dependencies() + deps
        return deps

    def is_owner(self):
        '''
        Checks if current user is owner or admin

        :return: bool
        '''
        admin_config = self.config.get('admin')
        admin = []
        if admin_config is not None:
            admin = [x.strip() for x in admin_config.split(',')]
        if admin and os.environ['LOGNAME'] in admin:
            return True
        if os.environ['LOGNAME'] == self.bank['properties']['owner']:
            return True
        return False

    def set_owner(self, owner):
        '''
        Update bank owner, only if current owner

        :param owner: new owner of the bank
        :type owner: str
        :raises Exception: if the current user is not authorized
        '''
        self._require_owner()
        self.banks.update({'name': self.name},
                          {'$set': {'properties.owner': owner}})

    def set_visibility(self, visibility):
        '''
        Update bank visibility, only if current owner

        :param visibility: new visibility of the bank
        :type visibility: str
        :raises Exception: if the current user is not authorized
        '''
        self._require_owner()
        # Dotted field name: setting 'properties' to a new sub-document
        # would replace it entirely and drop owner, type and tags
        self.banks.update({'name': self.name},
                          {'$set': {'properties.visibility': visibility}})

    def get_properties(self):
        '''
        Read bank properties from config file

        :return: properties dict
        '''
        owner = os.environ['LOGNAME']
        # If owner not set, use current user, else keep current
        if self.bank and 'properties' in self.bank \
                and 'owner' in self.bank['properties']:
            owner = self.bank['properties']['owner']

        props = {
            'visibility': self.config.get('visibility.default'),
            'type': self.config.get('db.type').split(','),
            'tags': [],
            'owner': owner
        }
        return props

    @staticmethod
    def searchindex(query):
        '''
        Search the index with *query*, delegates to BmajIndex.searchq
        '''
        return BmajIndex.searchq(query)

    @staticmethod
    def search(formats=None, types=None, with_sessions=True):
        '''
        Search all bank releases matching some formats and types

        Matches production release with at least one of formats and one of types

        :param formats: formats to match, no filtering on formats if empty
        :type formats: list
        :param types: types to match, no filtering on types if empty
        :type types: list
        :param with_sessions: should sessions be returned or not
        :type with_sessions: bool
        :return: list of banks, each with its matching production releases only
        '''
        if formats is None:
            formats = []
        if types is None:
            types = []

        Bank._init_mongo_connector()
        searchfilter = {}
        if formats:
            searchfilter['production.formats'] = {'$in': formats}
        if with_sessions:
            res = MongoConnector.banks.find(searchfilter)
        else:
            res = MongoConnector.banks.find(searchfilter, {'sessions': 0})
        # Now search in which production release formats and types apply
        search_list = []
        for r in res:
            prod_to_delete = []
            for p in r['production']:
                is_format = False
                if not formats:
                    is_format = True
                # Are formats present in this production release?
                for f in formats:
                    if f in p['formats']:
                        is_format = True
                        break
                # Are types present in this production release?
                is_type = False
                if not types:
                    is_type = True
                if is_format:
                    for t in types:
                        if t in p['types'] or t in r['properties']['type']:
                            is_type = True
                            break
                if not is_type or not is_format:
                    prod_to_delete.append(p)
            # Removal is deferred to avoid mutating the list being iterated
            for prod_del in prod_to_delete:
                r['production'].remove(prod_del)
            if len(r['production']) > 0:
                search_list.append(r)
        return search_list

    @staticmethod
    def list(with_sessions=False):
        '''
        Return a list of banks

        :param with_sessions: should sessions be returned or not (can be quite big)
        :type with_sessions: bool
        :return: list of :class:`biomaj.bank.Bank`
        '''
        Bank._init_mongo_connector()
        bank_list = []
        if with_sessions:
            res = MongoConnector.banks.find({})
        else:
            res = MongoConnector.banks.find({}, {'sessions': 0})
        for r in res:
            bank_list.append(r)
        return bank_list

    def controls(self):
        '''
        Initial controls (create directories etc...)
        '''
        data_dir = self.config.get('data.dir')
        bank_dir = self.config.get('dir.version')
        bank_dir = os.path.join(data_dir, bank_dir)
        if not os.path.exists(bank_dir):
            os.makedirs(bank_dir)

        offline_dir = self.config.get('offline.dir.name')
        offline_dir = os.path.join(data_dir, offline_dir)
        if not os.path.exists(offline_dir):
            os.makedirs(offline_dir)

        log_dir = self.config.get('log.dir')
        log_dir = os.path.join(log_dir, self.name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    def _delete(self):
        '''
        Delete bank from database, not files
        '''
        self.banks.remove({'_id': self.bank['_id']})

    def save_session(self):
        '''
        Save session in database

        The downloaded and local file lists are written to the cache
        directory and emptied in the session before it is stored.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update(
                {'name': self.name},
                {'$pull': {'sessions': {'id': self.session._session['id']}}})
        # Insert session
        if self.session.get('action') == 'update':
            action = 'last_update_session'
        if self.session.get('action') == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            downloaded_path = os.path.join(
                cache_dir, 'files_' + str(self.session.get('id')))
            with open(downloaded_path, 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            local_path = os.path.join(
                cache_dir, 'local_files_' + str(self.session.get('id')))
            with open(local_path, 'w') as f_local_files:
                # Write the local files list (download_files used to be
                # written here by mistake)
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        self.banks.update({'name': self.name}, {
            '$set': {
                action: self.session._session['id'],
                'properties': self.get_properties()
            },
            '$push': {'sessions': self.session._session}
        })
        BmajIndex.add(self.name, self.session._session)

        if self.session.get('action') == 'update' \
                and not self.session.get_status(Workflow.FLOW_OVER) \
                and self.session.get('release'):
            # Update not over yet: remember the release as pending
            self.banks.update({'name': self.name}, {
                '$set': {
                    'pending.' + self.session.get('release'):
                        self.session._session['id']
                }
            })

        if self.session.get('action') == 'update' \
                and self.session.get_status(Workflow.FLOW_OVER) \
                and self.session.get('update'):
            # We expect that a production release has reached the FLOW_OVER status.
            # If no update is needed (same release etc...), the *update* session of the session is set to False
            logging.debug('Bank:Save:' + self.name)
            if len(self.bank['production']) > 0:
                # Remove from database
                self.banks.update({'name': self.name}, {
                    '$pull': {
                        'production': {
                            'release': self.session._session['release']
                        }
                    }
                })

            release_types = []
            if self.config.get('db.type'):
                release_types = self.config.get('db.type').split(',')
            release_formats = list(self.session._session['formats'].keys())
            if self.config.get('db.formats'):
                config_formats = self.config.get('db.formats').split(',')
                for config_format in config_formats:
                    if config_format not in release_formats:
                        release_formats.append(config_format)

            for release_format in self.session._session['formats']:
                for release_files in self.session._session['formats'][release_format]:
                    if release_files['types']:
                        for rtype in release_files['types']:
                            if rtype not in release_types:
                                release_types.append(rtype)
            prod_dir = self.session.get_release_directory()
            if self.session.get('prod_dir'):
                prod_dir = self.session.get('prod_dir')
            production = {
                'release': self.session.get('release'),
                'remoterelease': self.session.get('remoterelease'),
                'session': self.session._session['id'],
                'formats': release_formats,
                'types': release_types,
                'size': self.session.get('fullsize'),
                'data_dir': self.session._session['data_dir'],
                'dir_version': self.session._session['dir_version'],
                'prod_dir': prod_dir,
                'freeze': False
            }
            self.bank['production'].append(production)

            self.banks.update({'name': self.name}, {
                '$push': {'production': production},
                '$unset': {'pending.' + self.session.get('release'): ''}
            })

        # Reload the bank so the local object matches the database
        self.bank = self.banks.find_one({'name': self.name})

    def clean_old_sessions(self):
        '''
        Delete old sessions, not latest ones nor related to production sessions
        '''
        if self.session is None:
            return
        # No previous session
        if 'sessions' not in self.bank:
            return
        if self.config.get_bool('keep.old.sessions'):
            logging.debug('keep old sessions, skipping...')
            return
        old_sessions = []
        prod_releases = []
        for session in self.bank['sessions']:
            if session['id'] == self.session.get('id'):
                # Current session
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_update_session'):
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_remove_session'):
                continue
            is_prod_session = False
            for prod in self.bank['production']:
                if session['id'] == prod['session']:
                    is_prod_session = True
                    break
            if is_prod_session:
                prod_releases.append(session['release'])
                continue
            old_sessions.append(session)
        if len(old_sessions) > 0:
            # A bank may have no 'pending' entry at all
            pendings = self.bank.get('pending') or {}
            for session in old_sessions:
                session_id = session['id']
                self.banks.update(
                    {'name': self.name},
                    {'$pull': {'sessions': {'id': session_id}}})
                # Check if in pending sessions
                for rel in list(pendings.keys()):
                    if pendings[rel] == session_id:
                        # Unset this pending entry only: $unset on
                        # 'pending' itself would drop every pending release
                        self.banks.update(
                            {'name': self.name},
                            {'$unset': {'pending.' + str(rel): ''}})
                if session['release'] not in prod_releases \
                        and session['release'] != self.session.get('release'):
                    # There might be unfinished releases linked to session, delete them
                    # if they are not related to a production directory or latest run
                    session_dir = os.path.join(
                        self.config.get('data.dir'),
                        self.config.get('dir.version'),
                        self.name +
                        self.config.get('release.separator', default='_') +
                        str(session['release']))
                    if os.path.exists(session_dir):
                        logging.info(
                            'Bank:DeleteOldSessionDir:' + self.name +
                            self.config.get('release.separator', default='_') +
                            str(session['release']))
                        shutil.rmtree(session_dir)
            self.bank = self.banks.find_one({'name': self.name})

    def publish(self):
        '''
        Set session release to *current*

        :raises Exception: if the current user is not authorized
        '''
        self._require_owner()

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'),
                                    'current')
        to_dir = os.path.join(self.config.get('data.dir'),
                              self.config.get('dir.version'))

        if os.path.lexists(current_link):
            os.remove(current_link)
        # The link is created from inside the bank directory so that its
        # target stays relative
        os.chdir(to_dir)
        os.symlink(self.session.get_release_directory(), 'current')
        self.bank['current'] = self.session._session['id']
        self.banks.update(
            {'name': self.name},
            {'$set': {'current': self.session._session['id']}})

    def unpublish(self):
        '''
        Unset *current*

        :raises Exception: if the current user is not authorized
        '''
        self._require_owner()

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'),
                                    'current')

        if os.path.lexists(current_link):
            os.remove(current_link)
        self.banks.update({'name': self.name}, {'$set': {'current': None}})

    def get_production(self, release):
        '''
        Get production field for release

        :param release: release name or production dir name
        :type release: str
        :return: production field, None if no production release matches
        '''
        release = str(release)
        production = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                production = prod
        return production

    def _set_freeze(self, release, frozen):
        '''
        Set the freeze flag of a production release and reload the bank

        :param release: release name or production dir name
        :type release: str
        :param frozen: value of the freeze flag
        :type frozen: bool
        :return: bool, False if the release is not in production
        :raises Exception: if the current user is not authorized
        '''
        release = str(release)
        self._require_owner()

        rel = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                rel = prod['release']
        if rel is None:
            logging.error('Release not found: ' + release)
            # Nothing to update, report the failure to the caller
            return False

        self.banks.update(
            {'name': self.name, 'production.release': rel},
            {'$set': {'production.$.freeze': frozen}})
        self.bank = self.banks.find_one({'name': self.name})
        return True

    def freeze(self, release):
        '''
        Freeze a production release

        When freezed, a production release cannot be removed (manually or automatically)

        :param release: release name or production dir name
        :type release: str
        :return: bool
        '''
        return self._set_freeze(release, True)

    def unfreeze(self, release):
        '''
        Unfreeze a production release to allow removal

        :param release: release name or production dir name
        :type release: str
        :return: bool
        '''
        return self._set_freeze(release, False)

    def get_new_session(self, flow=None):
        '''
        Returns an empty session

        :param flow: kind of workflow
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        '''
        if flow is None:
            flow = Workflow.FLOW
        return Session(self.name, self.config, flow)

    def get_session_from_release(self, release):
        '''
        Loads the session matching a specific release

        :param release: release name or production dir
        :type release: str
        :return: :class:`biomaj.session.Session`
        '''
        release = str(release)
        oldsession = None
        # Search production release matching release
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                for s in self.bank['sessions']:
                    if s['id'] == prod['session']:
                        oldsession = s
                        break
                break
        if oldsession is None:
            # No prod session, try to find a session for this release, session may have failed or be stopped
            for s in self.bank['sessions']:
                if s['release'] and release.endswith(s['release']):
                    oldsession = s
        if oldsession is None:
            logging.error(
                'No production session could be found for this release')
        return oldsession

    def load_session(self, flow=None, session=None):
        '''
        Loads last session or, if over or forced, a new session

        Creates a new session or load last session if not over

        :param flow: kind of workflow
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        :param session: session to load instead of the last one
        :type session: dict
        '''
        if flow is None:
            flow = Workflow.FLOW

        if session is not None:
            logging.debug('Load specified session ' + str(session['id']))
            self.session = Session(self.name, self.config, flow)
            self.session.load(session)
            self.use_last_session = True
            return
        if len(self.bank['sessions']) == 0 \
                or self.options.get_option(Options.FROMSCRATCH):
            self.session = Session(self.name, self.config, flow)
            logging.debug('Start new session')
        else:
            # Take last session
            self.session = Session(self.name, self.config, flow)
            session_id = None
            # Load previous session for updates only
            if self.session.get('action') == 'update' \
                    and 'last_update_session' in self.bank \
                    and self.bank['last_update_session']:
                session_id = self.bank['last_update_session']
                load_session = None
                for bank_session in self.bank['sessions']:
                    if bank_session['id'] == session_id:
                        load_session = bank_session
                        break
                if load_session is not None:
                    self.session.load(load_session)
                    if self.session.get_status(Workflow.FLOW_OVER) \
                            and self.options.get_option(Options.FROM_TASK) is None:
                        # Last update is over, start again from a new session
                        previous_release = self.session.get('remoterelease')
                        self.session = Session(self.name, self.config, flow)
                        self.session.set('previous_release', previous_release)
                        logging.debug('Start new session')
                    else:
                        logging.debug('Load previous session ' +
                                      str(self.session.get('id')))
                        self.use_last_session = True

    def remove_session(self, sid):
        '''
        Delete a session from db

        :param sid: id of the session
        :type sid: long
        :return: bool
        '''
        session_release = None
        _tmpbank = self.banks.find_one({'name': self.name})
        for s in _tmpbank['sessions']:
            if s['id'] == sid:
                session_release = s['release']

        cache_dir = self.config.get('cache.dir')
        download_files = os.path.join(cache_dir, 'files_' + str(sid))
        if os.path.exists(download_files):
            os.remove(download_files)

        local_files = os.path.join(cache_dir, 'local_files_' + str(sid))
        if os.path.exists(local_files):
            os.remove(local_files)

        if self.config.get_bool('keep.old.sessions'):
            logging.debug('keep old sessions')
            if session_release is not None:
                self.banks.update({'name': self.name}, {
                    '$pull': {'production': {'session': sid}},
                    '$unset': {'pending.' + session_release: ''}
                })
            else:
                self.banks.update(
                    {'name': self.name},
                    {'$pull': {'production': {'session': sid}}})
            # The session is kept but flagged with its deletion time
            self.banks.update(
                {'name': self.name, 'sessions.id': sid},
                {'$set': {'sessions.$.deleted': time.time()}})
        else:
            if session_release is not None:
                self.banks.update({'name': self.name}, {
                    '$pull': {
                        'sessions': {'id': sid},
                        'production': {'session': sid}
                    },
                    '$unset': {'pending.' + session_release: ''}
                })
            else:
                self.banks.update({'name': self.name}, {
                    '$pull': {
                        'sessions': {'id': sid},
                        'production': {'session': sid}
                    }
                })
        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if session_release is not None:
            BmajIndex.remove(self.name, session_release)
        return True

    def get_data_dir(self):
        '''
        Returns bank data directory

        :return: str
        '''
        return os.path.join(self.config.get('data.dir'),
                            self.config.get('dir.version'))

    def removeAll(self, force=False):
        '''
        Remove all bank releases and database records

        :param force: force removal even if some production dirs are freezed
        :type force: bool
        :return: bool
        '''
        if not force:
            has_freeze = False
            for prod in self.bank['production']:
                if 'freeze' in prod and prod['freeze']:
                    has_freeze = True
                    break
            if has_freeze:
                logging.error(
                    'Cannot remove bank, some production directories are freezed, use force if needed'
                )
                return False

        self.banks.remove({'name': self.name})
        BmajIndex.delete_all_bank(self.name)
        bank_data_dir = self.get_data_dir()
        logging.warning('DELETE ' + bank_data_dir)
        if os.path.exists(bank_data_dir):
            shutil.rmtree(bank_data_dir)
        bank_offline_dir = os.path.join(self.config.get('data.dir'),
                                        self.config.get('offline.dir.name'))
        if os.path.exists(bank_offline_dir):
            shutil.rmtree(bank_offline_dir)
        bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
        # The log directory is removed only when the bank runs with no_log
        if os.path.exists(bank_log_dir) and self.no_log:
            shutil.rmtree(bank_log_dir)
        return True

    def get_status(self):
        '''
        Get status of current workflow

        :return: dict of current workflow status, empty if the bank has none
        '''
        status = self.bank.get('status')
        if status is None:
            return {}
        return status

    def remove_pending(self, release):
        '''
        Remove pending releases

        Every pending release of the bank is removed: the *release*
        argument is not used to select one.

        :param release: release or release directory
        :type release: str
        :return: bool
        :raises Exception: if the current user is not authorized
        '''
        logging.warning('Bank:' + self.name + ':RemovePending')
        self._require_owner()

        # A bank may have no 'pending' entry at all
        pendings = self.bank.get('pending')
        if not pendings:
            return True
        for pending_release in list(pendings.keys()):
            pending_session_id = pendings[pending_release]
            pending_session = None
            for s in self.bank['sessions']:
                if s['id'] == pending_session_id:
                    pending_session = s
                    break
            session = Session(self.name, self.config, RemoveWorkflow.FLOW)
            if pending_session is None:
                session._session['release'] = pending_release
            else:
                session.load(pending_session)
            if os.path.exists(session.get_full_release_directory()):
                logging.debug("Remove:Pending:Dir:" +
                              session.get_full_release_directory())
                shutil.rmtree(session.get_full_release_directory())
            self.remove_session(pendings[pending_release])
        self.banks.update({'name': self.name}, {'$set': {'pending': {}}})
        return True

    def remove(self, release):
        '''
        Remove a release (db and files)

        :param release: release or release directory
        :type release: str
        :return: bool
        :raises Exception: if the current user is not authorized
        '''
        release = str(release)
        logging.warning('Bank:' + self.name + ':Remove')
        self._require_owner()

        self.session = self.get_new_session(RemoveWorkflow.FLOW)
        oldsession = None
        # Search production release matching release
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                if 'freeze' in prod and prod['freeze']:
                    logging.error(
                        'Cannot remove release, release is freezed, unfreeze it first'
                    )
                    return False
                # Search session related to this production release
                for s in self.bank['sessions']:
                    if s['id'] == prod['session']:
                        oldsession = s
                        break
                break
        if oldsession is None:
            logging.error(
                'No production session could be found for this release')
            return False
        if 'current' in self.bank and self.bank['current'] == oldsession['id']:
            logging.error(
                'This release is the release in the main release production, you should first unpublish it'
            )
            return False

        # New empty session for removal
        session = Session(self.name, self.config, RemoveWorkflow.FLOW)
        session.set('action', 'remove')
        session.set('release', oldsession['release'])
        session.set('update_session_id', oldsession['id'])
        self.session = session
        # Reset status, we take an update session
        res = self.start_remove(session)
        self.session.set('workflow_status', res)

        self.save_session()
        return res

    def update(self, depends=False):
        '''
        Launch a bank update

        :param depends: run update of bank dependencies first
        :type depends: bool
        :return: bool
        :raises Exception: if the current user is not authorized
        '''
        logging.warning('Bank:' + self.name + ':Update')
        self._require_owner()

        self.run_depends = depends

        self.controls()
        if self.options.get_option('release'):
            logging.info('Bank:' + self.name + ':Release:' +
                         self.options.get_option('release'))
            s = self.get_session_from_release(
                self.options.get_option('release'))
            # No session in prod
            if s is None:
                logging.error('Release does not exists: ' +
                              self.options.get_option('release'))
                return False
            self.load_session(UpdateWorkflow.FLOW, s)
        else:
            logging.info('Bank:' + self.name + ':Release:latest')
            self.load_session(UpdateWorkflow.FLOW)
        # if from task, reset workflow status in session.
        if self.options.get_option('from_task'):
            set_to_false = False
            for task in self.session.flow:
                # If task was in False status (KO) and we ask to start after this task, exit
                if not set_to_false \
                        and not self.session.get_status(task['name']) \
                        and task['name'] != self.options.get_option('from_task'):
                    logging.error(
                        'Previous task ' + task['name'] +
                        ' was not successful, cannot restart after this task')
                    return False
                if task['name'] == self.options.get_option('from_task'):
                    set_to_false = True
                if set_to_false:
                    # After from_task task, tasks must be set to False to be run
                    self.session.set_status(task['name'], False)
                    proc = None
                    if task['name'] in [
                            Workflow.FLOW_POSTPROCESS,
                            Workflow.FLOW_PREPROCESS,
                            Workflow.FLOW_REMOVEPROCESS
                    ]:
                        proc = self.options.get_option('process')
                        self.session.reset_proc(task['name'], proc)
        self.session.set('action', 'update')
        res = self.start_update()
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def start_remove(self, session):
        '''
        Start a removal workflow

        :param session: Session to remove
        :type session: :class:`biomaj.session.Session`
        :return: bool
        '''
        workflow = RemoveWorkflow(self, session)
        return workflow.start()

    def start_update(self):
        '''
        Start an update workflow

        :return: result of the workflow start
        '''
        workflow = UpdateWorkflow(self)
        return workflow.start()
database into a PostgreSQL data using Jsonb data typexs """ from __future__ import print_function from pymongo import MongoClient import psycopg2 from psycopg2 import OperationalError, DatabaseError, IntegrityError import json from biomaj.config import BiomajConfig import os import sys __author__ = 'tuco' if __name__ == '__main__': BiomajConfig.load_config() mongo_url = BiomajConfig.global_config.get('GENERAL', 'db.url') mongo_db = BiomajConfig.global_config.get('GENERAL', 'db.name') mc = MongoClient(mongo_url) m_bank = mc[mongo_db].banks banks = [] dbname = 'biomaj' insert_query = "INSERT INTO bank(data) VALUES " for bank in m_bank.find({}, {'_id': 0}): insert_query += "('%s')," % json.dumps(bank) insert_query = insert_query.strip(',') # In case we empty the databble if len(sys.argv) > 1: dbname = sys.argv[1]
def __init__(self, name, options=None, no_log=False):
    '''
    Load the bank record from the storage connector, creating it if absent.

    :param name: name of the bank, must match its config file
    :type name: str
    :param options: bank options; a fresh ``Options()`` is used when None
    :type options: argparse
    :param no_log: when true, forwarded to the options as ``no_log``
                   (presumably disables the bank log file -- confirm in BiomajConfig)
    :type no_log: bool
    :raises Exception: if the global configuration has not been loaded yet
    '''
    logging.debug('Initialize ' + name)

    # Nothing can be resolved without the global configuration.
    if BiomajConfig.global_config is None:
        raise Exception('Configuration must be loaded first')

    self.name = name
    self.depends = []
    self.no_log = no_log

    # Propagate the no_log request through the options handed to the config.
    if no_log:
        if options is not None:
            options.no_log = no_log
        else:
            options = Options()
            options.no_log = True

    self.config = BiomajConfig(self.name, options)

    # Per-bank override of the number of worker threads.
    nb_threads = self.config.get('bank.num.threads')
    if nb_threads is not None:
        ProcessFactory.NB_THREAD = int(nb_threads)

    log_file = self.config.log_file
    if log_file is not None and log_file != 'none':
        logging.info("Log file: " + log_file)

    self.options = Options() if options is None else options

    # The connector object is used both as the "banks" handle and for lookups.
    self.connector = Connector().get_connector()
    self.banks = self.connector
    self.bank = self.connector.get({'name': self.name})

    if self.bank is None:
        # First time this bank is seen: store an empty record and keep its id.
        self.bank = {
            'name': self.name,
            'current': None,
            'sessions': [],
            'production': [],
            'properties': self.get_properties()
        }
        self.bank['_id'] = self.connector.set('banks', self.bank)

    self.session = None
    self.use_last_session = False
import bcrypt
from biomaj.config import BiomajConfig

# Command-line interface: path to the configuration plus the account details.
parser = argparse.ArgumentParser(description='Initialize database content.')
parser.add_argument('--config')
parser.add_argument('--user')
parser.add_argument('--pwd')
parser.add_argument('--email')
args = parser.parse_args()

if not args.config:
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("config argument is missing")
    sys.exit(2)

BiomajConfig.load_config(args.config)

# Imported only after load_config() has run, as in the original script
# (presumably biomaj.user needs the configuration at import time -- confirm).
from biomaj.user import BmajUser
from hashlib import sha1

if not args.user:
    print('user parameter is missing')
    sys.exit(1)

rootuser = BmajUser(args.user)
if args.pwd:
    pwd = args.pwd
else:
    # No password supplied: generate a random 40-hex-digit one.
    # os.urandom() replaces randint(1, 1e99): it draws from the OS CSPRNG,
    # avoids passing a float bound to randint (rejected by recent Python 3),
    # and hands sha1() the bytes it requires on Python 3.
    import os
    pwd = sha1(os.urandom(32)).hexdigest()