def __init__(self, name, options=None, no_log=False):
    """
    Load the bank record called *name*, creating it in the database when missing

    :param name: name of the bank, must match its config file
    :type name: str
    :param options: bank options
    :type options: argparse
    :param no_log: stored on the bank and, when true, copied to ``options.no_log``
    :type no_log: bool
    """
    logging.debug('Initialize ' + name)

    # Nothing can be resolved before the global configuration is available.
    if BiomajConfig.global_config is None:
        raise Exception('Configuration must be loaded first')

    self.name = name
    self.depends = []
    self.no_log = no_log

    # Only touch the options when logging has to be disabled.
    if no_log and options is None:
        options = Options()
        options.no_log = True
    elif no_log:
        options.no_log = no_log

    self.config = BiomajConfig(self.name, options)

    nb_threads = self.config.get('bank.num.threads')
    if nb_threads is not None:
        ProcessFactory.NB_THREAD = int(nb_threads)

    log_file = self.config.log_file
    if not (log_file is None or log_file == 'none'):
        logging.info("Log file: " + log_file)

    self.options = Options() if options is None else options

    # Open the database connection on first use.
    if MongoConnector.db is None:
        general = BiomajConfig.global_config
        MongoConnector(general.get('GENERAL', 'db.url'),
                       general.get('GENERAL', 'db.name'))

    self.banks = MongoConnector.banks
    self.bank = self.banks.find_one({'name': self.name})
    if self.bank is None:
        # Unknown bank: register a fresh record and remember its id.
        self.bank = {
            'name': self.name,
            'current': None,
            'sessions': [],
            'production': [],
            'properties': self.get_properties()
        }
        self.bank['_id'] = self.banks.insert(self.bank)

    self.session = None
    self.use_last_session = False
def __init__(self, name, options=None, no_log=False):
    '''
    Load the bank record called *name* through the connector, creating it when missing

    :param name: name of the bank, must match its config file
    :type name: str
    :param options: bank options
    :type options: argparse
    :param no_log: stored on the bank and, when true, copied to ``options.no_log``
    :type no_log: bool
    '''
    logging.debug('Initialize ' + name)

    # Nothing can be resolved before the global configuration is available.
    if BiomajConfig.global_config is None:
        raise Exception('Configuration must be loaded first')

    self.name = name
    self.depends = []
    self.no_log = no_log

    # Only touch the options when logging has to be disabled.
    if no_log and options is None:
        options = Options()
        options.no_log = True
    elif no_log:
        options.no_log = no_log

    self.config = BiomajConfig(self.name, options)

    nb_threads = self.config.get('bank.num.threads')
    if nb_threads is not None:
        ProcessFactory.NB_THREAD = int(nb_threads)

    log_file = self.config.log_file
    if not (log_file is None or log_file == 'none'):
        logging.info("Log file: " + log_file)

    self.options = Options() if options is None else options

    # The connector is used both as storage handle and as "banks" collection.
    self.connector = Connector().get_connector()
    self.banks = self.connector
    self.bank = self.connector.get({'name': self.name})
    if self.bank is None:
        # Unknown bank: register a fresh record and remember its id.
        self.bank = {
            'name': self.name,
            'current': None,
            'sessions': [],
            'production': [],
            'properties': self.get_properties()
        }
        self.bank['_id'] = self.connector.set('banks', self.bank)

    self.session = None
    self.use_last_session = False
class Bank(object):
    '''
    BioMAJ bank

    Wraps the database record of a bank (``self.bank``), its configuration
    (``self.config``) and the session of the workflow being run (``self.session``).
    '''

    def __init__(self, name, options=None, no_log=False):
        '''
        Get a bank from db or creates a new one

        :param name: name of the bank, must match its config file
        :type name: str
        :param options: bank options
        :type options: argparse
        :param no_log: stored on the bank and, when true, copied to ``options.no_log``
        :type no_log: bool
        :raises Exception: if the global configuration has not been loaded
        '''
        logging.debug('Initialize ' + name)
        if BiomajConfig.global_config is None:
            raise Exception('Configuration must be loaded first')

        self.name = name
        self.depends = []
        self.no_log = no_log
        # Set by update(); initialised here so update_dependencies() can be
        # called safely before any update.
        self.run_depends = False

        if no_log:
            if options is None:
                options = Options()
                options.no_log = True
            else:
                options.no_log = no_log

        self.config = BiomajConfig(self.name, options)

        if self.config.get('bank.num.threads') is not None:
            ProcessFactory.NB_THREAD = int(self.config.get('bank.num.threads'))

        if self.config.log_file is not None and self.config.log_file != 'none':
            logging.info("Log file: " + self.config.log_file)

        if options is None:
            self.options = Options()
        else:
            self.options = options

        # The connector is used both as storage handle and as "banks" collection.
        self.connector = Connector().get_connector()
        self.banks = self.connector
        self.bank = self.connector.get({'name': self.name})

        if self.bank is None:
            self.bank = {
                'name': self.name,
                'current': None,
                'sessions': [],
                'production': [],
                'properties': self.get_properties()
            }
            self.bank['_id'] = self.connector.set('banks', self.bank)

        self.session = None
        self.use_last_session = False

    def check(self):
        '''
        Checks bank configuration
        '''
        return self.config.check()

    def is_locked(self):
        '''
        Checks if bank is locked ie action is in progress

        :return: True if ``<lock.dir>/<bank name>.lock`` exists
        '''
        data_dir = self.config.get('data.dir')
        # lock.dir falls back to the data directory when not configured
        lock_dir = self.config.get('lock.dir', default=data_dir)
        lock_file = os.path.join(lock_dir, self.name + '.lock')
        return os.path.exists(lock_file)

    def get_bank(self, bank=None, no_log=False):
        '''
        Get this bank's db record, or an other bank

        The class used to define ``get_bank`` twice, the second definition
        silently replacing the first; both usages are supported here.

        :param bank: name of an other bank to load; None to get this bank's record
        :type bank: str
        :param no_log: forwarded to the :class:`Bank` constructor when *bank* is given
        :type no_log: bool
        :return: bank json object if *bank* is None, else a :class:`Bank` instance
        '''
        if bank is None:
            return self.bank
        return Bank(bank, no_log=no_log)

    @staticmethod
    def get_banks_disk_usage():
        '''
        Get disk usage per bank and release

        :return: list of dict with keys name, size and releases
        '''
        if MongoConnector.db is None:
            MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                           BiomajConfig.global_config.get('GENERAL', 'db.name'))

        bank_list = []
        banks = MongoConnector.banks.find({}, {'name': 1, 'production': 1})
        for b in banks:
            bank_elt = {'name': b['name'], 'size': 0, 'releases': []}
            for p in b['production']:
                # A release without a recorded size counts for 0
                if p['size'] is None:
                    p['size'] = 0
                bank_elt['size'] += p['size']
                bank_elt['releases'].append({'name': p['release'], 'size': p['size']})
            bank_list.append(bank_elt)
        return bank_list

    def get_bank_release_info(self, full=False):
        '''
        Get release info for the bank. Used with --status option from biomaj-cly.py

        :param full: Display full for the bank
        :type full: Boolean
        :return: Dict with keys
                      if full=True
                           - info, prod, pend
                      else
                           - info
        '''
        _bank = self.bank
        info = {}

        if full:
            bank_info = []
            prod_info = []
            pend_info = []
            release = None
            if 'current' in _bank and _bank['current']:
                for prod in _bank['production']:
                    if _bank['current'] == prod['session']:
                        release = prod['release']

            # A bank that was never updated has no last_update_session yet
            last_update = _bank.get('last_update_session')
            if last_update:
                last_update = datetime.fromtimestamp(last_update).strftime("%Y-%m-%d %H:%M:%S")
            else:
                last_update = 'N/A'

            # Bank info header
            bank_info.append(["Name", "Type(s)", "Last update status", "Published release"])
            bank_info.append([_bank['name'],
                              str(','.join(_bank['properties']['type'])),
                              str(last_update),
                              str(release)])

            # Bank production info header
            prod_info.append(["Session", "Remote release", "Release", "Directory", "Freeze"])
            for prod in _bank['production']:
                data_dir = self.config.get('data.dir')
                dir_version = self.config.get('dir.version')
                # save_session() stores these values under 'data_dir' and
                # 'dir_version'; the dotted spelling is still accepted.
                for key in ('data.dir', 'data_dir'):
                    if key in prod:
                        data_dir = prod[key]
                for key in ('dir.version', 'dir_version'):
                    if key in prod:
                        dir_version = prod[key]
                release_dir = os.path.join(data_dir, dir_version, prod['prod_dir'])
                date = datetime.fromtimestamp(prod['session']).strftime('%Y-%m-%d %H:%M:%S')
                prod_info.append([date,
                                  prod['remoterelease'],
                                  prod['release'],
                                  release_dir,
                                  'yes' if 'freeze' in prod and prod['freeze'] else 'no'])

            # Bank pending info header
            if 'pending' in _bank and len(_bank['pending'].keys()) > 0:
                pend_info.append(["Pending release", "Last run"])
                for pending in _bank['pending'].keys():
                    run = datetime.fromtimestamp(_bank['pending'][pending]).strftime('%Y-%m-%d %H:%M:%S')
                    pend_info.append([pending, run])

            info['info'] = bank_info
            info['prod'] = prod_info
            info['pend'] = pend_info
            return info

        release = 'N/A'
        if 'current' in _bank and _bank['current']:
            for prod in _bank['production']:
                if _bank['current'] == prod['session']:
                    release = prod['remoterelease']
        info['info'] = [_bank['name'],
                        ','.join(_bank['properties']['type']),
                        str(release),
                        _bank['properties']['visibility']]
        return info

    def update_dependencies(self):
        '''
        Update bank dependencies

        :return: status of updates
        '''
        self.depends = []
        if self.run_depends:
            depends = self.get_dependencies()
        else:
            depends = []

        self.session.set('depends', {})
        res = True
        for dep in depends:
            self.session._session['depends'][dep] = False
        for dep in depends:
            if self.session._session['depends'][dep]:
                logging.debug('Update:Depends:' + dep + ':SKIP')
                # Bank has been marked as depends multiple times, run only once
                continue
            logging.info('Update:Depends:' + dep)
            b = Bank(dep)
            res = b.update()
            self.depends.append(b)
            self.session._session['depends'][dep] = res
            logging.info('Update:Depends:' + dep + ':' + str(res))
            if not res:
                # Stop at the first failing dependency
                break
        return res

    def get_dependencies(self, bank=None):
        '''
        Search all bank dependencies

        :param bank: bank whose ``depends`` property is read, defaults to this bank
        :type bank: :class:`Bank`
        :return: list of bank names to update
        '''
        if bank is None:
            deps = self.config.get('depends')
        else:
            deps = bank.config.get('depends')
        if deps is None:
            return []

        # Main deps
        direct_deps = deps.split(',')
        all_deps = list(direct_deps)
        # Now search in deps if they themselves depend on other banks,
        # their own dependencies are placed first
        for dep in direct_deps:
            b = Bank(dep, no_log=True)
            all_deps = b.get_dependencies() + all_deps
        return all_deps

    def is_owner(self):
        '''
        Checks if current user is owner or admin

        The current user is read from the LOGNAME environment variable.

        :return: bool
        '''
        admin_config = self.config.get('admin')
        admin = []
        if admin_config is not None:
            admin = [x.strip() for x in admin_config.split(',')]
        if admin and os.environ['LOGNAME'] in admin:
            return True
        if os.environ['LOGNAME'] == self.bank['properties']['owner']:
            return True
        return False

    def set_owner(self, owner):
        '''
        Update bank owner, only if current owner

        :param owner: new owner of the bank
        :type owner: str
        :raises Exception: if current user is neither owner nor admin
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        self.banks.update({'name': self.name}, {'$set': {'properties.owner': owner}})

    def set_visibility(self, visibility):
        '''
        Update bank visibility, only if current owner

        :param visibility: new visibility of the bank
        :type visibility: str
        :raises Exception: if current user is neither owner nor admin
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        # Dotted key: only the visibility is replaced, the other properties
        # (owner, type, tags) are kept, as done in set_owner.
        self.banks.update({'name': self.name}, {'$set': {'properties.visibility': visibility}})

    def get_properties(self):
        '''
        Read bank properties from config file

        :return: properties dict
        '''
        owner = os.environ['LOGNAME']
        # If owner not set, use current user, else keep current
        if self.bank and 'properties' in self.bank and 'owner' in self.bank['properties']:
            owner = self.bank['properties']['owner']

        props = {
            'visibility': self.config.get('visibility.default'),
            'type': self.config.get('db.type').split(','),
            'tags': [],
            'owner': owner
        }
        return props

    @staticmethod
    def searchindex(query):
        '''
        Search the index with *query* (delegates to BmajIndex.searchq)
        '''
        return BmajIndex.searchq(query)

    @staticmethod
    def search(formats=None, types=None, with_sessions=True):
        '''
        Search all bank releases matching some formats and types

        Matches production release with at least one of formats and one of types

        :param formats: formats to match, no filtering on formats if empty
        :type formats: list
        :param types: types to match, no filtering on types if empty
        :type types: list
        :param with_sessions: should sessions be returned or not
        :type with_sessions: bool
        :return: list of bank records keeping only their matching production releases
        '''
        if formats is None:
            formats = []
        if types is None:
            types = []

        if MongoConnector.db is None:
            MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                           BiomajConfig.global_config.get('GENERAL', 'db.name'))

        searchfilter = {}
        if formats:
            searchfilter['production.formats'] = {'$in': formats}
        if with_sessions:
            res = MongoConnector.banks.find(searchfilter)
        else:
            res = MongoConnector.banks.find(searchfilter, {'sessions': 0})

        # Now search in which production release formats and types apply
        search_list = []
        for r in res:
            matching = []
            for p in r['production']:
                # Are formats present in this production release?
                is_format = not formats or any(f in p['formats'] for f in formats)
                # Are types present in this production release (or in bank types)?
                is_type = not types or (
                    is_format and
                    any(t in p['types'] or t in r['properties']['type'] for t in types))
                if is_format and is_type:
                    matching.append(p)
            r['production'] = matching
            if len(r['production']) > 0:
                search_list.append(r)
        return search_list

    @staticmethod
    def list(with_sessions=False):
        '''
        Return a list of banks

        :param with_sessions: should sessions be returned or not (can be quite big)
        :type with_sessions: bool
        :return: list of :class:`biomaj.bank.Bank`
        '''
        if MongoConnector.db is None:
            MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                           BiomajConfig.global_config.get('GENERAL', 'db.name'))

        bank_list = []
        if with_sessions:
            res = MongoConnector.banks.find({})
        else:
            res = MongoConnector.banks.find({}, {'sessions': 0})
        for r in res:
            bank_list.append(r)
        return bank_list

    def controls(self):
        '''
        Initial controls (create directories etc...)
        '''
        data_dir = self.config.get('data.dir')
        bank_dir = self.config.get('dir.version')
        bank_dir = os.path.join(data_dir, bank_dir)
        if not os.path.exists(bank_dir):
            os.makedirs(bank_dir)

        offline_dir = self.config.get('offline.dir.name')
        offline_dir = os.path.join(data_dir, offline_dir)
        if not os.path.exists(offline_dir):
            os.makedirs(offline_dir)

        log_dir = self.config.get('log.dir')
        log_dir = os.path.join(log_dir, self.name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    def _delete(self):
        '''
        Delete bank from database, not files
        '''
        self.banks.remove({'_id': self.bank['_id']})

    def save_session(self):
        '''
        Save session in database

        File lists of the session are written to json files in the cache
        directory and emptied in the session before it is stored.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': self.session._session['id']}}})

        # Insert session
        session_action = self.session.get('action')
        session_id = self.session.get('id')
        cache_dir = self.config.get('cache.dir')

        download_files = self.session.get('download_files')
        if download_files is not None:
            with open(os.path.join(cache_dir, 'files_' + str(session_id)), 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            # Write the local files (the download list used to be written here by mistake)
            with open(os.path.join(cache_dir, 'local_files_' + str(session_id)), 'w') as f_local_files:
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        set_fields = {'properties': self.get_properties()}
        # Only update and remove sessions are tracked as "last" session
        if session_action == 'update':
            set_fields['last_update_session'] = self.session._session['id']
        elif session_action == 'remove':
            set_fields['last_remove_session'] = self.session._session['id']

        self.banks.update({'name': self.name}, {
            '$set': set_fields,
            '$push': {'sessions': self.session._session}
        })
        BmajIndex.add(self.name, self.session._session)

        if session_action == 'update' and not self.session.get_status(Workflow.FLOW_OVER) \
                and self.session.get('release'):
            # Unfinished update, release is pending
            self.banks.update({'name': self.name},
                              {'$set': {'pending.' + self.session.get('release'): self.session._session['id']}})

        if session_action == 'update' and self.session.get_status(Workflow.FLOW_OVER) \
                and self.session.get('update'):
            # We expect that a production release has reached the FLOW_OVER status.
            # If no update is needed (same release etc...), the *update* session of the session is set to False
            logging.debug('Bank:Save:' + self.name)
            if len(self.bank['production']) > 0:
                # Remove from database
                self.banks.update({'name': self.name},
                                  {'$pull': {'production': {'release': self.session._session['release']}}})

            release_types = []
            if self.config.get('db.type'):
                release_types = self.config.get('db.type').split(',')
            release_formats = list(self.session._session['formats'].keys())
            if self.config.get('db.formats'):
                config_formats = self.config.get('db.formats').split(',')
                for config_format in config_formats:
                    if config_format not in release_formats:
                        release_formats.append(config_format)

            # Add the types declared on the files of each format
            for release_format in self.session._session['formats']:
                for release_files in self.session._session['formats'][release_format]:
                    if release_files['types']:
                        for rtype in release_files['types']:
                            if rtype not in release_types:
                                release_types.append(rtype)

            prod_dir = self.session.get_release_directory()
            if self.session.get('prod_dir'):
                prod_dir = self.session.get('prod_dir')
            production = {'release': self.session.get('release'),
                          'remoterelease': self.session.get('remoterelease'),
                          'session': self.session._session['id'],
                          'formats': release_formats,
                          'types': release_types,
                          'size': self.session.get('fullsize'),
                          'data_dir': self.session._session['data_dir'],
                          'dir_version': self.session._session['dir_version'],
                          'prod_dir': prod_dir,
                          'freeze': False}
            self.bank['production'].append(production)
            self.banks.update({'name': self.name},
                              {'$push': {'production': production},
                               '$unset': {'pending.' + self.session.get('release'): ''}})

        # Reload bank record
        self.bank = self.banks.find_one({'name': self.name})

    def clean_old_sessions(self):
        '''
        Delete old sessions, not latest ones nor related to production sessions
        '''
        if self.session is None:
            return
        # No previous session
        if 'sessions' not in self.bank:
            return
        if self.config.get_bool('keep.old.sessions'):
            logging.debug('keep old sessions, skipping...')
            return

        old_sessions = []
        prod_releases = []
        for session in self.bank['sessions']:
            if session['id'] == self.session.get('id'):
                # Current session
                prod_releases.append(session['release'])
                continue
            # NOTE(review): last_update_session/last_remove_session are read from
            # the session here while save_session() stores them on the bank - confirm.
            if session['id'] == self.session.get('last_update_session'):
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_remove_session'):
                continue
            is_prod_session = False
            for prod in self.bank['production']:
                if session['id'] == prod['session']:
                    is_prod_session = True
                    break
            if is_prod_session:
                prod_releases.append(session['release'])
                continue
            old_sessions.append(session)

        if len(old_sessions) > 0:
            # Bank may have no pending release at all
            pendings = self.bank.get('pending') or {}
            for session in old_sessions:
                session_id = session['id']
                self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': session_id}}})
                # Check if in pending sessions
                for rel in list(pendings.keys()):
                    if pendings[rel] == session_id:
                        # Dotted key: remove this pending release only, not the whole map
                        self.banks.update({'name': self.name}, {'$unset': {'pending.' + str(rel): ''}})
                if session['release'] not in prod_releases and session['release'] != self.session.get('release'):
                    # There might be unfinished releases linked to session, delete them
                    # if they are not related to a production directory or latest run
                    session_dir_name = self.name + self.config.get('release.separator', default='_') + \
                        str(session['release'])
                    session_dir = os.path.join(self.config.get('data.dir'),
                                               self.config.get('dir.version'),
                                               session_dir_name)
                    if os.path.exists(session_dir):
                        logging.info('Bank:DeleteOldSessionDir:' + session_dir_name)
                        shutil.rmtree(session_dir)
            self.bank = self.banks.find_one({'name': self.name})

    def publish(self):
        '''
        Set session release to *current*

        :raises Exception: if current user is neither owner nor admin
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'),
                                    'current')
        # NOTE(review): value is unused, kept in case the call matters - confirm and remove
        prod_dir = self.session.get_full_release_directory()

        to_dir = os.path.join(self.config.get('data.dir'),
                              self.config.get('dir.version'))

        if os.path.lexists(current_link):
            os.remove(current_link)
        # Link is created from the bank directory with a relative target
        os.chdir(to_dir)
        os.symlink(self.session.get_release_directory(), 'current')
        self.bank['current'] = self.session._session['id']
        self.banks.update({'name': self.name}, {
            '$set': {'current': self.session._session['id']}
        })

    def unpublish(self):
        '''
        Unset *current*

        :raises Exception: if current user is neither owner nor admin
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'),
                                    'current')

        if os.path.lexists(current_link):
            os.remove(current_link)
        # Keep local record in sync with the database, as publish() does
        self.bank['current'] = None
        self.banks.update({'name': self.name}, {
            '$set': {'current': None}
        })

    def get_production(self, release):
        '''
        Get production field for release

        :param release: release name or production dir name
        :type release: str
        :return: production field, None if not found (last match if several)
        '''
        release = str(release)
        production = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                production = prod
        return production

    def _set_freeze(self, release, freeze):
        '''
        Set the freeze flag of the production release matching *release*

        :return: False if release is not found, else True
        :raises Exception: if current user is neither owner nor admin
        '''
        release = str(release)
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        rel = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                rel = prod['release']
        if rel is None:
            # Nothing to update, report the failure to the caller
            logging.error('Release not found: ' + release)
            return False

        self.banks.update({'name': self.name, 'production.release': rel},
                          {'$set': {'production.$.freeze': freeze}})
        self.bank = self.banks.find_one({'name': self.name})
        return True

    def freeze(self, release):
        '''
        Freeze a production release

        When freezed, a production release cannot be removed (manually or automatically)

        :param release: release name or production dir name
        :type release: str
        :return: bool, False if release is not found
        :raises Exception: if current user is neither owner nor admin
        '''
        return self._set_freeze(release, True)

    def unfreeze(self, release):
        '''
        Unfreeze a production release to allow removal

        :param release: release name or production dir name
        :type release: str
        :return: bool, False if release is not found
        :raises Exception: if current user is neither owner nor admin
        '''
        return self._set_freeze(release, False)

    def get_new_session(self, flow=None):
        '''
        Returns an empty session

        :param flow: kind of workflow
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        '''
        if flow is None:
            flow = Workflow.FLOW
        return Session(self.name, self.config, flow)

    def get_session_from_release(self, release):
        '''
        Loads the session matching a specific release

        :param release: release name or production dir
        :type release: str
        :return: session record of the bank, None if not found
        '''
        release = str(release)
        oldsession = None
        # Search production release matching release
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                for s in self.bank['sessions']:
                    if s['id'] == prod['session']:
                        oldsession = s
                        break
                break
        if oldsession is None:
            # No prod session, try to find a session for this release, session may have failed or be stopped
            for s in self.bank['sessions']:
                if s['release'] and release.endswith(s['release']):
                    oldsession = s
        if oldsession is None:
            logging.error('No production session could be found for this release')
        return oldsession

    def load_session(self, flow=None, session=None):
        '''
        Loads last session or, if over or forced, a new session

        Creates a new session or load last session if not over

        :param flow: kind of workflow
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        :param session: session record to load, skips the search of the last session
        :type session: dict
        '''
        if flow is None:
            flow = Workflow.FLOW

        if session is not None:
            logging.debug('Load specified session ' + str(session['id']))
            self.session = Session(self.name, self.config, flow)
            self.session.load(session)
            self.use_last_session = True
            return

        if len(self.bank['sessions']) == 0 or self.options.get_option(Options.FROMSCRATCH):
            self.session = Session(self.name, self.config, flow)
            logging.debug('Start new session')
        else:
            # Take last session
            self.session = Session(self.name, self.config, flow)
            session_id = None
            # Load previous session for updates only
            if self.session.get('action') == 'update' and 'last_update_session' in self.bank \
                    and self.bank['last_update_session']:
                session_id = self.bank['last_update_session']
                load_session = None
                for bank_session in self.bank['sessions']:
                    if bank_session['id'] == session_id:
                        load_session = bank_session
                        break
                if load_session is not None:
                    self.session.load(load_session)
                    if self.session.get_status(Workflow.FLOW_OVER) \
                            and self.options.get_option(Options.FROM_TASK) is None:
                        # Last session is over, start a new one but remember its release
                        previous_release = self.session.get('remoterelease')
                        self.session = Session(self.name, self.config, flow)
                        self.session.set('previous_release', previous_release)
                        logging.debug('Start new session')
                    else:
                        logging.debug('Load previous session ' + str(self.session.get('id')))
                        self.use_last_session = True

    def remove_session(self, sid):
        '''
        Delete a session from db

        If ``keep.old.sessions`` is set, the session is kept and marked as deleted.

        :param sid: id of the session
        :type sid: long
        :return: bool
        '''
        session_release = None
        _tmpbank = self.banks.find_one({'name': self.name})
        for s in _tmpbank['sessions']:
            if s['id'] == sid:
                session_release = s['release']

        # Remove the cached file lists of the session
        cache_dir = self.config.get('cache.dir')
        download_files = os.path.join(cache_dir, 'files_' + str(sid))
        if os.path.exists(download_files):
            os.remove(download_files)
        local_files = os.path.join(cache_dir, 'local_files_' + str(sid))
        if os.path.exists(local_files):
            os.remove(local_files)

        keep_session = self.config.get_bool('keep.old.sessions')
        pull = {'production': {'session': sid}}
        if keep_session:
            logging.debug('keep old sessions')
        else:
            pull['sessions'] = {'id': sid}
        change = {'$pull': pull}
        if session_release is not None:
            change['$unset'] = {'pending.' + session_release: ''}
        self.banks.update({'name': self.name}, change)

        if keep_session:
            # Session record is kept, only flagged with its deletion time
            self.banks.update({'name': self.name, 'sessions.id': sid},
                              {'$set': {'sessions.$.deleted': time.time()}})

        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if session_release is not None:
            BmajIndex.remove(self.name, session_release)
        return True

    def get_data_dir(self):
        '''
        Returns bank data directory

        :return: str
        '''
        return os.path.join(self.config.get('data.dir'),
                            self.config.get('dir.version'))

    def removeAll(self, force=False):
        '''
        Remove all bank releases and database records

        :param force: force removal even if some production dirs are freezed
        :type force: bool
        :return: bool
        '''
        if not force:
            has_freeze = False
            for prod in self.bank['production']:
                if 'freeze' in prod and prod['freeze']:
                    has_freeze = True
                    break
            if has_freeze:
                logging.error('Cannot remove bank, some production directories are freezed, use force if needed')
                return False

        self.banks.remove({'name': self.name})
        BmajIndex.delete_all_bank(self.name)
        bank_data_dir = self.get_data_dir()
        logging.warning('DELETE ' + bank_data_dir)
        if os.path.exists(bank_data_dir):
            shutil.rmtree(bank_data_dir)
        bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name'))
        if os.path.exists(bank_offline_dir):
            shutil.rmtree(bank_offline_dir)
        bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
        # NOTE(review): log directory is only removed when no_log is set - confirm this is intended
        if os.path.exists(bank_log_dir) and self.no_log:
            shutil.rmtree(bank_log_dir)
        return True

    def get_status(self):
        '''
        Get status of current workflow

        :return: dict of current workflow status, empty if bank has no status
        '''
        status = self.bank.get('status')
        if status is None:
            return {}
        return status

    def remove_pending(self, release):
        '''
        Remove pending releases

        All pending releases of the bank are removed, *release* is not used
        to select one.

        :param release: release or release directory
        :type release: str
        :return: bool
        :raises Exception: if current user is neither owner nor admin
        '''
        release = str(release)
        logging.warning('Bank:' + self.name + ':RemovePending')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        # Bank may have no pending release at all
        pendings = self.bank.get('pending')
        if not pendings:
            return True

        for pending_release in list(pendings.keys()):
            pending_session_id = pendings[pending_release]
            pending_session = None
            for s in self.bank['sessions']:
                if s['id'] == pending_session_id:
                    pending_session = s
                    break
            session = Session(self.name, self.config, RemoveWorkflow.FLOW)
            if pending_session is None:
                # No session record left, only the release name is known
                session._session['release'] = pending_release
            else:
                session.load(pending_session)
            if os.path.exists(session.get_full_release_directory()):
                logging.debug("Remove:Pending:Dir:" + session.get_full_release_directory())
                shutil.rmtree(session.get_full_release_directory())
            self.remove_session(pending_session_id)

        self.banks.update({'name': self.name}, {'$set': {'pending': {}}})
        return True

    def remove(self, release):
        '''
        Remove a release (db and files)

        :param release: release or release directory
        :type release: str
        :return: bool
        :raises Exception: if current user is neither owner nor admin
        '''
        release = str(release)
        logging.warning('Bank:' + self.name + ':Remove')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        self.session = self.get_new_session(RemoveWorkflow.FLOW)
        oldsession = None
        # Search production release matching release
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                if 'freeze' in prod and prod['freeze']:
                    logging.error('Cannot remove release, release is freezed, unfreeze it first')
                    return False
                # Search session related to this production release
                for s in self.bank['sessions']:
                    if s['id'] == prod['session']:
                        oldsession = s
                        break
                break
        if oldsession is None:
            logging.error('No production session could be found for this release')
            return False
        if 'current' in self.bank and self.bank['current'] == oldsession['id']:
            logging.error('This release is the release in the main release production, you should first unpublish it')
            return False

        # New empty session for removal
        session = Session(self.name, self.config, RemoveWorkflow.FLOW)
        session.set('action', 'remove')
        session.set('release', oldsession['release'])
        session.set('update_session_id', oldsession['id'])
        self.session = session
        # Reset status, we take an update session
        res = self.start_remove(session)
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def update(self, depends=False):
        '''
        Launch a bank update

        :param depends: run update of bank dependencies first
        :type depends: bool
        :return: bool
        :raises Exception: if current user is neither owner nor admin
        '''
        logging.warning('Bank:' + self.name + ':Update')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        self.run_depends = depends
        self.controls()

        if self.options.get_option('release'):
            logging.info('Bank:' + self.name + ':Release:' + self.options.get_option('release'))
            s = self.get_session_from_release(self.options.get_option('release'))
            # No session in prod
            if s is None:
                logging.error('Release does not exists: ' + self.options.get_option('release'))
                return False
            self.load_session(UpdateWorkflow.FLOW, s)
        else:
            logging.info('Bank:' + self.name + ':Release:latest')
            self.load_session(UpdateWorkflow.FLOW)

        # if from task, reset workflow status in session.
        if self.options.get_option('from_task'):
            set_to_false = False
            for task in self.session.flow:
                # If task was in False status (KO) and we ask to start after this task, exit
                if not set_to_false and not self.session.get_status(task['name']) \
                        and task['name'] != self.options.get_option('from_task'):
                    logging.error(
                        'Previous task ' + task['name'] + ' was not successful, cannot restart after this task')
                    return False
                if task['name'] == self.options.get_option('from_task'):
                    set_to_false = True
                if set_to_false:
                    # After from_task task, tasks must be set to False to be run
                    self.session.set_status(task['name'], False)
                    proc = None
                    if task['name'] in [Workflow.FLOW_POSTPROCESS, Workflow.FLOW_PREPROCESS,
                                        Workflow.FLOW_REMOVEPROCESS]:
                        proc = self.options.get_option('process')
                        self.session.reset_proc(task['name'], proc)

        self.session.set('action', 'update')
        res = self.start_update()
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def start_remove(self, session):
        '''
        Start a removal workflow

        :param session: Session to remove
        :type session: :class:`biomaj.session.Session`
        :return: bool
        '''
        workflow = RemoveWorkflow(self, session)
        return workflow.start()

    def start_update(self):
        '''
        Start an update workflow

        :return: result of the workflow start
        '''
        workflow = UpdateWorkflow(self)
        return workflow.start()
def main():
    '''
    Command line entry point.

    Parses the command line into an ``Options`` namespace, loads the global
    configuration, then executes the requested action(s). Informational and
    administrative actions (maintenance, owner, visibility, move, rename,
    search, show, check, status) terminate the process with ``sys.exit``
    once done; the remaining actions are evaluated one after the other.
    Any exception raised while executing an action is printed.
    '''
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-c', '--config', dest="config", help="Configuration file")
    parser.add_argument('--check', dest="check", help="Check bank property file", action="store_true", default=False)
    parser.add_argument('-u', '--update', dest="update", help="Update action", action="store_true", default=False)
    parser.add_argument('--fromscratch', dest="fromscratch", help="Force a new cycle update", action="store_true", default=False)
    parser.add_argument('-z', '--from-scratch', dest="fromscratch", help="Force a new cycle update", action="store_true", default=False)
    parser.add_argument('-p', '--publish', dest="publish", help="Publish", action="store_true", default=False)
    parser.add_argument('--unpublish', dest="unpublish", help="Unpublish", action="store_true", default=False)

    parser.add_argument('--release', dest="release", help="release of the bank")
    parser.add_argument('--from-task', dest="from_task", help="Start cycle at a specific task (init always executed)")
    parser.add_argument('--process', dest="process", help="Linked to from-task, optionally specify a block, meta or process name to start from")
    parser.add_argument('-l', '--log', dest="log", help="log level")
    parser.add_argument('-r', '--remove', dest="remove", help="Remove a bank release", action="store_true", default=False)
    parser.add_argument('--remove-all', dest="removeall", help="Remove all bank releases and database records", action="store_true", default=False)
    parser.add_argument('--remove-pending', dest="removepending", help="Remove pending release", action="store_true", default=False)
    parser.add_argument('-s', '--status', dest="status", help="Get status", action="store_true", default=False)
    parser.add_argument('-b', '--bank', dest="bank", help="bank name")
    parser.add_argument('--owner', dest="owner", help="change owner of the bank")
    # Help text fixed: these options stop the workflow (see usage below)
    parser.add_argument('--stop-before', dest="stop_before", help="Stop workflow before task")
    parser.add_argument('--stop-after', dest="stop_after", help="Stop workflow after task")
    parser.add_argument('--freeze', dest="freeze", help="Freeze a bank release", action="store_true", default=False)
    parser.add_argument('--unfreeze', dest="unfreeze", help="Unfreeze a bank release", action="store_true", default=False)
    parser.add_argument('-f', '--force', dest="force", help="Force action", action="store_true", default=False)
    parser.add_argument('-h', '--help', dest="help", help="Show usage", action="store_true", default=False)

    parser.add_argument('--search', dest="search", help="Search by format and types", action="store_true", default=False)
    parser.add_argument('--formats', dest="formats", help="List of formats to search, comma separated")
    parser.add_argument('--types', dest="types", help="List of types to search, comma separated")
    parser.add_argument('--query', dest="query", help="Lucene query syntax to search in index")

    parser.add_argument('--show', dest="show", help="Show format files for selected bank", action="store_true", default=False)

    parser.add_argument('-n', '--change-dbname', dest="newbank", help="Change old bank name to this new bank name")
    parser.add_argument('-e', '--move-production-directories', dest="newdir", help="Change bank production directories location to this new path, path must exists")
    parser.add_argument('--visibility', dest="visibility", help="visibility status of the bank")

    parser.add_argument('--maintenance', dest="maintenance", help="Maintenance mode (on/off/status)")

    parser.add_argument('--version', dest="version", help="Show version", action="store_true", default=False)
    parser.add_argument('--status-ko', dest="statusko", help="Get bank in KO status", action="store_true", default=False)

    # Parsed values are stored directly on the Options object
    options = Options()
    parser.parse_args(namespace=options)

    options.no_log = False

    if options.help:
        print('''
    --config: global.properties file path

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish

    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --search: basic search in bank production releases, return list of banks
        --formats xx,yy : list of comma separated format
        AND/OR
        --types xx,yy : list of comma separated type

        --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
        [MANDATORY]
        --bank xx: name of the bank to show
        [OPTIONAL]
        --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal

        ''')
        return

    if options.version:
        version = pkg_resources.require('biomaj')[0].version
        print('Version: ' + str(version))
        return

    if options.stop_after or options.stop_before or options.from_task:
        # Step names are validated against both update and removal workflows
        available_steps = [flow['name'] for flow in UpdateWorkflow.FLOW]
        available_steps += [flow['name'] for flow in RemoveWorkflow.FLOW]
        for step in (options.stop_after, options.stop_before, options.from_task):
            if step and step not in available_steps:
                print('Invalid step: ' + step)
                sys.exit(1)

    bmaj = None
    try:
        if options.config is not None:
            BiomajConfig.load_config(options.config)
        else:
            BiomajConfig.load_config()
    except Exception as e:
        print(str(e))
        sys.exit(1)

    # sys.exit() raises SystemExit, which is not an Exception subclass, so
    # the exits below are not intercepted by the final except clause.
    try:

        if options.maintenance:
            if options.maintenance not in ['on', 'off', 'status']:
                print("Wrong maintenance value [on,off,status]")
                sys.exit(1)
            data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
            if BiomajConfig.global_config.has_option('GENERAL', 'lock.dir'):
                lock_dir = BiomajConfig.global_config.get('GENERAL', 'lock.dir')
            else:
                lock_dir = data_dir
            # Maintenance mode is materialized by the presence of this file
            maintenance_lock_file = os.path.join(lock_dir, 'biomaj.lock')
            if options.maintenance == 'status':
                if os.path.exists(maintenance_lock_file):
                    print("Maintenance: On")
                else:
                    print("Maintenance: Off")
                sys.exit(0)
            if options.maintenance == 'on':
                with open(maintenance_lock_file, 'w') as f:
                    f.write('1')
                print("Maintenance set to On")
                sys.exit(0)
            if options.maintenance == 'off':
                if os.path.exists(maintenance_lock_file):
                    os.remove(maintenance_lock_file)
                print("Maintenance set to Off")
                sys.exit(0)

        if options.owner:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_owner(options.owner)
            sys.exit(0)

        if options.visibility:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if options.visibility not in ['public', 'private']:
                print("Valid values are public|private")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_visibility(options.visibility)
            print("Do not forget to update accordingly the visibility.default parameter in the configuration file")
            sys.exit(0)

        if options.newdir:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if not os.path.exists(options.newdir):
                print("Destination directory does not exists")
                # Without this exit, shutil.move() below would rename the
                # release directory to the missing path instead of moving
                # it into an existing directory.
                sys.exit(1)
            bank = Bank(options.bank, options=options, no_log=True)
            if not bank.bank['production']:
                print("Nothing to move, no production directory")
                sys.exit(0)
            bank.load_session(Workflow.FLOW, None)
            w = Workflow(bank)
            res = w.wf_init()
            if not res:
                sys.exit(1)
            for prod in bank.bank['production']:
                session = bank.get_session_from_release(prod['release'])
                bank.load_session(Workflow.FLOW, session)
                prod_path = bank.session.get_full_release_directory()
                if os.path.exists(prod_path):
                    shutil.move(prod_path, options.newdir)
                prod['data_dir'] = options.newdir
            bank.banks.update({'name': options.bank}, {'$set': {'production': bank.bank['production']}})
            print("Bank production directories moved to " + options.newdir)
            print("WARNING: do not forget to update accordingly the data.dir and dir.version properties")
            w.wf_over()
            sys.exit(0)

        if options.newbank:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
            bank_prop_file = os.path.join(conf_dir, options.bank + '.properties')
            config_bank = configparser.SafeConfigParser()
            config_bank.read([bank_prop_file])
            config_bank.set('GENERAL', 'db.name', options.newbank)
            # Write the new properties file first, the old one is removed
            # only after the database record has been renamed
            with open(os.path.join(conf_dir, options.newbank + '.properties'), 'w') as newbank_prop_file:
                config_bank.write(newbank_prop_file)
            bank.banks.update({'name': options.bank}, {'$set': {'name': options.newbank}})
            os.remove(bank_prop_file)
            print("Bank " + options.bank + " renamed to " + options.newbank)
            sys.exit(0)

        if options.search:
            if options.query:
                res = Bank.searchindex(options.query)
                print("Query matches for :" + options.query)
                results = [["Release", "Format(s)", "Type(s)", "Files"]]
                for match in res:
                    results.append([match['_source']['release'],
                                    str(match['_source']['format']),
                                    str(match['_source']['types']),
                                    ','.join(match['_source']['files'])])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            else:
                formats = []
                if options.formats:
                    formats = options.formats.split(',')
                types = []
                if options.types:
                    types = options.types.split(',')
                print("Search by formats=" + str(formats) + ", types=" + str(types))
                res = Bank.search(formats, types, False)
                results = [["Name", "Release", "Format(s)", "Type(s)", 'Current']]
                for bank in sorted(res, key=lambda bank: (bank['name'])):
                    b = bank['name']
                    bank['production'].sort(key=lambda n: n['release'], reverse=True)
                    for prod in bank['production']:
                        iscurrent = ""
                        if prod['session'] == bank['current']:
                            iscurrent = "yes"
                        results.append([b if b else '', prod['release'], ','.join(prod['formats']),
                                        ','.join(prod['types']), iscurrent])
                        # Bank name is displayed on its first row only
                        b = None
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.show:
            if not options.bank:
                print("Bank option is required")
                sys.exit(1)

            bank = Bank(options.bank, no_log=True)
            results = [["Name", "Release", "Format(s)", "Type(s)", "Tag(s)", "File(s)"]]
            current = None
            fformat = None
            if 'current' in bank.bank and bank.bank['current']:
                current = bank.bank['current']
            for prod in bank.bank['production']:
                include = True
                release = prod['release']
                if current == prod['session']:
                    release += ' (current)'
                # --release matches either the release name or the production directory
                if options.release and (prod['release'] != options.release and prod['prod_dir'] != options.release):
                    include = False
                if include:
                    session = bank.get_session_from_release(prod['release'])
                    formats = session['formats']
                    afiles = []
                    atags = []
                    atypes = []
                    for fformat in list(formats.keys()):
                        for elt in formats[fformat]:
                            atypes.append(','.join(elt['types']))
                            for tag in list(elt['tags'].keys()):
                                atags.append(elt['tags'][tag])
                            for eltfile in elt['files']:
                                afiles.append(eltfile)
                    results.append([bank.bank['name'], release, fformat, ','.join(atypes),
                                    ','.join(atags), ','.join(afiles)])
            print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.check:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            print(options.bank + " check: " + str(bank.check()) + "\n")
            sys.exit(0)

        if options.status:
            if options.bank:
                bank = Bank(options.bank, no_log=True)
                info = bank.get_bank_release_info(full=True)
                print(tabulate(info['info'], headers='firstrow', tablefmt='psql'))
                print(tabulate(info['prod'], headers='firstrow', tablefmt='psql'))
                # do we have some pending release(s)
                if 'pend' in info and len(info['pend']) > 1:
                    print(tabulate(info['pend'], headers='firstrow', tablefmt='psql'))
            else:
                banks = Bank.list()
                # Headers of output table
                banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
                for bank in sorted(banks, key=lambda k: k['name']):
                    bank = Bank(bank['name'], no_log=True)
                    banks_list.append(bank.get_bank_release_info()['info'])
                print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))
            sys.exit(0)

        if options.statusko:
            banks = Bank.list()
            banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
            for bank in sorted(banks, key=lambda k: k['name']):
                # A failure on one bank must not prevent listing the others
                try:
                    bank = Bank(bank['name'], no_log=True)
                    bank.load_session(UpdateWorkflow.FLOW)
                    if bank.session is not None:
                        if bank.use_last_session and not bank.session.get_status(Workflow.FLOW_OVER):
                            wf_status = bank.session.get('workflow_status')
                            if wf_status is None or not wf_status:
                                banks_list.append(bank.get_bank_release_info()['info'])
                except Exception as e:
                    print(str(e))
            print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))

        if options.update:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            banks = options.bank.split(',')
            # Global result: False as soon as one bank fails
            gres = True
            for bank in banks:
                options.bank = bank
                bmaj = Bank(bank, options)
                print('Log file: ' + bmaj.config.log_file)
                check_status = bmaj.check()
                if not check_status:
                    print('Skip bank ' + options.bank + ': wrong config')
                    gres = False
                    continue
                res = bmaj.update(depends=True)
                if not res:
                    gres = False
                Notify.notifyBankAction(bmaj)
            if not gres:
                sys.exit(1)

        if options.freeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.freeze(options.release)
            if not res:
                sys.exit(1)

        if options.unfreeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.unfreeze(options.release)
            if not res:
                sys.exit(1)

        if options.remove or options.removeall:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if options.remove and not options.release:
                print("Bank release is missing")
                sys.exit(1)
            if options.removeall:
                bmaj = Bank(options.bank, options, no_log=True)
                print('Log file: ' + bmaj.config.log_file)
                res = bmaj.removeAll(options.force)
            else:
                bmaj = Bank(options.bank, options)
                print('Log file: ' + bmaj.config.log_file)
                res = bmaj.remove(options.release)
                Notify.notifyBankAction(bmaj)
            if not res:
                sys.exit(1)

        if options.removepending:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            print('Log file: ' + bmaj.config.log_file)
            res = bmaj.remove_pending(options.release)
            if not res:
                sys.exit(1)

        if options.unpublish:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bmaj.unpublish()
            sys.exit(0)

        if options.publish:
            if not options.bank:
                print("Bank name or release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            print('Log file: ' + bmaj.config.log_file)
            bmaj.load_session()
            bank = bmaj.bank
            session = None
            if options.get_option('release') is None:
                # Get latest prod release
                if len(bank['production']) > 0:
                    prod = bank['production'][len(bank['production']) - 1]
                    for s in bank['sessions']:
                        if s['id'] == prod['session']:
                            session = s
                            break
            else:
                # Search production release matching release
                for prod in bank['production']:
                    if prod['release'] == options.release or prod['prod_dir'] == options.release:
                        # Search session related to this production release
                        for s in bank['sessions']:
                            if s['id'] == prod['session']:
                                session = s
                                break
                        break
            if session is None:
                print("No production session could be found for this release")
                sys.exit(1)
            # Publish works on the loaded session, point it to the selected one
            bmaj.session._session = session
            bmaj.publish()
    except Exception as e:
        print(str(e))
class Bank(object): ''' BioMAJ bank ''' def __init__(self, name, options=None, no_log=False): ''' Get a bank from db or creates a new one :param name: name of the bank, must match its config file :type name: str :param options: bank options :type options: argparse :param no_log: create a log file for the bank :type no_log: bool ''' logging.debug('Initialize ' + name) if BiomajConfig.global_config is None: raise Exception('Configuration must be loaded first') self.name = name self.depends = [] self.no_log = no_log if no_log: if options is None: # options = {'no_log': True} options = Options() options.no_log = True else: options.no_log = no_log self.config = BiomajConfig(self.name, options) if self.config.get('bank.num.threads') is not None: ProcessFactory.NB_THREAD = int(self.config.get('bank.num.threads')) if self.config.log_file is not None and self.config.log_file != 'none': logging.info("Log file: " + self.config.log_file) # self.options = Options(options) if options is None: self.options = Options() else: self.options = options # if MongoConnector.db is None: # MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'), # BiomajConfig.global_config.get('GENERAL', 'db.name')) # # self.banks = MongoConnector.banks # self.bank = self.banks.find_one({'name': self.name}) self.connector = Connector().get_connector() #self.banks = self.connector.get_collection('banks') self.banks = self.connector self.bank = self.connector.get({'name': self.name}) if self.bank is None: self.bank = { 'name': self.name, 'current': None, 'sessions': [], 'production': [], 'properties': self.get_properties() } #self.bank['_id'] = self.banks.insert(self.bank) self.bank['_id'] = self.connector.set('banks', self.bank) self.session = None self.use_last_session = False def check(self): ''' Checks bank configuration ''' return self.config.check() def is_locked(self): ''' Checks if bank is locked ie action is in progress ''' data_dir = self.config.get('data.dir') lock_dir = 
self.config.get('lock.dir', default=data_dir) lock_file = os.path.join(lock_dir, self.name + '.lock') if os.path.exists(lock_file): return True else: return False def get_bank(self): ''' Get bank stored in db :return: bank json object ''' return self.bank @staticmethod def get_banks_disk_usage(): ''' Get disk usage per bank and release ''' if MongoConnector.db is None: MongoConnector( BiomajConfig.global_config.get('GENERAL', 'db.url'), BiomajConfig.global_config.get('GENERAL', 'db.name')) bank_list = [] banks = MongoConnector.banks.find({}, {'name': 1, 'production': 1}) for b in banks: bank_elt = {'name': b['name'], 'size': 0, 'releases': []} for p in b['production']: if p['size'] is None: p['size'] = 0 bank_elt['size'] += p['size'] bank_elt['releases'].append({ 'name': p['release'], 'size': p['size'] }) bank_list.append(bank_elt) return bank_list def get_bank_release_info(self, full=False): ''' Get release info for the bank. Used with --status option from biomaj-cly.py :param full: Display full for the bank :type full: Boolean :return: Dict with keys if full=True - info, prod, pend else - info ''' _bank = self.bank info = {} if full: bank_info = [] prod_info = [] pend_info = [] release = None if 'current' in _bank and _bank['current']: for prod in _bank['production']: if _bank['current'] == prod['session']: release = prod['release'] # Bank info header bank_info.append( ["Name", "Type(s)", "Last update status", "Published release"]) bank_info.append([ _bank['name'], str(','.join(_bank['properties']['type'])), str( datetime.fromtimestamp( _bank['last_update_session']).strftime( "%Y-%m-%d %H:%M:%S")), str(release) ]) # Bank production info header prod_info.append([ "Session", "Remote release", "Release", "Directory", "Freeze" ]) for prod in _bank['production']: data_dir = self.config.get('data.dir') dir_version = self.config.get('dir.version') if 'data.dir' in prod: data_dir = prod['data.dir'] if 'dir.version' in prod: dir_version = prod['dir.version'] release_dir = 
os.path.join(data_dir, dir_version, prod['prod_dir']) date = datetime.fromtimestamp( prod['session']).strftime('%Y-%m-%d %H:%M:%S') prod_info.append([ date, prod['remoterelease'], prod['release'], release_dir, 'yes' if 'freeze' in prod and prod['freeze'] else 'no' ]) # Bank pending info header if 'pending' in _bank and len(_bank['pending'].keys()) > 0: pend_info.append(["Pending release", "Last run"]) for pending in _bank['pending'].keys(): run = datetime.fromtimestamp( _bank['pending'][pending]).strftime( '%Y-%m-%d %H:%M:%S') pend_info.append([pending, run]) info['info'] = bank_info info['prod'] = prod_info info['pend'] = pend_info return info else: release = 'N/A' if 'current' in _bank and _bank['current']: for prod in _bank['production']: if _bank['current'] == prod['session']: release = prod['remoterelease'] info['info'] = [ _bank['name'], ','.join(_bank['properties']['type']), str(release), _bank['properties']['visibility'] ] return info def update_dependencies(self): ''' Update bank dependencies :return: status of updates ''' self.depends = [] if self.run_depends: depends = self.get_dependencies() else: depends = [] self.session.set('depends', {}) res = True for dep in depends: self.session._session['depends'][dep] = False for dep in depends: if self.session._session['depends'][dep]: logging.debug('Update:Depends:' + dep + ':SKIP') # Bank has been marked as depends multiple times, run only once continue logging.info('Update:Depends:' + dep) b = Bank(dep) res = b.update() self.depends.append(b) self.session._session['depends'][dep] = res logging.info('Update:Depends:' + dep + ':' + str(res)) if not res: break return res def get_bank(self, bank, no_log=False): ''' Gets an other bank ''' return Bank(bank, no_log=no_log) def get_dependencies(self, bank=None): ''' Search all bank dependencies :return: list of bank names to update ''' if bank is None: deps = self.config.get('depends') else: deps = bank.config.get('depends') if deps is None: return [] # Mainn deps 
deps = deps.split(',') # Now search in deps if they themselves depend on other banks for dep in deps: b = Bank(dep, no_log=True) deps = b.get_dependencies() + deps return deps def is_owner(self): ''' Checks if current user is owner or admin ''' admin_config = self.config.get('admin') admin = [] if admin_config is not None: admin = [x.strip() for x in admin_config.split(',')] if admin and os.environ['LOGNAME'] in admin: return True if os.environ['LOGNAME'] == self.bank['properties']['owner']: return True return False def set_owner(self, owner): ''' Update bank owner, only if current owner ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) #self.banks.update({'name': self.name}, {'$set': {'properties.owner': owner}}) self.banks.update({'name': self.name}, {'$set': { 'properties.owner': owner }}) def set_visibility(self, visibility): ''' Update bank visibility, only if current owner ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) self.banks.update({'name': self.name}, {'$set': { 'properties': { 'visibility': visibility } }}) def get_properties(self): ''' Read bank properties from config file :return: properties dict ''' owner = os.environ['LOGNAME'] # If owner not set, use current user, else keep current if self.bank and 'properties' in self.bank and 'owner' in self.bank[ 'properties']: owner = self.bank['properties']['owner'] props = { 'visibility': self.config.get('visibility.default'), 'type': self.config.get('db.type').split(','), 'tags': [], 'owner': owner } return props @staticmethod def searchindex(query): return BmajIndex.searchq(query) @staticmethod def search(formats=None, types=None, with_sessions=True): ''' Search all bank releases matching some formats and types 
Matches production release with at least one of formats and one of types ''' if formats is None: formats = [] if types is None: types = [] if MongoConnector.db is None: MongoConnector( BiomajConfig.global_config.get('GENERAL', 'db.url'), BiomajConfig.global_config.get('GENERAL', 'db.name')) searchfilter = {} if formats: searchfilter['production.formats'] = {'$in': formats} if with_sessions: res = MongoConnector.banks.find(searchfilter) else: res = MongoConnector.banks.find(searchfilter, {'sessions': 0}) # Now search in which production release formats and types apply search_list = [] for r in res: prod_to_delete = [] for p in r['production']: is_format = False if not formats: is_format = True # Are formats present in this production release? for f in formats: if f in p['formats']: is_format = True break # Are types present in this production release? is_type = False if not types: is_type = True if is_format: for t in types: if t in p['types'] or t in r['properties']['type']: is_type = True break if not is_type or not is_format: prod_to_delete.append(p) for prod_del in prod_to_delete: r['production'].remove(prod_del) if len(r['production']) > 0: search_list.append(r) return search_list @staticmethod def list(with_sessions=False): ''' Return a list of banks :param with_sessions: should sessions be returned or not (can be quite big) :type with_sessions: bool :return: list of :class:`biomaj.bank.Bank` ''' if MongoConnector.db is None: MongoConnector( BiomajConfig.global_config.get('GENERAL', 'db.url'), BiomajConfig.global_config.get('GENERAL', 'db.name')) bank_list = [] if with_sessions: res = MongoConnector.banks.find({}) else: res = MongoConnector.banks.find({}, {'sessions': 0}) for r in res: bank_list.append(r) return bank_list def controls(self): ''' Initial controls (create directories etc...) 
''' data_dir = self.config.get('data.dir') bank_dir = self.config.get('dir.version') bank_dir = os.path.join(data_dir, bank_dir) if not os.path.exists(bank_dir): os.makedirs(bank_dir) offline_dir = self.config.get('offline.dir.name') offline_dir = os.path.join(data_dir, offline_dir) if not os.path.exists(offline_dir): os.makedirs(offline_dir) log_dir = self.config.get('log.dir') log_dir = os.path.join(log_dir, self.name) if not os.path.exists(log_dir): os.makedirs(log_dir) def _delete(self): ''' Delete bank from database, not files ''' self.banks.remove({'_id': self.bank['_id']}) def save_session(self): ''' Save session in database ''' self.session._session['last_update_time'] = time.time() self.session._session['log_file'] = self.config.log_file if self.use_last_session: # Remove last session self.banks.update( {'name': self.name}, {'$pull': { 'sessions': { 'id': self.session._session['id'] } }}) # Insert session if self.session.get('action') == 'update': action = 'last_update_session' if self.session.get('action') == 'remove': action = 'last_remove_session' cache_dir = self.config.get('cache.dir') download_files = self.session.get('download_files') if download_files is not None: f_downloaded_files = open( os.path.join(cache_dir, 'files_' + str(self.session.get('id'))), 'w') f_downloaded_files.write(json.dumps(download_files)) f_downloaded_files.close() self.session.set('download_files', []) local_files = self.session.get('files') if local_files is not None: f_local_files = open( os.path.join(cache_dir, 'local_files_' + str(self.session.get('id'))), 'w') f_local_files.write(json.dumps(download_files)) f_local_files.close() self.session.set('files', []) self.banks.update({'name': self.name}, { '$set': { action: self.session._session['id'], 'properties': self.get_properties() }, '$push': { 'sessions': self.session._session } }) BmajIndex.add(self.name, self.session._session) if self.session.get( 'action') == 'update' and not self.session.get_status( 
Workflow.FLOW_OVER) and self.session.get('release'): self.banks.update({'name': self.name}, { '$set': { 'pending.' + self.session.get('release'): self.session._session['id'] } }) if self.session.get('action') == 'update' and self.session.get_status( Workflow.FLOW_OVER) and self.session.get('update'): # We expect that a production release has reached the FLOW_OVER status. # If no update is needed (same release etc...), the *update* session of the session is set to False logging.debug('Bank:Save:' + self.name) if len(self.bank['production']) > 0: # Remove from database self.banks.update({'name': self.name}, { '$pull': { 'production': { 'release': self.session._session['release'] } } }) # Update local object # index = 0 # for prod in self.bank['production']: # if prod['release'] == self.session._session['release']: # break; # index += 1 # if index < len(self.bank['production']): # self.bank['production'].pop(index) release_types = [] if self.config.get('db.type'): release_types = self.config.get('db.type').split(',') release_formats = list(self.session._session['formats'].keys()) if self.config.get('db.formats'): config_formats = self.config.get('db.formats').split(',') for config_format in config_formats: if config_format not in release_formats: release_formats.append(config_format) for release_format in self.session._session['formats']: for release_files in self.session._session['formats'][ release_format]: if release_files['types']: for rtype in release_files['types']: if rtype not in release_types: release_types.append(rtype) prod_dir = self.session.get_release_directory() if self.session.get('prod_dir'): prod_dir = self.session.get('prod_dir') production = { 'release': self.session.get('release'), 'remoterelease': self.session.get('remoterelease'), 'session': self.session._session['id'], 'formats': release_formats, 'types': release_types, 'size': self.session.get('fullsize'), 'data_dir': self.session._session['data_dir'], 'dir_version': 
self.session._session['dir_version'], 'prod_dir': prod_dir, 'freeze': False } self.bank['production'].append(production) self.banks.update({'name': self.name}, { '$push': { 'production': production }, '$unset': { 'pending.' + self.session.get('release'): '' } }) # self.banks.update({'name': self.name}, # {'$unset': 'pending.'+self.session.get('release') # }) self.bank = self.banks.find_one({'name': self.name}) def clean_old_sessions(self): ''' Delete old sessions, not latest ones nor related to production sessions ''' if self.session is None: return # No previous session if 'sessions' not in self.bank: return if self.config.get_bool('keep.old.sessions'): logging.debug('keep old sessions, skipping...') return # 'last_update_session' in self.bank and self.bank['last_update_session'] old_sessions = [] prod_releases = [] for session in self.bank['sessions']: if session['id'] == self.session.get('id'): # Current session prod_releases.append(session['release']) continue if session['id'] == self.session.get('last_update_session'): prod_releases.append(session['release']) continue if session['id'] == self.session.get('last_remove_session'): continue is_prod_session = False for prod in self.bank['production']: if session['id'] == prod['session']: is_prod_session = True break if is_prod_session: prod_releases.append(session['release']) continue old_sessions.append(session) if len(old_sessions) > 0: for session in old_sessions: session_id = session['id'] self.banks.update({'name': self.name}, {'$pull': { 'sessions': { 'id': session_id } }}) # Check if in pending sessions for rel in list(self.bank['pending'].keys()): rel_session = self.bank['pending'][rel] if rel_session == session_id: self.banks.update({'name': self.name}, { '$unset': { 'pending': { str(session['release']): "" } } }) if session['release'] not in prod_releases and session[ 'release'] != self.session.get('release'): # There might be unfinished releases linked to session, delete them # if they are not related to 
a production directory or latest run session_dir = os.path.join( self.config.get('data.dir'), self.config.get('dir.version'), self.name + self.config.get('release.separator', default='_') + str(session['release'])) if os.path.exists(session_dir): logging.info( 'Bank:DeleteOldSessionDir:' + self.name + self.config.get('release.separator', default='_') + str(session['release'])) shutil.rmtree(session_dir) self.bank = self.banks.find_one({'name': self.name}) def publish(self): ''' Set session release to *current* ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) current_link = os.path.join(self.config.get('data.dir'), self.config.get('dir.version'), 'current') prod_dir = self.session.get_full_release_directory() to_dir = os.path.join(self.config.get('data.dir'), self.config.get('dir.version')) if os.path.lexists(current_link): os.remove(current_link) os.chdir(to_dir) os.symlink(self.session.get_release_directory(), 'current') self.bank['current'] = self.session._session['id'] self.banks.update({'name': self.name}, {'$set': { 'current': self.session._session['id'] }}) def unpublish(self): ''' Unset *current* ''' if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) current_link = os.path.join(self.config.get('data.dir'), self.config.get('dir.version'), 'current') if os.path.lexists(current_link): os.remove(current_link) self.banks.update({'name': self.name}, {'$set': {'current': None}}) def get_production(self, release): ''' Get production field for release :param release: release name or production dir name :type release: str :return: production field ''' release = str(release) production = None for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == 
release: production = prod return production def freeze(self, release): ''' Freeze a production release When freezed, a production release cannot be removed (manually or automatically) :param release: release name or production dir name :type release: str :return: bool ''' release = str(release) if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) rel = None for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: # Search session related to this production release rel = prod['release'] if rel is None: logging.error('Release not found: ' + release) self.banks.update({ 'name': self.name, 'production.release': rel }, {'$set': { 'production.$.freeze': True }}) self.bank = self.banks.find_one({'name': self.name}) return True def unfreeze(self, release): ''' Unfreeze a production release to allow removal :param release: release name or production dir name :type release: str :return: bool ''' release = str(release) if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) rel = None for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: # Search session related to this production release rel = prod['release'] if rel is None: logging.error('Release not found: ' + release) self.banks.update({ 'name': self.name, 'production.release': rel }, {'$set': { 'production.$.freeze': False }}) self.bank = self.banks.find_one({'name': self.name}) return True def get_new_session(self, flow=None): ''' Returns an empty session :param flow: kind of workflow :type flow: :func:`biomaj.workflow.Workflow.FLOW` ''' if flow is None: flow = Workflow.FLOW return Session(self.name, self.config, flow) def get_session_from_release(self, release): 
''' Loads the session matching a specific release :param release: release name oe production dir :type release: str :return: :class:`biomaj.session.Session` ''' release = str(release) oldsession = None # Search production release matching release for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: # Search session related to this production release for s in self.bank['sessions']: if s['id'] == prod['session']: oldsession = s break break if oldsession is None: # No prod session, try to find a session for this release, session may have failed or be stopped for s in self.bank['sessions']: if s['release'] and release.endswith(s['release']): oldsession = s if oldsession is None: logging.error( 'No production session could be found for this release') return oldsession def load_session(self, flow=None, session=None): ''' Loads last session or, if over or forced, a new session Creates a new session or load last session if not over :param flow: kind of workflow :type flow: :func:`biomaj.workflow.Workflow.FLOW` ''' if flow is None: flow = Workflow.FLOW if session is not None: logging.debug('Load specified session ' + str(session['id'])) self.session = Session(self.name, self.config, flow) self.session.load(session) self.use_last_session = True return if len(self.bank['sessions']) == 0 or self.options.get_option( Options.FROMSCRATCH): self.session = Session(self.name, self.config, flow) logging.debug('Start new session') else: # Take last session self.session = Session(self.name, self.config, flow) session_id = None # Load previous session for updates only if self.session.get( 'action' ) == 'update' and 'last_update_session' in self.bank and self.bank[ 'last_update_session']: session_id = self.bank['last_update_session'] load_session = None for session in self.bank['sessions']: if session['id'] == session_id: load_session = session break if load_session is not None: # 
self.session.load(self.bank['sessions'][len(self.bank['sessions'])-1]) self.session.load(session) # if self.config.last_modified > self.session.get('last_modified'): # # Config has changed, need to restart # self.session = Session(self.name, self.config, flow) # logging.info('Configuration file has been modified since last session, restart in any case a new session') if self.session.get_status( Workflow.FLOW_OVER) and self.options.get_option( Options.FROM_TASK) is None: previous_release = self.session.get('remoterelease') self.session = Session(self.name, self.config, flow) self.session.set('previous_release', previous_release) logging.debug('Start new session') else: logging.debug('Load previous session ' + str(self.session.get('id'))) self.use_last_session = True def remove_session(self, sid): ''' Delete a session from db :param sid: id of the session :type sid: long :return: bool ''' session_release = None _tmpbank = self.banks.find_one({'name': self.name}) for s in _tmpbank['sessions']: if s['id'] == sid: session_release = s['release'] cache_dir = self.config.get('cache.dir') download_files = os.path.join(cache_dir, 'files_' + str(sid)) if os.path.exists(download_files): os.remove(download_files) local_files = os.path.join(cache_dir, 'local_files_' + str(sid)) if os.path.exists(local_files): os.remove(local_files) if self.config.get_bool('keep.old.sessions'): logging.debug('keep old sessions') if session_release is not None: self.banks.update({'name': self.name}, { '$pull': { 'production': { 'session': sid } }, '$unset': { 'pending.' + session_release: '' } }) else: self.banks.update({'name': self.name}, {'$pull': { 'production': { 'session': sid } }}) self.banks.update({ 'name': self.name, 'sessions.id': sid }, {'$set': { 'sessions.$.deleted': time.time() }}) else: if session_release is not None: self.banks.update({'name': self.name}, { '$pull': { 'sessions': { 'id': sid }, 'production': { 'session': sid } }, '$unset': { 'pending.' 
+ session_release: '' } }) else: self.banks.update({'name': self.name}, { '$pull': { 'sessions': { 'id': sid }, 'production': { 'session': sid } } }) # Update object self.bank = self.banks.find_one({'name': self.name}) if session_release is not None: BmajIndex.remove(self.name, session_release) return True def get_data_dir(self): ''' Returns bank data directory :return: str ''' return os.path.join(self.config.get('data.dir'), self.config.get('dir.version')) def removeAll(self, force=False): ''' Remove all bank releases and database records :param force: force removal even if some production dirs are freezed :type force: bool :return: bool ''' if not force: has_freeze = False for prod in self.bank['production']: if 'freeze' in prod and prod['freeze']: has_freeze = True break if has_freeze: logging.error( 'Cannot remove bank, some production directories are freezed, use force if needed' ) return False self.banks.remove({'name': self.name}) BmajIndex.delete_all_bank(self.name) bank_data_dir = self.get_data_dir() logging.warn('DELETE ' + bank_data_dir) if os.path.exists(bank_data_dir): shutil.rmtree(bank_data_dir) bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name')) if os.path.exists(bank_offline_dir): shutil.rmtree(bank_offline_dir) bank_log_dir = os.path.join(self.config.get('log.dir'), self.name) if os.path.exists(bank_log_dir) and self.no_log: shutil.rmtree(bank_log_dir) return True def get_status(self): ''' Get status of current workflow :return: dict of current workflow status ''' if self.bank['status'] is None: return {} return self.bank['status'] def remove_pending(self, release): ''' Remove pending releases :param release: release or release directory :type release: str :return: bool ''' release = str(release) logging.warning('Bank:' + self.name + ':RemovePending') if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by 
' + self.bank['properties']['owner']) if not self.bank['pending']: return True pendings = self.bank['pending'] for release in list(pendings.keys()): pending_session_id = pendings[release] pending_session = None for s in self.bank['sessions']: if s['id'] == pending_session_id: pending_session = s break session = Session(self.name, self.config, RemoveWorkflow.FLOW) if pending_session is None: session._session['release'] = release else: session.load(pending_session) if os.path.exists(session.get_full_release_directory()): logging.debug("Remove:Pending:Dir:" + session.get_full_release_directory()) shutil.rmtree(session.get_full_release_directory()) self.remove_session(pendings[release]) self.banks.update({'name': self.name}, {'$set': {'pending': {}}}) return True def remove(self, release): ''' Remove a release (db and files) :param release: release or release directory :type release: str :return: bool ''' release = str(release) logging.warning('Bank:' + self.name + ':Remove') if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) self.session = self.get_new_session(RemoveWorkflow.FLOW) oldsession = None # Search production release matching release for prod in self.bank['production']: if prod['release'] == release or prod['prod_dir'] == release: if 'freeze' in prod and prod['freeze']: logging.error( 'Cannot remove release, release is freezed, unfreeze it first' ) return False # Search session related to this production release for s in self.bank['sessions']: if s['id'] == prod['session']: oldsession = s break break if oldsession is None: logging.error( 'No production session could be found for this release') return False if 'current' in self.bank and self.bank['current'] == oldsession['id']: logging.error( 'This release is the release in the main release production, you should first unpublish it' ) return False # New empty session for 
removal session = Session(self.name, self.config, RemoveWorkflow.FLOW) session.set('action', 'remove') session.set('release', oldsession['release']) session.set('update_session_id', oldsession['id']) self.session = session # Reset status, we take an update session res = self.start_remove(session) self.session.set('workflow_status', res) self.save_session() return res def update(self, depends=False): ''' Launch a bank update :param depends: run update of bank dependencies first :type depends: bool :return: bool ''' logging.warning('Bank:' + self.name + ':Update') if not self.is_owner(): logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner']) raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner']) self.run_depends = depends self.controls() if self.options.get_option('release'): logging.info('Bank:' + self.name + ':Release:' + self.options.get_option('release')) s = self.get_session_from_release( self.options.get_option('release')) # No session in prod if s is None: logging.error('Release does not exists: ' + self.options.get_option('release')) return False self.load_session(UpdateWorkflow.FLOW, s) else: logging.info('Bank:' + self.name + ':Release:latest') self.load_session(UpdateWorkflow.FLOW) # if from task, reset workflow status in session. 
if self.options.get_option('from_task'): set_to_false = False for task in self.session.flow: # If task was in False status (KO) and we ask to start after this task, exit if not set_to_false and not self.session.get_status( task['name'] ) and task['name'] != self.options.get_option('from_task'): logging.error( 'Previous task ' + task['name'] + ' was not successful, cannot restart after this task') return False if task['name'] == self.options.get_option('from_task'): set_to_false = True if set_to_false: # After from_task task, tasks must be set to False to be run self.session.set_status(task['name'], False) proc = None if task['name'] in [ Workflow.FLOW_POSTPROCESS, Workflow.FLOW_PREPROCESS, Workflow.FLOW_REMOVEPROCESS ]: proc = self.options.get_option('process') self.session.reset_proc(task['name'], proc) # if task['name'] == Workflow.FLOW_POSTPROCESS: # self.session.reset_proc(Workflow.FLOW_POSTPROCESS, proc) # elif task['name'] == Workflow.FLOW_PREPROCESS: # self.session.reset_proc(Workflow.FLOW_PREPROCESS, proc) # elif task['name'] == Workflow.FLOW_REMOVEPROCESS: # self.session.reset_proc(Workflow.FLOW_REMOVEPROCESS, proc) self.session.set('action', 'update') res = self.start_update() self.session.set('workflow_status', res) self.save_session() return res def start_remove(self, session): ''' Start a removal workflow :param session: Session to remove :type session: :class:`biomaj.session.Session` :return: bool ''' workflow = RemoveWorkflow(self, session) return workflow.start() def start_update(self): ''' Start an update workflow ''' workflow = UpdateWorkflow(self) return workflow.start()
def main():
    '''
    Command line entry point for bank management.

    Parses the command line into an Options namespace, loads the global
    configuration, then runs the action(s) selected by the flags. Most
    actions end the process themselves with sys.exit(); the few that do not
    (--status-ko, --update, --freeze, --unfreeze, --remove, --remove-all,
    --remove-pending) fall through to the following checks, so several of
    them can be combined in one call.

    Exit status is 1 when a mandatory option is missing, a value is invalid,
    an action reports failure or an unexpected exception is raised.

    :return: None
    '''
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-c', '--config', dest="config",
                        help="Configuration file")
    parser.add_argument('--check', dest="check",
                        help="Check bank property file",
                        action="store_true", default=False)
    parser.add_argument('-u', '--update', dest="update",
                        help="Update action",
                        action="store_true", default=False)
    parser.add_argument('--fromscratch', dest="fromscratch",
                        help="Force a new cycle update",
                        action="store_true", default=False)
    parser.add_argument('-z', '--from-scratch', dest="fromscratch",
                        help="Force a new cycle update",
                        action="store_true", default=False)
    parser.add_argument('-p', '--publish', dest="publish",
                        help="Publish",
                        action="store_true", default=False)
    parser.add_argument('--unpublish', dest="unpublish",
                        help="Unpublish",
                        action="store_true", default=False)
    parser.add_argument('--release', dest="release",
                        help="release of the bank")
    parser.add_argument('--from-task', dest="from_task",
                        help="Start cycle at a specific task (init always executed)")
    parser.add_argument('--process', dest="process",
                        help="Linked to from-task, optionally specify a block, meta or process name to start from")
    parser.add_argument('-l', '--log', dest="log",
                        help="log level")
    parser.add_argument('-r', '--remove', dest="remove",
                        help="Remove a bank release",
                        action="store_true", default=False)
    parser.add_argument('--remove-all', dest="removeall",
                        help="Remove all bank releases and database records",
                        action="store_true", default=False)
    parser.add_argument('--remove-pending', dest="removepending",
                        help="Remove pending release",
                        action="store_true", default=False)
    parser.add_argument('-s', '--status', dest="status",
                        help="Get status",
                        action="store_true", default=False)
    parser.add_argument('-b', '--bank', dest="bank",
                        help="bank name")
    parser.add_argument('--owner', dest="owner",
                        help="change owner of the bank")
    parser.add_argument('--stop-before', dest="stop_before",
                        help="Store workflow before task")
    parser.add_argument('--stop-after', dest="stop_after",
                        help="Store workflow after task")
    parser.add_argument('--freeze', dest="freeze",
                        help="Freeze a bank release",
                        action="store_true", default=False)
    parser.add_argument('--unfreeze', dest="unfreeze",
                        help="Unfreeze a bank release",
                        action="store_true", default=False)
    parser.add_argument('-f', '--force', dest="force",
                        help="Force action",
                        action="store_true", default=False)
    # argparse built-in help is disabled (add_help=False): -h prints the
    # hand-written usage text below instead.
    parser.add_argument('-h', '--help', dest="help",
                        help="Show usage",
                        action="store_true", default=False)
    parser.add_argument('--search', dest="search",
                        help="Search by format and types",
                        action="store_true", default=False)
    parser.add_argument('--formats', dest="formats",
                        help="List of formats to search, comma separated")
    parser.add_argument('--types', dest="types",
                        help="List of types to search, comma separated")
    parser.add_argument('--query', dest="query",
                        help="Lucene query syntax to search in index")
    parser.add_argument('--show', dest="show",
                        help="Show format files for selected bank",
                        action="store_true", default=False)
    parser.add_argument('-n', '--change-dbname', dest="newbank",
                        help="Change old bank name to this new bank name")
    parser.add_argument('-e', '--move-production-directories', dest="newdir",
                        help="Change bank production directories location to this new path, path must exists")
    parser.add_argument('--visibility', dest="visibility",
                        help="visibility status of the bank")
    parser.add_argument('--maintenance', dest="maintenance",
                        help="Maintenance mode (on/off/status)")
    parser.add_argument('--version', dest="version",
                        help="Show version",
                        action="store_true", default=False)
    parser.add_argument('--status-ko', dest="statusko",
                        help="Get bank in KO status",
                        action="store_true", default=False)

    # Parsed values are stored directly as attributes of the Options object
    options = Options()
    parser.parse_args(namespace=options)

    options.no_log = False

    if options.help:
        print('''
    --config: global.properties file path

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR  [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish

    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --search: basic search in bank production releases, return list of banks
        --formats xx,yy : list of comma separated format
        AND/OR
        --types xx,yy : list of comma separated type

        --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
        [MANDATORY]
        --bank xx: name of the bank to show
        [OPTIONAL]
        --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal
        ''')
        return

    if options.version:
        version = pkg_resources.require('biomaj')[0].version
        print('Version: ' + str(version))
        return

    if options.stop_after or options.stop_before or options.from_task:
        # A step name is accepted when it belongs to the update or to the
        # removal workflow.
        available_steps = [flow['name'] for flow in UpdateWorkflow.FLOW]
        available_steps.extend(flow['name'] for flow in RemoveWorkflow.FLOW)
        # Same order of checks as the options are documented: after, before, from
        for step in (options.stop_after, options.stop_before, options.from_task):
            if step and step not in available_steps:
                print('Invalid step: ' + step)
                sys.exit(1)

    bmaj = None
    try:
        if options.config is not None:
            BiomajConfig.load_config(options.config)
        else:
            BiomajConfig.load_config()
    except Exception as e:
        print(str(e))
        sys.exit(1)

    try:
        if options.maintenance:
            if options.maintenance not in ['on', 'off', 'status']:
                print("Wrong maintenance value [on,off,status]")
                sys.exit(1)
            data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
            # The lock file lives in lock.dir when configured, else in data.dir
            if BiomajConfig.global_config.has_option('GENERAL', 'lock.dir'):
                lock_dir = BiomajConfig.global_config.get('GENERAL', 'lock.dir')
            else:
                lock_dir = data_dir
            maintenance_lock_file = os.path.join(lock_dir, 'biomaj.lock')
            if options.maintenance == 'status':
                if os.path.exists(maintenance_lock_file):
                    print("Maintenance: On")
                else:
                    print("Maintenance: Off")
                sys.exit(0)
            if options.maintenance == 'on':
                # Context manager: the handle is closed even if write fails
                with open(maintenance_lock_file, 'w') as lock_handle:
                    lock_handle.write('1')
                print("Maintenance set to On")
                sys.exit(0)
            if options.maintenance == 'off':
                if os.path.exists(maintenance_lock_file):
                    os.remove(maintenance_lock_file)
                print("Maintenance set to Off")
                sys.exit(0)

        if options.owner:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_owner(options.owner)
            sys.exit(0)

        if options.visibility:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if options.visibility not in ['public', 'private']:
                print("Valid values are public|private")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_visibility(options.visibility)
            print("Do not forget to update accordingly the visibility.default parameter in the configuration file")
            sys.exit(0)

        if options.newdir:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if not os.path.exists(options.newdir):
                print("Destination directory does not exists")
                # Stop here: shutil.move() towards a missing destination
                # renames the first production directory to that path
                # instead of moving it inside an existing directory.
                sys.exit(1)
            bank = Bank(options.bank, options=options, no_log=True)
            if not bank.bank['production']:
                print("Nothing to move, no production directory")
                sys.exit(0)
            bank.load_session(Workflow.FLOW, None)
            w = Workflow(bank)
            res = w.wf_init()
            if not res:
                sys.exit(1)
            for prod in bank.bank['production']:
                session = bank.get_session_from_release(prod['release'])
                bank.load_session(Workflow.FLOW, session)
                prod_path = bank.session.get_full_release_directory()
                if os.path.exists(prod_path):
                    shutil.move(prod_path, options.newdir)
                prod['data_dir'] = options.newdir
            bank.banks.update({'name': options.bank},
                              {'$set': {'production': bank.bank['production']}})
            print("Bank production directories moved to " + options.newdir)
            print("WARNING: do not forget to update accordingly the data.dir and dir.version properties")
            w.wf_over()
            sys.exit(0)

        if options.newbank:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
            bank_prop_file = os.path.join(conf_dir, options.bank + '.properties')
            # ConfigParser replaces the deprecated SafeConfigParser alias,
            # which is no longer available in recent Python releases.
            config_bank = configparser.ConfigParser()
            config_bank.read([os.path.join(conf_dir, options.bank + '.properties')])
            config_bank.set('GENERAL', 'db.name', options.newbank)
            newbank_prop_path = os.path.join(conf_dir, options.newbank + '.properties')
            with open(newbank_prop_path, 'w') as newbank_prop_file:
                config_bank.write(newbank_prop_file)
            bank.banks.update({'name': options.bank},
                              {'$set': {'name': options.newbank}})
            # The old property file is removed only once the new one is
            # written and the database record is renamed.
            os.remove(bank_prop_file)
            print("Bank " + options.bank + " renamed to " + options.newbank)
            sys.exit(0)

        if options.search:
            if options.query:
                res = Bank.searchindex(options.query)
                print("Query matches for :" + options.query)
                results = [["Release", "Format(s)", "Type(s)", "Files"]]
                for match in res:
                    results.append([match['_source']['release'],
                                    str(match['_source']['format']),
                                    str(match['_source']['types']),
                                    ','.join(match['_source']['files'])])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            else:
                formats = []
                if options.formats:
                    formats = options.formats.split(',')
                types = []
                if options.types:
                    types = options.types.split(',')
                print("Search by formats=" + str(formats) + ", types=" + str(types))
                res = Bank.search(formats, types, False)
                results = [["Name", "Release", "Format(s)", "Type(s)", 'Published']]
                for bank in sorted(res, key=lambda bank: (bank['name'])):
                    b = bank['name']
                    # Latest release first
                    bank['production'].sort(key=lambda n: n['release'], reverse=True)
                    for prod in bank['production']:
                        iscurrent = ""
                        if prod['session'] == bank['current']:
                            iscurrent = "yes"
                        results.append([b if b else '',
                                        prod['release'],
                                        ','.join(prod['formats']),
                                        ','.join(prod['types']),
                                        iscurrent])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.show:
            if not options.bank:
                print("Bank option is required")
                sys.exit(1)

            bank = Bank(options.bank, no_log=True)
            results = [["Name", "Release", "Format(s)", "Type(s)", "Tag(s)", "File(s)"]]
            current = None
            fformat = None
            if 'current' in bank.bank and bank.bank['current']:
                current = bank.bank['current']
            for prod in bank.bank['production']:
                include = True
                release = prod['release']
                if current == prod['session']:
                    release += ' (current)'
                if options.release and (prod['release'] != options.release and
                                        prod['prod_dir'] != options.release):
                    include = False
                if include:
                    session = bank.get_session_from_release(prod['release'])
                    formats = session['formats']
                    afiles = []
                    atags = []
                    atypes = []
                    for fformat in list(formats.keys()):
                        for elt in formats[fformat]:
                            atypes.append(','.join(elt['types']))
                            for tag in list(elt['tags'].keys()):
                                atags.append(elt['tags'][tag])
                            for eltfile in elt['files']:
                                afiles.append(eltfile)
                    # NOTE(review): one row per release; types, tags and files
                    # are aggregated over all formats but the format column
                    # only holds the last format iterated - confirm this is
                    # the intended output.
                    results.append([bank.bank['name'], release, fformat,
                                    ','.join(atypes), ','.join(atags),
                                    ','.join(afiles)])
            print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.check:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            print(options.bank + " check: " + str(bank.check()) + "\n")
            sys.exit(0)

        if options.status:
            if options.bank:
                bank = Bank(options.bank, no_log=True)
                info = bank.get_bank_release_info(full=True)
                print(tabulate(info['info'], headers='firstrow', tablefmt='psql'))
                print(tabulate(info['prod'], headers='firstrow', tablefmt='psql'))
                # do we have some pending release(s)
                if 'pend' in info and len(info['pend']) > 1:
                    print(tabulate(info['pend'], headers='firstrow', tablefmt='psql'))
            else:
                banks = Bank.list()
                # Headers of output table
                banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
                for bank in sorted(banks, key=lambda k: k['name']):
                    bank = Bank(bank['name'], no_log=True)
                    banks_list.append(bank.get_bank_release_info()['info'])
                print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))
            sys.exit(0)

        if options.statusko:
            banks = Bank.list()
            banks_list = [["Name", "Type(s)", "Release", "Visibility", "Last run"]]
            for bank in sorted(banks, key=lambda k: k['name']):
                # A bank that cannot be loaded is reported and skipped so
                # that the remaining banks are still listed.
                try:
                    bank = Bank(bank['name'], no_log=True)
                    bank.load_session(UpdateWorkflow.FLOW)
                    if bank.session is not None:
                        if bank.use_last_session and not bank.session.get_status(Workflow.FLOW_OVER):
                            wf_status = bank.session.get('workflow_status')
                            if wf_status is None or not wf_status:
                                banks_list.append(bank.get_bank_release_info()['info'])
                except Exception as e:
                    print(str(e))
            print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))

        if options.update:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            banks = options.bank.split(',')
            # Global result: False as soon as one bank fails
            gres = True
            for bank in banks:
                options.bank = bank
                bmaj = Bank(bank, options)
                print('Log file: ' + bmaj.config.log_file)
                check_status = bmaj.check()
                if not check_status:
                    print('Skip bank ' + options.bank + ': wrong config')
                    gres = False
                    continue
                res = bmaj.update(depends=True)
                if not res:
                    gres = False
                Notify.notifyBankAction(bmaj)
            if not gres:
                sys.exit(1)

        if options.freeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.freeze(options.release)
            if not res:
                sys.exit(1)

        if options.unfreeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.unfreeze(options.release)
            if not res:
                sys.exit(1)

        if options.remove or options.removeall:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if options.remove and not options.release:
                print("Bank release is missing")
                sys.exit(1)
            if options.removeall:
                bmaj = Bank(options.bank, options, no_log=True)
                print('Log file: ' + bmaj.config.log_file)
                res = bmaj.removeAll(options.force)
            else:
                bmaj = Bank(options.bank, options)
                print('Log file: ' + bmaj.config.log_file)
                res = bmaj.remove(options.release)
                Notify.notifyBankAction(bmaj)
            if not res:
                sys.exit(1)

        if options.removepending:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            res = bmaj.remove_pending(options.release)
            if not res:
                sys.exit(1)

        if options.unpublish:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bmaj.unpublish()
            sys.exit(0)

        if options.publish:
            if not options.bank:
                print("Bank name or release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bank = bmaj.bank
            session = None
            if options.get_option('release') is None:
                # Get latest prod release
                if len(bank['production']) > 0:
                    prod = bank['production'][-1]
                    for s in bank['sessions']:
                        if s['id'] == prod['session']:
                            session = s
                            break
            else:
                # Search production release matching release
                for prod in bank['production']:
                    if prod['release'] == options.release or prod['prod_dir'] == options.release:
                        # Search session related to this production release
                        for s in bank['sessions']:
                            if s['id'] == prod['session']:
                                session = s
                                break
                        break
            if session is None:
                print("No production session could be found for this release")
                sys.exit(1)
            bmaj.session._session = session
            bmaj.publish()
    except Exception as e:
        # SystemExit raised by the sys.exit() calls above is not an Exception
        # subclass, so the exit codes chosen above are not intercepted here.
        print(str(e))
        # Report the failure to the caller instead of exiting with status 0
        sys.exit(1)
def main():
    """
    Command-line entry point of the BioMAJ client.

    Parses the command line into an ``Options`` namespace, then either
    prints the usage text (``--help``) or runs the requested action:

    * without a proxy, the action is executed in-process through
      ``biomaj_client_action`` on behalf of the current login user;
    * with a proxy, the options are POSTed as JSON to
      ``<proxy>/api/daemon`` and the ``status`` / ``msg`` fields of the
      JSON answer are reported.

    The ``BIOMAJ_PROXY`` and ``BIOMAJ_APIKEY`` environment variables, when
    set, take precedence over ``--proxy`` and ``--api-key``.

    The process exits with status 1 when the daemon cannot be reached, when
    the action reports a failure, or when an unexpected error is raised.
    """
    parser = argparse.ArgumentParser(add_help=False)
    Utils.set_args(parser)
    parser.add_argument('--about-me', dest="aboutme", action="store_true",
                        help="Get my user info")
    parser.add_argument('--user-login', dest="userlogin",
                        help="Credentials login")
    parser.add_argument('--user-password', dest="userpassword",
                        help="Credentials password")
    # e.g. http://127.0.0.1
    parser.add_argument('--proxy', dest="proxy", help="Biomaj daemon URL")
    parser.add_argument('--api-key', dest="apikey", help="User API Key")
    parser.add_argument('--update-status', dest="updatestatus",
                        action="store_true", default=False,
                        help="Get update status")
    parser.add_argument('--update-cancel', dest="updatecancel",
                        action="store_true", default=False,
                        help="Cancel current bank update")
    parser.add_argument('--trace', dest="trace", action="store_true",
                        help="Trace operation in zipkin")

    # Parse straight into the project Options object so that it can be
    # handed over unchanged to the local action or serialized for the daemon.
    options = Options()
    parser.parse_args(namespace=options)

    options.no_log = False

    if options.help:
        print('''
    --config: global.properties file path

    --proxy: BioMAJ daemon url (http://x.y.z)

    --api-key: User API key to authenticate against proxy

    --about-me: Get my info
        [MANDATORY]
        --proxy http://x.y.z
        --user-login XX
        --user-password XX

    --update-status: get status of an update
        [MANDATORY]
        --bank xx: name of the bank to check

    --update-cancel: cancel current update
        [MANDATORY]
        --bank xx: name of the bank to cancel

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish

    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to freeze

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to unfreeze

    --search: basic search in bank production releases, return list of banks
        --formats xx,yy : list of comma separated format
        AND/OR
        --types xx,yy : list of comma separated type

        --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
        [MANDATORY]
        --bank xx: name of the bank to show
        [OPTIONAL]
        --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal
        ''')
        return

    # Environment variables override the command-line values.
    proxy = options.proxy
    if 'BIOMAJ_PROXY' in os.environ:
        proxy = os.environ['BIOMAJ_PROXY']
        options.proxy = proxy

    if 'BIOMAJ_APIKEY' in os.environ:
        options.apikey = os.environ['BIOMAJ_APIKEY']

    try:
        if not proxy:
            # Local execution: act on behalf of the current login user.
            # getpass.getuser() reads LOGNAME first (same value as before
            # when it is set) and falls back to other sources instead of
            # failing with a bare KeyError when LOGNAME is undefined.
            import getpass
            options.user = getpass.getuser()
            (status, msg) = biomaj_client_action(options)
        else:
            # Remote execution: forward the whole option set to the daemon.
            headers = {}
            if options.apikey:
                headers = {'Authorization': 'APIKEY ' + options.apikey}

            r = requests.post(proxy + '/api/daemon', headers=headers,
                              json={'options': options.__dict__})
            if r.status_code != 200:
                print('Failed to contact BioMAJ daemon')
                sys.exit(1)
            result = r.json()
            status = result['status']
            msg = result['msg']

        if not status:
            print('An error occured:\n')
            print(str(msg))
            # Report the failure to the calling shell/script as well.
            sys.exit(1)
        elif msg:
            print(str(msg))
        else:
            print('Done.')
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the explicit exits above are not intercepted here.
        logging.exception(e)
        print('Error:' + str(e))
        sys.exit(1)