def __init__(self, name, options=None, no_log=False):
    """
    Load the bank called *name* from the database, creating its record when
    no such bank is stored yet.

    :param name: name of the bank, must match its config file
    :type name: str
    :param options: bank options
    :type options: argparse
    :param no_log: when true, the flag is copied on the options as ``no_log``
    :type no_log: bool
    :raise Exception: if the global configuration has not been loaded
    """
    logging.debug('Initialize ' + name)
    if BiomajConfig.global_config is None:
        raise Exception('Configuration must be loaded first')

    self.name = name
    self.depends = []
    self.no_log = no_log

    # Forward the no_log request through the options given to the bank config
    if no_log:
        if options is not None:
            options.no_log = no_log
        else:
            options = Options()
            options.no_log = True

    self.config = BiomajConfig(self.name, options)

    nb_threads = self.config.get('bank.num.threads')
    if nb_threads is not None:
        ProcessFactory.NB_THREAD = int(nb_threads)

    log_file = self.config.log_file
    if log_file is not None and log_file != 'none':
        logging.info("Log file: " + log_file)

    self.options = Options() if options is None else options

    # Open the database connection on first use
    if MongoConnector.db is None:
        MongoConnector(
            BiomajConfig.global_config.get('GENERAL', 'db.url'),
            BiomajConfig.global_config.get('GENERAL', 'db.name'))

    self.banks = MongoConnector.banks
    self.bank = self.banks.find_one({'name': self.name})

    if self.bank is None:
        # Unknown bank: store an empty record and keep the generated id
        self.bank = {
            'name': self.name,
            'current': None,
            'sessions': [],
            'production': [],
            'properties': self.get_properties()
        }
        self.bank['_id'] = self.banks.insert(self.bank)

    self.session = None
    self.use_last_session = False
def search(formats=None, types=None, with_sessions=True):
    '''
    Search all bank releases matching some formats and types

    A production release is kept when it has at least one of the requested
    formats and at least one of the requested types (a type may also match
    the bank 'type' property). An empty/None criterion matches everything.
    Banks left without any matching production release are not returned.

    :param formats: formats to look for
    :type formats: list
    :param types: types to look for
    :type types: list
    :param with_sessions: when False, the 'sessions' field is not fetched
    :type with_sessions: bool
    :return: list of bank documents with 'production' reduced to the matches
    '''
    formats = [] if formats is None else formats
    types = [] if types is None else types

    if MongoConnector.db is None:
        MongoConnector(
            BiomajConfig.global_config.get('GENERAL', 'db.url'),
            BiomajConfig.global_config.get('GENERAL', 'db.name'))

    query = {}
    if formats:
        query['production.formats'] = {'$in': formats}

    if with_sessions:
        cursor = MongoConnector.banks.find(query)
    else:
        cursor = MongoConnector.banks.find(query, {'sessions': 0})

    # The query only selects banks; releases are filtered one by one below
    matching_banks = []
    for bank_doc in cursor:
        rejected = []
        for release in bank_doc['production']:
            # Is one of the formats present in this production release?
            format_ok = (not formats) or any(
                fmt in release['formats'] for fmt in formats)

            # Types are only examined once the format matched
            type_ok = not types
            if format_ok and types:
                type_ok = any(
                    kind in release['types'] or kind in bank_doc['properties']['type']
                    for kind in types)

            if not (type_ok and format_ok):
                rejected.append(release)

        for release in rejected:
            bank_doc['production'].remove(release)

        if bank_doc['production']:
            matching_banks.append(bank_doc)

    return matching_banks
def get_connector(self):
    """
    Return the shared connector, creating it on first call.

    The created instance is cached in ``Connector._connector`` and returned
    as is by later calls. ``Connector.driver`` selects the implementation:
    'mongodb' or 'postgres'. For any other driver value nothing is created
    and the cached value (``None``) is returned.

    :return: Connector
    :rtype: :class:`Connector` inherited class instance
    :raise Exception: if ``Connector.url`` or ``Connector.db`` is not set
    """
    if Connector._connector is not None:
        return Connector._connector

    if Connector.url is None or Connector.db is None:
        raise Exception("Can't create connector, params not set!")

    if Connector.driver == 'mongodb':
        Connector._connector = MongoConnector(Connector.url, Connector.db)
    elif Connector.driver == 'postgres':
        # db must receive the database name (it was previously given the url)
        Connector._connector = PostgresConnector(url=Connector.url, db=Connector.db)

    return Connector._connector
def get_banks_disk_usage():
    """
    Get disk usage per bank and release

    A production release whose 'size' is missing or None counts for 0, so
    records that never had their size computed do not break the report.

    :return: one dict per bank: ``{'name': <bank name>, 'size': <sum of the
             release sizes>, 'releases': [{'name': <release>, 'size': <size>}]}``
    :rtype: list
    """
    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    bank_list = []
    # Only the fields needed for the report are fetched
    banks = MongoConnector.banks.find({}, {'name': 1, 'production': 1})
    for b in banks:
        total_size = 0
        releases = []
        for p in b['production']:
            # 'size' may be absent (not yet computed) or None
            size = p.get('size') or 0
            total_size += size
            releases.append({'name': p['release'], 'size': size})
        bank_list.append({'name': b['name'], 'size': total_size, 'releases': releases})
    return bank_list
def list(with_sessions=False):
    """
    Return every bank stored in the banks collection

    :param with_sessions: should sessions be returned or not (can be quite big)
    :type with_sessions: bool
    :return: list of bank documents as read from the database
    """
    if MongoConnector.db is None:
        MongoConnector(
            BiomajConfig.global_config.get('GENERAL', 'db.url'),
            BiomajConfig.global_config.get('GENERAL', 'db.name'))

    # A projection dropping 'sessions' is only added when sessions are unwanted
    find_args = [{}]
    if not with_sessions:
        find_args.append({'sessions': 0})
    cursor = MongoConnector.banks.find(*find_args)

    # NOTE: the builtin list() is not used here on purpose, this function
    # carries the same name.
    return [bank_doc for bank_doc in cursor]
def set_version(version=None):
    """
    Store a version in the db_schema collection (document with id 1)

    :param version: version to record, defaults to the installed
                    biomaj distribution version when None
    :type version: str
    :return: None (also when the configuration file cannot be loaded)
    """
    if version is None:
        installed_version = pkg_resources.get_distribution("biomaj").version
    else:
        installed_version = version

    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return None

    if MongoConnector.db is None:
        MongoConnector(
            BiomajConfig.global_config.get('GENERAL', 'db.url'),
            BiomajConfig.global_config.get('GENERAL', 'db.name'))

    MongoConnector.db_schema.update_one(
        {'id': 1},
        {'$set': {'version': installed_version}})
    print("Schema version set to %s" % str(installed_version))
def __init__(self, user):
    """
    Load a user from the users collection, falling back on LDAP.

    When the user is not in the database and the GENERAL option 'use_ldap'
    is '1', the directory is searched under 'ou=People,<ldap.dn>' for an
    entry whose uid or mail equals *user*. If the search returns entries, a
    new user record (id, email, is_ldap) is inserted in the database.
    ``self.user`` is None when the user could not be found.

    :param user: user identifier (uid or mail when looked up in LDAP)
    :type user: str
    """
    if MongoConnector.db is None:
        MongoConnector(
            BiomajConfig.global_config.get('GENERAL', 'db.url'),
            BiomajConfig.global_config.get('GENERAL', 'db.name'))

    self.users = MongoConnector.users
    self.id = user
    self.user = self.users.find_one({'id': user})

    if self.user or BiomajConfig.global_config.get('GENERAL', 'use_ldap') != '1':
        return

    # Not in database: check if in ldap
    from ldap3 import Server, Connection, STRATEGY_SYNC, SEARCH_SCOPE_WHOLE_SUBTREE, GET_ALL_INFO

    con = None
    try:
        ldap_host = BiomajConfig.global_config.get('GENERAL', 'ldap.host')
        ldap_port = BiomajConfig.global_config.get('GENERAL', 'ldap.port')
        ldap_server = Server(ldap_host, port=int(ldap_port), get_info=GET_ALL_INFO)
        con = Connection(ldap_server, auto_bind=True,
                         client_strategy=STRATEGY_SYNC, check_names=True)
    except Exception as err:
        logging.error(str(err))
        self.user = None
        # Without a connection there is nothing to search
        return

    try:
        ldap_dn = BiomajConfig.global_config.get('GENERAL', 'ldap.dn')
        base_dn = 'ou=People,' + ldap_dn

        # Escape the characters that are special in an LDAP filter so the
        # identifier is always matched literally.
        filter_escapes = {
            '\\': '\\5c',
            '*': '\\2a',
            '(': '\\28',
            ')': '\\29',
            '\x00': '\\00',
        }
        safe_user = ''.join(filter_escapes.get(char, char) for char in user)
        ldapfilter = "(&(|(uid=" + safe_user + ")(mail=" + safe_user + ")))"

        try:
            con.search(base_dn, ldapfilter, SEARCH_SCOPE_WHOLE_SUBTREE,
                       attributes=['mail'])
            if con.response:
                # The mail of the last entry providing one is kept
                ldap_mail = None
                for entry in con.response:
                    if 'mail' not in entry['attributes']:
                        logging.error('Mail not set for user ' + user)
                    else:
                        ldap_mail = entry['attributes']['mail'][0]
                self.user = {
                    'id': user,
                    'email': ldap_mail,
                    'is_ldap': True
                }
                self.user['_id'] = self.users.insert(self.user)
            else:
                self.user = None
        except Exception as err:
            logging.error(str(err))
    finally:
        # Always release the connection, even if the lookup setup failed
        con.unbind()
def migrate_pendings():
    """
    Migrate database

    3.0.18: when the recorded schema version is 3.0.17 or older, convert the
    'pending' field of banks from the old dict form ({release: id}) to a list
    of {'release': ..., 'id': ...} ordered by id. An empty old dict is removed.
    The installed biomaj version is then recorded as schema version.

    :return: None (also when the configuration file cannot be loaded)
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception:
            print("* SchemaVersion: Can't find config file")
            return None

    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks

    schema_version = schema.find_one({'id': 1})
    installed_version = pkg_resources.get_distribution("biomaj").version
    if schema_version is None:
        # No version recorded yet: start from 3.0.0
        schema_version = {'id': 1, 'version': '3.0.0'}
        schema.insert(schema_version)

    version_fields = schema_version['version'].split('.')
    moderate = int(version_fields[1])
    minor = int(version_fields[2])

    if moderate == 0 and minor <= 17:
        print("Migrate from release: %s" % schema_version['version'])
        # Update pending releases
        updated = 0
        for bank in banks.find():
            if 'pending' not in bank:
                continue
            old_pending = bank['pending']
            # Only the old (dict) pending type needs a conversion
            if type(old_pending) is not dict:
                continue
            updated += 1
            by_id = sorted(old_pending, key=lambda rel: old_pending[rel])
            pendings = [{'release': str(rel), 'id': old_pending[str(rel)]}
                        for rel in by_id]
            if pendings:
                change = {'$set': {'pending': pendings}}
            else:
                # We remove old type for 'pending'
                change = {'$unset': {'pending': ""}}
            banks.update({'name': bank['name']}, change)

        print("Migration: %d bank(s) updated" % updated)

    schema.update_one({'id': 1}, {'$set': {'version': installed_version}})
def migrate_pendings():
    """
    Migrate database

    3.0.18: when the recorded schema version is 3.0.17 or older, convert the
    'pending' field of banks from the old dict form ({release: id}) to a list
    of {'release': ..., 'id': ...} ordered by id (an empty dict is removed).

    When the second version field is lower than 1:

    * users without an 'apikey' get a random 10 character key
    * production releases with a missing or zero 'size' get the size of their
      production directory, when that directory is known and exists

    :return: None (also when the configuration file cannot be loaded)
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return None

    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks
    users = MongoConnector.users

    schema_version = SchemaVersion.get_dbschema_version(schema)
    moderate = int(schema_version.split('.')[1])
    minor = int(schema_version.split('.')[2])

    if moderate == 0 and minor <= 17:
        print("Migrate from release: %s" % schema_version)
        # Update pending releases
        updated = 0
        for bank in banks.find():
            # Only the old (dict) pending type needs a conversion
            if 'pending' not in bank or not isinstance(bank['pending'], dict):
                continue
            old_pending = bank['pending']
            updated += 1
            pendings = [{'release': str(release), 'id': old_pending[str(release)]}
                        for release in sorted(old_pending, key=lambda r: old_pending[r])]
            if pendings:
                banks.update({'name': bank['name']},
                             {'$set': {'pending': pendings}})
            else:
                # We remove old type for 'pending'
                banks.update({'name': bank['name']},
                             {'$unset': {'pending': ""}})

        print("Migration: %d bank(s) updated" % updated)

    if moderate < 1:
        # API keys
        updated = 0
        rng = random.SystemRandom()
        key_chars = string.ascii_uppercase + string.digits
        for user in users.find():
            if 'apikey' not in user:
                updated += 1
                api_key = ''.join(rng.choice(key_chars) for _ in range(10))
                users.update({'_id': user['_id']}, {'$set': {'apikey': api_key}})
        print("Migration: %d user(s) updated" % updated)

        # production size
        updated = 0
        for bank in banks.find():
            for prod in bank['production']:
                # Example of bank document:
                # {
                #   "_id" : ObjectId("54edb10856e8bb11340b5f51"),
                #   "production" : [
                #     {
                #       "freeze" : false,
                #       "remoterelease" : "2003-11-26",
                #       "session" : 1427809848.560108,
                #       "data_dir" : "/db",
                #       "formats" : [ ],
                #       "release" : "2003-11-26",
                #       "dir_version" : "ncbi/blast/alu",
                #       "prod_dir" : "alu-2003-11-26",
                #       "types" : [ ],
                #       "size" : 319432
                #     }
                #   ]
                # }
                if prod.get('size', 0) != 0:
                    continue
                logging.info('Calculate size for bank %s', bank['name'])
                if not (prod.get('data_dir') and prod.get('prod_dir') and prod.get('dir_version')):
                    logging.warning('no production directory information for %s, skipping...',
                                    bank['name'])
                    continue
                prod_dir = os.path.join(prod['data_dir'], prod['dir_version'], prod['prod_dir'])
                if not os.path.exists(prod_dir):
                    logging.warning('production directory %s does not exists for %s, skipping...',
                                    prod_dir, bank['name'])
                    continue
                dir_size = Utils.get_folder_size(prod_dir)
                # The positional operator targets the release matched by the query
                banks.update({'name': bank['name'], 'production.release': prod['release']},
                             {'$set': {'production.$.size': dir_size}})
                updated += 1
        print("Migration: %d bank production info updated" % updated)
def add_property(bank=None, prop=None, value=None, cfg=None):
    """
    Update properties field for banks.

    The update is only done when the recorded schema version has a second
    field <= 1 and a third field <= 0. Banks without a 'properties' field
    are skipped with a warning.

    :param bank: Bank name to update, default all
    :type bank: str
    :param prop: New property to add
    :type prop: str
    :param value: Property value, if cfg set, value taken from bank configuration cfg key
    :type value: str
    :param cfg: Bank configuration key value is taken from
    :type cfg: str
    :returns: False if the configuration file cannot be loaded, if prop is
              not set or if the requested bank does not exist, None otherwise
    :rtype: bool
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return False

    if prop is None:
        print("Property key is required", file=sys.stderr)
        return False

    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks
    schema_version = SchemaVersion.get_dbschema_version(schema)
    moderate = int(schema_version.split('.')[1])
    minor = int(schema_version.split('.')[2])

    if moderate <= 1 and minor <= 0:
        if bank is None:
            bank_list = banks.find()
        else:
            requested = banks.find_one({'name': bank})
            if requested is None:
                # find_one returns None for an unknown bank name
                print("Bank %s not found" % str(bank), file=sys.stderr)
                return False
            bank_list = [requested]

        updated = 0
        new_prop = 'properties.' + prop
        for bank_doc in bank_list:
            if 'properties' not in bank_doc:
                logging.warning("Bank %s does not have 'properties' field!",
                                str(bank_doc['name']))
                continue
            b = Bank(bank_doc['name'], no_log=True)
            new_value = value
            if new_value is None:
                if cfg is None:
                    print("[%s] With value set to None, you must set cfg to get "
                          "corresponding value" % str(bank_doc['name']), file=sys.stderr)
                    continue
                # Value comes from the configuration of the bank itself
                new_value = b.config.get(cfg)
            banks.update({'name': bank_doc['name']}, {'$set': {new_prop: new_value}})
            updated += 1

        print("Add property: %d bank(s) updated" % updated)