def test_copy_hardlink(self):
    """
    Test copy with hardlinks: we create a file in data_dir and try to
    link it. This should work unless /tmp don't accept hardlinks.
    """
    # Create a file and a directory in data_dir. We don't destroy them
    # since they are in /tmp.
    _, src_path = tempfile.mkstemp(dir=self.utils.data_dir)
    src_name = os.path.basename(src_path)
    dest_dir = tempfile.mkdtemp(dir=self.utils.data_dir)
    dest_path = os.path.join(dest_dir, src_name)
    # Perform the copy with hardlinks requested
    Utils.copy_files([{'root': self.utils.data_dir, 'name': src_name}],
                     dest_dir, use_hardlinks=True)
    # The destination file must exist whether or not a hardlink was made
    self.assertTrue(os.path.exists(dest_path))
    # Check if it's really a hardlink. This may fail so we catch
    # any exceptions.
    src_stat = os.stat(src_path)
    dest_stat = os.stat(dest_path)
    try:
        self.assertTrue(src_stat.st_ino == dest_stat.st_ino)
    except Exception:
        msg = "In %s: copy worked but hardlinks were not used." % self.id()
        print(msg, file=sys.stderr)
def test_copy_with_regexp_hardlink(self):
    """
    Test copy with hardlinks: we create files in data_dir and try to
    link them. This should work unless /tmp don't accept hardlinks.
    """
    # Create 5 files and a directory in data_dir. We don't destroy them
    # since they are in /tmp.
    suffix = ".dat"
    regexp = ".*\\" + suffix
    sources = []
    for _ in range(5):
        sources.append(
            tempfile.mkstemp(dir=self.utils.data_dir, suffix=suffix)[1])
    dest_dir = tempfile.mkdtemp(dir=self.utils.data_dir)
    targets = [os.path.join(dest_dir, os.path.basename(p)) for p in sources]
    # Copy everything matching the regexp
    Utils.copy_files_with_regexp(self.utils.data_dir, dest_dir, [regexp],
                                 use_hardlinks=True)
    # Check that each file was copied
    for src, dst in zip(sources, targets):
        self.assertTrue(os.path.exists(dst))
        # Check if it's really a hardlink. This may fail so we catch
        # any exceptions.
        try:
            self.assertTrue(os.stat(src).st_ino == os.stat(dst).st_ino)
        except Exception:
            msg = "In %s: copy worked but hardlinks were not used." % self.id()
            print(msg, file=sys.stderr)
def __init__(self, config_file):
    """
    Instantiate the service from a YAML configuration file.

    :param config_file: path to the YAML configuration file
    :type config_file: str
    """
    self.logger = logging
    self.session = None
    self.executed_callback = None
    with open(config_file, 'r') as ymlfile:
        # safe_load avoids arbitrary object construction from the config
        # file (yaml.load without an explicit Loader is deprecated/unsafe).
        self.config = yaml.safe_load(ymlfile)
    Utils.service_config_override(self.config)
    Zipkin.set_config(self.config)
    BiomajConfig.load_config(self.config['biomaj']['config'])
    if 'log_config' in self.config:
        # dictConfig requires plain dicts for the handler definitions
        for handler in list(self.config['log_config']['handlers'].keys()):
            self.config['log_config']['handlers'][handler] = dict(
                self.config['log_config']['handlers'][handler])
        logging.config.dictConfig(self.config['log_config'])
        self.logger = logging.getLogger('biomaj')
    self.redis_client = redis.StrictRedis(
        host=self.config['redis']['host'],
        port=self.config['redis']['port'],
        db=self.config['redis']['db'],
        decode_responses=True)
    self.logger.info('Daemon service started')
def setschedule(request):
    """Replace a cron task: delete the old entry then create the new one (admin only)."""
    if not is_admin(request):
        return HTTPForbidden()
    cron_oldname = request.matchdict['name']
    body = request.body
    if sys.version_info >= (3,):
        body = request.body.decode()
    cron = json.loads(body)
    # Delete the task under its previous name
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'cron')
    r = requests.delete(proxy + '/api/cron/jobs/' + cron_oldname)
    if not r.status_code == 200:
        logging.error("Failed to contact cron service")
        return []
    # Recreate it under the new name with the submitted schedule
    cron_task = {
        'slices': cron['slices'],
        'banks': cron['banks'],
        'comment': cron['comment']
    }
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'cron')
    r = requests.post(proxy + '/api/cron/jobs/' + cron['comment'], json=cron_task)
    if not r.status_code == 200:
        logging.error("Failed to contact cron service")
        return []
    return []
def set_options(self, options):
    """Set download options specific to the cURL downloader."""
    super(CurlDownload, self).set_options(options)
    if "ssl_verifyhost" in options:
        self.ssl_verifyhost = Utils.to_bool(options["ssl_verifyhost"])
    if "ssl_verifypeer" in options:
        self.ssl_verifypeer = Utils.to_bool(options["ssl_verifypeer"])
    if "ssl_server_cert" in options:
        self.ssl_server_cert = options["ssl_server_cert"]
    if "tcp_keepalive" in options:
        self.tcp_keepalive = Utils.to_int(options["tcp_keepalive"])
    if "ftp_method" in options:
        # The option value is the method name as used on the CLI; map it
        # to the corresponding cURL integer constant.
        method_name = options["ftp_method"].lower()
        if method_name not in self.VALID_FTP_FILEMETHOD:
            raise ValueError("Invalid value for ftp_method")
        self.ftp_method = self.VALID_FTP_FILEMETHOD[method_name]
    if "ssh_hosts_file" in options:
        self.ssh_hosts_file = options["ssh_hosts_file"]
    if "ssh_new_host" in options:
        # Same convention: the CLI name is mapped to an internal constant
        host_policy = options["ssh_new_host"].lower()
        if host_policy not in self.VALID_SSH_NEW_HOST:
            raise ValueError("Invalid value for ssh_new_host")
        self.ssh_new_host = self.VALID_SSH_NEW_HOST[host_policy]
    if "allow_redirections" in options:
        self.allow_redirections = Utils.to_bool(options["allow_redirections"])
def test_copy(self):
    """Copy a single local file and check it lands in the data dir."""
    here = os.path.dirname(os.path.realpath(__file__))
    to_dir = self.utils.data_dir
    Utils.copy_files([{'root': here, 'name': 'biomaj_tests.py'}], to_dir)
    self.assertTrue(os.path.exists(to_dir + '/biomaj_tests.py'))
def _http_parse_result(self, result):
    """
    Parse an HTTP directory listing with the bank's configured regexps.

    :param result: raw page content of the remote directory listing
    :type result: str
    :return: tuple (rfiles, rdirs); each entry is a dict with keys
             permissions, group, user, size, month, day, year, name
             (file entries also get an md5 ``hash`` of name+date+size)
    """
    rfiles = []
    rdirs = []
    dirs = re.findall(self.http_parse.dir_line, result)
    if dirs is not None and len(dirs) > 0:
        for founddir in dirs:
            rfile = {}
            # HTTP listings expose no ownership/permission information
            rfile['permissions'] = ''
            rfile['group'] = ''
            rfile['user'] = ''
            rfile['size'] = 0
            # dir_date / dir_name are 1-based capture-group indexes
            date = founddir[self.http_parse.dir_date - 1]
            dirdate = date.split()
            parts = dirdate[0].split('-')
            # 19-Jul-2014 13:02
            rfile['month'] = Utils.month_to_num(parts[1])
            rfile['day'] = int(parts[0])
            rfile['year'] = int(parts[2])
            rfile['name'] = founddir[self.http_parse.dir_name - 1]
            rdirs.append(rfile)
    files = re.findall(self.http_parse.file_line, result)
    if files is not None and len(files) > 0:
        for foundfile in files:
            rfile = {}
            rfile['permissions'] = ''
            rfile['group'] = ''
            rfile['user'] = ''
            # file_size == -1 means the listing has no size column
            if self.http_parse.file_size != -1:
                rfile['size'] = humanfriendly.parse_size(foundfile[self.http_parse.file_size - 1])
            else:
                rfile['size'] = 0
            # file_date == -1 means the listing has no date column
            if self.http_parse.file_date != -1:
                date = foundfile[self.http_parse.file_date - 1]
                if self.http_parse.file_date_format:
                    # '%%' escapes '%' in the configured strptime format
                    date_object = datetime.strptime(date, self.http_parse.file_date_format.replace('%%', '%'))
                    rfile['month'] = date_object.month
                    rfile['day'] = date_object.day
                    rfile['year'] = date_object.year
                else:
                    dirdate = date.split()
                    parts = dirdate[0].split('-')
                    # 19-Jul-2014 13:02
                    rfile['month'] = Utils.month_to_num(parts[1])
                    rfile['day'] = int(parts[0])
                    rfile['year'] = int(parts[2])
            else:
                # No date available: stamp the entry with today's date
                today = datetime.now()
                date = '%s-%s-%s' % (today.year, today.month, today.day)
                rfile['month'] = today.month
                rfile['day'] = today.day
                rfile['year'] = today.year
            rfile['name'] = foundfile[self.http_parse.file_name - 1]
            # The hash lets callers detect remote changes in name/date/size
            filehash = (rfile['name'] + str(date) + str(rfile['size'])).encode('utf-8')
            rfile['hash'] = hashlib.md5(filehash).hexdigest()
            rfiles.append(rfile)
    return (rfiles, rdirs)
def test_uncompress(self):
    """Copy a gzipped file then uncompress it in place."""
    src = {
        'root': os.path.dirname(os.path.realpath(__file__)),
        'name': 'bank/test.fasta.gz'
    }
    dest = self.utils.data_dir
    Utils.copy_files([src], dest)
    Utils.uncompress(os.path.join(dest, src['name']))
    self.assertTrue(os.path.exists(dest + '/bank/test.fasta'))
def test_get_more_recent_file(self):
    """The file with the most recent year/month/day must be selected."""
    files = [
        {'name': '/test1', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
        {'name': '/test2', 'year': '2013', 'month': '11', 'day': '12', 'size': 10},
        {'name': '/test/test1', 'year': '1988', 'month': '11', 'day': '10', 'size': 10},
        {'name': '/test/test11', 'year': '2013', 'month': '9', 'day': '23', 'size': 10},
    ]
    release = Utils.get_more_recent_file(files)
    # /test2 (2013-11-12) is the newest entry
    self.assertTrue(release['year'] == '2013')
    self.assertTrue(release['month'] == '11')
    self.assertTrue(release['day'] == '12')
def bank_release_remove(request):
    """
    Ask the BioMAJ daemon to remove one release of a bank.

    Requires an authenticated user; the request is forwarded to the
    daemon service with the user's API key.

    :return: dict with 'msg' (and 'status' on success)
    """
    try:
        user = is_authenticated(request)
        if not user:
            return HTTPForbidden()
        proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'daemon')
        options = {
            'proxy': proxy,
            'bank': request.matchdict['id'],
            'release': request.matchdict['release'],
            'remove': True
        }
        headers = {}
        if user['apikey']:
            headers = {'Authorization': 'APIKEY ' + user['apikey']}
        r = requests.post(options['proxy'] + '/api/daemon',
                          headers=headers,
                          json={'options': options})
        if not r.status_code == 200:
            return {'msg': 'Failed to contact BioMAJ daemon'}
        result = r.json()
        return {'msg': str(result['msg']), 'status': str(result['status'])}
    except Exception as e:
        logging.error("Removal error:" + str(e))
        return {'msg': str(e)}
def biomaj_version(options, config):
    '''
    Get biomaj version

    :param options: parsed CLI options (uses options.json)
    :param config: service configuration (unused here, kept for signature parity)
    :return: tuple (True, payload) — dict for JSON output, string otherwise
    '''
    # Kept for its side effect: raises if the biomaj package is not installed.
    version = pkg_resources.require('biomaj')[0].version
    biomaj_modules = [
        'biomaj', 'biomaj-core', 'biomaj-daemon', 'biomaj-release',
        'biomaj-download', 'biomaj-process', 'biomaj-cron', 'biomaj-ftp',
        'biomajwatcher'
    ]
    results = []
    if not options.json:
        results = [["Module", "Release", "Latest"]]
    msg = 'BioMAJ modules version\n'
    for module_name in biomaj_modules:
        # Dedicated name so the package version above is not clobbered
        (module_version, latest) = Utils.get_module_version(module_name)
        if module_version is not None:
            results.append([module_name, module_version, latest])
    if options.json:
        return (True, {'version': results})
    msg += tabulate(results, headers="firstrow", tablefmt="grid")
    return (True, 'Version: ' + str(msg))
def get_bool(self, prop, section='GENERAL', escape=True, default=None):
    """
    Get a boolean property from bank or general configration.
    Optionally in section.
    """
    raw_value = self.get(prop, section, escape, default)
    return Utils.to_bool(raw_value)
def getschedule(request):
    """Return the list of cron jobs from the cron service."""
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'cron')
    resp = requests.get(proxy + '/api/cron/jobs')
    if not resp.status_code == 200:
        logging.error("Failed to contact cron service")
        return []
    return resp.json()['cron']
def download(self, local_dir):
    '''
    Copy local files to local_dir

    :param local_dir: Directory where files should be copied
    :type local_dir: str
    :return: list of downloaded files
    '''
    self.logger.debug('Local:Download')
    Utils.copy_files(self.files_to_download, local_dir,
                     use_hardlinks=self.use_hardlinks,
                     lock=self.mkdir_lock)
    # Local copies are instantaneous from the caller's point of view
    for copied_file in self.files_to_download:
        copied_file['download_time'] = 0
    return self.files_to_download
def __init__(self, config_file=None, rabbitmq=True):
    """
    Instantiate the download service.

    :param config_file: path to the YAML configuration file
    :type config_file: str
    :param rabbitmq: whether to open a RabbitMQ channel
    :type rabbitmq: bool
    """
    self.logger = logging
    self.session = None
    self.bank = None
    self.download_callback = None
    with open(config_file, 'r') as ymlfile:
        self.config = yaml.load(ymlfile, Loader=Loader)
    Utils.service_config_override(self.config)
    Zipkin.set_config(self.config)
    if 'log_config' in self.config:
        # dictConfig requires plain dicts for the handler definitions
        for handler in list(self.config['log_config']['handlers'].keys()):
            self.config['log_config']['handlers'][handler] = dict(
                self.config['log_config']['handlers'][handler])
        logging.config.dictConfig(self.config['log_config'])
        self.logger = logging.getLogger('biomaj')
    # NOTE(review): assumes 'redis_client' pre-exists as a (class-level)
    # attribute shared across instances — confirm on the class definition.
    if not self.redis_client:
        self.redis_client = redis.StrictRedis(
            host=self.config['redis']['host'],
            port=self.config['redis']['port'],
            db=self.config['redis']['db'],
            decode_responses=True)
    if rabbitmq and not self.channel:
        connection = None
        rabbitmq_port = self.config['rabbitmq']['port']
        rabbitmq_user = self.config['rabbitmq']['user']
        rabbitmq_password = self.config['rabbitmq']['password']
        rabbitmq_vhost = self.config['rabbitmq']['virtual_host']
        if rabbitmq_user:
            # Authenticated connection; heartbeat_interval=0 disables
            # heartbeats. NOTE(review): 'heartbeat_interval' is the
            # pika<1.0 parameter name — confirm the pinned pika version.
            credentials = pika.PlainCredentials(rabbitmq_user, rabbitmq_password)
            connection = pika.BlockingConnection(
                pika.ConnectionParameters(self.config['rabbitmq']['host'],
                                          rabbitmq_port, rabbitmq_vhost,
                                          credentials,
                                          heartbeat_interval=0))
        else:
            # Anonymous connection with default port/vhost
            connection = pika.BlockingConnection(
                pika.ConnectionParameters(self.config['rabbitmq']['host'],
                                          heartbeat_interval=0))
        self.channel = connection.channel()
    self.logger.info('Download service started')
def unsetschedule(request):
    """
    Delete a cron task by name (admin only).

    :return: empty list (both on success and on cron-service failure),
             HTTPForbidden for non-admin users
    """
    if not is_admin(request):
        return HTTPForbidden()
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'cron')
    r = requests.delete(proxy + '/api/cron/jobs/' + request.matchdict['name'])
    if not r.status_code == 200:
        logging.error("Failed to contact cron service")
        return []
    # Fix: the success path previously fell through and returned None,
    # unlike the sibling schedule views which all return [].
    return []
def biomaj_daemon():
    '''
    Execute a command request (bank update, removal, ...)
    '''
    # API key is passed as 'Authorization: APIKEY <token>'
    apikey = request.headers.get('Authorization')
    token = None
    if apikey:
        bearer = apikey.split()
        if bearer[0] == 'APIKEY':
            token = bearer[1]
    try:
        params = request.get_json()
        options = params['options']
        options_object = Options(options)
        options_object.token = token
        options_object.user = None
        options_object.redis_host = config['redis']['host']
        options_object.redis_port = config['redis']['port']
        options_object.redis_db = config['redis']['db']
        options_object.redis_prefix = config['redis']['prefix']
        user = None
        if token:
            # Resolve the API key to a user via the user micro-service
            proxy = Utils.get_service_endpoint(config, 'user')
            r = requests.get(proxy + '/api/user/info/apikey/' + token)
            if not r.status_code == 200:
                abort(404, {'message': 'Invalid API Key or connection issue'})
            user = r.json()['user']
            if user:
                options_object.user = user['id']
        # Maintenance toggling is restricted to configured admins
        if options_object.maintenance in ['on', 'off']:
            if not options_object.user or 'admin' not in config[
                    'biomaj'] or options_object.user not in config['biomaj'][
                        'admin']:
                abort(401, {
                    'message': 'This action requires authentication with api key'
                })
        if options_object.bank:
            bmaj_options = BmajOptions(options_object)
            BiomajConfig(options_object.bank, bmaj_options)
        # Read-only actions (search/show/check/status) need no user
        if not options_object.search and not options_object.show and not options_object.check and not options_object.status:
            if not user:
                abort(
                    401, {
                        'message':
                        'This action requires authentication with api key'
                    })
        (res, msg) = biomaj_client_action(options_object, config)
    except Exception as e:
        # NOTE(review): this also catches the abort() HTTPExceptions above
        # and rewrites them as a JSON error — confirm this is intended.
        logging.exception(e)
        return jsonify({'status': False, 'msg': str(e)})
    return jsonify({'status': res, 'msg': msg})
def user_list(request):
    """Return all known users (admin only)."""
    if not is_admin(request):
        return HTTPForbidden('Not authorized to access this resource')
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'user')
    resp = requests.get(proxy + '/api/user/info/user')
    if not resp.status_code == 200:
        return HTTPNotFound()
    return resp.json()['users']
def set_options(self, options):
    """
    Set download options.

    Subclasses that override this method must call this implementation.
    """
    # Keep a reference to the whole option dict
    self.options = options
    if "skip_check_uncompress" in options:
        raw_flag = options["skip_check_uncompress"]
        self.skip_check_uncompress = Utils.to_bool(raw_flag)
def __api_authentification(request):
    """
    Authenticate a request from its ``Authorization`` header, expected to
    contain ``USER_ID API_KEY``, against the user service.

    :return: the user dict on success, None otherwise
    """
    auth = request.headers['Authorization'].split()
    user_id = auth[0]
    api_key = auth[1]
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'user')
    r = requests.post(proxy + '/api/user/bind/user/' + user_id,
                      json={'type': 'apikey', 'value': api_key})
    if not r.status_code == 200:
        return None
    # Removed an unused local 'config' assignment
    return r.json()['user']
def check_user_pw(request, username, password):
    """checks for plain password vs hashed password in database

    :return: the user dict on success, None on empty password or
             failed authentication
    """
    # 'not password' already covers the empty string; the extra
    # "password == ''" test was redundant. Unused 'config' local removed.
    if not password:
        return None
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'user')
    r = requests.post(proxy + '/api/user/bind/user/' + username,
                      json={'type': 'password', 'value': password})
    if not r.status_code == 200:
        logging.info("Wrong login for " + str(username))
        return None
    return r.json()['user']
def biomaj_bank_log_tail(bank, tail=100):
    """
    Stream the tail of a bank's last session log file.

    :param bank: bank name
    :param tail: number of trailing lines to stream; 0 streams the whole file
    :return: Flask text/plain response ending with '##END_OF_LOG', or a
             plain error string when the log cannot be accessed
    """
    # API key is passed as 'Authorization: APIKEY <token>'
    apikey = request.headers.get('Authorization')
    token = None
    if apikey:
        bearer = apikey.split()
        if bearer[0] == 'APIKEY':
            token = bearer[1]
    log_file = None
    try:
        user = None
        options_object = Options(OPTIONS_PARAMS)
        if token:
            # Resolve the API key to a user via the user micro-service
            proxy = Utils.get_service_endpoint(config, 'user')
            r = requests.get(proxy + '/api/user/info/apikey/' + token)
            if not r.status_code == 200:
                abort(404, {'message': 'Invalid API Key or connection issue'})
            user = r.json()['user']
            options_object = Options({'user': user['id']})
        bank_log = Bank(bank, options=options_object, no_log=True)
        # Private banks are only visible to their owner
        if bank_log.bank['properties'][
                'visibility'] != 'public' and not bank_log.is_owner():
            abort(403, {'message': 'not authorized to access this bank'})
        if 'status' not in bank_log.bank or 'log_file' not in bank_log.bank[
                'status'] or not bank_log.bank['status']['log_file']['status']:
            return "No log file available"
        log_file = bank_log.bank['status']['log_file']['status']
    except Exception as e:
        logging.exception(e)
        return "Failed to access log file: " + str(e)
    if not log_file or not os.path.exists(log_file):
        return "Cannot access log file %s" % (str(log_file))

    def generate():
        # Stream lazily; deque keeps only the last 'tail' lines in memory
        with open(log_file) as fp:
            tail_l = int(tail)
            if tail_l == 0:
                for line in fp:
                    yield line
            else:
                dq = deque(fp, maxlen=tail_l)
                for line in dq:
                    yield line
            yield "##END_OF_LOG"
    return Response(generate(), mimetype='text/plain')
def validate_authentication(self, username, apikey, handler): """Raises AuthenticationFailed if supplied username and password don't match the stored credentials, else return None. """ # msg = "Authentication failed." #anonymous user : we defined the user as anonymous proxy = Utils.get_service_endpoint(self.cfg, 'user') if username == "biomaj_default": user = {} user['id'] = "BMJ_default" elif proxy: user_req = requests.get(proxy + '/api/user/info/apikey/' + apikey) if not user_req.status_code == 200: raise AuthenticationFailed('Wrong or failed authentication') user = user_req.json() else: user = BmajUser.get_user_by_apikey(apikey) if not user: self.logger.error('User not found: ' + username) raise AuthenticationFailed('User does not exists') #Determining the authorized path dict_bank = {} for db_entry in self.db.banks.find(): home_dir = self.get_home_dir(username, db_entry) dict_bank[home_dir] = [ db_entry['properties']['visibility'], db_entry['properties']['owner'] ] self.bank = dict_bank #Create a new user for biomaj server with specific permission if not self.has_user(username): self.add_user(username, apikey, self.get_home_dir(username)) for directory in dict_bank: if dict_bank[directory][0] == "public": perm = "elr" self.override_perm(username, directory, perm, recursive=True) elif dict_bank[directory][ 1] == username and dict_bank[directory][0] != "public": perm = "elr" self.override_perm(username, directory, perm, recursive=True) elif username == "biomaj_default" or dict_bank[directory][ 0] != "public": #biomaj_default user and private bank perm = "" self.override_perm(username, directory, perm, recursive=True) return
def is_authenticated(request):
    '''
    Use session cookies or look at Authorization header with value:
    USERNAME APIKEY

    :return: the user dict on success, None otherwise
    '''
    # Removed an unused local 'config' assignment
    if 'Authorization' in request.headers and request.headers['Authorization']:
        return __api_authentification(request)
    # Fall back to the Pyramid session cookie
    user_id = request.authenticated_userid
    if not user_id:
        return None
    proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'user')
    r = requests.get(proxy + '/api/user/info/user/' + user_id)
    if not r.status_code == 200:
        return None
    return r.json()['user']
def set_options(self, options):
    """
    Set download options.

    Subclasses that override this method must call this implementation.
    """
    # Copy the option dict
    self.options = options
    if "skip_check_uncompress" in options:
        self.skip_check_uncompress = Utils.to_bool(
            options["skip_check_uncompress"])
    # If stop_condition or wait_policy is specified, we reconstruct the
    # retryer. Fix: the previous test ('"stop_condition" or "wait_policy"
    # in options') was always true because a non-empty string is truthy,
    # so the retryer was rebuilt on every call.
    if "stop_condition" in options or "wait_policy" in options:
        stop_condition = options.get(
            "stop_condition", BiomajConfig.DEFAULTS["stop_condition"])
        wait_policy = options.get("wait_policy",
                                  BiomajConfig.DEFAULTS["wait_policy"])
        self._set_retryer(stop_condition, wait_policy)
def main():
    """Scan a directory and print '##BIOMAJ#' metadata lines per detected format."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--scan', dest="directory",
                        help="Directory to scan")
    parser.add_argument('--type', dest="ftype", help="Files type")
    parser.add_argument(
        '--tags',
        dest="tags",
        action="append",
        default=[],
        help="tags, format key:value, can be repeated multiple times")
    args = parser.parse_args()
    # Fix: without --scan, os.path.exists(None) raised a TypeError;
    # exit with an error status instead.
    if not args.directory or not os.path.exists(args.directory):
        sys.exit(1)
    res = {}
    for (path, dirs, files) in os.walk(args.directory):
        for entry in files:  # renamed from 'file' to avoid shadowing the builtin
            filename = os.path.join(path, entry)
            (file_format, mime) = Utils.detect_format(filename)
            if file_format is not None:
                # Normalize mime-style formats and make the path relative
                file_format = file_format.replace('application/', '')
                filename = filename.replace(args.directory + '/', '')
                if file_format not in res:
                    res[file_format] = [filename]
                else:
                    res[file_format].append(filename)
    f_type = ''
    if args.ftype:
        f_type = args.ftype
    tags = ''
    if args.tags:
        tags = ','.join(args.tags)
    for fformat in res.keys():
        print('##BIOMAJ#' + fformat + '#' + f_type + '#' + tags + '#' + ','.join(res[fformat]))
def test_service_config_override(self):
    """Environment variables must override the config and drive endpoint lookup."""
    config = {
        'rabbitmq': {'host': '1.2.3.4'},
        'web': {'local_endpoint': 'http://localhost'}
    }
    # Without env vars the config is untouched
    Utils.service_config_override(config)
    self.assertTrue(config['rabbitmq']['host'] == '1.2.3.4')
    # A generic env var overrides the config value
    os.environ['RABBITMQ_HOST'] = '4.3.2.1'
    Utils.service_config_override(config)
    self.assertTrue(config['rabbitmq']['host'] == '4.3.2.1')
    # A service-specific endpoint env var adds a dedicated entry
    os.environ['WEB_LOCAL_ENDPOINT_DOWNLOAD'] = 'http://download'
    Utils.service_config_override(config)
    self.assertTrue(
        config['web']['local_endpoint_download'] == 'http://download')
    self.assertTrue(
        Utils.get_service_endpoint(config, 'download') == 'http://download')
    # Other services fall back to the generic endpoint
    self.assertTrue(
        Utils.get_service_endpoint(config, 'process') == 'http://localhost')
def on_download(bank, downloaded_files):
    """
    Build prometheus metrics for a bank download and POST them to the
    download service (unless prometheus is disabled in the config).

    :param bank: bank name
    :param downloaded_files: list of downloaded file dicts (may be empty)
    """
    metrics = []
    if 'prometheus' in config and not config['prometheus']:
        return
    if not downloaded_files:
        metric = {'bank': bank, 'error': 1}
        # Fix: previously appended the 'metrics' list to itself instead of
        # the metric dict, sending a bogus payload for failed downloads.
        metrics.append(metric)
    else:
        for downloaded_file in downloaded_files:
            metric = {'bank': bank}
            if 'error' in downloaded_file and downloaded_file['error']:
                metric['error'] = 1
            else:
                metric['size'] = downloaded_file['size']
                metric['download_time'] = downloaded_file['download_time']
            if 'hostname' in config['web']:
                metric['host'] = config['web']['hostname']
            metrics.append(metric)
    proxy = Utils.get_service_endpoint(config, 'download')
    r = requests.post(proxy + '/api/download/metrics', json=metrics)
def _download(self, file_path, rfile):
    '''
    Download one file and return False in case of success and True otherwise.

    Subclasses that override this method must call this implementation
    at the end to perform test on archives.

    Note that this method is executed inside a retryer.
    '''
    if self.skip_check_uncompress:
        return False
    # Validate the downloaded archive; a corrupted file is removed so the
    # retryer can fetch it again.
    if Utils.archive_check(file_path):
        return False
    self.logger.error(
        'Archive is invalid or corrupted, deleting file and retrying download'
    )
    if os.path.exists(file_path):
        os.remove(file_path)
    return True
def bank_update(request):
    """
    Ask the BioMAJ daemon to update a bank.

    Requires an authenticated user; supports a 'fromscratch' flag in the
    JSON body to force a full re-download.

    :return: dict with 'msg' (and 'status' on success)
    """
    try:
        user = is_authenticated(request)
        if not user:
            return HTTPForbidden()
        body = request.body
        if sys.version_info >= (3,):
            body = request.body.decode()
        form = json.loads(body)
        fromscratch = False
        if 'fromscratch' in form and int(form['fromscratch']) == 1:
            fromscratch = True
        proxy = Utils.get_service_endpoint(request.registry.settings['watcher_config'], 'daemon')
        options = {
            'proxy': proxy,
            'bank': request.matchdict['id'],
            'fromscratch': fromscratch,
            'update': True
        }
        headers = {}
        if user['apikey']:
            headers = {'Authorization': 'APIKEY ' + user['apikey']}
        r = requests.post(options['proxy'] + '/api/daemon',
                          headers=headers,
                          json={'options': options})
        if not r.status_code == 200:
            return {'msg': 'Failed to contact BioMAJ daemon'}
        result = r.json()
        # Removed an unused local 'config' assignment
        return {'msg': str(result['msg']), 'status': str(result['status'])}
    except Exception as e:
        logging.error("Update error:" + str(e))
        return {'msg': str(e)}
def daemon_api_auth(request):
    """
    Build an Options object from the request and authenticate the caller.

    The API key is read from 'Authorization: APIKEY <token>' or, with
    precedence, from the 'X-API-KEY' header.

    :return: tuple (http_status, options, error) where error is None on
             success
    """
    apikey = request.headers.get('Authorization')
    token = None
    options_object = None
    if apikey:
        bearer = apikey.split()
        if bearer[0] == 'APIKEY':
            token = bearer[1]
    # X-API-KEY overrides the Authorization header when both are present
    if 'X-API-KEY' in request.headers:
        token = request.headers['X-API-KEY']
    try:
        options = copy.deepcopy(OPTIONS_PARAMS)
        options_object = Options(options)
        options_object.json = True
        options_object.token = token
        options_object.user = None
        options_object.redis_host = config['redis']['host']
        options_object.redis_port = config['redis']['port']
        options_object.redis_db = config['redis']['db']
        options_object.redis_prefix = config['redis']['prefix']
        options_object.proxy = True
        user = None
        if token:
            # Resolve the API key to a user via the user micro-service
            proxy = Utils.get_service_endpoint(config, 'user')
            r = requests.get(proxy + '/api/user/info/apikey/' + token)
            if not r.status_code == 200:
                # NOTE(review): returns the raw options dict here, not the
                # Options object as in the other branches — confirm callers
                # handle both shapes.
                return (404, options, {
                    'message': 'Invalid API Key or connection issue'
                })
            user = r.json()['user']
            if user:
                options_object.user = user['id']
    except Exception as e:
        logging.exception(e)
        return (500, options_object, str(e))
    return (200, options_object, None)
def main():
    """Scan a directory and print '##BIOMAJ#' metadata lines per detected format."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--scan', dest="directory",
                        help="Directory to scan")
    parser.add_argument('--type', dest="ftype", help="Files type")
    parser.add_argument('--tags', dest="tags", action="append", default=[],
                        help="tags, format key:value, can be repeated multiple times")
    args = parser.parse_args()
    if not os.path.exists(args.directory):
        sys.exit(1)
    formats = {}
    for (path, dirs, files) in os.walk(args.directory):
        for entry in files:
            full_path = os.path.join(path, entry)
            (file_format, mime) = Utils.detect_format(full_path)
            if file_format is not None:
                # Normalize mime-style formats and make the path relative
                file_format = file_format.replace('application/', '')
                rel_path = full_path.replace(args.directory + '/', '')
                formats.setdefault(file_format, []).append(rel_path)
    f_type = args.ftype if args.ftype else ''
    tags = ','.join(args.tags) if args.tags else ''
    for fformat in formats.keys():
        print('##BIOMAJ#' + fformat + '#' + f_type + '#' + tags + '#' + ','.join(formats[fformat]))
def __init__(self, config_file):
    """
    Instantiate the daemon service from a YAML configuration file and
    install SIGTERM handling.

    :param config_file: path to the YAML configuration file
    :type config_file: str
    """
    self.logger = logging
    self.curBank = None
    self.session = None
    self.executed_callback = None
    with open(config_file, 'r') as ymlfile:
        # safe_load avoids arbitrary object construction from the config
        # file (yaml.load without an explicit Loader is deprecated/unsafe).
        self.config = yaml.safe_load(ymlfile)
    Utils.service_config_override(self.config)
    Zipkin.set_config(self.config)
    BiomajConfig.load_config(self.config['biomaj']['config'])
    # Propagate per-service local endpoints into the global BioMAJ config
    for svc in Utils.services:
        service = svc.lower()
        if self.config['web'].get('local_endpoint_' + service, None):
            BiomajConfig.global_config.set(
                'GENERAL', 'micro.biomaj.service.' + service, '1')
            BiomajConfig.global_config.set(
                'GENERAL', 'micro.biomaj.proxy.' + service,
                self.config['web']['local_endpoint_' + service])
    if self.config['web'].get('local_endpoint', None):
        BiomajConfig.global_config.set(
            'GENERAL', 'micro.biomaj.proxy',
            self.config['web']['local_endpoint'])
    # Propagate rabbitmq connection settings when present
    if self.config.get('rabbitmq', None):
        rabbit_conf = self.config['rabbitmq']
        if rabbit_conf.get('host', None):
            BiomajConfig.global_config.set('GENERAL',
                                           'micro.biomaj.rabbit_mq',
                                           rabbit_conf['host'])
        if rabbit_conf.get('port', None):
            BiomajConfig.global_config.set('GENERAL',
                                           'micro.biomaj.rabbit_mq_port',
                                           str(rabbit_conf['port']))
        if rabbit_conf.get('user', None):
            BiomajConfig.global_config.set('GENERAL',
                                           'micro.biomaj.rabbit_mq_user',
                                           rabbit_conf['user'])
        if rabbit_conf.get('password', None):
            BiomajConfig.global_config.set('GENERAL',
                                           'micro.biomaj.rabbit_mq_password',
                                           rabbit_conf['password'])
        if rabbit_conf.get('virtual_host', None):
            BiomajConfig.global_config.set('GENERAL',
                                           'micro.biomaj.rabbit_mq_virtual_host',
                                           rabbit_conf['virtual_host'])
    if 'log_config' in self.config:
        # dictConfig requires plain dicts for the handler definitions
        for handler in list(self.config['log_config']['handlers'].keys()):
            self.config['log_config']['handlers'][handler] = dict(
                self.config['log_config']['handlers'][handler])
        logging.config.dictConfig(self.config['log_config'])
        self.logger = logging.getLogger('biomaj')
    self.redis_client = redis.StrictRedis(
        host=self.config['redis']['host'],
        port=self.config['redis']['port'],
        db=self.config['redis']['db'],
        decode_responses=True)
    self.logger.info('Daemon service started')
    # Handle SIGTERM gracefully without interrupting ongoing system calls
    signal.signal(signal.SIGTERM, self.catch)
    signal.siginterrupt(signal.SIGTERM, False)
def main(global_config, **settings):
    """ This function returns a Pyramid WSGI application. """
    config_file = 'config.yml'
    if 'BIOMAJ_CONFIG' in os.environ:
        config_file = os.environ['BIOMAJ_CONFIG']
    config = None
    with open(config_file, 'r') as ymlfile:
        # safe_load avoids arbitrary object construction from the config
        # file (yaml.load without an explicit Loader is deprecated/unsafe).
        config = yaml.safe_load(ymlfile)
    Utils.service_config_override(config)
    BiomajConfig.load_config(config['biomaj']['config'])
    settings['watcher_config'] = config
    settings['global_properties'] = config['biomaj']['config']
    if config['consul']['host']:
        # Register the watcher (static app + api) in consul for discovery
        consul_agent = consul.Consul(host=config['consul']['host'])
        consul_agent.agent.service.register(
            'biomaj-watcher-static',
            service_id=config['consul']['id'],
            address=config['web']['hostname'],
            port=config['web']['port'],
            tags=[
                'biomaj', 'watcher', 'static',
                'traefik.backend=biomaj-watcher',
                'traefik.frontend.rule=PathPrefix:/app',
                'traefik.enable=true'
            ])
        consul_agent.agent.service.register(
            'biomaj-watcher-api',
            service_id=config['consul']['id'] + '_api',
            address=config['web']['hostname'],
            port=config['web']['port'],
            tags=[
                'biomaj', 'watcher', 'api',
                'traefik.backend=biomaj-watcher',
                'traefik.frontend.rule=PathPrefix:/api/watcher',
                'traefik.enable=true'
            ])
        check = consul.Check.http(
            url='http://' + config['web']['hostname'] + ':' +
            str(config['web']['port']) + '/api/watcher',
            interval=20)
        consul_agent.agent.check.register(
            config['consul']['id'] + '_check',
            check=check,
            service_id=config['consul']['id'])
    # From here on, 'config' is the Pyramid Configurator
    config = Configurator(settings=settings)
    config.include('pyramid_chameleon')
    config.add_subscriber(before_render, BeforeRender)
    authentication_policy = AuthTktAuthenticationPolicy('seekrit', callback=None, hashalg='sha512')
    authorization_policy = ACLAuthorizationPolicy()
    config.set_authentication_policy(authentication_policy)
    config.set_authorization_policy(authorization_policy)
    config.add_static_view('static', 'static', cache_max_age=3600)
    config.add_static_view('app', 'biomajwatcher:webapp/app')
    config.add_route('home', '/')
    config.add_route('ping', '/api/watcher')
    config.add_route('user', '/api/watcher/user')
    config.add_route('user_banks', '/api/watcher/user/{id}/banks')
    config.add_route('api_user', '/user')
    config.add_route('api_user_banks', '/user/{id}/banks')
    config.add_route('bank', '/bank')
    config.add_route('bankdetails', '/bank/{id}')
    config.add_route('banklocked', '/bank/{id}/locked')
    config.add_route('bankstatus', '/bank/{id}/status')
    config.add_route('bankconfig', '/bank/{id}/config')
    config.add_route('bankreleaseremove', '/bank/{id}/{release}')
    config.add_route('sessionlog', '/bank/{id}/log/{session}')
    config.add_route('api_bank', '/api/watcher/bank')
    config.add_route('api_bankdetails', '/api/watcher/bank/{id}')
    config.add_route('api_bankconfig', '/api/watcher/bank/{id}/config')
    config.add_route('api_banklocked', '/api/watcher/bank/{id}/locked')
    config.add_route('api_bankstatus', '/api/watcher/bank/{id}/status')
    config.add_route('api_sessionlog', '/api/watcher/bank/{id}/log/{session}')
    config.add_route('schedulebank', '/schedule')
    config.add_route('updateschedulebank', '/schedule/{name}')
    config.add_route('api_schedulebank', '/api/watcher/schedule')
    config.add_route('api_updateschedulebank', '/api/watcher/schedule/{name}')
    config.add_route('search', '/search')
    config.add_route('search_format', '/search/format/{format}')
    config.add_route('search_format_type', '/search/format/{format}/type/{type}')
    config.add_route('search_type', '/search/type/{type}')
    config.add_route('api_search', '/api/watcher/search')
    config.add_route('api_search_format', '/api/watcher/search/format/{format}')
    config.add_route('api_search_format_type', '/api/watcher/search/format/{format}/type/{type}')
    config.add_route('api_search_type', '/api/watcher/search/type/{type}')
    config.add_route('stat', '/stat')
    config.add_route('api_stat', '/api/watcher/stat')
    config.add_route('is_auth', '/auth')
    config.add_route('auth', '/auth/{id}')
    config.add_route('logout', '/logout')
    config.add_route('api_is_auth', '/api/watcher/auth')
    config.add_route('api_auth', '/api/watcher/auth/{id}')
    config.add_route('api_logout', '/api/watcher/logout')
    config.add_route('old_api', 'BmajWatcher/GET')
    config.scan()
    # automatically serialize bson ObjectId and datetime to Mongo extended JSON
    json_renderer = JSON()

    def pymongo_adapter(obj, request):
        return json_util.default(obj)

    json_renderer.add_adapter(ObjectId, pymongo_adapter)
    json_renderer.add_adapter(datetime.datetime, pymongo_adapter)
    config.add_renderer('json', json_renderer)
    return config.make_wsgi_app()
def add_cron(cron_name):
    '''
    .. http:post:: /api/cron/jobs/(str:id)

       Update or add a cron task

       :<json dict: cron info containing slices, banks and comment
                    comment is the name to be used for the new task
                    banks is the list of banks to be updated, comma separated
                    slices is the cron time info in cron format (example: * * * * *)
                    user (optional) is of the user owning the bank, will use his apikey for the update
       :>json dict: status message
       :statuscode 200: no error
    '''
    # Extract the task definition from the JSON request body.
    # NOTE(review): missing keys raise KeyError here — presumably handled by
    # a framework-level error handler; confirm.
    param = request.get_json()
    cron_time = param['slices']
    cron_banks = param['banks']
    cron_newname = param['comment']
    # NOTE(review): '******' looks like a scrubbed/redacted default user name —
    # confirm the intended default (the user lookup below will fail for it).
    cron_user = '******'
    if 'user' in param:
        cron_user = param['user']
    # Resolve the owning user to obtain the API key used by the cron command.
    r = requests.get(config['web']['local_endpoint'] + '/api/user/info/user/' + cron_user)
    if not r.status_code == 200:
        return jsonify({
            'msg': 'cron task could not be updated',
            'cron': cron_name,
            'status': False
        })
    user_info = r.json()
    api_key = user_info['user']['apikey']
    # Remove the previous crontab entry before re-adding it under its new name.
    try:
        remove_cron_task(cron_name)
    except Exception as e:
        logging.error('cron:error:' + str(e))
        return jsonify({
            'msg': 'cron task could not be updated',
            'cron': cron_name,
            'status': False
        })
    # Drop the old record from Mongo; it is re-inserted below on success.
    mongo_cron.remove({'name': cron_name})
    # CLI binary can be overridden via the 'cron.cli' configuration key.
    biomaj_cli = 'biomaj-cli.py'
    if 'cron' in config and config['cron']['cli']:
        biomaj_cli = config['cron']['cli']
    proxy = Utils.get_service_endpoint(config, 'daemon')
    cron_cmd = biomaj_cli + " --proxy " + proxy + " --api-key " + api_key + " --update --bank " + cron_banks + " >> /var/log/cron.log 2>&1"
    try:
        add_cron_task(cron_time, cron_cmd, cron_newname)
    except Exception as e:
        # At this point the old task is already deleted, hence the message.
        logging.error('cron:error:' + str(e))
        return jsonify({
            'msg': 'cron task deleted but could not update it',
            'cron': cron_name,
            'status': False
        })
    # Persist the new task definition so it can be listed/removed later.
    mongo_cron.insert({
        'name': cron_newname,
        'cmd': cron_cmd,
        'time': cron_time
    })
    return jsonify({
        'msg': 'cron task added',
        'cron': cron_newname,
        'status': True
    })
def migrate_pendings():
    """
    Migrate database to schema 3.0.18.

    Checks the recorded schema version; if older than 3.0.17, converts each
    bank's ``pending`` field from the legacy dict form ``{release: session}``
    to a list of ``{'release': ..., 'id': ...}`` dicts. For schema versions
    below x.1.*, also backfills missing user API keys and missing production
    directory sizes.

    :return: None (returns early if the global config cannot be loaded)
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return None
    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks
    users = MongoConnector.users
    schema_version = SchemaVersion.get_dbschema_version(schema)
    # Version string is 'major.moderate.minor'
    moderate = int(schema_version.split('.')[1])
    minor = int(schema_version.split('.')[2])

    if moderate == 0 and minor <= 17:
        print("Migrate from release: %s" % schema_version)
        # Update pending releases
        bank_list = banks.find()
        updated = 0
        for bank in bank_list:
            if 'pending' in bank:
                # Check we have an old pending type (legacy dict form).
                # isinstance() instead of type() == dict comparison.
                if isinstance(bank['pending'], dict):
                    updated += 1
                    pendings = []
                    # Keep releases ordered by their session id
                    for release in sorted(bank['pending'], key=lambda r: bank['pending'][r]):
                        pendings.append({'release': str(release), 'id': bank['pending'][str(release)]})
                    if pendings:
                        banks.update({'name': bank['name']}, {'$set': {'pending': pendings}})
                    else:
                        # We remove old type for 'pending'
                        banks.update({'name': bank['name']}, {'$unset': {'pending': ""}})
        print("Migration: %d bank(s) updated" % updated)

    if moderate < 1:
        # Backfill API keys for users created before keys existed
        updated = 0
        user_list = users.find()
        for user in user_list:
            if 'apikey' not in user:
                updated += 1
                api_key = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10))
                users.update({'_id': user['_id']}, {'$set': {'apikey': api_key}})
        print("Migration: %d user(s) updated" % updated)

        # Backfill production size. A production entry looks like:
        # {"freeze": false, "remoterelease": "2003-11-26", "session": ...,
        #  "data_dir": "/db", "release": "2003-11-26",
        #  "dir_version": "ncbi/blast/alu", "prod_dir": "alu-2003-11-26",
        #  "size": 319432, ...}
        bank_list = banks.find()
        updated = 0
        for bank in bank_list:
            for prod in bank['production']:
                if 'size' not in prod or prod['size'] == 0:
                    logging.info('Calculate size for bank %s' % (bank['name']))
                    # logging.warning: logging.warn is a deprecated alias
                    if 'data_dir' not in prod or not prod['data_dir'] or 'prod_dir' not in prod or not prod['prod_dir'] or 'dir_version' not in prod or not prod['dir_version']:
                        logging.warning('no production directory information for %s, skipping...' % (bank['name']))
                        continue
                    prod_dir = os.path.join(prod['data_dir'], prod['dir_version'], prod['prod_dir'])
                    if not os.path.exists(prod_dir):
                        logging.warning('production directory %s does not exists for %s, skipping...' % (prod_dir, bank['name']))
                        continue
                    dir_size = Utils.get_folder_size(prod_dir)
                    banks.update({'name': bank['name'], 'production.release': prod['release']},
                                 {'$set': {'production.$.size': dir_size}})
                    updated += 1
        print("Migration: %d bank production info updated" % updated)
def list(self, directory=''):
    '''
    Try to get file headers to get last_modification and size.

    Performs a header-only request (HEADER + NOBODY) per file and parses
    the response headers for Content-Length and Last-Modified.

    :param directory: unused, kept for interface compatibility
    :return: tuple (files_to_download, []) with size/date info filled in
    '''
    self._basic_curl_configuration()
    # Specific configuration: fetch headers only, no body
    self.crl.setopt(pycurl.HEADER, True)
    self.crl.setopt(pycurl.NOBODY, True)
    for rfile in self.files_to_download:
        if self.save_as is None:
            # NOTE(review): save_as is captured from the first file and
            # reused for all of them — confirm this is intended.
            self.save_as = rfile['name']
        rfile['save_as'] = self.save_as
        file_url = self._file_url(rfile)
        try:
            self.crl.setopt(pycurl.URL, file_url)
        except Exception:
            # Fall back to a pure-ASCII URL if pycurl rejects the string
            self.crl.setopt(pycurl.URL, file_url.encode('ascii', 'ignore'))

        # Create a buffer and assign it to the pycurl object
        output = BytesIO()
        self.crl.setopt(pycurl.WRITEFUNCTION, output.write)
        self.crl.perform()

        # Figure out what encoding was sent with the response, if any.
        # Check against lowercased header name.
        encoding = None
        if 'content-type' in self.headers:
            content_type = self.headers['content-type'].lower()
            match = re.search(r'charset=(\S+)', content_type)
            if match:
                encoding = match.group(1)
        if encoding is None:
            # Default encoding for HTML is iso-8859-1.
            # Other content types may have different default encoding,
            # or in case of binary data, may have no encoding at all.
            encoding = 'iso-8859-1'

        # lets get the output in a string
        result = output.getvalue().decode(encoding)
        lines = re.split(r'[\n\r]+', result)
        for line in lines:
            # BUGFIX: split only on the first ':'. Header values may contain
            # ':' themselves (the time in 'Last-Modified: Sun Nov  6
            # 08:49:37 1994'), so an unbounded split truncated the value and
            # the asctime date format below could never match. The first two
            # regexes match a prefix only, so their results are unchanged.
            parts = line.split(':', 1)
            if parts[0].strip() == 'Content-Length':
                rfile['size'] = int(parts[1].strip())
            if parts[0].strip() == 'Last-Modified':
                # Sun, 06 Nov 1994 (RFC 1123 style)
                res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
                if res:
                    rfile['hash'] = hashlib.md5(
                        str(res.group(0)).encode('utf-8')).hexdigest()
                    rfile['day'] = int(res.group(2))
                    rfile['month'] = Utils.month_to_num(res.group(3))
                    rfile['year'] = int(res.group(4))
                    continue
                # Sunday, 06-Nov-94 (RFC 850 style, two-digit year)
                res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
                if res:
                    rfile['hash'] = hashlib.md5(
                        str(res.group(0)).encode('utf-8')).hexdigest()
                    rfile['day'] = int(res.group(2))
                    rfile['month'] = Utils.month_to_num(res.group(3))
                    rfile['year'] = 2000 + int(res.group(4))
                    continue
                # Sun Nov  6 08:49:37 1994 (asctime style)
                res = re.match(
                    r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)',
                    parts[1].strip())
                if res:
                    rfile['hash'] = hashlib.md5(
                        str(res.group(0)).encode('utf-8')).hexdigest()
                    rfile['day'] = int(res.group(3))
                    rfile['month'] = Utils.month_to_num(res.group(2))
                    rfile['year'] = int(res.group(4))
                    continue
    return (self.files_to_download, [])
def list(self, directory=''):
    '''
    Try to get file headers to get last_modification and size.

    :param directory: unused, kept for interface compatibility
    :return: tuple (files_to_download, []) with size/date info filled in
    :raises Exception: if a URL returns an error code (other than 405)
    '''
    self._network_configuration()
    # Specific configuration
    # With those options, cURL will issue a HEAD request. This may not be
    # supported especially on resources that are accessed using POST. In
    # this case, HTTP will return code 405. We explicitely handle this case
    # in this method.
    # Note also that in many cases, there is no Last-Modified field in
    # headers since this is usually dynamic content (Content-Length is
    # usually present).
    self.crl.setopt(pycurl.HEADER, True)
    self.crl.setopt(pycurl.NOBODY, True)
    for rfile in self.files_to_download:
        if self.save_as is None:
            # NOTE(review): save_as is captured from the first file and
            # reused for all of them — confirm this is intended.
            self.save_as = rfile['name']
        rfile['save_as'] = self.save_as
        file_url = self._file_url(rfile)
        try:
            self.crl.setopt(pycurl.URL, file_url)
        except Exception:
            # Fall back to a pure-ASCII URL if pycurl rejects the string
            self.crl.setopt(pycurl.URL, file_url.encode('ascii', 'ignore'))

        # Create a buffer and assign it to the pycurl object
        output = BytesIO()
        self.crl.setopt(pycurl.WRITEFUNCTION, output.write)
        try:
            self.crl.perform()
            errcode = int(self.crl.getinfo(pycurl.RESPONSE_CODE))
            if errcode == 405:
                # HEAD not supported by the server for this URL so we can
                # skip the rest of the loop (we won't have metadata about
                # the file but biomaj should be fine).
                msg = 'Listing ' + file_url + ' not supported. This is fine, continuing.'
                self.logger.info(msg)
                continue
            elif errcode not in self.ERRCODE_OK:
                msg = 'Error while listing ' + file_url + ' - ' + str(errcode)
                self.logger.error(msg)
                raise Exception(msg)
        except Exception as e:
            msg = 'Error while listing ' + file_url + ' - ' + str(e)
            self.logger.error(msg)
            raise e

        # Figure out what encoding was sent with the response, if any.
        # Check against lowercased header name.
        encoding = None
        if 'content-type' in self.headers:
            content_type = self.headers['content-type'].lower()
            match = re.search(r'charset=(\S+)', content_type)
            if match:
                encoding = match.group(1)
        if encoding is None:
            # Default encoding for HTML is iso-8859-1.
            # Other content types may have different default encoding,
            # or in case of binary data, may have no encoding at all.
            encoding = 'iso-8859-1'

        # lets get the output in a string
        result = output.getvalue().decode(encoding)
        lines = re.split(r'[\n\r]+', result)
        for line in lines:
            # BUGFIX: split only on the first ':'. Header values may contain
            # ':' themselves (the time in 'Last-Modified: Sun Nov  6
            # 08:49:37 1994'), so an unbounded split truncated the value and
            # the asctime date format below could never match. The first two
            # regexes match a prefix only, so their results are unchanged.
            parts = line.split(':', 1)
            if parts[0].strip() == 'Content-Length':
                rfile['size'] = int(parts[1].strip())
            if parts[0].strip() == 'Last-Modified':
                # Sun, 06 Nov 1994 (RFC 1123 style)
                res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
                if res:
                    rfile['hash'] = hashlib.md5(
                        str(res.group(0)).encode('utf-8')).hexdigest()
                    rfile['day'] = int(res.group(2))
                    rfile['month'] = Utils.month_to_num(res.group(3))
                    rfile['year'] = int(res.group(4))
                    continue
                # Sunday, 06-Nov-94 (RFC 850 style, two-digit year)
                res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
                if res:
                    rfile['hash'] = hashlib.md5(
                        str(res.group(0)).encode('utf-8')).hexdigest()
                    rfile['day'] = int(res.group(2))
                    rfile['month'] = Utils.month_to_num(res.group(3))
                    rfile['year'] = 2000 + int(res.group(4))
                    continue
                # Sun Nov  6 08:49:37 1994 (asctime style)
                res = re.match(
                    r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)',
                    parts[1].strip())
                if res:
                    rfile['hash'] = hashlib.md5(
                        str(res.group(0)).encode('utf-8')).hexdigest()
                    rfile['day'] = int(res.group(3))
                    rfile['month'] = Utils.month_to_num(res.group(2))
                    rfile['year'] = int(res.group(4))
                    continue
    return (self.files_to_download, [])
def main():
    """Entry point of the biomaj-user CLI.

    Parses command-line arguments and performs the requested user-management
    action (see SUPPORTED_ACTIONS): add/create, delete/remove/rm, update,
    renew (API key) or view. Exits with status 0 on success, 1 on error.
    """
    description = "BioMAJ user: Manager users."
    parser = argparse.ArgumentParser(description=description)
    # Options without value
    parser.add_argument('-A', '--action', dest="action", default=None,
                        help="Action to perform for user " + str(SUPPORTED_ACTIONS) + "'renew': Create new api key",
                        required=True)
    parser.add_argument('-J', '--json', dest="json", help="output to json", action='store_true')
    parser.add_argument('-C', '--config', dest="config", metavar='</path/to/config.yml>', type=str,
                        help="Path to config.yml. By default read from env variable BIOMAJ_CONFIG")
    parser.add_argument('-E', '--email', dest="email", type=str, help="User email, optional")
    parser.add_argument('-U', '--user', dest="user", metavar='<username>', type=str, required=True,
                        help="User name to manage")
    # BUGFIX: help text had a duplicated trailing "env variable"
    parser.add_argument('-P', '--password', dest="passwd", metavar="<password>", type=str,
                        help="User password to use when creating new user. If not given, automatically generated, accepts env variable BIOMAJ_USER_PASSWORD")
    parser.parse_args(namespace=options)

    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)
    if options.action not in SUPPORTED_ACTIONS:
        print("Unsupported action '%s'" % str(options.action))
        sys.exit(1)

    # Resolve configuration: CLI flag > BIOMAJ_CONFIG env var > ./config.yml
    if options.config:
        config = options.config
    elif 'BIOMAJ_CONFIG' in os.environ:
        config = os.environ['BIOMAJ_CONFIG']
    else:
        config = 'config.yml'
    with open(config, 'r') as ymlfile:
        config = yaml.load(ymlfile, Loader=yaml.FullLoader)
    Utils.service_config_override(config)
    BmajUser.set_config(config)

    user = BmajUser(options.user)
    if options.action in ['add', 'create']:
        if user.user is None:
            if options.passwd is None:
                if 'BIOMAJ_USER_PASSWORD' in os.environ:
                    options.passwd = os.environ['BIOMAJ_USER_PASSWORD']
                else:
                    # Generate a random 10-char password when none was supplied
                    options.passwd = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10))
            user.create(options.passwd, email=options.email)
            if options.json:
                # ObjectId is not JSON-serializable, drop it before dumping
                del user.user['_id']
                print(json.dumps(user.user))
                sys.exit(0)
            print("User successfully created")
            print(tabulate([["User", "Password", "API Key"],
                            [user.user['id'], str(options.passwd), str(user.user['apikey'])]],
                           headers="firstrow", tablefmt="psql"))
            sys.exit(0)
        else:
            print("User %s already exist" % user.user['id'])
            sys.exit(1)

    # All remaining actions require an existing user
    if user.user is None:
        print("[%s] User %s does not exist" % (str(options.action), str(options.user)))
        sys.exit(1)
    if options.action in ['delete', 'remove', 'rm']:
        user.remove()
        print("User %s successfully deleted" % user.user['id'])
    if options.action == 'update':
        update = {}
        if options.passwd:
            # bcrypt.hashpw accepts the stored hash as the salt argument
            update['hashed_password'] = bcrypt.hashpw(options.passwd, user.user['hashed_password'])
        if options.email:
            update['email'] = options.email
        if update:  # was `if update.items():` — plain truthiness is the idiom
            BmajUser.users.update({'id': user.user['id']}, {'$set': update})
            print("User %s successfully updated" % str(user.user['id']))
        else:
            print("[%s] User %s not updated" % (str(options.action), str(options.user)))
    if options.action == 'renew':
        user.renew_apikey()
        # Reload the user to fetch the newly generated key
        user = BmajUser(user.user['id'])
        print("[%s] User %s, successfully renewed API key: '%s'" % (str(options.action), str(user.user['id']), str(user.user['apikey'])))
    if options.action == 'view':
        print(tabulate([["User", "Email", "API Key", "LDAP"],
                        [str(user.user['id']), str(user.user['email']), str(user.user['apikey']), str(user.user['is_ldap'])]],
                       headers="firstrow", tablefmt="psql"))
    sys.exit(0)
"Bank total download errors.", ['bank']) download_size_metric = Counter("biomaj_download_file_size", "Bank download file size in bytes.", ['bank', 'host']) download_time_metric = Counter("biomaj_download_file_time", "Bank download file time in seconds.", ['bank', 'host']) config_file = 'config.yml' if 'BIOMAJ_CONFIG' in os.environ: config_file = os.environ['BIOMAJ_CONFIG'] config = None with open(config_file, 'r') as ymlfile: config = yaml.load(ymlfile, Loader=Loader) Utils.service_config_override(config) def consul_declare(config): if config['consul']['host']: consul_agent = consul.Consul(host=config['consul']['host']) consul_agent.agent.service.register( 'biomaj-download', service_id=config['consul']['id'], address=config['web']['hostname'], port=config['web']['port'], tags=[ 'biomaj', 'api', 'traefik-int.backend=biomaj-download', 'traefik-int.frontend.rule=PathPrefix:/api/download', 'traefik-int.enable=true' ])
def test_copy_with_regexp(self):
    """
    Test copy with a regexp: copy this test module into data_dir and
    check it arrived.
    """
    from_dir = os.path.dirname(os.path.realpath(__file__))
    to_dir = self.utils.data_dir
    # Raw string: '.*\.py' (non-raw) is an invalid escape sequence and
    # triggers a SyntaxWarning on modern Python.
    Utils.copy_files_with_regexp(from_dir, to_dir, [r'.*\.py'])
    self.assertTrue(os.path.exists(os.path.join(to_dir, 'biomaj_tests.py')))