class S3DirectorySource(S3FileSource):
    """Chunk source backed by an S3 "directory": an index file (index.json)
    that names the individual chunk files stored alongside it."""

    # Name of the index file appended to directory-style source URLs.
    index_name = 'index.json'

    def __init__(self, source_url, refresh_interval):
        # Normalize the URL so it always points at the index file itself.
        if (source_url[-1] == '/'
                or source_url[-len(self.index_name):] != self.index_name):
            source_url = posixpath.join(source_url, self.index_name)
        super(S3DirectorySource, self).__init__(source_url, refresh_interval)

    def load(self):
        """Fetch the index file from S3 and populate chunk data from it.

        Raises NoDataError when the bucket or index file is missing or
        the index fails to parse.
        """
        # Single connection shared by the closures below to minimize the
        # number of connections to S3.
        conn = S3Connection()
        try:
            bucket = conn.get_bucket(self.url.netloc)
        except S3ResponseError as e:
            if e.status == 404:
                raise NoDataError("No such bucket \"{0}\"".format(
                    self.url.netloc))
            # FIX: any other S3 error previously fell through with
            # `bucket` undefined; propagate it instead.
            raise

        def s3exists(path):
            # Construct the path to the key relative to the directory
            # holding the index file; returns the boto Key or None.
            key = posixpath.join(posixpath.dirname(self.url.path), path)
            return bucket.get_key(key)

        def s3open(path, mode):
            # Download the key's contents into a rewound temporary file.
            key = s3exists(path)
            fp = tempfile.TemporaryFile()
            key.get_contents_to_file(fp)
            fp.seek(0)
            return fp

        s3key = self._get_key()
        if not s3key:
            self.no_data = True
            # posixpath.join() with a single argument was a no-op.
            raise NoDataError('No index file found at "%s"'
                              % self.source_url)
        with tempfile.TemporaryFile() as fp:
            s3key.get_contents_to_file(fp)
            fp.seek(0)
            try:
                self._populate_chunks(fp, parse_dir_source,
                                      exists_cb=s3exists, open_cb=s3open)
            except ParseError as e:
                raise NoDataError("Parsing failure: {0}".format(str(e)))
        self.current_etag = s3key.etag
        # Consistent with the other loaders: mark data as present only
        # after a fully successful load.
        self.no_data = False
def _get_key(self):
    # Open a fresh S3 connection and resolve the configured bucket.
    # NOTE(review): as visible here, this raises NoDataError when the
    # bucket lookup fails but contains no return statement, so on
    # success it implicitly returns None — callers treat a falsy key as
    # "no data".  This looks like a truncated fragment; confirm against
    # the full source, which presumably goes on to look up and return
    # the chunk-file key.
    try:
        conn = S3Connection()
        bucket = conn.get_bucket(self.url.netloc)
    except S3ResponseError, e:
        raise NoDataError("Could not find bucket \"%s\": %s" %
                          (self.url.netloc, e))
def load(self):
    """Read this list's directory index from the local filesystem and
    populate the chunk data from it.

    Raises NoDataError when the index file does not exist.
    """
    index_path = self.url.path
    # Guard clause: a known list with no index on disk is a data error.
    if not os.path.exists(index_path):
        self.no_data = True
        raise NoDataError('Known list, no directory index found: "%s"'
                          % index_path)
    with open(index_path, 'r') as index_file:
        self._populate_chunks(index_file, parse_dir_source)
    self.no_data = False
def load(self):
    # NOTE(review): this appears to be a truncated duplicate of the
    # fuller S3 directory load() elsewhere in this file — it connects,
    # resolves the bucket, and then ends without using it.
    # for the closures to minimize the number of connections to S3
    conn = S3Connection()
    try:
        bucket = conn.get_bucket(self.url.netloc)
    except S3ResponseError, e:
        # Only a 404 is translated into NoDataError; any other S3
        # error falls through here with `bucket` left undefined.
        if e.status == 404:
            raise NoDataError("No such bucket \"{0}\"".format(
                self.url.netloc))
def load(self):
    """Fetch the S3-hosted directory index and populate chunk data
    from it.

    Raises NoDataError when the bucket or index file is missing or the
    index cannot be parsed.
    """
    # Single connection shared by the closures below to minimize the
    # number of connections to S3.
    conn = S3Connection()
    try:
        bucket = conn.get_bucket(self.url.netloc)
    except S3ResponseError as e:
        if e.status == 404:
            raise NoDataError("No such bucket \"{0}\""
                              .format(self.url.netloc))
        # FIX: propagate unexpected S3 errors instead of falling
        # through with `bucket` undefined (which would raise NameError
        # inside s3exists below).
        raise

    def s3exists(path):
        # Construct the path to the key relative to the directory
        # holding the index file; returns the boto Key or None.
        key = posixpath.join(posixpath.dirname(self.url.path), path)
        return bucket.get_key(key)

    def s3open(path, mode):
        # Download the key's contents into a rewound temporary file.
        key = s3exists(path)
        fp = tempfile.TemporaryFile()
        key.get_contents_to_file(fp)
        fp.seek(0)
        return fp

    s3key = self._get_key()
    if not s3key:
        self.no_data = True
        # posixpath.join() with a single argument was a no-op; use the
        # source URL directly.
        raise NoDataError('No index file found at "%s"' % self.source_url)
    with tempfile.TemporaryFile() as fp:
        s3key.get_contents_to_file(fp)
        fp.seek(0)
        try:
            self._populate_chunks(fp, parse_dir_source,
                                  exists_cb=s3exists, open_cb=s3open)
        except ParseError as e:
            raise NoDataError("Parsing failure: {0}".format(str(e)))
    self.current_etag = s3key.etag
    self.no_data = False
def load(self):
    """Load this list's chunk data from a local file.

    Raises NoDataError when the file is absent or too small to contain
    any usable data.
    """
    data_path = self.url.path
    missing = not os.path.exists(data_path)
    # A payload of two bytes or fewer cannot hold real chunk data.
    if missing or os.stat(data_path).st_size <= 2:
        # We can't find the data for that list
        self.no_data = True
        raise NoDataError('Known list, no data found: "%s"' % data_path)
    with open(data_path, 'rb') as data_file:
        self._populate_chunks(data_file, parse_file_source)
    self.no_data = False
def load(self):
    """Download this list's chunk file from S3 and populate its chunks.

    Raises NoDataError when no chunk file exists at the source URL.
    """
    key = self._get_key()
    if not key:
        self.no_data = True
        raise NoDataError('No chunk file found at "%s"' % self.source_url)
    # Spool the S3 object into a temp file so parsing can seek freely.
    with tempfile.TemporaryFile() as spool:
        key.get_contents_to_file(spool)
        # Need to forcibly reset to the beginning of the file
        spool.seek(0)
        self._populate_chunks(spool, parse_file_source)
    self.current_etag = key.etag
    self.no_data = False
# NOTE(review): Python-2-era fragment (ConfigParser, `except X, e`
# syntax) duplicating part of the Python 3 includeme() elsewhere in this
# file.  It begins inside a try block and ends on a bare `for` header,
# so both edges are truncated in this view; indentation below is
# reconstructed to mirror the py3 version.
                    list_configs.append({
                        'name': list_name,
                        'config': list_config
                    })
                except ConfigParser.NoSectionError, e:
                    logger.error(e)
    elif lists_to_serve_scheme == 's3+dir':
        import boto
        from boto.exception import S3ResponseError
        try:
            conn = boto.connect_s3()
            bucket = conn.get_bucket(lists_to_serve_url.netloc)
        except S3ResponseError, e:
            raise NoDataError("Could not find bucket \"%s\": %s" %
                              (lists_to_serve_url.netloc, e))
        for list_key in bucket.get_all_keys():
            list_key_name = list_key.key
            # NOTE(review): rstrip('.ini') strips any trailing run of
            # the characters '.', 'i', 'n' — not the literal suffix —
            # so names ending in those letters are over-trimmed
            # (e.g. "mini.ini" -> "m"); suffix removal would be correct.
            list_name = list_key_name.rstrip('.ini')
            list_ini = list_key.get_contents_as_string()
            try:
                list_config = ConfigParser.ConfigParser()
                list_config.readfp(StringIO.StringIO(list_ini))
                list_configs.append({'name': list_name,
                                     'config': list_config})
            except ConfigParser.NoSectionError, e:
                logger.error(e)
    else:
        raise ValueError('lists_served must be dir:// or s3+dir:// value')
    for list_config in list_configs:
def includeme(config):
    """Pyramid includeme: build the set of served lists from config.

    Reads ``shavar.lists_served`` (a ``dir://`` or ``s3+dir://`` URL),
    loads one .ini per list, instantiates the list objects, and stashes
    them in the application registry.

    Raises:
        ValueError: when the setting is missing or its scheme is not
            ``dir`` or ``s3+dir``.
        NoDataError: when the configured S3 bucket cannot be found.
    """
    lists_to_serve = config.registry.settings.get('shavar.lists_served',
                                                  None)
    if not lists_to_serve:
        raise ValueError("lists_served appears to be empty or missing "
                         "in the config \"%s\"!" % config.filename)
    try:
        lists_to_serve_url = urlparse(lists_to_serve)
    except TypeError:
        raise ValueError('lists_served must be dir:// or s3+dir:// value')
    lists_to_serve_scheme = lists_to_serve_url.scheme.lower()

    list_configs = []
    serving = {}
    ver_lists = {}
    if lists_to_serve_scheme == 'dir':
        import os
        list_config_dir = lists_to_serve_url.netloc + lists_to_serve_url.path
        for list_config_file in os.listdir(list_config_dir):
            if list_config_file.endswith(".ini"):
                list_name = list_config_file[:-len(".ini")]
                try:
                    list_config = configparser.ConfigParser()
                    # FIX: close the .ini file instead of leaking the
                    # handle; read_file replaces the deprecated readfp.
                    with open(os.path.join(list_config_dir,
                                           list_config_file)) as ini_fp:
                        list_config.read_file(ini_fp)
                    list_configs.append({
                        'name': list_name,
                        'config': list_config
                    })
                except configparser.NoSectionError as e:
                    logger.error(e)
    elif lists_to_serve_scheme == 's3+dir':
        import boto
        from boto.exception import S3ResponseError
        try:
            conn = boto.connect_s3()
            bucket = conn.get_bucket(lists_to_serve_url.netloc)
        except S3ResponseError as e:
            raise NoDataError("Could not find bucket \"%s\": %s" %
                              (lists_to_serve_url.netloc, e))
        for list_key in bucket.get_all_keys():
            list_key_name = list_key.key
            # FIX: rstrip('.ini') removed any trailing run of the
            # characters '.', 'i', 'n' (e.g. "mini.ini" -> "m"); strip
            # the literal suffix instead, mirroring the dir:// branch.
            if list_key_name.endswith('.ini'):
                list_name = list_key_name[:-len('.ini')]
            else:
                list_name = list_key_name
            list_ini = list_key.get_contents_as_string(encoding='UTF-8')
            try:
                list_config = configparser.ConfigParser()
                # read_string avoids the StringIO round-trip and the
                # deprecated readfp.
                list_config.read_string(list_ini)
                list_configs.append({'name': list_name,
                                     'config': list_config})
            except configparser.NoSectionError as e:
                logger.error(e)
    else:
        raise ValueError('lists_served must be dir:// or s3+dir:// value')

    resp = requests.get(GITHUB_API_URL + SHAVAR_PROD_LISTS_BRANCHES_PATH)
    shavar_prod_lists_branches = resp.json()
    for list_entry in list_configs:
        list_name = list_entry['name']
        list_config = list_entry['config']
        # Make sure we have a refresh interval set for the data source
        # for the lists.
        setting_name = 'refresh_check_interval'
        settings = dict(list_config.items(list_name))
        default = config.registry.settings.get('shavar.%s' % setting_name,
                                               10 * 60)
        if setting_name not in settings:
            settings[setting_name] = default

        type_ = list_config.get(list_name, 'type')
        list_ = create_list(type_, list_name, settings)
        serving[list_name] = list_
        ver_lists[list_name] = []
        # A list is versioned when its .ini both declares the option and
        # sets it to a truthy value.
        versioned = (list_config.has_option(list_name, 'versioned')
                     and list_config.get(list_name, 'versioned'))
        if versioned:
            add_versioned_lists_to_registry(
                settings, serving, ver_lists, type_, list_name,
                shavar_prod_lists_branches)
    config.registry['shavar.serving'] = serving
    config.registry['shavar.versioned_lists'] = ver_lists
    # FIX: the original comprehension shadowed the builtin `list`.
    config.registry.settings['shavar.list_names_served'] = [
        entry['name'] for entry in list_configs
    ]