def update_metaconfig(metaconfig_name, *args, **kwargs):
    zk_value, version = _kazoo_client(ZK_HOSTS).get(
        METACONFIG_ZK_PATH_FORMAT.format(metaconfig_name))
    s3_key = METACONFIG_S3_KEY_FORMAT.format(metaconfig_name)
    s3_path = zk_util.construct_s3_path(s3_key, zk_value)
    try:
        metaconfig_data = s3config.S3Config(
            AWS_KEY_FILE, S3_BUCKET,
            s3_endpoint=S3_ENDPOINT).get_config_string(s3_path)
    except ValueError as ve:
        log.error("Abort downloading from s3 key %s due to ValueError: %s"
                  % (s3_path, ve))
        return
    except Exception as e:
        log.error("Abort downloading from s3 key %s due to unexpected "
                  "s3 exception: %s" % (s3_path, e))
        return

    metaconfig_list = json.loads(metaconfig_data)
    for metaconfig in metaconfig_list:
        _place_watch_from_metaconfig(metaconfig)
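
# Illustrative sketch only: update_metaconfig() above and main() below both
# rely on zk_util.construct_s3_path() to combine an S3 key prefix with the
# value stored in the zookeeper node. The real logic lives in zk_util; the
# function and sample values below are hypothetical and only show the assumed
# idea that every update produces a distinct S3 object, which is what gives
# the "almost" read-after-create guarantee mentioned in main().
def _example_construct_s3_path(s3_key_prefix, zk_value):
    # e.g. ("metaconfigs/discovery.test", "1a2b3c")
    #   -> "metaconfigs/discovery.test/1a2b3c"
    return "%s/%s" % (s3_key_prefix, zk_value)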
def main():
    global args
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-f", "--file", dest="file", metavar="FILENAME", required=True,
        help="The target file name for storing the downloaded data.",
    )
    parser.add_argument(
        "-p", "--path", dest="path", metavar="PATH",
        help="The zookeeper path to download data from")
    parser.add_argument(
        "-m", "--mode", dest="mode", metavar="MODE", required=True,
        help="The download mode [config | serverset]")
    parser.add_argument(
        "-s", "--show", dest="show", action='store_true',
        help="Only show the value in zookeeper; no change is made to the "
             "target file when this flag is set.")
    parser.add_argument(
        "--allow-same-content", dest="allow_same_content",
        action='store_true',
        help="Treat downloading the same content as OK. Without this flag "
             "the script exits with a non-zero code when the content in "
             "zookeeper is the same as the target file.")
    parser.add_argument(
        "-r", "--serverset-rejection-ratio", dest="rejection_ratio",
        metavar="RATIO", default=DEFAULT_SERVERSET_REJECTION_RATIO,
        type=float,
        help="The ratio used to reject a new serverset: if the count of "
             "servers in the new serverset is smaller than the count of "
             "servers in the existing target file multiplied by this ratio, "
             "the new serverset will be rejected and not written to the "
             "target file. Default is %.2f."
             % DEFAULT_SERVERSET_REJECTION_RATIO)
    parser.add_argument(
        "--serverset-minimum-size", dest="serverset_minimum_size",
        metavar="MINSIZE", default=DEFAULT_SERVERSET_MINIMUM_SIZE,
        type=int,
        help="The minimum size of a serverset. If the serverset's size ever "
             "falls below this minimum size, zk_update_monitor will reject "
             "the update and will not write to the target file. Default is "
             "%d." % DEFAULT_SERVERSET_MINIMUM_SIZE)
    parser.add_argument(
        "-v", "--value", dest="value", metavar="VALUE",
        help="The value in zookeeper. Once this is provided, the script "
             "will not try to get data from zookeeper; instead it will "
             "write this provided value to the file. For config mode, the "
             "value is the same as the data in zookeeper; for serverset "
             "mode, the value is a list of hosts separated by ','.")
    parser.add_argument(
        "--from-s3", dest="s3_key", metavar="S3_KEY",
        help="The S3 key to download the data from")
    parser.add_argument(
        "--report-metadata", dest="report_metadata", default=False,
        action='store_true',
        help="If enabled, the downloader script will report metadata of the "
             "serverset or config back to the caller (zk_update_monitor.py)")
    parser.add_argument(
        "-l", "--from-value-file", dest="from_value_file",
        help="To get the actual value from a file")
    parser.add_argument(
        "-o", "--port", dest="port", metavar="PORT",
        help="The port the flask app in ZUM is running on")

    ######## REQUIRED #########
    # ZK / S3 Configurations.
    ###########################
    parser.add_argument(
        "-z", "--zk-hosts-file-path", dest="zk_hosts_file", metavar="ZKHOSTS",
        required=True,
        help="The path of the file which has a list of Zookeeper endpoints "
             "(host:port) which keeps the metaconfig as well as "
             "the config/serversets")
    parser.add_argument(
        "-a", "--aws-key-file", dest="aws_key_file", metavar="AWSKEY",
        help="The path of the file storing AWS access and secret keys",
    )
    parser.add_argument(
        "-b", "--s3-bucket", dest="s3_bucket", metavar="BUCKET",
        help="The S3 bucket storing metaconfigs / configs")
    parser.add_argument(
        "-e", "--aws-s3-endpoint", dest="s3_endpoint", metavar="ENDPOINT",
        default="s3.amazonaws.com",
        help="The S3 endpoint storing metaconfig / configs")

    args = parser.parse_args()
    mode = args.mode.upper()

    initialize_zookeeper_aws_s3_configs(args)
    initialize_zk_update_monitor_endpoints(args.port)

    version = None
    notification_timestamp = time.time()
    # This is used to calculate the MD5 which is consistent with the
    # santinized value in zk_update_monitor.
    santinized_zk_data = None

    if mode == 'CONFIG' and args.s3_key:
        if not AWS_KEY_FILE or not S3_BUCKET or not S3_ENDPOINT:
            log.error(
                "No AWS key file or S3 config is provided for accessing S3.")
            exit(_NO_AWS_KEY_FILE)
        log.info("Downloading from s3 key %s in bucket %s"
                 % (args.s3_key, S3_BUCKET))
        # We use the s3_key (which is actually a prefix) concatenated with
        # the value from the zk node to form the s3 path. Therefore, each
        # time we update new data to s3, we create a new S3 key (which can
        # be located from the zk node value), so as to make use of the
        # "almost" read-after-create guarantee from the s3 "special" url.
        zk_value = args.value
        if zk_value:
            log.debug("Use provided value %s" % zk_value)
        else:
            zk_value, version = get_data_from_zk(args.path, mode)
        s3_path = zk_util.construct_s3_path(args.s3_key, zk_value)

        # Download data from the s3 path.
        try:
            zk_data = s3config.S3Config(
                AWS_KEY_FILE, S3_BUCKET,
                s3_endpoint=S3_ENDPOINT).get_config_string(s3_path)
        except ValueError as ve:
            # If the s3 key specified by the zk node does not exist, this is
            # probably due to s3 read-after-write inconsistency. Abort
            # updating the local file.
            log.error("Abort downloading from s3 key %s due to ValueError: "
                      "%s" % (s3_path, str(ve)))
            exit(_S3_VALUE_ERROR_CODE)
        except Exception as e:
            log.error("Abort downloading from s3 key %s due to unexpected "
                      "s3 exception: %s" % (s3_path, str(e)))
            exit(_S3_ERROR_CODE)
    else:
        if args.value:
            log.debug("Use provided value %s" % args.value)
            if mode == 'SERVERSET':
                # Remove duplicates in the serverset value.
                endpoints_set = set(args.value.split(','))
                verify_and_correct_endpoints_set(endpoints_set, args.file)
                zk_data = '\n'.join(endpoints_set)
            else:
                zk_data = args.value
        elif args.from_value_file:
            from_value_file = args.from_value_file
            log.debug("Use provided value in %s" % args.from_value_file)
            try:
                with open(from_value_file, 'r') as f:
                    # Verify the file content is good.
                    value = f.readline().rstrip('\n')
                    md5digest = f.readline()
                    calculated_md5digest = zk_util.get_md5_digest(value)
                    if calculated_md5digest != md5digest:
                        log.error("Temp file %s content does not match md5"
                                  % from_value_file)
                        f.close()
                        exit(_BROKEN_VALUE_FILE)
                    if mode == 'SERVERSET':
                        endpoints_set = set(value.split(','))
                        verify_and_correct_endpoints_set(
                            endpoints_set, args.file)
                        zk_data = '\n'.join(endpoints_set)
                    else:
                        zk_data = value
            except Exception:
                log.error("Error reading from temp file %s" % from_value_file)
                exit(_BROKEN_VALUE_FILE)
        else:
            zk_data, version = get_data_from_zk(args.path, mode)

    if zk_data is None:
        log.error("Failed to get data from zookeeper.")
        exit(_FAILED_TO_GET_DATA_FROM_ZK_CODE)

    if args.show:
        print(zk_data)
        return

    log.debug("Target file = %s" % args.file)
    try:
        with open(args.file, "r") as file:
            existing_content = file.read()
    except IOError:
        existing_content = None

    if args.report_metadata:
        if mode == 'SERVERSET':
            serverset_list = zk_data.split('\n')
            serverset_list.sort()
            santinized_zk_data = ','.join(serverset_list)
        # Get the file modification time.
        if args.file and os.path.exists(args.file):
            modification_timestamp = os.path.getmtime(args.file)
        else:
            modification_timestamp = 0
        report_metadata(mode, santinized_zk_data, args.path, version,
                        notification_timestamp, modification_timestamp)

    if _same_content(existing_content, zk_data, mode):
        log.warn("The data in zookeeper is the same as the target file %s, "
                 "ignore." % args.file)
        if not args.allow_same_content:
            # Needs to exit with the special return code so the daemon can
            # ignore it.
            exit(_SAME_CONTENT_CODE)
        else:
            exit(0)

    found_stale_content = False
    if os.environ.get('__ZK_UPDATE_MONITOR_REFRESH__', False):
        found_stale_content = _check_stale_file_content(
            existing_content, args.file, zk_data, mode)

    log.info("Generating file %s with the data from zookeeper..."
             % args.file)
    tmp_dir = tempfile.gettempprefix()
    tmp_filename = os.path.join('/', tmp_dir, args.file.replace("/", "_"))
    try:
        with open(tmp_filename, 'w') as f:
            f.write(zk_data)
    except Exception:
        log.exception("Failed to generate file %s from config in zookeeper. "
                      "Data to write: %s" % (tmp_filename, zk_data))
        exit(_UNHANDLED_EXCPETION)
    shutil.copy2(tmp_filename, args.file)
    log.info("File is generated.")
    # Report the file modification time.
    report_file_modification(mode, args.path, time.time())
    if found_stale_content:
        exit(_STALE_LOCAL_FILE_CODE)
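
# Example invocation (illustrative only -- the paths, bucket, and port below
# are hypothetical placeholders; the flags themselves are the ones defined in
# main() above, and the module is assumed to be saved as zk_download_data.py):
#
#   python zk_download_data.py \
#       -m serverset \
#       -p /discovery/myservice/prod \
#       -f /var/serverset/discovery.myservice.prod \
#       -z /var/config/zk_hosts.conf \
#       -a /etc/aws/key_file \
#       -b my-config-bucket \
#       -o 8080
#
# In config mode with --from-s3, the script instead reads the zk node value,
# builds the S3 path from the given key prefix, and downloads the config body
# from S3 rather than from zookeeper.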