def start_cluster(self, reactor):
    """
    Provision cloud cluster for acceptance tests.

    Creates ``self.num_nodes`` nodes via ``self.provisioner``, provisions
    them in parallel (plus ZFS setup when that backend is selected), and
    hands them to ``configured_cluster_for_nodes``.

    :param reactor: The reactor used for SSH/remote-command effects.
    :return Cluster: The cluster to connect to for acceptance tests.
    """
    # Tag every instance so stray machines can be traced back to this tool.
    metadata = {
        'purpose': 'acceptance-testing',
        'distribution': self.distribution,
    }
    metadata.update(self.metadata)

    for index in range(self.num_nodes):
        name = "acceptance-test-%s-%d" % (self.creator, index)
        try:
            print "Creating node %d: %s" % (index, name)
            node = self.provisioner.create_node(
                name=name,
                distribution=self.distribution,
                metadata=metadata,
            )
        except:
            # NOTE(review): bare ``except`` is log-and-propagate here — it
            # re-raises unconditionally.  The instance may or may not have
            # been allocated in the cloud at this point.
            print "Error creating node %d: %s" % (index, name)
            print "It may have leaked into the cloud."
            raise

        # Forget any stale SSH host key recorded for this address before
        # the first connection.
        yield remove_known_host(reactor, node.address)
        self.nodes.append(node)
        # Drop the loop binding so later code cannot use a stale reference.
        del node

    # Provision every created node in parallel.
    commands = parallel([
        node.provision(package_source=self.package_source,
                       variants=self.variants)
        for node in self.nodes
    ])
    if self.dataset_backend == DatasetBackend.zfs:
        # ZFS needs extra per-node configuration, chained to run only
        # after provisioning succeeds.
        zfs_commands = parallel([
            configure_zfs(node, variants=self.variants)
            for node in self.nodes
        ])
        commands = commands.on(success=lambda _: zfs_commands)
    yield perform(make_dispatcher(reactor), commands)

    cluster = yield configured_cluster_for_nodes(
        reactor,
        generate_certificates(
            make_cluster_id(
                TestTypes.ACCEPTANCE,
                _provider_for_cluster_id(self.dataset_backend),
            ),
            self.nodes),
        self.nodes,
        self.dataset_backend,
        self.dataset_backend_configuration,
        _save_backend_configuration(self.dataset_backend,
                                    self.dataset_backend_configuration)
    )

    returnValue(cluster)
def start_nodes(self, reactor):
    """
    Boot the Vagrant nodes used for the tests.

    Any pre-existing box is destroyed first so every run starts from a
    clean build; the VMs are then booted and Docker images are pre-pulled
    onto each node.

    :param reactor: The reactor used to run subprocesses and SSH effects.
    :return: A ``Deferred`` firing with a list of ``VagrantNode``\ s.
    """
    # Tear down any leftover box so we are guaranteed a clean build.
    yield run(
        reactor, ['vagrant', 'destroy', '-f'],
        path=self.vagrant_path.path)

    version = self.package_source.version
    if version:
        # Pin the box version for the build via the environment.
        box_env = extend_environ(
            FLOCKER_BOX_VERSION=vagrant_version(version))
    else:
        box_env = os.environ

    # Bring the VMs up.
    yield run(
        reactor, ['vagrant', 'up'],
        path=self.vagrant_path.path, env=box_env)

    for address in self.NODE_ADDRESSES:
        # Clear any stale SSH host key, then pre-pull Docker images.
        yield remove_known_host(reactor, address)
        yield perform(
            make_dispatcher(reactor),
            run_remotely(
                username='******',
                address=address,
                commands=task_pull_docker_images(),
            ),
        )

    returnValue([
        VagrantNode(address=address, distribution=self.distribution)
        for address in self.NODE_ADDRESSES
    ])
def _upgrade_flocker(self, reactor, nodes, package_source):
    """
    Put the version of Flocker indicated by ``package_source`` onto all of
    the given nodes.

    Rather than relying on OS-level package upgrade support, the old
    package is removed and the new version installed — simply because it
    is easier.  Removal is allowed to fail (the package may not be
    installed yet; other failures are not differentiated); a failure there
    is only logged.

    :param pvector nodes: The ``ManagedNode``\ s on which to upgrade the
        software.
    :param PackageSource package_source: The version of the software to
        which to upgrade.

    :return: A ``Deferred`` that fires when the software has been
        upgraded.
    """
    dispatcher = make_dispatcher(reactor)

    def do_install(_):
        # Runs whether or not the removal succeeded.
        return perform(dispatcher, install_flocker(nodes, package_source))

    removing = perform(dispatcher, uninstall_flocker(nodes))
    # Expected to fail when Flocker was never installed: record the
    # failure and carry on with the install.
    removing.addErrback(write_failure, logger=None)
    return removing.addCallback(do_install)
def configured_cluster_for_nodes(reactor, certificates, nodes,
                                 dataset_backend,
                                 dataset_backend_configuration):
    """
    Get a ``Cluster`` with Flocker services running on the right nodes.

    :param reactor: The reactor.
    :param Certificates certificates: The certificates to install on the
        cluster.
    :param nodes: The ``ManagedNode``s on which to operate.
    :param NamedConstant dataset_backend: The ``DatasetBackend`` constant
        representing the dataset backend that the nodes will be configured
        to use when they are "started".
    :param dict dataset_backend_configuration: The backend-specific
        configuration the nodes will be given for their dataset backend.

    :returns: A ``Deferred`` which fires with ``Cluster`` when it is
        configured.
    """
    # The first node doubles as the control node; every node is an agent.
    cluster = Cluster(
        all_nodes=pvector(nodes),
        control_node=nodes[0],
        agent_nodes=nodes,
        dataset_backend=dataset_backend,
        certificates=certificates,
    )

    d = perform(
        make_dispatcher(reactor),
        configure_cluster(cluster, dataset_backend_configuration),
    )
    d.addCallback(lambda _: cluster)
    return d
def main(reactor, args, base_path, top_level):
    """
    Entry point: provision one node, install the Flocker CLI on it, and
    run the client acceptance tests against it.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    try:
        # A single node is enough for the client tests.
        nodes = yield runner.start_nodes(reactor, node_count=1)
        yield perform(
            make_dispatcher(reactor),
            install_cli(runner.package_source, nodes[0]))
        result = yield run_client_tests(reactor=reactor, node=nodes[0])
    except:
        # NOTE(review): bare ``except`` re-raises on purpose; ``result`` is
        # set first so the ``finally`` clause below can consult it.
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not (result != 0 and options['keep']):
            runner.stop_nodes(reactor)
        elif options['keep']:
            print "--keep specified, not destroying nodes."
    raise SystemExit(result)
def configured_cluster_for_nodes(
    reactor, certificates, nodes, dataset_backend,
    dataset_backend_configuration
):
    """
    Get a ``Cluster`` with Flocker services running on the right nodes.

    :param reactor: The reactor.
    :param Certificates certificates: The certificates to install on the
        cluster.
    :param nodes: The ``ManagedNode``s on which to operate.
    :param NamedConstant dataset_backend: The ``DatasetBackend`` constant
        representing the dataset backend that the nodes will be configured
        to use when they are "started".
    :param dict dataset_backend_configuration: The backend-specific
        configuration the nodes will be given for their dataset backend.

    :returns: A ``Deferred`` which fires with ``Cluster`` when it is
        configured.
    """
    # Node zero serves as the control node; all nodes run agents.
    cluster = Cluster(
        all_nodes=pvector(nodes),
        control_node=nodes[0],
        agent_nodes=nodes,
        dataset_backend=dataset_backend,
        certificates=certificates
    )

    def deliver_cluster(ignored):
        # Replace the configuration result with the cluster itself.
        return cluster

    configuring = perform(
        make_dispatcher(reactor),
        configure_cluster(cluster, dataset_backend_configuration)
    )
    return configuring.addCallback(deliver_cluster)
def configured_cluster_for_nodes(
    reactor, certificates, nodes, dataset_backend,
    dataset_backend_configuration, dataset_backend_config_file,
    provider=None
):
    """
    Get a ``Cluster`` with Flocker services running on the right nodes.

    :param reactor: The reactor.
    :param Certificates certificates: The certificates to install on the
        cluster.
    :param nodes: The ``ManagedNode``s on which to operate.
    :param NamedConstant dataset_backend: The ``DatasetBackend`` constant
        representing the dataset backend that the nodes will be configured
        to use when they are "started".
    :param dict dataset_backend_configuration: The backend-specific
        configuration the nodes will be given for their dataset backend.
    :param FilePath dataset_backend_config_file: A FilePath that has the
        dataset_backend info stored.

    :returns: A ``Deferred`` which fires with ``Cluster`` when it is
        configured.
    """
    # XXX: There is duplication between the values here and those in
    # f.node.agents.test.blockdevicefactory.MINIMUM_ALLOCATABLE_SIZES.  We
    # want the default volume size to be greater than or equal to the
    # minimum allocatable size.
    #
    # Ideally, the minimum allocatable size (and perhaps the default
    # volume size) would be something known by an object that represents
    # the dataset backend.  Unfortunately:
    # 1. There is no such object
    # 2. There is existing confusion in the code around 'openstack' and
    #    'rackspace'
    #
    # Rackspace is special-cased (presumably) because it has a minimum
    # allocatable size that is different from other Openstack backends.
    #
    # FLOC-2584 also discusses this.
    if dataset_backend_configuration.get('auth_plugin') == 'rackspace':
        default_volume_size = RACKSPACE_MINIMUM_VOLUME_SIZE
    else:
        default_volume_size = GiB(1)

    cluster = Cluster(
        all_nodes=pvector(nodes),
        control_node=nodes[0],
        agent_nodes=nodes,
        dataset_backend=dataset_backend,
        default_volume_size=int(default_volume_size.to_Byte().value),
        certificates=certificates,
        dataset_backend_config_file=dataset_backend_config_file
    )

    d = perform(
        make_dispatcher(reactor),
        configure_cluster(cluster, dataset_backend_configuration, provider)
    )
    d.addCallback(lambda _: cluster)
    return d
def main(reactor, args, base_path, top_level):
    """
    Entry point: test a Homebrew recipe inside a VMware VM.

    The VM is reverted to a known snapshot, booted headless, the recipe
    is installed and tested over SSH, and the VM is stopped again.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    try:
        options = TestBrewOptions()
        try:
            options.parseOptions(args)
        except UsageError as e:
            sys.stderr.write("Error: {error}.\n".format(error=str(e)))
            sys.exit(1)

        # Send Eliot log messages to stdout.
        eliot_to_stdout(MESSAGE_FORMATS, {})

        recipe_url = options['recipe_url']
        options['vmpath'] = FilePath(options['vmpath'])
        # Open the recipe URL just to validate and verify that it exists.
        # We do not need to read its content.
        urllib2.urlopen(recipe_url)
        # Revert to a clean snapshot so runs are reproducible.
        yield run(reactor, [
            "vmrun", "revertToSnapshot",
            options['vmpath'].path, options['vmsnapshot'],
        ])
        yield run(reactor, [
            "vmrun", "start", options['vmpath'].path, "nogui",
        ])
        yield perform(
            make_dispatcher(reactor),
            run_remotely(
                username=options['vmuser'],
                address=options['vmhost'],
                commands=sequence([
                    task_configure_brew_path(),
                    task_test_homebrew(recipe_url),
                ]),
            ),
        )
        # "hard" powers the VM off without a guest-OS shutdown
        # (vmrun semantics).
        yield run(reactor, [
            "vmrun", "stop", options['vmpath'].path, "hard",
        ])
        print "Done."
    except ProcessTerminated as e:
        sys.stderr.write(
            ("Error: Command terminated with exit status {code}.\n").format(
                code=e.exitCode))
        raise
def _setup_control_node(self, reactor, node, index):
    """
    Configure ``node`` as the cluster's control-service node.

    Generates cluster certificates, saves the dataset backend
    configuration to a file, runs the control-node configuration
    remotely, then adds the node to the cluster via
    ``_add_node_to_cluster``.

    :param reactor: The reactor.
    :param node: The node to configure as the control node.
    :param index: Index of this node; forwarded to
        ``_add_node_to_cluster``.
    :return: ``Deferred`` firing with the configured ``Cluster``.
    """
    print "Selecting node {} for control service".format(node.name)
    certificates = Certificates.generate(
        directory=self.cert_path,
        control_hostname=node.address,
        # num_nodes=0: no node certificates yet — presumably generated
        # later as nodes join; confirm against Certificates.generate.
        num_nodes=0,
        cluster_name=self.identity.name,
        cluster_id=self.identity.id,
    )
    dataset_backend_config_file = save_backend_configuration(
        self.dataset_backend,
        self.dataset_backend_configuration
    )

    # Start with a single-node cluster; agent nodes are appended later.
    cluster = Cluster(
        all_nodes=[node],
        control_node=node,
        agent_nodes=[],
        dataset_backend=self.dataset_backend,
        default_volume_size=get_default_volume_size(
            self.dataset_backend_configuration
        ),
        certificates=certificates,
        dataset_backend_config_file=dataset_backend_config_file
    )
    commands = configure_control_node(
        cluster,
        'libcloud',
        logging_config=self.config.get('logging'),
    )
    d = perform(make_dispatcher(reactor), commands)

    def configure_failed(failure):
        # Log and propagate — callers still see the failure.
        print "Failed to configure control node"
        write_failure(failure)
        return failure

    # It should be sufficient to configure just the control service here,
    # but there is an assumption that the control node is both a control
    # node and an agent node.
    d.addCallbacks(
        lambda _: self._add_node_to_cluster(
            reactor, cluster, node, index
        ),
        errback=configure_failed,
    )
    # Return the cluster.
    d.addCallback(lambda _: cluster)
    return d
def main(reactor, args, base_path, top_level):
    """
    Entry point: test a Homebrew recipe inside a VMware VM.

    The VM is reverted to a known snapshot, booted headless, the recipe
    is installed and tested over SSH, and the VM is stopped again.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    try:
        options = TestBrewOptions()
        try:
            options.parseOptions(args)
        except UsageError as e:
            sys.stderr.write("Error: {error}.\n".format(error=str(e)))
            sys.exit(1)

        # Register the Eliot log destination.
        add_destination(eliot_output)

        recipe_url = options['recipe_url']
        options['vmpath'] = FilePath(options['vmpath'])
        # Open the recipe URL just to validate and verify that it exists.
        # We do not need to read its content.
        urllib2.urlopen(recipe_url)
        # Revert to a clean snapshot so runs are reproducible.
        yield run(reactor, [
            "vmrun", "revertToSnapshot",
            options['vmpath'].path, options['vmsnapshot'],
        ])
        yield run(reactor, [
            "vmrun", "start", options['vmpath'].path, "nogui",
        ])
        yield perform(
            make_dispatcher(reactor),
            run_remotely(
                username=options['vmuser'],
                address=options['vmhost'],
                commands=sequence([
                    task_configure_brew_path(),
                    task_test_homebrew(recipe_url),
                ]),
            ),
        )
        # "hard" powers the VM off without a guest-OS shutdown
        # (vmrun semantics).
        yield run(reactor, [
            "vmrun", "stop", options['vmpath'].path, "hard",
        ])
        print "Done."
    except ProcessTerminated as e:
        sys.stderr.write(
            (
                "Error: Command terminated with exit status {code}.\n"
            ).format(code=e.exitCode)
        )
        raise
def start_nodes(self, reactor): """ Provision cloud nodes for acceptance tests. :return list: List of addresses of nodes to connect to, for acceptance tests. """ metadata = { 'purpose': 'acceptance-testing', 'distribution': self.distribution, } metadata.update(self.metadata) for index in range(2): name = "acceptance-test-%s-%d" % (self.creator, index) try: print "Creating node %d: %s" % (index, name) node = self.provisioner.create_node( name=name, distribution=self.distribution, metadata=metadata, ) except: print "Error creating node %d: %s" % (index, name) print "It may have leaked into the cloud." raise yield remove_known_host(reactor, node.address) self.nodes.append(node) del node commands = parallel([ node.provision(package_source=self.package_source, variants=self.variants) for node in self.nodes ]) if self.dataset_backend == DatasetBackend.zfs: zfs_commands = parallel([ configure_zfs(node, variants=self.variants) for node in self.nodes ]) commands = commands.on(success=lambda _: zfs_commands) yield perform(make_dispatcher(reactor), commands) returnValue(self.nodes)
def main(reactor, args, base_path, top_level):
    """
    Entry point: provision one node, install the Flocker CLI on it, and
    run the client acceptance tests against it, with Eliot logging to a
    file.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    # NOTE(review): imported here rather than at module level — presumably
    # to avoid an import cycle; confirm before hoisting.
    from flocker.common.script import eliot_logging_service
    log_file = open("%s.log" % base_path.basename(), "a")
    log_writer = eliot_logging_service(
        log_file=log_file,
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    # Flush and stop the log service when the reactor shuts down.
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    try:
        # A single node is enough for the client tests.
        nodes = yield runner.start_nodes(reactor, node_count=1)
        yield perform(
            make_dispatcher(reactor),
            install_cli(runner.package_source, nodes[0]))
        result = yield run_client_tests(reactor=reactor, node=nodes[0])
    except:
        # NOTE(review): bare ``except`` re-raises on purpose; ``result`` is
        # set first so the ``finally`` clause below can consult it.
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not (result != 0 and options['keep']):
            runner.stop_nodes(reactor)
        elif options['keep']:
            print "--keep specified, not destroying nodes."
    raise SystemExit(result)
def run_client_tests(reactor, node):
    """
    Run the client acceptance tests.

    :param INode node: The node to run client acceptance tests against.
    :return int: The exit-code of trial.
    """
    def extract_exit_code(failure):
        failure.trap(ProcessTerminated)
        code = failure.value.exitCode
        if code is None:
            # No exit code available (e.g. the process was killed by a
            # signal) — keep propagating the failure.
            return failure
        return code

    remote = run_remotely(
        username=node.get_default_username(),
        address=node.address,
        commands=task_client_installation_test(),
    )
    running = perform(make_dispatcher(reactor), remote)
    return running.addCallbacks(
        callback=lambda _: 0,
        errback=extract_exit_code,
    )
def run_client_tests(reactor, node):
    """
    Run the client acceptance tests.

    :param INode node: The node to run client acceptance tests against.
    :return int: The exit-code of trial.
    """
    def on_failure(reason):
        # Only a terminated remote process maps to an exit code; any other
        # failure keeps propagating unchanged.
        reason.trap(ProcessTerminated)
        exit_code = reason.value.exitCode
        return exit_code if exit_code is not None else reason

    running = perform(make_dispatcher(reactor), run_remotely(
        username=node.get_default_username(),
        address=node.address,
        commands=task_client_installation_test()
    ))
    running.addCallbacks(callback=lambda _: 0, errback=on_failure)
    return running
def main(reactor, args, base_path, top_level):
    """
    Entry point: provision an acceptance-test cluster, optionally pre-pull
    Docker images, run the acceptance tests, and clean up (or keep) the
    nodes.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    # NOTE(review): imported here rather than at module level — presumably
    # to avoid an import cycle; confirm before hoisting.
    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    # Flush and stop the log service when the reactor shuts down.
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    # ``cluster`` stays None if start_cluster fails; the finally clause
    # below relies on that to tell the two cases apart.
    cluster = None
    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)

        if options['distribution'] in ('centos-7',):
            # Stream journald output from every node into a local file.
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                capture_journal(reactor, node.address, remote_logs_file)

        if not options["no-pull"]:
            # Pre-pull Docker images on all agent nodes in parallel.
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(
                        username='******',
                        address=node.address,
                        commands=task_pull_docker_images()
                    ) for node in cluster.agent_nodes
                ]),
            )

        result = yield run_tests(
            reactor=reactor,
            cluster=cluster,
            trial_args=options['trial-args'])
    except:
        # NOTE(review): bare ``except`` re-raises on purpose; ``result`` is
        # set first so the ``finally`` clause below can consult it.
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not options['keep']:
            runner.stop_cluster(reactor)
        else:
            print "--keep specified, not destroying nodes."
            if cluster is None:
                print ("Didn't finish creating the cluster.")
            else:
                # Print the environment needed to re-run the tests against
                # the kept nodes by hand.
                print ("To run acceptance tests against these nodes, "
                       "set the following environment variables: ")

                environment_variables = get_trial_environment(cluster)

                for environment_variable in environment_variables:
                    print "export {name}={value};".format(
                        name=environment_variable,
                        value=shell_quote(
                            environment_variables[environment_variable]),
                    )
    raise SystemExit(result)
def main(reactor, args, base_path, top_level):
    """
    Entry point: provision an acceptance-test cluster, optionally pre-pull
    Docker images, run the acceptance tests, and clean up (or keep) the
    nodes.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    # NOTE(review): imported here rather than at module level — presumably
    # to avoid an import cycle; confirm before hoisting.
    from flocker.common.script import eliot_logging_service
    log_file = open("%s.log" % base_path.basename(), "a")
    log_writer = eliot_logging_service(
        log_file=log_file,
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    # Flush and stop the log service when the reactor shuts down.
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    # ``cluster`` stays None if start_cluster fails; the finally clause
    # below relies on that to tell the two cases apart.
    cluster = None
    try:
        cluster = yield runner.start_cluster(reactor)

        if options['distribution'] in ('centos-7', ):
            # Stream journald output from every node into a local file.
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                capture_journal(reactor, node.address, remote_logs_file)

        if not options["no-pull"]:
            # Pre-pull Docker images on all agent nodes in parallel.
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(username='******',
                                 address=node.address,
                                 commands=task_pull_docker_images())
                    for node in cluster.agent_nodes
                ]),
            )

        result = yield run_tests(reactor=reactor,
                                 cluster=cluster,
                                 trial_args=options['trial-args'])
    except:
        # NOTE(review): bare ``except`` re-raises on purpose; ``result`` is
        # set first so the ``finally`` clause below can consult it.
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not options['keep']:
            runner.stop_cluster(reactor)
        else:
            print "--keep specified, not destroying nodes."
            if cluster is None:
                print("Didn't finish creating the cluster.")
            else:
                # Print the environment needed to re-run the tests against
                # the kept nodes by hand.
                print(
                    "To run acceptance tests against these nodes, "
                    "set the following environment variables: ")

                environment_variables = get_trial_environment(cluster)

                for environment_variable in environment_variables:
                    print "export {name}={value};".format(
                        name=environment_variable,
                        value=shell_quote(
                            environment_variables[environment_variable]),
                    )
    raise SystemExit(result)
def start_cluster(self, reactor):
    """
    Provision cloud cluster for acceptance tests.

    Creates ``self.num_nodes`` uniquely-named nodes via
    ``self.provisioner``, provisions them in parallel (plus ZFS setup
    when that backend is selected), and hands them to
    ``configured_cluster_for_nodes``.

    :param reactor: The reactor used for SSH/remote-command effects.
    :return Cluster: The cluster to connect to for acceptance tests.
    """
    metadata = {
        'distribution': self.distribution,
    }
    metadata.update(self.identity.metadata)
    metadata.update(self.metadata)

    # Try to make names unique even if the same creator is starting
    # multiple clusters at the same time.  This lets other code use the
    # name as a way to identify nodes.  This is only necessary in one
    # place, the node creation code, to perform cleanup when the create
    # operation fails in a way such that it isn't clear if the instance
    # has been created or not.
    random_tag = os.urandom(8).encode("base64").strip("\n=")
    print "Assigning random tag:", random_tag

    for index in range(self.num_nodes):
        name = "%s-%s-%s-%d" % (
            self.identity.prefix, self.creator, random_tag, index,
        )
        try:
            print "Creating node %d: %s" % (index, name)
            node = self.provisioner.create_node(
                name=name,
                distribution=self.distribution,
                metadata=metadata,
            )
        except:
            # NOTE(review): bare ``except`` is log-and-propagate here — it
            # re-raises unconditionally.  The instance may or may not have
            # been allocated in the cloud at this point.
            print "Error creating node %d: %s" % (index, name)
            print "It may have leaked into the cloud."
            raise

        # Forget any stale SSH host key recorded for this address.
        yield remove_known_host(reactor, node.address)
        self.nodes.append(node)
        # Drop the loop binding so later code cannot use a stale reference.
        del node

    # Provision every created node in parallel.
    commands = parallel([
        node.provision(package_source=self.package_source,
                       variants=self.variants)
        for node in self.nodes
    ])
    if self.dataset_backend == DatasetBackend.zfs:
        # ZFS needs extra per-node configuration, chained to run only
        # after provisioning succeeds.
        zfs_commands = parallel([
            configure_zfs(node, variants=self.variants)
            for node in self.nodes
        ])
        commands = commands.on(success=lambda _: zfs_commands)
    yield perform(make_dispatcher(reactor), commands)

    cluster = yield configured_cluster_for_nodes(
        reactor,
        generate_certificates(
            self.identity.name,
            self.identity.id,
            self.nodes,
            self.cert_path,
        ),
        self.nodes,
        self.dataset_backend,
        self.dataset_backend_configuration,
        _save_backend_configuration(self.dataset_backend,
                                    self.dataset_backend_configuration),
        logging_config=self.config.get('logging'),
    )

    returnValue(cluster)
def main(reactor, args, base_path, top_level):
    """
    Entry point: provision nodes, generate certificates, configure a
    cluster on them, run the acceptance tests, and clean up (or keep) the
    nodes.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    try:
        nodes = yield runner.start_nodes(reactor)

        # Certificates live in a fresh temporary directory for this run.
        ca_directory = FilePath(mkdtemp())
        print("Generating certificates in: {}".format(ca_directory.path))
        certificates = Certificates.generate(ca_directory,
                                             nodes[0].address,
                                             len(nodes))

        # Pre-pull Docker images on all nodes in parallel.
        yield perform(
            make_dispatcher(reactor),
            parallel([
                run_remotely(
                    username='******',
                    address=node.address,
                    commands=task_pull_docker_images()
                ) for node in nodes
            ]),
        )

        # The first node doubles as the control node.
        control_node = nodes[0]
        dataset_backend = options.dataset_backend

        yield perform(
            make_dispatcher(reactor),
            configure_cluster(control_node=control_node,
                              agent_nodes=nodes,
                              certificates=certificates,
                              dataset_backend=dataset_backend))

        result = yield run_tests(
            reactor=reactor,
            nodes=nodes,
            control_node=control_node,
            agent_nodes=nodes,
            dataset_backend=dataset_backend,
            trial_args=options['trial-args'],
            certificates_path=ca_directory)
    except:
        # NOTE(review): bare ``except`` re-raises on purpose; ``result`` is
        # set first so the ``finally`` clause below can consult it.
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not (result != 0 and options['keep']):
            runner.stop_nodes(reactor)
        elif options['keep']:
            print "--keep specified, not destroying nodes."
            # Print the environment needed to re-run the tests against the
            # kept nodes by hand.
            print ("To run acceptance tests against these nodes, "
                   "set the following environment variables: ")

            environment_variables = {
                'FLOCKER_ACCEPTANCE_NODES':
                    ':'.join(node.address for node in nodes),
                'FLOCKER_ACCEPTANCE_CONTROL_NODE': control_node.address,
                'FLOCKER_ACCEPTANCE_AGENT_NODES':
                    ':'.join(node.address for node in nodes),
                'FLOCKER_ACCEPTANCE_VOLUME_BACKEND': dataset_backend.name,
                'FLOCKER_ACCEPTANCE_API_CERTIFICATES_PATH':
                    ca_directory.path,
            }

            for environment_variable in environment_variables:
                print "export {name}={value};".format(
                    name=environment_variable,
                    value=environment_variables[environment_variable],
                )
    raise SystemExit(result)
def main(reactor, args, base_path, top_level):
    """
    Entry point: provision an acceptance-test cluster, capture remote
    logs, optionally pre-pull Docker images, run the acceptance tests,
    and arrange cluster teardown via a reactor shutdown trigger.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    # NOTE(review): imported here rather than at module level — presumably
    # to avoid an import cycle; confirm before hoisting.
    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    # Flush and stop the log service when the reactor shuts down.
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []

    # Flags read by ``cluster_cleanup`` (Python 2: the closure reads the
    # current value of the enclosing local when it runs).
    setup_succeeded = False
    reached_finally = False

    def cluster_cleanup():
        # Runs at reactor shutdown unless the trigger was removed (the
        # --keep case below); also fires on interruption before the
        # ``finally`` clause was reached.
        if not reached_finally:
            print "interrupted..."
        print "stopping cluster"
        return runner.stop_cluster(reactor)

    cleanup_trigger_id = reactor.addSystemEventTrigger(
        'before', 'shutdown', cluster_cleanup)

    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)

        # Stream remote logs (journald on CentOS, upstart on Ubuntu) into
        # a local file.
        if options['distribution'] in ('centos-7',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(
                    capture_journal(reactor, node.address, remote_logs_file)
                )
        elif options['distribution'] in ('ubuntu-14.04',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(
                    capture_upstart(reactor, node.address, remote_logs_file)
                )
        gather_deferreds(results)

        if not options["no-pull"]:
            # Pre-pull Docker images on all agent nodes in parallel.
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(
                        username='******',
                        address=node.address,
                        commands=task_pull_docker_images()
                    ) for node in cluster.agent_nodes
                ]),
            )

        setup_succeeded = True
        result = yield run_tests(
            reactor=reactor,
            cluster=cluster,
            trial_args=options['trial-args'])
    finally:
        reached_finally = True
        # We delete the nodes if the user hasn't asked to keep them
        # or if we failed to provision the cluster.  Actual teardown
        # happens in ``cluster_cleanup`` at reactor shutdown; only the
        # --keep branch removes that trigger.
        if not setup_succeeded:
            print "cluster provisioning failed"
        elif not options['keep']:
            print "not keeping cluster"
        else:
            print "--keep specified, not destroying nodes."
            # Print the environment needed to re-run the tests against
            # the kept nodes by hand.
            print ("To run acceptance tests against these nodes, "
                   "set the following environment variables: ")

            environment_variables = get_trial_environment(cluster)

            for environment_variable in environment_variables:
                print "export {name}={value};".format(
                    name=environment_variable,
                    value=shell_quote(
                        environment_variables[environment_variable]),
                )
            reactor.removeSystemEventTrigger(cleanup_trigger_id)
    raise SystemExit(result)