def publish_report(self, report=None):
    """Upload the final pipeline report to the output bucket and announce it.

    Serializes `report` to a local temp file, copies it to
    '<final_output_dir>/<name>_final_report.json' with gsutil, and — if the
    reporting Pub/Sub topic was validated earlier — publishes the destination
    path to that topic.

    Args:
        report: object whose str() form is the report body; no-op when None.
    """
    # Exit as nothing to output
    if report is None:
        return

    # Generate report file for transfer.
    # delete=False is required so the file still exists when gsutil reads it
    # after the 'with' block has closed the handle.
    with tempfile.NamedTemporaryFile(delete=False) as report_file:
        report_file.write(str(report).encode("utf8"))
        report_filepath = report_file.name

    # Generate destination file path
    dest_path = os.path.join(self.final_output_dir, "%s_final_report.json" % self.name)

    try:
        # Transfer report file to bucket
        options_fast = '-m -o "GSUtil:sliced_object_download_max_components=200"'
        cmd = "gsutil %s cp -r %s %s 1>/dev/null 2>&1 " % (
            options_fast, report_filepath, dest_path)
        GoogleCloudHelper.run_cmd(
            cmd, err_msg="Could not transfer final report to the final output directory!")
    finally:
        # BUGFIX: the temp file was previously leaked on disk (delete=False,
        # never removed). Clean it up whether or not the transfer succeeded.
        try:
            os.remove(report_filepath)
        except OSError:
            pass

    # Send report to the Pub/Sub report topic if it's known to exist
    if self.report_topic_validated:
        GoogleCloudHelper.send_pubsub_message(self.report_topic,
                                              message=dest_path,
                                              encode=True,
                                              compress=True)
def __init__(self, name, nr_cpus, mem, disk_space, **kwargs):
    """Build a Google Compute Engine instance wrapper.

    Required kwargs: zone, service_acct, disk_image.
    Optional kwargs: is_boot_disk_ssd (default False), nr_local_ssd (default 0).
    """
    # Let the base processor record the shared resource requirements first
    super(Instance, self).__init__(name, nr_cpus, mem, disk_space, **kwargs)

    # Mandatory platform settings
    self.zone = kwargs.pop("zone")
    self.service_acct = kwargs.pop("service_acct")
    self.disk_image = kwargs.pop("disk_image")

    # Optional disk settings, with defaults
    self.is_boot_disk_ssd = kwargs.pop("is_boot_disk_ssd", False)
    self.nr_local_ssd = kwargs.pop("nr_local_ssd", 0)

    # Region is derived from the configured zone
    self.region = GoogleCloudHelper.get_region(self.zone)

    # The trailing dash-separated token of the name serves as the random id
    self.rand_instance_id = self.name.rsplit("-", 1)[-1]

    # A plain (non-preemptible) instance is not resettable
    self.is_preemptible = False

    # Machine type is chosen at creation time from current Google pricing
    self.instance_type = None

    # Hourly price and accumulated cost of this run
    self.price = 0
    self.cost = 0

    # Set once the startup script reports completion
    self.__startup_script_complete = False
def create(self):
    """Create the instance on Google Cloud and block until its startup script completes.

    Picks the cheapest adequate machine type for the requested resources,
    records its price, launches the gcloud create command, then waits for the
    startup script to finish.

    Raises:
        RuntimeError: if the processor is not 'OFF' or is locked.
    """
    # Hoist the status lookup — the original called get_status() twice
    status = self.get_status()

    # Begin running command to create the instance on Google Cloud
    if status != Processor.OFF:
        # BUGFIX: message previously read "already exits"
        logging.error("(%s) Cannot create processor! One with that name already exists with current status: %s" % (
            self.name, status))
        raise RuntimeError("Processor can only be created if it's 'OFF'!")
    elif self.is_locked():
        logging.error("(%s) Failed to create processor. Processor locked!" % self.name)
        raise RuntimeError("Cannot create processor while locked!")

    # Set status to indicate that commands can't be run on processor because it's busy
    logging.info("(%s) Process 'create' started!" % self.name)

    # Determine instance type and actual resource usage based on current Google prices in instance zone
    self.nr_cpus, self.mem, self.instance_type = GoogleCloudHelper.get_optimal_instance_type(
        self.nr_cpus, self.mem, self.zone, self.is_preemptible)

    # Determine instance price at time of creation
    self.price = GoogleCloudHelper.get_instance_price(
        self.nr_cpus, self.mem, self.disk_space, self.instance_type,
        self.zone, self.is_preemptible, self.is_boot_disk_ssd, self.nr_local_ssd)
    logging.debug("(%s) Instance type is %s. Price per hour: %s cents" % (self.name, self.instance_type, self.price))

    # Generate gcloud create cmd
    cmd = self.__get_gcloud_create_cmd()

    # Try to create instance until either it's successful, we're out of retries, or the processor is locked
    self.processes["create"] = Process(cmd, cmd=cmd, stdout=sp.PIPE, stderr=sp.PIPE,
                                       shell=True, num_retries=self.default_num_cmd_retries)
    self.wait_process("create")

    # Wait for startup script to completely finish
    logging.debug("(%s) Waiting for instance startup-script completion..." % self.name)
    self.wait_until_ready()
    logging.debug("(%s) Instance startup complete! %s Now live and ready to run commands!" % (self.name, self.name))
def __poll_startup_script(self):
    """Return True once the instance metadata contains a 'READY' key,
    which indicates the startup script has finished running."""
    instance_data = GoogleCloudHelper.describe(self.name, self.zone)
    metadata_items = instance_data["metadata"]["items"]
    return any(entry["key"] == "READY" for entry in metadata_items)
def validate(self):
    """Validate platform configuration: output bucket, disk image and report topic.

    Creates the output bucket on demand, records the disk image's size as the
    minimum boot-disk size, and marks the reporting Pub/Sub topic as validated.

    Raises:
        IOError: on a non-gs:// output path or a missing reporting topic.
    """
    # The final output location must live on Google Cloud Storage
    if not self.final_output_dir.startswith("gs://"):
        logging.error(
            "Invalid final output directory: %s. Google bucket paths must begin with 'gs://'" % self.final_output_dir)
        raise IOError("Invalid final output directory!")

    # Create the destination bucket if it is not there yet
    bucket = GoogleCloudHelper.get_bucket_from_path(self.final_output_dir)
    if not GoogleCloudHelper.bucket_exists(bucket):
        logging.info("Bucket {0} does not exists. Creating it now!".format(bucket))
        GoogleCloudHelper.mb(bucket,
                             project=self.google_project,
                             region=GoogleCloudHelper.get_region(self.zone))

    # The boot disk can never be smaller than the disk image it is built from
    image_info = GoogleCloudHelper.get_disk_image_info(self.config["task_processor"]["disk_image"])
    self.MIN_DISK_SPACE = int(image_info["diskSizeGb"])

    # The reporting Pub/Sub topic must already exist
    if not GoogleCloudHelper.pubsub_topic_exists(self.report_topic):
        logging.error("Reporting topic '%s' was not found!" % self.report_topic)
        raise IOError("Reporting topic '%s' not found!" % self.report_topic)

    # Remember that the topic check passed so reports can be published later
    self.report_topic_validated = True
def __get_instance_config(self):
    """Assemble the complete configuration dict for a task processor."""
    # Shallow copy of the configured task-processor options
    params = dict(self.config["task_processor"])

    # Add platform-specific options
    params["zone"] = self.zone
    params["service_acct"] = self.service_acct

    # Optionally scatter instances across a random zone within the home region
    if self.randomize_zone:
        home_region = GoogleCloudHelper.get_region(self.zone)
        params["zone"] = GoogleCloudHelper.select_random_zone(home_region)

    return params
def update_status(self):
    """Refresh this processor's status and external IP from the GCE API.

    Maps the Google instance state onto Processor states:
    TERMINATED/STOPPING -> DESTROYING, PROVISIONING/STAGING -> CREATING,
    RUNNING -> AVAILABLE (only once SSH is ready, else CREATING), and a
    missing instance -> OFF. Transient API errors are retried up to
    self.default_num_cmd_retries times with a 5-second sleep in between.

    Raises:
        RuntimeError: on an unrecognized instance status.
    """
    # Initialize the number of retries
    retries = 0
    # Get status from the cloud
    while True:
        try:
            # Obtain the instance information
            data = GoogleCloudHelper.describe(self.name, self.zone)
            # Update the external IP address (natIP is absent until assigned)
            self.external_IP = data["networkInterfaces"][0][
                "accessConfigs"][0].get("natIP", None)
            # Set the status accordingly
            if data["status"] in ["TERMINATED", "STOPPING"]:
                self.set_status(Processor.DESTROYING)
                break
            elif data["status"] in ["PROVISIONING", "STAGING"]:
                self.set_status(Processor.CREATING)
                break
            elif data["status"] == "RUNNING":
                # RUNNING alone is not enough — the instance only counts as
                # AVAILABLE once it is reachable over SSH
                self.set_status(Processor.AVAILABLE if self.
                                ssh_ready else Processor.CREATING)
                break
            else:
                raise RuntimeError(
                    "Unkown Google Compute Engine instance status: %s!"
                    % data["status"])
        # If no resource found, then the processor was manually deleted by someone
        except GoogleResourceNotFound:
            # Update the external IP address
            self.external_IP = None
            # Set the status to OFF
            self.set_status(Processor.OFF)
            break
        # For any other error, retry again
        # NOTE(review): BaseException also catches KeyboardInterrupt and
        # SystemExit here — confirm this breadth is intentional
        except BaseException:
            # Raise an error when retried the default number of retries
            if retries >= self.default_num_cmd_retries:
                raise
            # Sleep for 5 seconds and retry again
            else:
                time.sleep(5)
                retries += 1
def __init__(self, name, platform_config_file, final_output_dir):
    """Set up the Google platform: credentials, zone settings and reporting topic.

    Reads the service-account key file referenced by the config,
    authenticates against Google Cloud with it, and prepares a local
    gcloud SSH key for direct SSH access to instances.
    """
    # Base platform loads and stores the configuration
    super(GooglePlatform, self).__init__(name, platform_config_file, final_output_dir)

    cfg = self.config

    # Service-account identity is read out of the JSON key file
    self.key_file = cfg["service_account_key_file"]
    self.service_acct = GoogleCloudHelper.get_field_from_key_file(self.key_file, field_name="client_email")
    self.google_project = GoogleCloudHelper.get_field_from_key_file(self.key_file, field_name="project_id")

    # Compute zone, and whether processors are scattered across zones randomly
    self.zone = cfg["zone"]
    self.randomize_zone = cfg["randomize_zone"]

    # Pub/Sub reporting topic; confirmed to exist later by validate()
    self.report_topic = cfg["report_topic"]
    self.report_topic_validated = False

    # Whether worker instances created by this platform will be preemptible
    self.is_preemptible = cfg["task_processor"]["is_preemptible"]

    # Gain access to the google cloud project via OAuth2 with the key file
    GoogleCloudHelper.authenticate(self.key_file)

    # Create the local gcloud SSH key so SSH can be used directly
    GoogleCloudHelper.configure_gcloud_ssh()
def __poll_status(self):
    """Translate the GCE instance state into a Processor status.

    Also maintains the cached startup-script-complete flag. An unrecognized
    instance status leaves the flag untouched and returns None, matching the
    original fall-through behavior.
    """
    # A missing instance means the processor is off
    if not GoogleCloudHelper.instance_exists(self.name):
        self.__startup_script_complete = False
        return Processor.OFF

    status = GoogleCloudHelper.get_instance_status(self.name, self.zone)

    if status == "RUNNING":
        # A running instance is only usable once its startup script is done;
        # once observed complete, the flag short-circuits future polls
        ready = self.__startup_script_complete or self.__poll_startup_script()
        self.__startup_script_complete = ready
        return Processor.AVAILABLE if ready else Processor.CREATING

    if status in ("TERMINATED", "STOPPING"):
        self.__startup_script_complete = False
        return Processor.DESTROYING

    if status in ("PROVISIONING", "STAGING"):
        self.__startup_script_complete = False
        return Processor.CREATING
def create(self):
    """Provision the GCE instance and block until it is reachable over SSH.

    Picks the cheapest adequate machine type for the requested resources,
    records its hourly price, launches the gcloud create command and waits
    for both the command and SSH readiness.

    Raises:
        RuntimeError: if the processor is locked.
    """
    # Refuse to create while another operation holds the processor
    if self.is_locked():
        logging.error(
            "(%s) Failed to create processor. Processor locked!" % self.name)
        raise RuntimeError("Cannot create processor while locked!")

    logging.info("(%s) Process 'create' started!" % self.name)

    # Let current Google pricing in this zone decide the machine type and
    # the effective cpu/mem actually allocated
    (self.nr_cpus,
     self.mem,
     self.instance_type) = GoogleCloudHelper.get_optimal_instance_type(self.nr_cpus,
                                                                       self.mem,
                                                                       self.zone,
                                                                       self.is_preemptible)

    # Record the instance price at the moment of creation
    self.price = GoogleCloudHelper.get_instance_price(self.nr_cpus,
                                                      self.mem,
                                                      self.disk_space,
                                                      self.instance_type,
                                                      self.zone,
                                                      self.is_preemptible,
                                                      self.is_boot_disk_ssd,
                                                      self.nr_local_ssd)
    logging.debug("(%s) Instance type is %s. Price per hour: %s cents" % (self.name, self.instance_type, self.price))

    # Launch the gcloud create command; retried until success, retry
    # exhaustion, or the processor becoming locked
    create_cmd = self.__get_gcloud_create_cmd()
    create_proc = Process(create_cmd, cmd=create_cmd,
                          stdout=sp.PIPE, stderr=sp.PIPE, shell=True,
                          num_retries=self.default_num_cmd_retries)
    self.processes["create"] = create_proc
    self.wait_process("create")

    # Block until the instance accepts SSH connections
    logging.debug("(%s) Waiting for instance to be accessible" % self.name)
    self.wait_until_ready()
def __init__(self, name, nr_cpus, mem, disk_space, **kwargs):
    """Build a Google Compute Engine instance wrapper and its SSH/creation state.

    Required kwargs: zone, service_acct, disk_image.
    Optional kwargs: is_boot_disk_ssd (default False), nr_local_ssd (default 0).
    """
    # Let the base processor record the shared resource requirements first
    super(Instance, self).__init__(name, nr_cpus, mem, disk_space, **kwargs)

    # Mandatory platform settings
    self.zone = kwargs.pop("zone")
    self.service_acct = kwargs.pop("service_acct")
    self.disk_image = kwargs.pop("disk_image")

    # Optional disk settings, with defaults
    self.is_boot_disk_ssd = kwargs.pop("is_boot_disk_ssd", False)
    self.nr_local_ssd = kwargs.pop("nr_local_ssd", 0)

    # Region is derived from the configured zone
    self.region = GoogleCloudHelper.get_region(self.zone)

    # The trailing dash-separated token of the name serves as the random id
    self.rand_instance_id = self.name.rsplit("-", 1)[-1]

    # A plain (non-preemptible) instance is not resettable
    self.is_preemptible = False

    # Machine type is chosen at creation time from current Google pricing
    self.instance_type = None

    # Hourly price and accumulated cost of this run
    self.price = 0
    self.cost = 0

    # SSH status flags
    self.ssh_connections_increased = False
    self.ssh_ready = False

    # How many times creation has been reset
    self.creation_resets = 0

    # Count of API rate-limit errors seen so far
    self.api_rate_limit_retries = 0

    # External IP address (unknown until the instance is described)
    self.external_IP = None
def clean_up(self):
    """Best-effort teardown of the platform.

    Deletes dummy placeholder files from the output bucket, then initiates
    and waits for destruction of every processor not already deallocated.
    Failures are logged as warnings so the remaining cleanup steps still run.
    """
    logging.info("Cleaning up Google Cloud Platform.")

    # Remove dummy files from output directory
    try:
        logging.debug("Looking for dummy files...")
        dummy_search_string = os.path.join(self.final_output_dir, "**dummy.txt")
        dummy_outputs = GoogleCloudHelper.ls(dummy_search_string)
        if len(dummy_outputs) > 0:
            cmd = "gsutil rm {0}".format(" ".join(dummy_outputs))
            proc = sp.Popen(cmd, stderr=sp.PIPE, stdout=sp.PIPE, shell=True)
            proc.communicate()
            logging.debug("Done killing dummy files!")
    # BUGFIX: was a bare 'except:' (also swallows SystemExit/KeyboardInterrupt)
    # and the message's '%s' placeholder was never filled in
    except Exception:
        logging.warning(
            "(%s) Could not remove dummy input files on google cloud!" % self.name)

    # Initiate destroy process on all the instances that haven't been destroyed
    for instance_name, instance_obj in self.processors.items():
        try:
            if instance_name not in self.dealloc_procs:
                instance_obj.destroy(wait=False)
        except RuntimeError:
            logging.warning("(%s) Could not destroy instance!" % instance_name)

    # Now wait for all destroy processes to finish
    for instance_name, instance_obj in self.processors.items():
        try:
            if instance_name not in self.dealloc_procs:
                instance_obj.wait_process("destroy")
        except RuntimeError:
            logging.warning("(%s) Unable to destroy instance!" % instance_name)

    logging.info("Clean up complete!")
# NOTE(review): ad-hoc developer scratch script (Python 2 — uses print
# statements). Names such as configure_import_paths, configure_logging,
# SampleSet, GoogleCloudHelper, Graph and ResourceKit must be provided by
# imports outside this chunk.
import importlib

configure_import_paths()
configure_logging(3)

# Exercise SampleSet parsing and per-sample lookups on a local test sheet
ss_file = "/home/alex/Desktop/projects/gap/test/test_runs/qc_report_merge/ss.json"
ss = SampleSet(ss_file)
print ss.get_paths()
print ss.get_data(data_type="qc_report", samples="Jijoye_early2")
print ss.get_data(data_type="qc_report", samples="Jijoye_early1")
exit(0)  # NOTE(review): everything below is unreachable dead test code

######################### Test GCH ls command
print GoogleCloudHelper.ls("gs://derp_test/new_gap_test_1/**dummy.txt")
print GoogleCloudHelper.ls("gs://derp_test/*gasdf")
exit(0)

######################### Test GAPfile updating with modules
from System.Platform.Google import GooglePlatform
from System.Datastore import Datastore

# Test cycle checking algorithm
graph_file = "/home/alex/Desktop/projects/gap/test/test_runs/simple_success/graph.config"
graph = Graph(graph_file)
#print graph
rk_file = "/home/alex/Desktop/projects/gap/test/test_runs/simple_success/rk.config"
rk = ResourceKit(rk_file)