def timedelta_to_duration(obj: "timedelta") -> "Duration":
    """Convert a Python ``timedelta`` to a Protobuf ``google.protobuf.Duration``.

    The previous implementation assigned ``obj.total_seconds()`` — a float
    that already includes the fractional seconds — to ``Duration.seconds``
    (an int64 field, which rejects floats) and then added the microseconds
    *again* via ``nanos``, double-counting the sub-second part.

    Args:
        obj: The time span to convert.

    Returns:
        A ``Duration`` representing the same span, with whole seconds in
        ``seconds`` and the remainder in ``nanos``.
    """
    d = Duration()
    # FromTimedelta is the canonical protobuf conversion; it keeps
    # ``seconds`` and ``nanos`` consistent (same sign) even for negative
    # timedeltas, which a hand-rolled split would get wrong.
    d.FromTimedelta(obj)
    return d
def create_cluster(self):
    """Create the Dataproc cluster asynchronously.

    Builds the cluster spec from instance attributes (``project_id``,
    ``zone``, ``region``, ``cluster_name``), submits it via
    ``dataproc_cluster_client.create_cluster``, and registers
    ``self._callback`` to run when the long-running operation completes.

    Side effects:
        Sets the module-level ``waiting_callback`` flag to True so the
        caller can poll for completion.
    """
    print('Creating cluster...')
    # lifecycle_config.idle_delete_ttl must be a google.protobuf.Duration.
    # The original derived this constant by parsing two hard-coded RFC-3339
    # timestamps and subtracting their ``seconds`` fields; assign the
    # 10-minute TTL directly instead — same value, no Timestamp round-trip.
    duration = Duration()
    duration.seconds = 600  # 10 minutes of idleness before auto-delete
    zone_uri = \
        'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
            self.project_id, self.zone)
    cluster_data = {
        'project_id': self.project_id,
        'cluster_name': self.cluster_name,
        'config': {
            'gce_cluster_config': {
                'zone_uri': zone_uri,
                # Read by the init action below to pip-install on all nodes.
                "metadata": {
                    'PIP_PACKAGES': 'pandas requests beautifulsoup4 PyMySQL'
                }
            },
            'master_config': {
                'num_instances': 1,
                'machine_type_uri': 'n1-standard-8'
            },
            'worker_config': {
                'num_instances': 2,
                'machine_type_uri': 'n1-standard-8',
            },
            "software_config": {
                "image_version": "1.4-ubuntu18",
                "properties": {
                    "dataproc:alpha.state.shuffle.hcfs.enabled": "false"
                }
            },
            # Auto-delete the cluster after ``duration`` of idle time.
            "lifecycle_config": {
                "idle_delete_ttl": duration
            },
            'initialization_actions': [{
                'executable_file': 'gs://sparkrecommendationengine/packages.sh'
            }]
        }
    }
    # Returns a long-running operation; completion is signalled through
    # the done-callback rather than by blocking here.
    cluster = self.dataproc_cluster_client.create_cluster(
        self.project_id, self.region, cluster_data)
    cluster.add_done_callback(self._callback)
    global waiting_callback
    waiting_callback = True
def as_api_duration(t: timedelta) -> Duration:
    """Convert a ``timedelta`` to a ``google.protobuf.Duration``.

    The previous implementation assigned ``t.total_seconds()`` — a float
    already containing the fractional seconds — to the int64 ``seconds``
    field (a type error) and then counted the microseconds a second time
    through ``nanos``.

    Args:
        t: The time span to convert.

    Returns:
        An equivalent ``Duration`` message.
    """
    d = Duration()
    # Canonical conversion: whole seconds into ``seconds``, remainder into
    # ``nanos``, with both fields kept sign-consistent.
    d.FromTimedelta(t)
    return d
def test_gcs_time_to_ms_nanos_and_seconds():
    """Whole seconds and nanos are combined into a single millisecond count."""
    span = Duration()
    span.seconds = 5
    span.nanos = 300000000  # 300 ms expressed in nanoseconds
    assert gcs_time_to_ms(span) == 5300
def test_gcs_time_to_ms_just_seconds():
    """A seconds-only duration converts to exactly seconds * 1000 ms."""
    span = Duration()
    span.seconds = 2
    assert gcs_time_to_ms(span) == 2000
# --- Module-level Dataproc cluster configuration -------------------------
# Values come from three places: the process environment, the
# ``cluster_config_var`` dict, and Airflow Variables. The ``{{ ... }}``
# fragments below are Jinja templates rendered by Airflow at task runtime,
# NOT Python format strings — do not "fix" them into f-strings.

# NOTE(review): env var is named GCP_REGION but the fallback is the
# configured *zone* — confirm which one is intended.
ZONE = os.environ.get("GCP_REGION", cluster_config_var['zone'])
# Cluster names are suffixed with the (templated) execution timestamp so
# each DAG run gets a uniquely named cluster.
CLUSTER_NAME_DAILY = cluster_config_var['cluster_pre_name'] \
    + "{{ execution_date.format('%Y%m%d%H%M') }}"
CLUSTER_NAME_INITIAL_LOAD = \
    f"{cluster_config_var['cluster_pre_name']}initial-" \
    + "{{ execution_date.format('%Y%m%d%H%M') }}"
# Airflow Variables (fetched from the metadata DB at import time).
PROJECT_ID = Variable.get("config_project-id")
PIG_JAR_FILE_URIS = Variable.get("ab_test_jar_file_uris")
# Machine/disk/image settings for the cluster nodes.
machine_type_uri = cluster_config_var['machine_type_uri']
num_instances = cluster_config_var['num_instances']
boot_disk_size_gb = cluster_config_var['boot_disk_size_gb']
boot_disk_type = cluster_config_var['boot_disk_type']
image_version = cluster_config_var['image_version']
# Idle-delete TTL as a protobuf Duration, as required by the Dataproc API.
timeIdle = Duration()
timeIdle.seconds = cluster_config_var['idle_delete_ttl_seconds']
subnetwork_uri = cluster_config_var['subnetwork_uri']
# Timeout for the initialization action, also a protobuf Duration.
initialization_actions_execution_timeout = Duration()
initialization_actions_execution_timeout.seconds = \
    cluster_config_var['initialization_actions_execution_timeout_seconds']
# Google-provided init action that pip-installs packages on every node.
initialization_actions_execution = \
    "gs://dataproc-initialization-actions/python/pip-install.sh"
git_branch = cluster_config_var['git_branch']
label_team = cluster_config_var['label_team']
label_task = cluster_config_var['label_task']
# pip requirement string installing the food-ontology package straight from
# the private GitHub repo at ``git_branch``; embeds credentials from
# ``github_auth`` (defined elsewhere in this file).
pip_install_pkg = \
    f"-egit+https://{github_auth['user']}:{github_auth['token']}@github.com/" \
    f"pedidosya/data-analytics-factory-food-ontology-model.git@{git_branch}#" \
    f"egg=food_ontology&subdirectory=src"