Example #1
from datetime import timedelta

from google.protobuf.duration_pb2 import Duration


def timedelta_to_duration(obj: "timedelta") -> "Duration":
    """
    Return the Python ``timedelta`` as a Protobuf ``google.protobuf.Duration``.
    """
    d = Duration()
    d.seconds = obj.days * 86400 + obj.seconds  # whole seconds (int, as the int64 field requires)
    d.nanos = obj.microseconds * 1000  # sub-second remainder in nanoseconds
    return d
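The well-known Duration type already provides FromTimedelta/ToTimedelta helpers, so the manual seconds/nanos split can also be delegated to the library. A minimal sketch (not from the original source):

from datetime import timedelta

from google.protobuf.duration_pb2 import Duration

d = Duration()
d.FromTimedelta(timedelta(minutes=10, microseconds=21))
print(d.seconds, d.nanos)  # -> 600 21000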
Example #2
    def create_cluster(self):
        """Create the cluster."""
        print('Creating cluster...')

        # idle_delete_ttl only accepts a google.protobuf.Duration, not a plain number
        start = Timestamp()
        end = Timestamp()
        duration = Duration()
        start.FromJsonString('2019-06-01T10:00:20.021-05:00')
        end.FromJsonString('2019-06-01T10:10:20.021-05:00')
        duration.seconds = end.seconds - start.seconds  # duration will be 10 minutes (600 seconds)
        zone_uri = \
            'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
                self.project_id, self.zone)

        cluster_data = {
            'project_id': self.project_id,
            'cluster_name': self.cluster_name,
            'config': {
                'gce_cluster_config': {
                    'zone_uri': zone_uri,
                    "metadata": {
                        'PIP_PACKAGES':
                        'pandas requests beautifulsoup4 PyMySQL'
                    }
                },
                'master_config': {
                    'num_instances': 1,
                    'machine_type_uri': 'n1-standard-8'
                },
                'worker_config': {
                    'num_instances': 2,
                    'machine_type_uri': 'n1-standard-8',
                },
                "software_config": {
                    "image_version": "1.4-ubuntu18",
                    "properties": {
                        "dataproc:alpha.state.shuffle.hcfs.enabled": "false"
                    }
                },
                "lifecycle_config": {
                    "idle_delete_ttl": duration
                },
                'initialization_actions': [{
                    'executable_file':
                    'gs://sparkrecommendationengine/packages.sh'
                }]
            }
        }

        cluster = self.dataproc_cluster_client.create_cluster(
            self.project_id, self.region, cluster_data)
        cluster.add_done_callback(self._callback)
        global waiting_callback
        waiting_callback = True
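For a fixed 10-minute TTL like the one above, the Duration can also be built directly from a timedelta instead of subtracting two parsed Timestamps. A minimal standalone sketch (the variable name is illustrative):

from datetime import timedelta

from google.protobuf.duration_pb2 import Duration

idle_delete_ttl = Duration()
idle_delete_ttl.FromTimedelta(timedelta(minutes=10))  # seconds=600, nanos=0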
Example #3
def as_api_duration(t: timedelta) -> Duration:
    d = Duration()
    d.seconds = t.days * 86400 + t.seconds  # whole seconds (int, as the int64 field requires)
    d.nanos = t.microseconds * 1000  # sub-second remainder in nanoseconds
    return d
Example #4
def test_gcs_time_to_ms_nanos_and_seconds():
    duration = Duration()
    duration.nanos = 300000000
    duration.seconds = 5
    assert gcs_time_to_ms(duration) == 5300
Example #5
def test_gcs_time_to_ms_just_seconds():
    duration = Duration()
    duration.seconds = 2
    assert gcs_time_to_ms(duration) == 2000
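The gcs_time_to_ms helper exercised by these two tests is not shown on this page. A minimal sketch that would satisfy both assertions (the name matches the tests; the floor division is an assumption):

from google.protobuf.duration_pb2 import Duration


def gcs_time_to_ms(duration: Duration) -> int:
    # Collapse the seconds/nanos pair into whole milliseconds.
    return duration.seconds * 1000 + duration.nanos // 1_000_000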
Example #6
ZONE = os.environ.get("GCP_REGION", cluster_config_var['zone'])
CLUSTER_NAME_DAILY = cluster_config_var['cluster_pre_name'] \
                     + "{{ execution_date.format('%Y%m%d%H%M') }}"
CLUSTER_NAME_INITIAL_LOAD = \
    f"{cluster_config_var['cluster_pre_name']}initial-" \
    + "{{ execution_date.format('%Y%m%d%H%M') }}"
PROJECT_ID = Variable.get("config_project-id")
PIG_JAR_FILE_URIS = Variable.get("ab_test_jar_file_uris")

machine_type_uri = cluster_config_var['machine_type_uri']
num_instances = cluster_config_var['num_instances']
boot_disk_size_gb = cluster_config_var['boot_disk_size_gb']
boot_disk_type = cluster_config_var['boot_disk_type']
image_version = cluster_config_var['image_version']
# idle_delete_ttl must be passed as a google.protobuf.Duration message
timeIdle = Duration()
timeIdle.seconds = cluster_config_var['idle_delete_ttl_seconds']
subnetwork_uri = cluster_config_var['subnetwork_uri']
# the initialization-action execution timeout is also a Duration
initialization_actions_execution_timeout = Duration()
initialization_actions_execution_timeout.seconds = \
    cluster_config_var['initialization_actions_execution_timeout_seconds']
initialization_actions_execution = \
    "gs://dataproc-initialization-actions/python/pip-install.sh"
git_branch = cluster_config_var['git_branch']
label_team = cluster_config_var['label_team']
label_task = cluster_config_var['label_task']
pip_install_pkg = \
    f"-egit+https://{github_auth['user']}:{github_auth['token']}@github.com/" \
    f"pedidosya/data-analytics-factory-food-ontology-model.git@{git_branch}#" \
    f"egg=food_ontology&subdirectory=src"