Beispiel #1
0
def process_row(sm, row, options):
    """ Process row from the log
    sm: RedShiftSchemaMaker object
    row: a dict representing a log line
    options: command line options
    """
    for f in options.foreign:
        foreign_obj = get_deep(row, f)
        if foreign_obj is not None:
            sm.process_foreign_object(f, foreign_obj)

    obj = get_deep(row, options.source)
    if obj is None:
        return

    if options.source_type == 'sparse_list':
        for val in obj.values():
            sm.process_object(val, options.depth)
    elif options.source_type == 'dict':
        sm.process_object(obj, options.depth)
    elif options.source_type == 'list':
        for element in obj:
            sm.process_object(element, options.depth)
    else:
        raise Log2SchemaUnsupportedType(options.source_type, row)
Beispiel #2
0
    def get_cluster_and_restore_status(self, cluster_name):
        """
        gets the specified clusters' status and restore status.  Useful in
        polling as a cluster is restored from a snapshot.

        Possible cluster status values include:

        * available
        * creating
        * deleting
        * rebooting
        * renaming
        * resizing

        Possbile cluster restore status values include:

        * starting
        * restoring
        * completed
        * failed

        :param cluster_name: name of cluster
        :type cluster_name: string

        :returns: a pair of cluster status and restore status
        :rtype: string
        """
        cluster = self.get_cluster_info(cluster_name)
        return "{0},{1}".format(get_deep(cluster, "ClusterStatus"), get_deep(cluster, "RestoreStatus.Status"))
Beispiel #3
0
def process_row(sm, row, options):
    """ Process row from the log
    sm: RedShiftSchemaMaker object
    row: a dict representing a log line
    options: command line options
    """
    for f in options.foreign:
        foreign_obj = get_deep(row, f)
        if foreign_obj is not None:
            sm.process_foreign_object(f, foreign_obj)

    obj = get_deep(row, options.source)
    if obj is None:
        return

    if options.source_type == 'sparse_list':
        for val in obj.values():
            sm.process_object(val, options.depth)
    elif options.source_type == 'dict':
        sm.process_object(obj, options.depth)
    elif options.source_type == 'list':
        for element in obj:
            sm.process_object(element, options.depth)
    else:
        raise Log2SchemaUnsupportedType(options.source_type, row)
    def get_cluster_and_restore_status(self, cluster_name):
        """
        gets the specified clusters' status and restore status.  Useful in
        polling as a cluster is restored from a snapshot.

        Possible cluster status values include:

        * available
        * creating
        * deleting
        * rebooting
        * renaming
        * resizing

        Possbile cluster restore status values include:

        * starting
        * restoring
        * completed
        * failed

        :param cluster_name: name of cluster
        :type cluster_name: string

        :returns: a pair of cluster status and restore status
        :rtype: string
        """
        cluster = self.get_cluster_info(cluster_name)
        return "{0},{1}".format(get_deep(cluster, 'ClusterStatus'),
                                get_deep(cluster, 'RestoreStatus.Status'))
    def restore_from_cluster_snapshot(self,
                                      cluster_name,
                                      snapshot_name,
                                      param_group=None,
                                      vpc_group_id=None,
                                      cluster_subnet_group_name=None,
                                      wait_for_cluster_availability=True):
        """Restore cluster from snapshot

        Wait until cluster and restore status indicate success

        :param cluster_name: the name of the cluster we restore to
        :type cluster_name: string
        :param snapshot_name: the name of the snapshot from which we restore
        :type snapshot_name: string
        :param param_group: parameter group of the redshift cluster
        :type param_group: string
        :param vpc_group_id: vpc group id of the redshift cluster
        :type vpc_group_id: string
        :param cluster_subnet_group_name: the name of the subnet group where
                                          we want the cluster restored
        :type cluster_subnet_group_name: string
        :param wait_for_cluster_availability: determine whether wait until the
                                              the cluster is ready or not
        :type wait_for_cluster_availability: boolean
        """
        snapshot_response = self._conn.describe_cluster_snapshots(
            snapshot_identifier=snapshot_name)

        snapshot_list = get_deep(
            snapshot_response, 'DescribeClusterSnapshotsResponse.\
DescribeClusterSnapshotsResult.Snapshots')
        if len(snapshot_list) > 1:
            raise ValueError("{0}: too many snapshots".format(snapshot_name))

        snapshot = snapshot_list.pop()
        if snapshot['EstimatedSecondsToCompletion'] != 0:
            raise ValueError('snapshot not ready')
        if snapshot['Status'] != 'available':
            raise ValueError("snapshot status: {0}".format(snapshot['Status']))
        restore_response = self._conn.restore_from_cluster_snapshot(
            cluster_name,
            snapshot_name,
            cluster_subnet_group_name=cluster_subnet_group_name,
            cluster_parameter_group_name=param_group,
            vpc_security_group_ids=[vpc_group_id])

        cluster_status = get_deep(
            restore_response, 'RestoreFromClusterSnapshotResponse.\
RestoreFromClusterSnapshotResult.Cluster.ClusterStatus')

        while wait_for_cluster_availability:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            if cluster_status == 'available,completed':
                break
            time.sleep(self.poll_interval_sec)
            cluster_status = self.get_cluster_and_restore_status(cluster_name)
Beispiel #6
0
    def set_parameter_group(self, cluster_name, param_group):
        """
        sets the parameter group for a particular cluster, using an
        exponential backoff to check status.

        :param cluster_name: name of cluster
        :type cluster_name: string
        :param param_group: name of a parameter group
        :type param_group: string

        :returns: a pair of cluster status and parameter group apply status
        :rtype: string
        """
        if not param_group:
            return

        response = self._conn.modify_cluster(cluster_name, cluster_parameter_group_name=param_group)
        cluster = get_deep(response, "ModifyClusterResponse.ModifyClusterResult.Cluster")

        pg_groups = cluster["ClusterParameterGroups"]
        if len(pg_groups) != 1:
            raise ValueError("Unexpected number of parameter groups")
        pg_name = pg_groups[0]["ParameterGroupName"]
        pg_status = pg_groups[0]["ParameterApplyStatus"]
        if pg_name != param_group:
            raise ValueError("Unexpected parameter group name: {0}".format(pg_name))
        if pg_status != "applying":
            raise ValueError("Unexpected parameter group status: {0}".format(pg_status))
        sys.stderr.write("set parameter group: {0}\n".format(param_group))

        cluster_status = "{0},{1}".format(cluster["ClusterStatus"], pg_status)
        while True:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            if cluster_status == "available,pending-reboot":
                time.sleep(self.poll_interval_sec)
                response = self._conn.reboot_cluster(cluster_name)
                cluster = get_deep(response, "RebootClusterResponse.RebootClusterResult.Cluster")
                cluster_status = "{0},{1}".format(
                    cluster["ClusterStatus"], cluster["ClusterParameterGroups"][0]["ParameterApplyStatus"]
                )
                if cluster_status != "rebooting,pending-reboot":
                    raise ValueError("Unexpected cluster status: {0}".format(cluster_status))
                break
            if cluster_status == "available,in-sync":
                break
            time.sleep(self.poll_interval_sec)
            cluster_status = self.get_cluster_and_pg_status(cluster_name)

        while True:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            if cluster_status == "available,in-sync":
                break
            time.sleep(self.poll_interval_sec)
            cluster_status = self.get_cluster_and_pg_status(cluster_name)
Beispiel #7
0
    def delete_cluster(self, cluster_name, save_snapshot=True):
        """
        Delete cluster, optionally save snapshot

        :param cluster_name: name of the cluster to be deleted
        :type cluster_name: string
        :param save_snapshot: True if we should save a snapshot, False if not
        :type save_snapshot: boolean
        """
        snapshot_name = "{0}-{1}".format(cluster_name, datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S"))

        try:
            if save_snapshot:
                sys.stderr.write("\ncreating final snapshot: {0}\n".format(snapshot_name))
                delete_response = self._conn.delete_cluster(
                    cluster_name, skip_final_cluster_snapshot=False, final_cluster_snapshot_identifier=snapshot_name
                )
            else:
                delete_response = self._conn.delete_cluster(cluster_name, skip_final_cluster_snapshot=True)
        except ClusterNotFound:
            # no cluster, nothing to delete, no snapshot to save
            return

        cluster_status = get_deep(delete_response, "DeleteClusterResponse.DeleteClusterResult.Cluster.ClusterStatus")

        # poll until triggering ClusterNotFound exception
        while cluster_status is not None:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            time.sleep(self.poll_interval_sec)
            try:
                cluster_status = self.get_cluster_status(cluster_name)
            except ClusterNotFound as e:
                sys.stderr.write(str(e))
                cluster_status = None
    def set_security_group(self, cluster_name, security_group):
        """
        sets the security group for a particular cluster

        :param cluster_name: name of cluster
        :type cluster_name: string
        :param security_group: name of a security group
        :type security_group: string
        """
        if not security_group:
            return

        response = self._conn.modify_cluster(
            cluster_name,
            cluster_security_groups=[security_group],
        )
        cluster_status = get_deep(
            response, 'ModifyClusterResponse.ModifyClusterResult.Cluster')

        sec_groups = cluster_status['ClusterSecurityGroups']
        if len(sec_groups) != 1:
            raise ValueError("Unexpected number of security groups")
        sg_name = sec_groups[0]['ClusterSecurityGroupName']
        sg_status = sec_groups[0]['Status']

        if sg_name != security_group:
            raise ValueError(
                "Unexpected security group name: {0}".format(sg_name))
        if sg_status != 'active':
            raise ValueError(
                "Unexpected security group status: {0}".format(sg_status))
        sys.stderr.write("set security group: {0}\n".format(security_group))
Beispiel #9
0
    def set_security_group(self, cluster_name, security_group):
        """
        sets the security group for a particular cluster

        :param cluster_name: name of cluster
        :type cluster_name: string
        :param security_group: name of a security group
        :type security_group: string
        """
        if not security_group:
            return

        response = self._conn.modify_cluster(cluster_name, cluster_security_groups=[security_group])
        cluster_status = get_deep(response, "ModifyClusterResponse.ModifyClusterResult.Cluster")

        sec_groups = cluster_status["ClusterSecurityGroups"]
        if len(sec_groups) != 1:
            raise ValueError("Unexpected number of security groups")
        sg_name = sec_groups[0]["ClusterSecurityGroupName"]
        sg_status = sec_groups[0]["Status"]

        if sg_name != security_group:
            raise ValueError("Unexpected security group name: {0}".format(sg_name))
        if sg_status != "active":
            raise ValueError("Unexpected security group status: {0}".format(sg_status))
        sys.stderr.write("set security group: {0}\n".format(security_group))
Beispiel #10
0
    def set_vpc_security_group_ids(self, cluster_name, vpc_security_group_id):
        """
        sets the vpc security group for a particular cluster, using an
        exponential backoff to check status.

        :param cluster_name: name of cluster
        :type cluster_name: string
        :param vpc_security_group_id: id of vpc security group
        :type vpc_security_group_id: string
        """
        if not vpc_security_group_id:
            return

        response = self._conn.modify_cluster(
            cluster_name,
            vpc_security_group_ids=[vpc_security_group_id],
        )
        cluster_status = get_deep(
            response, 'ModifyClusterResponse.ModifyClusterResult.Cluster')

        sec_groups = cluster_status['VpcSecurityGroups']

        while True:
            if len(sec_groups) != 1:
                raise ValueError("Unexpected number of security groups")
            sg_id = sec_groups[0]['VpcSecurityGroupId']
            sg_status = sec_groups[0]['Status']

            if sg_id != vpc_security_group_id:
                raise ValueError(
                    "Unexpected vpc security group id: {0}".format(sg_id))
            if sg_status == 'adding':
                time.sleep(self.poll_interval_sec)
                response = self.get_cluster_info(cluster_name)
                sec_groups = get_deep(response, 'VpcSecurityGroups')
                sys.stderr.write("vpc security group status: adding\n")
            elif sg_status == 'active':
                sys.stderr.write("vpc security group status: active\n")
                break
            else:
                raise ValueError(
                    "Unexpected vpc security group status: {0}".format(
                        sg_status))

        sys.stderr.write(
            "set security group: {0}\n".format(vpc_security_group_id))
Beispiel #11
0
    def set_vpc_security_group_ids(self, cluster_name, vpc_security_group_id):
        """
        sets the vpc security group for a particular cluster, using an
        exponential backoff to check status.

        :param cluster_name: name of cluster
        :type cluster_name: string
        :param vpc_security_group_id: id of vpc security group
        :type vpc_security_group_id: string
        """
        if not vpc_security_group_id:
            return

        response = self._conn.modify_cluster(cluster_name, vpc_security_group_ids=[vpc_security_group_id])
        cluster_status = get_deep(response, "ModifyClusterResponse.ModifyClusterResult.Cluster")

        sec_groups = cluster_status["VpcSecurityGroups"]

        while True:
            if len(sec_groups) != 1:
                raise ValueError("Unexpected number of security groups")
            sg_id = sec_groups[0]["VpcSecurityGroupId"]
            sg_status = sec_groups[0]["Status"]

            if sg_id != vpc_security_group_id:
                raise ValueError("Unexpected vpc security group id: {0}".format(sg_id))
            if sg_status == "adding":
                time.sleep(self.poll_interval_sec)
                response = self.get_cluster_info(cluster_name)
                sec_groups = get_deep(response, "VpcSecurityGroups")
                sys.stderr.write("vpc security group status: adding\n")
            elif sg_status == "active":
                sys.stderr.write("vpc security group status: active\n")
                break
            else:
                raise ValueError("Unexpected vpc security group status: {0}".format(sg_status))

        sys.stderr.write("set security group: {0}\n".format(vpc_security_group_id))
Beispiel #12
0
    def delete_cluster(self, cluster_name, save_snapshot=True):
        """
        Delete cluster, optionally save snapshot

        :param cluster_name: name of the cluster to be deleted
        :type cluster_name: string
        :param save_snapshot: True if we should save a snapshot, False if not
        :type save_snapshot: boolean
        """
        snapshot_name = "{0}-{1}".format(
            cluster_name,
            datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S"))

        try:
            if save_snapshot:
                sys.stderr.write(
                    "\ncreating final snapshot: {0}\n".format(snapshot_name))
                delete_response = self._conn.delete_cluster(
                    cluster_name,
                    skip_final_cluster_snapshot=False,
                    final_cluster_snapshot_identifier=snapshot_name)
            else:
                delete_response = self._conn.delete_cluster(
                    cluster_name,
                    skip_final_cluster_snapshot=True,
                )
        except ClusterNotFound:
            # no cluster, nothing to delete, no snapshot to save
            return

        cluster_status = get_deep(
            delete_response,
            'DeleteClusterResponse.DeleteClusterResult.Cluster.ClusterStatus',
        )

        # poll until triggering ClusterNotFound exception
        while cluster_status is not None:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            time.sleep(self.poll_interval_sec)
            try:
                cluster_status = self.get_cluster_status(cluster_name)
            except ClusterNotFound as e:
                sys.stderr.write(str(e))
                cluster_status = None
Beispiel #13
0
    def get_cluster_list(self, cluster_name=None):
        """
        gets a list of cluster descrptions; if a cluster_name is specified
        gets the specified clusters' description

        :param cluster_name: optional name of cluster
        :type cluster_name: string

        :returns: the parameters for the cluster specified or list of cluster descriptions
        :rtype: a list of cluster descriptions (which are dicts)
        """
        response = self._conn.describe_clusters(cluster_name)
        result_dict = get_deep(response, "DescribeClustersResponse.DescribeClustersResult")
        cluster_list = result_dict["Clusters"]
        marker = result_dict["Marker"]
        if marker is not None:
            # If marker is not None, that means we have more than 100
            # clusters, and we need to pagniated to fetch more results
            raise NotImplementedError
        return cluster_list
Beispiel #14
0
    def get_cluster_parameters(self, parameter_group_name):
        """
        gets the result of a DescribeClusterParameters for a particular
        parameter group.

        :param parameter_group_name: redshift cluster parameter group name
        :type paramenter_group_name: string

        :returns: the parameters for the cluster paramenter_group_name
        :rtype: a dictionary of parameters
        """
        response = self._conn.describe_cluster_parameters(parameter_group_name)
        result_dict = get_deep(response, "DescribeClusterParametersResponse.DescribeClusterParametersResult")
        parameters = result_dict["Parameters"]
        marker = result_dict["Marker"]
        if marker is not None:
            # If marker is not None, that means we have more than 100
            # clusters, and we need to pagniated to fetch more results
            raise NotImplementedError
        return parameters
Beispiel #15
0
    def get_cluster_list(self, cluster_name=None):
        """
        gets a list of cluster descrptions; if a cluster_name is specified
        gets the specified clusters' description

        :param cluster_name: optional name of cluster
        :type cluster_name: string

        :returns: the parameters for the cluster specified or list of cluster descriptions
        :rtype: a list of cluster descriptions (which are dicts)
        """
        response = self._conn.describe_clusters(cluster_name)
        result_dict = get_deep(
            response, 'DescribeClustersResponse.DescribeClustersResult')
        cluster_list = result_dict['Clusters']
        marker = result_dict['Marker']
        if marker is not None:
            # If marker is not None, that means we have more than 100
            # clusters, and we need to pagniated to fetch more results
            raise NotImplementedError
        return cluster_list
Beispiel #16
0
    def get_cluster_parameters(self, parameter_group_name):
        """
        gets the result of a DescribeClusterParameters for a particular
        parameter group.

        :param parameter_group_name: redshift cluster parameter group name
        :type paramenter_group_name: string

        :returns: the parameters for the cluster paramenter_group_name
        :rtype: a dictionary of parameters
        """
        response = self._conn.describe_cluster_parameters(parameter_group_name)
        result_dict = get_deep(
            response,
            'DescribeClusterParametersResponse.DescribeClusterParametersResult'
        )
        parameters = result_dict['Parameters']
        marker = result_dict['Marker']
        if marker is not None:
            # If marker is not None, that means we have more than 100
            # clusters, and we need to pagniated to fetch more results
            raise NotImplementedError
        return parameters
Beispiel #17
0
def test_get_deep(x, path, expected_value):
    v = get_deep(x, path)
    assert v == expected_value
Beispiel #18
0
    def restore_from_cluster_snapshot(
        self,
        cluster_name,
        snapshot_name,
        param_group=None,
        vpc_group_id=None,
        cluster_subnet_group_name=None,
        wait_for_cluster_availability=True,
    ):
        """Restore cluster from snapshot

        Wait until cluster and restore status indicate success

        :param cluster_name: the name of the cluster we restore to
        :type cluster_name: string
        :param snapshot_name: the name of the snapshot from which we restore
        :type snapshot_name: string
        :param param_group: parameter group of the redshift cluster
        :type param_group: string
        :param vpc_group_id: vpc group id of the redshift cluster
        :type vpc_group_id: string
        :param cluster_subnet_group_name: the name of the subnet group where
                                          we want the cluster restored
        :type cluster_subnet_group_name: string
        :param wait_for_cluster_availability: determine whether wait until the
                                              the cluster is ready or not
        :type wait_for_cluster_availability: boolean
        """
        snapshot_response = self._conn.describe_cluster_snapshots(snapshot_identifier=snapshot_name)

        snapshot_list = get_deep(
            snapshot_response,
            "DescribeClusterSnapshotsResponse.\
DescribeClusterSnapshotsResult.Snapshots",
        )
        if len(snapshot_list) > 1:
            raise ValueError("{0}: too many snapshots".format(snapshot_name))

        snapshot = snapshot_list.pop()
        if snapshot["EstimatedSecondsToCompletion"] != 0:
            raise ValueError("snapshot not ready")
        if snapshot["Status"] != "available":
            raise ValueError("snapshot status: {0}".format(snapshot["Status"]))
        restore_response = self._conn.restore_from_cluster_snapshot(
            cluster_name,
            snapshot_name,
            cluster_subnet_group_name=cluster_subnet_group_name,
            cluster_parameter_group_name=param_group,
            vpc_security_group_ids=[vpc_group_id],
        )

        cluster_status = get_deep(
            restore_response,
            "RestoreFromClusterSnapshotResponse.\
RestoreFromClusterSnapshotResult.Cluster.ClusterStatus",
        )

        while wait_for_cluster_availability:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            if cluster_status == "available,completed":
                break
            time.sleep(self.poll_interval_sec)
            cluster_status = self.get_cluster_and_restore_status(cluster_name)
Beispiel #19
0
def test_get_deep(x, path, expected_value):
    v = get_deep(x, path)
    assert v == expected_value
Beispiel #20
0
    def set_parameter_group(self, cluster_name, param_group):
        """
        sets the parameter group for a particular cluster, using an
        exponential backoff to check status.

        :param cluster_name: name of cluster
        :type cluster_name: string
        :param param_group: name of a parameter group
        :type param_group: string

        :returns: a pair of cluster status and parameter group apply status
        :rtype: string
        """
        if not param_group:
            return

        response = self._conn.modify_cluster(
            cluster_name,
            cluster_parameter_group_name=param_group,
        )
        cluster = get_deep(
            response, 'ModifyClusterResponse.ModifyClusterResult.Cluster')

        pg_groups = cluster['ClusterParameterGroups']
        if len(pg_groups) != 1:
            raise ValueError("Unexpected number of parameter groups")
        pg_name = pg_groups[0]['ParameterGroupName']
        pg_status = pg_groups[0]['ParameterApplyStatus']
        if pg_name != param_group:
            raise ValueError(
                "Unexpected parameter group name: {0}".format(pg_name))
        if pg_status != 'applying':
            raise ValueError(
                "Unexpected parameter group status: {0}".format(pg_status))
        sys.stderr.write("set parameter group: {0}\n".format(param_group))

        cluster_status = "{0},{1}".format(cluster['ClusterStatus'], pg_status)
        while True:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            if cluster_status == 'available,pending-reboot':
                time.sleep(self.poll_interval_sec)
                response = self._conn.reboot_cluster(cluster_name)
                cluster = get_deep(
                    response,
                    'RebootClusterResponse.RebootClusterResult.Cluster')
                cluster_status = "{0},{1}".format(
                    cluster['ClusterStatus'], cluster['ClusterParameterGroups']
                    [0]['ParameterApplyStatus'])
                if cluster_status != 'rebooting,pending-reboot':
                    raise ValueError('Unexpected cluster status: {0}'.format(
                        cluster_status))
                break
            if cluster_status == 'available,in-sync':
                break
            time.sleep(self.poll_interval_sec)
            cluster_status = self.get_cluster_and_pg_status(cluster_name)

        while True:
            sys.stderr.write("cluster_status: {0}\n".format(cluster_status))
            if cluster_status == 'available,in-sync':
                break
            time.sleep(self.poll_interval_sec)
            cluster_status = self.get_cluster_and_pg_status(cluster_name)