Beispiel #1
0
    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        Builds the ``Records`` store kept on ``self._clusters`` from the two
        arguments, which are forwarded unchanged.

        :param persistence_object: backend that stores the items; for
            example a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each RedshiftCluster
        :type avro_schema_object: Schema
        '''
        self._clusters = Records(
            persistence_object=persistence_object,
            avro_schema_object=avro_schema_object,
        )
Beispiel #2
0
    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        Builds the ``Records`` store kept on ``self._records`` and remembers
        the value of the ``LOGNAME`` environment variable as
        ``self.username``.

        :param persistence_object: backend that stores the items; for
            example a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each ScheduledJob
        :type avro_schema_object: Schema
        '''
        store = Records(
            persistence_object=persistence_object,
            avro_schema_object=avro_schema_object,
        )
        self._records = store
        # os.getenv returns None when LOGNAME is not set.
        self.username = os.getenv('LOGNAME')
Beispiel #3
0
    def __init__(self, persistence_object=None, avro_schema_object=None):
        """
        Private API. Unstable. Use it at your own risk.

        Builds the ``Records`` store kept on ``self._records`` from the two
        arguments, which are forwarded unchanged.

        :param persistence_object: backend that stores the items; for
            example a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each ETLRecord
        :type avro_schema_object: Schema
        """
        self._records = Records(
            persistence_object=persistence_object,
            avro_schema_object=avro_schema_object,
        )
Beispiel #4
0
    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        Builds the ``Records`` store kept on ``self._records`` and remembers
        the value of the ``LOGNAME`` environment variable as
        ``self.username``.

        :param persistence_object: backend that stores the items; for
            example a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each ScheduledJob
        :type avro_schema_object: Schema
        '''
        store_args = {
            'persistence_object': persistence_object,
            'avro_schema_object': avro_schema_object,
        }
        self._records = Records(**store_args)
        # os.getenv returns None when LOGNAME is not set.
        self.username = os.getenv('LOGNAME')
Beispiel #5
0
class ScheduledJobs(object):
    '''
    Collection of :class:`.ScheduledJob` items backed by a ``Records`` store.

    Reads go through the store and every record coming back is wrapped in a
    ``ScheduledJob``; writes are handed to the store as-is.
    '''

    INDEX_ET_STATUS = 'ETStatusIndex'
    INDEX_LOAD_STATUS = 'LoadStatusIndex'
    INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION = 'LogNameLogSchemaVersionIndex'

    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        :param persistence_object: backend that stores the jobs; for example
            a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each ScheduledJob
        :type avro_schema_object: Schema
        '''
        store_args = {
            'persistence_object': persistence_object,
            'avro_schema_object': avro_schema_object,
        }
        self._records = Records(**store_args)
        # delete() passes this value on to ScheduledJob.delete; it is None
        # when LOGNAME is not set in the environment.
        self.username = os.getenv('LOGNAME')

    @staticmethod
    def _as_jobs(records):
        '''Lazily wrap each raw record in a ScheduledJob.'''
        for raw in records:
            yield ScheduledJob(record=raw)

    def get(self, **kwargs):
        '''
        Look up a single ScheduledJob.

        :param kwargs: key fields identifying the job; all of them are
            passed together to the underlying store. At least one valid
            kwarg is required: no kwargs, or an unknown kwarg, results in
            ValueError
        :returns: the job matching the given keys
        :rtype: :class:`.ScheduledJob`
        :raises KeyError: if the requested record is not found
        :raises PrimaryKeyError: if the requested record does not have a
            conforming primary key

        Example::
            >>> scheduled_job = scheduled_jobs.get(
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
            >>> scheduled_job.get(s3_path=None)
            {'s3_path': 's3://my-bucket/logs/apache'}
            >>> # Example of no kwarg
            >>> scheduled_jobs.get()
            ValueError
            >>> # Example of unknown kwarg
            >>> scheduled_jobs.get(color='black')
            ValueError
        '''
        record = self._records.get(**kwargs)
        return ScheduledJob(record)

    def put(self, **kwargs):
        '''
        Store a new ScheduledJob.

        :param kwargs: each kwarg becomes a key/value pair of the job
        :returns: True if the job was successfully persisted
        :rtype: boolean
        :raises ValueError: if an unknown kwarg is given
        :raises ValueError: if a duplicate record already exists
        :raises PrimaryKeyError: if the given record does not have a
            conforming primary key

        Example::
            >>> success = scheduled_jobs.put(
            ...     s3_path='s3://my-bucket/logs/apache',
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
            >>> success
            True
            >>> # Trying to put the same item again
            >>> scheduled_jobs.put(
            ...     s3_path='s3://my-bucket/logs/apache',
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
            ValueError
            >>> # Example of no kwarg
            >>> scheduled_jobs.put()
            ValueError
            >>> # Example of unknown kwarg
            >>> scheduled_jobs.put(color='black')
            ValueError
        '''
        return self._records.put(**kwargs)

    def delete(self, **kwargs):
        '''
        Fetch a ScheduledJob with :meth:`get` and delete it.

        :param kwargs: key fields used to find the job before deleting it;
            the same rules as for :meth:`get` apply (at least one valid
            kwarg, otherwise ValueError)
        :returns: True if the ScheduledJob is successfully deleted
        :rtype: boolean
        :raises KeyError: if the requested record is not found
        :raises PrimaryKeyError: if the requested record does not have a
            conforming primary key

        Example::
            >>> deleted = scheduled_jobs.delete(
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
        '''
        return self.get(**kwargs).delete(self.username, 'user request')

    def get_jobs_with_et_status(self, et_status_value):
        '''
        Find the ScheduledJobs whose et_status equals the given value.

        The index query is issued when this method is called; wrapping the
        results in ScheduledJob happens lazily while iterating.

        :param et_status_value: value of et_status
        :type et_status_value: string
        :returns: An iterable of ScheduledJob matching given et_status

        Example::
            >>> jobs = scheduled_jobs.get_jobs_with_et_status('running')
            >>> for job in jobs:
            ...     print(job.get(hash_key=None, et_status=None))
            {'hash_key': '1', 'et_status': 'running'}
            {'hash_key': '2', 'et_status': 'running'}
        '''
        matches = self._records.query_by_index(
            index=self.INDEX_ET_STATUS,
            et_status=et_status_value,
        )
        return self._as_jobs(matches)

    def get_jobs_with_log_name(self, log_name, log_schema_version=None):
        '''
        Find the ScheduledJobs for a log_name, optionally narrowed by
        log_schema_version.

        The index query is issued when this method is called; wrapping the
        results in ScheduledJob happens lazily while iterating.

        :param log_name: value of log_name
        :type log_name: string
        :param log_schema_version: optional value of log_schema_version
        :type log_schema_version: string or None
        :returns: An iterable of ScheduledJob matching given values

        Example::
            >>> scheduled_jobs = ScheduledJobs()
            >>> jobs = scheduled_jobs.get_jobs_with_log_name('ranger')
            >>> for job in jobs:
            ...     print(job.get(hash_key=None, log_name=None))
            {'hash_key': '1', 'log_name': 'ranger'}
            {'hash_key': '2', 'log_name': 'ranger'}
        '''
        matches = self._records.query_by_index(
            index=self.INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION,
            log_name=log_name,
            log_schema_version=log_schema_version,
        )
        return self._as_jobs(matches)

    def __iter__(self):
        '''Iterate over every stored record as a ScheduledJob.'''
        for raw in self._records:
            yield ScheduledJob(record=raw)
Beispiel #6
0
class RedshiftClusters(object):
    '''
    Collection of :class:`.RedshiftCluster` items backed by a ``Records``
    store.
    '''

    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        :param persistence_object: backend that stores the clusters; for
            example a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each RedshiftCluster
        :type avro_schema_object: Schema
        '''
        store_args = {
            'persistence_object': persistence_object,
            'avro_schema_object': avro_schema_object,
        }
        self._clusters = Records(**store_args)

    def get(self, **kwargs):
        '''
        Look up a single RedshiftCluster.

        :param kwargs: key fields identifying the cluster; all of them are
            passed together to the underlying store. At least one valid
            kwarg is required: no kwargs, or an unknown kwarg, results in
            ValueError
        :returns: the cluster matching the given keys
        :rtype: :class:`.RedshiftCluster`
        :raises KeyError: if the requested cluster is not found
        :raises PrimaryKeyError: if the requested cluster does not have a
            conforming primary key

        Example::
            >>> redshift_cluster = redshift_clusters.get(redshift_id='cluster')
            >>> redshift_cluster.get(host=None, port=None)
            {'host': 'cluster.us-west-2.redshift.amazonaws.com',
             'port': 5439}
            >>> # Example of no kwarg
            >>> redshift_clusters.get()
            ValueError
            >>> # Example of unknown kwarg
            >>> redshift_clusters.get(color='black')
            ValueError
        '''
        found = self._clusters.get(**kwargs)
        return RedshiftCluster(found)

    def put(self, **kwargs):
        '''
        Store a new RedshiftCluster.

        :param kwargs: each kwarg becomes a key/value pair of the cluster
        :returns: True if the cluster was successfully persisted
        :rtype: boolean
        :raises ValueError: if an unknown kwarg is given
        :raises ValueError: if a duplicate cluster already exists
        :raises PrimaryKeyError: if the given cluster does not have a
            conforming primary key

        Example::
            >>> success = redshift_clusters.put(
            ...     redshift_id='cluster',
            ...     host='cluster.us-west-2.redshift.amazonaws.com',
            ...     port=5439)
            >>> success
            True
            >>> # Trying to put the same item again
            >>> redshift_clusters.put(
            ...     redshift_id='cluster',
            ...     host='cluster.us-west-2.redshift.amazonaws.com',
            ...     port=5439)
            ValueError
            >>> # Example of no kwarg
            >>> redshift_clusters.put()
            ValueError
            >>> # Example of unknown kwarg
            >>> redshift_clusters.put(color='black')
            ValueError
        '''
        return self._clusters.put(**kwargs)

    def __iter__(self):
        '''Iterate over every stored record as a RedshiftCluster.'''
        for raw in self._clusters:
            yield RedshiftCluster(cluster=raw)
Beispiel #7
0
class RedshiftClusters(object):
    '''
    Collection of :class:`.RedshiftCluster` items backed by a ``Records``
    store.
    '''

    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        :param persistence_object: backend that stores the clusters; for
            example a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each RedshiftCluster
        :type avro_schema_object: Schema
        '''
        self._clusters = Records(
            persistence_object=persistence_object,
            avro_schema_object=avro_schema_object,
        )

    def get(self, **kwargs):
        '''
        Look up a single RedshiftCluster.

        :param kwargs: key fields identifying the cluster; all of them are
            passed together to the underlying store. At least one valid
            kwarg is required: no kwargs, or an unknown kwarg, results in
            ValueError
        :returns: the cluster matching the given keys
        :rtype: :class:`.RedshiftCluster`
        :raises KeyError: if the requested cluster is not found
        :raises PrimaryKeyError: if the requested cluster does not have a
            conforming primary key

        Example::
            >>> redshift_cluster = redshift_clusters.get(redshift_id='cluster')
            >>> redshift_cluster.get(host=None, port=None)
            {'host': 'cluster.us-west-2.redshift.amazonaws.com',
             'port': 5439}
            >>> # Example of no kwarg
            >>> redshift_clusters.get()
            ValueError
            >>> # Example of unknown kwarg
            >>> redshift_clusters.get(color='black')
            ValueError
        '''
        found = self._clusters.get(**kwargs)
        return RedshiftCluster(found)

    def put(self, **kwargs):
        '''
        Store a new RedshiftCluster.

        :param kwargs: each kwarg becomes a key/value pair of the cluster
        :returns: True if the cluster was successfully persisted
        :rtype: boolean
        :raises ValueError: if an unknown kwarg is given
        :raises ValueError: if a duplicate cluster already exists
        :raises PrimaryKeyError: if the given cluster does not have a
            conforming primary key

        Example::
            >>> success = redshift_clusters.put(
            ...     redshift_id='cluster',
            ...     host='cluster.us-west-2.redshift.amazonaws.com',
            ...     port=5439)
            >>> success
            True
            >>> # Trying to put the same item again
            >>> redshift_clusters.put(
            ...     redshift_id='cluster',
            ...     host='cluster.us-west-2.redshift.amazonaws.com',
            ...     port=5439)
            ValueError
            >>> # Example of no kwarg
            >>> redshift_clusters.put()
            ValueError
            >>> # Example of unknown kwarg
            >>> redshift_clusters.put(color='black')
            ValueError
        '''
        return self._clusters.put(**kwargs)

    def __iter__(self):
        '''Iterate over every stored record as a RedshiftCluster.'''
        for raw in self._clusters:
            yield RedshiftCluster(cluster=raw)
Beispiel #8
0
class ScheduledJobs(object):
    '''
    Collection of :class:`.ScheduledJob` items backed by a ``Records`` store.

    Reads go through the store and every record coming back is wrapped in a
    ``ScheduledJob``; writes are handed to the store as-is.
    '''

    INDEX_ET_STATUS = 'ETStatusIndex'
    INDEX_LOAD_STATUS = 'LoadStatusIndex'
    INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION = 'LogNameLogSchemaVersionIndex'

    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        :param persistence_object: backend that stores the jobs; for example
            a dynamo table instance
        :param avro_schema_object: Avro schema describing the fields allowed
            in each ScheduledJob
        :type avro_schema_object: Schema
        '''
        store = Records(persistence_object=persistence_object,
                        avro_schema_object=avro_schema_object)
        self._records = store
        # delete() passes this value on to ScheduledJob.delete; it is None
        # when LOGNAME is not set in the environment.
        self.username = os.getenv('LOGNAME')

    @staticmethod
    def _wrap_all(records):
        '''Lazily wrap each raw record in a ScheduledJob.'''
        for item in records:
            yield ScheduledJob(record=item)

    def get(self, **kwargs):
        '''
        Look up a single ScheduledJob.

        :param kwargs: key fields identifying the job; all of them are
            passed together to the underlying store. At least one valid
            kwarg is required: no kwargs, or an unknown kwarg, results in
            ValueError
        :returns: the job matching the given keys
        :rtype: :class:`.ScheduledJob`
        :raises KeyError: if the requested record is not found
        :raises PrimaryKeyError: if the requested record does not have a
            conforming primary key

        Example::
            >>> scheduled_job = scheduled_jobs.get(
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
            >>> scheduled_job.get(s3_path=None)
            {'s3_path': 's3://my-bucket/logs/apache'}
            >>> # Example of no kwarg
            >>> scheduled_jobs.get()
            ValueError
            >>> # Example of unknown kwarg
            >>> scheduled_jobs.get(color='black')
            ValueError
        '''
        found = self._records.get(**kwargs)
        return ScheduledJob(found)

    def put(self, **kwargs):
        '''
        Store a new ScheduledJob.

        :param kwargs: each kwarg becomes a key/value pair of the job
        :returns: True if the job was successfully persisted
        :rtype: boolean
        :raises ValueError: if an unknown kwarg is given
        :raises ValueError: if a duplicate record already exists
        :raises PrimaryKeyError: if the given record does not have a
            conforming primary key

        Example::
            >>> success = scheduled_jobs.put(
            ...     s3_path='s3://my-bucket/logs/apache',
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
            >>> success
            True
            >>> # Trying to put the same item again
            >>> scheduled_jobs.put(
            ...     s3_path='s3://my-bucket/logs/apache',
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
            ValueError
            >>> # Example of no kwarg
            >>> scheduled_jobs.put()
            ValueError
            >>> # Example of unknown kwarg
            >>> scheduled_jobs.put(color='black')
            ValueError
        '''
        return self._records.put(**kwargs)

    def delete(self, **kwargs):
        '''
        Fetch a ScheduledJob with :meth:`get` and delete it.

        :param kwargs: key fields used to find the job before deleting it;
            the same rules as for :meth:`get` apply (at least one valid
            kwarg, otherwise ValueError)
        :returns: True if the ScheduledJob is successfully deleted
        :rtype: boolean
        :raises KeyError: if the requested record is not found
        :raises PrimaryKeyError: if the requested record does not have a
            conforming primary key

        Example::
            >>> deleted = scheduled_jobs.delete(
            ...     hash_key='1:public:my_cool_schema_alpha:2014-08-01:2014-08-02')
        '''
        return self.get(**kwargs).delete(self.username, 'user request')

    def get_jobs_with_et_status(self, et_status_value):
        '''
        Find the ScheduledJobs whose et_status equals the given value.

        The index query is issued when this method is called; wrapping the
        results in ScheduledJob happens lazily while iterating.

        :param et_status_value: value of et_status
        :type et_status_value: string
        :returns: An iterable of ScheduledJob matching given et_status

        Example::
            >>> jobs = scheduled_jobs.get_jobs_with_et_status('running')
            >>> for job in jobs:
            ...     print(job.get(hash_key=None, et_status=None))
            {'hash_key': '1', 'et_status': 'running'}
            {'hash_key': '2', 'et_status': 'running'}
        '''
        hits = self._records.query_by_index(index=self.INDEX_ET_STATUS,
                                            et_status=et_status_value)
        return self._wrap_all(hits)

    def get_jobs_with_log_name(self, log_name, log_schema_version=None):
        '''
        Find the ScheduledJobs for a log_name, optionally narrowed by
        log_schema_version.

        The index query is issued when this method is called; wrapping the
        results in ScheduledJob happens lazily while iterating.

        :param log_name: value of log_name
        :type log_name: string
        :param log_schema_version: optional value of log_schema_version
        :type log_schema_version: string or None
        :returns: An iterable of ScheduledJob matching given values

        Example::
            >>> scheduled_jobs = ScheduledJobs()
            >>> jobs = scheduled_jobs.get_jobs_with_log_name('ranger')
            >>> for job in jobs:
            ...     print(job.get(hash_key=None, log_name=None))
            {'hash_key': '1', 'log_name': 'ranger'}
            {'hash_key': '2', 'log_name': 'ranger'}
        '''
        hits = self._records.query_by_index(
            index=self.INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION,
            log_name=log_name,
            log_schema_version=log_schema_version)
        return self._wrap_all(hits)

    def __iter__(self):
        '''Iterate over every stored record as a ScheduledJob.'''
        for item in self._records:
            yield ScheduledJob(record=item)
Beispiel #9
0
class ETLRecords(object):
    '''
    Collection of :class:`.ETLRecord` items backed by a ``Records`` store.
    '''

    INDEX_JOB_ID_AND_DATA_DATE = 'ETLRecordByJobIdAndDataDate'

    def __init__(self, persistence_object=None, avro_schema_object=None):
        '''
        Private API. Unstable. Use it at your own risk.

        :param persistence_object: The implementation of a persistence_object;
            for example a dynamo table instance
        :param avro_schema_object: An Avro schema object that describes what's
            allowed in each ETLRecord
        :type avro_schema_object: Schema
        '''
        self._records = Records(persistence_object=persistence_object,
                                avro_schema_object=avro_schema_object)

    def get(self, **kwargs):
        '''
        Returns an ETLRecord

        :param kwargs: all kwargs are used together to get the record. It's
            required to pass in at least one valid kwarg; an unknown kwarg, or
            no kwargs, will result in ValueError
        :returns: ETLRecord that matches given keys
        :rtype: :class:`.ETLRecord`
        :raises KeyError: if requested record is not found
        :raises PrimaryKeyError: if requested record does not have a
            conforming primary key

        Example::
            >>> etl_record = etl_records.get(
            ...     hash_key='1:public:search search_results',
            ...     data_date='2014-07-26')
            >>> etl_record.get(data_date=None, s3_path=None)
            {'data_date': '2014-07-26',
             's3_path': 's3://bucket/key1/schema.yaml'}
            >>> # Example of no kwarg
            >>> etl_records.get()
            ValueError
            >>> # Example of unknown kwarg
            >>> etl_records.get(color='black')
            ValueError
        '''
        return ETLRecord(self._records.get(**kwargs))

    def put(self, **kwargs):
        '''
        Puts an ETLRecord

        :param kwargs: each kwarg becomes key/value pair in the record.
            Passing in unknown kwarg will result in ValueError. If item
            already exists, ValueError will be raised.
        :returns: True if ETLRecords successfully persist the record
        :rtype: boolean
        :raises ValueError: if unknown kwarg is given
        :raises ValueError: if a duplicate record already exists
        :raises PrimaryKeyError: if the given record does not have a
            conforming primary key

        Example::
            >>> success = etl_records.put(
            ...     hash_key='1:public:search search_results',
            ...     data_date='2014-07-26', et_state='et_started')
            >>> success
            True
            >>> # Trying to put the same item again
            >>> etl_records.put(
            ...     hash_key='1:public:search search_results',
            ...     data_date='2014-07-26', et_state='et_started')
            ValueError
            >>> # Example of no kwarg
            >>> etl_records.put()
            ValueError
            >>> # Example of unknown kwarg
            >>> etl_records.put(color='black')
            ValueError
        '''
        return self._records.put(**kwargs)

    def get_runs_with_job_id(self, job_id, data_date=None):
        '''
        Get ETLRecords matching given job_id

        The index query is issued when this method is called; wrapping the
        results in ETLRecord happens lazily while iterating.

        :param job_id: id of the job
        :type job_id: string
        :param data_date: optional data_date forwarded to the index query
            together with job_id; defaults to None
        :returns: An iterable of ETLRecord matching given job_id

        Example::
            >>> runs = etl_records.get_runs_with_job_id('1af2')
            >>> for run in runs:
            ...     print(run.get(hash_key=None, data_date=None))
            {'hash_key': '1af2', 'data_date': '2014-07-01'}
            {'hash_key': '1af2', 'data_date': '2014-07-02'}
        '''
        records = self._records.query_by_index(
            index=self.INDEX_JOB_ID_AND_DATA_DATE,
            job_id=job_id,
            data_date=data_date)

        def iter_record():
            for record in records:
                yield ETLRecord(record=record)

        return iter_record()

    def delete_job_runs(self, job_id):
        '''
        Attempt to delete all runs for a job id

        All runs currently found for the job are handed to the store's
        ``batch_delete``; the job is then queried again to check that
        nothing is left.

        :param job_id: id of the job
        :type job_id: string
        :returns: True if all runs are successfully deleted
        :rtype: boolean
        '''
        runs = self.get_runs_with_job_id(job_id)
        self._records.batch_delete(runs, hash_key=job_id, data_date=None)
        # Query again to verify. any() stops at the first surviving run, so
        # the leftovers are not all materialized just to test for emptiness.
        remaining = self.get_runs_with_job_id(job_id)
        return not any(True for _ in remaining)

    def __iter__(self):
        '''Iterate over every stored record as an ETLRecord.'''
        for record in self._records:
            yield ETLRecord(record=record)
Beispiel #10
0
class ETLRecords(object):
    """
    Collection of :class:`.ETLRecord` items backed by a ``Records`` store.
    """

    INDEX_JOB_ID_AND_DATA_DATE = "ETLRecordByJobIdAndDataDate"

    def __init__(self, persistence_object=None, avro_schema_object=None):
        """
        Private API. Unstable. Use it at your own risk.

        :param persistence_object: The implementation of a persistence_object;
            for example a dynamo table instance
        :param avro_schema_object: An Avro schema object that describes what's
            allowed in each ETLRecord
        :type avro_schema_object: Schema
        """
        self._records = Records(persistence_object=persistence_object, avro_schema_object=avro_schema_object)

    def get(self, **kwargs):
        """
        Returns an ETLRecord

        :param kwargs: all kwargs are used together to get the record. It's
            required to pass in at least one valid kwarg; an unknown kwarg, or
            no kwargs, will result in ValueError
        :returns: ETLRecord that matches given keys
        :rtype: :class:`.ETLRecord`
        :raises KeyError: if requested record is not found
        :raises PrimaryKeyError: if requested record does not have a
            conforming primary key

        Example::
            >>> etl_record = etl_records.get(
            ...     hash_key='1:public:search search_results',
            ...     data_date='2014-07-26')
            >>> etl_record.get(data_date=None, s3_path=None)
            {'data_date': '2014-07-26',
             's3_path': 's3://bucket/key1/schema.yaml'}
            >>> # Example of no kwarg
            >>> etl_records.get()
            ValueError
            >>> # Example of unknown kwarg
            >>> etl_records.get(color='black')
            ValueError
        """
        return ETLRecord(self._records.get(**kwargs))

    def put(self, **kwargs):
        """
        Puts an ETLRecord

        :param kwargs: each kwarg becomes key/value pair in the record.
            Passing in unknown kwarg will result in ValueError. If item
            already exists, ValueError will be raised.
        :returns: True if ETLRecords successfully persist the record
        :rtype: boolean
        :raises ValueError: if unknown kwarg is given
        :raises ValueError: if a duplicate record already exists
        :raises PrimaryKeyError: if the given record does not have a
            conforming primary key

        Example::
            >>> success = etl_records.put(
            ...     hash_key='1:public:search search_results',
            ...     data_date='2014-07-26', et_state='et_started')
            >>> success
            True
            >>> # Trying to put the same item again
            >>> etl_records.put(
            ...     hash_key='1:public:search search_results',
            ...     data_date='2014-07-26', et_state='et_started')
            ValueError
            >>> # Example of no kwarg
            >>> etl_records.put()
            ValueError
            >>> # Example of unknown kwarg
            >>> etl_records.put(color='black')
            ValueError
        """
        return self._records.put(**kwargs)

    def get_runs_with_job_id(self, job_id, data_date=None):
        """
        Get ETLRecords matching given job_id

        The index query is issued when this method is called; wrapping the
        results in ETLRecord happens lazily while iterating.

        :param job_id: id of the job
        :type job_id: string
        :param data_date: optional data_date forwarded to the index query
            together with job_id; defaults to None
        :returns: An iterable of ETLRecord matching given job_id

        Example::
            >>> runs = etl_records.get_runs_with_job_id('1af2')
            >>> for run in runs:
            ...     print(run.get(hash_key=None, data_date=None))
            {'hash_key': '1af2', 'data_date': '2014-07-01'}
            {'hash_key': '1af2', 'data_date': '2014-07-02'}
        """
        records = self._records.query_by_index(
            index=self.INDEX_JOB_ID_AND_DATA_DATE, job_id=job_id, data_date=data_date
        )

        def iter_record():
            for record in records:
                yield ETLRecord(record=record)

        return iter_record()

    def delete_job_runs(self, job_id):
        """
        Attempt to delete all runs for a job id

        All runs currently found for the job are handed to the store's
        ``batch_delete``; the job is then queried again to check that
        nothing is left.

        :param job_id: id of the job
        :type job_id: string
        :returns: True if all runs are successfully deleted
        :rtype: boolean
        """
        runs = self.get_runs_with_job_id(job_id)
        self._records.batch_delete(runs, hash_key=job_id, data_date=None)
        # Query again to verify. any() stops at the first surviving run, so
        # the leftovers are not all materialized just to test for emptiness.
        remaining = self.get_runs_with_job_id(job_id)
        return not any(True for _ in remaining)

    def __iter__(self):
        """Iterate over every stored record as an ETLRecord."""
        for record in self._records:
            yield ETLRecord(record=record)