Example #1
0
    def run(self):
        """
        Run sanity check.
        """
        col = self._get_collection()

        # check that the collection contains at least min_total_results entries
        fields = []
        for field in self.non_null_fields:
            fields.append({field: {"$ne": None}})

        if fields:
            limit = self.min_total_results
            num_results = col.find({"$and": fields}).limit(limit).count(True)
            if num_results < limit:
                exception_string = 'Sanity check failed: only found %s / %s expected results in collection %s' % \
                    (num_results, limit, self.collection_name())
                logger.warn(exception_string)
                raise MongoDBTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids(col)

        # write token to note completion
        target_factory.write_file(self.output_token())
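The `_sanity_check_ids` helper is not shown in this example. A minimal sketch of how it might look with pymongo follows; `sanity_check_ids` as the list of expected document ids is an assumption, not an attribute confirmed above.

    def _sanity_check_ids(self, col):
        # Hypothetical sketch: look up each expected document by _id and fail
        # loudly if any are missing. sanity_check_ids is an assumed attribute.
        missing = [doc_id for doc_id in self.sanity_check_ids
                   if col.find_one({'_id': doc_id}) is None]
        if missing:
            exception_string = 'Sanity check failed: missing ids %s in collection %s' % \
                (missing, self.collection_name())
            logger.warn(exception_string)
            raise MongoDBTaskException(exception_string)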
Example #2
0
    def run(self):
        """
        Run sanity check.
        """
        dynamodb_client = DynamoDBClient()
        table = dynamodb_client.get_table(self.table_name())

        # check that the table contains at least min_total_results entries
        limit = self.min_total_results
        kw = {'limit': limit}
        for field in self.non_null_fields:
            kw['%s__null' % field] = False
        results = [r for r in table.scan(**kw)]
        num_results = len(results)
        if num_results < limit:
            exception_string = 'Sanity check failed: only found %s / %s expected results in table %s' % \
                    (num_results, limit, self.table_name())
            logger.warn(exception_string)
            raise DynamoDBTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids(table)

        # write token to note completion
        target_factory.write_file(self.output_token())
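Here too `_sanity_check_ids` is not part of the example. A rough sketch against boto's `dynamodb2` API is below; `self.hash_key` and `self.sanity_check_ids` are assumed attributes, not shown above.

    def _sanity_check_ids(self, table):
        # Hypothetical sketch: fetch each expected item by its hash key.
        # hash_key and sanity_check_ids are assumptions, not shown above.
        from boto.dynamodb2.exceptions import ItemNotFound
        for item_id in self.sanity_check_ids:
            try:
                table.get_item(**{self.hash_key: item_id})
            except ItemNotFound:
                exception_string = 'Sanity check failed: id %s not found in table %s' % \
                    (item_id, self.table_name())
                logger.warn(exception_string)
                raise DynamoDBTaskException(exception_string)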
Example #3
0
    def run(self):
        """
        Run a Mortar job using the Mortar API.

        This method writes out several "tokens" as it executes to ensure
        idempotence:

        * `running_token`: This token indicates that the job is currently running. If a token
          exists at this path, Luigi will poll the currently running job instead of starting a 
          new one.
        * `success_token`: This token indicates that the job has already completed successfully.
          If this token exists, Luigi will not rerun the task.
        """
        api = self._get_api()
        if self.running_token().exists():
            job_id = self.running_token().open().read().strip()
        else:
            job_id = self._run_job(api)
            # to guarantee idempotence, record that the job is running
            target_factory.write_file(self.running_token(), text=job_id)
        job = self._poll_job_completion(api, job_id)
        final_job_status_code = job.get('status_code')
        # record that the job has finished
        self.running_token().remove()
        if final_job_status_code != jobs.STATUS_SUCCESS:
            for out in self.script_output():
                logger.info('Mortar script failed: removing incomplete data in %s' % out)
                out.remove()
            raise Exception('Mortar job_id [%s] failed with status_code: [%s], error details: %s' % (job_id, final_job_status_code, job.get('error')))
        else:
            target_factory.write_file(self.success_token())
            logger.info('Mortar job_id [%s] completed successfully' % job_id)
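The job submission and polling helpers are not shown. A sketch of `_poll_job_completion` follows, assuming the Mortar API client exposes `jobs.get_job(api, job_id)` and a failure constant alongside `jobs.STATUS_SUCCESS`; those names are assumptions about the client library, not confirmed by the example.

    def _poll_job_completion(self, api, job_id, poll_interval_secs=15):
        # Illustrative sketch only: poll until the job reaches a terminal
        # status. jobs.get_job and jobs.STATUS_FAILURE are assumed names.
        import time
        while True:
            job = jobs.get_job(api, job_id)
            status_code = job.get('status_code')
            if status_code in (jobs.STATUS_SUCCESS, jobs.STATUS_FAILURE):
                return job
            logger.info('Mortar job_id [%s] has status: %s' % (job_id, status_code))
            time.sleep(poll_interval_secs)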
Example #4
0
    def run(self):
        """
        Run sanity check.
        """
        dynamodb_client = DynamoDBClient()
        table = dynamodb_client.get_table(self.table_name())

        # check that the table contains at least min_total_results entries
        limit = self.min_total_results
        kw = {'limit': limit}
        for field in self.non_null_fields:
            kw['%s__null' % field] = False
        results = [r for r in table.scan(**kw)]
        num_results = len(results)
        if num_results < limit:
            exception_string = 'Sanity check failed: only found %s / %s expected results in table %s' % \
                    (num_results, limit, self.table_name())
            logger.warn(exception_string)
            raise DynamoDBTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids(table)

        # write token to note completion
        target_factory.write_file(self.output_token())
Example #5
0
    def run(self):
        """
        Run sanity check.
        """
        col = self._get_collection()

        # check that the collection contains at least min_total_results entries
        fields = []
        for field in self.non_null_fields:
            fields.append({field: {"$ne": None}})

        if fields:
            limit = self.min_total_results
            num_results = col.find({"$and": fields}).limit(limit).count(True)
            if num_results < limit:
                exception_string = 'Sanity check failed: only found %s / %s expected results in collection %s' % \
                    (num_results, limit, self.collection_name())
                logger.warn(exception_string)
                raise MongoDBTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids(col)

        # write token to note completion
        target_factory.write_file(self.output_token())
Example #6
0
    def run(self):
        """
        Run a sanity check on the table, ensuring that
        data was loaded appropriately.  Raises a :py:class:`DBMSTaskException`
        if the sanity check fails.
        """
        cur = self.get_connection().cursor()
        overall_query = self._create_overall_query()
        cur.execute(overall_query)
        rows = cur.fetchall()

        if len(rows) < self.min_total_results:
            exception_string = 'Sanity check failed: only found %s / %s expected results in table %s' % \
                (len(rows), self.min_total_results, self.table_name())
            logger.warn(exception_string)
            cur.close()
            self.get_connection().close()
            raise DBMSTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids()

        cur.close()
        self.get_connection().close()
        # write token to note completion
        target_factory.write_file(self.output_token())
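The query built by `_create_overall_query` is not shown; a plausible sketch is below, assuming `non_null_fields` holds trusted column names (nothing user-supplied should be interpolated like this).

    def _create_overall_query(self):
        # Illustrative sketch: select up to min_total_results rows in which
        # every required column is populated. Column names are assumed trusted.
        where_clause = ' AND '.join(
            ['%s IS NOT NULL' % field for field in self.non_null_fields]) or '1=1'
        return 'SELECT * FROM %s WHERE %s LIMIT %d' % \
            (self.table_name(), where_clause, self.min_total_results)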
Example #7
0
    def run(self):
        """
        Run a sanity check on the table, ensuring that
        data was loaded appropriately.  Raises a :py:class:`DBMSTaskException`
        if the sanity check fails.
        """
        cur = self.get_connection().cursor()
        overall_query = self._create_overall_query()
        cur.execute(overall_query)
        rows = cur.fetchall()

        if len(rows) < self.min_total_results:
            exception_string = 'Sanity check failed: only found %s / %s expected results in table %s' % \
                (len(rows), self.min_total_results, self.table_name())
            logger.warn(exception_string)
            cur.close()
            self.get_connection().close()
            raise DBMSTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids()

        cur.close()
        self.get_connection().close()
        # write token to note completion
        target_factory.write_file(self.output_token())
Example #8
0
    def run(self):
        """
        Verify API.
        """
        self._verify_api()

        # write an output token to S3 to confirm that we finished
        target_factory.write_file(self.output()[0])
Example #9
0
    def run(self):
        """
        Verify API.
        """
        self._verify_api()

        # write an output token to S3 to confirm that we finished
        target_factory.write_file(self.output()[0])
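Every example here marks completion by writing a token through `target_factory.write_file`. The helper itself is not shown; a plausible sketch, assuming it accepts any Luigi target (the optional `text` argument matches how Example #3 calls it):

def write_file(output_target, text=''):
    # Illustrative sketch: write a (possibly empty) token to a Luigi target,
    # e.g. an S3Target or LocalTarget, to mark the task as complete.
    with output_target.open('w') as token_file:
        token_file.write(text)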
Example #10
0
    def run(self):
        """
        Create database table.
        """
        connection = self.get_connection()
        cur = connection.cursor()
        table_query = self.create_table_query()
        cur.execute(table_query)
        connection.commit()
        cur.close()
        connection.close()
        # write token to acknowledge table creation
        target_factory.write_file(self.output_token())
Example #11
0
    def run(self):
        """
        Update DynamoDB table throughput.
        """
        dynamodb_client = DynamoDBClient()
        throughput = {'read': self.read_throughput,
                      'write': self.write_throughput}
        dynamodb_client.update_throughput(self.table_name(), throughput)

        # write token to note completion
        target_factory.write_file(self.output_token())
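`DynamoDBClient` is a wrapper that is not included in these examples. A sketch of what `update_throughput` might do with boto's `dynamodb2` API (the wrapper's real implementation may differ):

    def update_throughput(self, table_name, throughput):
        # Illustrative sketch: delegate to boto's Table.update, which accepts
        # a {'read': ..., 'write': ...} throughput dict.
        from boto.dynamodb2.table import Table
        table = Table(table_name)
        table.update(throughput=throughput)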
Example #12
0
    def run(self):
        """
        Create database table.
        """
        connection = self.get_connection()
        cur = connection.cursor()
        table_query = self._create_table_query()
        cur.execute(table_query)
        connection.commit()
        cur.close()
        connection.close()
        # write token to acknowledge table creation
        target_factory.write_file(self.output_token())
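What `_create_table_query` returns depends entirely on the schema being loaded; the sketch below is a placeholder for illustration only, with a made-up column list.

    def _create_table_query(self):
        # Illustrative sketch: table_name() is shown in the examples, but the
        # column definition below is a made-up placeholder, not a real schema.
        return 'CREATE TABLE IF NOT EXISTS %s (id VARCHAR(64) PRIMARY KEY)' % \
            self.table_name()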
Example #13
0
    def run(self):
        """
        Create database table.
        """
        connection = self.get_connection()
        cur = connection.cursor()
        table_query = self._create_table_query()
        cur.execute(table_query)
        connection.commit()
        cur.close()
        connection.close()
        # write token to acknowledge table creation
        target_factory.write_file(self.output_token())
Example #14
0
    def run(self):
        """
        Update DynamoDB table throughput.
        """
        dynamodb_client = DynamoDBClient()
        throughput = {
            'read': self.read_throughput,
            'write': self.write_throughput
        }
        dynamodb_client.update_throughput(self.table_name(), throughput)

        # write token to note completion
        target_factory.write_file(self.output_token())
Example #15
0
    def run(self):
        """
        Create the DynamoDB table.
        """
        dynamodb_client = DynamoDBClient()
        schema = [HashKey(self.hash_key, data_type=self.hash_key_type)]
        if self.range_key:
            schema.append(RangeKey(self.range_key, data_type=self.range_key_type))
        throughput = {'read': self.read_throughput,
                      'write': self.write_throughput}
        if self.indexes:
            dynamodb_client.create_table(self.table_name(), schema, throughput, indexes=self.generate_indexes())
        else:
            dynamodb_client.create_table(self.table_name(), schema, throughput)

        # write token to note completion
        target_factory.write_file(self.output_token())
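`generate_indexes` is not shown. Below is a sketch under the assumption that `self.indexes` is a list of `(index_name, attribute, data_type)` tuples and that boto's local secondary index classes are used; the real spec format is not confirmed by the example.

    def generate_indexes(self):
        # Illustrative sketch: build boto local secondary indexes. The
        # (name, attribute, data_type) tuple format of self.indexes is assumed.
        from boto.dynamodb2.fields import AllIndex, HashKey, RangeKey
        return [AllIndex(name,
                         parts=[HashKey(self.hash_key, data_type=self.hash_key_type),
                                RangeKey(attribute, data_type=data_type)])
                for (name, attribute, data_type) in self.indexes]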
Example #16
0
    def run(self):
        """
        Run an R script using the Rscript program. Pipes stdout and
        stderr back to the logging facility.
        """
        cmd = self._subprocess_command()
        output = subprocess.Popen(cmd,
                                  shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT,
                                  bufsize=1)
        for line in iter(output.stdout.readline, b''):
            logger.info(line)
        out, err = output.communicate()
        rc = output.returncode
        if rc != 0:
            raise RuntimeError('%s returned non-zero error code %s' %
                               (self._subprocess_command(), rc))

        target_factory.write_file(self.output_token())
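`_subprocess_command` is left to the subclass; a minimal sketch follows, assuming hypothetical `rscript_path` and `arguments()` hooks that are not part of the example.

    def _subprocess_command(self):
        # Illustrative sketch: build the Rscript invocation. rscript_path and
        # arguments() are assumed hooks, not attributes shown in the example.
        return 'Rscript %s %s' % (self.rscript_path, ' '.join(self.arguments()))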
Example #17
0
    def run(self):
        cmd = self.subprocess_commands()
        output = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        out, err = output.communicate()
        # generate output message
        message = self._create_message(cmd, out, err)
        self._check_error(err, message)

        self.cmd_output = {
          'cmd'   : cmd,
          'stdout': out,
          'stderr': err
        }
        logger.debug('%s - output:%s' % (self.__class__.__name__, message))
        if err == '':
            target_factory.write_file(self.output_token())
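`_check_error` is not shown either; a minimal sketch that treats any stderr output as a failure (the real task may raise a more specific exception than RuntimeError):

    def _check_error(self, err, message):
        # Illustrative sketch: any stderr output is treated as a failure.
        if err:
            logger.warn(message)
            raise RuntimeError(message)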
Example #18
0
    def run(self):
        """
        Run an R script using the Rscript program. Pipes stdout and
        stderr back to the logging facility.
        """
        cmd = self._subprocess_command()
        output = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=1
        )
        for line in iter(output.stdout.readline, b''):
            logger.info(line)
        out, err = output.communicate()
        rc = output.returncode
        if rc != 0:
            raise RuntimeError('%s returned non-zero error code %s' % (self._subprocess_command(), rc) )

        target_factory.write_file(self.output_token())
Example #19
0
    def run(self):
        """
        Run the mortar job.
        """
        api = self._get_api()
        if self.running_token().exists():
            job_id = self.running_token().open().read().strip()
        else:
            job_id = self._run_job(api)
            # to guarantee idempotence, record that the job is running
            target_factory.write_file(self.running_token(), text=job_id)
        job = self._poll_job_completion(api, job_id)
        final_job_status_code = job.get('status_code')
        # record that the job has finished
        self.running_token().remove()
        if final_job_status_code != jobs.STATUS_SUCCESS:
            for out in self.script_output():
                logger.info('Mortar script failed: removing incomplete data in %s' % out)
                out.remove()
            raise Exception('Mortar job_id [%s] failed with status_code: [%s], error details: %s' % (job_id, final_job_status_code, job.get('error')))
        else:
            target_factory.write_file(self.success_token())
            logger.info('Mortar job_id [%s] completed successfully' % job_id)
Example #20
0
    def run(self):
        """
        Create the DynamoDB table.
        """
        dynamodb_client = DynamoDBClient()
        schema = [HashKey(self.hash_key, data_type=self.hash_key_type)]
        if self.range_key:
            schema.append(
                RangeKey(self.range_key, data_type=self.range_key_type))
        throughput = {
            'read': self.read_throughput,
            'write': self.write_throughput
        }
        if self.indexes:
            dynamodb_client.create_table(self.table_name(),
                                         schema,
                                         throughput,
                                         indexes=self._generate_indexes())
        else:
            dynamodb_client.create_table(self.table_name(), schema, throughput)

        # write token to note completion
        target_factory.write_file(self.output_token())
Example #21
0
    def run(self):
        cmd = self.subprocess_commands()
        output = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        out, err = output.communicate()
        rc = output.returncode
        # generate output message
        message = self._create_message(cmd, out, err, rc)
        self._check_error(rc, err, message)

        self.cmd_output = {
          'cmd'         : cmd,
          'stdout'      : out,
          'stderr'      : err,
          'return_code' : rc
        }
        logger.debug('%s - output:%s' % (self.__class__.__name__, message))
        if err == '':
            target_factory.write_file(self.output_token())
Example #22
0
    def run(self):
        """
        Run a Mortar job using the Mortar API.

        This method writes out several "tokens" as it executes to ensure
        idempotence:

        * `running_token`: This token indicates that the job is currently running. If a token
          exists at this path, Luigi will poll the currently running job instead of starting a 
          new one.
        * `success_token`: This token indicates that the job has already completed successfully.
          If this token exists, Luigi will not rerun the task.
        """
        api = self._get_api()
        if self.running_token().exists():
            job_id = self.running_token().open().read().strip()
        else:
            job_id = self._run_job(api)
            # to guarantee idempotence, record that the job is running
            target_factory.write_file(self.running_token(), text=job_id)
        job = self._poll_job_completion(api, job_id)
        final_job_status_code = job.get('status_code')
        # record that the job has finished
        self.running_token().remove()
        if final_job_status_code != jobs.STATUS_SUCCESS:
            for out in self.script_output():
                logger.info(
                    'Mortar script failed: removing incomplete data in %s' %
                    out)
                out.remove()
            raise Exception(
                'Mortar job_id [%s] failed with status_code: [%s], error details: %s'
                % (job_id, final_job_status_code, job.get('error')))
        else:
            target_factory.write_file(self.success_token())
            logger.info('Mortar job_id [%s] completed successfully' % job_id)
Example #23
0
    def run(self):
        self._set_tables()

        # write an output token to S3 to confirm that we finished
        target_factory.write_file(self.output()[0])
Example #24
0
    def run(self):
        self._set_tables()

        # write an output token to S3 to confirm that we finished
        target_factory.write_file(self.output()[0])