Example #1
0
    def test_timesince(self):
        """Check the text ``timesince`` produces for a range of time gaps.

        Every case measures the gap between a fixed reference time and an
        earlier time, and compares the rendered string to the expected one.
        """
        reference = utc_now()
        ago = datetime.timedelta

        # exactly one unit
        single_unit_cases = (
            (ago(days=365), '1 year'),
            (ago(days=7), '1 week'),
            (ago(days=1), '1 day'),
            (ago(hours=1), '1 hour'),
            (ago(minutes=1), '1 minute'),
            (ago(seconds=1), '1 second'),
        )

        # gaps that span more than one unit (or none at all)
        mixed_unit_cases = (
            # a year plus a week is still expected to read as just '1 year'
            (ago(days=365 + 7), '1 year'),
            (ago(days=40), '1 month, 1 week'),
            (ago(days=2, seconds=60 * 60), '2 days, 1 hour'),
            (ago(days=2, seconds=60 * 60 * 2), '2 days, 2 hours'),
            (ago(hours=1, seconds=60), '1 hour, 1 minute'),
            (ago(hours=2, seconds=60 * 2), '2 hours, 2 minutes'),
            (ago(minutes=3, seconds=10), '3 minutes, 10 seconds'),
            (ago(seconds=1), '1 second'),
            (ago(seconds=0), '0 seconds'),
        )

        for gap, expected in single_unit_cases + mixed_unit_cases:
            eq_(timesince(reference - gap, reference), expected)
Example #2
0
    def test_utc_now(self):
        """Verify that ``utc_now()`` returns a value whose tzinfo reports UTC."""
        aware_now = utc_now()
        tz = aware_now.tzinfo
        ok_(tz)

        # The tzinfo methods take a datetime argument; a naive one is passed.
        naive = datetime.datetime.utcnow()
        no_offset = datetime.timedelta(0)
        eq_(tz.tzname(naive), 'UTC')
        eq_(tz.utcoffset(naive), no_offset)
        eq_(tz.dst(naive), no_offset)
Example #3
0
    def run(self, date=None):
        """Dump one day of Hive query results to a temp file and persist it.

        ``self.config.query`` is run for the day before ``date``. Every row
        that contains no ``None`` value is written as one tab-separated line
        to a temporary file, which is then passed to ``self.persist_data``
        along with the destination transactions. The temporary file is
        removed afterwards whether or not the run succeeded, and the Hive
        cursor and connection are closed as soon as the rows are dumped.

        :arg date: reference date; ``utc_now()`` is used when it is falsy.
        :raises NoRowsWritten: if no row ended up in the temporary file.
        """
        # Imported locally so this method does not depend on the module's
        # import block already providing it.
        from contextlib import closing

        # NOTE(willkg): This lets us have a dry-run app that doesn't run as
        # a backfill app. In the normal case, this will get passed a date.
        date = date or utc_now()

        db_class = self.config.primary_destination.database_class
        primary_database = db_class(self.config.primary_destination)
        tx_class = self.config.primary_destination.transaction_executor_class
        primary_transaction = tx_class(
            self.config,
            primary_database,
        )
        transactions = [primary_transaction]

        db_class = self.config.secondary_destination.database_class
        # The reason for checking if this is anything at all is
        # because one way of disabling the secondary destination
        # is to set the database_class to an empty string.
        if db_class:
            secondary_database = db_class(self.config.secondary_destination)
            if secondary_database.config != primary_database.config:
                # The secondary really is different from the first one.
                # By default, if not explicitly set, it'll pick up the same
                # resource values as the first one.
                tx_class = (self.config.secondary_destination.
                            transaction_executor_class)
                secondary_transaction = tx_class(
                    self.config,
                    secondary_database,
                )
                transactions.append(secondary_transaction)

        # The query is always run for the day before the given date.
        target_date = (date - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

        raw_adi_logs_pathname = os.path.join(
            tempfile.gettempdir(),
            "%s.raw_adi_logs.TEMPORARY%s" % (target_date, '.txt'))
        try:
            with codecs.open(raw_adi_logs_pathname, 'w', 'utf-8') as f:
                hive = pyhs2.connect(
                    host=self.config.hive_host,
                    port=self.config.hive_port,
                    authMechanism=self.config.hive_auth_mechanism,
                    user=self.config.hive_user,
                    password=self.config.hive_password,
                    database=self.config.hive_database,
                    # the underlying TSocket setTimeout() wants milliseconds
                    timeout=self.config.timeout * 1000)

                # Close the cursor and then the connection once the rows are
                # dumped, also when the query or a write raises; previously
                # neither was ever closed.
                with closing(hive), closing(hive.cursor()) as cur:
                    query = self.config.query % target_date
                    cur.execute(query)
                    rows_written = 0
                    for row in cur:
                        # Rows with any missing value are skipped entirely.
                        if None in row:
                            continue
                        f.write("\t".join(
                            self.remove_control_characters(
                                urllib2.unquote(v)
                            ).replace('\\', '\\\\')
                            if isinstance(v, basestring) else str(v)
                            for v in row))
                        f.write("\n")
                        rows_written += 1

            if not rows_written:
                raise NoRowsWritten('hive yielded no rows to write')

            self.config.logger.info('Wrote %d rows from doing hive query' %
                                    rows_written)

            self.persist_data(transactions, raw_adi_logs_pathname, target_date)

        finally:
            # Never leave the temporary dump behind, success or failure.
            if os.path.isfile(raw_adi_logs_pathname):
                os.remove(raw_adi_logs_pathname)
Example #4
0
 def test_timesince_oddballs(self):
     """Check ``timesince`` when both arguments come from ``.date()``."""
     end = utc_now()
     start = end - datetime.timedelta(days=7)
     # compare two dates
     start_date, end_date = start.date(), end.date()
     eq_(timesince(start_date, end_date), '1 week')
    def run(self, date=None):
        """Dump one day of Hive query results to a temp file and persist it.

        ``self.config.query`` is run for the day before ``date``. Every row
        that contains no ``None`` value is written as one tab-separated line
        to a temporary file, which is then passed to ``self.persist_data``
        along with the destination transactions. The temporary file is
        removed afterwards whether or not the run succeeded, and the Hive
        cursor and connection are closed as soon as the rows are dumped.

        :arg date: reference date; ``utc_now()`` is used when it is falsy.
        :raises NoRowsWritten: if no row ended up in the temporary file.
        """
        # Imported locally so this method does not depend on the module's
        # import block already providing it.
        from contextlib import closing

        # NOTE(willkg): This lets us have a dry-run app that doesn't run as
        # a backfill app. In the normal case, this will get passed a date.
        date = date or utc_now()

        db_class = self.config.primary_destination.database_class
        primary_database = db_class(self.config.primary_destination)
        tx_class = self.config.primary_destination.transaction_executor_class
        primary_transaction = tx_class(
            self.config,
            primary_database,
        )
        transactions = [primary_transaction]

        db_class = self.config.secondary_destination.database_class
        # The reason for checking if this is anything at all is
        # because one way of disabling the secondary destination
        # is to set the database_class to an empty string.
        if db_class:
            secondary_database = db_class(self.config.secondary_destination)
            if secondary_database.config != primary_database.config:
                # The secondary really is different from the first one.
                # By default, if not explicitly set, it'll pick up the same
                # resource values as the first one.
                tx_class = (
                    self.config.secondary_destination
                    .transaction_executor_class
                )
                secondary_transaction = tx_class(
                    self.config,
                    secondary_database,
                )
                transactions.append(secondary_transaction)

        # The query is always run for the day before the given date.
        target_date = (date - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

        raw_adi_logs_pathname = os.path.join(
            tempfile.gettempdir(),
            "%s.raw_adi_logs.TEMPORARY%s" % (
                target_date,
                '.txt'
            )
        )
        try:
            with codecs.open(raw_adi_logs_pathname, 'w', 'utf-8') as f:
                hive = pyhs2.connect(
                    host=self.config.hive_host,
                    port=self.config.hive_port,
                    authMechanism=self.config.hive_auth_mechanism,
                    user=self.config.hive_user,
                    password=self.config.hive_password,
                    database=self.config.hive_database,
                    # the underlying TSocket setTimeout() wants milliseconds
                    timeout=self.config.timeout * 1000
                )

                # Close the cursor and then the connection once the rows are
                # dumped, also when the query or a write raises; previously
                # neither was ever closed.
                with closing(hive), closing(hive.cursor()) as cur:
                    query = self.config.query % target_date
                    cur.execute(query)
                    rows_written = 0
                    for row in cur:
                        # Rows with any missing value are skipped entirely.
                        if None in row:
                            continue
                        f.write(
                            "\t"
                            .join(
                                self.remove_control_characters(
                                    urllib2.unquote(v)
                                ).replace('\\', '\\\\')
                                if isinstance(v, basestring) else str(v)
                                for v in row
                            )
                        )
                        f.write("\n")
                        rows_written += 1

            if not rows_written:
                raise NoRowsWritten('hive yielded no rows to write')

            self.config.logger.info(
                'Wrote %d rows from doing hive query' % rows_written
            )

            self.persist_data(transactions, raw_adi_logs_pathname, target_date)

        finally:
            # Never leave the temporary dump behind, success or failure.
            if os.path.isfile(raw_adi_logs_pathname):
                os.remove(raw_adi_logs_pathname)