예제 #1
    def wait_for_export(self, stream_type, export_id):
        """Poll an export until it finishes, fails, or the job times out.

        Returns True as soon as the export reports "Completed". An export
        that is still "Created" is enqueued on every pass. Raises
        ExportFailed when the export is "Cancelled" or "Failed", or when
        ``self.job_timeout`` seconds pass without completion.
        """
        timer_label = 'Export {} for {}'.format(export_id, stream_type)
        with metrics.job_timer(timer_label):
            deadline = pendulum.utcnow().add(seconds=self.job_timeout)
            # NOTE(review): no delay between polls is visible in this block,
            # so the loop re-polls immediately -- confirm this is intended.
            while pendulum.utcnow() < deadline:
                status = self.poll_export(stream_type, export_id)
                singer.log_info("export %s status is %s", export_id, status)

                if status == "Completed":
                    return True

                if status in ("Cancelled", "Failed"):
                    # Cancelled and failed exports fail the current sync.
                    raise ExportFailed(status)

                if status == "Created":
                    # The export exists but has not been started yet, so
                    # put it on the queue.
                    self.enqueue_export(stream_type, export_id)

        timeout_minutes = self.job_timeout / 60
        raise ExportFailed(
            "Export timed out after {} minutes".format(timeout_minutes))
예제 #2
    def handle(self, *args, **options):
        # Reads ``options``, builds the keyword arguments for HarvesterTask
        # (force, start, end, limit, set_spec) and runs the task for every
        # harvester listed in options['harvester'].
        # NOTE(review): this block does not parse as shown -- several
        # statements have lost lines (see the notes below). Restore them
        # from the upstream file before relying on this code.
        user = ShareUser.objects.get(username=settings.APPLICATION_USERNAME)

        if options['ids']:
            self.harvest_ids(user, options)

        task_kwargs = {'force': options.get('force', False)}

        # days_back and an explicit start/end are treated as exclusive.
        # NOTE(review): the call that consumed the message below is missing.
        if options['days_back'] is not None and (options['start']
                                                 or options['end']):
                'Please choose days-back OR a start date with end date, not both'

        if options['days_back'] is not None:
            task_kwargs['end'] = datetime.datetime.utcnow(
            ) + datetime.timedelta(days=-(options['days_back'] - 1))
            task_kwargs['start'] = datetime.datetime.utcnow(
            ) + datetime.timedelta(days=-options['days_back'])
            # NOTE(review): the two assignments below overwrite the values
            # computed just above; presumably an ``else:`` header was lost
            # here -- confirm.
            task_kwargs['start'] = pendulum.parse(
                options['start']) if options.get('start') else pendulum.utcnow(
                ) - datetime.timedelta(days=int(options['days_back'] or 1))
            task_kwargs['end'] = pendulum.parse(
                options['end']) if options.get('end') else pendulum.utcnow()

        # The task receives both bounds as ISO-8601 strings.
        task_kwargs['end'] = task_kwargs['end'].isoformat()
        task_kwargs['start'] = task_kwargs['start'].isoformat()

        if options['limit'] is not None:
            task_kwargs['limit'] = options['limit']

        if options['set_spec']:
            task_kwargs['set_spec'] = options['set_spec']

        # With no explicit harvester list and the "all" flag set, use the
        # label of every enabled ProviderAppConfig.
        # NOTE(review): the closing bracket of this list is missing.
        if not options['harvester'] and options['all']:
            options['harvester'] = [
                x.label for x in apps.get_app_configs()
                if isinstance(x, ProviderAppConfig) and not x.disabled

        for harvester in options['harvester']:
            # NOTE(review): the start of this call is missing.
                harvester)  # Die if the AppConfig can not be loaded

            # NOTE(review): the task_args tuple is truncated, and the
            # stdout.write call head and the ``else:`` header for the
            # synchronous path appear to be missing below.
            task_args = (
            if options['async']:
                HarvesterTask().apply_async(task_args, task_kwargs)
                    'Started job for harvester {}'.format(harvester))
                self.stdout.write('Running harvester for {}'.format(harvester))
                HarvesterTask().apply(task_args, task_kwargs, throw=True)
def poll_job_until_done(job_id, client, api):
    """Wait for a job to become ready and return its file ids.

    Repeatedly asks ``api.job_ready`` until it answers truthy, then returns
    ``api.get_file_ids(client, job_id)``. Raises ``apis.ExportTimedOut`` if
    DEFAULT_JOB_TIMEOUT seconds elapse first.
    """
    deadline = pendulum.utcnow().add(seconds=DEFAULT_JOB_TIMEOUT)
    while pendulum.utcnow() < deadline:
        ready = api.job_ready(client, job_id)
        if not ready:
            continue
        return api.get_file_ids(client, job_id)

    timeout_minutes = DEFAULT_JOB_TIMEOUT // 60
    raise apis.ExportTimedOut(timeout_minutes, "minutes")
예제 #4
def test_swbiodiversity_harvester():
    # Runs the 'org.swbiodiversity' harvester over the last three days with
    # httpretty's allow_net_connect switched off, and checks each result's
    # identifier and whitespace-stripped datum.
    # NOTE(review): this block does not parse as shown -- the call that the
    # dangling ``url + ';collid=(\d+)',`` argument belonged to is missing,
    # and the expected-HTML string below is never closed.
    httpretty.allow_net_connect = False

    config = SourceConfig.objects.get(label=('org.swbiodiversity'))
    url = config.harvester_kwargs['list_url']
    harvester = config.get_harvester()

    collection = furl(url)
    collection.args['collid'] = 223
                           url + ';collid=(\d+)',
    start = pendulum.utcnow() - timedelta(days=3)
    end = pendulum.utcnow()
    results = harvester.fetch_date_range(start, end)
    for result in results:
        assert result.identifier == collection.url
        # Both sides are compared with all whitespace removed.
        assert "".join(result.datum.split()) == "".join('''
            <div id="innertext">
            <h1>SEINet - Arizona Chapter Collections </h1>
                Select a collection to see full details.
            <table style="margin:10px;">
            <a href="collprofiles.php?collid=223">
                A. Michael Powell Herbarium
            <div style="margin:10px;">
            <div>Sample description</div>
            <div style="margin-top:5px;">
               Test Author ([email protected])

예제 #5
파일: bot.py 프로젝트: Phxntxm/Bonfire
async def on_ready():
    """Set the default presence, stamp the start time once, then check the DB."""
    # Switch the presence to the default status now that we are connected.
    default_game = discord.Game(name=utils.default_status, type=0)
    await bot.change_presence(game=default_game)

    # Record the start time only if it has not been set yet.
    uptime_already_set = hasattr(bot, 'uptime')
    if not uptime_already_set:
        bot.uptime = pendulum.utcnow()

    await utils.db_check()
예제 #6
파일: feeds.py 프로젝트: rahulsps/bts_tools
    def should_publish_steem(self, node, price):
        # Returns True when a price should be published for ``node`` and
        # False otherwise, based on node.opts['last_price'] and
        # node.opts['last_published'].
        # NOTE(review): this block does not parse as shown -- the three
        # message strings below have lost the call that wrapped them
        # (presumably log.debug, as used at the end), and the second
        # message's ``.format(...)`` argument is missing.
        #
        # check whether we need to publish again:
        # - if published more than 12 hours ago, publish again
        # - if published price different by more than 3%, publish again
        if 'last_price' not in node.opts:  # make sure we have already published once
                'Steem should publish for the first time since launch of bts_tools'
            return True

        last_published_interval = pendulum.interval(hours=12)
        # Relative price change (as a fraction) that triggers a publish.
        variance_trigger = 0.03

        if pendulum.utcnow(
        ) - node.opts['last_published'] > last_published_interval:
                'Steem should publish as it has not been published for {}'.
            return True
        # Relative difference between the new price and the last published one.
        if abs(price - node.opts['last_price']
               ) / node.opts['last_price'] >= variance_trigger:
                'Steem should publish as price has moved more than {}%'.format(
                    100 * variance_trigger))
            return True
        log.debug('No need for Steem to publish')
        return False
    def __init__(self, *, cfg, visible_feeds=None):
        """Initialise publishing state from a configuration mapping.

        Args:
            cfg: configuration mapping. Reads ``check_time_interval``
                (seconds, default 600) and, when present,
                ``publish_strategy.time_interval`` (seconds) and
                ``publish_strategy.time_slot``. A deep copy is kept on
                ``self.cfg``.
            visible_feeds: optional iterable of feeds; falls back to
                DEFAULT_VISIBLE_FEEDS when empty or None.
        """
        self.cfg = copy.deepcopy(cfg)
        from .feeds import DEFAULT_VISIBLE_FEEDS
        self.visible_feeds = list(visible_feeds or DEFAULT_VISIBLE_FEEDS)

        # FIXME: deprecate self.feed_period
        # Number of checks per publish; None when either setting is absent.
        try:
            self.feed_period = int(cfg['publish_strategy']['time_interval'] / cfg['check_time_interval'])
        except KeyError:
            self.feed_period = None

        self.check_time_interval = pendulum.interval(seconds=cfg.get('check_time_interval', 600))
        # The publish interval is optional; None disables that trigger.
        try:
            self.publish_time_interval = pendulum.interval(seconds=cfg['publish_strategy']['time_interval'])
        except KeyError:
            self.publish_time_interval = None

        self.feed_slot = cfg.get('publish_strategy', {}).get('time_slot', None)
        if self.feed_slot is not None:
            self.feed_slot = int(self.feed_slot)

        self.nfeed_checked = 0
        # Start with a last-published time one day in the past.
        self.last_published = pendulum.utcnow().subtract(days=1)

        log.debug('successfully initialized {}'.format(self))
 def get(self):
     # Collects running task instances whose current runtime exceeds the
     # mean duration of their past successful runs by more than 1800
     # seconds, and returns them as JSON.
     # NOTE(review): this block does not parse as shown -- both query
     # chains are truncated (unclosed calls) and the call that wraps the
     # OrderedDict below (presumably data_list.append) is missing.
     data_list = []
     cond = and_(self.TI.dag_id == self.DM.dag_id,
                 self.TI.state == "running", self.DM.owners == self.owners)
     tasks = self.session.query(self.TI.dag_id, self.TI.task_id,
                                self.TI.execution_date, self.TI.start_date,
     for task in tasks:
         # Only successful runs with a duration above 5 are considered.
         last_runs = self.session.query(self.TI.duration).filter(
             self.TI.task_id == task.task_id, self.TI.state == "success",
             self.TI.duration > 5).order_by(
         average = statistics.mean(duration[0] for duration in last_runs)
         # NOTE(review): on a timedelta, ``.seconds`` excludes whole days --
         # confirm that total seconds is not what is wanted here.
         runtime = (pendulum.utcnow() - task.start_date).seconds
         execution_date = self.datetime_iso(task.execution_date)
         if runtime > (average + 1800):
                 OrderedDict([('dag_id', task.dag_id),
                              ('task_id', task.task_id),
                              ('execution_date', execution_date),
                              ('runtime', self.seconds_to(runtime)),
                              ('average', self.seconds_to(average))]))
     return jsonify(data_list)
    def __init__(self, *, cfg, visible_feeds=None):
        """Initialise publishing state from a configuration mapping.

        Args:
            cfg: configuration mapping. Reads ``check_time_interval``
                (seconds, default 600) and, when present,
                ``publish_strategy.time_interval`` (seconds) and
                ``publish_strategy.time_slot``. A deep copy is kept on
                ``self.cfg``.
            visible_feeds: optional iterable of feeds; falls back to
                DEFAULT_VISIBLE_FEEDS when empty or None.
        """
        self.cfg = copy.deepcopy(cfg)
        from .feeds import DEFAULT_VISIBLE_FEEDS
        self.visible_feeds = list(visible_feeds or DEFAULT_VISIBLE_FEEDS)

        # FIXME: deprecate self.feed_period
        # Number of checks per publish; None when either setting is absent.
        try:
            self.feed_period = int(cfg['publish_strategy']['time_interval'] /
                                   cfg['check_time_interval'])
        except KeyError:
            self.feed_period = None

        self.check_time_interval = pendulum.interval(
            seconds=cfg.get('check_time_interval', 600))
        # The publish interval is optional; None disables that trigger.
        try:
            self.publish_time_interval = pendulum.interval(
                seconds=cfg['publish_strategy']['time_interval'])
        except KeyError:
            self.publish_time_interval = None

        self.feed_slot = cfg.get('publish_strategy', {}).get('time_slot', None)
        if self.feed_slot is not None:
            self.feed_slot = int(self.feed_slot)

        self.nfeed_checked = 0
        # Start with a last-published time one day in the past.
        self.last_published = pendulum.utcnow().subtract(days=1)

        log.debug('successfully initialized {}'.format(self))
def dummy_trigger__callable(*, dag_run: DagRun, **kwargs):
    """
    In test env we just want to trigger the etl_testing DAG with
    no config.
    """
    # ``dag_run`` and ``kwargs`` are accepted but not used; every call
    # triggers "etl_testing" with a fresh uuid1 run id at the current time.
    trigger_dag("etl_testing", run_id=str(uuid1()), execution_date=utcnow())
def sync_programs(client, state, stream):
    # Pages through the programs endpoint from the bookmarked time until
    # now, counts records at or after the bookmark, then writes a new
    # bookmark and returns (state, record_count).
    # NOTE(review): this block does not parse as shown -- the ``params``
    # dict, the client.request call and the write_bookmark call are not
    # closed, the ``if`` on the warnings check has no body (presumably a
    # ``break``), and the statement that emits each record is missing.
    #
    # http://developers.marketo.com/rest-api/assets/programs/#by_date_range
    # Programs are queryable via their updatedAt time but require and
    # end date as well. As there is no max time range for the query,
    # query from the bookmark value until current.
    # The Programs endpoint uses offsets with a return limit of 200
    # per page. If requesting past the final program, an error message
    # is returned to indicate that the endpoint has been fully synced.
    replication_key = determine_replication_key(stream['tap_stream_id'])

    start_date = bookmarks.get_bookmark(state, "programs", replication_key)
    end_date = pendulum.utcnow().isoformat()
    params = {
        "maxReturn": 200,
        "offset": 0,
        "earliestUpdatedAt": start_date,
        "latestUpdatedAt": end_date,
    endpoint = "rest/asset/v1/programs.json"

    record_count = 0
    while True:
        data = client.request("GET",

        # If the no asset message is in the warnings, we have exhausted
        # the search results and can end the sync.
        if "warnings" in data and NO_ASSET_MSG in data["warnings"]:

        time_extracted = utils.now()

        # Each row just needs the values formatted. If the record is
        # newer than the original start date, stream the record.
        for row in data["result"]:
            record = format_values(stream, row)
            if record[replication_key] >= start_date:
                record_count += 1


        # Increment the offset by the return limit for the next query.
        params["offset"] += params["maxReturn"]

    # Now that we've finished every page we can update the bookmark to
    # the end of the query.
    state = bookmarks.write_bookmark(state, "programs", replication_key,
    return state, record_count
파일: strawpoll.py 프로젝트: owl12/Bonfire
    async def strawpolls(self, ctx, poll_id: str = None):
        """This command can be used to show a strawpoll setup on this server"""
        # Behaviour:
        # - no polls saved for this server: say so and stop;
        # - no poll_id: list every saved poll with its strawpoll.me link;
        # - known poll_id: fetch that poll and print its details;
        # - unknown poll_id: nothing is sent.
        #
        # Strawpolls cannot be 'deleted' so to handle whether a poll is running or not on a server
        # Just save the poll in the config file, which can then be removed when it should not be "running" anymore
        all_polls = (await config.get_content('strawpolls')) or {}
        server_polls = all_polls.get(ctx.message.server.id) or {}
        if not server_polls:
            await self.bot.say("There are currently no strawpolls running on this server!")
            # Stop here: falling through would also send an empty code block.
            return

        # If no poll_id was provided, print a list of all current running poll's on this server
        if not poll_id:
            fmt = "\n".join(
                "{}: https://strawpoll.me/{}".format(data['title'], _id) for _id, data in server_polls.items())
            await self.bot.say("```\n{}```".format(fmt))
        # Else if a valid poll_id was provided, print info about that poll
        elif poll_id in server_polls:
            poll = server_polls[poll_id]

            async with self.session.get("{}/{}".format(self.url, poll_id),
                                        headers={'User-Agent': 'Bonfire/1.0.0'}) as response:
                data = await response.json()

            # The response for votes and options is provided as two separate lists
            # We are enumerating the list of options, to print r (the option)
            # And the votes to match it, based on the index of the option
            # The rest is simple formatting
            fmt_options = "\n\t".join(
                "{}: {}".format(r, data['votes'][i]) for i, r in enumerate(data['options']))
            author = discord.utils.get(ctx.message.server.members, id=poll['author'])
            # The lookup yields None when no member matches (for example the
            # author has left the server); fall back to the stored author id.
            author_name = author.display_name if author is not None else poll['author']
            created_ago = (pendulum.utcnow() - pendulum.parse(poll['date'])).in_words()
            link = "https://strawpoll.me/{}".format(poll_id)
            # The template has five placeholders, so five values are passed.
            fmt = "Link: {}\nTitle: {}\nAuthor: {}\nCreated: {} ago\nOptions:\n\t{}".format(
                link, data['title'], author_name, created_ago, fmt_options)
            await self.bot.say("```\n{}```".format(fmt))
 def make(session, args):
     # Builds an Event whose category comes from EventCategoryFactory.make
     # applied to args['ec'].
     # NOTE(review): the Event(...) call is truncated after its first
     # argument; the remaining arguments are not visible here.
     category = EventCategoryFactory.make(args['ec'])
     return Event(category=category,
    def should_publish(self):
        """Decide whether the feeds should be published on this check.

        Returns True when any of the following holds, tested in order:
        - no check has been counted yet (``nfeed_checked == 0``);
        - ``nfeed_checked`` is a multiple of a non-zero ``feed_period``;
        - more than ``publish_time_interval`` has passed since
          ``last_published``;
        - a ``feed_slot`` minute is configured and was passed less than
          1.1 * ``check_time_interval`` ago.
        Otherwise returns False.
        """
        # TODO: update according to: https://bitsharestalk.org/index.php?topic=9348.0;all

        if self.nfeed_checked == 0:
            log.debug('Should publish at least once at launch of the bts_tools')
            return True

        # A feed_period of 0 would make the modulo raise ZeroDivisionError,
        # so it is skipped just like None; the time-interval check below
        # still covers that configuration.
        if self.feed_period and self.nfeed_checked % self.feed_period == 0:
            log.debug('Should publish because time interval has passed: {} seconds'.format(self.publish_time_interval))
            return True

        now = pendulum.utcnow()

        if self.publish_time_interval and now - self.last_published > self.publish_time_interval:
            log.debug('Should publish because time interval has passed: {}'.format(self.publish_time_interval))
            return True

        # Compare against None rather than truthiness: a slot of 0 (minute
        # zero of the hour) is a valid slot and must not be ignored.
        if self.feed_slot is not None:
            target = now.replace(minute=self.feed_slot, second=0, microsecond=0)
            # Look at the slot in the previous, current and next hour so a
            # slot near the hour boundary is not missed.
            targets = [target.subtract(hours=1), target, target.add(hours=1)]
            elapsed_since = [now - t for t in targets]
            max_lag = 1.1 * self.check_time_interval
            # check if we just passed our time slot
            if any(pendulum.interval() < d and abs(d) < max_lag for d in elapsed_since):
                log.debug('Should publish because time slot has arrived: time {:02d}:{:02d}'.format(now.hour, now.minute))
                return True

        log.debug('No need to publish feeds')
        return False
    def choose_branch(self, context: Dict) -> Union[str, Iterable[str]]:
        # Returns an empty list when the current UTC time is outside the
        # (left_window, right_window] schedule window, so that downstream
        # tasks are skipped.
        # NOTE(review): this block does not parse as shown -- the logging
        # call heads, the arguments of both ``return list(`` calls, the
        # argument of the first following_schedule call and the ``else:``
        # header before the final branch are missing.
        #
        # If the DAG Run is externally triggered, then return without
        # skipping downstream tasks
        if context['dag_run'] and context['dag_run'].external_trigger:
                "Externally triggered DAG_Run: allowing execution to proceed.")
            return list(

        now = pendulum.utcnow()
        left_window = context['dag'].following_schedule(
        right_window = context['dag'].following_schedule(left_window)
            'Checking latest only with left_window: %s right_window: %s now: %s',
            left_window, right_window, now)

        if not left_window < now <= right_window:
            self.log.info('Not latest execution, skipping downstream.')
            # we return an empty list, thus the parent BaseBranchOperator
            # won't exclude any downstream tasks from skipping.
            return []
            self.log.info('Latest, allowing execution to proceed.')
            return list(
파일: __init__.py 프로젝트: jiahut/orator
    def test_timestamp_with_timezone(self):
        """A created_at taken from pendulum.utcnow() survives a create/find round trip."""
        timestamp = pendulum.utcnow()
        created = OratorTestUser.create(email='*****@*****.**', created_at=timestamp)
        reloaded = OratorTestUser.find(created.id)

        # The stored value must match both the in-memory model and the
        # timestamp that was originally passed in.
        self.assertEqual(created.created_at, reloaded.created_at)
        self.assertEqual(timestamp, reloaded.created_at)
# Hands the [min_date, now] range to pseudo_bisection together with the
# Elasticsearch url/index and the ``dry`` / ``to_daemon`` flags.
# NOTE(review): this block does not parse as shown -- the signature is
# truncated after ``self,``, the docstring has lost its quotes, the
# aggregate(...) and apply(...) calls are not closed, and the early exit
# after the "No CreativeWorks" warning (presumably a ``return``) is missing.
def elasticsearch_janitor(self,
    Looks for discrepancies between postgres and elastic search numbers
    Re-indexes time periods that differ in count

    # get range of date_created in database; assumes current time is the max
    logger.debug('Starting Elasticsearch JanitorTask')

    min_date = AbstractCreativeWork.objects.all().aggregate(
    if not min_date:
        logger.warning('No CreativeWorks are present in Postgres. Exiting')

    max_date = pendulum.utcnow()
    min_date = pendulum.instance(min_date)

    pseudo_bisection.apply((es_url, es_index, min_date, max_date), {
        'dry': dry,
        'to_daemon': to_daemon
예제 #18
    def execute(self, context):
        # Checks whether the current UTC time lies in the
        # (left_window, right_window] schedule window and, when it does
        # not, looks up the downstream tasks of context['task'].
        # NOTE(review): this block does not parse as shown -- the early
        # ``return`` for externally triggered runs, the argument of the
        # first following_schedule call, the logging call head, the body of
        # ``if downstream_tasks:`` and the ``else:`` header before the final
        # log line are missing.
        #
        # If the DAG Run is externally triggered, then return without
        # skipping downstream tasks
        if context['dag_run'] and context['dag_run'].external_trigger:
            self.log.info("Externally triggered DAG_Run: allowing execution to proceed.")

        now = pendulum.utcnow()
        left_window = context['dag'].following_schedule(
        right_window = context['dag'].following_schedule(left_window)
            'Checking latest only with left_window: %s right_window: %s now: %s',
            left_window, right_window, now

        if not left_window < now <= right_window:
            self.log.info('Not latest execution, skipping downstream.')

            downstream_tasks = context['task'].get_flat_relatives(upstream=False)
            self.log.debug("Downstream task_ids %s", downstream_tasks)

            if downstream_tasks:

            self.log.info('Latest, allowing execution to proceed.')
    def all(self, cutoff=None, allow_full_harvest=True, **kwargs):
        # Works out the date to start from and returns
        # self.range(latest_date, cutoff, **kwargs).
        # NOTE(review): this block does not parse as shown -- the docstring
        # has lost its quotes and section headers, the ``else:`` header
        # before the aggregate fallback is missing, and the aggregate(...)
        # and filter(...) calls are not closed.
            cutoff (date, optional): The upper bound to schedule harvests to. Default to today.
            allow_full_harvest (bool, optional): Allow a SourceConfig to generate a full harvest. Defaults to True.
                The SourceConfig.full_harvest must be marked True and have earliest_date set.
            **kwargs: Forwarded to .range

            A list of harvest jobs

        if cutoff is None:
            cutoff = pendulum.utcnow().date()

        # TODO take harvest/sourceconfig version into account here
        if hasattr(self.source_config, 'latest'):
            latest_date = self.source_config.latest
            latest_date = self.source_config.harvest_jobs.aggregate(

        # If we can build full harvests and the earliest job that would be generated does NOT exist
        # Go ahead and reset the latest_date to the earliest_date
        if allow_full_harvest and self.source_config.earliest_date and self.source_config.full_harvest:
            if not self.source_config.harvest_jobs.filter(
                latest_date = self.source_config.earliest_date

        # If nothing sets latest_date, default to the soonest possible harvest
        if not latest_date:
            latest_date = cutoff - self.source_config.harvest_interval

        return self.range(latest_date, cutoff, **kwargs)
    async def strawpolls(self, ctx, poll_id: str = None):
        """This command can be used to show a strawpoll setup on this server"""
        # Behaviour:
        # - no polls saved for this server: say so and stop;
        # - no poll_id: list every saved poll with its strawpoll.me link;
        # - known poll_id: fetch that poll and print its details;
        # - unknown poll_id: nothing is sent.
        #
        # Strawpolls cannot be 'deleted' so to handle whether a poll is running or not on a server
        # Just save the poll in the config file, which can then be removed when it should not be "running" anymore
        all_polls = config.get_content('strawpolls') or {}
        server_polls = all_polls.get(ctx.message.server.id) or {}
        if not server_polls:
            await self.bot.say("There are currently no strawpolls running on this server!")
            # Stop here: falling through would also send an empty code block.
            return

        # If no poll_id was provided, print a list of all current running poll's on this server
        if not poll_id:
            fmt = "\n".join(
                "{}: https://strawpoll.me/{}".format(data['title'], _id) for _id, data in server_polls.items())
            await self.bot.say("```\n{}```".format(fmt))
        # Else if a valid poll_id was provided, print info about that poll
        elif poll_id in server_polls:
            poll = server_polls[poll_id]

            async with self.session.get("{}/{}".format(self.url, poll_id),
                                        headers={'User-Agent': 'Bonfire/1.0.0'}) as response:
                data = await response.json()

            # The response for votes and options is provided as two separate lists
            # We are enumerating the list of options, to print r (the option)
            # And the votes to match it, based on the index of the option
            # The rest is simple formatting
            fmt_options = "\n\t".join(
                "{}: {}".format(r, data['votes'][i]) for i, r in enumerate(data['options']))
            author = discord.utils.get(ctx.message.server.members, id=poll['author'])
            # The lookup yields None when no member matches (for example the
            # author has left the server); fall back to the stored author id.
            author_name = author.display_name if author is not None else poll['author']
            created_ago = (pendulum.utcnow() - pendulum.parse(poll['date'])).in_words()
            link = "https://strawpoll.me/{}".format(poll_id)
            # The template has five placeholders, so five values are passed.
            fmt = "Link: {}\nTitle: {}\nAuthor: {}\nCreated: {} ago\nOptions:\n\t{}".format(
                link, data['title'], author_name, created_ago, fmt_options)
            await self.bot.say("```\n{}```".format(fmt))
def sync_activities(client, state, stream, config):
    # Walks export windows from the bookmarked start up to the time the job
    # began: for each window it gets or creates an export, waits for it,
    # formats every row, then stores the window start as the bookmark.
    # Returns (state, record_count).
    # NOTE(review): this block does not parse as shown -- the
    # pendulum.parse(bookmarks.get_bookmark(...)) call is not closed, and
    # the statement that emits each formatted record is not visible
    # (``record`` and ``time_extracted`` are computed but unused here).
    #
    # http://developers.marketo.com/rest-api/bulk-extract/bulk-activity-extract/
    replication_key = determine_replication_key(stream['tap_stream_id'])
    export_start = pendulum.parse(
        bookmarks.get_bookmark(state, stream["tap_stream_id"],
    job_started = pendulum.utcnow()
    record_count = 0
    while export_start < job_started:
        export_id, export_end = get_or_create_export_for_activities(
            client, state, stream, export_start, config)
        state = wait_for_export(client, state, stream, export_id)
        for row in stream_rows(client, "activities", export_id):
            time_extracted = utils.now()

            row = flatten_activity(row, stream)
            record = format_values(stream, row)

            record_count += 1

        state = update_state_with_export_info(
            state, stream, bookmark=export_start.isoformat())
        # The next window starts where this export ended.
        export_start = export_end

    return state, record_count
    async def uptime(self):
        """Provides a printout of the current bot's uptime

        EXAMPLE: !uptime"""
        # self.bot.uptime is read as the start timestamp; the elapsed time
        # since then is rendered in words.
        elapsed = pendulum.utcnow() - self.bot.uptime
        await self.bot.say("Uptime: ```\n{}```".format(elapsed.in_words()))
def _latest_only(**context):
    """Skip unless the current UTC time is in the latest schedule window.

    The window is (start, end], where start is the schedule following
    ``context["execution_date"]`` and end is the schedule following start.
    Raises AirflowSkipException when the current time is outside it.
    """
    current_time = pendulum.utcnow()
    dag = context["dag"]
    window_start = dag.following_schedule(context["execution_date"])
    window_end = dag.following_schedule(window_start)

    if window_start < current_time <= window_end:
        return
    raise AirflowSkipException()
    def refresh_token(self):
        """Fetch a new OAuth access token and store it on the client.

        Requests a client-credentials token from the identity endpoint and
        sets ``self.access_token`` and ``self.token_expires``. The expiry is
        stamped 15 seconds earlier than the ``expires_in`` value in the
        response, counted from the time the response was received.

        Raises:
            ApiException: on a connection error, a non-200 response, or an
                ``error`` field in the response body.
        """
        # http://developers.marketo.com/rest-api/authentication/#creating_an_access_token
        params = {
            "grant_type": "client_credentials",
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }
        singer.log_info("Refreshing token")

        # Built outside the try block so the name is always bound when the
        # connection-error message below formats it.
        url = self.get_url("identity/oauth/token")
        try:
            resp = requests.get(url, params=params)
            resp_time = pendulum.utcnow()
        except requests.exceptions.ConnectionError as e:
            raise ApiException(
                "Connection error while refreshing token at {}.".format(
                    url)) from e

        if resp.status_code != 200:
            raise ApiException("Error refreshing token [{}]: {}".format(
                resp.status_code, resp.content))

        data = resp.json()
        if "error" in data:
            if data["error"] == "unauthorized":
                msg = "Authorization failed: "
            else:
                msg = "Marketo API returned an error: "

            msg += data.get("error_description", "No message from api")
            raise ApiException(msg)

        self.access_token = data["access_token"]
        # Expire 15 seconds early so the token is refreshed before the
        # reported lifetime runs out.
        self.token_expires = resp_time.add(seconds=data["expires_in"] - 15)
        singer.log_info("Token valid until %s", self.token_expires)
