Beispiel #1
0
def test_precise_diff_timezone():
    """Check ``precise_diff`` on aware datetimes built with pendulum and pytz.

    Both libraries get the same two scenarios: Paris -> Paris and
    Paris -> Toronto. A final case compares a UTC datetime with a Paris one.
    """
    tz_paris = pendulum.timezone("Europe/Paris")
    tz_toronto = pendulum.timezone("America/Toronto")

    start = tz_paris.datetime(2013, 3, 31, 1, 30)

    end = tz_paris.datetime(2013, 4, 1, 1, 30)
    assert_diff(precise_diff(start, end), days=1, hours=0)

    end = tz_toronto.datetime(2013, 4, 1, 1, 30)
    assert_diff(precise_diff(start, end), days=1, hours=5)

    # pytz
    tz_paris = pytz.timezone("Europe/Paris")
    tz_toronto = pytz.timezone("America/Toronto")

    start = tz_paris.localize(datetime(2013, 3, 31, 1, 30))

    end = tz_paris.localize(datetime(2013, 4, 1, 1, 30))
    assert_diff(precise_diff(start, end), days=1, hours=0)

    end = tz_toronto.localize(datetime(2013, 4, 1, 1, 30))
    assert_diff(precise_diff(start, end), days=1, hours=5)

    # Issue238
    utc_start = timezone("UTC").datetime(2018, 6, 20, 1, 30)
    paris_end = timezone("Europe/Paris").datetime(2018, 6, 20, 3, 40)  # UTC+2
    assert_diff(precise_diff(utc_start, paris_end), minutes=10)
Beispiel #2
0
    def _get_parsed_value(
        self, token, value, parsed, now
    ):  # type: (str, str, dict, pendulum.DateTime) -> None
        """
        Parse ``value`` with the parser registered for ``token`` and store
        the result in ``parsed`` under the key that matches the token.

        :param token: format token (e.g. ``"YY"``, ``"MM"``, ``"ZZ"``).
        :param value: raw text matched for that token.
        :param parsed: dict updated in place; nothing is returned.
        :param now: reference datetime, only read for its ``year`` when
            expanding a two-digit ``"YY"`` year.

        :raises ValueError: for a 12-hour value above 12 or a ``"z"`` value
            that is not in ``pendulum.timezones``.
        """
        result = self._PARSE_TOKENS[token](value)

        # NOTE: the order of the branches matters, the substring checks
        # ("D" in token, "m" in token, ...) are deliberately tried after the
        # more specific token lists.
        if "Y" in token:
            if token == "YY":
                # Two-digit year: prepend the century of ``now``.
                result = now.year // 100 * 100 + result

            parsed["year"] = result
        elif token == "Q":
            parsed["quarter"] = result
        elif token in ("MM", "M"):
            parsed["month"] = result
        elif token in ("DDDD", "DDD"):
            parsed["day_of_year"] = result
        elif "D" in token:
            parsed["day"] = result
        elif "H" in token:
            parsed["hour"] = result
        elif token in ("hh", "h"):
            if result > 12:
                raise ValueError("Invalid date")

            parsed["hour"] = result
        elif "m" in token:
            parsed["minute"] = result
        elif "s" in token:
            parsed["second"] = result
        elif "S" in token:
            parsed["microsecond"] = result
        elif token in ("d", "E"):
            parsed["day_of_week"] = result
        elif token in ("X", "x"):
            parsed["timestamp"] = result
        elif token in ("ZZ", "Z"):
            # Fixed offset: the first character is the sign, the rest is
            # "HH", "HHMM" or "HH:MM".
            sign = -1 if value.startswith("-") else 1
            digits = value[1:]
            if ":" in digits:
                hours, minutes = digits.split(":")
            else:
                if len(digits) == 2:
                    # Hours only: pad with zero minutes.
                    digits = "{}00".format(digits)

                hours, minutes = digits[0:2], digits[2:4]

            seconds = ((int(hours) * 60) + int(minutes)) * 60

            parsed["tz"] = pendulum.timezone(sign * seconds)
        elif token == "z":
            # Full timezone
            if value not in pendulum.timezones:
                raise ValueError("Invalid date")

            parsed["tz"] = pendulum.timezone(value)
Beispiel #3
0
def test_just_before_last_transition():
    """Convert a 1991 Asia/Shanghai wall time and check its epoch offset."""
    shanghai = pendulum.timezone("Asia/Shanghai")
    wall_time = datetime(1991, 4, 20, 1, 49, 8)
    localized = shanghai.convert(wall_time, dst_rule=pendulum.POST_TRANSITION)

    unix_epoch = datetime(1970, 1, 1, tzinfo=timezone("UTC"))
    assert (localized - unix_epoch).total_seconds() == 672079748.0
Beispiel #4
0
    def resolve_date_range(cls, start, end):
        """Resolve an optional ``(start, end)`` pair into date values.

        * Both falsy: defaults to ``(yesterday, today)``.
        * String values: a trailing ``Z`` is stripped, the text is parsed with
          ``pendulum.parse``, converted by the ``utc`` timezone and reduced
          to a date.
        * Any other truthy value is returned unchanged.

        :param start: start of the range (string, date-like, or falsy).
        :param end: end of the range (string, date-like, or falsy).
        :returns: ``(start, end)`` tuple.
        :raises ValueError: if exactly one of ``start`` / ``end`` is supplied.
        """
        logger.debug('Coercing start and end (%r, %r) into UTC dates', start, end)

        if bool(start) ^ bool(end):
            raise ValueError('"start" and "end" must either both be supplied or omitted')

        def _to_utc_date(text):
            # Shared conversion so both bounds are guaranteed to be handled
            # the same way.
            return pendulum.timezone('utc').convert(pendulum.parse(text.rstrip('Z'))).date()

        if not start and not end:
            # Read the clock once: two separate today() calls could straddle
            # midnight and yield a two-day range.
            today = datetime.date.today()
            start, end = today - datetime.timedelta(days=1), today

        # ``end`` is converted first, as before, so a malformed pair still
        # reports the error for ``end`` first.
        if isinstance(end, str):
            end = _to_utc_date(end)
        if isinstance(start, str):
            start = _to_utc_date(start)

        logger.debug('Interpretting start and end as %r and %r', start, end)
        return start, end
Beispiel #5
0
def test_on_last_transition():
    """Convert the same 2037 Paris wall time with each ``dst_rule``.

    The date and time fields must be preserved in both cases; only the
    expected UTC offset differs between the two rules.
    """
    paris = pendulum.timezone("Europe/Paris")

    # (dst_rule, expected UTC offset in seconds)
    cases = (
        (pendulum.POST_TRANSITION, 3600),
        (pendulum.PRE_TRANSITION, 7200),
    )
    for rule, offset_seconds in cases:
        local = paris.convert(pendulum.naive(2037, 10, 25, 2, 30), dst_rule=rule)

        assert (local.year, local.month, local.day) == (2037, 10, 25)
        assert (local.hour, local.minute, local.second) == (2, 30, 0)
        assert local.microsecond == 0
        assert local.utcoffset().total_seconds() == offset_seconds
Beispiel #6
0
    def test_following_previous_schedule(self):
        """
        Make sure DST transitions are properly observed

        Uses a five-minute schedule starting at 02:55 (+02:00) on 2018-10-28
        in Europe/Zurich and walks one step forward and back.
        """
        zurich = pendulum.timezone('Europe/Zurich')
        dag_start = zurich.convert(datetime.datetime(2018, 10, 28, 2, 55),
                                   dst_rule=pendulum.PRE_TRANSITION)
        self.assertEqual(dag_start.isoformat(), "2018-10-28T02:55:00+02:00",
                         "Pre-condition: start date is in DST")

        start_utc = timezone.convert_to_utc(dag_start)

        dag = DAG('tz_dag', start_date=dag_start, schedule_interval='*/5 * * * *')

        # One step forward: the local offset changes from +02:00 to +01:00.
        following = dag.following_schedule(start_utc)
        following_local = zurich.convert(following)

        self.assertEqual(following.isoformat(), "2018-10-28T01:00:00+00:00")
        self.assertEqual(following_local.isoformat(), "2018-10-28T02:00:00+01:00")

        # One step back from the start.
        preceding = dag.previous_schedule(start_utc)
        preceding_local = zurich.convert(preceding)

        self.assertEqual(preceding_local.isoformat(), "2018-10-28T02:50:00+02:00")

        # Stepping back from the following run must land on the start again.
        preceding = dag.previous_schedule(following)
        preceding_local = zurich.convert(preceding)

        self.assertEqual(preceding_local.isoformat(), "2018-10-28T02:55:00+02:00")
        self.assertEqual(preceding, start_utc)
Beispiel #7
0
    def test_following_previous_schedule_daily_dag_CET_to_CEST(self):
        """
        Make sure DST transitions are properly observed

        Uses a daily 03:00 schedule in Europe/Zurich around 2018-03-25, where
        the expected local offset moves from +01:00 to +02:00.
        """
        zurich = pendulum.timezone('Europe/Zurich')
        dag_start = zurich.convert(datetime.datetime(2018, 3, 25, 2),
                                   dst_rule=pendulum.PRE_TRANSITION)

        start_utc = timezone.convert_to_utc(dag_start)

        dag = DAG('tz_dag', start_date=dag_start, schedule_interval='0 3 * * *')

        # Run preceding the start date.
        preceding = dag.previous_schedule(start_utc)
        preceding_local = zurich.convert(preceding)

        self.assertEqual(preceding_local.isoformat(), "2018-03-24T03:00:00+01:00")
        self.assertEqual(preceding.isoformat(), "2018-03-24T02:00:00+00:00")

        # Run following the start date.
        following = dag.following_schedule(start_utc)
        following_local = zurich.convert(following)

        self.assertEqual(following_local.isoformat(), "2018-03-25T03:00:00+02:00")
        self.assertEqual(following.isoformat(), "2018-03-25T01:00:00+00:00")

        # Stepping back from the following run returns to the preceding one.
        preceding = dag.previous_schedule(following)
        preceding_local = zurich.convert(preceding)

        self.assertEqual(preceding_local.isoformat(), "2018-03-24T03:00:00+01:00")
        self.assertEqual(preceding.isoformat(), "2018-03-24T02:00:00+00:00")
Beispiel #8
0
    def test_sync_to_db(self, mock_now):
        """Sync a DAG containing a sub-DAG and check both stored DagModel rows.

        ``mock_now`` is given a fixed aware UTC timestamp so that
        ``last_scheduler_run`` can be compared for equality.
        """
        dag = DAG(
            'dag',
            start_date=DEFAULT_DATE,
        )
        with dag:
            DummyOperator(task_id='task', owner='owner1')
            SubDagOperator(
                task_id='subtask',
                owner='owner2',
                subdag=DAG(
                    'dag.subtask',
                    start_date=DEFAULT_DATE,
                )
            )
        frozen_now = datetime.datetime.utcnow().replace(tzinfo=pendulum.timezone('UTC'))
        mock_now.return_value = frozen_now
        session = settings.Session()
        dag.sync_to_db(session=session)

        # Parent DAG row
        parent_row = session.query(DagModel).filter(DagModel.dag_id == 'dag').one()
        self.assertEqual(set(parent_row.owners.split(', ')), {'owner1', 'owner2'})
        self.assertEqual(parent_row.last_scheduler_run, frozen_now)
        self.assertTrue(parent_row.is_active)
        self.assertIsNone(parent_row.default_view)
        self.assertEqual(parent_row.get_default_view(),
                         configuration.conf.get('webserver', 'dag_default_view').lower())
        self.assertEqual(parent_row.safe_dag_id, 'dag')

        # Sub-DAG row: expected to carry the same owners as the parent.
        child_row = session.query(DagModel).filter(
            DagModel.dag_id == 'dag.subtask').one()
        self.assertEqual(set(child_row.owners.split(', ')), {'owner1', 'owner2'})
        self.assertEqual(child_row.last_scheduler_run, frozen_now)
        self.assertTrue(child_row.is_active)
        self.assertEqual(child_row.safe_dag_id, 'dag__dot__subtask')
def test_not_equal_with_timezone_true():
    """Compare midnight 2000-01-01 across zones and datetime flavours."""
    toronto = pendulum.datetime(2000, 1, 1, tz="America/Toronto")
    vancouver = pendulum.datetime(2000, 1, 1, tz="America/Vancouver")
    # Standard-library datetime carrying a pendulum timezone as tzinfo.
    stdlib_toronto = datetime(2000, 1, 1, tzinfo=pendulum.timezone("America/Toronto"))

    assert vancouver != toronto
    assert stdlib_toronto == toronto
Beispiel #10
0
    def before_index(self, data_dict):
        """
        Prepare a dataset dict for indexing.

        Works on a deep copy of ``data_dict``: adds ``data_provider`` when
        originator names can be derived from ``extras_responsible-party``,
        and collects the parsed ``temporal-extent-begin`` /
        ``temporal-extent-end`` values.

        :param data_dict: the dataset dict about to be indexed.
        :returns: the unmodified ``data_dict`` when a temporal extent cannot
            be converted to a datetime.
        """
        data_modified = copy.deepcopy(data_dict)
        start_end_time = []
        responsible_party = data_dict.get('extras_responsible-party')
        if responsible_party is not None:
            originators = get_originator_names(responsible_party)
            if len(originators) > 0:
                data_modified['data_provider'] = originators

        for field in ('temporal-extent-begin', 'temporal-extent-end'):
            if field in data_dict:
                log.debug("Found time for field {}: {}".format(field, data_dict[field]))
                # "now" is probably not strictly valid ISO 19139 but it occurs
                # fairly often
                if data_dict.get(field, '').lower() == 'now':
                    log.info("Converting 'now' to current date and time")
                    utc = pendulum.timezone("UTC")
                    parsed_val = pendulum.now(utc).replace(
                                        microsecond=0).to_iso8601_string()
                else:
                    try:
                        # TODO: Add some sane support for indeterminate dates
                        parsed_val = convert_date(data_dict[field], True, True)
                    # The exception types must be a parenthesised tuple. The
                    # old ``except A, B:`` form caught only ``A`` (with ``B``
                    # as the binding target) on Python 2 and is a syntax
                    # error on Python 3.
                    except (ValueError, pendulum.parsing.exceptions.ParserError):
                        log.exception("data_dict[field] does not convert to "
                                        "datetime, skipping storage of temporal "
                                        "extents into Solr")
                        return data_dict
                start_end_time.append(parsed_val)
        # NOTE(review): in the lines visible here neither ``data_modified``
        # nor ``start_end_time`` is returned or used further - confirm
        # against the full source.
Beispiel #11
0
def test_convert_accept_pendulum_instance():
    """``Timezone.convert`` accepts a pendulum datetime and returns one."""
    source = pendulum.datetime(2016, 8, 7, 12, 53, 54)
    converted = timezone("Europe/Paris").convert(source)

    assert isinstance(converted, pendulum.DateTime)
    assert_datetime(converted, 2016, 8, 7, 14, 53, 54)
    def test_skip(self, mock_now):
        """Skip a task through ``SkipMixin`` and look up the resulting row.

        ``mock_now`` is given a fixed aware UTC timestamp; the task instance
        is expected to be SKIPPED with that timestamp as start and end date.
        """
        session = settings.Session()
        frozen_now = datetime.datetime.utcnow().replace(tzinfo=pendulum.timezone('UTC'))
        mock_now.return_value = frozen_now
        dag = DAG(
            'dag',
            start_date=DEFAULT_DATE,
        )
        with dag:
            tasks = [DummyOperator(task_id='task')]
        dag_run = dag.create_dagrun(
            run_id='manual__' + frozen_now.isoformat(),
            state=State.FAILED,
        )
        SkipMixin().skip(
            dag_run=dag_run,
            execution_date=frozen_now,
            tasks=tasks,
            session=session)

        # No explicit assert: the test presumably relies on ``.one()``
        # raising when the expected row is missing.
        skipped_rows = session.query(TI).filter(
            TI.dag_id == 'dag',
            TI.task_id == 'task',
            TI.state == State.SKIPPED,
            TI.start_date == frozen_now,
            TI.end_date == frozen_now,
        )
        skipped_rows.one()
Beispiel #13
0
def test_from_format_with_timezone():
    """``from_format`` keeps the wall time and attaches the given timezone."""
    london = pendulum.timezone("Europe/London")
    parsed = pendulum.from_format(
        "1975-05-21 22:32:11", "YYYY-MM-DD HH:mm:ss", tz=london
    )

    assert_datetime(parsed, 1975, 5, 21, 22, 32, 11)
    assert "Europe/London" == parsed.timezone_name
def test_replace_tzinfo_dst():
    """Replacing tzinfo with Europe/Paris on 2013-03-31 02:30 yields 03:30 DST."""
    original = pendulum.datetime(2013, 3, 31, 2, 30)
    in_paris = original.replace(tzinfo=pendulum.timezone("Europe/Paris"))

    assert_datetime(in_paris, 2013, 3, 31, 3, 30)
    assert in_paris.is_dst()
    assert in_paris.offset == 7200
    assert in_paris.timezone_name == "Europe/Paris"
Beispiel #15
0
def test_convert_sets_fold_attribute_properly():
    """``convert`` sets ``fold`` to 0 for PRE_TRANSITION and 1 for POST_TRANSITION."""
    eastern = pendulum.timezone("US/Eastern")

    # (dst_rule, expected fold)
    for rule, expected_fold in (
        (pendulum.PRE_TRANSITION, 0),
        (pendulum.POST_TRANSITION, 1),
    ):
        converted = eastern.convert(datetime(2014, 11, 2, 1, 30), dst_rule=rule)
        assert converted.fold == expected_fold
Beispiel #16
0
def test_convert_fold_attribute_is_honored():
    """The ``fold`` of the input datetime selects the offset used by ``convert``."""
    eastern = pendulum.timezone("US/Eastern")
    wall_time = datetime(2014, 11, 2, 1, 30)

    # Default fold
    assert eastern.convert(wall_time).strftime("%z") == "-0400"

    # fold=1
    assert eastern.convert(wall_time.replace(fold=1)).strftime("%z") == "-0500"
Beispiel #17
0
def test_constructor_fold_attribute_is_honored():
    """A pendulum tzinfo passed to ``datetime(...)`` respects ``fold``."""
    eastern = pendulum.timezone("US/Eastern")

    default_fold = datetime(2014, 11, 2, 1, 30, tzinfo=eastern)
    assert default_fold.strftime("%z") == "-0400"

    folded = datetime(2014, 11, 2, 1, 30, tzinfo=eastern, fold=1)
    assert folded.strftime("%z") == "-0500"
Beispiel #18
0
def test_astimezone():
    """``astimezone`` to Europe/Paris shifts a UTC datetime by one hour."""
    moment = pendulum.datetime(2015, 1, 15, 18, 15, 34)
    reference = pendulum.datetime(2015, 1, 15, 18, 15, 34)
    assert moment.timezone_name == "UTC"
    assert_datetime(
        moment, reference.year, reference.month, reference.day,
        reference.hour, reference.minute,
    )

    in_paris = moment.astimezone(pendulum.timezone("Europe/Paris"))
    assert in_paris.timezone_name == "Europe/Paris"
    assert_datetime(
        in_paris, reference.year, reference.month, reference.day,
        reference.hour + 1, reference.minute,
    )
Beispiel #19
0
def setup():
    """Pin the local timezone for a test, then reset pendulum's global state.

    Generator-style setup/teardown: the code before ``yield`` runs first, the
    code after it runs once the consumer resumes the generator. Presumably
    registered as a pytest fixture by a decorator outside this view.
    """
    # Setup: make America/Toronto the local timezone.
    pendulum.set_local_timezone(pendulum.timezone("America/Toronto"))

    yield

    # Teardown: the calls without arguments clear the test "now" and the
    # local timezone override; the others set locale and week bounds explicitly.
    pendulum.set_test_now()
    pendulum.set_locale("en")
    pendulum.set_local_timezone()
    pendulum.week_starts_at(pendulum.MONDAY)
    pendulum.week_ends_at(pendulum.SUNDAY)
Beispiel #20
0
def test_short_timezones_should_not_modify_time():
    """Datetimes built in abbreviation-named zones keep their wall-clock fields."""
    for abbreviation in ("EST", "HST"):
        zone = pendulum.timezone(abbreviation)
        local = zone.datetime(2017, 6, 15, 14, 0, 0)

        assert (local.year, local.month, local.day) == (2017, 6, 15)
        assert (local.hour, local.minute, local.second) == (14, 0, 0)
Beispiel #21
0
def test_after_last_transition():
    """A far-future (2135) Paris datetime keeps the fields it was built with."""
    paris = pendulum.timezone("Europe/Paris")
    local = paris.datetime(2135, 6, 15, 14, 0, 0)

    assert (local.year, local.month, local.day) == (2135, 6, 15)
    assert (local.hour, local.minute, local.second) == (14, 0, 0)
    assert local.microsecond == 0
Beispiel #22
0
def test_dst_fold_attribute_is_honored():
    """``Timezone.dst`` returns one hour by default and zero with ``fold=1``."""
    eastern = pendulum.timezone("US/Eastern")
    wall_time = datetime(2014, 11, 2, 1, 30)

    assert eastern.dst(wall_time).total_seconds() == 3600
    assert eastern.dst(wall_time.replace(fold=1)).total_seconds() == 0
Beispiel #23
0
def test_tzname_fold_attribute_is_honored():
    """``Timezone.tzname`` returns EDT by default and EST with ``fold=1``."""
    eastern = pendulum.timezone("US/Eastern")
    wall_time = datetime(2014, 11, 2, 1, 30)

    assert eastern.tzname(wall_time) == "EDT"
    assert eastern.tzname(wall_time.replace(fold=1)) == "EST"
Beispiel #24
0
def test_precise_diff():
    """Check ``precise_diff`` on naive datetimes and across a Toronto DST change."""
    # Month boundary, checked in both directions.
    later = datetime(2003, 3, 1, 0, 0, 0)
    earlier = datetime(2003, 1, 31, 23, 59, 59)

    assert_diff(precise_diff(later, earlier), months=-1, seconds=-1)
    assert_diff(precise_diff(earlier, later), months=1, seconds=1)

    # Same boundary in 2012.
    later = datetime(2012, 3, 1, 0, 0, 0)
    earlier = datetime(2012, 1, 31, 23, 59, 59)

    assert_diff(precise_diff(later, earlier), months=-1, seconds=-1)
    assert_diff(precise_diff(earlier, later), months=1, seconds=1)

    # Every unit populated.
    begin = datetime(2001, 1, 1)
    finish = datetime(2003, 9, 17, 20, 54, 47, 282310)

    assert_diff(
        precise_diff(begin, finish),
        years=2,
        months=8,
        days=16,
        hours=20,
        minutes=54,
        seconds=47,
        microseconds=282310,
    )

    # 200 microseconds short of a full year.
    begin = datetime(2017, 2, 17, 16, 5, 45, 123456)
    finish = datetime(2018, 2, 17, 16, 5, 45, 123256)

    assert_diff(
        precise_diff(begin, finish),
        months=11, days=30, hours=23, minutes=59, seconds=59, microseconds=999800
    )

    # DST
    toronto = timezone("America/Toronto")
    begin = toronto.datetime(2017, 3, 7)
    finish = toronto.datetime(2017, 3, 13)

    assert_diff(precise_diff(begin, finish), days=6, hours=0)
Beispiel #25
0
def test_skipped_time_with_explicit_pre_rule():
    """PRE_TRANSITION conversion of 2013-03-31 02:30:45 in Paris yields 01:30:45 (+01:00)."""
    paris = timezone("Europe/Paris")
    wall_time = datetime(2013, 3, 31, 2, 30, 45, 123456)
    local = paris.convert(wall_time, dst_rule=pendulum.PRE_TRANSITION)

    assert (local.year, local.month, local.day) == (2013, 3, 31)
    assert (local.hour, local.minute, local.second) == (1, 30, 45)
    assert local.microsecond == 123456
    assert local.tzinfo.name == "Europe/Paris"
    assert local.tzinfo.utcoffset(local) == timedelta(seconds=3600)
    assert local.tzinfo.dst(local) == timedelta()
Beispiel #26
0
def test_repeated_time_explicit_post_rule():
    """POST_TRANSITION conversion of 2013-10-27 02:30:45 in Paris uses +01:00, no DST."""
    paris = timezone("Europe/Paris")
    wall_time = datetime(2013, 10, 27, 2, 30, 45, 123456)
    local = paris.convert(wall_time, dst_rule=pendulum.POST_TRANSITION)

    assert (local.year, local.month, local.day) == (2013, 10, 27)
    assert (local.hour, local.minute, local.second) == (2, 30, 45)
    assert local.microsecond == 123456
    assert local.tzinfo.name == "Europe/Paris"
    assert local.tzinfo.utcoffset(local) == timedelta(seconds=3600)
    assert local.tzinfo.dst(local) == timedelta()
Beispiel #27
0
def test_repeated_time_pre_rule():
    """With ``fold=0``, 2013-10-27 02:30:45 in Paris converts to +02:00 with one hour of DST."""
    paris = timezone("Europe/Paris")
    wall_time = datetime(2013, 10, 27, 2, 30, 45, 123456, fold=0)
    local = paris.convert(wall_time)

    assert (local.year, local.month, local.day) == (2013, 10, 27)
    assert (local.hour, local.minute, local.second) == (2, 30, 45)
    assert local.microsecond == 123456
    assert local.tzinfo.name == "Europe/Paris"
    assert local.tzinfo.utcoffset(local) == timedelta(seconds=7200)
    assert local.tzinfo.dst(local) == timedelta(seconds=3600)
Beispiel #28
0
def test_basic_convert():
    """A June 2016 Paris wall time keeps its fields and gets +02:00 with one hour of DST."""
    paris = timezone("Europe/Paris")
    wall_time = datetime(2016, 6, 1, 12, 34, 56, 123456, fold=1)
    local = paris.convert(wall_time)

    assert (local.year, local.month, local.day) == (2016, 6, 1)
    assert (local.hour, local.minute, local.second) == (12, 34, 56)
    assert local.microsecond == 123456
    assert local.tzinfo.name == "Europe/Paris"
    assert local.tzinfo.utcoffset(local) == timedelta(seconds=7200)
    assert local.tzinfo.dst(local) == timedelta(seconds=3600)
Beispiel #29
0
def test_skipped_time_with_post_rule():
    """With ``fold=1``, 2013-03-31 02:30:45 in Paris converts to 03:30:45 (+02:00)."""
    paris = timezone("Europe/Paris")
    wall_time = datetime(2013, 3, 31, 2, 30, 45, 123456, fold=1)
    local = paris.convert(wall_time)

    assert (local.year, local.month, local.day) == (2013, 3, 31)
    assert (local.hour, local.minute, local.second) == (3, 30, 45)
    assert local.microsecond == 123456
    assert local.tzinfo.name == "Europe/Paris"
    assert local.tzinfo.utcoffset(local) == timedelta(seconds=7200)
    assert local.tzinfo.dst(local) == timedelta(seconds=3600)
Beispiel #30
0
def test_datetime():
    """``Timezone.datetime`` keeps 01:30 on 2013-03-24 and turns 02:30 on 2013-03-31 into 03:30."""
    paris = timezone("Europe/Paris")

    regular = paris.datetime(2013, 3, 24, 1, 30)
    assert (regular.year, regular.month, regular.day) == (2013, 3, 24)
    assert (regular.hour, regular.minute) == (1, 30)
    assert (regular.second, regular.microsecond) == (0, 0)

    # Requested as 02:30, expected back as 03:30.
    shifted = paris.datetime(2013, 3, 31, 2, 30)
    assert (shifted.year, shifted.month, shifted.day) == (2013, 3, 31)
    assert (shifted.hour, shifted.minute) == (3, 30)
    assert (shifted.second, shifted.microsecond) == (0, 0)
Beispiel #31
0
"""
# Rebuild `dea/unstable` module on the NCI

"""
from datetime import datetime, timedelta

import pendulum
from airflow import DAG
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.operators.email_operator import EmailOperator

# Timezone attached to the DAG's start date below.
local_tz = pendulum.timezone("Australia/Canberra")

# Arguments passed to the DAG as ``default_args``.
default_args = {
    'owner': 'dayers',
    # Timezone-aware start date (Australia/Canberra).
    'start_date': datetime(2020, 3, 12, tzinfo=local_tz),
    'retries': 1,
    'retry_delay': timedelta(minutes=10),
    'timeout': 1200,  # For running SSH Commands
    'email_on_failure': True,
    'email': '*****@*****.**',
}

# Daily DAG; ``catchup=False`` is passed so that, presumably, missed past
# intervals are not backfilled.
dag = DAG(
    'nci_build_dea_unstable_module',
    default_args=default_args,
    schedule_interval='@daily',
    catchup=False,
    tags=['nci'],
)
class TestDagBuilder(object):
    """Tests for ``dagbuilder.DagBuilder`` using a three-task BashOperator config."""

    # Passed to DagBuilder as the default (fallback) configuration.
    default_config = {
        "default_args": {
            "owner": "default_owner",
            "start_date": datetime.date(2018, 3, 1),
        },
        "max_active_runs": 1,
        "schedule_interval": "0 1 * * *",
    }
    # Passed to DagBuilder as the DAG-specific configuration.
    dag_config = {
        "default_args": {
            "owner": "custom_owner"
        },
        "description": "this is an example dag",
        "schedule_interval": "0 3 * * *",
        "tasks": {
            "task_1": {
                "operator": "airflow.operators.bash_operator.BashOperator",
                "bash_command": "echo 1",
            },
            "task_2": {
                "operator": "airflow.operators.bash_operator.BashOperator",
                "bash_command": "echo 2",
                "dependencies": ["task_1"],
            },
            "task_3": {
                "operator": "airflow.operators.bash_operator.BashOperator",
                "bash_command": "echo 3",
                "dependencies": ["task_1"],
            },
        },
    }
    utc = pendulum.timezone("UTC")

    def _new_builder(self):
        """Return a DagBuilder for ``test_dag`` built from the class-level configs."""
        return dagbuilder.DagBuilder("test_dag", self.dag_config,
                                     self.default_config)

    def test_get_dag_params(self):
        """The merged params take DAG-specific values over the defaults."""
        builder = self._new_builder()
        # owner / schedule_interval come from dag_config; max_active_runs and
        # start_date come from default_config, with the date expected back as
        # a UTC-aware midnight datetime.
        expected = {
            "dag_id": "test_dag",
            "default_args": {
                "owner": "custom_owner",
                "start_date": datetime.datetime(2018, 3, 1, 0, 0,
                                                tzinfo=self.utc),
            },
            "description": "this is an example dag",
            "schedule_interval": "0 3 * * *",
            "max_active_runs": 1,
            "tasks": {
                "task_1": {
                    "operator": "airflow.operators.bash_operator.BashOperator",
                    "bash_command": "echo 1",
                },
                "task_2": {
                    "operator": "airflow.operators.bash_operator.BashOperator",
                    "bash_command": "echo 2",
                    "dependencies": ["task_1"],
                },
                "task_3": {
                    "operator": "airflow.operators.bash_operator.BashOperator",
                    "bash_command": "echo 3",
                    "dependencies": ["task_1"],
                },
            },
        }
        assert builder.get_dag_params() == expected

    def test_get_dag_params_no_start_date(self):
        """Empty configs (no start date) make ``get_dag_params`` raise."""
        builder = dagbuilder.DagBuilder("test_dag", {}, {})
        with pytest.raises(Exception):
            builder.get_dag_params()

    def test_make_task_valid(self):
        """A valid operator path and params produce a BashOperator."""
        builder = self._new_builder()
        params = {"task_id": "test_task", "bash_command": "echo 1"}
        task = builder.make_task(
            "airflow.operators.bash_operator.BashOperator", params)
        assert task.task_id == "test_task"
        assert task.bash_command == "echo 1"
        assert isinstance(task, BashOperator)

    def test_make_task_bad_operator(self):
        """An operator path that cannot be resolved makes ``make_task`` raise."""
        builder = self._new_builder()
        params = {"task_id": "test_task", "bash_command": "echo 1"}
        with pytest.raises(Exception):
            builder.make_task("not_real", params)

    def test_make_task_missing_required_param(self):
        """Omitting ``bash_command`` makes ``make_task`` raise."""
        builder = self._new_builder()
        params = {"task_id": "test_task"}
        with pytest.raises(Exception):
            builder.make_task(
                "airflow.operators.bash_operator.BashOperator", params)

    def test_build(self):
        """``build`` returns the dag id and a DAG holding the three wired tasks."""
        built = self._new_builder().build()
        assert built["dag_id"] == "test_dag"
        assert isinstance(built["dag"], DAG)
        assert len(built["dag"].tasks) == 3
        # task_2 and task_3 both declare task_1 as a dependency.
        assert built["dag"].task_dict["task_1"].downstream_task_ids == {
            "task_2",
            "task_3",
        }
Beispiel #33
0
    def refresh(self):
        """
        Re-fetch the iCal feed at ``self.url`` and sync ``self.events`` with it.

        Every existing event is first marked as not displayed; each VEVENT in
        the feed with a usable, unique UID is then created or updated and
        marked as displayed again. ``self.refreshed_at`` is set at the end.

        :returns: list of ``(level, message)`` tuples, where level is
            ``"danger"`` or ``"warning"``.
        :raises Exception: when the source ends up with more than 1000 events.
        """
        response = requests.get(self.url)

        feed = Calendar.from_ical(response.text)
        if self.name is None:
            self.name = feed.get("X-WR-CALNAME")

        # Hide everything; events still present in the feed are re-shown below.
        for event in self.events:
            event.displayed = False

        local_tz = pendulum.timezone("Europe/London")
        alerts = []
        uids_seen = set()
        # Set once an out-of-range warning has been emitted, so that only one
        # such warning is reported per refresh.
        out_of_range_event = False
        for component in feed.walk():
            if component.name != "VEVENT":
                continue

            summary = component.get("Summary")

            # postgres converts to UTC if given an aware datetime, so strip it up front
            start_dt = local_tz.convert(
                pendulum.instance(component.get("dtstart").dt)
            ).naive()
            end_dt = local_tz.convert(
                pendulum.instance(component.get("dtend").dt)
            ).naive()

            # Human-readable label, used only in messages about UID problems.
            if summary and start_dt:
                name = "'{}' at {}".format(summary, start_dt)
            elif summary:
                name = "'{}'".format(summary)
            elif start_dt:
                name = "Event at {}".format(start_dt)
            else:
                name = len(self.events) + 1

            if not component.get("uid"):
                alerts.append(("danger", "{} has no UID".format(name)))
                continue

            uid = str(component["uid"])
            if uid in uids_seen:
                message = "{} has duplicate UID {}".format(name, uid)
                alerts.append(("danger", message))
                continue
            uids_seen.add(uid)

            if "rrule" in component:
                message = "{} has rrule, which is not processed".format(uid)
                alerts.append(("warning", message))

            # Allow a bit of slop for build-up events
            too_early = start_dt < event_start() - pendulum.duration(days=2)
            if too_early and not out_of_range_event:
                message = "At least one event ({}) is before the start of the event".format(
                    uid
                )
                alerts.append(("warning", message))
                out_of_range_event = True

            too_late = end_dt > event_end() + pendulum.duration(days=1)
            if too_late and not out_of_range_event:
                message = "At least one event ({}) is after the end of the event".format(
                    uid
                )
                alerts.append(("warning", message))
                out_of_range_event = True

            if start_dt > end_dt:
                message = "Start time for {} is after its end time".format(uid)
                alerts.append(("danger", message))
                # NOTE(review): this also silences later out-of-range
                # warnings - confirm that is intended.
                out_of_range_event = True

            try:
                event = CalendarEvent.query.filter_by(
                    source_id=self.id, uid=uid
                ).one()
            except NoResultFound:
                # Not stored yet: create it and attach it to this source.
                event = CalendarEvent(uid=uid)
                self.events.append(event)
                if len(self.events) > 1000:
                    raise Exception("Too many events in feed")

            event.start_dt = start_dt
            event.end_dt = end_dt
            event.summary = component.get("summary")
            event.description = component.get("description")
            event.location = component.get("location")
            event.displayed = True

        self.refreshed_at = pendulum.now()

        return alerts
Beispiel #34
0
def test_replace_tzinfo():
    """``replace(tzinfo=...)`` switches the datetime to the given timezone."""
    original = pendulum.datetime(2016, 7, 2, 0, 41, 20)
    paris = pendulum.timezone('Europe/Paris')

    assert original.replace(tzinfo=paris).timezone_name == 'Europe/Paris'
Beispiel #35
0
def detik():
    """Return the seconds component of the current Asia/Jakarta time.

    ("detik" is Indonesian for "second".)
    """
    # The value is already an int, so it is returned directly instead of
    # being formatted to a string, stored in a dict and parsed back.
    return pendulum.now(pendulum.timezone('Asia/Jakarta')).second
Beispiel #36
0
def jam():
    """Return the hour component of the current Asia/Jakarta time.

    ("jam" is Indonesian for "hour".)
    """
    # The value is already an int, so it is returned directly instead of
    # being formatted to a string, stored in a dict and parsed back.
    return pendulum.now(pendulum.timezone('Asia/Jakarta')).hour
Beispiel #37
0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
import datetime as dt

import pendulum

from airflow.settings import TIMEZONE

# UTC time zone as a tzinfo instance.
# Built once at import time via pendulum and exposed as the module-level ``utc``.
utc = pendulum.timezone('UTC')


def is_localized(value):
    """
    Tell whether a datetime.datetime is timezone-aware.

    A value counts as aware when its ``utcoffset()`` is not ``None``; this
    relies on ``value.tzinfo`` being either ``None`` or a proper
    ``datetime.tzinfo``, as described in Python's docs:
    http://docs.python.org/library/datetime.html#datetime.tzinfo
    """
    offset = value.utcoffset()
    return offset is not None


def is_naive(value):
    """
Beispiel #38
0
def dt():
    """Return 2016-08-27 12:34:56.123456 converted by the Europe/Paris timezone."""
    wall_time = datetime(2016, 8, 27, 12, 34, 56, 123456)
    return timezone("Europe/Paris").convert(wall_time)
Beispiel #39
0
import pendulum
from sqlalchemy import create_engine, exc
from sqlalchemy.engine import Engine
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.orm.session import Session as SASession
from sqlalchemy.pool import NullPool

# noinspection PyUnresolvedReferences
from airflow.configuration import AIRFLOW_HOME, WEBSERVER_CONFIG, conf  # NOQA F401
from airflow.logging_config import configure_logging
from airflow.utils.module_loading import import_string
from airflow.utils.sqlalchemy import setup_event_handlers

log = logging.getLogger(__name__)

# Default timezone. Starts as UTC and is replaced below when the
# ``core.default_timezone`` option can be read and resolved.
TIMEZONE = pendulum.timezone('UTC')
try:
    tz = conf.get("core", "default_timezone")
    if tz == "system":
        # "system" selects the timezone pendulum detects for this machine.
        TIMEZONE = pendulum.local_timezone()
    else:
        TIMEZONE = pendulum.timezone(tz)
except Exception:
    # Still best effort (TIMEZONE keeps its UTC default), but leave a trace
    # so a missing or invalid setting does not go unnoticed.
    log.warning(
        "Could not resolve core.default_timezone; falling back to UTC",
        exc_info=True,
    )
# Pass the value as a logging argument so formatting is deferred.
log.info("Configured default timezone %s", TIMEZONE)

HEADER = '\n'.join([
    r'  ____________       _____________',
    r' ____    |__( )_________  __/__  /________      __',
    r'____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /',
    r'___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /',
Beispiel #40
0
 def python_value(self, value: str) -> pendulum.DateTime:
     """Parse a stored string into a pendulum datetime, using UTC as ``tz``.

     Returns ``None`` when ``value`` is ``None``.
     """
     if value is None:
         return None
     utc = pendulum.timezone("UTC")
     return pendulum.parse(value, tz=utc)
Beispiel #41
0
"""
Code that goes along with the Airflow tutorial located at:
https://github.com/apache/incubator-airflow/blob/master/airflow/example_dags/tutorial.py
"""
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime, timedelta
import pendulum

# Timezone used to make the DAG's start date timezone-aware.
local_tz = pendulum.timezone('Europe/Zurich')
START_DATE = datetime(2018, 10, 10, 16, 20, tzinfo=local_tz)

# Settings handed to the DAG below through its ``default_args`` parameter.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': START_DATE,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# DAG 'tz_test': cron schedule '*/5 * * * *', catchup disabled, with
# concurrency and max_active_runs both set to 1.
dag = DAG('tz_test',
          default_args=default_args,
          schedule_interval='*/5 * * * *',
          concurrency=1,
          max_active_runs=1,
          catchup=False)

# t1 is an example of a task created by instantiating an operator; it runs
# the shell command ``date``.
t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)
Beispiel #42
0
import datetime
import pendulum

from airflow import models
from utils.gcp import bigquery_etl_query
from airflow.operators.subdag_operator import SubDagOperator
from utils.amplitude import export_to_amplitude

# Timezone for a time-zone-aware start date, following the Airflow guide:
# https://airflow.apache.org/docs/stable/timezone.html#time-zone-aware-dags
pt_tz = pendulum.timezone("America/Los_Angeles")

# Owner, start date, e-mail notification and retry settings
# (presumably passed to the DAG named below as its default_args).
default_args = {
    'owner': '*****@*****.**',
    'start_date': datetime.datetime(2020, 4, 1, tzinfo=pt_tz),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=20),
}

dag_name = 'fxa_export_to_amplitude'
"""
A Note on Times:
FxA logs become available to BigQuery within seconds.
The `timestamp` field of an event is when it occurred on the server,
and the `receiveTimestamp` was when it was received by Cloud Logging.
Usually these are at most seconds apart.
Reference: https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry

From there the data is streamed to BigQuery. This is one record at-a-time
from airflow.providers.postgres.operators.postgres import PostgresOperator
from airflow.utils.dates import datetime
from airflow.providers.postgres.hooks.postgres import PostgresHook
from airflow.decorators import dag, task
from airflow.operators.python import task, get_current_context
import pendulum
# Pandas to transform data
from pandas import DataFrame
import requests
from pandas import json_normalize
import pandas as pd
from sqlalchemy import create_engine
from datetime import timedelta

# Timezone (US/Pacific) used for the timezone-aware start date below
local_tz = pendulum.timezone("US/Pacific")
# Name of the target database
db_name = "userdata"
# Base URL of the API to call (ends with "?" so query parameters can follow)
NY_API = "https://health.data.ny.gov/resource/xdss-u53e.json?"

# These args will get passed on to each operator
# You can override them on a per-task basis during operator initialization
# NOTE(review): 'dag_id' and 'schedule_interval' look like DAG-level settings
# rather than per-operator arguments - confirm they are also given to the DAG
# itself where it is constructed.
default_args = {
    'owner': 'Anil',
    'dag_id': 'LOAD_NY_COVID_DLY',
    'start_date': datetime(2020, 3, 1, tzinfo=local_tz),
    'schedule_interval': '0 9 * * *'
}

# Use the Postgres hook to get the connection URL, then modify it to point at the right database name
Beispiel #44
0
def bulan():
    """Return the current month number in the Asia/Jakarta timezone as an int."""
    # The original formatted the value to a string, stored it in a throwaway
    # dict and parsed it back; the attribute is already the integer wanted.
    return pendulum.now(pendulum.timezone('Asia/Jakarta')).month
Beispiel #45
0
# Point Django at the project settings and initialise it before the model
# modules below are imported.
import os, django
os.environ['DJANGO_SETTINGS_MODULE'] = 'electron_project.settings'
django.setup()

from django.conf import settings

from devices.models import *
from receipts.models import *
from expenses.models import *

from MyPackage import DB

import pickle, datetime, pendulum

# Timezone object built from the Django TIME_ZONE setting.
tz = pendulum.timezone(settings.TIME_ZONE)


def remove_duplicate(iterable):
    """Return the items of ``iterable`` as a list with later duplicates dropped.

    The first occurrence of each item is kept and the original order is
    preserved. Items are compared with ``==``, so unhashable items (lists,
    dicts, ...) are supported as well.
    """
    # Materialise once: an iterator cannot be replayed for the fallback below.
    items = list(iterable)

    try:
        # Fast path: dict keys are unique and keep insertion order.
        return list(dict.fromkeys(items))
    except TypeError:
        # At least one item is unhashable; fall back to pairwise comparison.
        pass

    unique = []
    for item in items:
        if item not in unique:
            unique.append(item)
    return unique


class APIConverter(object):
    def convert_all(self):
Beispiel #46
0
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.mysql_operator import MySqlOperator
from airflow.operators.email_operator import EmailOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator

# import python functions
from Airflow_Tutorial.python_files.main import *

# import custom-defined modules so that some variables are config-driven
from Airflow_Tutorial.utilities.utilities import *
from Airflow_Tutorial.utilities.variables import *


# Timezone attached to the start date.
local_tz = pendulum.timezone('Asia/Kathmandu')
# START_DATE is unpacked as keyword arguments for datetime(); it is not
# defined in the visible code - presumably it comes from one of the wildcard
# utility imports above.
start_date = datetime(**START_DATE, tzinfo=local_tz)


# define default arguments
default_args={
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': start_date,
    'email': EMAIL_LIST,
    'email_on_failure': False,
    'email_on_retry': False,
    # 'retries': 1,
    # 'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
Beispiel #47
0
import os
import pendulum
from datetime import date, timedelta
import datetime
from dateutil import tz

#==================================================================================================================================
#                                                                                                            [ENVIRONMENT AND TIME]
#==================================================================================================================================
ENVIRONMENT = 'development'
LOCAL_TIME_ZONE = pendulum.timezone("Asia/Ho_Chi_Minh")
to_zone = tz.gettz('Asia/Ho_Chi_Minh')
from_zone = tz.tzutc()
# Current instant as an aware UTC datetime. datetime.utcnow() returns a naive
# value and is deprecated since Python 3.12, so request an aware one directly.
utc = datetime.datetime.now(tz=from_zone)
# The same instant expressed in Asia/Ho_Chi_Minh local time.
now = utc.astimezone(to_zone)
# Naive midnight of the current local day, and the midnight one day earlier.
# NOTE: all of these are evaluated once, when the module is imported.
end = datetime.datetime(now.year, now.month, now.day)
start = end - timedelta(days=1)

#==================================================================================================================================
#                                                                                                                 [ELROND DATABASE]
#==================================================================================================================================
ELROND_DATABASE = 'elrond'
ELROND_PROJECT_COLLECTION = 'projects'
Beispiel #48
0
from datetime import datetime, timedelta
import pendulum

# Third party imports
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule

# Local application imports
from prdn_wf_utils.etl_wf import etl_wf
import prdn_wf_r_traffic_gmta_brd.helpers as helpers

# different default arguments for DAG
# Timezone attached to the DAG start date.
local_tz = pendulum.timezone("Europe/Moscow")

# Shared arguments: ownership, start date, e-mail notification flags, plus the
# project-specific 'mssql_conn_id' and 'wf_name' entries.
default_args = {
    'owner': 'gmta',
    'depends_on_past': False,
    'start_date': datetime(year=2019, month=4, day=5, tzinfo=local_tz),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'provide_context': True,
    'mssql_conn_id': 'prdn_gmta_mssql_main',
    'wf_name': 'prdn_wf_r_traffic_gmta_brd'
}

with DAG(dag_id='prdn_wf_r_traffic_gmta_brd',
         default_args=default_args,
Beispiel #49
0
 def test_create_from_format_with_timezone(self):
     # Parsing with an explicit timezone object must keep the wall-clock
     # fields and report that timezone's name.
     parsed = Pendulum.create_from_format(
         '1975-05-21 22:32:11',
         '%Y-%m-%d %H:%M:%S',
         timezone('Europe/London'),
     )
     self.assertPendulum(parsed, 1975, 5, 21, 22, 32, 11)
     self.assertEqual('Europe/London', parsed.timezone_name)
Beispiel #50
0
import pendulum
from airflow import DAG
from airflow.utils import timezone
from airflow.operators.dummy_operator import DummyOperator

from datetime import timedelta, datetime

# Timezone object for Europe/Paris.
# NOTE(review): local_tz is not referenced in the visible code and the
# start_date below is a naive datetime - confirm whether it was meant to
# carry tzinfo=local_tz.
local_tz = pendulum.timezone("Europe/Paris")

default_args = {'start_date': datetime(2019, 3, 29, 1), 'owner': 'Airflow'}

with DAG(dag_id='tz_dag',
         schedule_interval="0 1 * * *",
         default_args=default_args) as dag:
    dummy_task = DummyOperator(task_id='dummy_task')

    run_dates = dag.get_run_dates(start_date=dag.start_date)
    next_execution_date = run_dates[-1] if len(run_dates) != 0 else None

    # Uncomment when you use the DAG, comment when not
    # use docker logs -f docker_ID to see print statements below
    print('datetime from Python is Naive: {0}'.format(
        timezone.is_naive(datetime(2019, 9, 19))))
    print('datetime from Airflow is Aware: {0}'.format(
        timezone.is_naive(timezone.datetime(2019, 9, 19)) == False))
    print(
        '[DAG:tz_dag] timezone: {0} - start_date: {1} - schedule_interval: {2} - Last execution_date: {3} - next execution_date {4} in UTC - next execution_date {5} in local time'
        .format(
            dag.timezone, dag.default_args['start_date'],
            dag._schedule_interval, dag.latest_execution_date,
            next_execution_date,
Beispiel #51
0
def menit():
    """Return the current minute in the Asia/Jakarta timezone as an int."""
    # The original formatted the value to a string, stored it in a throwaway
    # dict and parsed it back; the attribute is already the integer wanted.
    return pendulum.now(pendulum.timezone('Asia/Jakarta')).minute
Beispiel #52
0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
import datetime as dt
import pendulum

from airflow.settings import TIMEZONE

# UTC time zone as a tzinfo instance.
# This name must really be UTC: it was previously bound to 'Asia/Shanghai',
# which contradicts both the name and the comment. A local default timezone
# belongs in the TIMEZONE setting imported above, not here.
utc = pendulum.timezone('UTC')


def is_localized(value):
    """
    Tell whether the given datetime.datetime is aware.

    See Python's docs for the definition of aware and naive objects:
    http://docs.python.org/library/datetime.html#datetime.tzinfo
    A value is treated as aware exactly when ``value.utcoffset()`` returns
    something other than ``None``.
    """
    if value.utcoffset() is None:
        return False
    return True


def is_naive(value):
    """
Beispiel #53
0
    DAG initialization module
"""
from airflow import DAG
from airflow.operators.python_operator import (
    PythonOperator, )
from datetime import datetime, timedelta
import pendulum

from airflow_dags.example_sql_dag.utils import (
    get_module,
    run_sql,
)

# Look up this file's module information through the project helper; the
# "module_name" entry is used as the DAG id below.
result = get_module(__file__)
module_name = result.get("module_name")
local_tz = pendulum.timezone("America/Sao_Paulo")

# Default arguments for the DAG: owner, timezone-aware start date and retry
# policy (1 retry after 5 minutes).
args = {
    "owner": "David",
    "depends_on_past": False,
    "start_date": datetime(2020, 7, 20, 0, 0, 0, tzinfo=local_tz),
    "provide_context": True,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

# No schedule (schedule_interval=None), no catchup, one active run at a time.
# NOTE(review): the f-string renders a missing module_name as the text
# "None" - confirm get_module() always provides the key.
dag = DAG(f"{module_name}",
          schedule_interval=None,
          default_args=args,
          catchup=False,
          max_active_runs=1)
Beispiel #54
0
    def parse_updates(self, locate=True):
        """Collect tracing events from ``self.root`` into ``self.updates``.

        Every ``eGrpTracingData`` element found under ``self.root`` produces
        one dict with the keys ``location``, ``vessel``, ``voyage``,
        ``movement``, ``mode`` and ``date``, which is appended to
        ``self.updates``. ``date`` holds the planned arrival date/time
        converted to UTC, or stays ``""`` when parsing raises ``ValueError``.

        Nothing happens when ``self.root`` is falsy.

        :param locate: accepted but not used by this method.
        """
        if not self.root:
            return

        base = '{{http://esb.hlag.com/services/mobile/MobileService}}{0}'

        # Walk the matching elements in a single pass; when nothing matches
        # the loop simply does not run.
        for e in self.root.iter(tag=base.format("eGrpTracingData")):

            temp = {
                "location": "",
                "vessel": "",
                "voyage": "",
                "movement": "",
                "mode": "",
                "date": ""
            }
            # Placeholders used when the element carries no date/time parts.
            _date = "0000-00-00"
            _time = "00:00:00"

            for i in e.iter():

                if i.tag.endswith("businessLocode"):
                    temp["location"] = (i.text, )
                elif i.tag.endswith("eLineVessel"):
                    temp["vessel"] = i[0].text
                elif i.tag.endswith("eLineOperation"):
                    temp["movement"] = i[0].text
                elif i.tag.endswith("plannedArrDate"):
                    _date = i.text
                elif i.tag.endswith("plannedArrTime"):
                    # Drop the fractional part after the first "."
                    _time = i.text.split(".")[0]
                elif i.tag.endswith("eLineMot"):
                    temp["mode"] = i[0].text
                elif i.tag.endswith("scheduleVoyageNo"):
                    temp["voyage"] = i.text

            try:
                pt = _date + " " + _time
                # date/times are local to each place of activity.
                # NOTE(review): temp["location"] is either "" or a
                # one-element tuple here, and neither has a ``.timezone``
                # attribute, so the AttributeError branch (UTC) is always
                # taken - confirm whether a location object was intended.
                try:
                    rd = pendulum.from_format(
                        pt,
                        "YYYY-MM-DD HH:mm:ss",
                        tz=temp["location"].timezone)
                except AttributeError:
                    rd = pendulum.from_format(pt,
                                              "YYYY-MM-DD HH:mm:ss",
                                              tz="UTC")

                # convert each time to UTC
                utc = pendulum.timezone("UTC")
                temp["date"] = utc.convert(rd)
            except ValueError:
                # Deliberate best effort: the record is still reported, with
                # its "date" left as the empty string.
                pass

            self.updates.append(temp)
Beispiel #55
0
def hari():
    """Return the current day of the month in the Asia/Jakarta timezone as an int."""
    # The original formatted the value to a string, stored it in a throwaway
    # dict and parsed it back; the attribute is already the integer wanted.
    return pendulum.now(pendulum.timezone('Asia/Jakarta')).day
Beispiel #56
0
import pendulum
from airflow import DAG
from airflow.models import Variable
from airflow.operators.email import EmailOperator
from airflow.providers.ssh.operators.ssh import SSHOperator
from airflow.sensors.external_task import ExternalTaskSensor
from auxiliary.outils import get_json_secret

# Shared arguments: owner, timezone-aware start date (America/Los_Angeles),
# e-mail on failure only, and 1 retry after 3 minutes.
default_args = {
    'owner':
    'airflow',
    'depends_on_past':
    False,
    'start_date':
    datetime(2019, 7, 29, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**'],
    'email_on_failure':
    True,
    'email_on_retry':
    False,
    'retries':
    1,
    'retry_delay':
    timedelta(minutes=3)
}

# DAG 'send_bmt' with cron schedule '0 12 * * 1' and catchup disabled.
dag = DAG('send_bmt',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 12 * * 1')
def process(time, rowRdd):
    """Process one batch of taxi rows and publish the latest free taxis.

    Steps, in order:

    1. Return early when ``rowRdd`` is empty.
    2. Keep rows whose ``utc`` column lies strictly inside a three-minute
       window that ends two minutes before ``time``.
    3. Add ``grid``/``block`` columns, drop unused columns, and union the
       batch with the ``last_df`` view when one exists.
    4. Count status changes (only printed).
    5. Keep the last record per ``memsn``, write the free ones to MySQL and,
       if that succeeds, notify an HTTP endpoint.
    6. Register the last records as ``last_df``.

    Relies on names defined outside this function: ``spark``,
    ``grid_block_udf``, ``monotonically_increasing_id``, ``logging`` and
    ``requests``.

    :param time: batch time; converted with the Asia/Taipei timezone and then
        to UTC.
    :param rowRdd: RDD of rows for this batch (presumably supplied by a
        streaming ``foreachRDD`` callback - confirm against the caller).
    """
    print("========= %s =========" % str(time))
    if rowRdd.isEmpty():
        print("Rdd is empty")
        return
    tw = pendulum.timezone("Asia/Taipei")
    time = tw.convert(time)
    utc = pendulum.timezone("UTC")
    # Window: end = batch time - 2 minutes, start = end - 3 minutes.
    end = pendulum.instance(utc.convert(time)).subtract(minutes=2)
    start = end.subtract(minutes=3)
    # str(end) / str(start) below use this format.
    end.set_to_string_format("%Y-%m-%dT%H:%M:%SZ")
    start.set_to_string_format("%Y-%m-%dT%H:%M:%SZ")

    taxi_df = spark.createDataFrame(rowRdd)
    # Keep rows strictly inside (start, end); the utc column is compared
    # against the formatted strings.
    taxi_df = taxi_df.filter(taxi_df.utc < str(end)) \
                     .filter(taxi_df.utc > str(start))
    # grid_block_udf returns a struct whose grid/block fields are promoted to
    # top-level columns.
    taxi_df_grid = taxi_df \
            .withColumn("grid_block", grid_block_udf("lng_x", "lat_y"))
    taxi_df_grid = taxi_df_grid \
            .withColumn("grid", taxi_df_grid.grid_block.grid) \
            .withColumn("block", taxi_df_grid.grid_block.block)
    taxi_df_grid = taxi_df_grid.drop("car_type", "height", "speed", "course",
                                     "grid_block")
    taxi_df_grid = taxi_df_grid.where("grid >= 0")
    taxi_df_grid.cache()
    print("taxis in this batch:", taxi_df_grid.select("memsn").distinct().count())
    # merge last batch
    tableNames = spark.catalog.listTables()
    if "last_df" in [t.name for t in tableNames]:
        taxi_df_grid.createOrReplaceTempView("taxi")
        taxi_df_grid = spark.sql("""
                                 select * from last_df
                                 union all
                                 select * from taxi
                                 """)
    # idx is used further down to break ties between rows with equal utc.
    taxi_df_grid = taxi_df_grid.withColumn("idx",
                    monotonically_increasing_id()).cache()
    taxi_df_grid.createOrReplaceTempView("taxi")
    # status changed
    # Rows whose previous record (same memsn, ordered by utc) had acc = 1,
    # meter = 0, busy = 0 while the current one has acc = 1 and meter = 1.
    busy_cars = spark.sql("""
                    with ts as (
                      select
                        row_number() over (partition by memsn
                          order by utc) as rownum,
                        utc, memsn, grid, block, acc, meter, busy,
                        lng_x, lat_y
                      from taxi
                    )
                    select
                      cur.utc, cur.memsn, cur.grid, cur.block, 
                      cur.lng_x, cur.lat_y
                    from ts cur inner join ts prev
                      on prev.rownum = cur.rownum - 1
                      and prev.memsn = cur.memsn
                    where prev.acc = 1 and prev.meter = 0 and prev.busy = 0
                      and cur.acc = 1 and cur.meter = 1
                    """)
    print("status changed:", busy_cars.count())
    # NOTE(review): disabled export of busy_cars; it contains what appear to
    # be real credentials and should be removed from source.
#    host = "52.246.188.40:3306"
#    db = "realtime_car"
#    try:
#        busy_cars \
#          .write.format("jdbc") \
#          .option("url",
#                  "jdbc:mysql://{}/{}?useSSL=false".format(host, db)) \
#          .option("driver", "com.mysql.jdbc.Driver") \
#          .option("dbtable", "busy_cars") \
#          .option("user", "taxi_dashboard") \
#          .option("password", "dashboard123") \
#          .save(mode="append")
#    except Exception as e:
#        logging.exception(e)
    # find the last record of each driver
    # Anti-join: keep a row only when no other row of the same memsn is later
    # (greater utc, or equal utc with a greater idx).
    last_records = spark.sql("""
                    select t1.* from taxi t1
                      left join taxi t2 on t1.memsn = t2.memsn
                        and ((t1.utc < t2.utc) or
                        (t1.utc = t2.utc and t1.idx < t2.idx))
                    where t2.memsn is null
                    """)
    last_records = last_records.drop("idx").coalesce(6).checkpoint()
    last_records_free = last_records \
            .where("acc = 1 and meter = 0 and busy = 0") \
            .drop("meter", "busy", "acc")
    # write to db
    # NOTE(security): host, user and password are hard-coded below; they
    # should come from configuration or a secret store.
    host = "52.246.185.78:3306"
    db = "taxi"
    try:
        last_records_free \
              .write.format("jdbc") \
              .option("url",
                      "jdbc:mysql://{}/{}?useSSL=false".format(host, db)) \
              .option("driver", "com.mysql.jdbc.Driver") \
              .option("dbtable", "taxi_gps") \
              .option("user", "taxi_manager") \
              .option("password", "taxi1215@") \
              .save(mode="overwrite")
    except Exception as e:
        logging.exception(e)
    else:
        # Only after a successful write: notify the HTTP endpoint.
        # NOTE(review): the request has no timeout.
        domain = "40.115.238.9"
        url = "http://{}:3851/passengersradar/realtimeData/".format(domain)
        requests.post(url, json={"result": "ok"})

    # clean up dataframe
    # Expose this batch's last records as "last_df" so the next call can
    # union with them.
    spark.catalog.clearCache()
    last_records.createOrReplaceTempView("last_df")
    del last_records
import datetime

import pendulum
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airflow import models
import os
import logging
import requests

from microservice.operators.load_sql_operator import LoadSqlOperator
from microservice.transform import JsonSplitter, equipments_tables, normalize_equipments

# Timezone object for America/Lima.
tz = pendulum.timezone('America/Lima')


def transform_data(**context):
    """Publish the ``url_equipment`` Variable to XCom under the key ``url``.

    Expects the task instance under ``context["ti"]``.
    """
    task_instance = context["ti"]
    equipment_url = models.Variable.get('url_equipment')
    task_instance.xcom_push(key="url", value=equipment_url)


# Retry settings: 3 retries, 10 minutes apart (presumably passed to the DAG
# below as its default_args).
default_args = {'retries': 3, 'retry_delay': datetime.timedelta(minutes=10)}

dag = DAG(
    dag_id='etl_equipment_dev',
    start_date=tz.convert(days_ago(1)),
    max_active_runs=1,
    # dagrun_timeout=datetime.timedelta(minutes=5),
Beispiel #59
0
    def __init__(
        self,
        name,
        cron_schedule,
        pipeline_name,
        run_config=None,
        run_config_fn=None,
        tags=None,
        tags_fn=None,
        solid_selection=None,
        mode="default",
        should_execute=None,
        environment_vars=None,
        execution_timezone=None,
        execution_fn=None,
        description=None,
    ):
        """Validate the schedule arguments and build its execution function.

        Args:
            name: schedule name, validated with ``check_valid_name``.
            cron_schedule: cron string; must pass ``croniter.is_valid``.
            pipeline_name: name of the target pipeline (str).
            run_config: static run config; exclusive with ``run_config_fn``.
            run_config_fn: callable taking the context and returning run
                config.
            tags: static tags, validated with ``check_tags``; exclusive with
                ``tags_fn``.
            tags_fn: callable taking the context and returning tags.
            solid_selection: optional list of str.
            mode: optional str, defaulting to ``DEFAULT_MODE_NAME``.
            should_execute: callable taking the context; a falsy result makes
                the schedule yield a ``SkipReason``. Defaults to always true.
            environment_vars: optional dict of str to str.
            execution_timezone: optional timezone name; must be loadable with
                ``pendulum.timezone``.
            execution_fn: callable used as the execution function as-is;
                exclusive with ``run_config``, ``run_config_fn``, ``tags``,
                ``tags_fn`` and ``should_execute``.
            description: optional str.

        Raises:
            DagsterInvalidDefinitionError: for an invalid cron schedule, for
                mutually exclusive arguments given together, or for a
                timezone that cannot be loaded.
        """

        if not croniter.is_valid(cron_schedule):
            raise DagsterInvalidDefinitionError(
                f"Found invalid cron schedule '{cron_schedule}' for schedule '{name}'."
            )

        self._name = check_valid_name(name)
        self._pipeline_name = check.str_param(pipeline_name, "pipeline_name")
        self._mode = check.opt_str_param(mode, "mode", DEFAULT_MODE_NAME)
        self._solid_selection = check.opt_nullable_list_param(
            solid_selection, "solid_selection", of_type=str)
        self._description = check.opt_str_param(description, "description")

        self._cron_schedule = check.str_param(cron_schedule, "cron_schedule")
        self._environment_vars = check.opt_dict_param(environment_vars,
                                                      "environment_vars",
                                                      key_type=str,
                                                      value_type=str)
        self._execution_timezone = check.opt_str_param(execution_timezone,
                                                       "execution_timezone")

        if execution_fn and (run_config_fn or tags_fn or should_execute or tags
                             or run_config):
            raise DagsterInvalidDefinitionError(
                "Attempted to provide both execution_fn and individual run_config/tags arguments "
                "to ScheduleDefinition. Must provide only one of the two.")
        elif execution_fn:
            self._execution_fn = check.opt_callable_param(
                execution_fn, "execution_fn")
        else:
            # No execution_fn supplied: assemble one from the individual
            # run_config / tags / should_execute pieces.
            if run_config_fn and run_config:
                raise DagsterInvalidDefinitionError(
                    "Attempted to provide both run_config_fn and run_config as arguments"
                    " to ScheduleDefinition. Must provide only one of the two."
                )
            run_config_fn = check.opt_callable_param(
                run_config_fn,
                "run_config_fn",
                default=lambda _context: check.opt_dict_param(
                    run_config, "run_config"),
            )

            if tags_fn and tags:
                raise DagsterInvalidDefinitionError(
                    "Attempted to provide both tags_fn and tags as arguments"
                    " to ScheduleDefinition. Must provide only one of the two."
                )
            elif tags:
                check_tags(tags, "tags")
                tags_fn = lambda _context: tags
            else:
                tags_fn = check.opt_callable_param(tags_fn,
                                                   "tags_fn",
                                                   default=lambda _context: {})

            should_execute = check.opt_callable_param(
                should_execute,
                "should_execute",
                default=lambda _context: True)

            def _execution_fn(context):
                # Each user-supplied callable runs inside its own error
                # boundary so the failure message names the callable.
                with user_code_error_boundary(
                        ScheduleExecutionError,
                        lambda:
                        f"Error occurred during the execution of should_execute for schedule {name}",
                ):
                    if not should_execute(context):
                        yield SkipReason(
                            "should_execute function for {schedule_name} returned false."
                            .format(schedule_name=name))
                        return

                with user_code_error_boundary(
                        ScheduleExecutionError,
                        lambda:
                        f"Error occurred during the execution of run_config_fn for schedule {name}",
                ):
                    evaluated_run_config = run_config_fn(context)

                with user_code_error_boundary(
                        ScheduleExecutionError,
                        lambda:
                        f"Error occurred during the execution of tags_fn for schedule {name}",
                ):
                    evaluated_tags = tags_fn(context)

                yield RunRequest(
                    run_key=None,
                    run_config=evaluated_run_config,
                    tags=evaluated_tags,
                )

            self._execution_fn = _execution_fn

        if self._execution_timezone:
            try:
                # Verify that the timezone can be loaded
                pendulum.timezone(self._execution_timezone)
            except Exception as exc:
                # Chain the original failure so the root cause stays visible.
                raise DagsterInvalidDefinitionError(
                    "Invalid execution timezone {timezone} for {schedule_name}"
                    .format(schedule_name=name,
                            timezone=self._execution_timezone)) from exc
Beispiel #60
0
def convert_timezone(dt: Union[pendulum.datetime, datetime.datetime],
                     timezone: str) -> pendulum.datetime:
    """Convert ``dt`` using the pendulum timezone named by ``timezone``."""
    return pendulum.timezone(timezone).convert(dt)