def create_tables_from_basiskaartdb_to_masterdb(source_connection,
                                                source_select_statement,
                                                target_base_table, *args,
                                                **kwargs):
    """
    Source_connection contains the environment variable name (as defined in the docker-compose.yml) of the source connection i.e. AIRFLOW_CONN_POSTGRES_BASISKAART
    Source_select_statement contains the SQL select query that will be executed on the source DB.
    Target_base_table contains the table in master DB where the data (the result of source_select_statement execution) is inserted into.
    """

    try:
        # setup the DB source connection
        source_engine = create_engine(env(source_connection).split("?")[0])
    except SQLAlchemyError as e:
        raise Exception(str(e)) from e

    # fetch data from the source DB in batches
    with source_engine.connect() as connection:
        count = 0
        cursor = connection.execute(source_select_statement)
        while True:
            fetch_iterator = cursor.fetchmany(size=import_step)
            batch_count = copy_data_in_batch(target_base_table, fetch_iterator)
            count += batch_count
            if batch_count < import_step:
                break
    logger.info(f"Total records imported: {count}")
Example #2
def copy_data_from_dbwaarnemingen_to_masterdb(*args, **kwargs):
    try:
        waarnemingen_engine = create_engine(
            env("AIRFLOW_CONN_POSTGRES_DBWAARNEMINGEN").split("?")[0])
    except SQLAlchemyError as e:
        raise Exception(str(e)) from e

    with waarnemingen_engine.connect() as waarnemingen_connection:
        count = 0
        cursor = waarnemingen_connection.execute(f"""
SET TIME ZONE 'Europe/Amsterdam';
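-- Aggregate the 15-minute counts into hourly totals per sensor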
WITH cmsa_1h_v6 AS (
  SELECT sensor
       , date_trunc('hour'::text, timestamp_rounded) AS datum_uur
       , SUM(total_count) AS aantal_passanten
  FROM cmsa_15min_view_v6_materialized
  WHERE timestamp_rounded > to_date('2019-01-01'::text, 'YYYY-MM-DD'::text)
  GROUP BY sensor, (date_trunc('hour'::text,timestamp_rounded)))
SELECT v.sensor, s.location_name, v.datum_uur, v.aantal_passanten, s.gebied, s.geom as geometrie
FROM cmsa_1h_v6 v
JOIN peoplemeasurement_sensors s ON s.objectnummer::text = v.sensor::text;
""")
        while True:
            fetch_iterator = cursor.fetchmany(size=import_step)
            batch_count = copy_data_in_batch(fetch_iterator)
            count += batch_count
            if batch_count < import_step:
                break
        print(f"Imported: {count}")
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token",
                        type=str,
                        required=True,
                        help="github token")

    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    milestones = get_latest_milestones(repo)
    milestones.sort(key=lambda milestone: milestone["title"], reverse=True)

    patch_version = get_patch_version()
    milestone_version = f"v{patch_version}"
    current_milestone = next((milestone for milestone in milestones
                              if milestone["title"] == milestone_version),
                             None)
    # do nothing if milestone with expected title is already created
    if current_milestone is not None:
        return

    prev_milestone_version = f"v{patch_version - 1}"
    prev_milestone = next((milestone for milestone in milestones
                           if milestone["title"] == prev_milestone_version),
                          None)
    due_on = None
    if prev_milestone is not None and prev_milestone["due_on"] is not None:
        date = datetime.strptime(prev_milestone["due_on"],
                                 "%Y-%m-%dT%H:%M:%SZ") + timedelta(weeks=2)
        due_on = date.strftime("%Y-%m-%dT%H:%M:%SZ")

    maintainers = [
        "Undin", "ortem", "mchernyavsky", "vlad20012", "dima74", "avrong",
        "ozkriff"
    ]

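    # Rotate the release manager: drop maintainers who managed the most recent
    # milestones so the longest-idle maintainer ends up first in the list.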
    for m in milestones:
        if len(maintainers) == 1:
            break
        desc = m["description"]
        res = re.search(RELEASE_MANAGER_RE, desc)
        if res is not None:
            try:
                maintainers.remove(res.group(1))
            except ValueError:
                pass

    description = f"Release manager: @{maintainers[0]}"
    create_milestone(repo,
                     args.token,
                     milestone_version,
                     description=description,
                     due_on=due_on)
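RELEASE_MANAGER_RE itself is not shown on this page; given the description format written above ("Release manager: @name"), it presumably looks something like the following (an assumption):

# Presumed shape of the pattern used above (not shown on this page):
RELEASE_MANAGER_RE = re.compile(r"Release manager: @(\w+)")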
Example #4
def test():
    signal.alarm(45)
    with env(HOROVOD_STALL_CHECK_TIME_SECONDS="2",
             HOROVOD_STALL_SHUTDOWN_TIME_SECONDS="5"):
        hvd.init()
        tensor = torch.IntTensor([[1, 2], [3, 4]])
        if hvd.rank() != 0:
            time.sleep(10 * hvd.rank())
        try:
            summed = hvd.allreduce(tensor, average=False)
        except Exception:
            # the stall-check shutdown is expected to abort the allreduce
            pass
        finally:
            hvd.shutdown()
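Note that env is used here as a context manager that temporarily sets environment variables, unlike the env(...) lookup helper in the other examples. A minimal sketch of such a helper, assuming os.environ-based semantics (the real test utility is not shown here):

import contextlib
import os


@contextlib.contextmanager
def env(**kwargs):
    # Temporarily set the given environment variables, restoring the
    # previous values (or their absence) on exit.
    previous = {key: os.environ.get(key) for key in kwargs}
    os.environ.update(kwargs)
    try:
        yield
    finally:
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value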
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token",
                        type=str,
                        required=True,
                        help="github token")

    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")

    nightly_branch = get_branch(repo, args.token, NIGHTLY_BRANCH)
    if nightly_branch is not None:
        print("Repo already has nightly branch")
        return

    git_command("checkout", "-b", NIGHTLY_BRANCH)

    output = execute_command("rustc", "-V")
    match_result = RUSTC_VERSION_RE.match(output)
    if match_result is None:
        raise ValueError("Failed to parse the rustc version")
    date = match_result.group(1)
    with open(CHECK_WORKFLOW_PATH) as f:
        workflow_text = f.read()

    result = re.search(WORKFLOW_RUSTC_VERSION_RE, workflow_text)
    if result is None:
        raise ValueError("Failed to find the current version of nightly rust")

    new_workflow_text = re.sub(WORKFLOW_RUSTC_VERSION_RE,
                               f"\\g<1>{date}\\g<2>", workflow_text)
    if new_workflow_text == workflow_text:
        print("The latest nightly rustc version is already used")
        return

    with open(CHECK_WORKFLOW_PATH, "w") as f:
        f.write(new_workflow_text)

    if has_git_changes():
        git_command("add", CHECK_WORKFLOW_PATH)
        git_command("commit", "-m", ":arrow_up: nightly")

        git_command("push", "origin", NIGHTLY_BRANCH)
        pull_request = create_pull_request(repo, args.token, NIGHTLY_BRANCH,
                                           ":arrow_up: nightly")
        add_assignee(repo, args.token, pull_request["number"],
                     DEFAULT_ASSIGNEE)
    else:
        print("Everything is up to date")
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token",
                        type=str,
                        required=True,
                        help="github token")
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")

    updater = NightlyUpdater(repo,
                             args.token,
                             branch_name="nightly",
                             message=":arrow_up: nightly",
                             assignee="Undin")
    updater.update()
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token",
                        type=str,
                        required=True,
                        help="github token")
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")

    updater = CompilerFeatureUpdater(repo,
                                     args.token,
                                     branch_name="update-compiler-features",
                                     message="Update compiler features",
                                     assignee="Undin")
    updater.update()
Example #8
    def test_timeline(self):
        with tempfile.NamedTemporaryFile() as t:
            with env(HOROVOD_TIMELINE=t.name, HOROVOD_TIMELINE_MARK_CYCLES='1'):
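                # HOROVOD_TIMELINE writes a Chrome-trace timeline to the given
                # file; HOROVOD_TIMELINE_MARK_CYCLES adds cycle-boundary markers.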
                hvd.init()

                # Perform a simple allreduce operation
                hvd.allreduce(torch.tensor([1, 2, 3], dtype=torch.float32), name='test_allreduce')

                # Wait for it to register in the timeline.
                time.sleep(0.1)

                if hvd.rank() == 0:
                    with open(t.name, 'r') as tf:
                        timeline_text = tf.read()
                        assert 'allreduce.test_allreduce' in timeline_text, timeline_text
                        assert 'NEGOTIATE_ALLREDUCE' in timeline_text, timeline_text
                        assert 'ALLREDUCE' in timeline_text, timeline_text
                        assert 'CYCLE_START' in timeline_text, timeline_text
Example #9
def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, required=True)
    parser.add_argument("--repo_owner", type=str, required=True)
    parser.add_argument("--repo_name", type=str, required=True)
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    # the script is supposed to be invoked only after release branch creation
    # so we need previous patch version
    release_patch_version = get_patch_version() - 1
    changelog_repo = f"{args.repo_owner}/{args.repo_name}"
    branch_name = changelog_branch_name(release_patch_version)

    branches = get_all_branches(changelog_repo, args.token)
    existing_branch = next(
        (branch["name"]
         for branch in branches if branch["name"].endswith(branch_name)), None)
    if existing_branch is not None:
        print(
            f"Branch for v{release_patch_version} release already exists: `{existing_branch}`"
        )
        return

    milestone = get_current_milestone(repo, release_patch_version)

    # TODO: find a more robust way to parse the date
    release_date = datetime.strptime(milestone["due_on"],
                                     "%Y-%m-%dT%H:%M:%SZ").date()
    today = date.today()
    if today >= release_date or milestone["state"] == "closed":
        print(f"Milestone v{release_patch_version} is over")
        return

    delta = release_date - today
    five_days = timedelta(days=5)

    if delta > five_days:
        print("Too early to create release changelog")
        return

    add_changelog_template(args.token, release_patch_version, args.repo_name)
    create_changelog_pull_request(changelog_repo, args.token,
                                  release_patch_version, milestone)
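changelog_branch_name is defined elsewhere; given the endswith check above, it presumably maps the version to a branch suffix, roughly (an assumption):

def changelog_branch_name(patch_version):
    # Hypothetical shape; the real helper is not shown on this page.
    return f"changelog-{patch_version}"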
Example #10
import pathlib
from dataclasses import dataclass

from postgres_table_init_operator import PostgresTableInitOperator
from postgres_table_copy_operator import PostgresTableCopyOperator
from postgres_permissions_operator import PostgresPermissionsOperator
from http_gob_operator import HttpGobOperator
from common import (
    default_args,
    DATAPUNT_ENVIRONMENT,
    MessageOperator,
    slack_webhook_token,
    env,
)
from schematools import TMP_TABLE_POSTFIX
from schematools.utils import schema_def_from_url

MAX_RECORDS = 1000 if DATAPUNT_ENVIRONMENT == "development" else None
GOB_PUBLIC_ENDPOINT = env("GOB_PUBLIC_ENDPOINT")
GOB_SECURE_ENDPOINT = env("GOB_SECURE_ENDPOINT")
OAUTH_TOKEN_EXPIRES_MARGIN = env.int("OAUTH_TOKEN_EXPIRES_MARGIN", 5)
SCHEMA_URL = env("SCHEMA_URL")

dag_id = "gob"
owner = "gob"

graphql_path = pathlib.Path(__file__).resolve().parents[0] / "graphql"


@dataclass
class DatasetInfo:
    """Dataclass to provide canned infomation about the dataset for
    other operators to work with."""
Example #11
import argparse
from urllib.request import urlopen

from common import env, get_patch_version_from_text
from github import get_current_milestone, set_milestone

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--token",
                        type=str,
                        help="GitHub token",
                        required=True)
    parser.add_argument("--pull-request",
                        type=int,
                        help="Pull request number",
                        required=True)

    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    text = urlopen(f"https://github.com/{repo}/raw/master/gradle.properties"
                   ).read().decode("utf-8")
    patch_version = get_patch_version_from_text(text)
    milestone = get_current_milestone(repo, patch_version)

    set_milestone(args.token, repo, args.pull_request, milestone["number"])
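get_patch_version_from_text is imported from common; a minimal sketch, assuming gradle.properties carries a patchVersion=<number> property (the property name is an assumption):

import re

def get_patch_version_from_text(text):
    # Hypothetical property name; the real helper is not shown on this page.
    match = re.search(r"patchVersion\s*=\s*(\d+)", text)
    if match is None:
        raise ValueError("patchVersion not found in gradle.properties")
    return int(match.group(1))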
Example #12
import pathlib

from dynamic_dagrun_operator import TriggerDynamicDagRunOperator
from sqlalchemy_create_object_operator import SqlAlchemyCreateObjectOperator
from postgres_table_init_operator import PostgresTableInitOperator
from postgres_table_copy_operator import PostgresTableCopyOperator
from http_gob_operator import HttpGobOperator
from common import (
    default_args,
    DATAPUNT_ENVIRONMENT,
    MessageOperator,
    slack_webhook_token,
    env,
)
from schematools import TMP_TABLE_POSTFIX

MAX_RECORDS = 1000 if DATAPUNT_ENVIRONMENT == "development" else None
GOB_PUBLIC_ENDPOINT = env("GOB_PUBLIC_ENDPOINT")
GOB_SECURE_ENDPOINT = env("GOB_SECURE_ENDPOINT")
OAUTH_TOKEN_EXPIRES_MARGIN = env.int("OAUTH_TOKEN_EXPIRES_MARGIN", 5)

dag_id = "gob"
owner = "gob"

graphql_path = pathlib.Path(__file__).resolve().parents[0] / "graphql"


def create_gob_dag(is_first, gob_dataset_name, gob_table_name):

    gob_db_table_name = f"{gob_dataset_name}_{gob_table_name}"
    graphql_dir_path = graphql_path / f"{gob_dataset_name}-{gob_table_name}"
    graphql_params_path = graphql_dir_path / "args.json"
    extra_kwargs = {}
Example #13
from datetime import tzinfo
from typing import Dict, Optional

from airflow.models import Variable
from dateutil import tz

# SHARED_DIR, env, URL and DatabaseEngine come from the project's common
# helper modules; those imports are elided in this excerpt.
from sql.wior import (
    DROP_COLS,
    SQL_DROP_TMP_TABLE,
    SQL_GEOM_VALIDATION,
    SQL_ADD_PK,
    SQL_SET_DATE_DATA_TYPES,
)

dag_id: str = "wior"
variables: Dict = Variable.get(dag_id, deserialize_json=True)
data_endpoint: Dict = variables["data_endpoints"]["wfs"]
tmp_dir: str = f"{SHARED_DIR}/{dag_id}"
data_file: str = f"{tmp_dir}/{dag_id}.geojson"
db_conn: DatabaseEngine = DatabaseEngine()
password: str = env("AIRFLOW_CONN_WIOR_PASSWD")
user: str = env("AIRFLOW_CONN_WIOR_USER")
base_url: str = URL(env("AIRFLOW_CONN_WIOR_BASE_URL"))
total_checks: list = []
count_checks: list = []
geo_checks: list = []
to_zone: Optional[tzinfo] = tz.gettz("Europe/Amsterdam")


class DataSourceError(Exception):
    """Custom exception raised when the data source is unavailable."""


# data connection