Example #1
0
def test_transform_tnds_empty(asserts, xslt, mock_ids):
    """ A document whose root has its ``Modification`` attribute set to
        ``Delete`` is expected to transform into an empty ``<Data/>`` element.
    """
    document = et.parse(TNDS_RAW)
    root = document.getroot()
    root.set("Modification", "Delete")

    result = xslt_transform(document,
                            xslt,
                            region="Y",
                            file="SVRYSBO120.xml")
    result_root = result.getroot()

    asserts.xml_elements_equal(result_root, et.XML("<Data/>"))
Example #2
0
def test_transform_tnds_wrong_mode(asserts, xslt, mock_ids):
    """ A service whose mode is switched to ferry is expected to be left out,
        so the transform yields an empty ``<Data/>`` element.
    """
    document = et.parse(TNDS_RAW)
    namespaces = {"t": document.xpath("namespace-uri()")}
    modes = document.xpath("//t:Service/t:Mode", namespaces=namespaces)
    modes[0].text = "ferry"

    result = xslt_transform(document,
                            xslt,
                            region="Y",
                            file="SVRYSBO120.xml")
    result_root = result.getroot()

    asserts.xml_elements_equal(result_root, et.XML("<Data/>"))
Example #3
0
def test_transform_tnds_missing_mode(asserts, xslt, mock_ids):
    """ With the service's ``Mode`` element removed, the transformed service is
        expected to carry mode ``1`` (bus) as a default.
    """
    document = et.parse(TNDS_RAW)
    namespaces = {"t": document.xpath("namespace-uri()")}

    # Detach the first Mode element from its parent before transforming
    mode_element = document.xpath("//t:Service/t:Mode",
                                  namespaces=namespaces)[0]
    parent = mode_element.getparent()
    parent.remove(mode_element)

    result = xslt_transform(document,
                            xslt,
                            region="Y",
                            file="SVRYSBO120.xml")
    found_modes = result.xpath("/Data/Service/mode")
    assert found_modes[0].text == "1"
Example #4
0
def test_transform_tnds(asserts, xslt, mock_ids, service_codes):
    """ Transforming the raw TNDS document is expected to give the same XML as
        the stored expected output.
    """
    source = et.parse(TNDS_RAW)
    result = xslt_transform(source, xslt, region="Y", file="SVRYSBO120A.xml")

    # Blank text is stripped from the expected document when parsing it
    parser = et.XMLParser(remove_blank_text=True)
    wanted = et.parse(TNDS_OUT, parser=parser)

    # Print the transformed document; useful when the comparison fails
    print(et.tostring(result, pretty_print=True))

    asserts.xml_elements_equal(result.getroot(), wanted.getroot())
Example #5
0
def populate_tnds_data(connection, path=None, delete=True, warn=False):
    """ Commits TNDS data to database.

        Returns early without touching the database if ``_get_archives``
        returns None.

        :param connection: Connection for population.
        :param path: Path for zip files with TNDS XML documents and named
        after region codes. Global expansion is supported - all unique files
        matching region codes will be used. The archives will be downloaded if
        this is None.
        :param delete: Truncate all data from TNDS tables before populating.
        :param warn: Log warning if no FTP credentials exist. If False an error
        will be raised instead.
        :raises ValueError: if no operators exist in the database.
    """
    archives = _get_archives(connection, path, warn)
    if archives is None:
        return

    # Check if operators exist first
    operators_exist = connection.execute(
        db.exists(db.select([models.Operator.code])).select()).scalar()
    if not operators_exist:
        raise ValueError(
            "No operators were found. The TNDS dataset requires the database "
            "to be populated from NOC data first.")

    row_ids = setup_row_ids(connection, check_existing=not delete)
    setup_stop_exists(connection)
    setup_service_codes()

    # We don't want to delete any NOC data if they have been added
    excluded = models.Operator, models.LocalOperator
    metadata = utils.reflect_metadata(connection)
    with open_binary("nextbus.populate", "tnds.xslt") as file_:
        xslt = et.XSLT(et.parse(file_))

    # Existing data should be deleted only once, by the first file that is
    # actually committed to the database
    pending_delete = delete
    for region, archive in archives.items():
        archive_name = os.path.basename(archive)
        for file_ in file_ops.iter_archive(archive):
            file_path = os.path.join(archive_name, file_.name)
            utils.logger.info(f"Parsing file {file_path!r}")
            try:
                transformed = utils.xslt_transform(file_,
                                                   xslt,
                                                   region=region,
                                                   file=file_.name)
            except RowIdError:
                # IDs do not match in XML file; log error and move on. The
                # delete flag is left untouched as nothing was populated.
                utils.logger.error(f"Invalid IDs in file {file_path!r}",
                                   exc_info=1)
            else:
                utils.populate_database(connection,
                                        utils.collect_xml_data(transformed),
                                        metadata=metadata,
                                        delete=pending_delete,
                                        exclude=excluded)
                pending_delete = False
            # Row IDs are cleared after every file, whether or not it was
            # populated
            row_ids.clear()
Example #6
0
def test_transform_alt_description(asserts, xslt, mock_ids, service_codes):
    """ With the service description cleared, the transformed output is
        expected to match the standard expected output.
    """
    data = et.parse(TNDS_RAW)
    ns = {"txc": data.xpath("namespace-uri()")}
    description = data.xpath(
        "/txc:TransXChange/txc:Services/txc:Service/txc:Description",
        namespaces=ns)[0]
    # Clear description text; the output should be the same as before, since
    # the origin/destination from the standard service will be used instead
    description.text = ""

    # Transform the modified document, not a fresh copy of the raw file,
    # otherwise the cleared description is never exercised
    output = xslt_transform(data,
                            xslt,
                            region="Y",
                            file="SVRYSBO120A.xml")
    expected = et.parse(TNDS_OUT, parser=et.XMLParser(remove_blank_text=True))

    asserts.xml_elements_equal(output.getroot(), expected.getroot())
Example #7
0
def populate_noc_data(connection, path=None):
    """ Convert NOC data (service operators) to database objects and commit them
        to the application database.

        :param connection: Connection for population.
        :param path: Path to raw data in XML form. The file is downloaded from
        ``NOC_URL`` into the temporary directory if this is None.
        :raises ValueError: if ``TEMP_DIRECTORY`` is not set in the application
        config.
    """
    temp = current_app.config.get("TEMP_DIRECTORY")
    if not temp:
        raise ValueError("TEMP_DIRECTORY is not defined.")

    if path is None:
        file_path = file_ops.download(NOC_URL, directory=temp)
    else:
        file_path = path

    utils.logger.info(f"Opening NOC XML file {file_path!r}")
    try:
        data = et.parse(file_path)
    except (UnicodeDecodeError, et.XMLSyntaxError):
        # NOC data is encoded in Windows-1252 for some reason despite the XML
        # declaration specifying UTF-8 encoding
        utils.logger.warning(
            f"NOC XML file {file_path!r} cannot be parsed with UTF-8 - trying "
            f"again with CP1252"
        )
        data = et.parse(file_path, et.XMLParser(encoding="CP1252"))

    with open_binary("nextbus.populate", "noc.xslt") as file_:
        xslt = et.XSLT(et.parse(file_))

    utils.populate_database(
        connection,
        utils.collect_xml_data(utils.xslt_transform(data, xslt)),
        delete=True
    )

    # A file was downloaded only when no path was given. file_path is always
    # set by this point, so the path argument must be checked instead.
    if path is None:
        utils.logger.info(f"New file {file_path!r} downloaded; can be deleted")
    utils.logger.info("NOC population done")
Example #8
0
def populate_nptg_data(connection, archive=None, list_files=None):
    """ Transform NPTG data (regions, admin areas, districts and localities)
        and commit the results to the application database.

        If neither an archive nor a list of files is given, the data is
        downloaded from ``NPTG_URL`` into the temporary directory.

        :param connection: Connection & transaction for population
        :param archive: Path to zipped archive file for NPTG XML files.
        :param list_files: List of file paths for NPTG XML files.
        :raises ValueError: if ``TEMP_DIRECTORY`` is not set in the application
        config, or if both an archive and a list of files are given.
    """
    temp = current_app.config.get("TEMP_DIRECTORY")
    if not temp:
        raise ValueError("TEMP_DIRECTORY is not defined.")

    if archive is not None and list_files is not None:
        raise ValueError("Can't specify both archive file and list of files.")

    if list_files is not None:
        iter_files = iter(list_files)
    else:
        if archive is None:
            archive_path = file_ops.download(NPTG_URL,
                                             directory=temp,
                                             params={"format": "xml"})
        else:
            archive_path = archive
        iter_files = file_ops.iter_archive(archive_path)

    metadata = utils.reflect_metadata(connection)
    with open_binary("nextbus.populate", "nptg.xslt") as stylesheet:
        xslt = et.XSLT(et.parse(stylesheet))

    for index, file_ in enumerate(iter_files):
        # Entries may be file-like objects with a name or plain paths
        file_name = getattr(file_, "name", file_)
        utils.logger.info(f"Parsing file {file_name!r}")
        transformed = utils.xslt_transform(file_, xslt)
        # Existing data is deleted only when populating from the first file
        utils.populate_database(connection,
                                utils.collect_xml_data(transformed),
                                metadata=metadata,
                                delete=index == 0)
Example #9
0
def test_update_tnds_data(load_db):
    """ Populate the database from a single TNDS file, deleting existing data
        but excluding operators, and check the resulting service, journey
        patterns, journeys and special periods.
    """
    with open_binary("nextbus.populate", "tnds.xslt") as stylesheet:
        xslt = et.XSLT(et.parse(stylesheet))
    setup_service_codes()

    # All relevant data already exists for Dagenham Sunday market shuttle;
    # just overwrite route data using a newer file
    file_name = "66-DSM-_-y05-1"
    kept_models = (models.Operator, models.LocalOperator)
    with db.engine.begin() as connection:
        setup_stop_exists(connection)
        setup_row_ids(connection, check_existing=False)
        collected = collect_xml_data(
            xslt_transform(TNDS_DSM, xslt, region="L", file=file_name))
        populate_database(connection,
                          collected,
                          delete=True,
                          exclude=kept_models)

    service = models.Service.query.one()
    assert _as_dict(service) == dict(
        id=1,
        code="dagenham-sunday-market-shuttle",
        line="Dagenham Sunday Market Shuttle",
        description="Barking – Dagenham Sunday Market",
        short_description="Barking – Dagenham Sunday Market",
        mode=1,
        filename=file_name)

    pattern_query = models.JourneyPattern.query.order_by(
        models.JourneyPattern.id)
    patterns = pattern_query.all()
    assert len(patterns) == 2
    assert _as_dict(patterns[0]) == {
        "id": 1,
        "origin": "Barking Station",
        "destination": "Dagenham Sunday Market",
        "service_ref": 1,
        "direction": False,
        "date_start": datetime.date(2019, 12, 8),
        "date_end": datetime.date(2020, 5, 31),
        "local_operator_ref": "ATC",
        "region_ref": "L"
    }

    journey_query = models.Journey.query.order_by(models.Journey.id)
    journeys = journey_query.all()
    assert len(journeys) == 26
    assert _as_dict(journeys[0]) == {
        "id": 1,
        "pattern_ref": 1,
        "start_run": None,
        "end_run": None,
        "departure": datetime.time(8, 30),
        "days": 0b10000000,
        "weeks": None,
        "include_holidays": 0b0000010001010010,
        "exclude_holidays": 0b0000001000101000,
        "note_code": None,
        "note_text": None
    }

    period_query = models.SpecialPeriod.query.order_by(
        models.SpecialPeriod.id)
    special_days = period_query.all()
    assert len(special_days) == 26
    assert _as_dict(special_days[0]) == {
        "id": 1,
        "journey_ref": 1,
        "date_start": datetime.date(2020, 5, 8),
        "date_end": datetime.date(2020, 5, 8),
        "operational": True
    }
Example #10
0
def populate_naptan_data(connection,
                         archive=None,
                         list_files=None,
                         split=True):
    """ Transform NaPTAN data (stop points and areas) and commit the results to
        the application database.

        If neither an archive nor a list of files is given, the data is
        downloaded from ``NAPTAN_URL`` and zipped in the temporary directory.

        :param connection: Connection for population
        :param archive: Path to zipped archive file for NaPTAN XML files.
        :param list_files: List of file paths for NaPTAN XML files.
        :param split: Splits NaPTAN XML files in archive by admin area code. Has
        no effect if list_files is used.
        :raises ValueError: if no admin areas or localities exist in the
        database, if ``TEMP_DIRECTORY`` is not set in the application config,
        or if both an archive and a list of files are given.
    """
    # Get complete list of ATCO admin areas and localities from NPTG data
    area_rows = connection.execute(db.select([models.AdminArea.code]))
    locality_rows = connection.execute(db.select([models.Locality.code]))
    areas = [row[0] for row in area_rows]
    localities = [row[0] for row in locality_rows]

    if not (areas and localities):
        raise ValueError("NPTG tables are not populated; stop point data "
                         "cannot be added without the required locality data. "
                         "Populate the database with NPTG data first.")

    temp = current_app.config.get("TEMP_DIRECTORY")
    if not temp:
        raise ValueError("TEMP_DIRECTORY is not defined.")

    if archive is not None and list_files is not None:
        raise ValueError("Can't specify both archive file and list of files.")

    if list_files is not None:
        archive_path = None
    elif archive is not None:
        archive_path = archive
    else:
        downloaded = file_ops.download(NAPTAN_URL,
                                       directory=temp,
                                       params={"dataFormat": "XML"})

        utils.logger.info(f"Zipping {downloaded!r}")
        # The downloaded file is not zipped. Move it into an archive
        archive_path = os.path.join(temp, "NaPTAN.zip")
        with zipfile.ZipFile(archive_path, "w",
                             compression=zipfile.ZIP_DEFLATED) as zipped:
            zipped.write(downloaded)

        os.remove(downloaded)

    if archive_path is None:
        iter_files = iter(list_files)
    else:
        if split:
            # Split the archive's data by admin area into a new archive and
            # read from that one instead
            split_path = os.path.join(temp, "NaPTAN_split.zip")
            _split_naptan_data(areas, archive_path, split_path)
            archive_path = split_path
        iter_files = file_ops.iter_archive(archive_path)

    # Go through data and create objects for committing to database
    _setup_naptan_functions()

    metadata = utils.reflect_metadata(connection)
    with open_binary("nextbus.populate", "naptan.xslt") as stylesheet:
        xslt = et.XSLT(et.parse(stylesheet))

    for index, file_ in enumerate(iter_files):
        # Entries may be file-like objects with a name or plain paths
        file_name = getattr(file_, "name", file_)
        utils.logger.info(f"Parsing file {file_name!r}")
        transformed = utils.xslt_transform(file_, xslt)
        # Existing data is deleted only when populating from the first file
        utils.populate_database(connection,
                                utils.collect_xml_data(transformed),
                                metadata=metadata,
                                delete=index == 0)
Example #11
0
def test_naptan_transform_all(asserts):
    """ Transforming the raw NaPTAN data is expected to give the same XML as
        the stored expected output.
    """
    _setup_naptan_functions()
    stylesheet = naptan_xslt()
    result = xslt_transform(NAPTAN_RAW, stylesheet)
    wanted = et.parse(NAPTAN_ALL, PARSER)

    result_root = result.getroot()
    wanted_root = wanted.getroot()
    asserts.xml_elements_equal(result_root, wanted_root)
Example #12
0
def test_nptg_transform_all(asserts):
    """ Transforming the raw NPTG data is expected to give the same XML as the
        stored expected output.
    """
    stylesheet = nptg_xslt()
    result = xslt_transform(NPTG_RAW, stylesheet)

    # Blank text is stripped from the expected document when parsing it
    parser = et.XMLParser(remove_blank_text=True)
    wanted = et.parse(NPTG_ALL, parser)

    result_root = result.getroot()
    wanted_root = wanted.getroot()
    asserts.xml_elements_equal(result_root, wanted_root)