Exemple #1
0
def main():
    """
    Command-line entry point: write JSON and pyessv CVs for one version.

    Parses ``--source-dir`` and ``--version`` from the command line, builds a
    ``SpreadsheetHandler`` for ``<source-dir>/<version>`` and writes the CVs
    into the ``AMF_CVs`` and ``amf-pyessv-vocabs`` sub-directories of that
    version directory, creating them when they do not exist yet.

    Exits through ``parser.error`` if the source directory does not exist.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-s",
                        "--source-dir",
                        required=True,
                        help="Source directory, as downloaded and produced by "
                        "`download-from-drive` script.")

    parser.add_argument(
        "-v",
        "--version",
        required=True,
        choices=ALL_VERSIONS,
        help=f"Version of the spreadsheets to use (e.g. '{CURRENT_VERSION}').")

    args = parser.parse_args(sys.argv[1:])

    if not os.path.isdir(args.source_dir):
        parser.error(f"No such directory '{args.source_dir}'")

    version_dir = os.path.join(args.source_dir, args.version)
    sh = SpreadsheetHandler(version_dir)

    cvs_dir = os.path.join(version_dir, "AMF_CVs")
    pyessv_dir = os.path.join(version_dir, "amf-pyessv-vocabs")

    # exist_ok avoids the check-then-create race of a separate isdir() test
    for dr in (cvs_dir, pyessv_dir):
        os.makedirs(dr, exist_ok=True)

    sh.write_cvs(cvs_dir, write_pyessv=True, pyessv_root=pyessv_dir)
def main():
    """
    Command-line entry point: write JSON CVs (and pyessv CVs) from a
    directory of downloaded spreadsheets.

    Positional arguments are the spreadsheets directory and the output
    directory for JSON CVs. ``--pyessv-dir`` optionally sets the pyessv
    output directory; when omitted, ``None`` is passed on as ``pyessv_root``.

    Exits through ``parser.error`` if the spreadsheets directory does not
    exist. Output directories are created when missing, including any
    missing parent directories.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "spreadsheets_dir",
        help="Directory containing spreadsheet data, as produced by "
        "download_from_drive.py")
    parser.add_argument("output_dir",
                        help="Directory to write output JSON CVs to")

    # Note: default dir is not actually set in this code -- if not given
    # just use pyessv's default. Will need to update the help text if this
    # ever changes...
    parser.add_argument(
        "--pyessv-dir",
        default=None,
        dest="pyessv_root",
        help=
        "Directory to write pyessv CVs to [default: ~/.esdoc/pyessv-archive/]")

    args = parser.parse_args(sys.argv[1:])

    if not os.path.isdir(args.spreadsheets_dir):
        parser.error("No such directory '{}'".format(args.spreadsheets_dir))

    # pyessv_root may be None (option not given); only create real paths.
    # makedirs (rather than mkdir) so a nested output path whose parent does
    # not exist yet is accepted too.
    for dirname in (args.output_dir, args.pyessv_root):
        if dirname:
            os.makedirs(dirname, exist_ok=True)

    sh = SpreadsheetHandler(args.spreadsheets_dir)
    sh.write_cvs(args.output_dir,
                 write_pyessv=True,
                 pyessv_root=args.pyessv_root)
    def test_basic(self, spreadsheets_dir, tmpdir):
        """
        Write one product-specific variables sheet and one dimensions sheet,
        run the handler, and compare both generated JSON CVs with the
        expected dictionaries.
        """
        root = spreadsheets_dir
        products = root.join("product-definitions")

        # Variables sheet for 'my-great-product'
        variable_rows = (
            "Variable\tAttribute\tValue",
            "wind_speed\t\t",
            "\tname\twind_speed",
            "\ttype\tfloat32",
            "eastward_wind\t\t",
            "\tname\teastward_wind",
            "\tunits\tm s-1",
        )
        variable_sheet = products.mkdir("tsv").mkdir("my-great-product").join(
            "variables-specific.tsv")
        variable_sheet.write("\n".join(variable_rows))

        # NOTE(review): this directory is created but nothing is written to
        # it -- looks like a leftover; confirm before removing.
        root.mkdir("other-cool-product")

        # Dimensions sheet for 'other-cool-product'
        dimension_rows = (
            "Name\tLength\tunits",
            "layer_index\t<i>\t1",
            "other\t42\tm",
        )
        workbook_dir = products.mkdir("other-cool-product").mkdir(
            "other-cool-product.xlsx")
        dimension_sheet = workbook_dir.join("Dimensions - Specific.tsv")
        dimension_sheet.write("\n".join(dimension_rows))

        cv_dir = tmpdir.mkdir("cvs")
        handler = SpreadsheetHandler(str(root))
        handler.write_cvs(str(cv_dir))

        variable_cv = cv_dir.join("AMF_product_my-great-product_variable.json")
        dimension_cv = cv_dir.join(
            "AMF_product_other-cool-product_dimension.json")
        assert variable_cv.check()
        assert dimension_cv.check()

        # Both files must parse as JSON before their content is compared
        parsed = []
        for cv_file in (variable_cv, dimension_cv):
            try:
                content = json.load(cv_file)
            except json.decoder.JSONDecodeError:
                assert False, "{} is invalid JSON".format(str(cv_file))
            parsed.append(content)
        variable_content, dimension_content = parsed

        # check variables - variable CV
        expected_variables = {
            "wind_speed": {"type": "float32"},
            "eastward_wind": {"units": "m s-1"},
        }
        assert variable_content == {
            "product_my-great-product_variable": expected_variables
        }

        # check dimensions CV
        expected_dimensions = {
            "layer_index": {"length": "<i>", "units": "1"},
            "other": {"length": "42", "units": "m"},
        }
        assert dimension_content == {
            "product_other-cool-product_dimension": expected_dimensions
        }
    def test_pyessv_cvs_are_generated(self, spreadsheets_dir, tmpdir):
        """
        Check that pyessv CVs are written alongside the JSON CVs and that
        scientist and product terms end up in the expected files.
        """
        # Create spreadsheets to generate some CVs
        s_dir = spreadsheets_dir

        # NOTE(review): the e-mail addresses in this test were redacted in
        # the copy of the file this was restored from; the values below are
        # placeholders chosen to be consistent between input and expected
        # output -- confirm against the upstream test.
        bob_email = "bob@example.com"
        jane_email = "jane@example.com"

        # Create scientists CV, to test that @ are allowed in namespaces
        sci_tsv = s_dir.join("Vocabularies.xlsx").join("Creators.tsv")
        sci_tsv.write("\n".join((
            "name\temail\torcid\tconfirmed",
            "Bob Smith\t{}\thttps://orcid.org/123\tyes".format(bob_email),
            "Jane Smith\t{}\thttps://orcid.org/999\tyes".format(jane_email),
        )))

        # Create products CV, since this is a list rather dict like other CVs
        prod_tsv = s_dir.join("Vocabularies.xlsx").join("Data Products.tsv")
        prod_tsv.write("\n".join(
            ("Data Product", "snr-winds", "aerosol-backscatter")))

        # Write JSON CVs and pyessv CVs
        sh = SpreadsheetHandler(str(s_dir))
        json_cvs_output = tmpdir.mkdir("json_cvs")
        pyessv_cvs_output = tmpdir.mkdir("pyessv_cvs")
        sh.write_cvs(str(json_cvs_output),
                     write_pyessv=True,
                     pyessv_root=str(pyessv_cvs_output))

        root = pyessv_cvs_output.join("ncas")
        assert root.join("MANIFEST").check()
        assert root.join("amf").check()

        # Check the contents of some CVs: each scientist term is stored in a
        # file named after the e-mail address
        expected_scientists = (
            (bob_email, "Bob Smith", "https://orcid.org/123"),
            (jane_email, "Jane Smith", "https://orcid.org/999"),
        )
        for email, name, orcid in expected_scientists:
            term = root.join("amf").join("scientist").join(email)
            assert term.check()
            term_decoded = json.load(term)
            assert "data" in term_decoded
            assert term_decoded["data"] == {
                "primary_email": email,
                "previous_emails": [],
                "name": name,
                "orcid": orcid
            }

        product_term = root.join("amf").join("product").join("snr-winds")
        assert product_term.check()
    def test_duplicate_instrument_id(self, spreadsheets_dir, tmpdir):
        """
        Check that if there are two instruments with the same ID, a warning is
        printed to stderr and only one entry ends up in the CV (the assertion
        below expects the data from the first row), and that no warning is
        printed when the IDs are distinct.
        """
        # Local import keeps this method self-contained. redirect_stderr
        # restores the previous sys.stderr even if write_cvs raises, and does
        # not clobber a stream installed by the test runner the way assigning
        # sys.__stderr__ would.
        from contextlib import redirect_stderr

        s_dir = spreadsheets_dir
        instr = s_dir.join("Vocabularies.xlsx").join(
            "Instrument Name & Descriptors.tsv")
        instr.write("\n".join(
            ("Old Instrument Name\tNew Instrument Name\tDescriptor",
             "old1\tmyinstr\tFirst instrument",
             "old2\tmyinstr\tSecond instrument")))
        output = tmpdir.mkdir("cvs")
        stderr = StringIO()
        sh = SpreadsheetHandler(str(s_dir))
        with redirect_stderr(stderr):
            sh.write_cvs(str(output))

        instr_output = output.join("AMF_instrument.json")
        assert instr_output.check()
        assert json.load(instr_output) == {
            "instrument": {
                "myinstr": {
                    "instrument_id": "myinstr",
                    "previous_instrument_ids": ["old1"],
                    "description": "First instrument"
                }
            }
        }
        stderr_contents = stderr.getvalue().lower()
        assert "duplicate instrument name" in stderr_contents

        # Normal case: warning not shown
        instr.write("\n".join(
            ("Old Instrument Name\tNew Instrument Name\tDescriptor",
             "old1\tmyinstr1\tFirst instrument",
             "old2\tmyinstr2\tSecond instrument")))
        stderr = StringIO()
        sh = SpreadsheetHandler(str(s_dir))
        with redirect_stderr(stderr):
            sh.write_cvs(str(output))
        stderr_contents = stderr.getvalue().lower()
        assert "duplicate instrument name" not in stderr_contents
    def test_product(self, spreadsheets_dir, tmpdir):
        """
        The 'Data Products' sheet should be written out as a JSON list of
        product names under the "product" key.
        """
        product_names = [
            "snr-winds",
            "aerosol-backscatter",
            "aerosol-extinction",
            "cloud-base",
            "o3-concentration-profiles",
        ]

        # The sheet is a single column: a header line, then one name per line
        sheet = spreadsheets_dir.join("Vocabularies.xlsx").join(
            "Data Products.tsv")
        sheet.write("\n".join(["Data Product"] + product_names))

        handler = SpreadsheetHandler(str(spreadsheets_dir))
        cv_dir = tmpdir.mkdir("cvs")
        handler.write_cvs(str(cv_dir))

        cv_file = cv_dir.join("AMF_product.json")
        assert cv_file.check()
        assert json.load(cv_file) == {"product": product_names}
    def test_instruments(self, spreadsheets_dir, tmpdir):
        """
        The instruments sheet should produce a CV keyed by the new instrument
        name, with the old names as a list and the description stripped of
        surrounding whitespace.
        """
        # Include some missing old names, some multiple names, and
        # extraneous whitespace
        rows = (
            ("Old Instrument Name", "New Instrument Name", "Descriptor"),
            ("man-radar-1290mhz", "ncas-radar-wind-profiler-1",
             "NCAS Mobile Radar Wind Profiler unit 1"),
            ("", "ncas-ceilometer-4", " NCAS Lidar Ceilometer unit 4"),
            ("man-o3lidar", "ncas-o3-lidar-1", "NCAS Mobile O3 lidar unit 1"),
            ("cv-met-tower, cv-met-webdaq", "ncas-aws-7",
             "NCAS Automatic Weather Station unit 7"),
        )
        sheet = spreadsheets_dir.join("Vocabularies.xlsx").join(
            "Instrument Name & Descriptors.tsv")
        sheet.write("\n".join("\t".join(cells) for cells in rows))

        handler = SpreadsheetHandler(str(spreadsheets_dir))
        cv_dir = tmpdir.mkdir("cvs")
        handler.write_cvs(str(cv_dir))

        cv_file = cv_dir.join("AMF_instrument.json")
        assert cv_file.check()

        def entry(instrument_id, previous_ids, description):
            # One expected value of the "instrument" mapping
            return {
                "instrument_id": instrument_id,
                "previous_instrument_ids": previous_ids,
                "description": description,
            }

        expected_entries = (
            entry("ncas-radar-wind-profiler-1", ["man-radar-1290mhz"],
                  "NCAS Mobile Radar Wind Profiler unit 1"),
            entry("ncas-ceilometer-4", [], "NCAS Lidar Ceilometer unit 4"),
            entry("ncas-o3-lidar-1", ["man-o3lidar"],
                  "NCAS Mobile O3 lidar unit 1"),
            entry("ncas-aws-7", ["cv-met-tower", "cv-met-webdaq"],
                  "NCAS Automatic Weather Station unit 7"),
        )
        assert json.load(cv_file) == {
            "instrument": {e["instrument_id"]: e for e in expected_entries}
        }
    def get_var_inner_cv(self, s_dir, tsv):
        """
        Write ``tsv`` (a list of rows, each a list of column strings) as the
        'Variables - Specific' sheet of a 'wind-speed' product, run the
        handler on ``s_dir`` and return the dictionary stored under the
        "product_wind-speed_variable" key of the generated JSON CV.
        """
        cv_name = "product_wind-speed_variable"

        workbook_dir = s_dir.join("Product Definition Spreadsheets")
        workbook_dir = workbook_dir.mkdir("wind-speed")
        workbook_dir = workbook_dir.mkdir("wind-speed.xlsx")

        # Columns are tab-separated, rows newline-separated
        lines = ["\t".join(row) for row in tsv]
        workbook_dir.join("Variables - Specific.tsv").write("\n".join(lines))

        # Output goes next to the spreadsheets directory, not inside it
        out_dir = s_dir.mkdir("../output")
        handler = SpreadsheetHandler(str(s_dir))
        handler.write_cvs(str(out_dir))

        cv_path = out_dir.join("AMF_product_wind-speed_variable.json")
        assert cv_path.check()
        contents = json.load(cv_path)
        assert cv_name in contents
        return contents[cv_name]
    def test_common(self, spreadsheets_dir, tmpdir):
        """
        Sheets in 'Common.xlsx' should produce 'common' variable/dimension
        CVs and YAML files, named after the part of the sheet name that
        follows the dash.
        """
        common = spreadsheets_dir.join("Common.xlsx")

        # Sheet name -> lines of the TSV to write
        sheets = (
            ("Variables - Air.tsv",
             ("Variable\tAttribute\tValue", "some_air_variable\t\t",
              "\tthingy\tthis_thing", "\ttype\tfloat32")),
            ("Variables - Sea.tsv",
             ("Variable\tAttribute\tValue", "some_sea_variable\t\t",
              "\tthingy\tthat_thing", "\ttype\tstring")),
            ("Dimensions - Land.tsv",
             ("Name\tLength\tunits", "some_dim\t42\tm")),
        )
        for sheet_name, lines in sheets:
            common.join(sheet_name).write("\n".join(lines))

        handler = SpreadsheetHandler(str(spreadsheets_dir))

        cv_dir = tmpdir.mkdir("cvs")
        yaml_dir = tmpdir.mkdir("yaml")
        handler.write_cvs(str(cv_dir))
        handler.write_yaml(str(yaml_dir))

        # Check CV and YAML files exist
        for cv_name in ("AMF_product_common_variable_air.json",
                        "AMF_product_common_variable_sea.json",
                        "AMF_product_common_dimension_land.json"):
            assert cv_dir.join(cv_name).check()
        for yaml_name in ("AMF_product_common_variable_air.yml",
                          "AMF_product_common_variable_sea.yml"):
            assert yaml_dir.join(yaml_name).check()

        # Check the content of one of the CVs
        air_cv = cv_dir.join("AMF_product_common_variable_air.json")
        expected_air = {
            "some_air_variable": {"thingy": "this_thing", "type": "float32"}
        }
        assert json.load(air_cv) == {"product_common_variable_air": expected_air}
Exemple #10
0
    def test_platform(self, spreadsheets_dir, tmpdir):
        """
        The 'Platforms' sheet should produce a CV keyed by platform ID, each
        entry holding the ID and its description.
        """
        platforms = (
            ("wao", "weybourne atmospheric observatory"),
            ("cvao", "cape verde atmospheric observatory"),
        )

        # Header line followed by one "<id>\t<description>" line per platform
        lines = ["Platform ID\tPlatform Description"]
        lines.extend("\t".join(pair) for pair in platforms)
        sheet = spreadsheets_dir.join("Vocabularies.xlsx").join("Platforms.tsv")
        sheet.write("\n".join(lines))

        cv_dir = tmpdir.mkdir("cvs")
        handler = SpreadsheetHandler(str(spreadsheets_dir))
        handler.write_cvs(str(cv_dir))

        cv_file = cv_dir.join("AMF_platform.json")
        assert cv_file.check()

        expected = {
            platform_id: {
                "platform_id": platform_id,
                "description": description,
            }
            for platform_id, description in platforms
        }
        assert json.load(cv_file) == {"platform": expected}
Exemple #11
0
    def test_scientist(self, spreadsheets_dir, tmpdir):
        """
        The 'Creators' sheet should produce a scientist CV keyed by e-mail
        address, whether or not the 'confirmed' and 'orcid' columns are
        filled in; a missing orcid is expected to come out as null.
        """
        # NOTE(review): the e-mail addresses in this test were redacted in
        # the copy of the file this was restored from (every row lost its
        # tab separator and address, and every expected key was the same
        # string, so the expected dict collapsed to one entry). The distinct
        # placeholders below restore a self-consistent test -- confirm
        # against the upstream test.
        s_dir = spreadsheets_dir
        creators = s_dir.join("Vocabularies.xlsx").join("Creators.tsv")
        creators.write("\n".join((
            "name\temail\torcid\tconfirmed",
            # With 'confirmed' column
            "Bob Smith\tbsmith@example.com\thttps://orcid.org/123\tyes",
            "Bob Smath\tbsmath@example.com\thttps://orcid.org/234\tno",
            # and without
            "Dave Jones\tdjones@example.com\thttps://orcid.org/345",
            # Without orcid
            "Paul Jones\tpjones@example.com\t\tyes",
            "Paul Janes\tpjanes@example.com\t",
            "Paul Junes\tpjunes@example.com")))
        output = tmpdir.mkdir("cvs")
        sh = SpreadsheetHandler(str(s_dir))
        sh.write_cvs(str(output))

        sci_output = output.join("AMF_scientist.json")
        assert sci_output.check()

        # (email, name, orcid) for every row written above
        expected_rows = (
            ("bsmith@example.com", "Bob Smith", "https://orcid.org/123"),
            ("bsmath@example.com", "Bob Smath", "https://orcid.org/234"),
            ("djones@example.com", "Dave Jones", "https://orcid.org/345"),
            ("pjones@example.com", "Paul Jones", None),
            ("pjanes@example.com", "Paul Janes", None),
            ("pjunes@example.com", "Paul Junes", None),
        )
        expected = {
            email: {
                "name": name,
                "primary_email": email,
                "previous_emails": [],
                "orcid": orcid
            }
            for email, name, orcid in expected_rows
        }
        assert json.load(sci_output) == {"scientist": expected}