Example #1
                    def test_loads_the_values_into_the_database(sesh_with_station_and_history_records, stations, var_name, source):
                        sesh = sesh_with_station_and_history_records

                        n_loaded, n_skipped = load_pcic_climate_baseline_values(sesh, var_name, source)
                        assert n_loaded == 2
                        assert n_skipped == 0

                        derived_values = sesh.query(DerivedValue)

                        assert derived_values.count() == 12 * len(stations)

                        expected_variable = sesh.query(Variable).filter_by(name=var_name).first()
                        for station in stations:
                            station_values = derived_values.join(History).join(Station) \
                                .filter(Station.id == station.id) \
                                .order_by(DerivedValue.time)
                            latest_history = sesh.query(History)\
                                .filter(History.station.has(id=station.id))\
                                .order_by(History.sdate.desc())\
                                .first()
                            for i, value in enumerate(station_values):
                                month = i + 1
                                last_day = monthrange(2000, month)[1]
                                assert value.time == datetime.datetime(2000, month, last_day, 23)
                                assert value.datum == 100*station.id + 2*month + 0.5
                                assert value.history == latest_history
                                assert value.variable == expected_variable
                def it_loads_only_non_absent_values(
                        sesh_with_station_and_history_records, stations, var_name, source):
                    sesh = sesh_with_station_and_history_records

                    n_lines_added, n_values_added, n_lines_errored, n_lines_excluded, n_lines_skipped = \
                        load_pcic_climate_baseline_values(sesh, var_name, source)
                    assert n_lines_added == 1
                    assert n_values_added == 8
                    assert n_lines_errored == 0
                    assert n_lines_excluded == 0
                    assert n_lines_skipped == 0

                    derived_values = sesh.query(DerivedValue) \
                        .join(DerivedValue.variable) \
                        .filter(Variable.name == var_name)
                    station_values = derived_values.join(History).join(Station) \
                        .filter(Station.id == stations[0].id)
                    assert set([sv.time.month for sv in station_values]) == {2, 3, 4, 5, 7, 8, 11, 12}
                def it_correctly_converts_and_loads_values_into_the_database(
                        sesh_with_station_and_history_records, stations, var_name, source, exclude, n_exclude_matching):
                    sesh = sesh_with_station_and_history_records

                    n_lines_added, n_values_added, n_lines_errored, n_lines_excluded, n_lines_skipped = \
                        load_pcic_climate_baseline_values(sesh, var_name, source, exclude)
                    assert n_lines_added == len(stations) - n_exclude_matching
                    assert n_values_added == n_lines_added * 12
                    assert n_lines_errored == 1
                    assert n_lines_excluded == n_exclude_matching
                    assert n_lines_skipped == 0

                    derived_values = sesh.query(DerivedValue)\
                        .join(DerivedValue.variable)\
                        .filter(Variable.name == var_name)

                    assert derived_values.count() == 12 * n_lines_added

                    expected_variable = sesh.query(Variable)\
                        .filter_by(name=var_name) \
                        .filter(Variable.network.has(name=pcic_climate_variable_network_name)) \
                        .first()
                    for station in stations:
                        station_values = derived_values.join(History).join(Station) \
                            .filter(Station.id == station.id) \
                            .order_by(DerivedValue.time)
                        latest_history = sesh.query(History)\
                            .filter(History.station.has(id=station.id))\
                            .order_by(History.sdate.desc())\
                            .first()
                        for i, value in enumerate(station_values):
                            month = i + 1
                            last_day = monthrange(2000, month)[1]
                            assert value.time == datetime.datetime(2000, month, last_day, 23)
                            if var_name in ['Tx_Climatology', 'Tn_Climatology']:
                                assert value.datum == 100*station.id + 2*month + 0.5
                            else:
                                assert value.datum == 100*station.id + 2*month
                            assert value.history == latest_history
                            assert value.variable == expected_variable
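The tests above rely on pytest fixtures (sesh_with_station_and_history_records, stations, var_name, source, exclude, n_exclude_matching) that are not part of this excerpt. The following is a minimal, hypothetical sketch of two of those fixtures, assuming pytest and an in-memory SQLAlchemy session; it is illustrative only and not the project's actual conftest.

    # Hypothetical sketch of two fixtures used by the tests above.
    # Names match the tests; bodies are illustrative assumptions only --
    # the real conftest builds the full pycds schema and test records.
    import pytest
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    @pytest.fixture
    def var_name():
        # One of the climate baseline variables exercised above
        return 'Tx_Climatology'

    @pytest.fixture
    def sesh_with_station_and_history_records():
        # In the real suite this session is pre-populated with Station and
        # History rows (and the PCIC climate variable network); omitted here.
        engine = create_engine('sqlite:///:memory:')
        sesh = sessionmaker(bind=engine)()
        yield sesh
        sesh.close()
        engine.dispose()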
    f = open(args.file)

    # Header processing: deduced from the file and from the R code that processes it.
    # Headers are optional.
    # The header, if present, is 2 lines:
    #   - crs?: GEO | ALB | UTM
    #   - some mysterious number, e.g., 21
    # We don't use these header values, and therefore skip them if present.
    line = next(f)
    if line.rstrip(' \0\n') in ['GEO', 'ALB', 'UTM']:
        script_logger.debug('Header lines detected; skipping first 2 lines in file')
        line = next(f)  # header present; skip second header line
    else:
        script_logger.debug('No header lines detected')
        f.seek(0)  # no header; reset to beginning of file

    # Load excluded station native ids, if provided
    if args.exclude:
        with open(args.exclude) as e:
            exclude = e.readlines()
    else:
        exclude = []
    exclude = [x.strip() for x in exclude]

    try:
        load_pcic_climate_baseline_values(session, args.variable, f, exclude=exclude)
        session.commit()
    finally:
        session.close()
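From the header-handling comments and the exclude handling above, the expected inputs can be sketched as follows. Both samples are hypothetical reconstructions from this code alone; the actual PCIC file format is not shown in the excerpt.

    # Hypothetical input shapes implied by the parsing code above (illustrative only).
    sample_with_header = (
        "GEO\n"            # optional header line 1: CRS tag (GEO | ALB | UTM)
        "21\n"             # optional header line 2: an unused number
        "<data line 1>\n"  # data lines follow; their field layout is not
        "<data line 2>\n"  # shown in this excerpt
    )
    sample_exclude_file = (
        "<native_id_1>\n"  # exclude file: one station native id per line;
        "<native_id_2>\n"  # whitespace is stripped before matching
    )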

def it_throws_an_exception(sesh_with_station_and_history_records, network_name, var_name):
    sesh = sesh_with_station_and_history_records
    with raises(ValueError):
        load_pcic_climate_baseline_values(sesh, var_name, [], network_name=network_name)
Example #6
    postgresql://scott:tiger@localhost/mydatabase
    postgresql+psycopg2://scott:tiger@localhost/mydatabase
    postgresql+pg8000://scott:tiger@localhost/mydatabase
""")
    parser.add_argument("-d", "--dsn", help="Database DSN in which to create new network")
    parser.add_argument("-v", "--variable", help="Name of variable to be loaded")
    parser.add_argument("-f", "--file", help="Path of file containing climate baseline values to be loaded")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    engine = create_engine(args.dsn)
    session = sessionmaker(bind=engine)()

    f = open(args.file)

    # Header processing: deduced from the file and from the R code that processes it.
    # Headers are optional.
    # The header, if present, is 2 lines:
    #   - crs?: GEO | ALB | UTM
    #   - some mysterious number, e.g., 21
    # We don't use these header values, and therefore skip them if present.
    line = next(f)
    if line.rstrip(' \0\n') in ['GEO', 'ALB', 'UTM']:
        line = next(f)  # header present; skip second header line
    else:
        f.seek(0)  # no header; reset to beginning of file

    load_pcic_climate_baseline_values(session, args.variable, f)
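Unlike the script fragment in Example #1, this variant neither commits nor closes the session. A hypothetical sketch of the same load done programmatically, following Example #1's commit/close pattern (DSN, file path, and variable name are placeholders taken from the examples above):

    # Hypothetical programmatic equivalent of the script above; all literal
    # values are placeholders. Header detection (see the snippet above) and the
    # import of load_pcic_climate_baseline_values (not shown in these examples
    # either) are omitted.
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    engine = create_engine('postgresql://scott:tiger@localhost/mydatabase')
    session = sessionmaker(bind=engine)()
    try:
        with open('/path/to/baseline_values.txt') as f:
            load_pcic_climate_baseline_values(session, 'Tx_Climatology', f)
        session.commit()
    finally:
        session.close()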