def test_loads_the_values_into_the_database(
        sesh_with_station_and_history_records, stations, var_name, source):
    """Load baseline values and verify counts, timestamps, data, and links.

    Fix: ``var_name`` is used in the body but was missing from the fixture
    parameter list (sibling tests all declare it); it is now requested as a
    fixture.

    Checks that each station receives 12 monthly values (one per month of
    the nominal climatological year 2000), each attached to the station's
    most recent History record and to the expected Variable.
    """
    sesh = sesh_with_station_and_history_records
    # NOTE(review): sibling tests unpack a 5-tuple from
    # load_pcic_climate_baseline_values; confirm which return signature is
    # current and update this unpacking if the 2-tuple form is stale.
    n_loaded, n_skipped = load_pcic_climate_baseline_values(sesh, var_name, source)
    assert n_loaded == 2
    assert n_skipped == 0

    derived_values = sesh.query(DerivedValue)
    assert derived_values.count() == 12 * len(stations)

    expected_variable = sesh.query(Variable).filter_by(name=var_name).first()

    for station in stations:
        station_values = derived_values.join(History).join(Station) \
            .filter(Station.id == station.id) \
            .order_by(DerivedValue.time)
        # Values must attach to the station's latest history record.
        latest_history = sesh.query(History) \
            .filter(History.station.has(id=station.id)) \
            .order_by(History.sdate.desc()) \
            .first()
        for i, value in enumerate(station_values):
            month = i + 1
            # Baseline values are timestamped 23:00 on the last day of month.
            last_day = monthrange(2000, month)[1]
            assert value.time == datetime.datetime(2000, month, last_day, 23)
            assert value.datum == 100 * station.id + 2 * month + 0.5
            assert value.history == latest_history
            assert value.variable == expected_variable
def it_loads_only_non_absent_values(
        sesh_with_station_and_history_records, stations, var_name, source):
    """Only months whose source values are present get loaded."""
    sesh = sesh_with_station_and_history_records
    counts = load_pcic_climate_baseline_values(sesh, var_name, source)
    n_lines_added, n_values_added, n_lines_errored, n_lines_excluded, \
        n_lines_skipped = counts
    assert n_lines_added == 1
    assert n_values_added == 8
    assert n_lines_errored == 0
    assert n_lines_excluded == 0
    assert n_lines_skipped == 0

    derived_values = (
        sesh.query(DerivedValue)
        .join(DerivedValue.variable)
        .filter(Variable.name == var_name)
    )
    station_values = (
        derived_values.join(History).join(Station)
        .filter(Station.id == stations[0].id)
    )
    # Only the non-absent months should appear for the first station.
    assert {sv.time.month for sv in station_values} == {2, 3, 4, 5, 7, 8, 11, 12}
def it_correctly_converts_and_loads_values_into_the_database(
        sesh_with_station_and_history_records, stations, var_name, source,
        exclude, n_exclude_matching):
    """Values are converted, filtered by the exclusion list, and loaded.

    One input line is expected to error out, and ``n_exclude_matching``
    lines are excluded; the remaining stations each get 12 monthly values
    attached to their latest History record and the PCIC climate variable.
    """
    sesh = sesh_with_station_and_history_records
    (n_lines_added, n_values_added, n_lines_errored,
     n_lines_excluded, n_lines_skipped) = \
        load_pcic_climate_baseline_values(sesh, var_name, source, exclude)

    assert n_lines_added == len(stations) - n_exclude_matching
    assert n_values_added == n_lines_added * 12
    assert n_lines_errored == 1
    assert n_lines_excluded == n_exclude_matching
    assert n_lines_skipped == 0

    derived_values = (
        sesh.query(DerivedValue)
        .join(DerivedValue.variable)
        .filter(Variable.name == var_name)
    )
    assert derived_values.count() == 12 * n_lines_added

    expected_variable = (
        sesh.query(Variable)
        .filter_by(name=var_name)
        .filter(Variable.network.has(name=pcic_climate_variable_network_name))
        .first()
    )

    for station in stations:
        station_values = (
            derived_values.join(History).join(Station)
            .filter(Station.id == station.id)
            .order_by(DerivedValue.time)
        )
        latest_history = (
            sesh.query(History)
            .filter(History.station.has(id=station.id))
            .order_by(History.sdate.desc())
            .first()
        )
        for idx, dv in enumerate(station_values):
            month = idx + 1
            # Values are timestamped 23:00 on the last day of each month.
            last_day = monthrange(2000, month)[1]
            assert dv.time == datetime.datetime(2000, month, last_day, 23)
            # Temperature climatologies carry a +0.5 conversion offset.
            if var_name in ('Tx_Climatology', 'Tn_Climatology'):
                assert dv.datum == 100 * station.id + 2 * month + 0.5
            else:
                assert dv.datum == 100 * station.id + 2 * month
            assert dv.history == latest_history
            assert dv.variable == expected_variable
# Fix: the input file was opened but never closed; a context manager now
# guarantees it is closed even if header detection or loading raises.
with open(args.file) as f:
    # Header processing: deduced from the file and from the R code that
    # processes it. Headers are optional. A header, if present, is 2 lines:
    # - crs?: GEO | ALB | UTM
    # - some mysterious number, e.g, 21
    # We don't use these header values, and (naturally, therefore) we skip
    # them if present.
    line = next(f)
    if line.rstrip(' \0\n') in ['GEO', 'ALB', 'UTM']:
        script_logger.debug('Header lines detected; skipping first 2 lines in file')
        line = next(f)  # header present; skip second header line
    else:
        script_logger.debug('No header lines detected')
        f.seek(0)  # no header; reset to beginning of file

    # Load excluded station native ids, if provided.
    if args.exclude:
        with open(args.exclude) as e:
            exclude = e.readlines()
    else:
        exclude = []
    exclude = [x.strip() for x in exclude]

    # Commit only if loading succeeds; always release the session.
    try:
        load_pcic_climate_baseline_values(session, args.variable, f,
                                          exclude=exclude)
        session.commit()
    finally:
        session.close()
def it_throws_an_exception(
        sesh_with_station_and_history_records, network_name, var_name):
    """Loading with a bad network name raises ValueError."""
    session = sesh_with_station_and_history_records
    with raises(ValueError):
        load_pcic_climate_baseline_values(
            session, var_name, [], network_name=network_name)
postgresql://scott:tiger@localhost/mydatabase
postgresql+psycopg2://scott:tiger@localhost/mydatabase
postgresql+pg8000://scott:tiger@localhost/mydatabase
""")
# Script arguments: target database DSN, variable to load, and input file.
parser.add_argument("-d", "--dsn", help="Database DSN in which to create new network")
parser.add_argument("-v", "--variable", help="Name of variable to be loaded")
parser.add_argument("-f", "--file", help="Path of file containing climate baseline values to be loaded")
args = parser.parse_args()

# Log to stdout at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Build a SQLAlchemy session against the target database.
engine = create_engine(args.dsn)
session = sessionmaker(bind=engine)()

# NOTE(review): the file is never closed; consider `with open(...) as f:`.
f = open(args.file)
# Header processing: deduced from file and from R code that processes it
# Headers are optional
# Header if present, is 2 lines:
# - crs?: GEO | ALB | UTM
# - some mysterious number, e.g, 21
# We don't use these header values, and (naturally, therefore) we skip them if present
line = next(f)
if line.rstrip(' \0\n') in ['GEO','ALB','UTM']:
    line = next(f)  # header present; skip second header line
else:
    f.seek(0)  # no header; reset to beginning of file

# NOTE(review): no session.commit()/session.close() here — confirm whether
# this script is expected to persist the loaded values, or whether the
# loader commits internally.
load_pcic_climate_baseline_values(session, args.variable, f)