def get_station_dicts(start_date, end_date):
    """Load all stations of the outdoor-and-shaded repository for the period.

    :param start_date: First day of the examined period
    :param end_date: Last day of the examined period
    :return: List of station dicts as produced by ``StationRepository.load_all_stations``
    """
    # Removed dead commented-out alternative (RepositoryParameter.START);
    # this entry point is fixed to the outdoor-and-shaded station set.
    repository_parameter = RepositoryParameter.ONLY_OUTDOOR_AND_SHADED
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter))
    station_dicts = station_repository.load_all_stations(start_date, end_date)
    return station_dicts
def check_correlation(start_date, end_date):
    """Plot a histogram of correlation coefficients of outdoor stations.

    Each station is correlated against the HUSCONET temperature/radiation
    averages; only statistically significant results (p < .05) are shown.

    :param start_date: First day of the examined period
    :param end_date: Last day of the examined period
    """
    station_repository = StationRepository(
        *get_repository_parameters(RepositoryParameter.ONLY_OUTDOOR))
    station_dicts = station_repository.load_all_stations(start_date, end_date)
    reference_temperature_df = load_husconet_temperature_average(
        start_date, end_date)
    reference_radiation_df = load_husconet_radiation_average(
        start_date, end_date)
    # Single pass: correlate each station and keep only significant r values.
    r_values = []
    for station_dict in station_dicts:
        r_value, p_value = check_station(
            station_dict["data_frame"],
            reference_temperature_df,
            reference_radiation_df,
            just_check_correlation=True)
        if p_value < .05:
            r_values.append(r_value)
    pyplot.hist(r_values, 10, color="gray")
    pyplot.xlim(-1, 1)
    pyplot.xlabel("Korrelationskoeffizient $r$")
    pyplot.ylabel("Anzahl PWS")
    pyplot.show()
def run():
    """Build filtered training/evaluation CSV vectors for the neural networks.

    Loads the 2016 outdoor-and-shaded stations, shuffles them, splits 70/30
    into training and evaluation sets, and joins each set with the EDDH data
    into one big CSV vector.
    """
    start_date = "2016-01-01T00:00"
    end_date = "2016-12-31T23:59"
    eddh_df = load_eddh(start_date, end_date)
    station_repository = StationRepository(*get_repository_parameters(
        RepositoryParameter.ONLY_OUTDOOR_AND_SHADED))
    station_dicts = station_repository.load_all_stations(
        start_date,
        end_date,
        # limit=5, # for testing purposes
        limit_to_temperature=False)

    # Random 70/30 split into training and evaluation stations.
    random.shuffle(station_dicts)
    cut = int(len(station_dicts) * .7)
    training_dicts = station_dicts[:cut]
    evaluation_dicts = station_dicts[cut:]
    logging.info("training stations: %s" %
                 [station["name"] for station in training_dicts])
    logging.info("evaluation stations: %s" %
                 [station["name"] for station in evaluation_dicts])

    training_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                     "training_data_filtered.csv")
    join_to_big_vector(training_csv_file, training_dicts, eddh_df)
    evaluation_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                       "evaluation_data_filtered.csv")
    join_to_big_vector(evaluation_csv_file, evaluation_dicts, eddh_df)
def gather_statistics(repository_parameter, start_date, end_date):
    """Measure per-station data availability and write it to a CSV log file.

    Each station's data frame is up-sampled to the full period before the
    available amount of data is computed.

    :param repository_parameter: A ``RepositoryParameter`` selecting the repository
    :param start_date: First day of the examined period
    :param end_date: Last day of the examined period
    """
    logging.info("repository: %s" % repository_parameter.value)
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter))
    availabilities = []
    station_dicts = station_repository.load_all_stations(start_date=start_date,
                                                         end_date=end_date)
    logging.info("total: %i" % len(station_dicts))
    # Idiomatic emptiness test instead of 'while True' + explicit break;
    # pop() drops each station dict as soon as it has been processed.
    while station_dicts:
        station_dict = station_dicts.pop()
        position = station_dict["meta_data"]["position"]
        station_dict["data_frame"] = sample_up(station_dict["data_frame"],
                                               start_date, end_date)
        row_result = {
            "station_name": station_dict["name"],
            "lat": position["lat"],
            "lon": position["lon"],
            "available_data": get_available_data(station_dict)
        }
        availabilities.append(row_result)
        logging.debug("{station_name}: {lat} {lon} -- {available_data}".format(
            **row_result))
    df = pandas.DataFrame(availabilities)
    result_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "log",
        "calculate_available_data_%s.csv" % repository_parameter.value)
    df.to_csv(result_file)
def demo():
    """Run a short scoring demo on the outdoor-and-shaded stations (2 days, 10 stations)."""
    repository_parameters = get_repository_parameters(
        RepositoryParameter.ONLY_OUTDOOR_AND_SHADED)
    start_date = "2016-01-31"
    end_date = "2016-02-01"
    score_algorithm(start_date, end_date, repository_parameters,
                    limit=10, interpolation_name="test")
def demo():
    """Run a short scoring demo on the START repository (two weeks, 30 stations)."""
    repository_parameters = get_repository_parameters(RepositoryParameter.START)
    start_date = "2016-01-31"
    end_date = "2016-02-15"
    score_algorithm(start_date, end_date, repository_parameters,
                    limit=30, interpolation_name="test")
def run(testing=False):
    """Build HUSCONET-based training/evaluation CSV vectors for the neural networks.

    The HUSCONET stations are shuffled and split 70/30 into training and
    evaluation reference sets; the full-sensor PWS stations are joined with
    each set and the EDDH data into big CSV vectors.

    :param testing: If True, use a shorter period and fewer stations
    """
    start_date = "2016-01-01T00:00"
    if testing:
        end_date = "2016-03-31"
    else:
        end_date = "2016-12-31T23:59"
    eddh_df = load_eddh(start_date, end_date)

    station_repository = StationRepository(
        *get_repository_parameters(RepositoryParameter.START_FULL_SENSOR))
    station_dicts = station_repository.load_all_stations(
        start_date,
        end_date,
        limit_to_temperature=False,
        limit=15 if testing else 0  # for testing purposes
    )
    husconet_dicts = HusconetStationRepository().load_all_stations(
        start_date,
        end_date,
        limit=3 if testing else 0  # for testing purposes
    )

    # Random 70/30 split of the HUSCONET reference stations.
    random.shuffle(husconet_dicts)
    cut = int(len(husconet_dicts) * .7)
    training_dicts = husconet_dicts[:cut]
    evaluation_dicts = husconet_dicts[cut:]
    logging.info("training stations: %s" %
                 [station["name"] for station in training_dicts])
    logging.info("evaluation stations: %s" %
                 [station["name"] for station in evaluation_dicts])

    logging.debug("prepare evaluation")
    evaluation_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                       "evaluation_data_husconet.csv")
    # Pass a copy first so the original station_dicts list stays intact
    # for the training join below.
    join_to_big_vector(evaluation_csv_file, station_dicts[:],
                       evaluation_dicts, eddh_df)

    logging.debug("prepare training")
    training_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                     "training_data_husconet.csv")
    join_to_big_vector(training_csv_file, station_dicts, training_dicts,
                       eddh_df)
    logging.debug("done")
def run_clustering(repository_parameter_name, start_date, end_date, limit):
    """
    :param repository_parameter_name: One of the types from ``RepositoryParameter``
    :param start_date: First day
    :param end_date: Last day
    :param limit: Limit the number of examined stations
    :return: Show clustering
    """
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter_name))
    station_dicts = station_repository.load_all_stations(start_date, end_date,
                                                         limit=limit)
    comparator = StationTimeSeriesComparator(station_dicts)
    stations = [Station(station_dict) for station_dict in station_dicts]

    # Hierarchical clustering on pairwise time-series similarity.
    cluster = HierarchicalClustering(
        stations,
        comparator.compare_time_series,
        num_processes=4)
    cluster.cluster()
    cluster.display(print_function=logging.debug)
    logging.info(cluster._data)
def plot():
    """Plot the 2016 precipitation ("Niederschlag") for the full-sensor repository."""
    repository_params = get_repository_parameters(
        RepositoryParameter.START_FULL_SENSOR)
    plot_station("Niederschlag 2016", *repository_params,
                 "2016-01-01", "2016-12-31")
def _write_coordinates_csv(station_repository, station_names, csv_path):
    """Write a station-indexed CSV of lat/lon coordinates for the given stations.

    :param station_repository: Repository used to resolve station meta data
    :param station_names: Iterable of station names to look up
    :param csv_path: Destination path of the CSV file
    """
    df_data = []
    for station_name in station_names:
        meta_info = station_repository.get_meta_info(station_name)
        df_data.append({
            "station": station_name,
            "lat": meta_info.lat,
            "lon": meta_info.lon,
        })
    df = pandas.DataFrame(df_data)
    df.set_index("station", inplace=True)
    df.to_csv(csv_path)


def gather_statistics(repository_parameter, start_date, end_date):
    """Collect precipitation and wind availability per station and persist it.

    Writes four CSV files: per-month precipitation and wind availability
    tables, plus coordinate lists of the stations providing each quantity.

    :param repository_parameter: A ``RepositoryParameter`` selecting the repository
    :param start_date: First day of the examined period
    :param end_date: Last day of the examined period
    """
    logging.info("repository: %s" % repository_parameter.value)
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter))
    available_precipitation = {}
    available_wind = {}
    station_dicts = station_repository.load_all_stations(
        start_date=start_date,
        end_date=end_date,
        limit_to_temperature=False,
    )
    logging.info("total: %i" % len(station_dicts))
    stations_with_precipitation = set()
    stations_with_wind = set()
    # Idiomatic emptiness test instead of 'while True' + explicit break.
    while station_dicts:
        station_dict = station_dicts.pop()  # free memory whenever you can
        precipitation = get_available_precipitation(station_dict)
        if len(precipitation):
            available_precipitation[station_dict["name"]] = precipitation
            stations_with_precipitation.add(station_dict["name"])
        wind = get_available_wind(station_dict)
        if len(wind):
            available_wind[station_dict["name"]] = wind
            stations_with_wind.add(station_dict["name"])

    df_precipitation = pandas.DataFrame(available_precipitation)
    df_wind = pandas.DataFrame(available_wind)
    result_file_precipitation = os.path.join(
        PROCESSED_DATA_DIR, "misc",
        "precipitation_per_month_%s.csv" % repository_parameter.value)
    df_precipitation.to_csv(result_file_precipitation)
    result_file_wind = os.path.join(
        PROCESSED_DATA_DIR, "misc",
        "wind_per_month_%s.csv" % repository_parameter.value)
    df_wind.to_csv(result_file_wind)

    # Coordinate lists were previously duplicated inline; now shared helper.
    station_dicts_wind = os.path.join(PROCESSED_DATA_DIR, "filtered_stations",
                                      "station_dicts_wind.csv")
    _write_coordinates_csv(station_repository, stations_with_wind,
                           station_dicts_wind)
    station_dicts_precipitation = os.path.join(
        PROCESSED_DATA_DIR, "filtered_stations",
        "station_dicts_precipitation.csv")
    _write_coordinates_csv(station_repository, stations_with_precipitation,
                           station_dicts_precipitation)