# Build the daily wetter.de prediction table: scrape every city, fill the
# columns this site does not provide, store the result in the database and
# keep a pickle copy on disk.

# Imported locally so this block does not depend on how the rest of the file
# imports pandas (needed for pd.concat below).
import pandas as pd

# Collect the per-city frames first and concatenate once. DataFrame.append
# was removed in pandas 2.0, and appending inside the loop re-copied the
# growing frame on every iteration.
city_frames = []
for i, city in enumerate(cities):
    url = urls[i]
    cdf = create_weather_df(url, http, current_time)
    cdf['city'] = city
    city_frames.append(cdf)
# `df` is expected to exist already (it was the target of df.append before);
# its existing rows are kept in front of the newly scraped ones.
df = pd.concat([df] + city_frames)

# Columns that are filled with None for every row of this source.
for empty_column in ('wind_speed', 'humidity', 'precipitation_per',
                     'precipitation_l', 'wind_direction', 'snow', 'uvi'):
    df[empty_column] = None

df['website'] = 'https://www.wetter.de'

# Previously current_time was formatted as '%Y%m%d%H' and parsed straight
# back to a date; taking the date directly gives the same value.
# Assumes current_time is a datetime.datetime (it is formatted with an hour
# field elsewhere) - TODO confirm.
df['date_of_acquisition'] = current_time.date()

# Prediction timestamps arrive as 'YYYYmmddHHMM' strings; only the calendar
# date is kept.
df['date_for_which_weather_is_predicted'] = (
    df['date_for_which_weather_is_predicted'].apply(
        lambda x: datetime.datetime.strptime(x, '%Y%m%d%H%M').date()))

try:
    db_manager.insert_df("DailyPrediction", df)
finally:
    # Runs whether or not the insert raised, so the scraped data is still
    # written to disk on a database failure; the exception then propagates.
    # NOTE(review): there is no separator between 'daily' and the timestamp,
    # so the file is written as .../data_wetter_de/daily<timestamp>.pkl.
    # An earlier, commented-out variant used a 'daily/' directory - confirm
    # which layout the consumers of these pickles expect before changing it.
    filename = os.path.expanduser(
        '~/Documents/webscraping_2018/data_wetter_de/daily')
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H')
    filename += timestamp + ".pkl"
    df.to_pickle(filename)
# NOTE(review): this fragment starts inside an if/else whose `if` header, and
# the loops that bind `ci`, `i` and `hi`, lie before the visible source. The
# indentation of the first statements is reconstructed and must be checked
# against the full file.
                # Presumably translates the German wind label into the stored
                # wind-direction value - verify against wind_mapping.
                wind_text = wind_mapping[wind_text_ger]
            else:
                wind_text = None
            # Feature slot 7 is exported below as 'wind_direction'.
            all_features[ci][i][hi][7] = wind_text
            # Feature slot 8 is exported below as 'condition': the text of the
            # span with class "temperature-condition" inside temp_info.
            temp_condition = temp_info.find('span',class_="temperature-condition").text
            all_features[ci][i][hi][8] = temp_condition

# Flatten the nested [ci][i][hi] axes so that each row is one prediction and
# each column one feature.
all_features = all_features.reshape(number_of_predictions,number_of_features)

# Copy every feature column into the dict the DataFrame is built from.
hourly_dict['city'] = list(all_features[:,0])
hourly_dict['date_for_which_weather_is_predicted'] = list(all_features[:,1])
hourly_dict['temperature'] = list(all_features[:,2])
hourly_dict['wind_speed'] = list(all_features[:,3])
hourly_dict['humidity'] = list(all_features[:,4])
hourly_dict['precipitation_per'] = list(all_features[:,5])
hourly_dict['precipitation_l'] = list(all_features[:,6])
hourly_dict['wind_direction'] = list(all_features[:,7])
hourly_dict['condition'] = list(all_features[:,8])
# These two columns are filled with None for every prediction.
hourly_dict['snow'] = [None]*number_of_predictions
hourly_dict['uvi'] = [None]*number_of_predictions

df = pd.DataFrame(data=hourly_dict)
# NOTE(review): 'date_of_acquisition' is not assigned in the visible part of
# this fragment; presumably hourly_dict already contains it - confirm.
df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date())
# NOTE(review): the string is parsed with hour and minute ('%H%M') but
# .date() then discards both, so only the calendar day reaches the
# "HourlyPrediction" table - confirm that this is intended.
df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H%M').date())

try:
    db_manager.insert_df("HourlyPrediction", df)
finally:
    # Runs whether or not the insert raised, so the frame is pickled to disk
    # even on a database failure; the exception then propagates.
    # Resulting name: <home>/Documents/webscraping_2018/data_wetter_de/
    # hourly_period_<YYYYmmddHH>.pkl
    filename = os.path.expanduser('~/Documents/webscraping_2018/data_wetter_de/hourly_period_')
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H')
    filename += timestamp + ".pkl"
    df.to_pickle(filename)
# NOTE(review): this fragment starts on the `else:` of an if/else whose `if`
# branch, and any enclosing loop, lie before the visible source. Indentation
# is reconstructed. In particular it cannot be determined from here whether
# the 'snow' and 'uvi' appends belong to the `else` branch or follow the
# whole if/else - confirm against the full file.
    else:
        # No value for these fields in this branch: append None placeholders.
        daily_dict['precipitation_per'].append(None)
        daily_dict['precipitation_l'].append(None)
        daily_dict['condition'].append(None)
    daily_dict['snow'].append(None)
    daily_dict['uvi'].append(None)

# Convert to a DataFrame and save to file.
# Each list is wrapped in a pd.Series before the frame is built; presumably
# this is to tolerate lists of different lengths (pandas pads the shorter
# Series with NaN) - confirm whether unequal lengths can really occur.
df_daily = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in daily_dict.items() ]))
# Prints the whole frame to stdout (looks like debug output).
print(df_daily)
# Both date columns are parsed from 'YYYYmmddHH' values (converted with str()
# first) and reduced to plain dates.
df_daily.date_of_acquisition = df_daily.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(str(x), '%Y%m%d%H').date())
# Prints the prediction-date column before it is converted (debug output).
print(df_daily.date_for_which_weather_is_predicted)
df_daily.date_for_which_weather_is_predicted = df_daily.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(str(x), '%Y%m%d%H').date())

try:
    db_manager.insert_df("DailyPrediction", df_daily)
finally:
    # Runs whether or not the insert raised, so the frame is pickled to disk
    # even on a database failure; the exception then propagates.
    # Resulting name: <home>/Documents/webscraping_2018/data_bild/daily/
    # daily_<YYYYmmddHH>.pkl
    filename = os.path.expanduser('~/Documents/webscraping_2018/data_bild/daily/daily_')
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H')
    filename += timestamp + ".pkl"
    df_daily.to_pickle(filename)

# SECOND PART: FOUR-TIMES-A-DAY PREDICTIONS
# Scrape the specified cities for morning, afternoon, evening and night, and
# extract temperature, precipitation in percent and condition.
# (The scraping code that uses these offsets is outside this fragment.)
# Each entry is a timedelta; presumably it is added to the start of the
# current day to get the predicted time - verify where PREDICTION_TIMES is
# used.
PREDICTION_TIMES = [datetime.timedelta(days=0, hours=8), # morning (08:00)
                    datetime.timedelta(days=0, hours=14), # afternoon (14:00)
                    datetime.timedelta(days=0, hours=20), # evening (20:00)
                    datetime.timedelta(days=1, hours=2)] # night (02:00 on the following day)