def main():
    """Download Google Static Maps satellite images and build train/val image sets.

    Reads the nightlights-bins label file, attempts to download the
    corresponding satellite images (best-effort: requires API keys), then
    shuffles, splits, and class-balances the labels before copying images
    into `train/` and `val/` directories.

    Side effects: network download, reads two CSVs, writes image files
    under ``training_images_dir``. Returns None.
    """
    # All paths are relative to the repository's data directory.
    data_path = '../data/'
    nightlights_bins_file = data_path + 'nightlights_bins.csv'
    satellite_images_dir = data_path + 'images/'
    training_images_dir = data_path + 'train_val/'
    report_dir = data_path + 'report/'
    report_file = report_dir + 'report.csv'

    try:
        data_download.get_satellite_images_with_labels(
            nightlights_bins_file,
            satellite_images_dir,
            report_dir,
            scale=1,
            zoom=17,
            imgsize=(400, 400),
        )
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # not swallowed; log with traceback (ERROR) instead of a silent
        # DEBUG line so the failure is actually visible.
        logging.exception("Could not download satellite images. Please set your API keys.")

    # To see how nightlights_bins_file was generated, please refer to
    # notebooks/01_lights_eda.ipynb
    nightlights = pd.read_csv(nightlights_bins_file)
    report = pd.read_csv(report_file)

    # Shuffle deterministically (SEED) before splitting so the split is
    # reproducible, then balance the training classes by upsampling.
    nightlights = nightlights.sample(frac=1, random_state=SEED).reset_index(drop=True)
    train, val = data_utils.train_val_split(nightlights, train_size=0.9)
    train_balanced = data_utils.balance_dataset(train, size=30000)

    # Copy/split the downloaded images into the phase subdirectories.
    data_utils.train_val_split_images(val, report, training_images_dir, phase='val')
    data_utils.train_val_split_images(train_balanced, report, training_images_dir, phase='train')
# ---------------------------------------------------------------------------
# Notebook residue (exported cells): shuffle the label table, split it into
# train/val, report class balances, then upsample the minority class.
# NOTE(review): `nightlights`, `SEED`, and `data_utils` are assumed to be
# defined by earlier cells of the original notebook.
# ---------------------------------------------------------------------------
nightlights = nightlights.sample(frac=1, random_state=SEED).reset_index(drop=True)
train, val = data_utils.train_val_split(nightlights, train_size=0.9)

print('Size of training set: ', len(train))
print(train['label'].value_counts())
print('\nSize of validation set: ', len(val))
print(val['label'].value_counts())

# Upsample the minority class of the training set (notebook cell In[32]).
train_balanced = data_utils.balance_dataset(train, size=30000)
print('Number of images in training set (balanced): ', len(train_balanced))
train_balanced['label'].value_counts()

# Downloading the Google Static Maps (GSM) images.
# To download the GSM images, run `src/data_download.py` as follows:
#   cd src
#   python data_download.py