Exemple #1
0
def main():
    """Download the satellite images and build the train/val image sets.

    Every path is rooted at ``../data/`` relative to the current working
    directory. The steps are:

    1. Ask ``data_download`` to fetch the images listed in
       ``nightlights_bins.csv`` (best effort, see below).
    2. Load ``nightlights_bins.csv`` and ``report/report.csv``.
    3. Shuffle the nightlights rows with ``SEED`` and split them with
       ``train_size=0.9``.
    4. Balance the training split to ``size=30000`` and hand both splits to
       ``data_utils.train_val_split_images`` for the ``'val'`` and
       ``'train'`` phases.

    The download is deliberately best-effort: a failure is logged and the
    remaining steps still run against whatever files are already on disk.
    """
    # Paths stay plain strings with trailing slashes because they are passed
    # straight to project helpers whose path handling is not visible here.
    data_path = '../data/'
    nightlights_bins_file = data_path + 'nightlights_bins.csv'
    satellite_images_dir = data_path + 'images/'
    training_images_dir = data_path + 'train_val/'
    report_dir = data_path + 'report/'
    report_file = report_dir + 'report.csv'

    try:
        data_download.get_satellite_images_with_labels(
            nightlights_bins_file,
            satellite_images_dir,
            report_dir,
            scale=1,
            zoom=17,
            imgsize=(400, 400),
        )
    except Exception:
        # Catch Exception rather than everything so Ctrl-C and SystemExit
        # still stop the script. Log at warning level with the traceback so
        # the failure is visible under the default logging configuration.
        logging.warning(
            "Could not download satellite images. Please set your API keys.",
            exc_info=True,
        )

    # To see how nightlights_bins_file was generated, please refer to
    #    notebooks/01_lights_eda.ipynb
    nightlights = pd.read_csv(nightlights_bins_file)
    report = pd.read_csv(report_file)

    # Initialize train and val sets: shuffle reproducibly, then split.
    nightlights = nightlights.sample(frac=1, random_state=SEED).reset_index(drop=True)
    train, val = data_utils.train_val_split(nightlights, train_size=0.9)
    train_balanced = data_utils.balance_dataset(train, size=30000)

    # Hand each split to the image-splitting helper, validation first.
    data_utils.train_val_split_images(val, report, training_images_dir, phase='val')
    data_utils.train_val_split_images(train_balanced, report, training_images_dir, phase='train')
Exemple #2
0

# Shuffle every row with a fixed seed, renumber the index from zero, and
# only then split into training and validation sets.
nightlights = nightlights.sample(frac=1, random_state=SEED)
nightlights = nightlights.reset_index(drop=True)
train, val = data_utils.train_val_split(nightlights, train_size=0.9)

# Report the size and the per-label counts of each split.
for _heading, _split in (
    ('Size of training set: ', train),
    ('\nSize of validation set: ', val),
):
    print(_heading, len(_split))
    print(_split['label'].value_counts())


# ### Upsample Minority Class of Training Set

# In[32]:


train_balanced = data_utils.balance_dataset(train, size=30000)
print('Number of images in training set (balanced): ', len(train_balanced))
# Bare expression: displayed as the cell output in a notebook, but not
# printed when this file is run as a plain script.
train_balanced['label'].value_counts()


# ## Downloading the Google Static Maps (GSM) Images
#
# To download the GSM images, run `src/data_download.py` as follows:
# ```
# cd src
# python data_download.py
# ```

# In[ ]: