Beispiel #1
0
def main():
    """Download labeled satellite images and split them into train/val sets.

    Reads the nightlights-bin labels CSV, attempts to download the matching
    satellite images (best-effort: requires API keys), then shuffles,
    splits, balances, and copies the images into train/val directories.
    """
    data_path = '../data/'
    nightlights_bins_file = data_path + 'nightlights_bins.csv'
    satellite_images_dir = data_path + 'images/'
    training_images_dir = data_path + 'train_val/'
    report_dir = data_path + 'report/'
    report_file = report_dir + 'report.csv'

    # Best-effort download: without valid API keys this raises, but we can
    # still proceed if the images were downloaded on a previous run.
    try:
        data_download.get_satellite_images_with_labels(
            nightlights_bins_file,
            satellite_images_dir,
            report_dir,
            scale=1,
            zoom=17,
            imgsize=(400, 400)
        )
    except Exception:
        # Log at error level with the traceback — a bare `except:` at DEBUG
        # level silently hid real failures (including typos in the call).
        logging.exception("Could not download satellite images. Please set your API keys.")

    # To see how nightlights_bins_file was generated, please refer to
    #    notebooks/01_lights_eda.ipynb
    nightlights = pd.read_csv(nightlights_bins_file)
    report = pd.read_csv(report_file)

    # Shuffle deterministically (SEED) so the train/val split is reproducible.
    nightlights = nightlights.sample(frac=1, random_state=SEED).reset_index(drop=True)
    train, val = data_utils.train_val_split(nightlights, train_size=0.9)
    # Upsample minority classes in the training set only; validation stays
    # at its natural class distribution.
    train_balanced = data_utils.balance_dataset(train, size=30000)

    # Copy images into phase-specific subdirectories for the training loop.
    data_utils.train_val_split_images(val, report, training_images_dir, phase='val')
    data_utils.train_val_split_images(train_balanced, report, training_images_dir, phase='train')
Beispiel #2
0
def train():
    """
    Trains a convolutional network on CIFAR-10 for one epoch, and saves the resulting model.
    """
    # Read the CIFAR-10 arrays into memory (training labels are discarded
    # here; the dev split below provides train/val data).
    _, dev_images, dev_labels, test_images, test_labels = load_cifar10()

    # Hold out a random 10% of the development data for validation so the
    # train and validation distributions match; train on the remaining 90%.
    tr_images, tr_labels, va_images, va_labels = train_val_split(
        dev_images, dev_labels, val_fraction=0.1)

    # Build the network and its trainer.
    net = model.Model()
    runner = trainer.Trainer(model=net, learning_rate=5e-4)

    # Fit for 5 epochs, reporting progress every 100 steps.
    runner.train(tr_images, tr_labels, va_images, va_labels,
                 batch_size=64, epochs=5, print_every=100)

    # Measure final accuracy on the held-out test set.
    runner.evaluate(test_images, test_labels, batch_size=64, print_every=100)

    # Persist the trained weights to ./checkpoints/my_model.
    runner.save(os.path.join(".", "checkpoints", "my_model"))
Beispiel #3
0
# In[25]:


# Load the nightlights-bin labels produced earlier in the pipeline
# (one row per downloaded image, with a 'label' column).
nightlights = pd.read_csv(nightlights_bins_file)
print('Total number images downloaded: ', len(nightlights))


# ### Shuffle NTL Dataset and Split into Train/Val Sets

# In[26]:


# Shuffle the whole frame deterministically (fixed SEED) so the split
# below is reproducible, then take 90% for training and 10% for validation.
nightlights = nightlights.sample(
    frac=1, random_state=SEED).reset_index(drop=True)
train, val = data_utils.train_val_split(nightlights, train_size=0.9)
print('Size of training set: ', len(train))
print(train['label'].value_counts())
print('\nSize of validation set: ', len(val))
print(val['label'].value_counts())


# ### Upsample Minority Class of Training Set

# In[32]:


# Balance class counts in the training set only (validation keeps its
# natural distribution); `size` is the target per-class/total size —
# presumably upsampling to ~30000 rows, see data_utils.balance_dataset.
train_balanced = data_utils.balance_dataset(train, size=30000)
print('Number of images in training set (balanced): ', len(train_balanced))
train_balanced['label'].value_counts()