Example #1
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.xgboost.train import fit
from engine.parameters.special_parameters import get_parameters

max_depth = get_parameters('max_depth', 2)  # command-line override (--more max_depth=...), default 2

# loading dataset
train, _, test = occurrence_loader(
    EnvironmentalDataset, source='gbif_taxref', validation_size=0, size_patch=1
)

# training model
training_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies(),
                ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
                ValidationAccuracyForAllSpecies(train=train, final_validation=True))
}
fit(train=train, test=test, training_params=training_params,
    objective='multi:softprob', max_depth=max_depth, seed=4242, eval_metric='merror', num_class=4520,
    num_boost_round=360, early_stopping_rounds=10, verbose_eval=1, updater='grow_gpu',
    predictor='gpu_predictor', tree_method='gpu_hist')
Example #2
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalIGNDataset
from datascience.visu.util import save_fig
from datascience.visu.patch import pplot_patch
import numpy as np

# use --more idx=12 on the command line to change the index
from engine.logging import print_info
from engine.parameters.special_parameters import get_parameters

# load only the first idx + 1 elements

idx = get_parameters('idx', 0)

train, _, _ = occurrence_loader(EnvironmentalIGNDataset,
                                source='full_ign',
                                id_name='X_key',
                                label_name='glc19SpId',
                                validation_size=0,
                                test_size=0,
                                limit=idx + 1)

patch, _ = train[idx]

patch = [layer.int() for layer in patch]  # cast each band to integer values

# merge the last three bands into a single (H, W, 3) RGB array
patch = patch[:-3] + [np.transpose(np.stack(patch[-3:], axis=0), (1, 2, 0))]

print_info('Printing patch at ' + str(train.dataset[idx]))

pplot_patch(patch, header=train.named_dimensions)
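# save_fig is imported above but never called in this snippet; presumably the
# script ends by persisting the figure (the no-argument call is an assumption
# about datascience.visu.util)
save_fig()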
Example #3
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.sklearn.train import fit
from datascience.ml.sklearn.util import load_or_create
from datascience.model_selection import train_test_split_stratified
from engine.parameters.special_parameters import get_parameters
from sklearn.ensemble import RandomForestClassifier

max_depth = get_parameters('max_depth', 12)
save = get_parameters('save', True)

# loading dataset
train, _, test = occurrence_loader(EnvironmentalDataset,
                                   source='gbif_taxref',
                                   validation_size=0,
                                   size_patch=1)

model = load_or_create(RandomForestClassifier,
                       n_estimators=100,
                       max_depth=max_depth)

# training model
training_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies(),
                ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
                ValidationAccuracyForAllSpecies(train=train, final_validation=True))
}
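# the snippet stops before the actual training call; a minimal, assumed
# completion mirroring the xgboost example above (the exact signature of
# datascience.ml.sklearn.train.fit is an assumption)
fit(model, train=train, test=test, training_params=training_params)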
Example #4
import os

import numpy as np
import pandas as pd
from matplotlib import cm
from sklearn.metrics import roc_curve, auc, confusion_matrix

from datascience.visu.util import plt, save_fig, get_figure
from engine.parameters.special_parameters import get_parameters
from engine.path import last_experiment_path

experiment_name = get_parameters('roc_experiment', 'country')
path = os.path.join(last_experiment_path(experiment_name), 'results.csv')

df = pd.read_csv(path, header='infer', sep=';')

print(df)

fpr, tpr, thresholds = roc_curve(df.true_label, df.prediction, pos_label=1)

ax = plt('roc_curve').gca()

ax.set_xlim([-0.007, 1.0])
ax.set_ylim([0.0, 1.01])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver operating characteristic (AUC: %.3f)' % auc(fpr, tpr))
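# the snippet ends before drawing the curve; an assumed completion that plots
# the ROC on the axes prepared above and saves the figure (the no-argument
# save_fig() call is an assumption about datascience.visu.util)
ax.plot(fpr, tpr, lw=2)
save_fig()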
Example #5
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import select_species_by_neuron
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters

species = get_parameters('species', 0)
mean_size = get_parameters('mean_size', 1)
figsize = get_parameters('figsize', 5)
neuron = get_parameters('neuron', 0)

# loading dataset
_, _, grid_points = occurrence_loader(GeoLifeClefDataset,
                                      source='grid_occs_1km',
                                      id_name='id',
                                      test_size=1,
                                      label_name=None)

sources = check_source('gbif_taxref')

# plot the species that most activate the selected neuron
select_species_by_neuron(grid_points,
                         label_species=sources['label_species'],
                         neuron=neuron,
                         mean_size=mean_size,
                         figsize=figsize)
Example #6
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import plot_species_on_map
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters

species = get_parameters('species', 0)
mean_size = get_parameters('mean_size', 1)
figsize = get_parameters('figsize', 5)
log_scale = get_parameters('log_scale', False)
softmax = get_parameters('softmax', False)
alpha = get_parameters('alpha', None)

# loading dataset
_, _, grid_points = occurrence_loader(GeoLifeClefDataset,
                                      source='grid_occs_1km',
                                      id_name='id',
                                      test_size=1,
                                      label_name=None)

sources = check_source('gbif_taxref')

# plot the selected species' activations on the map
plot_species_on_map(grid_points,
                    label_species=sources['label_species'],
                    species=species,
                    mean_size=mean_size,
                    figsize=figsize,
                    log_scale=log_scale,
                    softmax=softmax,
                    alpha=alpha)
Example #7
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import plot_activations_on_map
from engine.parameters.special_parameters import get_parameters

n_rows = get_parameters('n_rows', 3)
n_cols = get_parameters('n_cols', 5)
mean_size = get_parameters('mean_size', 1)
figsize = get_parameters('figsize', 4)
log_scale = get_parameters('log_scale', False)
selected = get_parameters('selected', tuple())


# loading dataset
_, _, grid_points = occurrence_loader(GeoLifeClefDataset, source='grid_occs_1km', id_name='id',
                                      test_size=1, label_name=None)

# plot neuron activation maps on an n_rows x n_cols grid
plot_activations_on_map(grid_points, n_rows=n_rows, n_cols=n_cols, log_scale=log_scale, figsize=figsize,
                        mean_size=mean_size, selected=selected)
Example #8
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import species_train_test_occurrences
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters

species = get_parameters('species', 0)

# loading dataset
train, val, test = occurrence_loader(GeoLifeClefDataset,
                                     source='gbif_taxref',
                                     validation_size=0.1,
                                     size_patch=1,
                                     test_size=0.1)

sources = check_source('gbif_taxref')

species_train_test_occurrences(sources['label_species'],
                               train,
                               val,
                               test,
                               species=species)
Example #9
from datascience.ml.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from sklearn.model_selection import train_test_split
from engine.parameters.special_parameters import get_parameters

from projects.ecography.configs.inception import model_params, training_params

temperature = get_parameters('temperature', 1.)

model_params['temperature'] = temperature

# loading/creating model
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# loading dataset
train, val, test = occurrence_loader(EnvironmentalDataset,
                                     source='gbif_taxref',
                                     splitter=train_test_split)

# training model
validation_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies(),
                ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
                ValidationAccuracyForAllSpecies(train=train, final_validation=True))
}
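# the snippet stops before training; an assumed completion following the other
# examples (the exact signature of datascience.ml.neural.supervised.fit is an
# assumption; training_params comes from the imported inception config)
fit(model, train=train, val=val, test=test,
    training_params=training_params, validation_params=validation_params)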
"""
This code extract the IGN archives and export the results into patches

usage: sjobs projects/max_env/extract_ign.py  #  for 5m patch
       sjobs projects/max_env/extract_ign.py -m source50cm=True  # for 50cm patch
"""

from datascience.tools.ign.check_extraction import check_extraction
from datascience.tools.ign.extract_7z import extract_7z
from datascience.tools.ign.extract_patch import extract_patch
from engine.parameters.special_parameters import get_parameters

if get_parameters('test', False):
    test = '_test'
else:
    test = ''
if get_parameters('source50cm', False):
    source = 'ign_50cm_maps_and_patches' + test
else:
    source = 'ign_5m_maps_and_patches' + test

if get_parameters('check_only', False):
    check_extraction(source=source)
else:
    if get_parameters('uncompress', False):
        # uncompress the IGN maps
        extract_7z(source=source)

    # extract patches from a dataset and the IGN maps
    extract_patch(source, offset=get_parameters('offset', 0))
Example #11
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.environmental_dataset import EnvironmentalDataset
from datascience.visu.spatial_map_plots import plot_on_map
from engine.parameters.special_parameters import get_parameters
from datascience.data.rasters.environmental_raster_glc import raster_metadata
import numpy as np
import math

raster = get_parameters('raster', 'alti')
cmap = get_parameters('cmap', 'viridis')

# loading dataset
_, _, grid_points = occurrence_loader(EnvironmentalDataset,
                                      source='grid_occs_1km',
                                      test_size=1,
                                      label_name=None,
                                      size_patch=1,
                                      add_all=False)

grid_points.extractor.append(raster)

# initialise with NaN (rather than zeros) so unfilled cells stay blank on the map
r = np.full((len(grid_points.dataset), 1), np.nan, dtype=float)

# extreme initial values, presumably refined over the raster in the loop below
max = -2000
min = 10000
print(raster_metadata[raster]['nan'])  # nodata value for the selected raster

list_neg = []

for i, data in enumerate(grid_points.dataset):