コード例 #1
0
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.ml.sklearn.train import fit
from datascience.ml.sklearn.util import load_or_create
from datascience.model_selection import train_test_split_stratified
# sklearn.ensemble.forest was a private module, deprecated in scikit-learn
# 0.22 and removed in 0.24; the public package exports the same class.
from sklearn.ensemble import RandomForestClassifier

# loading dataset: GLC19 occurrences with no validation split
train, _, test = occurrence_loader(GeoLifeClefDataset,
                                   source='glc19_pl_complete',
                                   validation_size=0)

# load a previously saved random forest, or create a fresh one
model = load_or_create(RandomForestClassifier, n_estimators=100, max_depth=12)

# training model: top-k accuracy (k = 1, 10, 30) and MRR, both by species
training_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]),
                ValidationMRRBySpecies())
}
fit(model, train=train, test=test, training_params=training_params)
コード例 #2
0
from projects.max_env.configs.inception import training_params, validation_params, model_params
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.model_selection import SplitterGeoQuadra
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit

# Adjust the shared inception configuration for a 77-channel input,
# then load (or build from scratch) the network.
model_params.update({'n_input': 77, 'dropout': 0.75})
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# Spatially-blocked split of the GLC18 occurrences (quad_size=10).
quadra_splitter = SplitterGeoQuadra(quad_size=10)
train, val, test = occurrence_loader(
    EnvironmentalDataset,
    source='glc18',
    splitter=quadra_splitter,
)

# Train and evaluate with the shared max_env configuration.
fit(model, train=train, val=val, test=test,
    training_params=training_params, validation_params=validation_params)
コード例 #3
0
import matplotlib.pyplot as plt
from pyproj import Proj, Transformer
import numpy as np
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.model_selection import SplitterGeoQuadra
from datascience.tools.occurrences_plot.occurrences_plot import plot_occurrences
from engine.parameters import get_parameters

# Command-line overridable settings (second element is the default).
_defaults = (
    ('source', 'glc18'),
    ('quad_size', 0),
    ('validation_size', 0.1),
    ('test_size', 0.1),
)
source, quad_size, validation_size, test_size = (
    get_parameters(key, default) for key, default in _defaults
)

# Split the occurrences into train/val/test using spatial quadrats.
train, val, test = occurrence_loader(
    EnvironmentalDataset,
    source=source,
    splitter=SplitterGeoQuadra(quad_size=quad_size),
    validation_size=validation_size,
    test_size=test_size)

# Draw the three occurrence sets on a map.
plot_occurrences(train, val, test)
コード例 #4
0
from datascience.visu.util import save_fig
from datascience.visu.patch import pplot_patch
import numpy as np

# with option --more idx=12 to change the index from the command line...
from engine.logging import print_info
from engine.parameters.special_parameters import get_parameters
# The two names below were used without being imported; the paths match the
# other scripts in this project (see the full_ign_5m / IGN examples).
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalIGNDataset

# load the idx + 1 first elements

idx = get_parameters('idx', 0)

# Only the first idx + 1 occurrences are needed to reach item `idx`.
train, _, _ = occurrence_loader(EnvironmentalIGNDataset,
                                source='full_ign',
                                id_name='X_key',
                                label_name='glc19SpId',
                                validation_size=0,
                                test_size=0,
                                limit=idx + 1)

# A dataset item unpacks to (patch, <unused>); keep only the patch.
patch, _ = train[idx]

# Cast every layer to integer values (assumes torch-like tensors with
# an .int() method — TODO confirm).
patch = [layer.int() for layer in patch]

# Merge the last three layers into one H x W x 3 array (RGB-like view),
# keeping the remaining layers as separate 2-D channels.
patch = patch[:-3] + [np.transpose(np.stack(patch[-3:], axis=0), (1, 2, 0))]

print_info('Printing patch at ' + str(train.dataset[idx]))

pplot_patch(patch, header=train.named_dimensions)

save_fig()
コード例 #5
0
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
# The names below were used without being imported; paths follow the other
# scripts in this project.
from datascience.data.loader import occurrence_loader
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.ml.metrics import ValidationAccuracy
# NOTE(review): training_params, validation_params and CategoricalPoissonLoss
# are still undefined in this script. Their origin is not visible in this
# file — confirm the config module (e.g. projects.*.configs.inception) and
# the loss class location, and add the matching imports before running.

model_params = {
    'n_labels': 6823,
    'n_input': 77,
    'exp': True,  # poisson loss,
    'normalize_weight': 2.  # poisson loss
}

# load (or create) the environmental inception model
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)


# loading dataset, limited to the first 1000 occurrences
train, val, test = occurrence_loader(
    EnvironmentalDataset,
    source='glc18',
    id_name='patch_id',
    label_name='species_glc_id',
    limit=1000
)

# override the shared training configuration for this experiment
training_params['loss'] = CategoricalPoissonLoss()
training_params['log_modulo'] = 1
training_params['iterations'] = [10]
training_params['lr'] = 0.01

validation_params['metrics'] = (ValidationAccuracy(1),)  # let us just analyse convergence first

fit(model, train=train, val=val, test=test, training_params=training_params, validation_params=validation_params)
コード例 #6
0
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.xgboost.train import fit
from engine.parameters.special_parameters import get_parameters

# Tree depth is overridable from the command line (default 2).
max_depth = get_parameters('max_depth', 2)

# GBIF/TAXREF occurrences with 1x1 patches; no validation split.
train, _, test = occurrence_loader(
    EnvironmentalDataset, source='gbif_taxref', validation_size=0, size_patch=1
)

# Metrics reported during training; final_validation=True ones are flagged
# to run on the final evaluation only.
training_params = {
    'metrics': (
        ValidationAccuracyMultipleBySpecies([1, 10, 30]),
        ValidationMRRBySpecies(),
        ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
        ValidationAccuracyForAllSpecies(train=train, final_validation=True),
    )
}

# GPU-accelerated multi-class boosting configuration, forwarded to fit()
# as keyword arguments.
booster_options = dict(
    objective='multi:softprob', max_depth=max_depth, seed=4242,
    eval_metric='merror', num_class=4520, num_boost_round=360,
    early_stopping_rounds=10, verbose_eval=1, updater='grow_gpu',
    predictor='gpu_predictor', tree_method='gpu_hist',
)
fit(train=train, test=test, training_params=training_params, **booster_options)
コード例 #7
0
from datascience.ml.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from datascience.model_selection import train_test_split_stratified
from datascience.tools.activations_map.get_activations import get_species_neurons_activations

from projects.ecography.configs.inception import model_params

# Restore a previously trained inception model (from_scratch=False).
model = load_create_nn(
    model_class=InceptionEnv,
    model_params=model_params,
    from_scratch=False,
)

# Every point of the 1 km grid goes to the test split (test_size=1);
# these points carry no labels.
_, _, grid_points = occurrence_loader(
    EnvironmentalDataset,
    source='grid_occs_1km',
    id_name='id',
    test_size=1,
    label_name=None,
)

# Record the model's neuron activations over the grid points.
get_species_neurons_activations(model, grid_points)
コード例 #8
0
from datascience.ml.neural.checkpoints import create_model
from projects.max_env.configs.inception import training_params, validation_params, model_params, optim_params
from datascience.ml.neural.models import InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit

# Adapt the shared inception configuration to the 77-channel input.
model_params.update({'n_input': 77, 'dropout': 0.75})
model = create_model(model_class=InceptionEnv, model_params=model_params)

# Full GLC19 dataset with the loader's default split.
train, val, test = occurrence_loader(EnvironmentalDataset, source='glc19_fulldataset')

# Cap the test set size.
test.limit = 30000

# Train with the shared training / validation / optimizer configurations.
fit(model, train=train, val=val, test=test,
    training_params=training_params,
    validation_params=validation_params,
    optim_params=optim_params)
コード例 #9
0
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.ml.sklearn.train import fit
from datascience.ml.sklearn.util import load_or_create
from datascience.model_selection import train_test_split_stratified
# sklearn.ensemble.forest was a private module, deprecated in scikit-learn
# 0.22 and removed in 0.24; the public package exports the same class.
from sklearn.ensemble import RandomForestClassifier

# loading dataset: everything goes to the train split (no val, no test split
# sizes requested here; `test` comes back from the loader regardless)
train, _, test = occurrence_loader(
    GeoLifeClefDataset, source='gbif_taxref', validation_size=0, test_size=0
)

# load a previously saved random forest, or create a fresh one
model = load_or_create(RandomForestClassifier, n_estimators=100, max_depth=17)

# training model: top-k accuracy (k = 1, 10, 30) and MRR, both by species
training_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies())
}
fit(model, train=train, test=test, training_params=training_params)
コード例 #10
0
from datascience.ml.neural.supervised import fit
from datascience.ml.neural.supervised.predict.predict_grid import predict_grid
from datascience.ml.evaluation.export import export_results
from datascience.ml.evaluation.filters import FilterLabelsList
from datascience.tools.knn_tools.knn_index import extract_cooccurrences_multipoints
# The names below were used without being imported; paths follow the other
# scripts in this project.
from datascience.ml.neural.models import load_create_nn
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
# NOTE(review): InceptionEnvCoocs is still undefined here — its import path
# is not visible in this file; confirm it (likely alongside the other model
# classes in datascience.ml.neural.models) and add the import.

model_params = {'dropout': 0.8, 'n_labels': 3336, 'n_input': 77, 'config': 0}

# loading/creating model
model = load_create_nn(model_class=InceptionEnvCoocs,
                       model_params=model_params)

# loading dataset: GLC19 test occurrences, all placed in the test split,
# with no labels
_, _, test = occurrence_loader(GeoLifeClefDataset,
                               source='glc19_test',
                               test_size=1,
                               id_name='glc19TestOccId',
                               label_name=None)

# GLC18 occurrences, all kept in the train split
# (note: 'second_neihbour' spelling is the loader's keyword as-is)
train, _, _ = occurrence_loader(GeoLifeClefDataset,
                                source='glc18',
                                test_size=0.0,
                                validation_size=0.0,
                                id_name='patch_id',
                                label_name='species_glc_id',
                                second_neihbour=False)

extract_cooccurrences_multipoints(train,
                                  test,
                                  leaf_size=2,
                                  validation=None,
コード例 #11
0
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import species_train_test_occurrences
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters

# Species index, overridable from the command line (default 0).
species = get_parameters('species', 0)

# GBIF/TAXREF occurrences with 1x1 patches, 10% validation and 10% test.
train, val, test = occurrence_loader(
    GeoLifeClefDataset,
    source='gbif_taxref',
    validation_size=0.1,
    size_patch=1,
    test_size=0.1,
)

# Source metadata; 'label_species' is the lookup used for species labels.
sources = check_source('gbif_taxref')

# Plot the train/val/test occurrences of the requested species.
species_train_test_occurrences(
    sources['label_species'], train, val, test, species=species
)
コード例 #12
0
from datascience.ml.neural.models import InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalIGNDataset
from datascience.ml.neural.supervised import fit
from datascience.ml.neural.checkpoints import create_model
from projects.max_env.configs.inception import training_params, validation_params, model_params, optim_params

# Build the environmental inception network (more channels than the
# classical inception).
model = create_model(model_class=InceptionEnv, model_params=model_params)

# Full IGN dataset at 5 m resolution.
train, val, test = occurrence_loader(EnvironmentalIGNDataset, source='full_ign_5m')

# full_ign_5m is too large to evaluate in full: cap the test set.
test.limit = 30000

# Train with the shared max_env configurations.
fit(model, train=train, val=val, test=test,
    training_params=training_params,
    validation_params=validation_params,
    optim_params=optim_params)
コード例 #13
0
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from datascience.ml.neural.supervised.predict.predict_grid import predict_grid
from datascience.ml.evaluation.export import export_results
from datascience.ml.evaluation.filters import FilterLabelsList
# load_create_nn and InceptionEnv were used below but never imported;
# the path matches the other scripts in this project.
from datascience.ml.neural.models import load_create_nn, InceptionEnv

from projects.best_of_labs.configs.inception import model_params, training_params

# loading/creating model
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# loading dataset: GLC19 test occurrences, all in the test split, unlabeled
_, _, test = occurrence_loader(EnvironmentalDataset,
                               source='glc19_test',
                               test_size=1,
                               id_name='glc19TestOccId',
                               label_name=None)

# predict over the test set (plain probabilities: no features, no logits)
predictions = predict_grid(model,
                           test,
                           batch_size=128,
                           features_activation=False,
                           logit=False)

# NOTE(review): user-specific absolute path — should come from configuration
# or a path relative to the project root.
f = FilterLabelsList(
    '/home/benjamin/pycharm/Data-science-2.0/projects/best_of_labs/allowed_classes.txt'
)

# restrict the predictions to the allowed classes
f(predictions)
コード例 #14
0
from datascience.ml.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from sklearn.model_selection import train_test_split
from projects.ecography.configs.inception import model_params, training_params

# Load (or create) the inception model from the shared ecography config.
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# Random (non-spatial) split of the GBIF/TAXREF occurrences, delegated to
# sklearn's train_test_split.
train, val, test = occurrence_loader(
    EnvironmentalDataset,
    source='gbif_taxref',
    splitter=train_test_split,
)

# Validation metrics; those flagged final_validation=True are marked for the
# final evaluation.
validation_params = {
    'metrics': (
        ValidationAccuracyMultipleBySpecies([1, 10, 30]),
        ValidationMRRBySpecies(),
        ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
        ValidationAccuracyForAllSpecies(train=train, final_validation=True),
    )
}

fit(model,
    train=train,
    val=val,
    test=test,
    training_params=training_params,
コード例 #15
0
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import plot_species_on_map
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters
# occurrence_loader was used below but never imported; the path matches
# every other script in this project.
from datascience.data.loader import occurrence_loader

# Plotting options, all overridable from the command line.
species = get_parameters('species', 0)
mean_size = get_parameters('mean_size', 1)
figsize = get_parameters('figsize', 5)
log_scale = get_parameters('log_scale', False)
softmax = get_parameters('softmax', False)
alpha = get_parameters('alpha', None)

# loading dataset: the whole 1 km grid goes to the test split, unlabeled
_, _, grid_points = occurrence_loader(GeoLifeClefDataset,
                                      source='grid_occs_1km',
                                      id_name='id',
                                      test_size=1,
                                      label_name=None)

# source metadata; 'label_species' is the lookup used for species labels
sources = check_source('gbif_taxref')

# plot the requested species over the grid
plot_species_on_map(grid_points,
                    label_species=sources['label_species'],
                    species=species,
                    mean_size=mean_size,
                    figsize=figsize,
                    log_scale=log_scale,
                    softmax=softmax,
                    alpha=alpha)
コード例 #16
0
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.light_gbm import fit
from datascience.ml.metrics import ValidationAccuracy

# Small smoke-test run: only 100 GLC18 occurrences, 1x1 patches.
train, val, test = occurrence_loader(
    EnvironmentalDataset, source='glc18', limit=100, size_patch=1
)

# Train LightGBM and report accuracy on validation.
validation_params = {'metrics': (ValidationAccuracy(),)}
fit(train, test, val, validation_params=validation_params)
コード例 #17
0
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.environmental_dataset import EnvironmentalDataset
from datascience.visu.spatial_map_plots import plot_on_map
from engine.parameters.special_parameters import get_parameters
from datascience.data.rasters.environmental_raster_glc import raster_metadata
import numpy as np
import math

# Raster layer and colormap are overridable from the command line.
raster = get_parameters('raster', 'alti')
cmap = get_parameters('cmap', 'viridis')

# loading dataset: every point of the 1 km grid goes to the test split,
# unlabeled, with 1x1 patches; add_all=False presumably skips the default
# raster set so only the one appended below is extracted — TODO confirm.
_, _, grid_points = occurrence_loader(EnvironmentalDataset,
                                      source='grid_occs_1km',
                                      test_size=1,
                                      label_name=None,
                                      size_patch=1,
                                      add_all=False)

# Extract only the requested raster layer.
grid_points.extractor.append(raster)

# One value per grid point, initialised to NaN (the commented-out line shows
# the previous zero initialisation).
# r = np.zeros((len(grid_points.dataset), 1), dtype=float)
r = np.full((len(grid_points.dataset), 1), np.nan, dtype=float)

# Running extrema, presumably updated in the (truncated) loop below.
# NOTE(review): these shadow the builtins max()/min() — rename once the rest
# of this script can be updated together.
max = -2000
min = 10000
print(raster_metadata[raster]['nan'])

# Collector, presumably filled in the loop below.
list_neg = []
for i, data in enumerate(grid_points.dataset):