"""Train a random forest on GLC19 occurrences and evaluate species-ranking metrics."""
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.ml.sklearn.train import fit
from datascience.ml.sklearn.util import load_or_create
from datascience.model_selection import train_test_split_stratified
# FIX: sklearn.ensemble.forest is a private module that was removed in
# scikit-learn 0.24; RandomForestClassifier is exported by the public
# sklearn.ensemble package.
from sklearn.ensemble import RandomForestClassifier

# loading dataset (no validation split: train/test only)
train, _, test = occurrence_loader(GeoLifeClefDataset, source='glc19_pl_complete', validation_size=0)

# Load a previously saved model, or create a fresh one with these hyper-parameters.
model = load_or_create(RandomForestClassifier, n_estimators=100, max_depth=12)

# training model — top-k accuracy and MRR per species on the test split
training_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies())
}
fit(model, train=train, test=test, training_params=training_params)
"""Train an Inception model on GLC18 with a geographic-quadrat data split."""
from projects.max_env.configs.inception import training_params, validation_params, model_params
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.model_selection import SplitterGeoQuadra
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit

# Override the shared config for this experiment: 77 input channels, dropout 0.75.
model_params.update({'n_input': 77, 'dropout': 0.75})

# Load an existing checkpoint or create the network with the patched parameters.
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# Split the GLC18 occurrences spatially with quadrats of size 10.
spatial_splitter = SplitterGeoQuadra(quad_size=10)
train, val, test = occurrence_loader(EnvironmentalDataset, source='glc18', splitter=spatial_splitter)

fit(
    model,
    train=train,
    val=val,
    test=test,
    training_params=training_params,
    validation_params=validation_params,
)
"""Plot the spatial distribution of the train/val/test occurrence splits."""
import matplotlib.pyplot as plt
from pyproj import Proj, Transformer
import numpy as np
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.model_selection import SplitterGeoQuadra
from datascience.tools.occurrences_plot.occurrences_plot import plot_occurrences
from engine.parameters import get_parameters

# Every knob is overridable from the command line; the second argument is the default.
src = get_parameters('source', 'glc18')
quadrat_size = get_parameters('quad_size', 0)
val_fraction = get_parameters('validation_size', 0.1)
test_fraction = get_parameters('test_size', 0.1)

# loading dataset with a geographic quadrat splitter
splits = occurrence_loader(
    EnvironmentalDataset,
    source=src,
    splitter=SplitterGeoQuadra(quad_size=quadrat_size),
    validation_size=val_fraction,
    test_size=test_fraction,
)
train_set, val_set, test_set = splits

plot_occurrences(train_set, val_set, test_set)
"""Display the environmental/IGN patch of a single occurrence.

Run with option --more idx=12 to change the index from the command line.
"""
from datascience.visu.util import save_fig
from datascience.visu.patch import pplot_patch
import numpy as np
from engine.logging import print_info
from engine.parameters.special_parameters import get_parameters
# FIX: occurrence_loader and EnvironmentalIGNDataset were used without being
# imported (NameError at runtime); paths match the sibling scripts.
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalIGNDataset

# load the idx + 1 first elements only — limit avoids reading the whole source
idx = get_parameters('idx', 0)
train, _, _ = occurrence_loader(EnvironmentalIGNDataset, source='full_ign', id_name='X_key',
                                label_name='glc19SpId', validation_size=0, test_size=0, limit=idx + 1)

patch, _ = train[idx]
# Cast every layer to integers, then merge the last three layers into a single
# H x W x 3 image (presumably the RGB bands — TODO confirm).
patch = [l.int() for l in patch]
patch = patch[:-3] + [np.transpose(np.stack(patch[-3:], axis=0), (1, 2, 0))]

print_info('Printing patch at ' + str(train.dataset[idx]))
pplot_patch(patch, header=train.named_dimensions)
save_fig()
"""Convergence check of a categorical Poisson loss on a 1000-occurrence GLC18 subset."""
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
# FIX: the names below were used without being imported (NameError at runtime);
# import paths match the sibling scripts of this project.
from datascience.data.loader import occurrence_loader
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.ml.metrics import ValidationAccuracy
# NOTE(review): training_params / validation_params are mutated below but never
# defined here; they presumably come from a project config module
# (e.g. projects.max_env.configs.inception) — TODO confirm and import.
# NOTE(review): CategoricalPoissonLoss also has no visible import — locate its
# module and add the import before running.

model_params = {
    'n_labels': 6823,
    'n_input': 77,
    'exp': True,            # poisson loss,
    'normalize_weight': 2.  # poisson loss
}
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# loading dataset — small limit: we only want to watch the loss converge
train, val, test = occurrence_loader(
    EnvironmentalDataset, source='glc18', id_name='patch_id', label_name='species_glc_id', limit=1000
)

training_params['loss'] = CategoricalPoissonLoss()
training_params['log_modulo'] = 1
training_params['iterations'] = [10]
training_params['lr'] = 0.01
validation_params['metrics'] = (ValidationAccuracy(1),)  # let us just analyse convergence first

fit(model, train=train, val=val, test=test, training_params=training_params,
    validation_params=validation_params)
"""Train a GPU-backed XGBoost multi-class species model on environmental vectors."""
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.xgboost.train import fit
from engine.parameters.special_parameters import get_parameters

# Tree depth is overridable from the command line (default: 2).
max_depth = get_parameters('max_depth', 2)

# loading dataset: 1-pixel patches, no validation split
train, _, test = occurrence_loader(
    EnvironmentalDataset, source='gbif_taxref', validation_size=0, size_patch=1
)

# training model — species-ranking metric suite evaluated on the test split
metric_suite = (
    ValidationAccuracyMultipleBySpecies([1, 10, 30]),
    ValidationMRRBySpecies(),
    ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
    ValidationAccuracyForAllSpecies(train=train, final_validation=True),
)

# Booster configuration (GPU updater/predictor/tree method).
booster_options = dict(
    objective='multi:softprob',
    max_depth=max_depth,
    seed=4242,
    eval_metric='merror',
    num_class=4520,
    num_boost_round=360,
    early_stopping_rounds=10,
    verbose_eval=1,
    updater='grow_gpu',
    predictor='gpu_predictor',
    tree_method='gpu_hist',
)
fit(train=train, test=test, training_params={'metrics': metric_suite}, **booster_options)
"""Compute neuron activations of a trained Inception model over the 1 km grid."""
from datascience.ml.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from datascience.model_selection import train_test_split_stratified
from datascience.tools.activations_map.get_activations import get_species_neurons_activations
from projects.ecography.configs.inception import model_params

# loading/creating model — restore a previously trained network (from_scratch=False)
model = load_create_nn(model_class=InceptionEnv, model_params=model_params, from_scratch=False)

# loading dataset: every grid point goes to the test split (test_size=1),
# and the grid is unlabelled (label_name=None)
_, _, grid = occurrence_loader(
    EnvironmentalDataset, source='grid_occs_1km', id_name='id', test_size=1, label_name=None
)

# get activations over the grid points
get_species_neurons_activations(model, grid)
"""Train an Inception model on the full GLC19 dataset."""
from datascience.ml.neural.checkpoints import create_model
from projects.max_env.configs.inception import training_params, validation_params, model_params, optim_params
from datascience.ml.neural.models import InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit

# loading/creating model — 77 input channels and dropout 0.75 override the config
model_params.update({'n_input': 77, 'dropout': 0.75})
model = create_model(model_class=InceptionEnv, model_params=model_params)

# loading dataset
train, val, test = occurrence_loader(EnvironmentalDataset, source='glc19_fulldataset')
# Cap the test split at 30k points.
test.limit = 30000

fit(
    model,
    train=train,
    val=val,
    test=test,
    training_params=training_params,
    validation_params=validation_params,
    optim_params=optim_params,
)
"""Train a random forest on gbif_taxref occurrences and evaluate ranking metrics."""
from datascience.ml.metrics.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.ml.sklearn.train import fit
from datascience.ml.sklearn.util import load_or_create
from datascience.model_selection import train_test_split_stratified
# FIX: sklearn.ensemble.forest is a private module that was removed in
# scikit-learn 0.24; RandomForestClassifier is exported by the public
# sklearn.ensemble package.
from sklearn.ensemble import RandomForestClassifier

# loading dataset — validation_size=0 and test_size=0: everything lands in
# train; NOTE(review): test is presumably empty here — confirm fit() tolerates it.
train, _, test = occurrence_loader(
    GeoLifeClefDataset, source='gbif_taxref', validation_size=0, test_size=0
)

# Load a previously saved model, or create a fresh one with these hyper-parameters.
model = load_or_create(RandomForestClassifier, n_estimators=100, max_depth=17)

# training model — top-k accuracy and MRR per species
training_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies())
}
fit(model, train=train, test=test, training_params=training_params)
# NOTE(review): this script appears truncated — the trailing
# extract_cooccurrences_multipoints(...) call is cut off mid-argument-list,
# and load_create_nn / InceptionEnvCoocs / occurrence_loader /
# GeoLifeClefDataset are used without visible imports. Recover the missing
# tail and the imports before running; code left untouched below.
from datascience.ml.neural.supervised import fit
from datascience.ml.neural.supervised.predict.predict_grid import predict_grid
from datascience.ml.evaluation.export import export_results
from datascience.ml.evaluation.filters import FilterLabelsList
from datascience.tools.knn_tools.knn_index import extract_cooccurrences_multipoints

# Hyper-parameters for the co-occurrence Inception variant.
model_params = {'dropout': 0.8, 'n_labels': 3336, 'n_input': 77, 'config': 0}

# loading/creating model
model = load_create_nn(model_class=InceptionEnvCoocs, model_params=model_params)

# loading dataset — GLC19 test occurrences are unlabelled (label_name=None)
_, _, test = occurrence_loader(GeoLifeClefDataset, source='glc19_test', test_size=1,
                               id_name='glc19TestOccId', label_name=None)
# GLC18 occurrences all go to train (test_size=0.0, validation_size=0.0);
# note the 'second_neihbour' keyword is spelled this way in the project API.
train, _, _ = occurrence_loader(GeoLifeClefDataset, source='glc18', test_size=0.0, validation_size=0.0,
                                id_name='patch_id', label_name='species_glc_id', second_neihbour=False)

# NOTE(review): the call below is truncated in the source.
extract_cooccurrences_multipoints(train, test, leaf_size=2, validation=None,
"""Plot train/val/test occurrences of one species from the gbif_taxref source."""
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import species_train_test_occurrences
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters

# Species index is configurable from the command line (default: 0).
species_idx = get_parameters('species', 0)

# loading dataset — 80/10/10 split, 1-pixel patches
train, val, test = occurrence_loader(
    GeoLifeClefDataset, source='gbif_taxref', validation_size=0.1, size_patch=1, test_size=0.1
)

source_cfg = check_source('gbif_taxref')

species_train_test_occurrences(source_cfg['label_species'], train, val, test, species=species_idx)
"""Train an environmental Inception model on the 5 m IGN dataset."""
from datascience.ml.neural.models import InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalIGNDataset
from datascience.ml.neural.supervised import fit
from datascience.ml.neural.checkpoints import create_model
from projects.max_env.configs.inception import training_params, validation_params, model_params, optim_params

# creating environmental inception (more channels than classical inception)
model = create_model(model_class=InceptionEnv, model_params=model_params)

# loading dataset
train, val, test = occurrence_loader(EnvironmentalIGNDataset, source='full_ign_5m')

# memory issue on full_ign_5m due to size — cap the test split at 30k points
test.limit = 30000

# training model
fit(
    model,
    train=train,
    val=val,
    test=test,
    training_params=training_params,
    validation_params=validation_params,
    optim_params=optim_params,
)
"""Predict over the GLC19 test set and filter predictions to the allowed class list."""
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from datascience.ml.neural.supervised.predict.predict_grid import predict_grid
from datascience.ml.evaluation.export import export_results
from datascience.ml.evaluation.filters import FilterLabelsList
from projects.best_of_labs.configs.inception import model_params, training_params
# FIX: load_create_nn and InceptionEnv were used without being imported
# (NameError at runtime); the path matches the sibling scripts.
from datascience.ml.neural.models import load_create_nn, InceptionEnv

# loading/creating model
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# loading dataset — GLC19 test occurrences are unlabelled (label_name=None)
_, _, test = occurrence_loader(EnvironmentalDataset, source='glc19_test', test_size=1,
                               id_name='glc19TestOccId', label_name=None)

predictions = predict_grid(model, test, batch_size=128, features_activation=False, logit=False)

# Keep only classes listed in the allowed-classes file.
# NOTE(review): absolute path into a developer's home directory — consider a
# project-relative path or a command-line parameter.
f = FilterLabelsList(
    '/home/benjamin/pycharm/Data-science-2.0/projects/best_of_labs/allowed_classes.txt'
)
f(predictions)
# NOTE(review): the final fit(...) call is truncated in the source — it most
# likely continued with validation_params=validation_params). Recover the
# missing tail before running; code left untouched below.
from datascience.ml.metrics import ValidationAccuracyMultipleBySpecies, ValidationMRRBySpecies
from datascience.ml.metrics import ValidationAccuracyRangeBySpecies, ValidationAccuracyForAllSpecies
from datascience.ml.neural.models import load_create_nn, InceptionEnv
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.neural.supervised import fit
from sklearn.model_selection import train_test_split
from projects.ecography.configs.inception import model_params, training_params

# loading/creating model
model = load_create_nn(model_class=InceptionEnv, model_params=model_params)

# loading dataset — random (non-spatial) sklearn split
train, val, test = occurrence_loader(EnvironmentalDataset, source='gbif_taxref', splitter=train_test_split)

# training model — full species-ranking metric suite used at validation time
validation_params = {
    'metrics': (ValidationAccuracyMultipleBySpecies([1, 10, 30]), ValidationMRRBySpecies(),
                ValidationAccuracyRangeBySpecies(max_top_k=100, final_validation=True),
                ValidationAccuracyForAllSpecies(train=train, final_validation=True))
}
# NOTE(review): truncated call follows.
fit(model, train=train, val=val, test=test, training_params=training_params,
"""Plot one species' distribution on a map over the 1 km occurrence grid."""
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import plot_species_on_map
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters
# FIX: occurrence_loader was used without being imported (NameError at
# runtime); the path matches the sibling scripts.
from datascience.data.loader import occurrence_loader

# Command-line tunable parameters (second argument is the default).
species = get_parameters('species', 0)
mean_size = get_parameters('mean_size', 1)
figsize = get_parameters('figsize', 5)
log_scale = get_parameters('log_scale', False)
softmax = get_parameters('softmax', False)
alpha = get_parameters('alpha', None)

# loading dataset — the whole 1 km grid goes to the test split, unlabelled
_, _, grid_points = occurrence_loader(GeoLifeClefDataset, source='grid_occs_1km', id_name='id',
                                      test_size=1, label_name=None)

sources = check_source('gbif_taxref')

# get activations plotted on the map
plot_species_on_map(grid_points, label_species=sources['label_species'], species=species,
                    mean_size=mean_size, figsize=figsize, log_scale=log_scale, softmax=softmax,
                    alpha=alpha)
"""Small LightGBM smoke test on a 100-occurrence GLC18 subset."""
from datascience.data.loader import occurrence_loader
from datascience.data.datasets import EnvironmentalDataset
from datascience.ml.light_gbm import fit
from datascience.ml.metrics import ValidationAccuracy

# Tiny run: only 100 occurrences, 1-pixel patches.
train, val, test = occurrence_loader(EnvironmentalDataset, source='glc18', limit=100, size_patch=1)

# NOTE(review): positional order here is (train, test, val) — confirm against
# fit's signature.
metric_cfg = {'metrics': (ValidationAccuracy(),)}
fit(train, test, val, validation_params=metric_cfg)
# NOTE(review): this script appears truncated — the final for-loop has no body.
# Recover the missing tail before running; code left untouched below.
from datascience.data.loader import occurrence_loader
from datascience.data.datasets.environmental_dataset import EnvironmentalDataset
from datascience.visu.spatial_map_plots import plot_on_map
from engine.parameters.special_parameters import get_parameters
from datascience.data.rasters.environmental_raster_glc import raster_metadata
import numpy as np
import math

# Raster layer and colormap are configurable from the command line.
raster = get_parameters('raster', 'alti')
cmap = get_parameters('cmap', 'viridis')

# loading dataset — whole grid in the test split, unlabelled, 1-pixel patches
_, _, grid_points = occurrence_loader(EnvironmentalDataset, source='grid_occs_1km', test_size=1,
                                      label_name=None, size_patch=1, add_all=False)
# Extract only the requested raster layer.
grid_points.extractor.append(raster)

# r = np.zeros((len(grid_points.dataset), 1), dtype=float)
# One value per grid point, NaN until filled by the (truncated) loop below.
r = np.full((len(grid_points.dataset), 1), np.nan, dtype=float)
# NOTE(review): 'max' and 'min' shadow the Python builtins — consider renaming.
max = -2000
min = 10000
print(raster_metadata[raster]['nan'])
list_neg = []
# NOTE(review): loop body missing in the source.
for i, data in enumerate(grid_points.dataset):