Example #1
def create_ign_sparse(source_occ,
                      source_ign,
                      patch_size=64,
                      error_path=output_path("error_extract/"),
                      **kwargs):

    r = check_source(source_occ)
    occurrences = r['occurrences']
    r = check_source(source_ign)
    ign_images = r['maps']

    la93 = Proj(init='epsg:2154')  # Lambert-93 projection (EPSG:2154)

    # extract manager
    im_manager = IGNImageManager(ign_images)
    extract_size = patch_size
    extract_step = 1

    # loading the occurrence file
    df = pd.read_csv(occurrences, header='infer', sep=';', low_memory=False)
    max_lat = df['Latitude'].max()
    print(max_lat)

    # sorting the dataset to optimise the extraction
    df.sort_values('Latitude', inplace=True)

    print_info(str(len(df)) + ' occurrences to extract!')
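For orientation, a call might look like the following sketch; both source names are placeholders, not taken from the original.

# hypothetical usage; 'gbif_occurrences' and 'ign_5m_maps' are placeholder source names
create_ign_sparse('gbif_occurrences', 'ign_5m_maps', patch_size=64)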
Example #2
def pplot(latitude,
          longitude,
          source,
          resolution=1.,
          style=special_parameters.plt_style,
          nb_cols=5,
          alpha=1.):
    """
    patch plot
    :param style:
    :param latitude:
    :param longitude:
    :param source:
    :param resolution:
    :return:
    """
    r = check_source(source)
    rasters = r['rasters']
    extractor = PatchExtractor(rasters, resolution=resolution)
    extractor.add_all()
    return extractor.plot(item=(latitude, longitude),
                          return_fig=True,
                          style=style,
                          nb_cols=nb_cols,
                          alpha=alpha)
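A minimal call might look like the sketch below; the coordinates and the source name are placeholders, and the source is assumed to expose a 'rasters' entry.

# hypothetical usage; plots the raster layers around the given point
pplot(43.61, 3.87, source='glc20', resolution=1., nb_cols=5, alpha=1.)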
Example #3
    def __init__(self, source, transform=None, input_size=299):
        r = check_source(source)
        self.source = source
        path = r['path']
        if transform is None:
            self.train_transform = transforms.Compose([
                transforms.RandomResizedCrop(input_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],   # ImageNet mean
                                     [0.229, 0.224, 0.225])   # ImageNet std
            ])
            self.test_transform = transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ])
        else:
            self.transform = transform

        dataset = _load_dataset(path)
        random.shuffle(dataset)

        self.country = []
        self.painter = []
        self.type = []
        self.path = []

        # build parallel metadata lists; the image path is the join of all row fields
        for row in dataset:
            self.country.append(row[0])
            self.painter.append(row[1])
            self.type.append(row[2])
            self.path.append(os.path.join(*row))
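The enclosing class is not shown; assuming it is a torch-style dataset built on the parallel lists above (and that PIL is available), a companion __getitem__ could look like this hypothetical sketch:

    def __len__(self):
        return len(self.path)

    def __getitem__(self, idx):
        # sketch only: load the image and return it with its three labels
        image = Image.open(self.path[idx]).convert('RGB')  # assumes `from PIL import Image`
        image = self.train_transform(image)  # the test path would use self.test_transform
        return image, (self.country[idx], self.painter[idx], self.type[idx])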
Example #4
def extract_7z(source, extension='.7z'):

    # loading a specific source
    r = check_source(source)

    dir_name = r['archive']
    dest_name = r['maps']

    os.chdir(dir_name)  # change directory from working dir to dir with files

    n = len(os.listdir(dir_name))

    for i, item in enumerate(
            os.listdir(dir_name)):  # loop through items in dir
        print_info(
            '\n------------------------------------------------------------------------------'
        )
        print_info(str(i + 1) + '/' + str(n))
        if item.endswith(
                extension):  # check for the requested extension ('.7z' by default)
            file_name = os.path.abspath(item)  # get the full path of the file
            print_h2(file_name)
            print_info('\n')

            # extract the archive into the destination directory
            os.system('7z x ' + file_name + ' -o' + dest_name)
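A usage sketch, assuming a source whose 'archive' entry is the directory of .7z files and whose 'maps' entry is the extraction destination (the source name is a placeholder):

# hypothetical usage; extracts every .7z archive of the source into its maps directory
extract_7z('ign_5m', extension='.7z')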
Example #5
    def __init__(self, source, nb_try_max=1000, islands_sup=0, close_target=False, auto_restart=True):
        """
        :param root_dir: the root dir of the grib files
        :param polar: the polar path to the file
        :param nb_try_max: the number of allowed tries
        """
        super().__init__()

        self.autorestart = auto_restart

        r = check_source(source)
        if 'path' not in r:
            print_errors('The source ' + source + ' does not contain path', do_exit=True)
        if 'polar' not in r:
            print_errors('The source ' + source + ' does not contain polar', do_exit=True)

        self.root_dir = r['path']

        self.game = None

        self.numpy_grib = None
        self.polar = Polar(path_polar_file=r['polar'])

        self.target = None
        self.position = None
        self.start_position = None

        self.grib_list = [file for file in os.listdir(self.root_dir) if file.endswith('.npz')]

        self.start_timestamp = None
        self.timedelta = None
        self.track = None

        self.score = 0
        self.score_ = 0

        self.nb_try = 0
        self.nb_try_max = nb_try_max

        self.dist = None
        self.old_dist = None
        self.dir = None
        self.sog = None
        self.cog = None

        self.twa = None
        self.tws = None

        self.twd = None

        self.close_target = close_target

        self.islands_sup = islands_sup

        self.bins = np.array([i * 45 for i in range(8)])  # direction bins every 45 degrees (0 to 315)
        self.start()
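The class name is not shown; assuming it is instantiated directly, construction might look like the sketch below (the class and source names are placeholders):

# hypothetical usage; the source must expose 'path' (a directory of .npz gribs) and 'polar'
env = SailingEnvironment(source='wind_gribs', nb_try_max=500, auto_restart=True)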
Example #6
def raster_characteristics(source):
    """
    print infos about the rasters
    :param source:
    :return:
    """
    r = check_source(source)
    rasters = r['rasters']
    extractor = PatchExtractor(rasters)
    extractor.add_all()

    print_statistics(str(extractor))
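A minimal call, assuming a source that references rasters (the source name is a placeholder):

raster_characteristics('glc20')  # prints the statistics of every raster layer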
Example #7
def occurrence_loader(dataset_class,
                      source=None,
                      validation_size=0.1,
                      test_size=0.1,
                      splitter=train_test_split,
                      filters=tuple(),
                      online_filters=tuple(),
                      postprocessing=tuple(),
                      save_index='default',
                      limit=None,
                      **kwargs):
    """
    Load an occurrence dataset.
    :param dataset_class: the type of dataset (with rasters or not, etc.)
    :param source: the source name
    :param validation_size: [0, 1]
    :param test_size: [0, 1]
    :param splitter: the train/test split function (by default, sklearn's train_test_split)
    :param filters: post filters
    :param online_filters: filters that are applied when loading the data
    :param postprocessing: additional transformations
    :param save_index: load, save, default, load_and_save, auto
    :param limit: the number of elements to load
    :param kwargs:
    :return: train, validation, test sets
    """
    if source is not None:
        r = check_source(source)
        merge_smooth(kwargs, r)

    return _occurrence_loader(dataset_class,
                              validation_size=validation_size,
                              test_size=test_size,
                              splitter=splitter,
                              filters=filters,
                              online_filters=online_filters,
                              postprocessing=postprocessing,
                              save_index=save_index,
                              limit=limit,
                              **kwargs)
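Usage mirrors the grid-loading call in Example #10; a sketch of a standard train/validation/test load (the source name is a placeholder here):

# hypothetical usage; GeoLifeClefDataset is imported as in Example #10
train, validation, test = occurrence_loader(GeoLifeClefDataset,
                                            source='gbif_taxref',
                                            validation_size=0.1,
                                            test_size=0.1)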
Example #8
def extract_patch(source, offset=0, check_file=True):
    """
    Extract IGN patch from IGN maps.
    :param source:
    :param offset:
    :param check_file:
    :return:
    """

    # checking the source
    r = check_source(source)

    # extract manager
    im_manager = IGNImageManager(r['maps'])
    extract_size = 64
    extract_step = 1

    # loading the occurrence file
    df = pd.read_csv(r['occurrences'],
                     header='infer',
                     sep=';',
                     low_memory=False)

    # sorting the dataset to optimise the extraction
    df.sort_values('Latitude', inplace=True)

    # offset management
    df = df.iloc[offset:]

    print_info(str(len(df)) + ' occurrences to extract!')

    im_manager.extract_patches(
        df[[r['longitude'], r['latitude'], r['id_name']]],
        r['patches'],
        size=extract_size,
        step=extract_step,
        check_file=check_file)
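A usage sketch; the source name is a placeholder and must reference 'occurrences', 'maps', 'patches' and the column names used above:

# hypothetical usage; restarts the extraction at row 10000 of the occurrence file
extract_patch('ign_source', offset=10000, check_file=True)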
Example #9
def load_multitask_bernoulli_dataset(source,
                                     test_size=0.1,
                                     val_size=0.1,
                                     transform=None,
                                     splitter=train_test_split):
    r = check_source(source)
    path = r['path']
    classes_index = {}
    labels_index = {}
    labels = []
    classes = []
    images = []

    for c in os.listdir(path):
        classes_index[c] = len(classes_index)  # to index the classes
        path_class = os.path.join(path, c)
        for label in os.listdir(path_class):
            path_label = os.path.join(path_class, label)

            # this is a Bernoulli task, so there must be only two labels (positive and negative),
            # and the label folder names must be shared across classes
            if label not in labels_index:
                if len(labels_index) >= 2:
                    raise PosNegLabelException(
                        'All positive and negative label folders must have the same name...'
                    )
                if 'pos' in label.lower():
                    labels_index[label] = 1
                elif 'neg' in label.lower():
                    labels_index[label] = 0
                else:
                    labels_index[label] = len(labels_index)
            for image in os.listdir(path_label):
                labels.append(
                    (label, labels_index[label]))  # label name, label ID
                classes.append((c, classes_index[c]))  # class name, class ID
                images.append(os.path.join(path_label, image))  # image path
    dataset = (labels, classes, images)

    # dataset split
    train, test = perform_split(dataset, test_size, splitter)
    train, val = perform_split(train, val_size, splitter)

    if transform is None:
        transform = {
            'train': transforms.Compose([transforms.ToTensor()]),
            'test': transforms.Compose([transforms.ToTensor()])
        }
    train = ImageDatasetMTBernoulli(train,
                                    len(classes_index),
                                    transform=transform['train'])
    val = ImageDatasetMTBernoulli(val,
                                  len(classes_index),
                                  transform=transform['test'])
    test = ImageDatasetMTBernoulli(test,
                                   len(classes_index),
                                   transform=transform['test'])

    print_dataset_statistics(len(train), len(val), len(test), source,
                             len(classes_index))
    return train, val, test, len(classes_index)
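A usage sketch, assuming the source's 'path' points to a class/label/image folder hierarchy (the source name is a placeholder):

# hypothetical usage; returns the three splits and the number of classes
train, val, test, nb_classes = load_multitask_bernoulli_dataset('paintings',
                                                                test_size=0.1,
                                                                val_size=0.1)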
Example #10
from datascience.data.datasets.dataset_simple import GeoLifeClefDataset
from datascience.tools.activations_map.plot_activations_maps import plot_species_on_map
from datascience.data.util.source_management import check_source
from engine.parameters.special_parameters import get_parameters

species = get_parameters('species', 0)
mean_size = get_parameters('mean_size', 1)
figsize = get_parameters('figsize', 5)
log_scale = get_parameters('log_scale', False)
softmax = get_parameters('softmax', False)
alpha = get_parameters('alpha', None)

# loading the dataset; with test_size=1, all the grid points end up in the test split (grid_points)
_, _, grid_points = occurrence_loader(GeoLifeClefDataset,
                                      source='grid_occs_1km',
                                      id_name='id',
                                      test_size=1,
                                      label_name=None)

sources = check_source('gbif_taxref')

# get activations
plot_species_on_map(grid_points,
                    label_species=sources['label_species'],
                    species=species,
                    mean_size=mean_size,
                    figsize=figsize,
                    log_scale=log_scale,
                    softmax=softmax,
                    alpha=alpha)
Example #11
from datascience.data.util.source_management import check_source
import json
import pandas as pd

from engine.logging import print_info

source = check_source('glc20')
raw_occurrences_path = source['raw_source']
occurrences_path = source['occurrences']  # destination

with open(raw_occurrences_path, 'rb') as f:
    d = json.load(f)

data = {'id': [], 'lat': [], 'lon': [], 'species_id': [], 'species_name': []}

for row in d:
    if row['results']['status'] == 'BEST_REF':
        data['id'].append(row['id'])
        data['lat'].append(row['lat'])
        data['lon'].append(row['lon'])
        data['species_id'].append(row['results']['id'])
        data['species_name'].append(row['results']['name'])

df = pd.DataFrame(data=data)

print_info('Saving file')

df.to_csv(occurrences_path, header=True, sep=';', index=False)
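For reference, the loop above expects JSON records shaped roughly as follows; this record is a hypothetical reconstruction showing only the fields that are accessed:

example_record = {
    'id': 123,                   # occurrence id
    'lat': 43.61, 'lon': 3.87,   # coordinates
    'results': {'status': 'BEST_REF', 'id': 456, 'name': 'Quercus ilex'}  # matched species
}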
Example #12
def check_extraction(source,
                     save_errors=True,
                     save_filtered=True,
                     id_name='X_key'):
    """
    check if all patches from an occurrences file have been extracted. Can save the list of errors and
    filtered the dataset keeping the correctly extracted data.

    :param id_name: the column that contains the patch id that will be used to construct its path
    :param save_filtered: save the dataframe filtered from the error
    :param save_errors: save the errors found in a file
    :param source: the source referring the occurrence file and the patches path
    """

    # retrieve details of the source
    r = check_source(source)
    if 'occurrences' not in r or 'patches' not in r:
        print_errors(
            'Only sources with occurrences and patches can be checked',
            do_exit=True)

    df = pd.read_csv(r['occurrences'],
                     header='infer',
                     sep=';',
                     low_memory=False)
    nb_errors = 0
    errors = []
    for idx, row in progressbar.progressbar(enumerate(df.iterrows())):
        patch_id = str(int(row[1][id_name]))

        # constructing the path of a patch given its id
        path = os.path.join(r['patches'], patch_id[-2:], patch_id[-4:-2],
                            patch_id + '.npy')

        # if the path does not correspond to a file, then it's an error
        if not os.path.isfile(path):
            errors.append(row[1][id_name])
            nb_errors += 1

    if nb_errors > 0:
        # summary of the errors
        print_info(str(nb_errors) + ' errors found during the check...')

        if save_errors:
            # filter the dataframe using the errors
            df_errors = df[df[id_name].isin(errors)]

            error_path = output_path('_errors.csv')
            print_info('Saving error file at: ' + error_path)

            # save dataframe to the error file
            df_errors.to_csv(error_path, header=True, index=False, sep=';')
        if save_filtered:
            # filter the dataframe keeping the non errors
            df_filtered = df[~df[id_name].isin(errors)]
            filtered_path = r['occurrences'] + '.tmp'
            print_info('Saving filtered dataset at: ' + filtered_path)
            df_filtered.to_csv(filtered_path,
                               header=True,
                               index=False,
                               sep=';')
    else:
        print_info('No error has been found!')
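The patch path scheme used above (last two digits of the id, then the previous two, then '<id>.npy') could be factored into a small helper; a sketch, not part of the original module:

def patch_path(patches_dir, patch_id):
    # hypothetical helper mirroring the path construction in check_extraction
    patch_id = str(int(patch_id))
    return os.path.join(patches_dir, patch_id[-2:], patch_id[-4:-2], patch_id + '.npy')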