Example #1
 def _get_results_by_threading(self, func, params):
     """
     Query github API by multithreading.
     return a list containing all results.
     """
     num_workers = self.num_workers
     if func.__name__ not in [
             "multi_pulls", "multi_commits", "multi_watchers"
     ]:
         num_workers = 1
     if self.debug_counts:
         p = ThPool(num_workers)
         pool_args = params[:self.debug_counts]
         return p.map(func, pool_args)
     else:
         stats = []
         start = time.time()
         for i in range(int(params.totalCount / self.batch_size) + 1):
             if self.num_workers != 1 and i != 0 and (
                     i + 1) * self.batch_size % 800 == 0:
                 print("Sleep 30 sec")
                 sleep(30)
             p = ThPool(num_workers)
             temp = p.map(
                 func,
                 params[i * self.batch_size:(i + 1) * self.batch_size])
             stats += temp
         print(
             f"{self.repo_name}, {func.__name__} takes: {round(time.time() - start, 3)} secs"
         )
     return stats
Example #2
    def begin_processing(self):
        pool = ThreadPool(nodes=Helper.config('threads'))

        for course in self.course_data:
            pool.map(self.download_lesson, course['lessons'])
            print(
                '--- Course "{course_title}" has been downloaded, with total of "{lessons_amount}" lessons.'
                .format(course_title=course['title'],
                        lessons_amount=len(course['lessons'])))
            time.sleep(Helper.config('sleep'))
Example #3
 def _split_variable(self):
     """Split by variable."""
     outputfiles = [
         self._define_outputfilename(var, self.years)
         for var in self.variables
     ]
     years = len(outputfiles) * [self.years]
     if not self.threads:
         pool = Pool()
     else:
         pool = Pool(nodes=self.threads)
     pool.map(self._getdata, self.variables, years, outputfiles)
Example #4
def build(
    charm_list,
    layer_list,
    layer_index,
    charm_branch,
    layer_branch,
    resource_spec,
    filter_by_tag,
    to_channel,
    rebuild_cache,
):
    build_env = BuildEnv(build_type=BuildType.CHARM)
    build_env.db["build_args"] = {
        "artifact_list": charm_list,
        "layer_list": layer_list,
        "layer_index": layer_index,
        "charm_branch": charm_branch,
        "layer_branch": layer_branch,
        "resource_spec": resource_spec,
        "filter_by_tag": list(filter_by_tag),
        "to_channel": to_channel,
        "rebuild_cache": rebuild_cache,
    }

    build_env.pull_layers()

    entities = []
    for charm_map in build_env.artifacts:
        for charm_name, charm_opts in charm_map.items():
            if not any(match in filter_by_tag for match in charm_opts["tags"]):
                continue

            charm_entity = f"cs:~{charm_opts['namespace']}/{charm_name}"
            entities.append(
                BuildEntity(build_env, charm_name, charm_opts, charm_entity))
            click.echo(f"Queued {charm_entity} for building")

    def _run_build(build_entity):
        build_entity.setup()

        if not build_entity.has_changed:
            return

        build_entity.proof_build()

        build_entity.push()
        build_entity.attach_resource("unpublished")
        build_entity.promote(to_channel=to_channel)

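    # build all queued entities concurrently; ThreadPool() picks a default worker count (typically the CPU count)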
    pool = ThreadPool()
    pool.map(_run_build, entities)
    build_env.save()
Example #5
 def _split_variable_yr(self):
     """Fetch variable split by variable and year."""
     outputfiles = []
     variables = []
     years = []
     for var in self.variables:
         outputfiles += [
             self._define_outputfilename(var, [yr]) for yr in self.years
         ]
         variables += len(self.years) * [var]
         years += [[yr] for yr in self.years]
     if not self.threads:
         pool = Pool()
     else:
         pool = Pool(nodes=self.threads)
     pool.map(self._getdata, variables, years, outputfiles)
Example #6
    def _get_results_by_threading(self, func, params):
        """
        Query github API by multithreading.
        return a list containing all results.
        """
        num_workers = self.num_workers
        if func.__name__ not in [
                "multi_pulls", "multi_commits", "multi_watchers"
        ]:
            num_workers = 1
        stats = []
        start = time.time()
        for i in range(len(params) // NUM_PER_PAGE):
            # pdb.set_trace()
            if self.num_workers != 1 and (i == 0 or
                                          (i + 1) * NUM_PER_PAGE % 400 == 0):
                sec = random.choice(range(10, 60))
                print("Sleep {} sec".format(sec))
                sleep(sec)
            p = ThPool(num_workers)
            temp = p.map(func, params[i * NUM_PER_PAGE:(i + 1) * NUM_PER_PAGE])
            stats += temp

        print(
            f"{self.repo_name}, {func.__name__} takes: {round(time.time()-start,3)} secs"
        )
        return stats
Example #7
    def pull_layers(self):
        """ clone all downstream layers to be processed locally when doing charm builds
        """
        if self.rebuild_cache:
            click.echo("-  rebuild cache triggered, cleaning out cache.")
            shutil.rmtree(str(self.layers_dir))
            shutil.rmtree(str(self.interfaces_dir))
            os.mkdir(str(self.layers_dir))
            os.mkdir(str(self.interfaces_dir))

        layers_to_pull = []
        for layer_map in self.layers:
            layer_name = list(layer_map.keys())[0]

            if layer_name == "layer:index":
                continue

            layers_to_pull.append(layer_name)

        pool = ThreadPool()
        pool.map(self.download, layers_to_pull)

        self.db["pull_layer_manifest"] = []
        _paths_to_process = {
            "layer": glob("{}/*".format(str(self.layers_dir))),
            "interface": glob("{}/*".format(str(self.interfaces_dir))),
        }
        for prefix, paths in _paths_to_process.items():
            for _path in paths:
                build_path = _path
                if not build_path:
                    raise BuildException(
                        f"Could not determine build path for {_path}")

                git.checkout(self.layer_branch, _cwd=build_path)

                layer_manifest = {
                    "rev":
                    git("rev-parse", "HEAD",
                        _cwd=build_path).stdout.decode().strip(),
                    "url":
                    f"{prefix}:{Path(build_path).stem}",
                }
                self.db["pull_layer_manifest"].append(layer_manifest)
                click.echo(
                    f"- {layer_manifest['url']} at commit: {layer_manifest['rev']}"
                )
Example #8
            def data_func(measurement):
                if not use_threads:
                    data = numpy.full(sources.shape + geobox.shape,
                                      measurement['nodata'],
                                      dtype=measurement['dtype'])
                    for index, datasets in numpy.ndenumerate(sources.values):
                        _fuse_measurement(
                            data[index],
                            datasets,
                            geobox,
                            measurement,
                            fuse_func=fuse_func,
                            skip_broken_datasets=skip_broken_datasets,
                            driver_manager=driver_manager)
                else:

                    def work_load_data(array_name, index, datasets):
                        data = sa.attach(array_name)
                        _fuse_measurement(
                            data[index],
                            datasets,
                            geobox,
                            measurement,
                            fuse_func=fuse_func,
                            skip_broken_datasets=skip_broken_datasets,
                            driver_manager=driver_manager)

                    array_name = '_'.join(
                        ['DCCORE',
                         str(uuid.uuid4()),
                         str(os.getpid())])
                    sa.create(array_name,
                              shape=sources.shape + geobox.shape,
                              dtype=measurement['dtype'])
                    data = sa.attach(array_name)
                    data[:] = measurement['nodata']

                    pool = ThreadPool(32)
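                    # ndenumerate yields (index, datasets) pairs; zip(*...) splits them into
                    # parallel index and dataset argument lists for pool.map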
                    pool.map(work_load_data, repeat(array_name),
                             *zip(*numpy.ndenumerate(sources.values)))
                    sa.delete(array_name)
                return data
Example #9
    def pull_layers(self):
        """clone all downstream layers to be processed locally when doing charm builds"""
        layers_to_pull = []
        for layer_map in self.layers:
            layer_name = list(layer_map.keys())[0]

            if layer_name == "layer:index":
                continue

            layers_to_pull.append(layer_name)

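        # download every layer in parallel; map returns results in the same order as layers_to_pull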
        pool = ThreadPool()
        results = pool.map(self.download, layers_to_pull)

        self.db["pull_layer_manifest"] = [result for result in results]
Example #10
    def filter_results(self,
                       im_array,
                       results,
                       image_times,
                       model,
                       psf_sigma=1.0,
                       batch_size=32,
                       chunk_size=10000):
        """
        Use a keras neural network model to detect real objects based upon
        the coadded postage stamps of those objects. Filter and keep only
        actual objects going forward.

        Parameters
        ----------

        im_array: numpy array, required
        The masked original images. See loadMaskedImages
        in searchImage.py.

        results: numpy recarray, required
        The results output from findObjects in searchImage.

        image_times: numpy array, required
        An array containing the image times in DAYS with the first image at
        time 0.
        Note: This is different from other methods, so the units of
        this may change. Watch this documentation.

        model: keras model, required
        A previously trained model loaded from an hdf5 file.

        batch_size: int
        Batch size for keras predict.

        Returns
        -------

        filtered_results: numpy array
        An edited version of results with only the rows where
        true objects were classified.
        
        """

        keep_objects = np.array([])
        total_chunks = np.ceil(len(results) / float(chunk_size))
        chunk_num = 1
        circle_vals = []

        enumerated_results = list(enumerate(results))
        self.im_array = im_array
        self.image_times = image_times
        self.psf_sigma = psf_sigma

        #        for chunk_start in range(0, len(results), chunk_size):
        #            test_class = []
        #            p_stamp_arr = []
        #            #circle_chunk = []
        #            for imNum in range(chunk_start, chunk_start+chunk_size):
        #                try:
        #                    p_stamp = self.createPostageStamp(im_array,
        #                                                      list(results[['t0_x', 't0_y']][imNum]),
        #                                                      np.array(list(results[['v_x', 'v_y']][imNum])),
        #                                                      image_times, [25., 25.])[0]
        #                    p_stamp = np.array(p_stamp)
        #                    p_stamp[np.isnan(p_stamp)] = 0.
        #                    p_stamp[np.isinf(p_stamp)] = 0.
        #                    #p_stamp -= np.min(p_stamp)
        #                    #p_stamp /= np.max(p_stamp)
        #                    #p_stamp
        #                    image_thresh = np.max(p_stamp)*0.5
        #                    image = (p_stamp > image_thresh)*1.
        #                    #pre_image = p_stamp > image_thresh
        #                    #image = np.array(pre_image*1.)
        #                    mom = measure.moments(image)
        #                    cr = mom[0,1]/mom[0,0]
        #                    cc = mom[1,0]/mom[0,0]
        #                    #moments = measure.moments(image, order=3)
        #                    #cr = moments[0,1]/moments[0,0]
        #                    #cc = moments[1,0]/moments[0,0]
        #                    cent_mom = measure.moments_central(image, cr, cc, order=4)
        #                    norm_mom = measure.moments_normalized(cent_mom)
        #                    hu_mom = measure.moments_hu(norm_mom)
        #                    #p_stamp_arr.append(hu_mom)
        #                    #print moments[0,0], measure.perimeter(image)
        #                    #circularity = (4*np.pi*moments[0,0])/(measure.perimeter(image)**2.)
        #                    #circularity = (cent_mom[0,0]**2.)/(2.*np.pi*(cent_mom[2,0] + cent_mom[0,2]))
        #                    circularity = (1/(2.*np.pi))*(1/hu_mom[0])
        #                    #circularity = (cent_mom[0,0]**2.)/(2*np.pi*(cent_mom[2,0] + cent_mom[0,2]))
        #                    psf_sigma = psf_sigma
        #                    gaussian_fwhm = psf_sigma*2.35
        #                    fwhm_area = np.pi*(gaussian_fwhm/2.)**2.
        #                    #print circularity, cr, cc
        #                    if ((circularity > 0.6) & (cr > 10.) & (cr < 14.) & (cc > 10.) & (cc < 14.) &
        #                        (cent_mom[0,0] < (9.0*fwhm_area)) & (cent_mom[0,0] > 3.0)): #Use 200% error margin on psf_sigma for now
        #                    #    test_class.append(1.)
        #                    #    print circularity, cr, cc, moments[0,0]
        #                    #else:
        #                    #    test_class.append(0.)
        #                        test_class.append(1.)
        #                    else:
        #                        test_class.append(0.)
        #                    circle_vals.append([circularity, cr, cc, cent_mom[0,0], image_thresh])
        #                    #print circularity, cr, cc, cent_mom[0,0], image_thresh
        #                except:
        #                    #p_stamp_arr.append(np.ones((25, 25)))
        #                    p_stamp_arr.append(np.zeros(7))
        #                    test_class.append(0.)
        #                    circle_vals.append([0., 0., 0., 0., 0.])
        #                    continue
        #            p_stamp_arr = np.array(p_stamp_arr)#.reshape(chunk_size, 625)
        #test_class = model.predict_classes(p_stamp_arr, batch_size=batch_size,
        #                                   verbose=1)
        pool = Pool(nodes=8)
        test_classes = pool.map(self.circularity_test, enumerated_results)
        test_classes = np.array(test_classes).T
        keep_idx = test_classes[0][np.where(
            np.array(test_classes[1]) > .5)]  # + chunk_start
        print(keep_idx)
        #print(np.where(np.array(test_class) > .5))
        print(test_classes[0][np.where(np.array(test_classes[1]) > .5)])
        keep_objects = keep_idx  #np.append(keep_objects, keep_idx)
        #circle_vals[keep_idx] = np.array(circle_chunk)
        print("Finished chunk %i of %i" % (chunk_num, total_chunks))
        chunk_num += 1

        #        keep_objects = np.arange(len(results))
        filtered_results = results[np.array(keep_objects, dtype=int)]
        #circle_vals = np.array(circle_vals)
        #circle_vals_keep = circle_vals[np.array(keep_objects, dtype=np.int)]

        return filtered_results  #, circle_vals_keep
Example #11
    def create_storage(coords,
                       geobox,
                       measurements,
                       data_func=None,
                       use_threads=False):
        """
        Create a :class:`xarray.Dataset` and (optionally) fill it with data.

        This function creates the in-memory storage structure to hold datacube data, loading data from datasets
        that have been grouped appropriately by :meth:`group_datasets`.

        :param dict coords:
            OrderedDict holding `DataArray` objects defining the dimensions not specified by `geobox`

        :param GeoBox geobox:
            A GeoBox defining the output spatial projection and resolution

        :param measurements:
            list of :class:`datacube.model.Measurement`

        :param data_func:
            function to fill the storage with data. It is called once for each measurement, with the measurement
            as an argument. It should return an appropriately shaped numpy array. If not provided, an empty
            :class:`xarray.Dataset` is returned.

        :param bool use_threads:
            Optional. If this is set to True, IO will be multi-threaded.
            May not work for all drivers due to locking/GIL.

            Default is False.

        :rtype: :class:`xarray.Dataset`

        .. seealso:: :meth:`find_datasets` :meth:`group_datasets`
        """
        def empty_func(measurement_):
            coord_shape = tuple(coord_.size for coord_ in coords.values())
            return numpy.full(coord_shape + geobox.shape,
                              measurement_.nodata,
                              dtype=measurement_.dtype)

        data_func = data_func or empty_func

        result = xarray.Dataset(attrs={'crs': geobox.crs})
        for name, coord in coords.items():
            result[name] = coord
        for name, coord in geobox.coordinates.items():
            result[name] = (name, coord.values, {'units': coord.units})

        def work_measurements(measurement, data_func):
            return data_func(measurement)

        use_threads = use_threads and THREADING_REQS_AVAILABLE

        if use_threads:
            pool = ThreadPool(32)
            results = pool.map(work_measurements, measurements,
                               repeat(data_func))
        else:
            results = [data_func(a) for a in measurements]

        for measurement in measurements:
            data = results.pop(0)
            attrs = measurement.dataarray_attrs()
            attrs['crs'] = geobox.crs
            dims = tuple(coords.keys()) + tuple(geobox.dimensions)
            result[measurement.name] = (dims, data, attrs)

        return result
Example #12
x = hub.array(shape, name='imagenet/test:latest', dtype='uint8')
print(x.shape)

index = 1


def upload_val(index):
    t1 = time.time()
    # Preprocess the image
    img = Image.open(val_path[index])
    img = img.resize((500, 375), Image.ANTIALIAS)
    img = np.asarray(img)
    if len(img.shape) == 2:
        img = np.expand_dims(img, -1)
    if img.shape[-1] == 4:
        img = img[..., :3]
    img = np.transpose(img, axes=(1, 0, 2))

    # Upload the image
    t2 = time.time()
    x[index] = np.expand_dims(img, 0)
    t3 = time.time()
    print("uploading {}/{}: downloded in {}s and uploaded in {}s ".format(
        index, len(val_path), t2 - t1, t3 - t2))


t1 = time.time()
list(pool.map(upload_val, list(range(len(val_path)))))
t2 = time.time()
print('uploaded {} images in {}s'.format(len(val_path), t2 - t1))
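The snippet above relies on imports and objects created earlier in the original script; a minimal sketch of that assumed setup (the image location, array shape, and worker count are guesses) could look like:

import time
from glob import glob

import numpy as np
from PIL import Image
from pathos.threading import ThreadPool
import hub

val_path = sorted(glob('/data/imagenet/val/*.JPEG'))  # hypothetical image location
shape = (len(val_path), 500, 375, 3)  # assumed to match the resize/transpose above
pool = ThreadPool(nodes=16)  # worker count is an assumption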
Example #13
from pathos.threading import ThreadPool
import time
pool = ThreadPool(nodes=4)

# do a blocking map on the chosen function
print(pool.map(pow, [1,2,3,4], [5,6,7,8]))

# do a non-blocking map, then extract the results from the iterator
results = pool.imap(pow, [1,2,3,4], [5,6,7,8])
print("...")
print(list(results))


# do an asynchronous map, then get the results
results = pool.amap(pow, [1,2,3,4], [5,6,7,8])
while not results.ready():
    time.sleep(5)
    print(".")

print(results.get())


# do one item at a time, using a pipe

print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

# do one item at a time, using an asynchronous pipe

result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
print(result1.get())
print(result2.get())
Example #14
def tuneHyperParameters(simsettingsFileName,
                        hyperSettings=None,
                        saved_fd_model_path=None):
    """
        For some set of parameters, the function will sample a number of them
        in order to find a more optimal configuration.
    """
    import os

    result_data = {}

    file = open(simsettingsFileName)
    settings = json.load(file)
    print("Settings: " + str(json.dumps(settings, indent=4)))
    file.close()
    file = open(hyperSettings)
    hyper_settings = json.load(file)
    print("Settings: " + str(json.dumps(settings, indent=4)))
    file.close()
    num_sim_samples = hyper_settings['meta_sim_samples']

    ## Check to see if there exists a saved fd model, if so save the path in the hyper settings
    if (not (saved_fd_model_path is None)):
        directory = getDataDirectory(settings)
        # file_name_dynamics=directory+"forward_dynamics_"+"_Best_pretrain.pkl"
        if not os.path.exists(directory):
            hyper_settings['saved_fd_model_path'] = saved_fd_model_path

    param_settings = get_param_values(hyper_settings)
    result_data['hyper_param_settings_files'] = []
    sim_data = []
    data_name = settings['data_folder']
    for params in param_settings:  ## Loop over each setting of parameters
        data_name_tmp = ""
        for par in range(
                len(params)
        ):  ## Assemble the vector of parameters and data folder name
            param_of_interest = hyper_settings['param_to_tune'][par]
            data_name_tmp = data_name_tmp + "/_" + param_of_interest + "_" + str(
                params[par]) + "/"
            settings[param_of_interest] = params[par]

        settings['data_folder'] = data_name + data_name_tmp
        directory = getBaseDataDirectory(settings)
        if not os.path.exists(directory):
            os.makedirs(directory)
        # file = open(settingsFileName, 'r')

        out_file_name = directory + os.path.basename(simsettingsFileName)
        result_data['hyper_param_settings_files'].append(out_file_name)
        print("Saving settings file with data to: ", out_file_name)
        print("settings['data_folder']: ", settings['data_folder'])
        out_file = open(out_file_name, 'w')
        out_file.write(json.dumps(settings, indent=4))
        # file.close()

        out_file.close()
        sim_data.append(
            (simsettingsFileName, num_sim_samples,
             copy.deepcopy(settings), hyper_settings['meta_sim_threads'],
             copy.deepcopy(hyper_settings)))

    # p = ProcessingPool(2)
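    # train one meta model per sampled parameter setting, using the configured number of tuning threads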
    p = ThreadPool(hyper_settings['tuning_threads'])
    t0 = time.time()
    result = p.map(_trainMetaModel, sim_data)
    t1 = time.time()
    print("Hyper parameter tuning complete in " +
          str(datetime.timedelta(seconds=(t1 - t0))) + " seconds")
    result_data['sim_time'] = "Meta model training complete in " + str(
        datetime.timedelta(seconds=(t1 - t0))) + " seconds"
    result_data['meta_sim_result'] = result
    result_data['raw_sim_time_in_seconds'] = t1 - t0
    result_data['Number_of_simulations_sampled'] = len(param_settings)
    result_data['Number_of_threads_used'] = hyper_settings['tuning_threads']
    print(result)
    return result_data
Example #15
    def create_storage(coords,
                       geobox,
                       measurements,
                       data_func=None,
                       use_threads=False):
        """
        Create a :class:`xarray.Dataset` and (optionally) fill it with data.

        This function creates the in-memory storage structure to hold datacube data, loading data from datasets
        that have been grouped appropriately by :meth:`group_datasets`.

        :param dict coords:
            OrderedDict holding `DataArray` objects defining the dimensions not specified by `geobox`

        :param GeoBox geobox:
            A GeoBox defining the output spatial projection and resolution

        :param measurements:
            list of measurement dicts with keys: {'name', 'dtype', 'nodata', 'units'}

        :param data_func:
            function to fill the storage with data. It is called once for each measurement, with the measurement
            as an argument. It should return an appropriately shaped numpy array.

        :param bool use_threads:
            Optional. If this is set to True, IO will be multi-threaded.
            May not work for all drivers due to locking/GIL.

            Default is False.

        :rtype: :class:`xarray.Dataset`

        .. seealso:: :meth:`find_datasets` :meth:`group_datasets`
        """
        def empty_func(measurement_):
            coord_shape = tuple(coord_.size for coord_ in coords.values())
            return numpy.full(coord_shape + geobox.shape,
                              measurement_['nodata'],
                              dtype=measurement_['dtype'])

        data_func = data_func or empty_func

        result = xarray.Dataset(attrs={'crs': geobox.crs})
        for name, coord in coords.items():
            result[name] = coord
        for name, coord in geobox.coordinates.items():
            result[name] = (name, coord.values, {'units': coord.units})

        def work_measurements(measurement, data_func):
            return data_func(measurement)

        if use_threads and ('SharedArray' not in sys.modules
                            or 'pathos.threading' not in sys.modules):
            use_threads = False

        if use_threads:
            pool = ThreadPool(32)
            results = pool.map(work_measurements, measurements,
                               repeat(data_func))
        else:
            results = [data_func(a) for a in measurements]

        for measurement in measurements:
            data = results.pop(0)

            attrs = {
                'nodata': measurement.get('nodata'),
                'units': measurement.get('units', '1'),
                'crs': geobox.crs
            }
            if 'flags_definition' in measurement:
                attrs['flags_definition'] = measurement['flags_definition']
            if 'spectral_definition' in measurement:
                attrs['spectral_definition'] = measurement[
                    'spectral_definition']

            dims = tuple(coords.keys()) + tuple(geobox.dimensions)
            result[measurement['name']] = (dims, data, attrs)

        return result
Example #16
 def process(self):
     """ process rules
     """
     pool = ThreadPool()
     pool.map(self.__process, self.files_to_process)
Example #17
def main():

    main_dir = Path(r'E:\dwd_meteo')
    os.chdir(main_dir)

    out_dir = Path(r'zipped_DWD_data')

    test_exist_dir = Path('extracted')

    main_site = r'https://opendata.dwd.de'

    out_dir_names = [
        'hist_daily_met',
        'pres_daily_met',
        'hist_daily_more_precip',
        'pres_daily_more_precip',
        'hist_daily_soil_temp',
        'pres_daily_soil_temp',
        'daily_solar',
        'hist_hourly_precip',
        'pres_hourly_precip',
        'hist_hourly_temp',
        'pres_hourly_temp',
        'hist_hourly_cloud_type',
        'pres_hourly_cloud_type',
        'hist_hourly_cloudiness',
        'pres_hourly_cloudiness',
        'hist_hourly_pressure',
        'pres_hourly_pressure',
        'hist_hourly_soil_temp',
        'pres_hourly_soil_temp',
        'hourly_solar',
        'hist_hourly_sun',
        'pres_hourly_sun',
        'hist_hourly_visib',
        'pres_hourly_visib',
    ]

    sub_links = [
        r'/climate_environment/CDC/observations_germany/climate/daily/kl/historical/',
        r'/climate_environment/CDC/observations_germany/climate/daily/kl/recent/',
        r'/climate_environment/CDC/observations_germany/climate/daily/more_precip/historical/',
        r'/climate_environment/CDC/observations_germany/climate/daily/more_precip/recent/',
        r'/climate_environment/CDC/observations_germany/climate/daily/soil_temperature/historical/',
        r'/climate_environment/CDC/observations_germany/climate/daily/soil_temperature/recent/',
        r'/climate_environment/CDC/observations_germany/climate/daily/solar/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/precipitation/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/air_temperature/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/air_temperature/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloudiness/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloudiness/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/pressure/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/pressure/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/solar/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/sun/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/sun/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/visibility/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/visibility/recent/',
    ]

    assert len(out_dir_names) == len(sub_links)

    out_dir.mkdir(exist_ok=True)

    n_threads = len(out_dir_names)

    if n_threads == 1:
        for i in range(len(out_dir_names)):
            download_data(main_site + sub_links[i], out_dir / out_dir_names[i],
                          test_exist_dir)

    else:
        thread_pool = ThreadPool(nodes=n_threads)

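        # one thread per dataset: download_data receives matching (url, output dir, existing-data dir) arguments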
        thread_pool.map(
            download_data, [main_site + sub_link for sub_link in sub_links],
            [out_dir / out_dir_name
             for out_dir_name in out_dir_names], [test_exist_dir] * n_threads)

    return