def main_optical(dates):
    saldana = regionstack.regionStack('Saldana', attrs=['S2', 'LC08'])
    # saldana.harmonize_L8()
    boi = ['red', 'blue', 'green', 'nir', 'swir1', 'swir2']
    vars_loc = os.environ['WIN_SVR_DATA'] + 'Saldana/vars/'
    for band in boi:
        if not os.path.isfile(vars_loc + 'opt_' + band + '.nc'):
            # TO-DO: add isel for the relevant dates, e.g.
            # .isel(time=slice(min(dates) - 120D - security_margin, max(dates) + 16D + security_margin))
            sentinel = saldana.S2[band]
            landsat = saldana.LC08[band]
            aligned = xr.align(sentinel, landsat, exclude={'time'})
            da = xr.concat([aligned[0], aligned[1]], dim='time')
            da.sortby('time').to_netcdf(vars_loc + 'opt_' + band + '.nc')

    client = Client(n_workers=12)
    client.upload_file('c_Class_Models/interpolatets.py')

    files = list(filter(re.compile(r'^opt_.*').search, os.listdir(vars_loc)))
    files = list(map(lambda x: vars_loc + x, files))
    print('Reading concatenated Dataset')
    dataset = xr.open_mfdataset(files, chunks={'y': 1000, 'x': 750, 'time': -1})
    print('Concat Dataset\n')
    print(dataset)

    _location = os.environ['WIN_SVR_DATA'] + 'Saldana/features/'
    itp.interpolate_dataset(dataset, _location, boi, date_of_analysis=dates, der=False)
    print('Dataset interpolation done!\n')
def main_radar_text(dates):
    saldana = regionstack.regionStack(
        'Saldana', attrs=['S1_ASCENDING_GLCM', 'S1_DESCENDING_GLCM'])
    boi = [
        'VV_ASM', 'VV_Contrast', 'VV_Dissimilarity', 'VV_Energy', 'VV_Entropy',
        'VV_GLCMCorrelation', 'VV_GLCMMean', 'VV_GLCMVariance', 'VV_Homogeneity'
    ]
    vars_loc = os.environ['WIN_SVR_DATA'] + 'Saldana/vars/'
    for band in boi:
        if not os.path.isfile(vars_loc + 'rad_' + band + '.nc'):
            asc = saldana.S1_ASCENDING_GLCM[band]
            dsc = saldana.S1_DESCENDING_GLCM[band]
            da = xr.concat([asc, dsc], dim='time')
            print('{} band was concatenated. Writing DataArray'.format(band))
            da.to_netcdf(vars_loc + 'rad_' + band + '.nc')

    client = Client(n_workers=12)
    client.upload_file('b_Temporal_Stack/interpolatets.py')

    files = list(filter(re.compile(r'^rad_.*').search, os.listdir(vars_loc)))
    files = list(map(lambda x: vars_loc + x, files))
    print('Reading concatenated Dataset')
    dataset = xr.open_mfdataset(files, chunks={'y': 1000, 'x': 750, 'time': -1})
    print('Concat Dataset\n')
    print(dataset)

    _location = os.environ['WIN_SVR_DATA'] + 'Saldana/features/'
    itp.interpolate_dataset(dataset, _location, boi, date_of_analysis=dates)
    itp.interpolate_dataset(dataset, _location, boi, date_of_analysis=dates, der=True)
###############################################################################
###############################################################################
##### Cluster initialization
if parallel:
    if cluster == 'distributed':
        cluster_ = DISTCLUSTER
    else:
        nworkers = NLOCALWORKERS
        cluster_ = LocalCluster(n_workers=nworkers,
                                threads_per_worker=1,
                                memory_limit='5GB')
    client = Client(cluster_)
    client.upload_file('structure.py')
    client.upload_file('decoding_functions.py')
else:
    client = None

###############################################################################
for (dataseed, taskvar, monkey, stable, region, subspace, ensemble,
     permutes) in product(dataseeds, taskvars, monkeys, stables, regions,
                          subspaces, ensembles, permutations):
    print(
        f"monkey:{monkey} | region:{region} | var:{taskvar} | stable:{stable} | subspace:{subspace} | "
        f"permutes:{permutes} | ensemble:{ensemble} | seed:{dataseed}")

    params_preproc_ens = get_ens_pp(stable)
    params_preproc_test = get_test_pp()
parser.add_argument("experiment", type=str, help="Name of the experiment being performed") parser.add_argument("--benchmark", action="store_true", help="benchmark results") args = parser.parse_args() # Cluster scheduler cluster = args.scheduler client = Client(cluster) print(client) # Allow workers to use module client.upload_file("/nfs/paper-big-data-engines/utils.py") client.upload_file("/nfs/paper-big-data-engines/kmeans/Kmeans.py") # Read images paths = crawl_dir(os.path.abspath(args.bb_dir)) paths = db.from_sequence(paths, npartitions=len(paths)) img_rdd = paths.map(lambda p: read_img(p, start=start, args=args)) start_time = time() - start voxels = img_rdd.map(lambda x: x[1].flatten("F")).flatten() frequency_pair = voxels.frequencies().compute() end_time = time() - start if args.benchmark:
parser.add_argument("experiment", type=str, help="Name of the experiment being performed") parser.add_argument("--benchmark", action="store_true", help="benchmark results") args = parser.parse_args() # Cluster scheduler cluster = args.scheduler client = Client(cluster) print(client) # Allow workers to use module client.upload_file("/nfs/paper-big-data-engines/utils.py") client.upload_file("/nfs/paper-big-data-engines/histogram/Histogram.py") from utils import benchmark, crawl_dir, read_img from Histogram import ( calculate_histogram, combine_histogram, flatten, save_histogram, ) # Read images paths = crawl_dir(os.path.abspath(args.bb_dir)) paths = db.from_sequence(paths, npartitions=len(paths)) img = paths.map(lambda p: read_img(p, start=start, args=args)) img = img.map(
parser.add_argument(
    "benchmark_dir", help="Directory where the benchmark files are written."
)
parser.add_argument(
    "experiment", type=str, help="Name of the experiment being performed"
)
parser.add_argument("--benchmark", action="store_true", help="benchmark results")
args = parser.parse_args()

# Cluster scheduler
cluster = args.scheduler
client = Client(cluster)
print(client)

client.upload_file("/nfs/paper-big-data-engines/utils.py")
client.upload_file("/nfs/paper-big-data-engines/bidsApp-examples/Example.py")
from Example import run_group, run_participant, site_crawler, subject_crawler

# Retrieve all subject paths
subjects = subject_crawler(args.bids_dir)
client.scatter(subjects)

results = list()
for subject in subjects:
    results.append(
        client.submit(
            run_participant,
            subject_id=subject[1],
            start=start,
            args=args,
parser.add_argument("iterations", type=int, help="number of iterations") parser.add_argument("delay", type=float, help="sleep delay during " "incrementation") parser.add_argument("--benchmark", action="store_true", help="benchmark results") args = parser.parse_args() cluster = args.scheduler client = Client(cluster) print(client) client.upload_file( "/nfs/SOEN-499-Project/utils.py") # Allow workers to use module client.upload_file("/nfs/SOEN-499-Project/incrementation/Increment.py") # Read images paths = crawl_dir(os.path.abspath(args.bb_dir)) paths = db.from_sequence(paths, npartitions=len(paths)) img_rdd = paths.map(lambda p: read_img(p, start=start, args=args)) # Increment the data n time: for _ in range(args.iterations): img_rdd = img_rdd.map( lambda x: increment(x, delay=args.delay, start=start, args=args)) # Save the data img_rdd = img_rdd.map(lambda x: save_results(x, start=start, args=args))
# +
# Change to parent directory (presumably "Documents")
os.chdir("../../..")
# Path to the CSV dataset files
data_path = 'Documents/Datasets/Outlier_Detection/UniversityOfOregon_SolarAndMeteorologicalData_Eugene/'
project_path = 'Documents/GitHub/eICU-mortality-prediction/'
# -

# Set up local cluster
client = Client('tcp://127.0.0.1:61980')
client

# Upload the project modules, so that the Dask cluster has access to the relevant auxiliary functions
client.upload_file(f'{project_path}NeuralNetwork.py')
client.upload_file(f'{project_path}utils.py')
client.upload_file(f'{project_path}search_explore.py')
client.upload_file(f'{project_path}data_processing.py')

# **Problem:** Somehow, everything works fine if I initialize the Dask client without specifying the TCP address. But if I specify the one obtained from the Jupyter Lab Dask extension, it returns "ModuleNotFoundError: No module named 'torch'"! Perhaps the Jupyter Lab Dask extension is associated with a different Python environment.
#
# **Solution:** Jupyter Lab must be started from within the desired virtual environment's shell.

client.run(os.getcwd)

# ## Loading data

all_files = glob.glob(f'{data_path}/*.txt')

# +
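# A quick way to diagnose this kind of environment mismatch is to ask the workers directly. The sketch below assumes the `client` from above; `env_info` is a hypothetical helper name, not part of the project.

def env_info():
    # Report the interpreter path and whether torch is importable.
    import sys
    import importlib.util
    return {
        'executable': sys.executable,
        'torch_found': importlib.util.find_spec('torch') is not None,
    }

env_info()            # environment of the notebook process
client.run(env_info)  # environment of each worker, keyed by worker address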
# Modelling month
idmois = "_201708"
segmmm = ""

# Path to the folder containing the project code
path = "/mnt/smb/TAMPON/Igor/RFR_V1/"
path_code = path + "script/"
# path_data = "/mnt/smb/TAMPON/Partages/Moteur_industr/03_PROD/Restitution/EAS/"
path_data = path + "data_src/"
path_rslt = path + "data_rslt/"

os.chdir(path_code)

# import local packages on the workers
client.upload_file('K_means.py')
client.upload_file('IntConf.py')
client.upload_file('prediction.py')
client.upload_file('correlation.py')
client.upload_file('data_preparation.py')
client.upload_file('main2.py')

import main2 as workflow
import data_preparation
import K_means as K_means
import IntConf
import prediction

fich_deb = "ech_variables_explicatives" + idmois + ".csv"
# fich_vae = "revenu_client" + segmmm + idmois + ".csv"
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    # setup the game
    game, agents, ratings = game_setup(args.num_agents)

    # run the game matches on the cluster
    nodes = get_nodes(args.ip_file)
    print("Connecting to cluster scheduler {} with workers:".format(nodes[0]))
    client = Client(nodes[0] + ':8786')
    for worker, cores in client.ncores().items():
        print("{:>35} {} cores".format(worker, cores))
    client.upload_file('game.py')

    start = default_timer()
    matches = run_games(game, agents, args.num_matches, client)
    check_status(matches)
    print("Game run in {:.2f}".format(default_timer() - start))
    # here we could do something with failed matches (errors)

    # run rating evaluations
    start = default_timer()
    compute_ratings(matches, ratings)
    print("Skills computed in {:.2f}".format(default_timer() - start))

    # compute approximate accuracy of ratings
    accuracy = estimate_accuracy(agents, ratings)
class Remote(object):
    """
    Remote.

    Args:
        address (str): Remote scheduler address formed by `ip:port`.
        tls_ca_file (str, optional): TLS CA certificate file path. Defaults to None.
        tls_client_cert (str, optional): TLS certificate file path. Defaults to None.
        tls_client_key (str, optional): TLS private key file path. Defaults to None.
        require_encryption (bool, optional): Encrypt data exchange. Defaults to False.

    Note:
        TLS will be enabled only if all three TLS arguments are provided.
        Remember to change network protocol to `tls://<address>`.
    """

    def __init__(self,
                 address: str,
                 tls_ca_file: str = None,
                 tls_client_cert: str = None,
                 tls_client_key: str = None,
                 require_encryption: bool = False):
        # authentication
        sec = None
        if tls_ca_file and tls_client_cert and tls_client_key:
            sec = Security(tls_ca_file=tls_ca_file,
                           tls_client_cert=tls_client_cert,
                           tls_client_key=tls_client_key,
                           require_encryption=require_encryption)
        # init
        self._client = Client(address=address, security=sec)
        self._client.register_worker_callbacks(Remote._worker_startup)

    @staticmethod
    def _worker_startup(dask_worker: Worker):
        os.chdir(dask_worker.local_dir)

    def add_dependencies(self, files):
        """
        Add list of dependencies, order matters.

        Args:
            files (list): List of dependent files.
        """
        # TODO: automatically resolve module dependencies
        if isinstance(files, str):
            files = [files]
        for f in files:
            self._client.upload_file(f)

    def scatter(self, *args, **kwargs):
        """
        Scatter data.
        """
        return self._client.scatter(*args, **kwargs)

    def submit(self, func, *args, **kwargs):
        """
        Submit function and data.

        Args:
            func (callable): User function.
        """
        return self._client.submit(func, *args, **kwargs)

    def fetch(self, futures_, **kwargs):
        """
        Fetch data of future objects.

        Args:
            futures_ (list): Future objects.
        """
        return self._client.gather(futures_, **kwargs)

    def cancel(self, futures_, **kwargs):
        """
        Cancel job of future objects.

        Args:
            futures_ (list): Future objects.
        """
        return self._client.cancel(futures_, **kwargs)

    def close(self, *args, **kwargs):
        """
        Close connection.
        """
        return self._client.close(*args, **kwargs)
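# A hypothetical usage sketch for Remote; the scheduler address and
# 'my_module.py' are placeholder values, not part of the original source.
remote = Remote(address='10.0.0.5:8786')
remote.add_dependencies('my_module.py')      # a str is wrapped into a list internally
future = remote.submit(lambda x: x * 2, 21)  # schedule work on the cluster
print(remote.fetch([future]))                # -> [42]
remote.close()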
def main(args):
    """Main function of cellanneal."""
    if (args.start_temp is not None or args.end_temp is not None) and args.auto_temp == 1:
        raise Exception(
            "when auto_temp is set to 1 (default value), the starting and ending temperatures should not be set manually"
        )

    if not args.no_parallel:
        import dask
        from dask.distributed import Client, LocalCluster
        if not args.cluster:
            cluster = LocalCluster(
                n_workers=args.workers,
                threads_per_worker=1,
            )
            client = Client(cluster)
        else:
            cluster = args.cluster
            client = Client(cluster)
            client.restart()
        cwd = Path(__file__).parent.absolute()
        client.upload_file(cwd / 'drawing.py')
        client.upload_file(cwd / 'mathhelper.py')
        client.upload_file(cwd / 'cell.py')
        client.upload_file(cwd / 'colony.py')
        client.upload_file(cwd / 'optimization.py')
        client.upload_file(cwd / 'global_optimization.py')
        client.upload_file(cwd / 'main.py')
    else:
        cluster = None
        client = None

    lineagefile = None
    start = time.time()

    try:
        config = load_config(args.config)

        simulation_config = config["simulation"]
        # Maybe better to store the image type in the config file in the first place, instead of using cmd?
        if args.graySynthetic:
            simulation_config["image.type"] = "graySynthetic"
        elif args.phaseContrast:
            simulation_config["image.type"] = "phaseContrastImage"
        elif args.binary:
            simulation_config["image.type"] = "binary"
        else:
            raise ValueError(
                "Invalid Command: Synthetic image type must be specified")

        if not args.output.is_dir():
            args.output.mkdir()
        if not args.bestfit.is_dir():
            args.bestfit.mkdir()
        if args.residual and not args.residual.is_dir():
            args.residual.mkdir()

        seed = int(start * 1000) % (2**32)
        if args.seed is not None:
            seed = args.seed
        np.random.seed(seed)
        print("Seed: {}".format(seed))

        celltype = config['global.cellType'].lower()

        # setup the colony from a file with the initial properties
        lineageframes = LineageFrames()
        colony = lineageframes.forward()
        imagefiles = get_inputfiles(args)
        if args.lineage_file:
            load_colony(colony,
                        args.lineage_file,
                        config,
                        initial_frame=imagefiles[0].name)
        else:
            load_colony(colony, args.initial, config)
        cost_diff = (-1, -1)

        # open the lineage file for writing
        lineagefile = open(args.output / 'lineage.csv', 'w')
        header = ['file', 'name']
        if celltype == 'bacilli':
            header.extend([
                'x', 'y', 'width', 'length', 'rotation', 'split_alpha',
                'opacity'
            ])
        print(','.join(header), file=lineagefile)

        if args.debug:
            with open(args.debug / 'debug.csv', 'w') as debugfile:
                print(','.join([
                    'window_start', 'window_end', 'pbad_total', 'bad_count',
                    'temperature', 'total_cost_diff', 'current_iteration',
                    'total_iterations'
                ]),
                      file=debugfile)

        if args.global_optimization:
            global useDistanceObjective

            useDistanceObjective = args.dist
            realimages = [
                optimization.load_image(imagefile) for imagefile in imagefiles
            ]
            window = config["global_optimizer.window_size"]
            if args.lineage_file:
                lineage = global_optimization.build_initial_lineage(
                    imagefiles, args.lineage_file, args.continue_from,
                    config["simulation"])
            else:
                lineage = global_optimization.build_initial_lineage(
                    imagefiles, args.initial, args.continue_from,
                    config["simulation"])
            lineage = global_optimization.find_optimal_simulation_confs(
                imagefiles, lineage, realimages, args.continue_from)
            sim_start = args.continue_from - args.frame_first
            print(sim_start)
            shape = realimages[0].shape
            synthimages = []
            cellmaps = []
            distmaps = []
            iteration_per_cell = config["iteration_per_cell"]
            if not useDistanceObjective:
                distmaps = [None] * len(realimages)
            # slide a fixed-size window over the frames
            for window_start in range(1 - window, len(realimages)):
                window_end = window_start + window
                print(window_start, window_end)
                if window_end <= len(realimages):
                    # get initial estimate
                    if window_start >= sim_start:
                        if window_end > 1:
                            lineage.copy_forward()
                        realimage = realimages[window_end - 1]
                        synthimage, cellmap = optimization.generate_synthetic_image(
                            lineage.frames[window_end - 1].nodes, shape,
                            lineage.frames[window_end - 1].simulation_config)
                        synthimages.append(synthimage)
                        cellmaps.append(cellmap)
                        if useDistanceObjective:
                            distmap = distance_transform_edt(realimage < .5)
                            distmap /= config[
                                f'{config["global.cellType"].lower()}.distanceCostDivisor'] * config[
                                    'global.pixelsPerMicron']
                            distmap += 1
                            distmaps.append(distmap)
                    if args.auto_temp == 1 and window_end == 1:
                        print("auto temperature schedule started")
                        args.start_temp, args.end_temp = \
                            global_optimization.auto_temp_schedule(
                                imagefiles, lineage, realimages, synthimages,
                                cellmaps, distmaps, 0, 1, lineagefile, args,
                                config)
                        print("auto temperature schedule finished")
                        print("starting temperature is ", args.start_temp,
                              "ending temperature is ", args.end_temp)
                    if args.auto_meth == "frame" and optimization.auto_temp_schedule_frame(window_end, 3):
                        print("auto temperature schedule restarted")
                        args.start_temp, args.end_temp = \
                            global_optimization.auto_temp_schedule(
                                imagefiles, lineage, realimages, synthimages,
                                cellmaps, distmaps, window_start, window_end,
                                lineagefile, args, config)
                        print("auto temperature schedule finished")
                        print("starting temperature is ", args.start_temp,
                              "ending temperature is ", args.end_temp)

                if window_start >= sim_start:
                    if useDistanceObjective:
                        global_optimization.totalCostDiff = optimization.dist_objective(
                            realimage, synthimage, distmap, cellmap,
                            config["overlap.cost"])
                    else:
                        global_optimization.totalCostDiff = optimization.objective(
                            realimage, synthimage, cellmap,
                            config["overlap.cost"], config["cell.importance"])
                    lineage, synthimages, distmaps, cellmaps = global_optimization.optimize(
                        imagefiles, lineage, realimages, synthimages, cellmaps,
                        distmaps, window_start, window_end, lineagefile, args,
                        config, iteration_per_cell, client=client)
                if window_start >= 0:
                    global_optimization.save_lineage(
                        imagefiles[window_start].name,
                        lineage.frames[window_start].nodes, lineagefile)
                    global_optimization.save_output(
                        imagefiles[window_start].name,
                        synthimages[window_start], realimages[window_start],
                        lineage.frames[window_start].nodes, args, config)
            return 0

        config["simulation"] = optimization.find_optimal_simulation_conf(
            config["simulation"], optimization.load_image(imagefiles[0]),
            list(colony))
        if args.auto_temp == 1:
            print("auto temperature schedule started")
            args.start_temp, args.end_temp = optimization.auto_temp_schedule(
                imagefiles[0], lineageframes.forward(), args, config)
            print("auto temperature schedule finished")
            print("starting temperature is ", args.start_temp,
                  "ending temperature is ", args.end_temp)

        frame_num = 0
        prev_cell_num = len(colony)
        for imagefile in imagefiles:
            # Recompute the temperature schedule when needed
            frame_num += 1

            if args.auto_meth == "frame":
                if optimization.auto_temp_schedule_frame(frame_num, 8):
                    print("auto temperature schedule started (recomputed)")
                    args.start_temp, args.end_temp = optimization.auto_temp_schedule(
                        imagefile, colony, args, config)
                    print("auto temperature schedule finished")
                    print("starting temperature is ", args.start_temp,
                          "ending temperature is ", args.end_temp)
            elif args.auto_meth == "factor":
                if optimization.auto_temp_schedule_factor(len(colony),
                                                          prev_cell_num, 1.1):
                    print("auto temperature schedule started (recomputed)")
                    args.start_temp, args.end_temp = optimization.auto_temp_schedule(
                        imagefile, colony, args, config)
                    print("auto temperature schedule finished")
                    print("starting temperature is ", args.start_temp,
                          "ending temperature is ", args.end_temp)
                    prev_cell_num = len(colony)
            elif args.auto_meth == "const":
                if optimization.auto_temp_schedule_const(len(colony),
                                                         prev_cell_num, 10):
                    print("auto temperature schedule started (recomputed)")
                    args.start_temp, args.end_temp = optimization.auto_temp_schedule(
                        imagefile, colony, args, config)
                    print("auto temperature schedule finished")
                    print("starting temperature is ", args.start_temp,
                          "ending temperature is ", args.end_temp)
                    prev_cell_num = len(colony)
            elif args.auto_meth == "cost":
                # 'shcedule' matches the helper's actual (misspelled) name in optimization
                print(cost_diff, frame_num,
                      optimization.auto_temp_shcedule_cost(cost_diff))
                if frame_num >= 2 and optimization.auto_temp_shcedule_cost(cost_diff):
                    print("auto temperature schedule started cost_diff (recomputed)")
                    args.start_temp, args.end_temp = optimization.auto_temp_schedule(
                        imagefile, colony, args, config)
                    print("auto temperature schedule finished")
                    print("starting temperature is ", args.start_temp,
                          "ending temperature is ", args.end_temp)

            colony = optimize(imagefile, lineageframes, args, config, client)

            cost_diff = optimization.update_cost_diff(colony, cost_diff)

            # flatten modifications and save cell properties
            colony.flatten()
            for cellnode in colony:
                properties = [imagefile.name, cellnode.cell.name]
                if celltype == 'bacilli':
                    properties.extend([
                        str(cellnode.cell.x),
                        str(cellnode.cell.y),
                        str(cellnode.cell.width),
                        str(cellnode.cell.length),
                        str(cellnode.cell.rotation)
                    ])
                print(','.join(properties), file=lineagefile)
    except KeyboardInterrupt as error:
        raise error
    finally:
        if lineagefile:
            lineagefile.close()

    print(f'{time.time() - start} seconds')
    if client and not cluster:
        client.shutdown()

    return 0
ak.behavior.update(candidate.behavior)

from TTbarResProcessor import TTbarResProcessor
from Filesets import filesets

LoadingUnweightedFiles = False
UsingDaskExecutor = True

if UsingDaskExecutor:
    if __name__ == "__main__":
        tic = time.time()
        cluster = LPCCondorCluster()
        # minimum > 0: https://github.com/CoffeaTeam/coffea/issues/465
        cluster.adapt(minimum=1, maximum=10)
        client = Client(cluster)
        client.upload_file('TTbarAllHadUproot/TTbarResProcessor.py')

tstart = time.time()

outputs_unweighted = {}

seed = 1234577890
prng = RandomState(seed)
# Chunk = [10000, 100]  # [chunksize, maxchunks]

for name, files in filesets.items():
    if not LoadingUnweightedFiles:
        print('Processing', name)
        if not UsingDaskExecutor:
            chosen_exec = 'futures'
            output = processor.run_uproot_job({name: files},
nbofcluster = int(sys.argv[2])
fromline = 0
toline = 0
csv.register_dialect("tabulation", delimiter='\t')
jobs = []
clusters = []

with open(filename) as f:
    nbline = sum(1 for _ in csv.reader(f, dialect="tabulation"))

for clusternb in range(nbofcluster):
    toline = int((nbline / nbofcluster) * (clusternb + 1))
    client = Client()
    clusters.append(client)
    client.set_metadata("clusternb", str(clusternb))
    client.upload_file(filename)
    jobs.append(client.submit(network_operator, filename, (fromline, toline)))
    jobs.append(client.submit(calc_customer_call, filename, (fromline, toline)))
    jobs.append(client.submit(call_proportion, filename, (fromline, toline)))
    fromline = int(toline)

call_proportion_data = {}
network_operator_data = {}
calc_customer_call_data = {}

for job in jobs:
    result = job.result()
    if result[0] == "network_operator":
        for key, value in result[1].items():
from pathlib import Path

from dask_cloudprovider_problem import dask_cloudprovider_bug_minimal_example

if __name__ == "__main__":
    from dask_cloudprovider import FargateCluster
    from dask.distributed import Client

    egg_file = Path(__file__).parent / "dist" / "dask_cloudprovider_problem-0.0.0-py3.7.egg"
    assert egg_file.exists()

    client = Client(FargateCluster(n_workers=1))
    client.upload_file(str(egg_file))
    dask_cloudprovider_bug_minimal_example.run_tasks()
import scipy.ndimage as ndi
import scipy.sparse as sp
from scipy.interpolate import interp1d
from skimage import filters

# Most relevant functions can be found in registration
from registration import *

plt.rcParams["figure.figsize"] = [12., 8.]
SAVEFIG = True
# -

cluster = LocalCluster(n_workers=1, threads_per_worker=8)
client = Client(cluster)
client.upload_file('registration.py')
client


def plot_stack(images, n, grid=False):
    """Plot the n-th image from an image stack.
    For interactive use with ipython widgets (see the sketch below)."""
    im = images[n, :, :].compute()
    plt.figure(figsize=[12, 10])
    plt.imshow(im.T, cmap='gray', vmax=im.max())
    if grid:
        plt.grid()
    plt.show()


# +
# A bunch of constants
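# An interactive-use sketch for plot_stack, assuming a dask array stack named
# `images` is already loaded; `ipywidgets.interact` drives the frame slider.
from ipywidgets import interact

interact(lambda n: plot_stack(images, n, grid=True),
         n=(0, images.shape[0] - 1))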
parser.add_argument("experiment", type=str, help="Name of the experiment being performed") parser.add_argument("iterations", type=int, help="number of iterations") parser.add_argument("--benchmark", action="store_true", help="benchmark results") args = parser.parse_args() # Cluster scheduler cluster = args.scheduler client = Client(cluster) print(client) client.upload_file( "nfs/SOEN-499-Project/utils.py") # Allow workers to use module client.upload_file("nfs/SOEN-499-Project/kmeans/Kmeans.py") # Read images paths = crawl_dir(os.path.abspath("test/data")) img = [read_img(path, start=start, args=args) for path in paths] voxels = da.concatenate([x[1] for x in img]).reshape(-1) centroids = [0.0, 125.8, 251.6, 377.4] # Initial centroids voxel_pair = None bincount = da.bincount(voxels) bincount = bincount[bincount != 0] unique = da.unique(voxels)
import importlib
import time

import numpy as np
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from dask.distributed import Client
import dask.delayed

import allen_comparison

# output_notebook()

directory = "/home/jdehning/tmp/Emx1-s_highzoom"
# directory = "/scratch.local/jdehning/calcium_ephys_comparison_data/processed_data/Emx1-s_highzoom"
# directory = "/scratch.local/jdehning/calcium_ephys_comparison_data/processed_data/Emx1-s_lowzoom"

# client = Client('localhost:8786')
client = Client('localhost:42747')
client.upload_file('allen_comparison.py')
client.run(importlib.import_module, 'allen_comparison')

futures = []
last_pos = 0


def modify_doc(doc):
    # Set up data
    # ephys, ophys, dt = allen_comparison.open_dir(directory)
    ephys = allen_comparison.open_ephys(directory, client)
    ophys = allen_comparison.open_ophys(directory, client)
    k_arr = np.arange(1, 35)
    sources = []
    plots = []
    for i in range(len(ephys)):
        source1 = ColumnDataSource(data=dict(x=k_arr, y=np.zeros_like(k_arr)))
parser.add_argument("delay", type=float, help="sleep delay during " "incrementation") parser.add_argument("--benchmark", action="store_true", help="benchmark results") args = parser.parse_args() # Cluster scheduler cluster = args.scheduler client = Client(cluster) print(client) client.upload_file( "/nfs/paper-big-data-engines/utils.py") # Allow workers to use module client.upload_file( "/nfs/paper-big-data-engines/incrementation/Increment.py") # Read images paths = crawl_dir(os.path.abspath(args.bb_dir)) client.scatter(paths) results = [] for path in paths: img = client.submit(read_img, path, start=start, args=args) # Increment the data n time: for _ in range(args.iterations): img = client.submit(increment, img,
parser.add_argument(
    "bb_dir",
    type=str,
    help=("The folder containing BigBrain NIfTI images "
          "(local fs only)"),
)
parser.add_argument(
    "output_dir",
    type=str,
    help=("the folder to save incremented images to "
          "(local fs only)"),
)
parser.add_argument(
    "experiment", type=str, help="Name of the experiment being performed"
)
parser.add_argument("iterations", type=int, help="number of iterations")
parser.add_argument(
    "delay", type=float, help="sleep delay during incrementation"
)
parser.add_argument("--benchmark", action="store_true", help="benchmark results")
args = parser.parse_args()

# Cluster scheduler
cluster = args.scheduler
client = Client(cluster)
print(client)

client.upload_file("utils.py")  # Allow workers to use module
client.upload_file("Increment.py")
    configParallel['backend'] = "dask"
    error = 0
    if 'scheduler' not in args:
        logger.error("Backend: %s. Missing scheduler '-x' or '--scheduler'." % args.backend)
        error += 1
    if 'package' not in args:
        logger.error("Backend: %s. Missing package '-p' or '--package'." % args.backend)
        error += 1
    if error:
        logger.error("Backend: %s. There were %d errors. Exiting." %
                     (args.backend, error))
        exit(1)
    client = Client('tcp://%s:8786' % args.scheduler)
    for egg in args.package:
        client.upload_file(egg)
    configParallel['scheduler_host'] = (args.scheduler, 8786)
elif args.backend == 'daskyarn':
    configParallel['backend'] = "dask"
    error = 0
    if 'package' not in args:
        logger.error("Backend: %s. Missing package '-p' or '--package'." % args.backend)
        error += 1
    if error:
        logger.error("Backend: %s. There were %d errors. Exiting." %
                     (args.backend, error))
        exit(1)
else:
    pass

# Load data