Example #1
def CalculateNumberOfActiveThreads(numberOfTasks):
    if (cpu_count() == 2):
        return cpu_count()
    elif numberOfTasks < cpu_count():
        return numberOfTasks
    else:
        return cpu_count()
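For reference, the branching above reduces to taking the smaller of the task count and the CPU count; a minimal equivalent sketch (it differs only on a two-core machine with fewer than two tasks, where the original still returns 2):

from joblib import cpu_count

def calculate_number_of_active_threads(number_of_tasks):
    # Never spawn more workers than there are tasks or CPUs.
    return min(number_of_tasks, cpu_count())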
Example #2
    def __init__(self, **kwargs):
        """
        Base estimator with the following allowed keyword args

            memory (bool/str/joblib.Memory): The path or Memory for caching the computational
                results, default None means no cache.
            verbose (bool): Whether to show the progress of feature calculations.
            n_jobs (int): The number of parallel jobs. 0 means no parallel computations.
                If this value is negative or greater than the total number of CPUs,
                n_jobs is set to the number of CPUs on the system.

        Args:
            **kwargs: keyword args that contain possibly memory (str/joblib.Memory),
                verbose (bool), n_jobs (int)
        """
        allowed_kwargs = ['memory', 'verbose', 'n_jobs']
        for k, v in kwargs.items():
            if k not in allowed_kwargs:
                raise TypeError("%s not allowed as kwargs" % (str(k)))
        memory = kwargs.get("memory", None)
        if isinstance(memory, bool):
            memory = tempfile.mkdtemp()
            logger.info("Created temporary directory %s" % memory)
        verbose = kwargs.get("verbose", False)
        n_jobs = kwargs.get("n_jobs", 0)
        self.memory = check_memory(memory)
        self.verbose = verbose
        # find out the number of parallel jobs
        if (n_jobs < 0) or (n_jobs > cpu_count()):
            n_jobs = cpu_count()
            logger.info(f"Using {n_jobs} jobs for computation")
        self.n_jobs = n_jobs
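The n_jobs rule described in the docstring can be illustrated in isolation; the sketch below is not part of the estimator, just a standalone restatement of the clamping logic above.

from joblib import cpu_count

def resolve_n_jobs(n_jobs=0):
    # Mirrors the rule documented above: negative values, or values larger
    # than the number of CPUs, fall back to the system CPU count.
    if n_jobs < 0 or n_jobs > cpu_count():
        return cpu_count()
    return n_jobs

print(resolve_n_jobs(-1), resolve_n_jobs(10 ** 6), resolve_n_jobs(1))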
Example #3
def test_effective_n_jobs_with_context():
    assert_equal(threaded.effective_n_jobs_with_context(), 1,
                 "Default to 1 job")
    assert_equal(
        threaded.effective_n_jobs_with_context(-1),
        joblib.cpu_count(),
        "Use all cores with num_jobs=-1",
    )
    assert_equal(threaded.effective_n_jobs_with_context(2), 2,
                 "Use n_jobs if specified")
    with joblib.parallel_backend("threading"):
        assert_equal(
            threaded.effective_n_jobs_with_context(),
            joblib.cpu_count(),
            "Use all cores with context manager",
        )
    with joblib.parallel_backend("threading", n_jobs=3):
        assert_equal(
            threaded.effective_n_jobs_with_context(),
            3,
            "Use n_jobs from context manager",
        )
    with joblib.parallel_backend("threading", n_jobs=3):
        assert_equal(
            threaded.effective_n_jobs_with_context(2),
            2,
            "Use n_jobs specified rather than from context manager",
        )
Example #4
def upload_chunks(vol, files, bin_paths, parallel=True):
    """Push tif images into vols with or without joblib Parallel

    Arguments:
        vol {cloudvolume.CloudVolume} -- volume that will contain image data
        files {list} -- strings of tif image filepaths
        bin_paths {list} -- binary paths to tif files
        parallel {bool} -- True to use parallel version, false otherwise
    """
    # all tifs will be this size, should be 528x400x208 for mouselight
    chunk_size = vol.info["scales"][-1]["size"]
    num_workers = len(files) if len(files) < cpu_count() else cpu_count()
    if parallel:
        print("Doing parallel stuff")
        for f, bin_path in tqdm(
                zip(chunks(files, num_workers), chunks(bin_paths,
                                                       num_workers)),
                total=len(files) // num_workers,
                desc="uploading tiffs",
        ):
            parallel_upload_chunks(vol, f, bin_path, chunk_size, num_workers)
    else:
        print("Not paralleling")
        for f, bin_path in zip(files, bin_paths):
            if vol.mip == len(vol.info["scales"]) - 1:
                img = np.squeeze(tf.imread(f))
                vol[:, :, :] = img.T
            else:
                ranges = get_data_ranges(bin_path, chunk_size)
                img = np.squeeze(tf.imread(f))
                upload_chunk(vol, ranges, img)
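This snippet relies on a chunks helper that is not shown here. Assuming it is a plain fixed-size batching generator, an implementation compatible with the zip/tqdm loop above might look like this sketch.

from itertools import islice

def chunks(seq, size):
    # Yield successive batches of at most `size` items from `seq`.
    it = iter(seq)
    while True:
        batch = list(islice(it, size))
        if not batch:
            return
        yield batch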
Example #5
def process_dataset(prefix: str, batch_size=8):
    features = make_lazy_features(
        data_path=f'data/{prefix}_set.csv',
        metadata_path=f'data/{prefix}_set_metadata.csv')
    pool = jl.Parallel(n_jobs=jl.cpu_count(), backend='multiprocessing')

    with open(f'data/processed_{prefix}.csv', 'w') as out:
        current_keys = None
        is_finished = False

        while not is_finished:
            batch = fetch_batch_from_gen(batch_size=batch_size *
                                         jl.cpu_count(),
                                         g=features)
            for obj_id, keys, values in pool(batch):
                if current_keys is None:
                    keys = [
                        'object_id',
                    ] + list(keys)
                    line = ';'.join(keys) + '\n'
                    out.write(line)
                    current_keys = keys
                else:
                    assert tuple(
                        current_keys[1:]
                    ) == keys, f'{tuple(current_keys[1:])[:10]}, {keys[:10]}'
                    values = [
                        str(obj_id),
                    ] + list(values)
                    line = ';'.join(map(str, values)) + '\n'
                    out.write(line)

            if len(batch) < batch_size:
                is_finished = True
def load_data(wavelet, scales, sampling_rate, filename="./dataset/mitdb.pkl"):
    import pickle
    from sklearn.preprocessing import RobustScaler

    with open(filename, "rb") as f:
        train_data, test_data = pickle.load(f)

    cpus = 22 if joblib.cpu_count(
    ) > 22 else joblib.cpu_count() - 1  # for multi-process

    # for training
    x1_train, x2_train, y_train, groups_train = [], [], [], []
    with ProcessPoolExecutor(max_workers=cpus) as executor:
        for x1, x2, y, groups in executor.map(
                partial(worker,
                        wavelet=wavelet,
                        scales=scales,
                        sampling_period=1. / sampling_rate), train_data):
            x1_train.append(x1)
            x2_train.append(x2)
            y_train.append(y)
            groups_train.append(groups)

    x1_train = np.expand_dims(np.concatenate(x1_train, axis=0),
                              axis=1).astype(np.float32)
    x2_train = np.concatenate(x2_train, axis=0).astype(np.float32)
    y_train = np.concatenate(y_train, axis=0).astype(np.int64)
    groups_train = np.concatenate(groups_train, axis=0)

    # for test
    x1_test, x2_test, y_test, groups_test = [], [], [], []
    with ProcessPoolExecutor(max_workers=cpus) as executor:
        for x1, x2, y, groups in executor.map(
                partial(worker,
                        wavelet=wavelet,
                        scales=scales,
                        sampling_period=1. / sampling_rate), test_data):
            x1_test.append(x1)
            x2_test.append(x2)
            y_test.append(y)
            groups_test.append(groups)

    x1_test = np.expand_dims(np.concatenate(x1_test, axis=0),
                             axis=1).astype(np.float32)
    x2_test = np.concatenate(x2_test, axis=0).astype(np.float32)
    y_test = np.concatenate(y_test, axis=0).astype(np.int64)
    groups_test = np.concatenate(groups_test, axis=0)

    # normalization
    scaler = RobustScaler()
    x2_train = scaler.fit_transform(x2_train)
    x2_test = scaler.transform(x2_test)

    return (x1_train, x2_train, y_train, groups_train), (x1_test, x2_test,
                                                         y_test, groups_test)
def bootstrap(data, n_boot=1000, alpha=0.05, n_cores=1, func=np.sum):
    if n_cores is None or n_cores > cpu_count():
        n_cores = cpu_count() - 1

    def sample(X):
        idx = np.random.choice(X.shape[0], size=X.shape[0], replace=True)
        return func(X[idx, :], axis=0)

    results = (Parallel(n_jobs=n_cores, verbose=8)(delayed(sample)(data)
                                                   for _ in range(n_boot)))
    means = np.vstack(results)
    lower = np.percentile(means, 100 * alpha / 2, axis=0)
    upper = np.percentile(means, 100 * (1 - alpha / 2), axis=0)
    return np.mean(means, axis=0), lower, upper
def do_the_job(dset,
               feats,
               model,
               calibration=None,
               lso=True,
               regression_model=('linreg', LinearRegression),
               results_dir=op.join(MANYSOURCES_DATA_ROOT, 'results', 'loss_by_cooc'),
               n_jobs=None,
               by_source=False):
    rm_name, rm_factory = regression_model

    results_dir = op.join(results_dir,
                          'dset=%s' % dset,
                          'feats=%s' % feats,
                          'model=%s' % model,
                          'calibration=%s' % calibration,
                          'LSO=%r' % lso,
                          'reg_model=%s' % rm_name,
                          'bysource=%r' %by_source)
    ensure_dir(results_dir)

    _, molids, _, _ = molecules_coocurrences_df(dset=dset, feats=feats, model=model, lso=lso)

    if n_jobs is None:
        n_jobs = cpu_count()

    Parallel(n_jobs=n_jobs)(delayed(do_for_one_molid)(calibration,
                                                      dset, feats, lso, model,
                                                      molid, results_dir, rm_factory, by_source)
                            for molid in sorted(molids))
Example #9
    def fit(self, **kwargs):

        # Handle the number of jobs and the time for them
        if self.n_jobs is None or self.n_jobs == 1:
            self._n_jobs = 1
        elif self.n_jobs == -1:
            self._n_jobs = joblib.cpu_count()
        else:
            self._n_jobs = self.n_jobs

        # Automatically set the cutoff time per task
        if self.per_run_time_limit is None:
            self.per_run_time_limit = self._n_jobs * self.time_left_for_this_task // 10

        seed = self.seed
        self.automl_ = self.build_automl(
            seed=seed,
            ensemble_size=self.ensemble_size,
            initial_configurations_via_metalearning=(
                self.initial_configurations_via_metalearning),
            tmp_folder=self.tmp_folder,
            output_folder=self.output_folder,
        )
        self.automl_.fit(load_models=True, **kwargs)

        return self
Example #10
def getXY(e, n, yvec, d, t, extent):
    print "getting point cloud ..."

    if os.name == 'nt':
        o = Parallel(n_jobs=cpu_count(), verbose=0)(
            delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent)
            for k in range(len(n)))

        #easting, northing, distance to sonar, depth, heading
        X, Y = zip(*o)

    else:
        X = []
        Y = []
        for k in range(len(n)):
            out1, out2 = xyfunc(e[k], n[k], yvec, d[k], t[k], extent)
            X.append(out1)
            Y.append(out2)

    # merge flatten and stack
    X = np.asarray(X, 'float').T
    X = X.flatten()

    # merge flatten and stack
    Y = np.asarray(Y, 'float').T
    Y = Y.flatten()

    return X, Y
Example #11
def glms_from_glm(glm_design, Q, n_jobs, return_w, voxels):
    """
    Performs a GLM-separate from a GLM design matrix as input

    Needs a numpy array (no sparse matrix) as input

    **Note** output is unnormalized
    """
    n_basis = Q.shape[1]
    glms_design = classic_to_obo(glm_design, n_basis)
    if n_jobs == -1:
        n_jobs = cpu_count()
    glms_split = np.array_split(glms_design, n_jobs, axis=0)
    out = Parallel(n_jobs=n_jobs)(
        delayed(_separate_innerloop)(glms_i, n_basis, voxels)
        for glms_i in glms_split)
    betas = []
    w = []
    for o in out:
        betas.append(o[0])
        w.append(o[1])
    full_betas = np.concatenate(betas, axis=1)
    full_w = np.concatenate(w, axis=1)
    hrfs = full_betas.T
    norm = np.sqrt((hrfs * hrfs).sum(-1))
    hrfs /= norm[..., None]
    betas = norm
    if return_w:
        hrfs_w = full_w.T.dot(Q.T)
        norm_w = np.sqrt((hrfs_w * hrfs_w).sum(-1))
        hrfs_w = hrfs_w  / norm_w[..., None]
        betas_w = norm_w
        return hrfs.T, betas.T, betas_w.T
    return hrfs.T, betas.T
Example #12
    def __init__(self, features=[], n_jobs=1, indexing_type='label', **kwargs):

        logging.info("comparing - initialize {} class".format(
            self.__class__.__name__))

        self.features = []
        self.add(features)

        # public
        if n_jobs == -1:
            self.n_jobs = cpu_count()
        else:
            self.n_jobs = n_jobs
        self.indexing_type = indexing_type  # label or position

        # logging
        self._i = 1
        self._i_max = None
        self._n = []
        self._eta = []
        self._output_log_total = True

        # private
        self._compare_functions = []

        if isinstance(features, (pandas.MultiIndex, pandas.Index)):
            warnings.warn(
                "It seems you are using the older version of the Compare API, "
                "see the documentation about how to update to the new API. "
                "http://recordlinkage.readthedocs.io/"
                "en/latest/ref-compare.html", DeprecationWarning)
    def _parallel_learning(self, X, Y, w):
        n_samples = len(X)
        objective, positive_slacks = 0, 0
        verbose = max(0, self.verbose - 3)
        if self.batch_size is not None:
            raise ValueError("If n_jobs != 1, batch_size needs to" "be None")
        # generate batches of size n_jobs
        # to speed up inference
        if self.n_jobs == -1:
            n_jobs = cpu_count()
        else:
            n_jobs = self.n_jobs

        n_batches = int(np.ceil(float(len(X)) / n_jobs))
        slices = gen_even_slices(n_samples, n_batches)
        for batch in slices:
            X_b = X[batch]
            Y_b = Y[batch]
            candidate_constraints = Parallel(
                n_jobs=self.n_jobs,
                verbose=verbose)(delayed(find_constraint)(self.model, x, y, w)
                                 for x, y in zip(X_b, Y_b))
            djoint_feature = np.zeros(self.model.size_joint_feature)
            for x, y, constraint in zip(X_b, Y_b, candidate_constraints):
                y_hat, delta_joint_feature, slack, loss = constraint
                if slack > 0:
                    objective += slack
                    djoint_feature += delta_joint_feature
                    positive_slacks += 1
            w = self._solve_subgradient(djoint_feature, n_samples, w)
        return objective, positive_slacks, w
Example #14
def simus(nmtx, ncriteria, nweights,
          rank_by=1, b=None, solver="pulp", njobs=None):
    # determine the njobs
    njobs = njobs or joblib.cpu_count()

    t_nmtx = nmtx.T

    # check the b array and complete the missing values
    b = np.asarray(b)
    if None in b:
        mins = np.min(t_nmtx, axis=1)
        maxs = np.max(t_nmtx, axis=1)

        auto_b = np.where(ncriteria == MAX, maxs, mins)
        b = np.where(b.astype(bool), b, auto_b)

    # multiprocessing environment
    with joblib.Parallel(n_jobs=njobs) as jobs:

        # create and execute the stages
        stages, stage_results = solve_stages(
            t_nmtx=t_nmtx, b=b, ncriteria=ncriteria,
            solver=solver, jobs=jobs)

        # first methods points
        points1 = first_method(stage_results)
        points2, tita_j_p, tita_j_d, doms, dom_by_crit = second_method(
            stage_results, jobs)

    points = [points1, points2][rank_by - 1]
    ranking = rank.rankdata(points, reverse=True)

    return (
        ranking, stages, stage_results, points1,
        points2, tita_j_p, tita_j_d, doms, dom_by_crit)
def getXY(e,n,yvec,d,t,extent):
   print("getting point cloud ...")

   #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

   o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))  
   
   #easting, northing, distance to sonar, depth, heading
   X, Y, D, h, t = zip(*o)

   # merge flatten and stack
   X = np.asarray(X,'float').T
   X = X.flatten()

   # merge flatten and stack
   Y = np.asarray(Y,'float').T
   Y = Y.flatten()

   # merge flatten and stack
   D = np.asarray(D,'float').T
   D = D.flatten()

   # merge flatten and stack
   h = np.asarray(h,'float').T
   h = h.flatten()
   
   # merge flatten and stack
   t = np.asarray(t,'float').T
   t = t.flatten()
         
   return X, Y, D, h, t
Example #16
def parallel_distance_computation(A, B, distance, n_jobs=-1,
                                  granularity=2, verbose=False,
                                  job_size_min=1000):
    """Computes the distance matrix between all objects in A and all
    objects in B in parallel over all cores.

    This function can be partially instantiated with a given distance,
    in order to obtain the parallel version of a distance function
    with the same signature as the distance function. Example:
    distance_parallel = functools.partial(parallel_distance_computation, distance=distance)
    """
    if (len(A) > job_size_min) and joblib_available and (n_jobs != 1):
        if n_jobs is None or n_jobs == -1:
            n_jobs = cpu_count()

        if verbose:
            print("Parallel computation of the distance matrix: %s cpus." % n_jobs)

        if n_jobs > 1:
            tmp = np.linspace(0, len(A), granularity * n_jobs + 1).astype(int)
        else:  # corner case: joblib detected 1 cpu only.
            tmp = (0, len(A))

        chunks = zip(tmp[:-1], tmp[1:])
        dissimilarity_matrix = np.vstack(Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(distance)(A[start:stop], B) for start, stop in chunks))
    else:
        dissimilarity_matrix = distance(A, B)

    if verbose:
        print("Done.")

    return dissimilarity_matrix
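As the docstring suggests, partial application turns an ordinary pairwise distance into a parallel drop-in with the same (A, B) signature. A minimal usage sketch, where euclidean_rows is a toy distance introduced only for illustration:

import functools

import numpy as np

def euclidean_rows(A, B):
    # Toy pairwise distance between the rows of A and the rows of B.
    return np.sqrt(((A[:, None, :] - B[None, :, :]) ** 2).sum(-1))

euclidean_parallel = functools.partial(parallel_distance_computation,
                                       distance=euclidean_rows)
# D = euclidean_parallel(A, B)   # same values as euclidean_rows(A, B)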
Example #17
def tree_parallel_query(my_tree, A, k=None, r=None, n_jobs=-1, query_radius=False):
    """Parallel query of the global Tree 'tree'.
    """
    global tree
    tree = my_tree
    tmp = cpu_count()
    if (n_jobs is None or n_jobs == -1) and A.shape[0] >= tmp:
        n_jobs = tmp

    if n_jobs > 1:
        tmp = np.linspace(0, A.shape[0], n_jobs + 1).astype(int)
    else:  # corner case: joblib detected 1 cpu only.
        tmp = (0, A.shape[0])

    chunks = zip(tmp[:-1], tmp[1:])
    print("chunks: %s" % chunks)
    if query_radius:
        if r is None:
            r = estimate_radius(tree, A, k)

        results = Parallel(n_jobs=n_jobs)(delayed(worker_query_radius)(A[start:stop, :], r) for start, stop in chunks)
        D, I = zip(*results)
        D = np.concatenate(D)
        I = np.concatenate(I)
    else:
        results = Parallel(n_jobs=n_jobs)(delayed(worker_query)(A[start:stop, :], k) for start, stop in chunks)
        worker = worker_query
        D, I = zip(*results)
        D = np.vstack(D)
        I = np.vstack(I)

    return D, I
Example #18
    def symbolize_signal(self, signal, parallel=None, n_jobs=-1):
        """
        Symbolize whole time-series signal to a sentence (vector of words),
        parallel can be {None, "ipython", "joblib"}
        """
        window_index = self.sliding_window_index(len(signal))
        if parallel is None:
            return [self.symbolize_window(signal[wi]) for wi in window_index]
        elif parallel == "ipython":
            ## too slow
            raise NotImplementedError("parallel parameter %s not supported" % parallel)
            #return self.iparallel_symbolize_signal(signal)
        elif parallel == "joblib":
            with tempfile.NamedTemporaryFile(delete=False) as f:
                tf = f.name
            print("save temp file at %s" % tf)
            tfiles = joblib.dump(signal, tf)
            xs = joblib.load(tf, "r")
            n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs
            window_index = list(window_index)
            batch_size = len(window_index) // n_jobs
            batches = chunk(window_index, batch_size)
            symbols = Parallel(n_jobs)(delayed(joblib_symbolize_window)(self, xs, batch) for batch in batches)
            for f in tfiles:
                os.unlink(f)
            return sum(symbols, [])
        else:
            raise NotImplementedError("parallel parameter %s not supported" % parallel)
Example #19
def finetune(config_path):
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    expname = config['experiment_desc']
    import os
    os.system(f'rm "{expname}"/*')

    batch_size = config.pop('batch_size')
    get_dataloader = partial(DataLoader,
                             batch_size=batch_size,
                             num_workers=cpu_count() // 2,
                             shuffle=False,
                             drop_last=True,
                             pin_memory=True)

    import blurdata

    sigma = config['train']['sigma']
    print('sigma:', sigma)
    tr = blurdata.get_transform(256, sigma, circular=False)
    datasets = (
        blurdata.SyntheticDatasetFromFiles(
            glob.glob('/mnt/cdisk/anger/hdr+/trainresize/*'), transform=tr),
        blurdata.SyntheticDatasetFromFiles(
            glob.glob('/mnt/cdisk/anger/hdr+/trainresize/*'),
            transform=tr,
            val=True),
    )

    train = get_dataloader(datasets[0], batch_size=batch_size)
    val = get_dataloader(datasets[1], batch_size=1)
    trainer = Trainer(config, train=train, val=val)
    trainer.load_checkpoint(config['load_checkpoint'])
    trainer.train()
Example #20
def _get_n_jobs(n_jobs):
    """Get number of jobs for the computation.

    This function reimplements the logic of joblib to determine the actual
    number of jobs depending on the cpu count. If -1 all CPUs are used.
    If 1 is given, no parallel computing code is used at all, which is useful
    for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.
    Thus for n_jobs = -2, all CPUs but one are used.

    Parameters
    ----------
    n_jobs : int
        Number of jobs stated in joblib convention.

    Returns
    -------
    n_jobs : int
        The actual number of jobs as positive integer.

    """
    if n_jobs < 0:
        return max(cpu_count() + 1 + n_jobs, 1)
    elif n_jobs == 0:
        raise ValueError('Parameter n_jobs == 0 has no meaning.')
    else:
        return n_jobs
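A quick illustration of the convention implemented above; the loop reproduces the same arithmetic for a few requested values (the exact numbers depend on the machine's CPU count).

from joblib import cpu_count

for requested in (-1, -2, 1, 4):
    # Same rule as _get_n_jobs above: negative values count back from the
    # total number of CPUs, positive values are taken as-is.
    resolved = max(cpu_count() + 1 + requested, 1) if requested < 0 else requested
    print("n_jobs=%d -> %d worker(s)" % (requested, resolved))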
Example #21
def parallel_upload_chunks(vol, files, bin_paths, chunk_size, num_workers):
    """Push tif images as chunks in CloudVolume object in Parallel

    Arguments:
        vol {cloudvolume.CloudVolume} -- volume that will contain image data
        files {list} -- strings of tif image filepaths
        bin_paths {list} -- binary paths to tif files
        chunk_size {list} -- 3 ints for original tif image dimensions
        num_workers {int} -- max number of concurrently running jobs
    """
    tiff_jobs = int(num_workers /
                    2) if num_workers == cpu_count() else num_workers
    with tqdm_joblib(tqdm(desc="Load tiffs",
                          total=len(files))) as progress_bar:
        tiffs = Parallel(tiff_jobs,
                         timeout=1800,
                         backend="multiprocessing",
                         verbose=50)(delayed(tf.imread)(i) for i in files)
    with tqdm_joblib(tqdm(desc="Load ranges",
                          total=len(bin_paths))) as progress_bar:
        ranges = Parallel(tiff_jobs,
                          timeout=1800,
                          backend="multiprocessing",
                          verbose=50)(delayed(get_data_ranges)(i, chunk_size)
                                      for i in bin_paths)
    print("loaded tiffs and bin paths")
    vol_ = CloudVolume(vol.layer_cloudpath, parallel=False, mip=vol.mip)

    with tqdm_joblib(tqdm(desc="Upload chunks",
                          total=len(ranges))) as progress_bar:
        Parallel(tiff_jobs,
                 timeout=1800,
                 backend="multiprocessing",
                 verbose=50)(delayed(upload_chunk)(vol_, r, i)
                             for r, i in zip(ranges, tiffs))
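tqdm_joblib is not part of joblib or tqdm themselves; a commonly used recipe that matches the usage above patches joblib's batch-completion callback so the progress bar advances as parallel tasks finish. A sketch of that helper, assuming a reasonably recent joblib:

import contextlib

import joblib

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    # Temporarily swap joblib's callback so each finished batch advances the bar.
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = old_callback
        tqdm_object.close()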
Example #22
 def importers(self):
     print('\nVersions:')
     print("Keras :", keras.__version__)
     print("Tensorflow :", tf.__version__)
     if self.workers < 0:
         self.workers = joblib.cpu_count() + self.workers
         print('\nNegative workers means all available except N-1 (%d)' %
               self.workers)
     if not self.gpu:
         tf.config.threading.set_inter_op_parallelism_threads(self.workers)
         tf.config.threading.set_intra_op_parallelism_threads(self.workers)
         print('\nGPU disabled!')
         os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
         if len(tf.config.list_physical_devices('GPU')) > 0:
             print('You have at least one available GPU device not in use!')
         print('Using %d CPU workers' % self.workers)
     else:
         if len(tf.config.list_physical_devices('GPU')) < 1:
             print('\nGPU was not found. Using CPU instead!')
         else:
             print('\nGPU enabled!')
             if len(tf.config.list_physical_devices(
                     'GPU')) < self.workers or self.workers < 0:
                 self.workers = len(tf.config.list_physical_devices('GPU'))
                 print(
                     'Number of workers adjusted to fit the GPUs available')
             print('Using %d GPU workers' % self.workers)
             if self.gpu_test:
                 self.testGPU()
     self.multicore = True if self.workers > 1 else False
     print('Multiprocessing status:', self.multicore)
def parallelIterateOnMemMap(function,
                            data,
                            result,
                            iterations,
                            moveTo=None,
                            cleanup=True,
                            n_jobs=cpu_count()):

    #try:
    #temporary files
    folder = tempfile.mkdtemp()
    data_name = os.path.join(folder, 'data')
    result_name = os.path.join(folder, 'result')

    #result memmap
    if isinstance(result, np.memmap):
        result_mmap = result
    elif isinstance(result, tuple):  # if a shape tuple, create temp result memmap
        result_mmap = np.memmap(result_name,
                                dtype=data.dtype,
                                shape=result,
                                mode='w+')
    elif isinstance(result, np.ndarray):
        result_mmap = np.memmap(result_name,
                                dtype=data.dtype,
                                shape=result.shape,
                                mode='w+')
    else:
        raise RuntimeError('result should be array, memmap or tuple')

    #input data memmap
    dump(data, data_name)
    data_mmap = load(data_name, mmap_mode='r')

    # Fork the worker processes to perform computation concurrently
    #Parallel(n_jobs=n_jobs)(delayed(function)(data_mmap, result_mmap, i) for i in iterations)
    Parallel(n_jobs=n_jobs)(delayed(function)(data_mmap, result_mmap, i)
                            for i in iterations)

    #except:
    #    print("Exception inparallel processing!")
    #    try:
    #        shutil.rmtree(folder)
    #    except:
    #        print("Failed to delete: " + folder)

    if moveTo is None:
        result = np.array(result_mmap)
    else:
        result_mmap.flush()
        shutil.move(result_name, moveTo)
        result = np.memmap(moveTo, dtype=data.dtype, shape=result)

    if cleanup:
        try:
            shutil.rmtree(folder)
        except:
            print("Failed to delete: " + folder)

    return result
Example #24
def getXY(e, n, yvec, d, t, extent):
    print("getting point cloud ...")

    #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    o = Parallel(n_jobs=cpu_count(), verbose=0)(
        delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent)
        for k in range(len(n)))

    #easting, northing, distance to sonar, depth, heading
    X, Y, D, h, t = zip(*o)

    # merge flatten and stack
    X = np.asarray(X, 'float').T
    X = X.flatten()

    # merge flatten and stack
    Y = np.asarray(Y, 'float').T
    Y = Y.flatten()

    # merge flatten and stack
    D = np.asarray(D, 'float').T
    D = D.flatten()

    # merge flatten and stack
    h = np.asarray(h, 'float').T
    h = h.flatten()

    # merge flatten and stack
    t = np.asarray(t, 'float').T
    t = t.flatten()

    return X, Y, D, h, t
Example #25
    def _fit_multiclass_task(self, X, y, sample_weight, params):
        if params['init_model'] is not None:
            max_digits = len(str(len(self._classes)))
            init_model_filenames = ['{}.{}'.format(params['init_model'],
                                                   str(i + 1).zfill(max_digits)) for i in range(self._n_classes)]
        ovr_list = [None] * self._n_classes
        for i, cls_num in enumerate(self._classes):
            if params['init_model'] is not None:
                params['init_model'] = init_model_filenames[i]
            self._classes_map[i] = cls_num
            ovr_list[i] = (y == cls_num).astype(int)
            self._estimators[i] = RGFExecuter(**params)

        n_jobs = self.n_jobs if self.n_jobs > 0 else cpu_count() + self.n_jobs + 1
        substantial_n_jobs = min(n_jobs, self.n_classes_)
        if substantial_n_jobs < n_jobs and self.verbose:
            print('n_jobs = {0}, but RGFClassifier uses {1} CPUs because '
                  'classes_ is {2}'.format(n_jobs, substantial_n_jobs,
                                           self.n_classes_))

        self._estimators = Parallel(n_jobs=self.n_jobs)(delayed(utils.fit_ovr_binary)(self._estimators[i],
                                                                                      X,
                                                                                      ovr_list[i],
                                                                                      sample_weight)
                                                        for i in range(self._n_classes))
Example #26
def correct_scans2(fp, TL):
    if np.ndim(fp) == 2:
        return c_scans2(fp, TL)
    else:
        return Parallel(n_jobs=cpu_count(),
                        verbose=0)(delayed(c_scans2)(fp[p], TL[p])
                                   for p in range(len(fp)))
Example #27
def _job_chunks(l, n_jobs):
   n_chunks = n_jobs
   if n_jobs < 0:
      # so, have n chunks if we are using all n cores/cpus
      n_chunks = cpu_count() + 1 - n_jobs

   return _chunks(l, n_chunks)
def ingest_image_stack(s3_path, voxel_size, img_stack, extension, dtype):

    if extension == "tif":
        img = tf.imread(os.path.expanduser(img_stack))
    else:
        tmp = sitk.ReadImage(os.path.expanduser(img_stack))
        img = sitk.GetArrayFromImage(tmp)
    img = np.asarray(img, dtype=dtype)

    img_size = img.shape[::-1]
    vol = create_cloud_volume(s3_path, img_size, voxel_size, dtype=dtype)

    mem = virtual_memory()
    num_procs = min(math.floor(mem.total / (img.shape[0] * img.shape[1] * 8)),
                    joblib.cpu_count())
    print(f"num processes: {num_procs}")
    print(f"layer path: {vol.layer_cloudpath}")
    global layer_path, num_mips
    num_mips = 3
    layer_path = vol.layer_cloudpath

    data = [(i, img.T[:, :, i]) for i in range(img.shape[0])]
    files = [i[1] for i in data]
    zs = [i[0] for i in data]

    Parallel(num_procs)(delayed(process)(z, f)
                        for z, f in tqdm(zip(zs, files), total=len(zs)))
Example #29
def fit_and_save_with_grid(data: Union[np.ndarray, pd.DataFrame],
                           grid_path: str,
                           type: str = 'umap',
                           output_dir: str = '.',
                           n_jobs: int = -1):
    type = type.lower()
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    os.makedirs(output_dir, exist_ok=True)
    joblib.dump(scaler, filename=os.path.join(output_dir, 'scaler.joblib'))

    if grid_path:
        with open(grid_path) as config_file:
            grid_dict = json.load(config_file)
        param_grid = ParameterGrid(grid_dict)
        if (n_jobs == -1) and (len(param_grid) > cpu_count()):
            n_jobs = len(param_grid)
        if n_jobs == 1:
            for params in param_grid:
                fit_and_save(data=data,
                             output_dir=output_dir,
                             n_jobs=1,
                             **params)
        else:
            Parallel(
                n_jobs=n_jobs, backend='multiprocessing'
            )(delayed(fit_and_save)
              (data=data, output_dir=output_dir, type=type, n_jobs=1, **params)
              for params in param_grid)
    else:
        fit_and_save(data=data, output_dir=output_dir, n_jobs=n_jobs)
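For reference, scikit-learn's ParameterGrid used above expands a dict of lists into the cross-product of settings, one dict per fit_and_save call; a tiny illustration with a hypothetical two-parameter grid:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({"n_neighbors": [5, 15], "min_dist": [0.1, 0.5]})
print(len(grid))   # 4 combinations
print(grid[0])     # e.g. {'min_dist': 0.1, 'n_neighbors': 5}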
Example #30
def pmap(pickleable_fn, data, n_jobs=None, verbose=1, **kwargs):
    """Parallel map using joblib.

    Parameters
    ----------
    pickleable_fn : callable
        Function to map over data.
    data : iterable
        Data over which we want to parallelize the function call.
    n_jobs : int, optional
        The maximum number of concurrently running jobs. By default, it is one less than
        the number of CPUs.
    verbose: int, optional
        The verbosity level. If nonzero, the function prints the progress messages.
        The frequency of the messages increases with the verbosity level. If above 10,
        it reports all iterations. If above 50, it sends the output to stdout.
    kwargs
        Additional arguments for :attr:`pickleable_fn`.

    Returns
    -------
    list
        The i-th element of the list corresponds to the output of applying
        :attr:`pickleable_fn` to :attr:`data[i]`.
    """
    if n_jobs is None:
        n_jobs = cpu_count() - 1

    return Parallel(n_jobs=n_jobs,
                    verbose=verbose)(delayed(pickleable_fn)(d, **kwargs)
                                     for d in data)
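A minimal usage sketch of pmap with a hypothetical worker function; extra keyword arguments are forwarded to every call.

def cube(x, power=3):
    # Hypothetical worker; any picklable function works here.
    return x ** power

# results = pmap(cube, range(10), n_jobs=2, power=3)
# results == [0, 1, 8, 27, 64, 125, 216, 343, 512, 729]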
Example #31
def _job_chunks(l, n_jobs):
    n_chunks = n_jobs
    if n_jobs < 0:
        # so, have n chunks if we are using all n cores/cpus
        n_chunks = cpu_count() + 1 - n_jobs

    return _chunks(l, n_chunks)
Example #32
    def select(self, x: np.ndarray, y: np.ndarray,
               options: Optional[Dict] = None) -> np.ndarray:
        """
        L0 combinatorial optimization
        Args:
            x (np.ndarray): design matrix
            y (np.ndarray): target vector
            options:
        Returns:
        """
        n, p = x.shape
        index_array = list(range(p))

        def _lstsq(c):
            x_comb = x[:, c]
            beta = lstsq(x_comb, y)[0]
            res = 1. / 2 * np.mean((x_comb.dot(beta) - y) ** 2)
            penalty = self.lambd * len(c)
            res += penalty
            return res

        indices = []
        for p_temp in range(1, p + 1):
            for comb in combinations(index_array, p_temp):
                indices.append(comb)
        loss = Parallel(n_jobs=cpu_count())(delayed(_lstsq)(comb) for comb in indices)
        argmin = np.argmin(loss)
        self.indices = np.array(indices[argmin])
        x_temp = x[:, self.indices]
        self.coef_ = np.zeros_like(x[0, :])
        self.coef_[self.indices] = lstsq(x_temp, y)[0]
        return self.indices
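The search above is exhaustive: one least-squares fit per non-empty subset of the p columns, i.e. 2**p - 1 fits, which is why it is only practical for small p. A quick count for a hypothetical p = 12:

from itertools import combinations

p = 12
n_fits = sum(1 for k in range(1, p + 1) for _ in combinations(range(p), k))
print(n_fits, 2 ** p - 1)  # both print 4095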
Example #33
def main():
    """Do the gridsearch over several configuration files."""
    parser = ap.ArgumentParser()
    parser.add_argument('-n', '--name', required=True)
    parser.add_argument('--cpu', type=int, default=cpu_count() // 2)
    parser.add_argument('--single-job', action="store_true")
    parser.add_argument('--outdir', type=str)
    parser.add_argument('confdir', type=str)
    parser.add_argument('--no-desc', dest='edit_desc', action='store_false')
    args = parser.parse_args()

    # /!\ If single-job is used, outdir has to be defined
    if args.single_job:
        with open(args.confdir, 'r') as f:
            conf = json.load(f)
        start_run(args.name, conf, args.outdir)
    else:
        start = datetime.datetime.now()
        packed_run_name = '{}_{}'.format(args.name,
                                         start.strftime('%y%m%d_%H%M%S'))
        res_grid_path = join('res', packed_run_name)
        os.makedirs(res_grid_path)
        shutil.copy('desc.template.md', join(res_grid_path, 'desc.md'))
        if args.edit_desc:
            subprocess.call([EDITOR, join(res_grid_path, 'desc.md')])
        print('Using {} cpu'.format(args.cpu))
        logging.basicConfig(level=logging.WARNING)
        Parallel(n_jobs=args.cpu)(
            delayed(start_run)(run_name, conf, res_grid_path)
            for run_name, conf in get_confs(args.confdir))
        logger.info('All over!')
        print('took', datetime.datetime.now() - start)
Example #34
def get_sys_info():
    "Return a dictionary with info from the current system."

    # Import are nested to avoid long import time when func is not called
    import scipy
    import psutil
    import numpy as np
    from joblib import cpu_count

    info = {}

    # Info on the env
    info["env-OMP_NUM_THREADS"] = os.environ.get('OMP_NUM_THREADS')

    # Info on the OS
    info["platform"] = platform.system()
    info["platform-architecture"] = platform.machine()
    info["platform-release"] = platform.release()
    info["platform-version"] = platform.version()

    # Info on the hardware
    info["system-cpus"] = cpu_count()
    info["system-processor"] = _get_processor_name()
    info["system-ram (GB)"] = round(psutil.virtual_memory().total /
                                    (1024.0**3))

    # Info on dependency libs
    info["version-cuda"] = get_cuda_version()
    info["version-numpy"] = (np.__version__, _get_numpy_libs())
    info["version-scipy"] = scipy.__version__

    # Info on benchmark version
    info["benchmark-git-tag"] = _get_git_tag()

    return info
Example #35
def get_grid(mode, orig_def, targ_def, merge, influence, minX, maxX, minY,
             maxY, res, nn, sigmas, eps, shape, numstdevs, trans, humlon,
             humlat):

    if mode == 1:

        wf = None

        complete = 0
        while complete == 0:
            try:
                try:
                    dat = pyresample.kd_tree.resample_nearest(
                        orig_def,
                        merge.flatten(),
                        targ_def,
                        radius_of_influence=res * 20,
                        fill_value=None,
                        nprocs=cpu_count() - 2)
                except:
                    dat = pyresample.kd_tree.resample_nearest(
                        orig_def,
                        merge.flatten(),
                        targ_def,
                        radius_of_influence=res * 20,
                        fill_value=None,
                        nprocs=1)
                if 'dat' in locals():
                    complete = 1
            except:
                print('Something went wrong with resampling...')

    dat = dat.reshape(shape)

    return dat, res
def dissimilarity(tracks, prototypes, distance=bundles_distances_mam,
                  n_jobs=-1, verbose=False):
    """Compute the dissimilarity (distance) matrix between tracks and
    given prototypes. This function supports parallel (multicore)
    computation.

    Parameters
    ----------
    tracks : list or array of objects
           an iterable of streamlines.
    prototypes : iterable of objects
           The prototypes.
    distance : function
           Distance function between groups of streamlines.
    prototype_policy : string
           Shortname for the prototype selection policy. The default
           value is 'sff'.
    n_jobs : int
           If joblib is available, split the dissimilarity computation
           in n_jobs. If n_jobs is -1, then all available cpus/cores
           are used. The default value is -1.
    verbose : bool
           If True, prints some messages. Default is False.

    Return
    ------
    dissimilarity_matrix : array (N, num_prototypes)

    See Also
    --------
    furthest_first_traversal, subset_furthest_first

    Notes
    -----
    """
    if verbose:
        print("Computing the dissimilarity matrix.")

    if joblib_available and n_jobs != 1:
        if n_jobs is None or n_jobs == -1:
            n_jobs = cpu_count()

        if verbose:
            print("Parallel computation of the dissimilarity matrix: %s cpus." % n_jobs)

        if n_jobs > 1:
            tmp = np.linspace(0, len(tracks), n_jobs + 1).astype(int)
        else:  # corner case: joblib detected 1 cpu only.
            tmp = (0, len(tracks))

        chunks = zip(tmp[:-1], tmp[1:])
        dissimilarity_matrix = np.vstack(Parallel(n_jobs=n_jobs)(delayed(distance)(tracks[start:stop], prototypes) for start, stop in chunks))
    else:
        dissimilarity_matrix = distance(tracks, prototypes)

    if verbose:
        print("Done.")

    return dissimilarity_matrix
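A hedged usage sketch with a stand-in distance (real callers would pass something like dipy's bundles_distances_mam); toy_distance assumes fixed-length streamlines stored as a 3-D array, which is an illustration-only simplification.

import numpy as np

def toy_distance(tracks, prototypes):
    # Mean point-to-point distance between fixed-length streamlines.
    T = np.asarray(tracks, dtype=float)
    P = np.asarray(prototypes, dtype=float)
    return np.sqrt(((T[:, None] - P[None]) ** 2).sum(-1)).mean(-1)

# tracks = np.random.rand(100, 20, 3)   # 100 streamlines, 20 points each
# prototypes = tracks[:5]
# D = dissimilarity(tracks, prototypes, distance=toy_distance, n_jobs=2)
# D.shape == (100, 5)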
def save_pcaed_feat(drivers_list, X):
    if len(drivers_list) == 0:
        raise Exception("empty")
    
    n_proc = cpu_count()

    Parallel(n_jobs=n_proc)(delayed(p_save_reduced)(d, X[ind*200:(ind+1)*200, :]) for (ind, d) in enumerate(drivers_list))
    return True
def extract_features_drivers(drivers_list):
    if len(drivers_list) == 0:
        raise Exception("empty")

    features = np.memmap("output", dtype="float64", shape=(len(drivers_list), 200, _N_F), mode='w+')

    n_proc = cpu_count()
    Parallel(n_jobs=n_proc)(delayed(p_getinfo)(d, features, i) for i, d in enumerate(drivers_list))
    return features
def main():
    # -------------------------------------------------------------------------------
    # Parameters

    # the script will most likely work if we swap the TEXTS variable
    # with any iterable of text (where one element represents a document,
    # and the whole iterable is the corpus)
    newsgroups_train = fetch_20newsgroups(subset = 'train')
    TEXTS = newsgroups_train.data

    # spacy's english model for text preprocessing
    NLP = spacy.load('en')

    # a set of stopwords built-in to spacy, we can always
    # expand this set for the problem that we are working on,
    # here we include python built-in string punctuation mark
    STOPWORDS = spacy.en.STOP_WORDS | set(punctuation) | set(ENGLISH_STOP_WORDS)

    # create a directory called 'model' to store all outputs in later section
    MODEL_DIR = 'model'
    UNIGRAM_PATH = os.path.join(MODEL_DIR, 'unigram.txt')
    PHRASE_MODEL_CHECKPOINT = os.path.join(MODEL_DIR, 'phrase_model')
    BIGRAM_PATH = os.path.join(MODEL_DIR, 'bigram.txt')
    WORD2VEC_CHECKPOINT = os.path.join(MODEL_DIR, 'word2vec')

    # -------------------------------------------------------------------------------
    logger.info('job started')
    if not os.path.isdir(MODEL_DIR):
        os.mkdir(MODEL_DIR)

    if not os.path.exists(UNIGRAM_PATH):
        logger.info('preprocessing text')
        export_unigrams(UNIGRAM_PATH, texts = TEXTS, parser = NLP, stopwords = STOPWORDS)

    if os.path.exists(PHRASE_MODEL_CHECKPOINT):
        phrase_model = Phrases.load(PHRASE_MODEL_CHECKPOINT)
    else:
        logger.info('training phrase model')
        # use LineSentence to stream text as opposed to loading it all into memory
        unigram_sentences = LineSentence(UNIGRAM_PATH)
        phrase_model = Phrases(unigram_sentences)
        phrase_model.save(PHRASE_MODEL_CHECKPOINT)

    if not os.path.exists(BIGRAM_PATH):
        logger.info('converting words to phrases')
        export_bigrams(UNIGRAM_PATH, BIGRAM_PATH, phrase_model)

    if os.path.exists(WORD2VEC_CHECKPOINT):
        word2vec = Word2Vec.load(WORD2VEC_CHECKPOINT)
    else:
        logger.info('training word2vec')
        sentences = LineSentence(BIGRAM_PATH)
        word2vec = Word2Vec(sentences, workers = cpu_count())
        word2vec.save(WORD2VEC_CHECKPOINT)

    logger.info('job completed')
def execute():
    dims = (100000, 4)
    x = createSharedNumpyArray(dims)
    x[:] = np.random.rand(dims[0], dims[1])

    res = Parallel(n_jobs=cpu_count())(delayed(su)(x, i) for i in range(dims[0]))
  



  
    def denoise(self, videoIn):
        halfWindowSize = self.searchWindowSize // 2
        halfTemplate = self.templateWindowSize // 2
        delta = halfWindowSize + halfTemplate

        shape = tuple(np.add(videoIn.shape, (0, 2*delta, 2*delta)))
        video = np.zeros(shape)
        for i in range(0, videoIn.shape[0]):
            video[i] = cv2.copyMakeBorder(videoIn[i], delta, delta, delta, delta, cv2.BORDER_REFLECT_101)

        out = video.copy()
        outLBP = video.copy()
        outM = video.copy()
        outNonUni = video.copy()
        outNonUniXY = video.copy()
        #out = np.ones(video.shape)

        nFrames = video.shape[0]
        nRows = video.shape[1]
        nCols = video.shape[2]

        aux = np.zeros((self.templateWindowSize, self.templateWindowSize, self.templateWindowSize))
        aux[halfTemplate, halfTemplate, halfTemplate] = 1
        gaussian = ndimage.filters.gaussian_filter(aux, self.sigma)

        ranges = [range(delta, nFrames - delta), range(delta, nRows - delta), range(delta, nCols - delta)]
        coordinates = list(itertools.product(*ranges))
        self.coordinates = coordinates

        lbpTop = LBPTOP(LBP(8, uniform=True, rotation_invariant=True), \
                        LBP(8, uniform=True, rotation_invariant=True), \
                        LBP(8, uniform=True, rotation_invariant=True))
        lbpVideos = lbpTop.generateCodes(video)
        sizeXY = lbpTop.getMaxXY()
        sizeXT = lbpTop.getMaxXT()
        sizeYT = lbpTop.getMaxYT()

        # Fast Non Local Means
        avg, avgGrad = self.neighborhoodFeatures(video)

        ncpus = joblib.cpu_count()
        results = Parallel(n_jobs=ncpus,max_nbytes=2e9)(delayed(processPixel)(video, t, i, j, self.h, halfWindowSize, halfTemplate, gaussian, lbpVideos, sizeXY, sizeXT, sizeYT) for t,i,j in coordinates)

        for idx in range(0, len(results)):
            out[coordinates[idx][0], coordinates[idx][1], coordinates[idx][2]] = results[idx][0]
            outLBP[coordinates[idx][0], coordinates[idx][1], coordinates[idx][2]] = results[idx][1]
            outM[coordinates[idx][0], coordinates[idx][1], coordinates[idx][2]] = results[idx][2]
            outNonUni[coordinates[idx][0], coordinates[idx][1], coordinates[idx][2]] = results[idx][3]
            outNonUniXY[coordinates[idx][0], coordinates[idx][1], coordinates[idx][2]] = results[idx][4]

        return out[:, delta: -delta, delta: -delta], \
                outLBP[:, delta: -delta, delta: -delta], \
                outM[:, delta: -delta, delta: -delta],\
                outNonUni[:, delta: -delta, delta: -delta],\
                outNonUniXY[:, delta: -delta, delta: -delta]
Example #42
    def signal_to_paa_vector(self, signal, n_jobs=-1):
        window_index = self.sliding_window_index(len(signal))
        with tempfile.NamedTemporaryFile(delete=False) as f:
            tf = f.name
        print("save temp file at %s" % tf)
        tfiles = joblib.dump(signal, tf)
        xs = joblib.load(tf, "r")
        n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs
        window_index = list(window_index)
        batch_size = len(window_index) // n_jobs
        batches = chunk(window_index, batch_size)
        vecs = Parallel(n_jobs)(delayed(joblib_paa_window)(self, xs, batch) for batch in batches)
        for f in tfiles:
            os.unlink(f)
        return np.vstack(vecs)
def compute_dissimilarity(data, distance, prototype_policy, num_prototypes, verbose=False, size_limit=500000, n_jobs=None):
    """Compute dissimilarity matrix given data, distance,
    prototype_policy and number of prototypes.
    """
    print "Computing dissimilarity data for the original data:",
    data_original = data
    num_proto = num_prototypes
    if data.shape[0] > size_limit:
        print()
        print("Dataset too big: subsampling to %s entries only!" % size_limit)
        data = data[np.random.permutation(data.shape[0])[:size_limit], :]
    
    print(prototype_policy)
    print("number of prototypes:", num_proto)
    stdout.flush()
    if verbose: print("Generating %s prototypes as" % num_proto, end=" ")
    # Note that we use the original dataset here, not the subsampled one!
    if prototype_policy=='random':
        if verbose: print("random subset of the initial data.")
        prototype_idx = np.random.permutation(data_original.shape[0])[:num_proto]
        prototype = [data_original[i] for i in prototype_idx]
    elif prototype_policy=='fft':
        prototype_idx = furthest_first_traversal(data_original, num_proto, distance)
        prototype = [data_original[i] for i in prototype_idx]
    elif prototype_policy=='sff':
        prototype_idx = subset_furthest_first(data_original, num_proto, distance)
        prototype = [data_original[i] for i in prototype_idx]                
    else:
        raise Exception                

    if verbose: print("Computing dissimilarity matrix.")
    if joblib_available and n_jobs != 1:
        if n_jobs is None or n_jobs == -1:
            n_jobs = cpu_count()

        print("Parallel computation of the dissimilarity matrix: %s cpus." % n_jobs)
        if n_jobs > 1:
            tmp = np.linspace(0, data.shape[0], n_jobs).astype(int)
        else: # corner case: joblib detected 1 cpu only.
            tmp = (0, data.shape[0])

        chunks = zip(tmp[:-1], tmp[1:])
        data_dissimilarity = np.vstack(Parallel(n_jobs=n_jobs)(delayed(distance)(data[start:stop], prototype) for start, stop in chunks))
    else:
        data_dissimilarity = distance(data, prototype)
                
    print()
    return data_dissimilarity
def compute_preferences(MQD, click_models, cutoff, output_filepath):
    preferences = Parallel(n_jobs=cpu_count())(
                        delayed(compute_preferences_parallel)(
                            click_model_name, query,
                            MQD[click_model_name][query]['model'],
                            MQD[click_model_name][query]['relevances'],
                            cutoff)
                        for click_model_name in click_models
                        for query in MQD[click_model_name].keys())

    for stats in preferences:
        click_model_name, query, prefs = stats
        MQD[click_model_name][query]['preferences'] = prefs

    with open(output_filepath, 'wb') as ofile:
        pickle.dump(MQD, ofile, protocol=-1)
Example #45
def ecfps_mp(numjobs=None, dest_dir=None):
    """Python-parallel computation of ECFPs.
    Parameters:
      - numjobs: the number of threads to use (None=all in the machine).
      - dest_dir: the directory to which the fingerprints will be written, in weird fp format(TM).
    """
    dest_dir = _MALARIA_ECFPS_PARALLEL_RESULTS_DIR if dest_dir is None else dest_dir
    ensure_dir(dest_dir)
    numjobs = cpu_count() if numjobs is None else int(numjobs)
    Parallel(n_jobs=numjobs)(delayed(_molidsmiles_it_ecfp)
                             (start=start,
                              step=numjobs,
                              output_file=op.join(dest_dir, 'all__fcfp=%r__start=%d__step=%d.weirdfps' %
                                                            (fcfp, start, numjobs)),
                              fcfp=fcfp)
                             for start, fcfp in product(range(numjobs), (True, False)))
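The start/step pair handed to each worker above partitions the molecule stream round-robin across jobs; a small illustration with hypothetical numbers:

numjobs = 3
for start in range(numjobs):
    # Worker `start` processes every numjobs-th molecule, beginning at `start`.
    print(start, list(range(start, 10, numjobs)))
# 0 [0, 3, 6, 9]
# 1 [1, 4, 7]
# 2 [2, 5, 8]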
Example #46
def electre1(nmtx, ncriteria, nweights, p, q, njobs=None):
    # determine the njobs
    njobs = njobs or joblib.cpu_count()

    # get the concordance and discordance info
    # multiprocessing environment
    with joblib.Parallel(n_jobs=njobs) as jobs:
        mtx_concordance = concordance(nmtx, ncriteria, nweights, jobs)
        mtx_discordance = discordance(nmtx, ncriteria, jobs)

    with np.errstate(invalid='ignore'):
        outrank = (
            (mtx_concordance >= p) & (mtx_discordance <= q))

    kernel_mask = ~outrank.any(axis=0)
    kernel = np.where(kernel_mask)[0]
    return kernel, outrank, mtx_concordance, mtx_discordance
Example #47
def getXY(e,n,yvec,d,t,extent):
   print("getting point cloud ...")

   #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

   if os.name=='posix':
      o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

      #easting, northing, distance to sonar, depth, heading
      X, Y, D, H, T = zip(*o)

   else:
      X = []; Y = [];
      D = []; H = []; T = []
      for k in range(len(n)):
         out1,out2,out3,out4,out5 = xyfunc(e[k], n[k], yvec, d[k], t[k], extent)
         X.append(out1); Y.append(out2)
         D.append(out3); H.append(out4); T.append(out5)


   # merge flatten and stack
   X = np.asarray(X,'float').T
   X = X.flatten()

   # merge flatten and stack
   Y = np.asarray(Y,'float').T
   Y = Y.flatten()

   # merge flatten and stack
   D = np.asarray(D,'float').T
   D = D.flatten()

   # merge flatten and stack
   H = np.asarray(H,'float').T
   H = H.flatten()

   # merge flatten and stack
   T = np.asarray(T,'float').T
   T = T.flatten()

   return X, Y, D, H, T
def getXY(e,n,yvec,d,t,extent):
   print("getting point cloud ...")

   #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

   try:
      o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))  
   except:
      o = Parallel(n_jobs = 1, verbose=0)(delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))  
      
   X, Y = zip(*o)

   # X flatten and stack
   X = np.asarray(X,'float')
   X = X.flatten()

   # Y flatten and stack
   Y = np.asarray(Y,'float')
   Y = Y.flatten()

   return X, Y
Example #49
    def __init__(self, n_jobs, function, args=None, kwargs=None):
        """
        Parameters
        ----------
        n_jobs : int
            Number of cores to be used for parallel calculation. If -1 use all
            available cores.
        function : object that supports __call__, as functions
            function to be run in parallel.
        args : list of tuples
            Arguments for function; see the ParallelCalculation class
            description.
        kwargs : list of dicts or None
            kwargs for function; see the ParallelCalculation
            class description.
        """

        # args[i] should be a list of args, one for each run
        self.n_jobs = n_jobs
        if self.n_jobs == -1:
            self.n_jobs = cpu_count()

        # Arguments should be present (checked before len(args) is used below)
        if args is None:
            args = []
        self.args = args

        self.functions = function
        if not hasattr(self.functions, '__iter__'):
            self.functions = [self.functions] * len(args)
        if len(self.functions) != len(args):
            self.functions = self.functions[:] * (len(args) // len(self.functions))

        # If kwargs are not present, use empty dicts
        if kwargs:
            self.kwargs = kwargs
        else:
            self.kwargs = [{} for i in self.args]

        self.nruns = len(args)
def clean_corpus(texts, parser, stopwords, batch_size, n_jobs):
    """
    Generator function using spaCy to parse reviews:
    - lemmatize the text
    - remove punctuation, whitespace and number
    - remove pronoun, e.g. 'it'
    - remove tokens that are shorter than 2
    """
    n_threads = cpu_count()
    if n_jobs > 0 and n_jobs < n_threads:
        n_threads = n_jobs

    # use the .pipe method to process texts as a stream;
    # this functionality supports using multi-threads
    for parsed_text in parser.pipe(texts, n_threads = n_threads, batch_size = batch_size):
        tokens = []
        for token in parsed_text:
            if valid_word(token) and token.lemma_ not in stopwords:
                tokens.append(token.lemma_)

        cleaned_text = ' '.join(tokens)
        yield cleaned_text
def compute_lambdas(MQD, click_models, n_repeats, n_impressions, compute_lambdas_method,
                    ranking_sampler, cutoff, store_impressions, output_filepath):
    # Run the computation of lambdas in parallel with the specified
    # `compute_lambdas_method` method and `ranking_sampler`.
    lambdas_counts = Parallel(n_jobs=cpu_count())(
                        delayed(compute_lambdas_method)(
                            click_model_name, query,
                            MQD[click_model_name][query]['model'],
                            MQD[click_model_name][query]['relevances'],
                            n_impressions,
                            n_repeats,
                            ranking_sampler,
                            store_impressions,
                            cutoff)
                        for click_model_name in click_models
                        for query in MQD[click_model_name].keys())

    # Copy the lambdas and counts into a dictionary. Note that Parallel
    # preserves the order of the results, but we still keep track of the
    # associated click model and query names so that nothing gets mixed up.
    for stats in lambdas_counts:
        click_model_name, query, cutoff, n_imps, ranking_sampler_name,\
        lambdas, total_counts, viewed_counts, total_lambdas, viewed_lambdas,\
        impressions = stats

        MQD[click_model_name][query]['stats'] = {}

        for i, n in enumerate(n_imps):
            MQD[click_model_name][query]['stats'][n] = {'lambdas': lambdas[i],
                                                        'total_lambdas': total_lambdas[i],
                                                        'viewed_lambdas': viewed_lambdas[i],
                                                        'total_counts': total_counts[i],
                                                        'viewed_counts': viewed_counts[i],
                                                        'cutoff': cutoff,
                                                        'impressions': impressions,
                                                        'ranking_sampler': ranking_sampler_name}

    with open(output_filepath, 'wb') as ofile:
        pickle.dump(MQD, ofile, protocol=-1)
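
# The bookkeeping above relies on joblib.Parallel returning results in the
# same order as the submitted tasks; a tiny self-contained check of that
# behaviour (the helper name is illustrative only):
from joblib import Parallel, delayed

def _square(x):
    return x * x

# results come back in submission order, regardless of which worker ran them
assert Parallel(n_jobs=2)(delayed(_square)(i) for i in range(5)) == [0, 1, 4, 9, 16]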
def texture_slic(humfile, sonpath, doplot=1, numclasses=4, maxscale=20, notes=4):
          
      '''
      Create a texture lengthscale map using the algorithm detailed by Buscombe et al. (2015)
      This textural lengthscale is not a direct measure of grain size. Rather, it is a statistical 
      representation that integrates over many attributes of bed texture, of which grain size is the most important. 
      The technique is a physically based means to identify regions of texture within a sidescan echogram, 
      and could provide a basis for objective, automated riverbed sediment classification.

      Syntax
      ----------
      [] = PyHum.texture_slic(humfile, sonpath, doplot, numclasses, maxscale, notes)

      Parameters
      ----------
      humfile : str
       path to the .DAT file
      sonpath : str
       path where the *.SON files are
      doplot : int, *optional* [Default=1]
       if 1, make plots, otherwise do not make plots
      numclasses : int, *optional* [Default=4]
       number of 'k means' that the texture lengthscale will be segmented into
      maxscale : int, *optional* [Default=20]
       Max scale as inverse fraction of data length for wavelet analysis
      notes : int, *optional* [Default=4]
       notes per octave for wavelet analysis

      Returns
      -------
      sonpath+base+'_data_class.dat': memory-mapped file
        contains the texture lengthscale map

      sonpath+base+'_data_kclass.dat': memory-mapped file
        contains the k-means segmented texture lengthscale map

      References
      ----------
      .. [1] Buscombe, D., Grams, P.E., and Smith, S.M.C., 2015, Automated riverbed sediment
       classification using low-cost sidescan sonar. Journal of Hydraulic Engineering 10.1061/(ASCE)HY.1943-7900.0001079, 06015019.
      '''

      # prompt user to supply file if no input file given
      if not humfile:
         print('An input file is required!!!!!!')
         Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
         humfile = askopenfilename(filetypes=[("DAT files","*.DAT")]) 

      # prompt user to supply directory if no input sonpath is given
      if not sonpath:
         print('A *.SON directory is required!!!!!!')
         Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
         sonpath = askdirectory() 

      # print given arguments to screen and convert data type where necessary
      if humfile:
         print('Input file is %s' % (humfile))
         
      if sonpath:
         print('Sonar file path is %s' % (sonpath))

      if numclasses:
         numclasses = np.asarray(numclasses,int)
         print('Number of sediment classes: %s' % (str(numclasses)))
         
      if maxscale:
         maxscale = np.asarray(maxscale,int)
         print('Max scale as inverse fraction of data length: %s' % (str(maxscale)))
         
      if notes:
         notes = np.asarray(notes,int)
         print('Notes per octave: %s' % (str(notes)))
         
      if doplot:
         doplot = int(doplot)
         if doplot==0:
            print("Plots will not be made")   
      
      
      print('[Default] Number of processors is %s' % (str(cpu_count())))
                        
      ########################################################
      ########################################################
      
      # start timer
      if os.name=='posix': # true if linux/mac or cygwin on windows
         start = time.time()
      else: # windows
         start = time.clock()

      # if son path name supplied has no separator at end, put one on
      if sonpath[-1]!=os.sep:
         sonpath = sonpath + os.sep

      base = humfile.split('.DAT') # get base of file name for output
      base = base[0].split(os.sep)[-1]

      # remove underscores, negatives and spaces from basename
      base = humutils.strip_base(base)   

      meta = loadmat(os.path.normpath(os.path.join(sonpath,base+'meta.mat')))

      ft = 1/loadmat(sonpath+base+'meta.mat')['pix_m']
      #pix_m = np.squeeze(meta['pix_m'])
      #dep_m = np.squeeze(meta['dep_m'])
      dist_m = np.squeeze(meta['dist_m'])

      ### port
      print("processing port side ...")
      # load memory mapped scan ... port
      shape_port = np.squeeze(meta['shape_port'])
      if shape_port!='':

         if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_port_lar.dat'))):
            port_fp = io.get_mmap_data(sonpath, base, '_data_port_lar.dat', 'float32', tuple(shape_port))         
         else:
            port_fp = io.get_mmap_data(sonpath, base, '_data_port_la.dat', 'float32', tuple(shape_port))
          
         #port_fp2 = io.get_mmap_data(sonpath, base, '_data_port_l.dat', 'float32', tuple(shape_port))

      ### star
      print("processing starboard side ...")
      # load memory mapped scan ... star
      shape_star = np.squeeze(loadmat(sonpath+base+'meta.mat')['shape_star'])
      if shape_star!='':
         if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_star_lar.dat'))):
            star_fp = io.get_mmap_data(sonpath, base, '_data_star_lar.dat', 'float32', tuple(shape_star))
         else:
            star_fp = io.get_mmap_data(sonpath, base, '_data_star_la.dat', 'float32', tuple(shape_star))

         #star_fp2 = io.get_mmap_data(sonpath, base, '_data_star_l.dat', 'float32', tuple(shape_star))

      if len(shape_star)>2:
         shape = shape_port.copy()
         shape[1] = shape_port[1] + shape_star[1]
      else:
         shape = []
         shape.append(1)
         shape.append(shape_port[0])
         shape.append(shape_port[1])
         shape[1] = shape_port[0] + shape_star[0]

      #work on the entire scan
      #im = humutils.rescale(np.vstack((np.flipud(np.hstack(port_fp)), np.hstack(star_fp))),0,1)
      im = np.vstack((np.flipud(np.hstack(port_fp)), np.hstack(star_fp)))
      im[np.isnan(im)] = 0
      im = humutils.rescale(im,0,1)

      #get SLIC superpixels
      segments_slic = slic(im, n_segments=int(im.shape[0]/10), compactness=.1)

      #pre-allocate texture lengthscale array
      tl = np.zeros(im.shape, dtype = "float64")

      #cycle through each segment and compute tl
      for k in np.unique(segments_slic):
         mask = np.zeros(im.shape[:2], dtype = "uint8")
         mask[segments_slic == k] = 255
         cmask, cim = crop_toseg(mask, im)
         tl[segments_slic == k] = parallel_me(cim, maxscale, notes, np.shape(cim)[0])

      R_fp = io.get_mmap_data(sonpath, base, '_data_range.dat', 'float32', tuple(shape_star))
      R = np.vstack((np.flipud(np.hstack(R_fp)), np.hstack(R_fp)))
      R = R/np.max(R)

      #correct for range and scale
      tl = tl * np.cos(R) * (1/ft)
      tl[im==0] = np.nan 
      tl[np.isnan(im)] = np.nan 

      # create memory mapped file for Sp
      with open(os.path.normpath(os.path.join(sonpath,base+'_data_class.dat')), 'w+') as ff:
         fp = np.memmap(ff, dtype='float32', mode='w+', shape=tuple(shape))

      counter = 0
      if len(shape_star)>2:
         for p in range(len(port_fp)):
            n,m = np.shape(np.vstack((np.flipud(port_fp[p]), star_fp[p])))
            Sp = tl[:n, counter:counter+m]
            counter = counter+m
            fp[p] = Sp.astype('float32')
            del Sp
         del fp # flush data to file

         class_fp = io.get_mmap_data(sonpath, base, '_data_class.dat', 'float32', tuple(shape))

      else:

            with open(os.path.normpath(os.path.join(sonpath,base+'_data_class.dat')), 'w+') as ff:
               np.save(ff, np.squeeze(tl).astype('float32'))  # single-chunk case: write the full texture map (Sp only exists in the chunked branch)

            with open(os.path.normpath(os.path.join(sonpath,base+'_data_class.dat')), 'r') as ff:
               class_fp = np.load(ff)

      dist_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dist_m'])

      ########################################################
      if doplot==1:

         if len(shape_star)>2:
            for p in range(len(star_fp)):
               plot_class(dist_m, shape_port, port_fp[p], star_fp[p], class_fp[p], ft, humfile, sonpath, base, p)
         else:
            plot_class(dist_m, shape_port, port_fp, star_fp, class_fp, ft, humfile, sonpath, base, 0)

         if len(shape_star)>2:
            for p in range(len(star_fp)):
               plot_contours(dist_m, shape_port, class_fp[p], ft, humfile, sonpath, base, numclasses, p)
         else:
            plot_contours(dist_m, shape_port, class_fp, ft, humfile, sonpath, base, numclasses, 0)
        

      #######################################################
      # k-means 
      
      if len(shape_star)>2:
         with open(os.path.normpath(os.path.join(sonpath,base+'_data_kclass.dat')), 'w+') as ff:
            fp = np.memmap(ff, dtype='float32', mode='w+', shape=tuple(shape))

         for p in range(len(port_fp)):
            wc = get_kclass(class_fp[p].copy(), numclasses)
            fp[p] = wc.astype('float32')
            del wc

         del fp

         kclass_fp = io.get_mmap_data(sonpath, base, '_data_kclass.dat', 'float32', tuple(shape))
            
      else:
         wc = get_kclass(class_fp.copy(), numclasses)

         with open(os.path.normpath(os.path.join(sonpath,base+'_data_kclass.dat')), 'w+') as ff:
            np.save(ff, np.squeeze(wc).astype('float32'))

         del wc
         
         with open(os.path.normpath(os.path.join(sonpath,base+'_data_kclass.dat')), 'r') as ff:
            kclass_fp = np.load(ff)
            
      ########################################################
      if doplot==1:

         if len(shape_star)>2:
            for p in range(len(star_fp)):
               plot_kmeans(dist_m, shape_port, port_fp[p], star_fp[p], kclass_fp[p], ft, humfile, sonpath, base, p)
         else:
            plot_kmeans(dist_m, shape_port, port_fp, star_fp, kclass_fp, ft, humfile, sonpath, base, 0)         

      if os.name=='posix': # true if linux/mac
         elapsed = (time.time() - start)
      else: # windows
         elapsed = (time.clock() - start)
      print("Processing took "+str(elapsed)+"seconds to analyse")

      print("Done!")
Exemple #53
0
def resample(orig_def, target_def, ss):
    '''
    Resample the swath data ss from orig_def onto target_def (nearest
    neighbour) and return the NumPy array used for raster generation.
    '''
    result = kd_tree.resample_nearest(orig_def, ss, target_def, radius_of_influence=1, fill_value=None, nprocs = cpu_count()-1)
    return result
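
# A minimal usage sketch for resample() above (coordinates and intensities are
# made up; assumes pyresample, numpy and joblib are installed, and that
# kd_tree and cpu_count are available at module level as the function expects):
import numpy as np
from joblib import cpu_count
from pyresample import geometry, kd_tree

lons = np.array([-111.10, -111.11, -111.12])
lats = np.array([36.10, 36.11, 36.12])
ss = np.array([10.0, 12.5, 9.8])                             # sidescan intensities
orig_def = geometry.SwathDefinition(lons=lons, lats=lats)
target_def = geometry.SwathDefinition(lons=lons, lats=lats)  # 1:1 target for illustration
gridded = resample(orig_def, target_def, ss)                 # nearest-neighbour resampled array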
from keras.preprocessing import sequence
from keras.models import model_from_json

models_dir = '.'
clf = model_from_json(open('%s/classifier_arc.json' % models_dir).read())
clf.load_weights('%s/classifier_weights.h5' % models_dir)

from joblib import Parallel, delayed, cpu_count
n_jobs = cpu_count()


def generator(vectors):
	for idx, vector in enumerate(vectors):
		print(idx)
		yield vector

def processing(vector):
	maxlen=40
	vector = sequence.pad_sequences([vector,], maxlen=maxlen)
	predicted_label = clf.predict_classes(vector, verbose=0)[0][0]

	return predicted_label


vectors = [[96, 838, 766, 246, 246, 29, 319, 2, 150, 1, 369, 304, 3684, 33, 1324, 1, 213, 51, 12, 35, 192, 1140, 17, 9903, 1, 506, 835, 266, 141, 19, 7384, 8689, 81536, 180, 291, 4, 33, 18, 1100, 1, 1565, 464, 506, 180, 1604, 7, 835, 17, 2, 599, 6, 38, 14, 7, 277, 1220, 191, 33, 133, 1604, 14, 1513, 24, 1111, 33, 115, 5984, 95, 18, 2, 1081, 822, 6, 5, 1397, 33, 39, 863, 0, 308, 0, 17, 2421, 6, 7822, 0, 418, 0, 37, 6, 4455, 36737, 0, 418, 0, 487, 12427, 33, 36, 7, 11090, 2666, 11, 2, 1165, 4, 115, 7254, 58, 38437, 3809, 34, 31029, 141, 16, 399, 835, 438, 160, 0, 418, 0, 17, 2421, 6, 7822, 25, 1711, 6, 99, 1051, 17, 634, 6, 5, 1397, 29, 212, 2161, 4, 2714, 136, 2704, 4, 229, 117, 23, 1745, 29, 319, 141, 2, 150, 4, 29, 314, 2, 441, 231, 261, 346, 2034, 3, 8, 14, 7, 4279, 606, 8, 14, 7, 642, 1775, 9, 1222, 24026, 606, 206, 2099, 1, 15, 141, 2, 835, 675, 12, 418, 403, 1211, 19, 4429, 81, 344, 564, 168, 3, 23248, 7, 443, 833, 386, 3, 3, 8, 14, 7, 642, 1775, 9, 52, 24026, 606, 287, 5, 29, 319, 2, 585, 1, 369, 1979, 2021], [96, 838, 766, 506, 835, 55, 304, 3684, 33, 1324, 1, 213, 51, 12, 35, 192, 1140, 17, 9903, 1, 506, 835, 180, 4, 33, 18, 1100, 1, 57103, 1604, 7, 835, 17, 2, 599, 6, 38, 14, 7, 277, 1220, 191, 33, 133, 1604, 14, 1513, 24, 1111, 33, 115, 5984, 95, 18, 2, 1081, 822, 6, 5, 1397, 33, 39, 863, 588, 418, 965, 17, 2421, 6, 7822, 346, 418, 2121, 37, 6, 4455, 36737, 513, 418, 2446, 487, 12427, 33, 36, 7, 11090, 2666, 11, 2, 1165, 4, 115, 7254, 58, 38437, 3809, 34, 31029, 141, 16, 399, 835, 438, 160, 612, 418, 2587, 17, 2421, 6, 7822, 25, 1711, 6, 99, 1051, 17, 634, 6, 5, 1397, 29, 212, 2161, 4, 2714, 136, 2704, 4, 229, 117, 23, 1745, 1979, 2021],[96, 766, 246, 246, 120, 15785, 30, 92, 3, 133, 31, 39, 19331, 1054, 2438, 457, 460, 27, 1688, 12, 2, 794, 1338, 221, 0, 3, 2, 1113, 7, 4209, 6, 822, 8, 45, 2286, 82, 7, 1635, 2312, 5376, 77, 1215, 86, 1764, 25, 31, 623, 708, 1113, 427, 222, 8617, 40, 68, 3599, 1, 2, 633, 139, 787, 7, 1764, 25355, 1308, 82, 478, 116, 7, 794, 1, 708, 68, 427, 1646, 227, 3, 8, 1707, 19, 7, 1391, 327, 2872, 9, 3647, 65, 0, 3, 7, 36, 347, 15, 1642, 159, 2, 4539, 65, 445, 209, 3028, 15, 450, 1, 1113, 3, 8, 1502, 1982, 38, 3047, 2, 248, 1930, 57613, 1057, 135, 6169, 459, 1, 2, 380, 25, 1136, 54, 292, 31, 39, 576, 7, 5582, 794, 538, 24, 2361, 38, 1, 281, 1822, 6, 42913, 2525, 819, 1257, 1046, 7, 0, 347, 674, 209, 8, 14, 70, 4709, 2551, 979, 7822, 630, 138, 380, 589, 3868, 8114, 3, 134, 1113, 1707, 15, 450, 16, 569, 4539, 105, 0, 35957, 8, 782, 1308, 460, 23408, 4, 27, 23832, 1, 20002, 4, 794, 1081, 692, 11, 199, 6, 2653, 2, 5317, 589, 938, 31, 597, 116, 1, 2892, 83, 106, 80, 3437, 2, 1642, 850, 159, 7, 794, 4200, 34, 20, 56908, 11, 199, 6, 1046, 33, 287, 0, 881, 794, 4706, 6, 0, 15533, 25, 179, 1882, 1845, 13, 5159, 13, 27, 24, 14, 1050, 29678, 3, 133, 11, 8, 12252, 25, 1113, 39, 65, 86, 529, 1054, 500, 445, 4, 524, 0, 3, 3002, 209, 155, 14, 24289, 1050, 116, 9, 7, 0, 1827, 2, 5679, 6, 604, 197, 36, 3647, 1189, 12, 1454, 1156, 1, 4249, 51, 4, 2199, 51, 2, 19879, 25, 419, 1189, 3647, 12, 70, 860, 860, 3626, 4, 338, 2, 1113, 287, 0, 8, 8199, 14963, 51, 2, 19879, 1998, 12, 191, 1156, 36, 2, 248, 2558, 178, 248, 96819, 4084, 178, 1016, 548, 20, 27, 1024, 221, 86, 1263, 548, 1755, 7, 794, 1, 8017, 3, 25, 23, 14, 7, 4232, 24, 604, 56, 3647, 6269, 1118, 1156, 209, 6819, 116, 7, 794, 672, 8, 14, 7, 4232, 3, 2488, 25, 380, 14, 1189, 2, 3647, 1113, 12, 1104, 694, 4259, 23, 133, 3204, 2, 0, 3, 6269, 24, 147, 342, 51, 12, 2123, 694, 3047, 33, 63383, 8, 4232, 632, 4, 2, 1113, 287, 
2495, 25, 1454, 1784, 12, 91, 1156, 18, 1189, 4, 5042, 502, 91, 1784, 9, 0, 0, 58, 394, 94, 124, 325, 2188, 2112, 743, 607, 17, 425, 1817, 4196, 4858, 1453, 540, 3989, 4385, 1619, 5101, 396, 602, 467, 24582, 552, 4336, 560, 8, 15, 555, 45, 503, 452, 4, 410, 64, 4, 14, 107, 9, 2, 392, 87, 6, 2, 437, 13, 476, 1, 523, 23, 14, 527, 25, 5, 18, 22, 2, 107, 177, 13, 444, 453, 1, 433, 8, 32, 1, 2, 107, 177, 13, 76, 8, 32, 159, 574, 391, 29, 52, 1, 8, 32, 1, 479, 2, 429, 24, 2, 32, 46, 581, 76, 21, 5, 4, 209, 539, 384, 2, 32, 16, 30, 426, 278, 222, 402, 13, 578, 7, 333, 6, 2, 32, 12, 571, 418, 403, 1211, 19, 4429, 81, 344, 564, 168, 3, 3, 23248, 7, 443, 833, 386, 3, 3, 12, 571, 418, 403, 1211, 19, 24789, 115, 10276, 41126, 168, 3, 3, 3, 25, 383, 826, 14, 2677, 33, 2772, 2, 794, 147, 36, 2, 1734, 6, 16590, 2, 15, 459, 6, 7, 39338, 6, 15, 1249, 135, 7, 50211, 6, 604, 1009, 856, 2, 445, 12, 2, 694, 147, 1098, 2, 101, 9, 6806, 2388, 1, 106, 71, 380, 17, 2243, 15, 1249, 151, 36, 1009, 206, 164, 25, 24, 147, 20, 7, 2005, 13, 7, 0, 3, 3, 3, 3, 277, 340, 180, 34, 36, 1, 12692, 11, 12, 80, 356, 88, 1989, 2, 15785, 2438, 4, 0, 23, 1792, 16, 1454, 15, 1249, 9, 2, 383, 826, 3692, 133, 31, 39, 19331, 1054, 2438, 457, 460, 27, 1688, 12, 2, 794, 1338, 221, 157, 2, 1113, 7, 4209, 6, 822, 3, 3, 659, 794, 3204, 500, 9, 347, 15, 287, 1755, 1736, 794, 1470, 31, 1390, 20, 610, 1, 164, 752, 31, 43, 1054, 149, 51, 4, 1189, 136, 2399, 0, 3, 3, 697, 340, 71, 2075, 3133, 6, 1054, 3972, 556, 1113, 4, 794, 8462, 16, 1811, 191, 18, 12493, 14, 1, 36, 1113, 6505, 7, 277, 409, 6, 2, 524, 385, 16, 15, 459, 266, 141, 2691, 835, 19, 0, 1016, 25, 102, 2, 1113, 287, 2005, 16, 136, 6, 2, 1054, 149, 1189, 8624, 9, 2, 27, 3662, 2065, 24, 212, 1527, 2491, 3166, 3585, 568, 2, 27, 8195, 31, 43, 2, 27, 2075, 116, 1, 1698, 3082, 4500, 1, 27, 1113, 427, 3, 3, 1009, 3, 3, 3, 31, 39, 1288, 50, 18175, 3039, 1, 7, 7963, 1609, 0, 15417, 23, 5449, 82, 8, 3, 3, 7963, 1609, 5935, 37, 50, 2411, 1609, 7, 626, 1338, 221, 22660, 2, 1609, 1289, 105, 11, 2, 367, 149, 0, 4086, 2, 1609, 16369, 4517, 4, 1638, 1, 3441, 46770, 1609, 147, 503, 71, 18175, 985, 626, 191, 14, 2, 3039, 1, 367, 55049, 147, 521, 848, 1942, 24, 234, 0, 9, 2085, 4, 876, 657, 4, 266, 7963, 5939, 24, 234, 1117, 8208, 0, 147, 20, 71, 3406, 626, 2947, 2796, 17, 2, 1609, 24, 147, 5019, 2, 7640, 1117, 997, 9, 367, 3, 3, 1291, 2007, 5548, 3, 3, 1009, 3, 3, 3, 3, 1031, 10, 833, 12, 10450, 7, 794, 3, 3, 21338, 51, 35957, 445, 0, 2772, 11, 7, 1778, 0, 2, 380, 88, 1454, 1249, 2, 338, 147, 36, 1, 293, 311, 437, 49, 4492, 11881, 17, 7, 794, 2, 338, 287, 36, 7, 1221, 35957, 3039, 1, 2, 794, 4, 2, 794, 147, 2524, 2, 19485, 6, 8420, 92, 16, 2, 2598, 1249, 7, 380, 45, 36, 3, 3, 3, 8, 1707, 19, 7, 1391, 327, 2872, 9, 3647, 65, 0, 3, 7, 36, 347, 15, 1642, 159, 2, 4539, 65, 445, 209, 3028, 15, 450, 1, 1113, 764, 17, 1054, 1811, 4, 91, 7286, 0, 385, 9, 2085, 464, 35957, 0, 37, 0, 3, 3, 3, 8, 1502, 1982, 38, 3047, 2, 248, 1930, 57613, 1057, 135, 6169, 459, 1, 2, 380, 77, 133, 1754, 27, 794, 0, 3, 3, 134, 1113, 1707, 15, 450, 16, 569, 4539, 105, 0, 35957, 1113, 39, 2000, 556, 569, 1249, 628, 2, 10291, 209, 1707, 1054, 252, 7286, 9, 99, 92, 16, 3647, 4539, 464, 0, 3647, 4539, 14813, 10227, 15, 450, 16, 569, 4539, 4, 4010, 236, 3, 3, 3, 8, 1502, 1754, 1113, 1, 8017, 15, 32, 2728, 17, 2, 3647, 7286, 2728, 77, 1754, 24289, 1050, 8402, 16, 4539, 133, 11, 8, 12252, 25, 1113, 39, 65, 86, 529, 1054, 500, 445, 4, 524, 187, 3002, 209, 155, 14, 24289, 1050, 116, 9, 7, 0, 3, 3, 3, 3, 3, 3, 3, 4959, 144, 4712, 1720, 4010, 
0, 13, 1113, 287, 2524, 8, 0, 206, 56, 7, 2005, 1332, 292, 3, 3, 0, 3, 3, 3, 3, 3, 1727, 708, 389, 0, 1302, 1, 20, 700, 21, 7, 794, 2488, 2, 338, 39, 3204, 86, 529, 30233, 708, 389, 1407, 666, 389, 3, 3, 6819, 39, 20, 700, 1332, 292, 1113, 287, 3204, 666, 389, 421, 12, 2, 7286, 23, 7542, 13, 9614, 0, 3, 3, 3, 3, 3, 4959, 144, 4712, 6269, 46770, 794, 147, 116, 1, 139, 2, 6269, 102, 24, 151, 342, 51, 67, 2, 380, 3445, 153, 91, 694, 3, 3, 25, 23, 14, 7, 4232, 24, 604, 56, 3647, 6269, 1118, 1156, 209, 6819, 116, 7, 794, 2488, 25, 380, 14, 1189, 2, 3647, 1113, 12, 1104, 694, 4259, 23, 133, 3204, 2, 438, 6269, 24, 147, 342, 51, 12, 2123, 694, 3047, 33, 63383, 8, 4232, 3, 3, 134, 3, 3, 3, 1009, 3, 3, 3, 3, 3, 4, 672, 25, 2, 380, 6035, 153, 694, 11, 7, 9912, 2244, 4, 2, 852, 14, 84, 3707, 12, 2, 694, 30, 24, 852, 14, 36579, 3, 3, 10276, 3, 3, 3, 3, 3, 12, 418, 562, 1211, 19, 15494, 81, 94, 124, 168, 3, 3, 2753, 11, 3266, 4, 10276, 183, 8, 237, 3, 0, 29, 56, 31558, 1956, 1109, 386, 25, 31, 18, 610, 1, 189, 7963, 9535, 9, 0, 6, 2, 1054, 5939, 31, 116, 23, 548, 117, 1778, 9, 5, 1397, 1, 149, 2, 18175, 3039, 9, 8, 227, 30, 6, 8, 2096, 50, 2856, 1, 782, 37, 460, 6, 2, 1054, 201, 12, 2, 694, 37, 938, 3, 3, 0, 39, 5, 1132, 12, 2, 1086, 6, 25, 31, 18, 610, 1, 708, 97, 248, 13, 30, 1054, 445, 201, 1, 2, 694, 95, 65, 31, 116, 4539, 9, 419, 28356, 24, 31, 116, 236, 9, 3, 3, 3, 21338, 51, 35957, 445, 3, 4959, 144, 4712, 1720, 4010, 2067, 2066, 50, 15, 16, 1095, 1, 173, 223, 1191, 156, 3, 1727, 708, 389, 3, 4959, 144, 4712, 6269, 3, 3, 374, 95, 25, 2, 604, 694, 14, 0, 151, 34, 6005, 30, 1811, 6269, 0, 1206, 3009, 1016, 1016, 7345, 30, 445, 2732, 4, 8071, 12, 694, 1659, 3, 3, 3, 394, 3, 3, 94, 124, 3, 3, 325, 3, 2188, 2112, 3, 743, 607, 17, 425, 1817, 3, 3, 4196, 4858, 1453, 540, 3989, 3, 4385, 1619, 5101, 3, 3, 396, 602, 3, 467, 24582, 3, 552, 4336, 3, 3, 3, 560, 8, 15, 555, 45, 503, 452, 4, 410, 64, 4, 14, 107, 9, 2, 392, 87, 6, 2, 437, 13, 476, 1, 523, 23, 14, 527, 25, 5, 18, 22, 2, 107, 177, 13, 444, 453, 1, 433, 8, 32, 1, 2, 107, 177, 13, 76, 8, 32, 159, 574, 391, 29, 52, 1, 8, 32, 1, 479, 2, 429, 24, 2, 32, 46, 581, 76, 21, 5, 4, 209, 539, 384, 2, 32, 16, 30, 426, 278, 222, 402, 13, 578, 7, 333, 6, 2, 32, 3, 12, 685, 418, 562, 1211, 19, 8430, 81, 180, 291, 4283, 168, 3, 3, 3, 304, 1397, 3, 3, 3, 419, 399, 2150, 16, 13303, 742, 77, 33, 1604, 24, 11, 2, 10245, 33, 287, 387, 1965, 17, 5, 136, 496, 33, 36, 3038, 8, 1165, 3, 3, 3, 3466, 203, 9, 18175, 383, 826, 9535, 33, 3038, 24, 2, 138, 292, 1, 212, 147, 20, 1, 87, 7963, 17223, 4, 6392, 18175, 6200, 1, 502, 7963, 9535, 1, 7462, 17, 236, 3, 3, 3, 155, 18, 7, 298, 6, 7963, 6806, 9535, 24, 33, 36, 158, 8420, 4, 5926, 17, 2, 1054, 1206, 4, 5344, 24, 70, 3048, 149, 1675, 3, 3, 3, 37, 667, 37, 33, 53, 13303, 742, 9, 375, 149, 1267, 319, 5, 2, 132, 6, 1054, 1206, 7882, 17, 7963, 9535, 24, 1708, 502, 1206, 4, 5344, 37, 229, 37, 7, 132, 6, 30, 2, 7963, 9535, 24, 33, 36, 702, 3, 3, 3, 3, 670, 3, 180, 3, 3, 3, 3, 3, 180, 291, 4283, 3, 3, 85321, 3, 3, 3, 3, 12, 871, 573, 507, 308, 507, 1211, 19, 34986, 180, 291, 4283, 168, 3, 3, 755, 3, 1009, 3, 31, 172, 387, 17, 7, 132, 6, 2, 1054, 1463, 1302, 1, 3568, 3647, 56, 814, 716, 178, 12, 367, 56, 95, 399, 1008, 1, 20, 3157, 4, 212, 16, 155, 3, 3, 180, 14, 8, 583, 5, 13, 865, 23477, 287, 24378, 3, 3, 33, 39, 117, 544, 7, 132, 4, 836, 1, 189, 54, 9535, 24, 18, 178, 9, 7640, 1117, 24, 39, 65, 2, 828, 1267, 133, 1237, 1, 2021, 1, 905, 1134, 1, 103, 141, 17, 8, 1009, 3, 3, 33, 2129, 164, 6, 0, 0, 17069, 102, 787, 7, 277, 707, 1, 
387, 417, 3, 3, 3, 670, 3, 180, 3, 1009, 3, 3, 3, 12, 871, 308, 573, 1211, 344, 564, 168, 3, 3, 3, 3, 8815, 229, 419, 22, 254, 25, 2, 217, 94, 14, 1882, 61, 36, 6106, 11, 20476, 149, 82, 8, 23, 330, 34970, 511, 1, 2310, 77, 287, 20, 7, 3464, 930, 1, 43, 227, 3, 3, 33, 147, 914, 86, 938, 77, 9043, 12, 80, 8195, 99, 9535, 18, 4, 25, 155, 14, 447, 149, 178, 9, 850, 3, 3, 155, 18, 136, 1054, 3668, 9, 367, 544, 37, 0, 997, 77, 33, 36, 58, 1220, 80, 277, 151, 201, 4, 95, 710, 151, 737, 3, 1009, 3, 33, 115, 590, 1, 27073, 8, 201, 83, 31, 172, 387, 17, 7, 132, 6, 2, 1054, 1463, 1302, 1, 3568, 3647, 56, 814, 716, 178, 12, 367, 56, 95, 399, 1008, 1, 20, 3157, 4, 212, 16, 155, 3, 89, 16, 70, 338, 3, 12, 418, 573, 1211, 19, 13905, 81, 180, 291, 4283, 168, 3, 3, 3, 3, 419, 22, 50, 2545, 11, 8, 1666, 6, 201, 77, 33, 2772, 2, 84, 1027, 31, 287, 65, 14, 836, 1, 189, 71, 6, 99, 1530, 94, 1018, 4, 3578, 17, 236, 0, 20476, 544, 9535, 151, 147, 462, 38, 7, 1355, 1623, 1, 8, 536, 787, 1884, 153, 6106, 3, 3, 3, 95, 65, 5, 312, 3, 3, 3, 670, 3, 180, 3, 3, 3, 3, 3, 180, 291, 4283, 3, 3, 85321, 3, 3, 12, 871, 573, 507, 308, 507, 1211, 19, 6751, 94, 124, 168, 3, 220, 9, 2, 742, 1397, 3, 3, 120, 20476, 149, 1, 367, 8, 147, 20, 7, 1764, 35963, 19324, 25640, 4, 6133, 9, 38, 3, 3, 95, 25, 31, 1726, 83, 8, 20476, 201, 155, 18, 4416, 6, 20696, 1530, 24, 287, 65, 2, 20476, 201, 1659, 3, 3, 12, 871, 308, 573, 1211, 180, 291, 4283, 168, 3, 3, 3, 304, 3, 3, 12, 871, 573, 507, 308, 507, 1211, 19, 37101, 344, 564, 168, 3, 3, 755, 1397, 3, 3, 33, 1255, 17, 3920, 24, 1054, 445, 347, 15, 287, 43, 36184, 478, 36, 1, 1075, 136, 2399, 3607, 1, 2495, 80, 460, 478, 36, 1, 2613, 51, 9, 8, 787, 341, 33, 36, 25542, 778, 2, 1054, 149, 51, 4, 1189, 12, 1388, 6635, 6, 15, 6012, 829, 1, 43, 7, 681, 9, 95, 31, 18, 8587, 17, 3, 344, 88, 7, 277, 340, 31, 18, 320, 395, 1, 659, 7, 1813, 188, 167, 6, 2, 1054, 9, 8, 4, 31, 172, 1075, 502, 3607, 1, 56, 80, 2, 538, 9443, 3, 3, 3, 3, 77, 33, 133, 56, 2, 535, 6, 8, 790, 4, 24, 428, 604, 147, 122, 1054, 1811, 30, 2, 55, 22, 106, 95, 5, 2434, 155, 14, 2, 5679, 6, 31008, 8, 790, 25, 86, 2040, 1345, 4, 10888, 794, 1897, 24, 292, 2879, 865, 86, 73, 9, 95, 5, 2434, 77, 116, 1, 525, 7, 1494, 9, 1022, 6, 2, 0, 3, 8, 287, 20, 7, 2011, 1, 7948, 2, 2370, 6, 15785, 30, 2006, 92, 1009, 3, 3, 3, 3, 3, 3, 1301, 9, 729, 4539, 287, 20, 7232, 17, 5161, 729, 37, 651, 6, 2, 68, 582, 2992, 77, 34, 36, 1, 293, 3, 3, 33, 133, 1255, 23, 147, 20, 4454, 1, 65, 37, 460, 445, 12, 2, 204, 1113, 37, 938, 155, 18, 7, 1931, 13209, 1, 8, 3, 31, 206, 122, 1, 11191, 511, 2, 1937, 2558, 445, 30, 2, 0, 3, 7, 277, 409, 6, 2, 1054, 445, 149, 1675, 3048, 9535, 3157, 21, 2, 1054, 711, 135, 461, 20476, 8, 1, 367, 147, 248, 1975, 20, 0, 15610, 3, 33, 65, 36, 3569, 26803, 24, 31, 147, 451, 610, 1, 4088, 7, 3476, 409, 6, 24, 149, 1, 1075, 12, 367, 1156, 13755, 2, 383, 826, 9535, 31, 18, 226, 3, 133, 155, 18, 136, 2158, 6, 149, 191, 1075, 12, 3334, 21, 2, 292, 82, 5225, 60597, 24, 408, 20, 2796, 12, 471, 447, 1452, 222, 2089, 7, 1520, 31, 287, 19473, 12, 35, 529, 7, 538, 82, 5225, 60597, 77, 24, 147, 234, 1511, 1010, 6, 8091, 201, 3, 3, 3, 394, 3, 3, 94, 124, 3, 3, 325, 3, 2188, 2112, 3, 743, 607, 17, 425, 1817, 3, 3, 4196, 4858, 1453, 540, 3989, 3, 4385, 1619, 5101, 3, 3, 396, 602, 3, 467, 24582, 3, 552, 4336, 3, 3, 3, 560, 8, 15, 555, 45, 503, 452, 4, 410, 64, 4, 14, 107, 9, 2, 392, 87, 6, 2, 437, 13, 476, 1, 523, 23, 14, 527, 25, 5, 18, 22, 2, 107, 177, 13, 444, 453, 1, 433, 8, 32, 1, 2, 107, 177, 13, 76, 8, 32, 159, 574, 391, 29, 52, 1, 8, 32, 
1, 479, 2, 429, 24, 2, 32, 46, 581, 76, 21, 5, 4, 209, 539, 384, 2, 32, 16, 30, 426, 278, 222, 402, 13, 578, 7, 333, 6, 2, 32, 3, 3, 3, 3, 3, 3, 3, 3]]

output = Parallel(n_jobs=n_jobs, verbose=0, pre_dispatch='1.5*n_jobs')(delayed(processing)(vector) for vector in generator(vectors))
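# pre_dispatch='1.5*n_jobs' keeps joblib from consuming the whole generator up
# front: only 1.5 x n_jobs tasks are queued ahead of the workers, which bounds
# memory use when the list of vectors is long.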

print(output)
Exemple #55
0
def ss_plot():
    #Pandas method of importing data frame and getting extents
    db_connect="dbname='reach_4a' user='******'  host='localhost' port='9000'"
    conn = psycopg2.connect(db_connect)
    df = pd.read_sql_query('SELECT * from mb_may_2012_1m tt inner join (	SELECT s.easting, s.northing, s.texture, s.sidescan_intensity  FROM ss_2012_05 s) ss on tt.easting=ss.easting and tt.northing=ss.northing;', con=conn)
    minE = df['easting'].min()[0]
    maxE = df['easting'].max()[0]
    minN = df['northing'].min()[0]
    maxN = df['northing'].max()[0]
    conn.close()
    print('Done Importing Data from Database')
    
    #Create grid for countourf plot
    res = 1
    grid_x, grid_y = np.meshgrid( np.arange(np.floor(minE), np.ceil(maxE), res), np.arange(np.floor(minN), np.ceil(maxN), res))
    grid_lon, grid_lat = trans(grid_x,grid_y,inverse=True)
    
    #Re-sampling procedure
    m_lon, m_lat = trans(df['easting'].values.flatten(), df['northing'].values.flatten(), inverse=True)
    orig_def = geometry.SwathDefinition(lons=m_lon, lats=m_lat)
    target_def = geometry.SwathDefinition(lons=grid_lon.flatten(), lats=grid_lat.flatten())
    print('Now Resampling...')
    result = kd_tree.resample_nearest(orig_def, df['sidescan_intensity'].values.flatten(), target_def, radius_of_influence=1, fill_value=None, nprocs = cpu_count())
    print('Done Resampling!!!')
    
    #format side scan intensities grid for plotting
    gridded_result = np.reshape(result,np.shape(grid_lon))
    gridded_result = np.squeeze(gridded_result)
    gridded_result[np.isinf(gridded_result)] = np.nan
    gridded_result[gridded_result<=0] = np.nan
    grid2plot = np.ma.masked_invalid(gridded_result)
       
    
    print('Now mapping...')
    #Create Figure
    fig = plt.figure(frameon=True)
    ax = plt.subplot(1,1,1)
    map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1], llcrnrlon=np.min(grid_lon)-0.0009, llcrnrlat=np.min(grid_lat)-0.0009,urcrnrlon=np.max(grid_lon)+0.0009, urcrnrlat=np.max(grid_lat)+0.0009)
    gx,gy = map.projtran(grid_lon,grid_lat)
    map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery', xpixels=1000, ypixels=None, dpi=1200)
    im = map.pcolormesh(gx, gy, grid2plot, cmap='gray',vmin=0.1, vmax=30)
        
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    cbr = plt.colorbar(im, cax=cax)
    cbr.set_label('Sidescan Intensity [dBw]', size=8)
    for t in cbr.ax.get_yticklabels():
        t.set_fontsize(8)
    plt.savefig(r'C:\workspace\Texture_Classification\output\May2012_1m_sidescan_intensity.png')    
  pdat = np.load(posfile, mmap_mode = 'r+');
  pdat[t,0,:] = pos_center;
  pdat[t,1,:] = pos_head;
  pdat[t,2,:] = pos_tail;
  #pdat.close();
  
  cdat = np.load(curfile, mmap_mode = 'r+');
  cdat[t,0] = curvature_mean;
  cdat[t,1] = curvature_variation;
  #cdat.close();



from joblib import Parallel, delayed, cpu_count

Parallel(n_jobs = cpu_count())( delayed(analyse_shape_at_time)(t) for t in range(ntime))

Parallel(n_jobs = cpu_count())( delayed(analyse_shape_at_time)(t) for t in range(3212, 3300))

Parallel(n_jobs = cpu_count())( delayed(analyse_shape_at_time)(t) for t in range(3300, ntime))

Parallel(n_jobs = cpu_count())( delayed(analyse_shape_at_time)(t) for t in range(500000, ntime))

analyse_shape_at_time(500000)

analyse_shape_at_time(513477)

i = 0;

i+=1;
analyse_shape_at_time(513466+i)
Exemple #57
0
def texture(humfile, sonpath, win, shift, doplot, density, numclasses, maxscale, notes):
          
      '''
      Create a texture lengthscale map using the algorithm detailed by Buscombe et al. (forthcoming)
      This textural lengthscale is not a direct measure of grain size. Rather, it is a statistical 
      representation that integrates over many attributes of bed texture, of which grain size is the most important. 
      The technique is a physically based means to identify regions of texture within a sidescan echogram, 
      and could provide a basis for objective, automated riverbed sediment classification.

      Syntax
      ----------
      [] = PyHum.texture(humfile, sonpath, win, shift, doplot, density, numclasses, maxscale, notes)

      Parameters
      ----------
      humfile : str
       path to the .DAT file
      sonpath : str
       path where the *.SON files are
      win : int, *optional* [Default=100]
       size in pixels of the moving window
      shift : int, *optional* [Default=10]
       shift in pixels for moving window operation
      doplot : int, *optional* [Default=1]
       if 1, make plots, otherwise do not make plots
      density : int, *optional* [Default=win/2]
       echogram will be sampled every 'density' pixels
      numclasses : int, *optional* [Default=4]
       number of 'k means' that the texture lengthscale will be segmented into
      maxscale : int, *optional* [Default=20]
       Max scale as inverse fraction of data length for wavelet analysis
      notes : int, *optional* [Default=4]
       notes per octave for wavelet analysis

      Returns
      -------
      sonpath+base+'_data_class.dat': memory-mapped file
        contains the texture lengthscale map

      sonpath+base+'_data_kclass.dat': memory-mapped file
        contains the k-means segmented texture lengthscale map

      References
      ----------
      .. [1] Buscombe, D., Grams, P.E., and Smith, S.M.C., Automated riverbed sediment
       classification using low-cost sidescan sonar. submitted to
       Journal of Hydraulic Engineering
      '''
                        
      # prompt user to supply file if no input file given
      if not humfile:
         print('An input file is required!!!!!!')
         Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
         humfile = askopenfilename(filetypes=[("DAT files","*.DAT")])  # assign to humfile so the chosen file is actually used

      # prompt user to supply directory if no input sonpath is given
      if not sonpath:
         print('A *.SON directory is required!!!!!!')
         Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
         sonpath = askdirectory() 

      # print given arguments to screen and convert data type where necessary
      if humfile:
         print('Input file is %s' % (humfile))
      if sonpath:
         print('Sonar file path is %s' % (sonpath))
      if win:
         win = np.asarray(win,int)
         print('Window is %s square pixels' % (str(win)))
      if shift:
         shift = np.asarray(shift,int)
         print('Min shift is %s pixels' % (str(shift)))
      if density:
         density = np.asarray(density,int)
         print('Image will be sampled every %s pixels' % (str(density)))
      if numclasses:
         numclasses = np.asarray(numclasses,int)
         print('Number of sediment classes: %s' % (str(numclasses)))
      if maxscale:
         maxscale = np.asarray(maxscale,int)
         print('Max scale as inverse fraction of data length: %s' % (str(maxscale)))
      if notes:
         notes = np.asarray(notes,int)
         print('Notes per octave: %s' % (str(notes)))
      if doplot:
         doplot = int(doplot)
         if doplot==0:
            print("Plots will not be made")
      
      
      print('[Default] Number of processors is %s' % (str(cpu_count())))

      if not win:
         win = 100
         print('[Default] Window is %s square pixels' % (str(win)))

      if not shift:
         shift = 10
         print('[Default] Min shift is %s pixels' % (str(shift)))

      if not density:
         density = win//2  # integer division keeps the original (Python 2) behaviour
         print('[Default] Echogram will be sampled every %s pixels' % (str(density)))

      if not numclasses:
         numclasses = 4
         print('[Default] Number of sediment classes: %s' % (str(numclasses)))

      if not maxscale:
         maxscale = 20
         print('[Default] Max scale as inverse fraction of data length: %s ' % (str(maxscale)))

      if not notes:
         notes = 4
         print('[Default] Notes per octave: %s ' % (str(notes)))

      if not doplot:
         if doplot != 0:
            doplot = 1
            print("[Default] Plots will be made")

      ########################################################
      ########################################################
      
      # start timer
      if os.name=='posix': # true if linux/mac or cygwin on windows
         start = time.time()
      else: # windows
         start = time.clock()

      # if son path name supplied has no separator at end, put one on
      if sonpath[-1]!=os.sep:
         sonpath = sonpath + os.sep

      base = humfile.split('.DAT') # get base of file name for output
      base = base[0].split(os.sep)[-1]

      ft = 1/loadmat(sonpath+base+'meta.mat')['pix_m']
      pix_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['pix_m'])
      dep_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dep_m'])
      dist_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dist_m'])

      ### port
      print "processing port side ..."
      # load memory mapped scan ... port
      shape_port = np.squeeze(loadmat(sonpath+base+'meta.mat')['shape_port'])
      if shape_port!='':
         port_fp = np.memmap(sonpath+base+'_data_port_la.dat', dtype='float32', mode='r', shape=tuple(shape_port))
         port_fp2 = np.memmap(sonpath+base+'_data_port_l.dat', dtype='float32', mode='r', shape=tuple(shape_port))

      ### star
      print "processing starboard side ..."
      # load memory mapped scan ... port
      shape_star = np.squeeze(loadmat(sonpath+base+'meta.mat')['shape_star'])
      if shape_star!='':
         star_fp = np.memmap(sonpath+base+'_data_star_la.dat', dtype='float32', mode='r', shape=tuple(shape_star))
         star_fp2 = np.memmap(sonpath+base+'_data_star_l.dat', dtype='float32', mode='r', shape=tuple(shape_star))

      shape = shape_port.copy()
      shape[1] = shape_port[1] + shape_star[1]

      # create memory mapped file for Sp
      fp = np.memmap(sonpath+base+'_data_class.dat', dtype='float32', mode='w+', shape=tuple(shape))

      #SRT = []
      for p in range(len(port_fp)):

         Z,ind = humutils.sliding_window(np.vstack((np.flipud(port_fp[p]), star_fp[p])),(win,win),(shift,shift))

         try:
            print("%s windows to process with a density of %s" % (str(len(Z)), str(density)))
            # do the wavelet calcs and get the stats
            d = Parallel(n_jobs = -1, verbose=0)(delayed(parallel_me)(Z[k], maxscale, notes, win, density) for k in range(len(Z)))
         except:
            print("memory error: trying serial")
            d = Parallel(n_jobs = 1, verbose=0)(delayed(parallel_me)(Z[k], maxscale, notes, win, density) for k in range(len(Z)))

         srt = np.reshape(d , ( ind[0], ind[1] ) )
         del d

         try:
            print("%s windows to process with a density of %s" % (str(len(Z)), str(density)))
            # do the wavelet calcs and get the stats
            d = Parallel(n_jobs = -1, verbose=0)(delayed(parallel_me)(Z[k].T, maxscale, notes, win, density) for k in range(len(Z)))
         except:
            print("memory error: trying serial")
            d = Parallel(n_jobs = 1, verbose=0)(delayed(parallel_me)(Z[k].T, maxscale, notes, win, density) for k in range(len(Z)))

         srt2 = np.reshape(d , ( ind[0], ind[1] ) )
         del d
         Z = None

         SRT = srt+srt2
         del srt, srt2

         Snn = SRT.copy() 
         del SRT

         # replace nans using infilling algorithm
         rn = replace_nans.RN(Snn.astype('float64'),1000,0.01,2,'localmean')
         Snn = rn.getdata()
         del rn   

         Ny, Nx = np.shape( np.vstack((np.flipud(port_fp[p]), star_fp[p])) )
         Snn = median_filter(Snn,(int(Nx/100),int(Ny/100)))
   
         Sp = humutils.im_resize(Snn,Nx,Ny)
         del Snn

         Sp[np.isnan(np.vstack((np.flipud(port_fp[p]), star_fp[p])))] = np.nan
         Sp[np.isnan(np.vstack((np.flipud(port_fp2[p]), star_fp2[p])))] = np.nan

         extent = shape_port[1]
         Zdist = dist_m[shape_port[-1]*p:shape_port[-1]*(p+1)]
         yvec = np.linspace(pix_m,extent*pix_m,extent)
         d = dep_m[shape_port[-1]*p:shape_port[-1]*(p+1)]

         R_fp = np.memmap(sonpath+base+'_data_range.dat', dtype='float32', mode='r', shape=tuple(shape_star))

         #R = np.ones(np.shape(Sp))
         #for k in range(len(d)): 
         #   R[:,k] = np.hstack((np.flipud(d[k]/yvec), d[k]/yvec))

         #if len(d)<np.shape(port_fp[p])[1]:
         #   d = np.append(d,d[-1])
         #Zbed = np.squeeze(d*ft)

         #R1 = R[extent:,:]
         #R2 = np.flipud(R[:extent,:])

         ## shift proportionally depending on where the bed is
         #for k in xrange(np.shape(R1)[1]):
         #   R1[:,k] = np.r_[R1[Zbed[k]:,k], np.zeros( (np.shape(R1)[0] -  np.shape(R1[Zbed[k]:,k])[0] ,) )]

         #for k in xrange(np.shape(R2)[1]):
         #   R2[:,k] = np.r_[R2[Zbed[k]:,k], np.zeros( (np.shape(R2)[0] -  np.shape(R2[Zbed[k]:,k])[0] ,) )]

         #R = np.vstack((np.flipud(R2),R1))
         #del R1, R2

         R = np.vstack((np.flipud(R_fp[0]),R_fp[0]))
         
         R[R>0.8] = np.nan

         rn = replace_nans.RN(R.astype('float64'),1000,0.01,2,'localmean')
         R = rn.getdata()
         del rn   

         Sp = (Sp**2) * np.cos(R) / shift**2

         fp[p] = Sp.astype('float32')
         del Sp

      del fp # flush data to file

      class_fp = np.memmap(sonpath+base+'_data_class.dat', dtype='float32', mode='r', shape=tuple(shape))

      dist_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dist_m'])

      ########################################################
      ########################################################
      if doplot==1:

         for p in range(len(star_fp)):
            plot_class(dist_m, shape_port, port_fp[p], star_fp[p], class_fp[p], ft, humfile, sonpath, base, p)

         for p in range(len(star_fp)):
            plot_contours(dist_m, shape_port, class_fp[p], ft, humfile, sonpath, base, numclasses, p)


      #######################################################
      # k-means 
      fp = np.memmap(sonpath+base+'_data_kclass.dat', dtype='float32', mode='w+', shape=tuple(shape))

      for p in range(len(port_fp)):
         Sk = class_fp[p].copy()
         Sk[np.isnan(Sk)] = 0
         wc, values = humutils.cut_kmeans(Sk,numclasses+1)
         wc[Sk==0] = np.nan
         del Sk
         fp[p] = wc.astype('float32')
         del wc

      del fp

      kclass_fp = np.memmap(sonpath+base+'_data_kclass.dat', dtype='float32', mode='r', shape=tuple(shape))

      ########################################################
      if doplot==1:

         for p in range(len(star_fp)):
            plot_kmeans(dist_m, shape_port, port_fp[p], star_fp[p], kclass_fp[p], ft, humfile, sonpath, base, p)

      if os.name=='posix': # true if linux/mac
         elapsed = (time.time() - start)
      else: # windows
         elapsed = (time.clock() - start)
      print "Processing took ", elapsed , "seconds to analyse"

      print "Done!"
Exemple #58
0
def getgrid_lm(humlon, humlat, merge, influence, minX, maxX, minY, maxY, res, mode, trans, nn, wf, sigmas, eps):

   complete=0
   while complete==0:
      try:
         grid_x, grid_y, res = getmesh(minX, maxX, minY, maxY, res)
         longrid, latgrid = trans(grid_x, grid_y, inverse=True)
         shape = np.shape(grid_x)
         targ_def = pyresample.geometry.SwathDefinition(lons=longrid.flatten(), lats=latgrid.flatten())
         del longrid, latgrid

         orig_def = pyresample.geometry.SwathDefinition(lons=humlon.flatten(), lats=humlat.flatten())

         if mode==1:
            try:
               dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def, radius_of_influence=res*10, fill_value=None, nprocs = cpu_count())
            except:
               dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def, radius_of_influence=res*10, fill_value=None, nprocs = 1)

            stdev = None
            counts = None
         elif mode==2:
            try:
               dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(),targ_def, radius_of_influence=res*10, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = True, nprocs = cpu_count())
            except:
               dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(),targ_def, radius_of_influence=res*10, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = True, nprocs = 1)
         else:
            try:
               dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def, radius_of_influence=res*10, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = np.nan, nprocs = cpu_count(), epsilon = eps)
            except:
               dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def, radius_of_influence=res*10, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = np.nan, nprocs = 1, epsilon = eps)

         if 'dat' in locals():
            complete=1
      except:
         print("memory error: trying grid resolution of %s" % (str(res*2)))
         res = res*2

   return dat, stdev, counts, res, complete, shape
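
# getgrid_lm above retries the whole gridding step at double the cell size
# whenever it runs out of memory; the same back-off pattern in isolation
# (a generic sketch with a made-up gridding callable, catching MemoryError
# explicitly rather than the bare except used above):
def grid_with_backoff(gridder, res, max_tries=8):
    """Call gridder(res); on MemoryError double res and try again."""
    for _ in range(max_tries):
        try:
            return gridder(res), res
        except MemoryError:
            print("memory error: trying grid resolution of %s" % (str(res * 2)))
            res = res * 2
    raise MemoryError("gridding failed even at a resolution of %s" % str(res))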
Exemple #59
0
def make_map(e, n, t, d, dat_port, dat_star, data_R, pix_m, res, cs2cs_args, sonpath, p, mode, nn, numstdevs, c, dx, use_uncorrected, scalemax): #dogrid, influence,dowrite,

   thres=5

   trans =  pyproj.Proj(init=cs2cs_args)

   mp = np.nanmean(dat_port)
   ms = np.nanmean(dat_star)
   if mp>ms:
      merge = np.vstack((dat_port,dat_star*(mp/ms)))      
   else:
      merge = np.vstack((dat_port*(ms/mp),dat_star))
   del dat_port, dat_star

   merge[np.isnan(merge)] = 0
   merge = merge[:,:len(n)]

   ## actual along-track resolution is this: dx times dy = Af
   tmp = data_R * dx * (c*0.007 / 2) #dx = np.arcsin(c/(1000*meta['t']*meta['f']))
   res_grid = np.sqrt(np.vstack((tmp, tmp)))
   del tmp
   res_grid = res_grid[:np.shape(merge)[0],:np.shape(merge)[1]]
   
   #if use_uncorrected != 1:
   #   merge = merge - 10*np.log10(res_grid)
   
   res_grid = res_grid.astype('float32')

   merge[np.isnan(merge)] = 0
   merge[merge<0] = 0

   merge = merge.astype('float32')

   merge = denoise_tv_chambolle(merge.copy(), weight=.2, multichannel=False).astype('float32')

   R = np.vstack((np.flipud(data_R),data_R))
   del data_R
   R = R[:np.shape(merge)[0],:np.shape(merge)[1]]

   # get number pixels in scan line
   extent = int(np.shape(merge)[0]/2)

   yvec = np.squeeze(np.linspace(np.squeeze(pix_m),extent*np.squeeze(pix_m),extent))

   X, Y, D, h, t  = getXY(e,n,yvec,np.squeeze(d),t,extent)

   X = X.astype('float32')
   Y = Y.astype('float32')
   D = D.astype('float32')
   h = h.astype('float32')
   t = t.astype('float32')
   X = X.astype('float32')

   D[np.isnan(D)] = 0
   h[np.isnan(h)] = 0
   t[np.isnan(t)] = 0

   X = X[np.where(np.logical_not(np.isnan(Y)))]
   merge = merge.flatten()[np.where(np.logical_not(np.isnan(Y)))]
   res_grid = res_grid.flatten()[np.where(np.logical_not(np.isnan(Y)))]
   Y = Y[np.where(np.logical_not(np.isnan(Y)))]
   D = D[np.where(np.logical_not(np.isnan(Y)))]
   R = R.flatten()[np.where(np.logical_not(np.isnan(Y)))]
   h = h[np.where(np.logical_not(np.isnan(Y)))]
   t = t[np.where(np.logical_not(np.isnan(Y)))]

   Y = Y[np.where(np.logical_not(np.isnan(X)))]
   merge = merge.flatten()[np.where(np.logical_not(np.isnan(X)))]
   res_grid = res_grid.flatten()[np.where(np.logical_not(np.isnan(X)))]
   X = X[np.where(np.logical_not(np.isnan(X)))]
   D = D[np.where(np.logical_not(np.isnan(X)))]
   R = R.flatten()[np.where(np.logical_not(np.isnan(X)))]
   h = h[np.where(np.logical_not(np.isnan(X)))]
   t = t[np.where(np.logical_not(np.isnan(X)))]

   X = X[np.where(np.logical_not(np.isnan(merge)))]
   Y = Y[np.where(np.logical_not(np.isnan(merge)))]
   merge = merge[np.where(np.logical_not(np.isnan(merge)))]
   res_grid = res_grid.flatten()[np.where(np.logical_not(np.isnan(merge)))]
   D = D[np.where(np.logical_not(np.isnan(merge)))]
   R = R[np.where(np.logical_not(np.isnan(merge)))]
   h = h[np.where(np.logical_not(np.isnan(merge)))]
   t = t[np.where(np.logical_not(np.isnan(merge)))]

   X = X[np.where(np.logical_not(np.isinf(merge)))]
   Y = Y[np.where(np.logical_not(np.isinf(merge)))]
   merge = merge[np.where(np.logical_not(np.isinf(merge)))]
   res_grid = res_grid.flatten()[np.where(np.logical_not(np.isinf(merge)))]
   D = D[np.where(np.logical_not(np.isinf(merge)))]
   R = R[np.where(np.logical_not(np.isinf(merge)))]
   h = h[np.where(np.logical_not(np.isinf(merge)))]
   t = t[np.where(np.logical_not(np.isinf(merge)))]



   print("writing point cloud")
   #if dowrite==1:
   ## write raw bs to file
   outfile = os.path.normpath(os.path.join(sonpath,'x_y_ss_raw'+str(p)+'.asc'))
   ##write.txtwrite( outfile, np.hstack((humutils.ascol(X.flatten()),humutils.ascol(Y.flatten()), humutils.ascol(merge.flatten()), humutils.ascol(D.flatten()), humutils.ascol(R.flatten()), humutils.ascol(h.flatten()), humutils.ascol(t.flatten())  )) )
   np.savetxt(outfile, np.hstack((humutils.ascol(X.flatten()),humutils.ascol(Y.flatten()), humutils.ascol(merge.flatten()), humutils.ascol(D.flatten()), humutils.ascol(R.flatten()), humutils.ascol(h.flatten()), humutils.ascol(t.flatten())  )) , fmt="%8.6f %8.6f %8.6f %8.6f %8.6f %8.6f %8.6f") 

   del D, R, h, t

   sigmas = 0.1 #m
   eps = 2

   print("gridding ...")
   #if dogrid==1:
   if 2>1:

      if res==99:
         resg = np.min(res_grid[res_grid>0])/2
         print('Gridding at resolution of %s' % str(resg))
      else:
         resg = res

      tree = KDTree(np.c_[X.flatten(),Y.flatten()])
      complete=0
      while complete==0:
         try:
            grid_x, grid_y, res = getmesh(np.min(X), np.max(X), np.min(Y), np.max(Y), resg)
            longrid, latgrid = trans(grid_x, grid_y, inverse=True)
            longrid = longrid.astype('float32')
            latgrid = latgrid.astype('float32')
            shape = np.shape(grid_x)

            ## create mask for where the data is not
            if pykdtree==1:
               dist, _ = tree.query(np.c_[grid_x.ravel(), grid_y.ravel()], k=1)
            else:
               try:
                  dist, _ = tree.query(np.c_[grid_x.ravel(), grid_y.ravel()], k=1, n_jobs=cpu_count())
               except:
                  #print ".... update your scipy installation to use faster kd-tree queries"
                  dist, _ = tree.query(np.c_[grid_x.ravel(), grid_y.ravel()], k=1)

            dist = dist.reshape(grid_x.shape)

            targ_def = pyresample.geometry.SwathDefinition(lons=longrid.flatten(), lats=latgrid.flatten())
            del longrid, latgrid

            humlon, humlat = trans(X, Y, inverse=True)
            orig_def = pyresample.geometry.SwathDefinition(lons=humlon.flatten(), lats=humlat.flatten())
            del humlon, humlat
            if 'orig_def' in locals():
               complete=1
         except:
            print("memory error: trying grid resolution of %s" % (str(resg*2)))
            resg = resg*2

      if mode==1:

         complete=0
         while complete==0:
            try:
               try:
                  dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = cpu_count(), reduce_data=1)
               except:
                  dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = 1, reduce_data=1)

               try:
                  r_dat = pyresample.kd_tree.resample_nearest(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = cpu_count(), reduce_data=1)
               except:
                  r_dat = pyresample.kd_tree.resample_nearest(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = 1, reduce_data=1)

               stdev = None
               counts = None
               if 'dat' in locals():
                  complete=1
            except:
               del grid_x, grid_y, targ_def, orig_def

               wf = None
               humlon, humlat = trans(X, Y, inverse=True)
               dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, merge, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps)
               r_dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, res_grid, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps)
               del humlon, humlat

      elif mode==2:

         # custom inverse distance
         wf = lambda r: 1/r**2
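         # e.g. a sounding 1 m from the grid cell gets weight 1.0 while one 2 m
         # away gets 0.25, so the nearest soundings dominate the weighted average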

         complete=0
         while complete==0:
            try:
               try:
                  dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(),targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = True, nprocs = cpu_count(), reduce_data=1)
               except:
                  dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(),targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = True, nprocs = 1, reduce_data=1)

               try:
                  r_dat = pyresample.kd_tree.resample_custom(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = False, nprocs = cpu_count(), reduce_data=1)
               except:
                  r_dat = pyresample.kd_tree.resample_custom(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = False, nprocs = 1, reduce_data=1)

               if 'dat' in locals():
                  complete=1
            except:
               del grid_x, grid_y, targ_def, orig_def
               humlon, humlat = trans(X, Y, inverse=True)
               dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, merge, res*2, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps)
               r_dat, stdev_null, counts_null, resg, complete, shape = getgrid_lm(humlon, humlat, res_grid, res*2, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps)
               del humlat, humlon
               del stdev_null, counts_null

      elif mode==3:
         wf = None

         complete=0
         while complete==0:
            try:
               try:
                  dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = True, nprocs = cpu_count(), epsilon = eps, reduce_data=1)
               except:
                  dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = True, nprocs = 1, epsilon = eps, reduce_data=1)

               try:
                  r_dat = pyresample.kd_tree.resample_gauss(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = False, nprocs = cpu_count(), epsilon = eps, reduce_data=1)
               except:
                  r_dat = pyresample.kd_tree.resample_gauss(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = False, nprocs = 1, epsilon = eps, reduce_data=1)

               if 'dat' in locals():
                  complete=1
            except:
               del grid_x, grid_y, targ_def, orig_def
               humlon, humlat = trans(X, Y, inverse=True)
               dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, merge, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps)
               r_dat, stdev_null, counts_null, resg, complete, shape = getgrid_lm(humlon, humlat, res_grid, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps)
               del humlat, humlon
               del stdev_null, counts_null

      humlon, humlat = trans(X, Y, inverse=True)
      del X, Y, res_grid, merge

      dat = dat.reshape(shape)

      dat[dist>res*30] = np.nan
      del dist

      r_dat = r_dat.reshape(shape)
      r_dat[r_dat<1] = 1
      r_dat[r_dat > 2*np.pi] = 1
      r_dat[np.isnan(dat)] = np.nan

      dat = dat + r_dat #np.sqrt(np.cos(np.deg2rad(r_dat))) #dat*np.sqrt(r_dat) + dat

      del r_dat

      if mode>1:
         stdev = stdev.reshape(shape)
         counts = counts.reshape(shape)

      mask = dat.mask.copy()

      dat[mask==1] = np.nan
      #dat[mask==1] = 0

      if mode>1:
         dat[(stdev>numstdevs) & (mask!=0)] = np.nan
         dat[(counts<nn) & (counts>0)] = np.nan


   #if dogrid==1:

   dat[dat==0] = np.nan
   dat[np.isinf(dat)] = np.nan

   dat[dat<thres] = np.nan

   datm = np.ma.masked_invalid(dat)

   glon, glat = trans(grid_x, grid_y, inverse=True)
   #del grid_x, grid_y

   try:
      from osgeo import gdal,ogr,osr
      proj = osr.SpatialReference()
      proj.ImportFromEPSG(int(cs2cs_args.split(':')[-1])) #26949)
      datout = np.squeeze(np.ma.filled(dat))#.astype('int16')
      datout[np.isnan(datout)] = -99
      driver = gdal.GetDriverByName('GTiff')
      # np.shape returns (nrows, ncols); GDAL's Create() takes (xsize=ncols, ysize=nrows)
      nrows, ncols = np.shape(datout)
      outFile = os.path.normpath(os.path.join(sonpath,'geotiff_map'+str(p)+'.tif'))
      ds = driver.Create(outFile, ncols, nrows, 1, gdal.GDT_Float32, ['COMPRESS=LZW'])
      if proj is not None:  
        ds.SetProjection(proj.ExportToWkt()) 

      xmin, ymin, xmax, ymax = [grid_x.min(), grid_y.min(), grid_x.max(), grid_y.max()]

      xres = (xmax - xmin) / float(ncols)
      yres = (ymax - ymin) / float(nrows)
      geotransform = (xmin, xres, 0, ymax, 0, -yres)

      ds.SetGeoTransform(geotransform)
      ss_band = ds.GetRasterBand(1)
      ss_band.WriteArray(np.flipud(datout)) #datout)
      ss_band.SetNoDataValue(-99)
      ss_band.FlushCache()
      ss_band.ComputeStatistics(False)
      del ds   
   
   except:
      print("error: geotiff could not be created... check your gdal/ogr install")


   try:
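      # render the gridded intensity and a colorbar legend to PNGs, then package both into a
      # Google Earth GroundOverlay (kmz) with humutils.make_kml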

      # =========================================================
      print("creating kmz file ...")
      ## new way to create kml file
      pixels = 1024 * 10

      fig, ax = humutils.gearth_fig(llcrnrlon=glon.min(),
                     llcrnrlat=glat.min(),
                     urcrnrlon=glon.max(),
                     urcrnrlat=glat.max(),
                     pixels=pixels)
      cs = ax.pcolormesh(glon, glat, datm, vmax=scalemax, cmap='gray')
      ax.set_axis_off()
      fig.savefig(os.path.normpath(os.path.join(sonpath,'map'+str(p)+'.png')), transparent=True, format='png')
      del fig, ax

      # =========================================================
      fig = plt.figure(figsize=(1.0, 4.0), facecolor=None, frameon=False)
      ax = fig.add_axes([0.0, 0.05, 0.2, 0.9])
      cb = fig.colorbar(cs, cax=ax)
      cb.set_label('Intensity [dB W]', rotation=-90, color='k', labelpad=20)
      fig.savefig(os.path.normpath(os.path.join(sonpath,'legend'+str(p)+'.png')), transparent=False, format='png')
      del fig, ax, cs, cb

      # =========================================================
      humutils.make_kml(llcrnrlon=glon.min(), llcrnrlat=glat.min(),
         urcrnrlon=glon.max(), urcrnrlat=glat.max(),
         figs=[os.path.normpath(os.path.join(sonpath,'map'+str(p)+'.png'))],
         colorbar=os.path.normpath(os.path.join(sonpath,'legend'+str(p)+'.png')),
         kmzfile=os.path.normpath(os.path.join(sonpath,'GroundOverlay'+str(p)+'.kmz')),
         name='Sidescan Intensity')

   except:
      print("error: map could not be created...")


   #y1 = np.min(glat)-0.001
   #x1 = np.min(glon)-0.001
   #y2 = np.max(glat)+0.001
   #x2 = np.max(glon)+0.001

   print("drawing and printing map ...")
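   # build a Mercator Basemap around the data extent and drape the gridded intensity over ESRI imagery tiles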
   fig = plt.figure(frameon=False)
   map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1],
    resolution = 'i', #h #f
    llcrnrlon=np.min(humlon)-0.001, llcrnrlat=np.min(glat)-0.001,
    urcrnrlon=np.max(humlon)+0.001, urcrnrlat=np.max(glat)+0.001)

   try:
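      # fetch World_Imagery tiles; fall back to the older ESRI_Imagery_World_2D service if the request fails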
      map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery', xpixels=1000, ypixels=None, dpi=300)
   except:
      map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='ESRI_Imagery_World_2D', xpixels=1000, ypixels=None, dpi=300)
   #finally:
   #   print "error: map could not be created..."

   #if dogrid==1:
   gx,gy = map.projtran(glon, glat)

   ax = plt.Axes(fig, [0., 0., 1., 1.], )
   ax.set_axis_off()
   fig.add_axes(ax)

   #if dogrid==1:
   if datm.size > 25000000:
      print("matrix size > 25,000,000 - decimating by factor of 5 for display")
      map.pcolormesh(gx[::5,::5], gy[::5,::5], datm[::5,::5], cmap='gray', vmin=np.nanmin(datm), vmax=scalemax) #vmax=np.nanmax(datm)
   else:
      map.pcolormesh(gx, gy, datm, cmap='gray', vmin=np.nanmin(datm), vmax=scalemax) #vmax=np.nanmax(datm)
   del datm, dat

   #map.drawmapscale(x1+0.001, y1+0.001, x1, y1, 200., units='m', barstyle='fancy', labelstyle='simple', fontcolor='k') #'#F8F8FF')
   #map.drawparallels(np.arange(y1-0.001, y2+0.001, 0.005),labels=[1,0,0,1], linewidth=0.0, rotation=30, fontsize=8)
   #map.drawmeridians(np.arange(x1, x2, 0.002),labels=[1,0,0,1], linewidth=0.0, rotation=30, fontsize=8)

   custom_save2(sonpath,'map_imagery'+str(p))
   del fig


   del humlat, humlon
   return res #return the new resolution