Code example #1
def _extract_dset_frames(dset_name):
    if dset_name == 'breakfast':
        import data.breakfast as dset
    else:
        raise ValueError('no such dataset')

    extracted_frames_dir = dset.EXTRACTED_FRAMES_DIR
    if not os.path.exists(extracted_frames_dir):
        os.makedirs(extracted_frames_dir)

    video_dir = dset.VIDEO_DIR
    videos = os.listdir(video_dir)
    processed_videos = os.listdir(extracted_frames_dir)
    # strip the '.hdf5' extension
    processed_videos = [video[:-5] for video in processed_videos]
    unprocessed_videos = np.setdiff1d(videos, processed_videos)

    video_files = [
        os.path.join(video_dir, video) for video in unprocessed_videos
    ]
    extracted_files = [
        os.path.join(extracted_frames_dir, video + '.hdf5')
        for video in unprocessed_videos
    ]
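    # extract frames for every unprocessed video in parallel, with an unordered progress bar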
    p_umap(extract_video_frames, video_files, extracted_files)
Code example #2
def _extract_video_lengths(dset_name):
    if dset_name == 'breakfast':
        import data.breakfast as dset
    else:
        raise ValueError('no such dataset')

    video_length_dir = dset.VIDEO_LENGTHS_DIR
    if not os.path.exists(video_length_dir):
        os.makedirs(video_length_dir)

    video_dir = dset.VIDEO_DIR
    videos = os.listdir(video_dir)
    processed_videos = os.listdir(video_length_dir)
    # strip the '.npy' extension
    processed_videos = [video[:-4] for video in processed_videos]
    unprocessed_videos = np.setdiff1d(videos, processed_videos)

    video_files = [
        os.path.join(video_dir, video) for video in unprocessed_videos
    ]
    video_length_files = [
        os.path.join(video_length_dir, video + '.npy')
        for video in unprocessed_videos
    ]
    p_umap(extract_video_lengths, video_files, video_length_files)
Code example #3
    def rebuild_dataset_by_dir(self,
                               annotation=None,
                               target_root="./export",
                               multiprocess=True,
                               num_cpus=1.0):
        target_root = target_root.replace(
            "/",
            "\\") if platform.system().find("Windows") >= 0 else target_root

        if annotation is None:
            annotation = self.annotations

        mp_args = annotation[['file_root', 'file_name',
                              "class_name"]].values.tolist()
        mp_args = [[self.config['dataset_root']] + arg for arg in mp_args]
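        # each entry of mp_args is [dataset_root, file_root, file_name, class_name]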

        reverse_dict = {
            v: int(k)
            for k, v in self.config['label_dict'].items()
        }

        if multiprocess:
            p_umap(partial(KProductsDataset.copy_image,
                           target_root=target_root,
                           reverse_dict=reverse_dict),
                   mp_args,
                   desc="Rebuilding Dataset by directory ...",
                   num_cpus=num_cpus)
        else:
            for arg in tqdm(mp_args,
                            desc="Rebuilding Dataset by directory ..."):
                KProductsDataset.copy_image(arg,
                                            target_root=target_root,
                                            reverse_dict=reverse_dict)
Code example #4
def _preprocess_video_labels(dset_name):
    if dset_name == 'breakfast':
        import data.breakfast as dset
    else:
        raise ValueError('no such dataset')

    hdf5_label_dir = dset.FRAME_RECOGNITION_LABEL_DIR
    if not os.path.exists(hdf5_label_dir):
        os.makedirs(hdf5_label_dir)

    label_dir = dset.LABEL_DIR
    labels = os.listdir(label_dir)
    processed_labels = os.listdir(hdf5_label_dir)
    # strip the '.hdf5' extension
    processed_labels = [video[:-5] for video in processed_labels]
    unprocessed_labels = np.sort(np.setdiff1d(labels, processed_labels))

    raw_label_files = [
        os.path.join(label_dir, video) for video in unprocessed_labels
    ]
    hdf5_label_files = [
        os.path.join(hdf5_label_dir, video + '.hdf5')
        for video in unprocessed_labels
    ]
    p_umap(_convert_label_file, raw_label_files, hdf5_label_files)
Code example #5
def dicom_to_png_matlab(dicom_paths,
                        image_paths,
                        selection_criteria,
                        skip_existing=True):
    """Converts a dicom image to a grayscale 16-bit png image using matlab.

    NOTE: Must be run from oncodata/dicom_to_png directory so that Matlab
    can find the dicomToPng.m conversion script.

    Arguments:
        dicom_paths(list[str]): A list of paths to dicom files.
        image_paths(list[str]): A list of paths where the images will be saved.
        selection_criteria(tuple): Criteria a dicom must satisfy to be converted.
        skip_existing(bool): True to skip images which already exist.
    """

    if len(dicom_paths) != len(image_paths):
        print('Error: DICOM paths and image paths must be the same length.')
        exit()

    dicom_paths = np.array(dicom_paths)
    image_paths = np.array(image_paths)

    if skip_existing:
        print('Checking for existing images')
        keep = p_map(lambda image_path: not os.path.exists(image_path),
                     image_paths)
        keep_indices = np.where(keep)
        dicom_paths = dicom_paths[keep_indices]
        image_paths = image_paths[keep_indices]

    # Ensure that dicoms meet selection criteria and only have one slice
    print('Checking for invalid dicoms')
    keep = p_map(
        lambda dicom_path: is_selected_dicom(dicom_path, selection_criteria)
        and has_one_slice(dicom_path), dicom_paths)
    keep_indices = np.where(keep)
    dicom_paths = dicom_paths[keep_indices]
    image_paths = image_paths[keep_indices]

    if len(dicom_paths) == 0:
        return

    # Create directory for images if necessary
    print('Creating directories for images')
    p_umap(create_directory_if_necessary, image_paths)

    # Save paths to temporary files which will be loaded by matlab
    with NamedTemporaryFile(suffix='.txt') as dicoms_file:
        with NamedTemporaryFile(suffix='.txt') as images_file:
            np.savetxt(dicoms_file.name, dicom_paths, fmt='%s')
            np.savetxt(images_file.name, image_paths, fmt='%s')

            # Convert DICOM to PNG using matlab
            print('Converting with matlab')
            Popen([
                'matlab', '-nodisplay', '-nodesktop', '-nojvm', '-r',
                "dicomToPng('%s', '%s'); exit;" %
                (dicoms_file.name, images_file.name)
            ]).wait()
Code example #6
 def vectorize_dataset(self, multiprocess=False, batch_size=32):
     annot_by_file_root = self.get_annotation_by_file_root()
     if multiprocess:
         p_umap(partial(self.vectorize_images, batch_size=batch_size),
                annot_by_file_root, desc="Vectorization Dataset ...")
     else:
         for annotation in tqdm(annot_by_file_root, "Vectorization Dataset ..."):
             self.vectorize_images(annotation, batch_size=batch_size)
Code example #7
File: smali.py  Project: yyg192/HinDroid-1
 def _save_interim_BP(Bs, Ps, csvs, nproc):
     print('Saving B and P', file=sys.stdout)
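     # save each B and P array next to its source csv, swapping the '.csv' suffix for '.B' or '.P'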
     p_umap(
         lambda arr, file: np.save(file, arr),
         Bs + Ps,
         [f[:-4] + '.B' for f in csvs] + [f[:-4] + '.P' for f in csvs],
         num_cpus=nproc
     )
Code example #8
def main(dicom_dir, dicom_list_json_path, png_dir, dcmtk, imagemagick, matlab,
         dicom_types, dicom_ext):
    """Converts DICOM files in a directory to PNG images.

    NOTE: When using Matlab, must be run from oncodata/dicom_to_png
    directory so that Matlab can find the dicomToPng.m conversion script.

    Arguments:
        dicom_dir(str): Path to a directory containing DICOM files.
        dicom_list_json_path(str): Path to an optional JSON list of dicom files
            (used instead of walking dicom_dir).
        png_dir(str): Path to a directory where PNG versions of the
            DICOM images will be saved.
        dcmtk(bool): True to use dcmtk to convert DICOMs to PNGs.
        imagemagick(bool): True to use ImageMagick to convert DICOMs to PNGs.
        matlab(bool): True to use matlab to convert DICOMs to PNGs.
        dicom_types(list[str]): DICOM types to select for conversion.
        dicom_ext(str): File extension used to identify DICOM files.
    """

    print('Extracting DICOM paths')
    if dicom_list_json_path is not None:
        dicom_paths = json.load(open(dicom_list_json_path, 'r'))
    else:
        dicom_paths = []
        for root, _, files in os.walk(dicom_dir):
            dicom_paths.extend([
                os.path.join(root, f) for f in files if f.endswith(dicom_ext)
            ])

    image_paths = [
        dicom_path_to_png_path(dicom_path, dicom_dir, png_dir, dicom_ext)
        for dicom_path in dicom_paths
    ]

    selection_criteria = []
    for dicom_type in dicom_types:
        criteria = DICOM_TYPES.get(dicom_type)
        assert criteria is not None, "Unsupported dicom_type. Please add the appropriate type to DICOM_TYPES."
        selection_criteria.append(criteria)
    assert len(selection_criteria) > 0, "No dicoms selected."
    selection_criteria = tuple(selection_criteria)

    if dcmtk:
        print('Converting to PNG')
        p_umap(dicom_to_png_dcmtk, dicom_paths, image_paths,
               selection_criteria)
    elif imagemagick:
        print('Converting to PNG')
        p_umap(dicom_to_png_imagemagick, dicom_paths, image_paths,
               selection_criteria)
    elif matlab:
        dicom_to_png_matlab(dicom_paths, image_paths, selection_criteria)
Code example #9
def create_dataset(num_machines,
                   csv_file,
                   save_filename,
                   num_cpus=multiprocessing.cpu_count() - 1):

    df = pd.DataFrame(columns=[
        "graph_object", "num_tasks", "num_machines", "weights", "order",
        "features", "psize", "GD_cost", "LR_cost", "opt_cost",
        "global_opt_cost", "ETF-H_cost", "weak_strongman_cost"
    ])
    tie_breaking_rule = 2
    count = 0

    csv_df = pd.read_csv(csv_file)

    rows = []
    for index, row in csv_df.iterrows():
        rows.append(row)

    # Write to the file
    df.to_csv(save_filename, mode="w+", index=False)
    dataset_parallel_generator = partial(dataset_parallel_mapper,
                                         filename=save_filename)
    # Mapping and doing it in parallel
    # drop None results before unpacking them
    result = list(
        filter(None,
               p_umap(dataset_parallel_generator, rows, num_cpus=num_cpus)))
    result = [r[0] for r in result]

    # DataFrame.append returns a new frame, so reassign it
    for entry_dict in result:
        df = df.append(entry_dict, ignore_index=True)

    return df
Code example #10
File: base_combo.py  Project: xapple/libcbm_runner
    def __call__(self, parallel=False, timer=True):
        """A method to run a combo by simulating all countries."""
        # Message #
        print("Running combo '%s'." % self.short_name)
        # Timer start #
        timer = Timer()
        timer.print_start()

        # Function to run a single country #
        def run_country(args):
            code, steps = args
            for runner in steps:
                return runner.run()

        # Run countries sequentially #
        if not parallel:
            result = t_map(run_country, self.runners.items())
        # Run countries in parallel #
        if parallel:
            result = p_umap(run_country, self.runners.items(), num_cpus=4)
        # Timer end #
        timer.print_end()
        timer.print_total_elapsed()
        # Compile logs #
        self.compile_logs()
        # Return #
        return result
Code example #11
def fetch_submissions(**kwargs):
    """[function to fetch submissions]

    Returns:
        [dict]: [the log of submission fetching process]
    """
    post_args, meta_args = kwargs['POST_ARGS'], kwargs['META_ARGS']
    filepath, total, meta, subreddits = meta_args['filepath'], meta_args['total'], \
                                        meta_args['meta'], meta_args['subreddits']
    sort_type, sort, size, start = post_args['sort_type'], post_args[
        'sort'], post_args['size'], post_args['start']
    if os.path.exists(os.path.join(filepath, 'raw', 'posts', 'log.json')):
        return json.load(
            open(os.path.join(filepath, 'raw', 'posts', 'log.json')))
    else:
        tolist = lambda x: [x for _ in range(len(subreddits))]
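        # broadcast the scalar arguments so every iterable passed to p_umap matches len(subreddits)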
        res = p_umap(fetch_posts,
                     subreddits,
                     tolist(total),
                     tolist(meta),
                     tolist(filepath),
                     tolist(sort_type),
                     tolist(sort),
                     tolist(size),
                     tolist(start),
                     num_cpus=NUM_WORKER)
        with open(os.path.join(filepath, 'raw', 'posts', 'log.json'),
                  'w') as fp:
            json.dump(res, fp)
        return res
Code example #12
File: cpmcmc.py  Project: willemvandenboom/cpmcmc
def _repeat_cMCMC(
    S, pyfunc, rng=np.random.default_rng()
):
    tmp_seed = random_seed(rng)

    def par_func(ss):
        return pyfunc(rng=np.random.default_rng(
            seed=np.random.SeedSequence(entropy=tmp_seed, spawn_key=(ss,))
        ))

    start_time = time.perf_counter()
    result_list = p_tqdm.p_umap(par_func, range(S))

    print("Elapsed time:", IPython.core.magics.execution._format_time(
        time.perf_counter() - start_time
    ))

    comp_time  = np.array([[
        result_list[s][key] for key in ["meet time", "additional time"]
    ] for s in range(S)])

    tau_arr = np.array([result_list[s]["meeting time"] for s in range(S)])
    rep_h_coupled = [result_list[s]["h coupled"] for s in range(S)]

    return {"meeting time": tau_arr, "h": rep_h_coupled, "time": comp_time}
Code example #13
File: fits.py  Project: ESMartiny/NetworkSIR
def fit_multiple_files(cfg,
                       filenames,
                       num_cores=1,
                       do_tqdm=True,
                       y_max=0.01,
                       verbose=False):

    func = partial(fit_single_file, cfg=cfg, y_max=y_max)

    if num_cores == 1:
        if do_tqdm:
            filenames = tqdm(filenames)
        results = [func(filename) for filename in filenames]

    else:
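        # p_umap forwards extra keyword arguments to tqdm, so disable=True suppresses the progress bar here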
        results = p_umap(func, filenames, num_cpus=num_cores, disable=True)

    reject_counter = Counter()

    # postprocess results from multiprocessing:
    fit_objects = {}
    for filename, fit_result in results:

        if isinstance(fit_result, str):
            if verbose:
                print(
                    f"\n\n{filename} was rejected due to {fit_result.lower()}")
            reject_counter[fit_result.lower()] += 1

        else:
            fit_object = fit_result
            fit_objects[filename] = fit_object
            reject_counter["no rejection"] += 1

    return fit_objects, reject_counter
Code example #14
def cli(repeat_file, supernode_file, supernodes, min_similarity):
    repeats = np.load(repeat_file)["repeats"]
    supernode_array = np.load(supernode_file)["25"]

    if not supernodes:
        supernodes = np.unique(supernode_array)

    def _data(supernodes, min_similarity):
        for s in range(33, min_similarity, -1):
            for sn in supernodes:
                yield repeats[supernode_array == sn], sn, s
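
    # the range(...) iterable just gives the progress bar a length; its values end up in the lambda's unused second argument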

    p_umap(
        lambda x, y: func(*x),
        _data(supernodes, min_similarity),
        range(len(supernodes) * (33 - min_similarity)),
    )
Code example #15
def run_experiment(models, series_file):
    """Run experiment on multiple cores and write result to series_file."""
    results = p_umap(run_simulation, models)
    print("time for writing the results")
    with open(series_file, "a") as file:
        for result in results:
            file.write('{:.2f},{:d},{:d},{:},{:},{:d}\n'.format(
                result["density"], int(result["grouping"]),
                int(result["iteration"]), result["seed"], result["winner"],
                result["steps"]))
Code example #16
    def resize_dataset(self,
                       target_w=320,
                       target_root="./export",
                       skip_exists=True,
                       multiprocess=True,
                       num_cpus=1.0,
                       copy_annotation=True):
        """
        Resize images from the entire dataset.
        This function uses multiple cores. Be aware that it will slow down your computer.

        Args:
            target_w (int): Target width for resizing. Height is automatically set by ratio.
            target_root (str): Target dataset root to save resized images.
            skip_exists (bool): True: Skip resizing if resized file already exists.
            multiprocess (bool): Use multi process.
            num_cpus (int, float): Number(int) or proportion(float) of cpus to utilize in multiprocess.
            copy_annotation (bool): Also copy annotation data for the resized images.
        """

        target_root = target_root.replace(
            "/",
            "\\") if platform.system().find("Windows") >= 0 else target_root

        mp_args = self.annotations[['root', 'file_root',
                                    'file_name']].values.tolist()

        if multiprocess:
            p_umap(partial(KProductsDataset.resize_image,
                           target_w=target_w,
                           target_root=target_root,
                           skip_exists=skip_exists,
                           copy_annotation=copy_annotation),
                   mp_args,
                   desc="Resizing Images ...",
                   num_cpus=num_cpus)
        else:
            for arg in tqdm(mp_args, desc="Resizing Images ..."):
                KProductsDataset.resize_image(arg,
                                              target_w=target_w,
                                              target_root=target_root,
                                              skip_exists=skip_exists,
                                              copy_annotation=copy_annotation)
Code example #17
def parallel_run(to_do, realizations=1, keep_in=0):
    """Take a list of dictionaries of model and run parameters, run each for the given number
    of realizations in parallel, and mark the first keep_in realizations of each parameter set
    as the ones to keep."""
    print("Preparing list to run", flush=True)
    run_list = [(D, r < keep_in) for r in range(realizations) for D in to_do]
    #print(f"We have {mp.cpu_count()} CPUs")
    #pool = mp.Pool(mp.cpu_count())
    print("Starting execution of " +str(len(run_list)) +" runs", flush=True)
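    # each run_ call gets one (parameters, keep_flag) tuple; results are collected in completion order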
    rows = list(p_umap(run_,run_list))
    #rows = list(pool.map(run_, run_list))
    print("done", flush=True)
    df = pd.DataFrame(rows)
    return df
Code example #18
def extract_save(in_dir, out_dir, class_i, nproc):
    app_dirs = glob(os.path.join(in_dir, '*/'))

    print(f'Extracting features for {class_i}')

    # broadcast out_dir so every app directory is paired with the same output directory
    meta = p_umap(process_app,
                  app_dirs,
                  [out_dir] * len(app_dirs),
                  num_cpus=nproc,
                  file=sys.stdout)
    meta = [i for i in meta if i is not None]
    packages = [t[0] for t in meta]
    csv_paths = [t[1] for t in meta]
    return packages, csv_paths
Code example #19
def main(source_dir, dest_dir):
    """Copies all files from one directory to another
    while preserving the underlying directory structure.

    Arguments:
        source_dir(str): The directory with files to copy.
        dest_dir(str): The directory where the files will be copied to.
    """

    paths = []
    for root, _, files in os.walk(source_dir):
        paths.extend([os.path.join(root, f) for f in files])

    def copy(source_path, skip_existing=True):
        """Copies a file from source_path to source_path with
        source_dir replaced by dest_dir.

        Arguments:
            source_path(str): Path to a file to be copied.
            skip_existing(bool): True to skip copying files
                when the destination file already exists.
        """

        dest_path = source_path.replace(source_dir.strip('/'),
                                        dest_dir.strip('/'))

        # Skip if dest file already exists
        if skip_existing and os.path.exists(dest_path):
            return

        # Create directory if necessary
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)

        copyfile(source_path, dest_path)
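
    # copy every file in parallel; each worker applies the nested copy() helper to one source path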

    p_umap(copy, paths)
Code example #20
def comments_detail(filepath):
    """[function to fetch comments detail]

    Args:
        filepath ([string]): [filepath to store the data]
    """
    subreddit_fp = glob(join(filepath, POST_DETAIL_DIR, '*.json'))
    subreddits = [i.split('/')[-1][:-5] for i in subreddit_fp]
    tolist = lambda x: [x for _ in range(len(subreddits))]
    rest = p_umap(comment_detail,
                  subreddit_fp,
                  tolist(filepath),
                  subreddits,
                  num_cpus=NUM_WORKER)
    with open(join(filepath, COMMENT_DIR, 'log.json'), 'w') as fp:
        json.dump(rest, fp)
Code example #21
def main(directory, results_path):
    """Extracts and saves metadata from DICOMs to a JSON file.

    Arguments:
        directory(str): Path to a directory containing DICOMs.
        results_path(str): Path to the JSON where the metadata
            will be saved.
    """

    dicom_paths = []
    for root, _, files in os.walk(directory):
        dicom_paths.extend([
            os.path.abspath(os.path.join(root, f)) for f in files
            if f.endswith('.dcm')
        ])
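
    # collect metadata from every DICOM in parallel; results arrive in completion order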

    metadata = p_umap(get_dicom_metadata_and_slice_counts, dicom_paths)

    with open(results_path, 'w') as results_file:
        json.dump(metadata, results_file, indent=4, sort_keys=True)
Code example #22
def get_data(outfolder, data_source=None, nprocs=2, recompute=False):
    '''
    Retrieve data for year/location/group from the internet
    and return data (or write data to file, if `outfolder` is
    not `None`).
    '''
    # setup
    os.makedirs(outfolder, exist_ok=True)
    app_data_path = app_heap_path = os.path.join('data', 'out', 'all-apps',
                                                 'app-data')
    os.makedirs(app_data_path, exist_ok=True)
    app_to_parse_path = os.path.join(
        outfolder, 'app_list.csv')  # location of any predetermined apps

    try:  # search for predetermined list of apps
        apps_df = pd.read_csv(app_to_parse_path)
    except FileNotFoundError:  # if no such file, create one by looking for apps under data_source directory
        apps_df = find_apps(data_source)
        apps_df.to_csv(app_to_parse_path)

    def parse_app(app_dir, outfolder):
        app = Application(app_dir)
        outpath = os.path.join(app_data_path, app.app_name + ".csv")
        if os.path.exists(outpath) and not recompute:
            return
        else:
            data = app.parse()
            if data.shape[0] == 0:
                print(f'No data for {app.app_name}', file=sys.stdout)
                return
            else:
                data.to_csv(outpath, index=False)

    print("STEP 1 - PARSING APPS")
    # concurrent execution of smali parsing
    app_parser = p_umap(parse_app,
                        apps_df.app_dir, [outfolder] * len(apps_df.app_dir),
                        num_cpus=nprocs)
Code example #23
def submissions_detail(filepath):
    """[function to fetch submissions' comments detail]

    Args:
        filepath ([string]): [filepath to store the data]
    """
    subreddits_fp = glob(join(filepath, POST_DIR, '*.csv'))
    subreddits = [i.split('/')[-1][:-4] for i in subreddits_fp]
    n, N = 1, len(subreddits)
    for subreddit, fp in zip(subreddits, subreddits_fp):
        print('fetching {0} subreddit details, Progress: {1}/{2}'.format(
            subreddit, str(n), str(N)))
        if os.path.exists(join(filepath, POST_DETAIL_DIR,
                               subreddit + '.json')):
            n += 1
            continue
        else:
            ids = pd.read_csv(fp).id.tolist()
            rest = p_umap(submission_detail, ids, num_cpus=NUM_WORKER)
            with open(join(filepath, POST_DETAIL_DIR, subreddit + '.json'),
                      'w') as f:
                json.dump(rest, f)
            n += 1
Code example #24
File: ML_plot.py  Project: ESMartiny/NetworkSIR
            # Get the network hashes
            network_hashes = set([
                utils.cfg_to_hash(cfg.network, exclude_ID=False)
                for cfg in cfgs
            ])

            # Get list of unique cfgs
            cfgs_network = []
            for cfg in cfgs:
                network_hash = utils.cfg_to_hash(cfg.network, exclude_ID=False)

                if network_hash in network_hashes:
                    cfgs_network.append(cfg)
                    network_hashes.remove(network_hash)

            # Generate the networks
            print("Generating networks. Please wait")
            p_umap(f_single_network, cfgs_network, num_cpus=num_cores)

            # Then run the simulations on the network
            print("Running simulations. Please wait")
            f_single_simulation = partial(simulation.run_single_simulation,
                                          verbose=False)
            for cfg in p_uimap(f_single_simulation, cfgs, num_cpus=num_cores):
                simulation.update_database(db_cfg, q, cfg)

print(
    f"\n{N_files:,} files were generated, total duration {utils.format_time(t.elapsed)}"
)
print("Finished simulating!")
Code example #25
def main():
    #The following block of code is useful for getting a shapefile encompassing the entire subset (Use for clipping DEMs etc)
    #Also, I define the local ortho coordinates using the center of the big bounding box
    init_time = time.time()
    parser = getparser()
    args = parser.parse_args()
    img_folder = args.img_folder
    img_list = sorted(glob.glob(os.path.join(img_folder, '*.tif')))
    if len(img_list) == 0:
        print(
            "No images found in the directory. Make sure they end with a .tif extension"
        )
        sys.exit()
    print("Number of images {}".format(len(img_list)))
    out_fn = args.out_fn
    perc_overlap = float(args.percentage)
    out_shp = os.path.splitext(out_fn)[0] + '_bound.gpkg'
    n_proc = cpu_count()
    shp_list = p_umap(skysat.skysat_footprint, img_list, num_cpus=2 * n_proc)
    merged_shape = geo.shp_merger(shp_list)
    bbox = merged_shape.total_bounds
    print(f'Bounding box lon_lat is:{bbox}')
    bound_poly = Polygon([[bbox[0], bbox[3]], [bbox[2], bbox[3]],
                          [bbox[2], bbox[1]], [bbox[0], bbox[1]]])
    bound_shp = gpd.GeoDataFrame(index=[0], geometry=[bound_poly], crs=geo_crs)
    bound_centroid = bound_shp.centroid
    cx = bound_centroid.x.values[0]
    cy = bound_centroid.y.values[0]
    pad = np.ptp([bbox[3], bbox[1]]) / 6.0
    lat_1 = bbox[1] + pad
    lat_2 = bbox[3] - pad
    #local_ortho = '+proj=ortho +lat_0={} +lon_0={}'.format(cy,cx)
    local_aea = "+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={} +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs".format(
        lat_1, lat_2, cy, cx)
    print('Local Equal Area coordinate system is : {} \n'.format(local_aea))
    print('Saving bound shapefile at {} \n'.format(out_shp))
    bound_shp.to_file(out_shp, driver='GPKG')
    img_combinations = list(combinations(img_list, 2))
    n_comb = len(img_combinations)
    perc_overlap = np.ones(n_comb, dtype=float) * perc_overlap
    proj = local_aea
    tv = p_map(skysat.frame_intsec,
               img_combinations, [proj] * n_comb,
               perc_overlap,
               num_cpus=4 * n_proc)
    # each result contains a truth value (0 or 1) and the overlap percentage
    truth_value = [tvs[0] for tvs in tv]
    overlap = [tvs[1] for tvs in tv]
    valid_list = list(compress(img_combinations, truth_value))
    overlap_perc_list = list(compress(overlap, truth_value))
    print(
        'Number of valid combinations is {}, from {} input images forming {} total combinations\n'
        .format(len(valid_list), len(img_list), n_comb))
    with open(out_fn, 'w') as f:
        img1_list = [x[0] for x in valid_list]
        img2_list = [x[1] for x in valid_list]
        for idx, i in enumerate(valid_list):
            #f.write("%s %s\n" % i)
            f.write(
                f"{os.path.abspath(img1_list[idx])} {os.path.abspath(img2_list[idx])}\n"
            )
    out_fn_overlap = os.path.splitext(out_fn)[0] + '_with_overlap_perc.pkl'
    img1_list = [x[0] for x in valid_list]
    img2_list = [x[1] for x in valid_list]
    out_df = pd.DataFrame({
        'img1': img1_list,
        'img2': img2_list,
        'overlap_perc': overlap_perc_list
    })
    out_df.to_pickle(out_fn_overlap)
    out_fn_stereo = os.path.splitext(out_fn_overlap)[0] + '_stereo_only.pkl'
    stereo_only_df = skysat.prep_trip_df(out_fn_overlap)
    stereo_only_df.to_pickle(out_fn_stereo)
    out_fn_stereo_ba = os.path.splitext(out_fn_overlap)[0] + '_stereo_only.txt'
    stereo_only_df[['img1', 'img2']].to_csv(out_fn_stereo_ba,
                                            sep=' ',
                                            header=False,
                                            index=False)
    print('Script completed in time {} s!'.format(time.time() - init_time))
Code example #26
File: dlTweets.py  Project: nathos7/analyseTweets
def mLemmatizer(m):
	D = dict([i[::-1] for i in _lemmatize(m)])
	return D['nc'] if 'nc' in D else D['v'] if 'v' in D else D['adj'] if 'adj' in D else m

def getTweetsAsDict(usr):
	d = dict()
	for m in clean(getTweetsAsTxt(usr)).split():
		m = mLemmatizer(m)
		d[m] = d.get(m, 0)+1
	return d

###############################################################

def handleUser(usr):
	return [usr, getTweetsAsDict(usr)]

usersTweets = p_umap(handleUser, users)  # download tweets from all users concurrently, showing a progress bar

glob = dict(usersTweets)

allWords = {}

print(glob.keys())

for i in users:
	for mot in glob[i]:
		allWords[mot] = allWords.get(mot, 0) + glob[i][mot]

Code example #27
def main():
    # The following params are originally written in config files
    graph_dir = 'graph_nospeed_gpickle'
    min_subgraph_length_pix = 20
    rdp_epsilon = 1

    # Other parameters
    simplify_graph = True
    verbose = False
    pickle_protocol = 4  # 4 is most recent, python 2.7 can't read 4
    node_iter = 10000  # start int for node naming
    edge_iter = 10000  # start int for edge naming
    manually_reproject_nodes = False

    parser = argparse.ArgumentParser()
    parser.add_argument('--imgs_dir',
                        required=True,
                        type=str,
                        help='dir contains GeoTIFF images for geo reference')
    parser.add_argument('--wkt_csv_file',
                        default=None,
                        type=str,
                        help='WKT file of road skeletons in csv format')
    parser.add_argument('--results_dir',
                        required=True,
                        type=str,
                        help='dir to write output file into')
    parser.add_argument('--n_threads',
                        default=None,
                        type=int,
                        help='desired number of threads for multi-proc')
    args = parser.parse_args()
    assert os.path.exists(args.imgs_dir)
    assert os.path.exists(args.results_dir)
    if args.wkt_csv_file is None:
        args.wkt_csv_file = os.path.join(args.results_dir, 'wkt_nospeed.csv')
    out_gdir = os.path.join(args.results_dir, graph_dir)
    os.makedirs(out_gdir, exist_ok=True)

    # read in wkt list
    df_wkt = pd.read_csv(args.wkt_csv_file)

    # iterate through image ids and create graphs
    t0 = time.time()
    image_ids = np.sort(np.unique(df_wkt['ImageId']))
    nfiles = len(image_ids)
    if args.n_threads is not None:
        n_threads = min(args.n_threads, nfiles)
    else:
        n_threads = None

    params = []
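    # each entry of params is a single argument tuple that wkt_to_G unpacks itself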
    for image_id in image_ids:
        out_file = os.path.join(out_gdir, image_id.split('.')[0] + '.gpickle')

        # for geo referencing, im_file should be the raw image
        im_file = os.path.join(args.imgs_dir, image_id + '.tif')

        # Select relevant WKT lines
        df_filt = df_wkt['WKT_Pix'][df_wkt['ImageId'] == image_id]
        wkt_list = df_filt.values

        # print a few values
        if verbose:
            print("image_file:", im_file)
            print("  wkt_list[:2]", wkt_list[:2])

        if (len(wkt_list) == 0) or (wkt_list[0] == 'LINESTRING EMPTY'):
            G = nx.MultiDiGraph()
            nx.write_gpickle(G, out_file, protocol=pickle_protocol)
            continue
        else:
            params.append((wkt_list, im_file, min_subgraph_length_pix,
                           node_iter, edge_iter, simplify_graph, rdp_epsilon,
                           manually_reproject_nodes, out_file, pickle_protocol,
                           n_threads, verbose))

    if n_threads is None:
        print(
            f"Using all available threads to process {len(params)} non-empty graphs ..."
        )
    else:
        print(
            f"Using {n_threads} thread(s) to process {len(params)} non-empty graphs ..."
        )
    # Compute geospatial road graph
    if n_threads is None or n_threads > 1:
        # with Pool(n_threads) as pool:
        #     tqdm(pool.map(wkt_to_G, params), total=len(params))
        # Replace python multiprocessing.Pool with p_tqdm:
        # https://github.com/swansonk14/p_tqdm
        p_umap(wkt_to_G, params, num_cpus=n_threads)
    else:
        for param in tqdm(params):
            wkt_to_G(param)

    print("Graph gpickle dir: ", out_gdir)
    t1 = time.time()
    print("Time to run wkt_to_G.py: {:6.2f} s".format(t1 - t0))
Code example #28
File: simulation.py  Project: ESMartiny/NetworkSIR
def run_simulations(
        simulation_parameters,
        N_runs=2,
        num_cores_max=None,
        N_tot_max=False,
        verbose=False,
        force_rerun=False,
        dry_run=False,
        **kwargs) :

    if isinstance(simulation_parameters, dict) :
        simulation_parameters = utils.format_simulation_paramters(simulation_parameters)
        cfgs_all = utils.generate_cfgs(simulation_parameters, N_runs, N_tot_max, verbose=verbose)

        N_tot_max = utils.d_num_cores_N_tot[utils.extract_N_tot_max(simulation_parameters)]

    elif isinstance(simulation_parameters[0], utils.DotDict) :
        cfgs_all = simulation_parameters

        N_tot_max = np.max([cfg.network.N_tot for cfg in cfgs_all])

    else :
        raise ValueError(f"simulation_parameters not of the correct type")

    if len(cfgs_all) == 0 :
        N_files = 0
        return N_files

    db_cfg = utils.get_db_cfg()
    q = Query()

    db_counts  = np.array([db_cfg.count((q.hash == cfg.hash) & (q.network.ID == cfg.network.ID)) for cfg in cfgs_all])

    assert np.max(db_counts) <= 1

    # keep only cfgs that are not in the database already
    if force_rerun :
        cfgs = cfgs_all
    else :
        cfgs = [cfg for (cfg, count) in zip(cfgs_all, db_counts) if count == 0]

    N_files = len(cfgs)

    num_cores = utils.get_num_cores_N_tot(N_tot_max, num_cores_max)

    if isinstance(simulation_parameters, dict) :
        s_simulation_parameters = str(simulation_parameters)
    elif isinstance(simulation_parameters, list) :
        s_simulation_parameters = f"{len(simulation_parameters)} runs"
    else :
        raise AssertionError("simulation_parameters neither list nor dict")

    print( f"\n\n" f"Generating {N_files :3d} network-based simulations",
           f"with {num_cores} cores",
           f"based on {s_simulation_parameters}.",
           "Please wait. \n",
           flush=True)

    if dry_run or N_files == 0 :
        return N_files

    # kwargs = {}
    if num_cores == 1 :
        for cfg in tqdm(cfgs) :
            cfg_out = run_single_simulation(cfg, save_initial_network=True, verbose=verbose, **kwargs)
            update_database(db_cfg, q, cfg_out)

    else :
        # First generate the networks
        f_single_network = partial(run_single_simulation, only_initialize_network=True, save_initial_network=True, verbose=verbose, **kwargs)

        # Get the network hashes
        network_hashes = set([utils.cfg_to_hash(cfg.network, exclude_ID=False) for cfg in cfgs])

        # Get list of unique cfgs
        cfgs_network = []
        for cfg in cfgs :
            network_hash = utils.cfg_to_hash(cfg.network, exclude_ID=False)

            if network_hash in network_hashes :
                cfgs_network.append(cfg)
                network_hashes.remove(network_hash)

        # Generate the networks
        print("Generating networks. Please wait")
        p_umap(f_single_network, cfgs_network, num_cpus=num_cores)

        # Then run the simulations on the network
        print("Running simulations. Please wait")
        f_single_simulation = partial(run_single_simulation, verbose=verbose, **kwargs)
        for cfg in p_uimap(f_single_simulation, cfgs, num_cpus=num_cores) :
            update_database(db_cfg, q, cfg)

    return N_files
Code example #29
def main():
    parser = get_parser()
    args = parser.parse_args()
    mode = args.mode
    if mode == 'gridding_only':
        tr = args.tr
        tsrs = args.tsrs
        point2dem_opts = asp.get_point2dem_opts(tr=tr, tsrs=tsrs)
        pc_list = args.point_cloud_list
        job_list = [point2dem_opts + [pc] for pc in pc_list]
        p2dem_log = p_map(asp.run_cmd, ['point2dem'] * len(job_list),
                          job_list,
                          num_cpus=cpu_count())
        print(p2dem_log)
    if mode == 'classic_dem_align':
        ref_dem = args.refdem
        source_dem = args.source_dem
        max_displacement = args.max_displacement
        outprefix = args.outprefix
        align = args.align
        if args.trans_only == 0:
            trans_only = False
        else:
            trans_only = True
        asp.dem_align(ref_dem, source_dem, max_displacement, outprefix, align,
                      trans_only)
    if mode == 'multi_align':
        """ Align multiple DEMs to a single source DEM """
        ref_dem = args.refdem
        source_dem_list = args.source_dem_list
        max_displacement = args.max_displacement
        outprefix_list = [
            f'{os.path.splitext(source_dem)[0]}_aligned_to{os.path.splitext(os.path.basename(ref_dem))[0]}'
            for source_dem in source_dem_list
        ]
        align = args.align
        if args.trans_only == 0:
            trans_only = False
        else:
            trans_only = True
        n_source = len(source_dem_list)
        ref_dem_list = [ref_dem] * n_source
        max_disp_list = [max_displacement] * n_source
        align_list = [align] * n_source
        trans_list = [trans_only] * n_source
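        # broadcast shared settings into per-DEM lists so p_umap can zip them with source_dem_list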
        p_umap(asp.dem_align,
               ref_dem_list,
               source_dem_list,
               max_disp_list,
               outprefix_list,
               align_list,
               trans_list,
               num_cpus=cpu_count())
    if mode == 'align_cameras':
        transform_txt = args.transform
        input_camera_list = args.cam_list
        n_cam = len(input_camera_list)
        if (args.rpc == 1) & (args.dem != 'None'):
            print("will also write rpc files")
            dem = args.dem
            img_list = args.img_list
            rpc = True
        else:
            dem = None
            img_list = [None] * n_cam
            rpc = False
        transform_list = [transform_txt] * n_cam
        outfolder = args.outfol
        if not os.path.exists(outfolder):
            os.makedirs(outfolder)
        outfolder = [outfolder] * n_cam
        write = [True] * n_cam
        rpc = [rpc] * n_cam
        dem = [dem] * n_cam
        p_umap(asp.align_cameras,
               input_camera_list,
               transform_list,
               outfolder,
               write,
               rpc,
               dem,
               img_list,
               num_cpus=cpu_count())
Code example #30
File: main.py  Project: mayer2014/fundSpider
from fund_list import get_fund_list
from fund_info import FuncInfo
# from tqdm import tqdm
import os
from p_tqdm import p_umap

csv_data_dir = "./output/csv_data"

def get_fund(fund):
    code = fund.get("code")
    # name = fund.get("name")
    file_name = os.path.join(csv_data_dir, u"%s.csv" % code)
    if not os.path.exists(csv_data_dir):
        os.mkdir(csv_data_dir)
    if os.path.isfile(file_name):
        return
    info = FuncInfo(code=code)
    info.load_net_value_info(start_date, end_date)
    df = info.get_data_frame()
    df.to_csv(file_name)


if __name__ == '__main__':
    start_date, end_date = "2000-01-01", "2020-01-09"
    fund_list = get_fund_list()
    fund_num = len(fund_list)
    print("total fund: %s" % fund_num)
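    # download each fund's data in parallel; get_fund returns early if the CSV already exists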
    p_umap(get_fund, fund_list)