def _extract_dset_frames(dset_name):
    if dset_name == 'breakfast':
        import data.breakfast as dset
    else:
        raise ValueError('no such dataset')

    extracted_frames_dir = dset.EXTRACTED_FRAMES_DIR
    if not os.path.exists(extracted_frames_dir):
        os.makedirs(extracted_frames_dir)

    video_dir = dset.VIDEO_DIR
    videos = os.listdir(video_dir)
    processed_videos = os.listdir(extracted_frames_dir)
    processed_videos = [video[:-5] for video in processed_videos]  # remove the '.hdf5' extension
    unprocessed_videos = np.setdiff1d(videos, processed_videos)

    video_files = [os.path.join(video_dir, video) for video in unprocessed_videos]
    extracted_files = [os.path.join(extracted_frames_dir, video + '.hdf5')
                       for video in unprocessed_videos]
    p_umap(extract_video_frames, video_files, extracted_files)
def _extract_video_lengths(dset_name):
    if dset_name == 'breakfast':
        import data.breakfast as dset
    else:
        raise ValueError('no such dataset')

    video_length_dir = dset.VIDEO_LENGTHS_DIR
    if not os.path.exists(video_length_dir):
        os.makedirs(video_length_dir)

    video_dir = dset.VIDEO_DIR
    videos = os.listdir(video_dir)
    processed_videos = os.listdir(video_length_dir)
    processed_videos = [video[:-4] for video in processed_videos]  # remove the '.npy' extension
    unprocessed_videos = np.setdiff1d(videos, processed_videos)

    video_files = [os.path.join(video_dir, video) for video in unprocessed_videos]
    video_length_files = [os.path.join(video_length_dir, video + '.npy')
                          for video in unprocessed_videos]
    p_umap(extract_video_lengths, video_files, video_length_files)
def rebuild_dataset_by_dir(self, annotation=None, target_root="./export", multiprocess=True, num_cpus=1.0):
    target_root = target_root.replace("/", "\\") if platform.system().find("Windows") >= 0 else target_root

    if annotation is None:
        annotation = self.annotations

    mp_args = annotation[['file_root', 'file_name', "class_name"]].values.tolist()
    mp_args = [[self.config['dataset_root']] + arg for arg in mp_args]
    reverse_dict = {v: int(k) for k, v in self.config['label_dict'].items()}

    if multiprocess:
        p_umap(partial(KProductsDataset.copy_image, target_root=target_root, reverse_dict=reverse_dict),
               mp_args, desc="Rebuilding Dataset by directory ...", num_cpus=num_cpus)
    else:
        for arg in tqdm(mp_args, desc="Rebuilding Dataset by directory ..."):
            KProductsDataset.copy_image(arg, target_root=target_root, reverse_dict=reverse_dict)
def _preprocess_video_labels(dset_name):
    if dset_name == 'breakfast':
        import data.breakfast as dset
    else:
        raise ValueError('no such dataset')

    hdf5_label_dir = dset.FRAME_RECOGNITION_LABEL_DIR
    if not os.path.exists(hdf5_label_dir):
        os.makedirs(hdf5_label_dir)

    label_dir = dset.LABEL_DIR
    labels = os.listdir(label_dir)
    processed_labels = os.listdir(hdf5_label_dir)
    processed_labels = [video[:-5] for video in processed_labels]  # remove the '.hdf5' extension
    unprocessed_labels = np.sort(np.setdiff1d(labels, processed_labels))

    raw_label_files = [os.path.join(label_dir, video) for video in unprocessed_labels]
    hdf5_label_files = [os.path.join(hdf5_label_dir, video + '.hdf5') for video in unprocessed_labels]
    p_umap(_convert_label_file, raw_label_files, hdf5_label_files)
def dicom_to_png_matlab(dicom_paths, image_paths, selection_criteria, skip_existing=True):
    """Converts a dicom image to a grayscale 16-bit png image using matlab.

    NOTE: Must be run from oncodata/dicom_to_png directory so that Matlab
    can find the dicomToPng.m conversion script.

    Arguments:
        dicom_paths(list[str]): A list of paths to dicom files.
        image_paths(list[str]): A list of paths where the images will be saved.
        selection_criteria(tuple): Criteria a dicom must satisfy to be converted.
        skip_existing(bool): True to skip images which already exist.
    """

    if len(dicom_paths) != len(image_paths):
        print('Error: DICOM paths and image paths must be the same length.')
        exit()

    dicom_paths = np.array(dicom_paths)
    image_paths = np.array(image_paths)

    if skip_existing:
        print('Checking for existing images')
        keep = p_map(lambda image_path: not os.path.exists(image_path), image_paths)
        keep_indices = np.where(keep)
        dicom_paths = dicom_paths[keep_indices]
        image_paths = image_paths[keep_indices]

    # Ensure that dicoms meet selection criteria and only have one slice
    print('Checking for invalid dicoms')
    keep = p_map(
        lambda dicom_path: is_selected_dicom(dicom_path, selection_criteria) and has_one_slice(dicom_path),
        dicom_paths)
    keep_indices = np.where(keep)
    dicom_paths = dicom_paths[keep_indices]
    image_paths = image_paths[keep_indices]

    if len(dicom_paths) == 0:
        return

    # Create directory for images if necessary
    print('Creating directories for images')
    p_umap(create_directory_if_necessary, image_paths)

    # Save paths to temporary files which will be loaded by matlab
    with NamedTemporaryFile(suffix='.txt') as dicoms_file:
        with NamedTemporaryFile(suffix='.txt') as images_file:
            np.savetxt(dicoms_file.name, dicom_paths, fmt='%s')
            np.savetxt(images_file.name, image_paths, fmt='%s')

            # Convert DICOM to PNG using matlab
            print('Converting with matlab')
            Popen([
                'matlab', '-nodisplay', '-nodesktop', '-nojvm', '-r',
                "dicomToPng('%s', '%s'); exit;" % (dicoms_file.name, images_file.name)
            ]).wait()
def vectorize_dataset(self, multiprocess=False, batch_size=32):
    annot_by_file_root = self.get_annotation_by_file_root()

    if multiprocess:
        p_umap(partial(self.vectorize_images, batch_size=batch_size), annot_by_file_root,
               desc="Vectorization Dataset ...")
    else:
        for annotation in tqdm(annot_by_file_root, "Vectorization Dataset ..."):
            self.vectorize_images(annotation, batch_size=batch_size)
def _save_interim_BP(Bs, Ps, csvs, nproc):
    print('Saving B and P', file=sys.stdout)
    p_umap(
        lambda arr, file: np.save(file, arr),
        Bs + Ps,
        [f[:-4] + '.B' for f in csvs] + [f[:-4] + '.P' for f in csvs],
        num_cpus=nproc
    )
def main(dicom_dir, dicom_list_json_path, png_dir, dcmtk, imagemagick, matlab, dicom_types, dicom_ext):
    """Converts DICOM files in a directory to PNG images.

    NOTE: When using Matlab, must be run from oncodata/dicom_to_png directory
    so that Matlab can find the dicomToPng.m conversion script.

    Arguments:
        dicom_dir(str): Path to a directory containing DICOM files.
        dicom_list_json_path(str): Path to an optional list of dicom files [replaces dicom_dir].
        png_dir(str): Path to a directory where PNG versions of the DICOM images will be saved.
        dcmtk(bool): True to use dcmtk to convert DICOMs to PNGs.
        imagemagick(bool): True to use ImageMagick to convert DICOMs to PNGs.
        matlab(bool): True to use matlab to convert DICOMs to PNGs.
    """

    print('Extracting DICOM paths')
    if dicom_list_json_path is not None:
        dicom_paths = json.load(open(dicom_list_json_path, 'r'))
    else:
        dicom_paths = []
        for root, _, files in os.walk(dicom_dir):
            dicom_paths.extend([os.path.join(root, f) for f in files if f.endswith(dicom_ext)])

    image_paths = [
        dicom_path_to_png_path(dicom_path, dicom_dir, png_dir, dicom_ext)
        for dicom_path in dicom_paths
    ]

    selection_criteria = []
    for dicom_type in dicom_types:
        criteria = DICOM_TYPES.get(dicom_type)
        assert criteria is not None, "Unsupported dicom_type. Please add the appropriate type to DICOM_TYPES."
        selection_criteria.append(criteria)
    assert len(selection_criteria) > 0, "No dicoms selected."
    selection_criteria = tuple(selection_criteria)

    if dcmtk:
        print('Converting to PNG')
        p_umap(dicom_to_png_dcmtk, dicom_paths, image_paths, selection_criteria)
    elif imagemagick:
        print('Converting to PNG')
        p_umap(dicom_to_png_imagemagick, dicom_paths, image_paths, selection_criteria)
    elif matlab:
        dicom_to_png_matlab(dicom_paths, image_paths, selection_criteria)
def create_dataset(num_machines, csv_file, save_filename, num_cpus=multiprocessing.cpu_count() - 1):
    df = pd.DataFrame(columns=[
        "graph_object", "num_tasks", "num_machines", "weights", "order",
        "features", "psize", "GD_cost", "LR_cost", "opt_cost",
        "global_opt_cost", "ETF-H_cost", "weak_strongman_cost"
    ])
    tie_breaking_rule = 2
    count = 0

    csv_df = pd.read_csv(csv_file)
    rows = []
    for index, row in csv_df.iterrows():
        rows.append(row)

    # Write the (still empty) frame to the file
    df.to_csv(save_filename, mode="w+", index=False)

    dataset_parallel_generator = partial(dataset_parallel_mapper, filename=save_filename)

    # Map over the rows in parallel, dropping entries for which no result was produced
    result = list(filter(None, p_umap(dataset_parallel_generator, rows, num_cpus=num_cpus)))
    result = [r[0] for r in result]

    # Append each entry dict to the dataframe
    for entry_dict in result:
        df = df.append(entry_dict, ignore_index=True)

    return df
def __call__(self, parallel=False, timer=True):
    """A method to run a combo by simulating all countries."""
    # Message #
    print("Running combo '%s'." % self.short_name)

    # Timer start #
    timer = Timer()
    timer.print_start()

    # Function to run a single country #
    def run_country(args):
        code, steps = args
        for runner in steps:
            return runner.run()

    # Run countries sequentially #
    if not parallel:
        result = t_map(run_country, self.runners.items())

    # Run countries in parallel #
    if parallel:
        result = p_umap(run_country, self.runners.items(), num_cpus=4)

    # Timer end #
    timer.print_end()
    timer.print_total_elapsed()

    # Compile logs #
    self.compile_logs()

    # Return #
    return result
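# A minimal, self-contained sketch of the sequential-vs-parallel switch used above: p_tqdm's
# t_map runs sequentially with a progress bar, while p_umap runs in a process pool and returns
# results in completion order. The `simulate` function and its inputs are hypothetical.
from p_tqdm import t_map, p_umap

def simulate(country_code):
    # stand-in for running one country's simulation
    return country_code.upper()

codes = ['dk', 'se', 'no']
sequential_results = t_map(simulate, codes)              # ordered, single process
parallel_results = p_umap(simulate, codes, num_cpus=2)   # unordered, multiple processes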
def fetch_submissions(**kwargs):
    """[function to fetch submissions]

    Returns:
        [dict]: [the log of submission fetching process]
    """
    post_args, meta_args = kwargs['POST_ARGS'], kwargs['META_ARGS']
    filepath, total, meta, subreddits = meta_args['filepath'], meta_args['total'], \
        meta_args['meta'], meta_args['subreddits']
    sort_type, sort, size, start = post_args['sort_type'], post_args['sort'], \
        post_args['size'], post_args['start']

    if os.path.exists(os.path.join(filepath, 'raw', 'posts', 'log.json')):
        return json.load(open(os.path.join(filepath, 'raw', 'posts', 'log.json')))
    else:
        tolist = lambda x: [x for _ in range(len(subreddits))]
        res = p_umap(fetch_posts, subreddits, tolist(total), tolist(meta),
                     tolist(filepath), tolist(sort_type), tolist(sort),
                     tolist(size), tolist(start), num_cpus=NUM_WORKER)
        with open(os.path.join(filepath, 'raw', 'posts', 'log.json'), 'w') as fp:
            json.dump(res, fp)
        return res
def _repeat_cMCMC(S, pyfunc, rng=np.random.default_rng()):
    tmp_seed = random_seed(rng)

    def par_func(ss):
        return pyfunc(rng=np.random.default_rng(
            seed=np.random.SeedSequence(entropy=tmp_seed, spawn_key=(ss,))
        ))

    start_time = time.perf_counter()
    result_list = p_tqdm.p_umap(par_func, range(S))
    print("Elapsed time:", IPython.core.magics.execution._format_time(
        time.perf_counter() - start_time
    ))

    comp_time = np.array([[
        result_list[s][key] for key in ["meet time", "additional time"]
    ] for s in range(S)])
    tau_arr = np.array([result_list[s]["meeting time"] for s in range(S)])
    rep_h_coupled = [result_list[s]["h coupled"] for s in range(S)]

    return {"meeting time": tau_arr, "h": rep_h_coupled, "time": comp_time}
def fit_multiple_files(cfg, filenames, num_cores=1, do_tqdm=True, y_max=0.01, verbose=False):
    func = partial(fit_single_file, cfg=cfg, y_max=y_max)

    if num_cores == 1:
        if do_tqdm:
            filenames = tqdm(filenames)
        results = [func(filename) for filename in filenames]
    else:
        results = p_umap(func, filenames, num_cpus=num_cores, disable=True)

    reject_counter = Counter()

    # postprocess results from multiprocessing:
    fit_objects = {}
    for filename, fit_result in results:
        if isinstance(fit_result, str):
            if verbose:
                print(f"\n\n{filename} was rejected due to {fit_result.lower()}")
            reject_counter[fit_result.lower()] += 1
        else:
            fit_object = fit_result
            fit_objects[filename] = fit_object
            reject_counter["no rejection"] += 1

    return fit_objects, reject_counter
def cli(repeat_file, supernode_file, supernodes, min_similarity):
    repeats = np.load(repeat_file)["repeats"]
    supernode_array = np.load(supernode_file)["25"]

    if not supernodes:
        supernodes = np.unique(supernode_array)

    def _data(supernodes, min_similarity):
        for s in range(33, min_similarity, -1):
            for sn in supernodes:
                yield repeats[supernode_array == sn], sn, s

    p_umap(
        lambda x, y: func(*x),
        _data(supernodes, min_similarity),
        range(len(supernodes) * (33 - min_similarity)),
    )
def run_experiment(models, series_file):
    """Run experiment on multiple cores and write result to series_file."""
    results = p_umap(run_simulation, models)

    print("time for writing the results")
    with open(series_file, "a") as file:
        for result in results:
            file.write('{:.2f},{:d},{:d},{:},{:},{:d}\n'.format(
                result["density"], int(result["grouping"]),
                int(result["iteration"]), result["seed"],
                result["winner"], result["steps"]))
def resize_dataset(self, target_w=320, target_root="./export", skip_exists=True, multiprocess=True,
                   num_cpus=1.0, copy_annotation=True):
    """ Resize images from the entire dataset.
    This function uses multiple cores. Be aware that it will slow down your computer.

    Args:
        target_w (int): Target width for resizing. Height is automatically set by ratio.
        target_root (str): Target dataset root to save resized images.
        skip_exists (bool): True: Skip resizing if resized file already exists.
        multiprocess (bool): Use multi process.
        num_cpus (int, float): Number(int) or proportion(float) of cpus to utilize in multiprocess.
    """
    target_root = target_root.replace("/", "\\") if platform.system().find("Windows") >= 0 else target_root

    mp_args = self.annotations[['root', 'file_root', 'file_name']].values.tolist()

    if multiprocess:
        p_umap(partial(KProductsDataset.resize_image, target_w=target_w, target_root=target_root,
                       skip_exists=skip_exists, copy_annotation=copy_annotation),
               mp_args, desc="Resizing Images ...", num_cpus=num_cpus)
    else:
        for arg in tqdm(mp_args, desc="Resizing Images ..."):
            KProductsDataset.resize_image(arg, target_w=target_w, target_root=target_root,
                                          skip_exists=skip_exists, copy_annotation=copy_annotation)
def parallel_run(to_do, realizations=1, keep_in=0):
    """Given a list of dictionaries of model and run parameters, run the requested number of
    realizations of each in parallel; the first `keep_in` realizations are flagged to be kept."""
    print("Preparing list to run", flush=True)
    run_list = [(D, r < keep_in) for r in range(realizations) for D in to_do]

    #print(f"We have {mp.cpu_count()} CPUs")
    #pool = mp.Pool(mp.cpu_count())
    print("Starting execution of " + str(len(run_list)) + " runs", flush=True)
    rows = list(p_umap(run_, run_list))
    #rows = list(pool.map(run_, run_list))
    print("done", flush=True)

    df = pd.DataFrame(rows)
    return df
def extract_save(in_dir, out_dir, class_i, nproc):
    app_dirs = glob(os.path.join(in_dir, '*/'))
    print(f'Extracting features for {class_i}')

    meta = p_umap(process_app, app_dirs, out_dir, num_cpus=nproc, file=sys.stdout)
    meta = [i for i in meta if i is not None]
    packages = [t[0] for t in meta]
    csv_paths = [t[1] for t in meta]

    return packages, csv_paths
def main(source_dir, dest_dir):
    """Copies all files from one directory to another while preserving the underlying directory structure.

    Arguments:
        source_dir(str): The directory with files to copy.
        dest_dir(str): The directory where the files will be copied to.
    """

    paths = []
    for root, _, files in os.walk(source_dir):
        paths.extend([os.path.join(root, f) for f in files])

    def copy(source_path, skip_existing=True):
        """Copies a file from source_path to source_path with source_dir replaced by dest_dir.

        Arguments:
            source_path(str): Path to a file to be copied.
            skip_existing(bool): True to skip copying files when the destination file already exists.
        """
        dest_path = source_path.replace(source_dir.strip('/'), dest_dir.strip('/'))

        # Skip if dest file already exists
        if skip_existing and os.path.exists(dest_path):
            return

        # Create directory if necessary
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)

        copyfile(source_path, dest_path)

    p_umap(copy, paths)
def comments_detail(filepath):
    """[function to fetch comments detail]

    Args:
        filepath ([string]): [filepath to store the data]
    """
    subreddit_fp = glob(join(filepath, POST_DETAIL_DIR, '*.json'))
    subreddits = [i.split('/')[-1][:-5] for i in subreddit_fp]
    tolist = lambda x: [x for _ in range(len(subreddits))]

    rest = p_umap(comment_detail, subreddit_fp, tolist(filepath), subreddits, num_cpus=NUM_WORKER)

    with open(join(filepath, COMMENT_DIR, 'log.json'), 'w') as fp:
        json.dump(rest, fp)
def main(directory, results_path):
    """Extracts and saves metadata from DICOMs to a JSON file.

    Arguments:
        directory(str): Path to a directory containing DICOMs.
        results_path(str): Path to the JSON where the metadata will be saved.
    """

    dicom_paths = []
    for root, _, files in os.walk(directory):
        dicom_paths.extend([
            os.path.abspath(os.path.join(root, f)) for f in files if f.endswith('.dcm')
        ])

    metadata = p_umap(get_dicom_metadata_and_slice_counts, dicom_paths)

    with open(results_path, 'w') as results_file:
        json.dump(metadata, results_file, indent=4, sort_keys=True)
def get_data(outfolder, data_source=None, nprocs=2, recompute=False):
    '''
    Retrieve data for year/location/group from the internet
    and return data (or write data to file, if `outfolder` is not `None`).
    '''
    # setup
    os.makedirs(outfolder, exist_ok=True)
    app_data_path = app_heap_path = os.path.join('data', 'out', 'all-apps', 'app-data')
    os.makedirs(app_data_path, exist_ok=True)
    app_to_parse_path = os.path.join(outfolder, 'app_list.csv')  # location of any predetermined apps

    try:
        # search for predetermined list of apps
        apps_df = pd.read_csv(app_to_parse_path)
    except FileNotFoundError:
        # if no such file, create one by looking for apps under data_source directory
        apps_df = find_apps(data_source)
        apps_df.to_csv(app_to_parse_path)

    def parse_app(app_dir, outfolder):
        app = Application(app_dir)
        outpath = os.path.join(app_data_path, app.app_name + ".csv")
        if os.path.exists(outpath) and not recompute:
            return
        else:
            data = app.parse()
            if data.shape[0] == 0:
                print(f'No data for {app.app_name}', file=sys.stdout)
                return
            else:
                data.to_csv(outpath, index=False)

    print("STEP 1 - PARSING APPS")
    # concurrent execution of smali parsing
    app_parser = p_umap(parse_app, apps_df.app_dir, [outfolder] * len(apps_df.app_dir), num_cpus=nprocs)
def submissions_detail(filepath):
    """[function to fetch submissions' comments detail]

    Args:
        filepath ([string]): [filepath to store the data]
    """
    subreddits_fp = glob(join(filepath, POST_DIR, '*.csv'))
    subreddits = [i.split('/')[-1][:-4] for i in subreddits_fp]
    n, N = 1, len(subreddits)

    for subreddit, fp in zip(subreddits, subreddits_fp):
        print('fetching {0} subreddit details, Progress: {1}/{2}'.format(subreddit, str(n), str(N)))
        if os.path.exists(join(filepath, POST_DETAIL_DIR, subreddit + '.json')):
            n += 1
            continue
        else:
            ids = pd.read_csv(fp).id.tolist()
            rest = p_umap(submission_detail, ids, num_cpus=NUM_WORKER)
            with open(join(filepath, POST_DETAIL_DIR, subreddit + '.json'), 'w') as f:
                json.dump(rest, f)
            n += 1
# Get the network hashes
network_hashes = set([utils.cfg_to_hash(cfg.network, exclude_ID=False) for cfg in cfgs])

# Get list of unique cfgs
cfgs_network = []
for cfg in cfgs:
    network_hash = utils.cfg_to_hash(cfg.network, exclude_ID=False)
    if network_hash in network_hashes:
        cfgs_network.append(cfg)
        network_hashes.remove(network_hash)

# Generate the networks
print("Generating networks. Please wait")
p_umap(f_single_network, cfgs_network, num_cpus=num_cores)

# Then run the simulations on the network
print("Running simulations. Please wait")
f_single_simulation = partial(simulation.run_single_simulation, verbose=False)

for cfg in p_uimap(f_single_simulation, cfgs, num_cpus=num_cores):
    simulation.update_database(db_cfg, q, cfg)

print(f"\n{N_files:,} files were generated, total duration {utils.format_time(t.elapsed)}")
print("Finished simulating!")
def main():
    # The following block of code is useful for getting a shapefile encompassing the entire subset
    # (use for clipping DEMs etc). The local ortho coordinates are defined using the center of the
    # big bounding box.
    init_time = time.time()
    parser = getparser()
    args = parser.parse_args()
    img_folder = args.img_folder

    try:
        img_list = sorted(glob.glob(os.path.join(img_folder, '*.tif')))
        print("Number of images {}".format(len(img_list)))
    except:
        print("No images found in the directory. Make sure they end with a .tif extension")
        sys.exit()

    out_fn = args.out_fn
    perc_overlap = np.float(args.percentage)
    out_shp = os.path.splitext(out_fn)[0] + '_bound.gpkg'
    n_proc = cpu_count()

    shp_list = p_umap(skysat.skysat_footprint, img_list, num_cpus=2 * n_proc)
    merged_shape = geo.shp_merger(shp_list)
    bbox = merged_shape.total_bounds
    print(f'Bounding box lon_lat is:{bbox}')

    bound_poly = Polygon([[bbox[0], bbox[3]], [bbox[2], bbox[3]],
                          [bbox[2], bbox[1]], [bbox[0], bbox[1]]])
    bound_shp = gpd.GeoDataFrame(index=[0], geometry=[bound_poly], crs=geo_crs)
    bound_centroid = bound_shp.centroid
    cx = bound_centroid.x.values[0]
    cy = bound_centroid.y.values[0]
    pad = np.ptp([bbox[3], bbox[1]]) / 6.0
    lat_1 = bbox[1] + pad
    lat_2 = bbox[3] - pad
    #local_ortho = '+proj=ortho +lat_0={} +lon_0={}'.format(cy,cx)
    local_aea = "+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={} +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs".format(
        lat_1, lat_2, cy, cx)
    print('Local Equal Area coordinate system is : {} \n'.format(local_aea))

    print('Saving bound shapefile at {} \n'.format(out_shp))
    bound_shp.to_file(out_shp, driver='GPKG')

    img_combinations = list(combinations(img_list, 2))
    n_comb = len(img_combinations)
    perc_overlap = np.ones(n_comb, dtype=float) * perc_overlap
    proj = local_aea
    tv = p_map(skysat.frame_intsec, img_combinations, [proj] * n_comb, perc_overlap,
               num_cpus=4 * n_proc)

    # each result contains a truth value (0 or 1) and an overlap percentage
    truth_value = [tvs[0] for tvs in tv]
    overlap = [tvs[1] for tvs in tv]
    valid_list = list(compress(img_combinations, truth_value))
    overlap_perc_list = list(compress(overlap, truth_value))
    print('Number of valid combinations are {}, out of total {} input images making total combinations {}\n'.format(
        len(valid_list), len(img_list), n_comb))

    with open(out_fn, 'w') as f:
        img1_list = [x[0] for x in valid_list]
        img2_list = [x[1] for x in valid_list]
        for idx, i in enumerate(valid_list):
            #f.write("%s %s\n" % i)
            f.write(f"{os.path.abspath(img1_list[idx])} {os.path.abspath(img2_list[idx])}\n")

    out_fn_overlap = os.path.splitext(out_fn)[0] + '_with_overlap_perc.pkl'
    img1_list = [x[0] for x in valid_list]
    img2_list = [x[1] for x in valid_list]
    out_df = pd.DataFrame({'img1': img1_list, 'img2': img2_list, 'overlap_perc': overlap_perc_list})
    out_df.to_pickle(out_fn_overlap)

    out_fn_stereo = os.path.splitext(out_fn_overlap)[0] + '_stereo_only.pkl'
    stereo_only_df = skysat.prep_trip_df(out_fn_overlap)
    stereo_only_df.to_pickle(out_fn_stereo)

    out_fn_stereo_ba = os.path.splitext(out_fn_overlap)[0] + '_stereo_only.txt'
    stereo_only_df[['img1', 'img2']].to_csv(out_fn_stereo_ba, sep=' ', header=False, index=False)

    print('Script completed in time {} s!'.format(time.time() - init_time))
def mLemmatizer(m):  # signature restored; the name and parameter are inferred from the call in getTweetsAsDict below
    D = dict([i[::-1] for i in _lemmatize(m)])
    return D['nc'] if 'nc' in D else D['v'] if 'v' in D else D['adj'] if 'adj' in D else m

def getTweetsAsDict(usr):
    d = dict()
    for m in clean(getTweetsAsTxt(usr)).split():
        m = mLemmatizer(m)
        d[m] = d.get(m, 0) + 1
    return d

###############################################################
def handleUser(usr):
    return [usr, getTweetsAsDict(usr)]

usersTweets = p_umap(handleUser, users)  # download tweets from all users concurrently, showing a progress bar

glob = dict(usersTweets)
allWords = {}
print(glob.keys())
for i in users:
    for mot in glob[i]:
        allWords[mot] = allWords.get(mot, 0) + glob[i][mot]
def main():
    # The following params are originally written in config files
    graph_dir = 'graph_nospeed_gpickle'
    min_subgraph_length_pix = 20
    rdp_epsilon = 1

    # Other parameters
    simplify_graph = True
    verbose = False
    pickle_protocol = 4   # 4 is most recent, python 2.7 can't read 4
    node_iter = 10000     # start int for node naming
    edge_iter = 10000     # start int for edge naming
    manually_reproject_nodes = False

    parser = argparse.ArgumentParser()
    parser.add_argument('--imgs_dir', required=True, type=str,
                        help='dir contains GeoTIFF images for geo reference')
    parser.add_argument('--wkt_csv_file', default=None, type=str,
                        help='WKT file of road skeletons in csv format')
    parser.add_argument('--results_dir', required=True, type=str,
                        help='dir to write output file into')
    parser.add_argument('--n_threads', default=None, type=int,
                        help='desired number of threads for multi-proc')
    args = parser.parse_args()

    assert os.path.exists(args.imgs_dir)
    assert os.path.exists(args.results_dir)
    if args.wkt_csv_file is None:
        args.wkt_csv_file = os.path.join(args.results_dir, 'wkt_nospeed.csv')

    out_gdir = os.path.join(args.results_dir, graph_dir)
    os.makedirs(out_gdir, exist_ok=True)

    # read in wkt list
    df_wkt = pd.read_csv(args.wkt_csv_file)

    # iterate through image ids and create graphs
    t0 = time.time()
    image_ids = np.sort(np.unique(df_wkt['ImageId']))
    nfiles = len(image_ids)
    if args.n_threads is not None:
        n_threads = min(args.n_threads, nfiles)
    else:
        n_threads = None

    params = []
    for image_id in image_ids:
        out_file = os.path.join(out_gdir, image_id.split('.')[0] + '.gpickle')

        # for geo referencing, im_file should be the raw image
        im_file = os.path.join(args.imgs_dir, image_id + '.tif')

        # Select relevant WKT lines
        df_filt = df_wkt['WKT_Pix'][df_wkt['ImageId'] == image_id]
        wkt_list = df_filt.values

        # print a few values
        if verbose:
            print("image_file:", im_file)
            print("  wkt_list[:2]", wkt_list[:2])

        if (len(wkt_list) == 0) or (wkt_list[0] == 'LINESTRING EMPTY'):
            G = nx.MultiDiGraph()
            nx.write_gpickle(G, out_file, protocol=pickle_protocol)
            continue
        else:
            params.append((wkt_list, im_file, min_subgraph_length_pix,
                           node_iter, edge_iter,
                           simplify_graph, rdp_epsilon,
                           manually_reproject_nodes,
                           out_file, pickle_protocol, n_threads, verbose))

    if n_threads is None:
        print(f"Using all thread(s) to process {len(params)} non-empty graphs ...")
    else:
        print(f"Using {n_threads} thread(s) to process {len(params)} non-empty graphs ...")

    # Compute geospatial road graph
    if n_threads is None or n_threads > 1:
        # with Pool(n_threads) as pool:
        #     tqdm(pool.map(wkt_to_G, params), total=len(params))
        # Replace python multiprocessing.Pool with p_tqdm:
        # https://github.com/swansonk14/p_tqdm
        p_umap(wkt_to_G, params, num_cpus=n_threads)
    else:
        for param in tqdm(params):
            wkt_to_G(param)
    print("Graph gpickle dir: ", out_gdir)

    t1 = time.time()
    print("Time to run wkt_to_G.py: {:6.2f} s".format(t1 - t0))
def run_simulations(simulation_parameters, N_runs=2, num_cores_max=None, N_tot_max=False,
                    verbose=False, force_rerun=False, dry_run=False, **kwargs):

    if isinstance(simulation_parameters, dict):
        simulation_parameters = utils.format_simulation_paramters(simulation_parameters)
        cfgs_all = utils.generate_cfgs(simulation_parameters, N_runs, N_tot_max, verbose=verbose)
        N_tot_max = utils.d_num_cores_N_tot[utils.extract_N_tot_max(simulation_parameters)]
    elif isinstance(simulation_parameters[0], utils.DotDict):
        cfgs_all = simulation_parameters
        N_tot_max = np.max([cfg.network.N_tot for cfg in cfgs_all])
    else:
        raise ValueError(f"simulation_parameters not of the correct type")

    if len(cfgs_all) == 0:
        N_files = 0
        return N_files

    db_cfg = utils.get_db_cfg()
    q = Query()

    db_counts = np.array([
        db_cfg.count((q.hash == cfg.hash) & (q.network.ID == cfg.network.ID)) for cfg in cfgs_all
    ])
    assert np.max(db_counts) <= 1

    # keep only cfgs that are not in the database already
    if force_rerun:
        cfgs = cfgs_all
    else:
        cfgs = [cfg for (cfg, count) in zip(cfgs_all, db_counts) if count == 0]

    N_files = len(cfgs)

    num_cores = utils.get_num_cores_N_tot(N_tot_max, num_cores_max)

    if isinstance(simulation_parameters, dict):
        s_simulation_parameters = str(simulation_parameters)
    elif isinstance(simulation_parameters, list):
        s_simulation_parameters = f"{len(simulation_parameters)} runs"
    else:
        raise AssertionError("simulation_parameters neither list nor dict")

    print(
        f"\n\n"
        f"Generating {N_files :3d} network-based simulations",
        f"with {num_cores} cores",
        f"based on {s_simulation_parameters}.",
        "Please wait. \n",
        flush=True)

    if dry_run or N_files == 0:
        return N_files

    # kwargs = {}
    if num_cores == 1:
        for cfg in tqdm(cfgs):
            cfg_out = run_single_simulation(cfg, save_initial_network=True, verbose=verbose, **kwargs)
            update_database(db_cfg, q, cfg_out)
    else:
        # First generate the networks
        f_single_network = partial(run_single_simulation, only_initialize_network=True,
                                   save_initial_network=True, verbose=verbose, **kwargs)

        # Get the network hashes
        network_hashes = set([utils.cfg_to_hash(cfg.network, exclude_ID=False) for cfg in cfgs])

        # Get list of unique cfgs
        cfgs_network = []
        for cfg in cfgs:
            network_hash = utils.cfg_to_hash(cfg.network, exclude_ID=False)
            if network_hash in network_hashes:
                cfgs_network.append(cfg)
                network_hashes.remove(network_hash)

        # Generate the networks
        print("Generating networks. Please wait")
        p_umap(f_single_network, cfgs_network, num_cpus=num_cores)

        # Then run the simulations on the networks
        print("Running simulations. Please wait")
        f_single_simulation = partial(run_single_simulation, verbose=verbose, **kwargs)

        for cfg in p_uimap(f_single_simulation, cfgs, num_cpus=num_cores):
            update_database(db_cfg, q, cfg)

    return N_files
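# The example above uses p_uimap, which (unlike p_umap) returns a lazy iterator of results in
# completion order, so each finished cfg can be written to the database while other simulations
# are still running. A minimal sketch of that consume-as-completed pattern, with a hypothetical
# `process` function standing in for the simulation:
from p_tqdm import p_uimap

def process(item):
    return item * item

for finished in p_uimap(process, range(10), num_cpus=4):
    # handle each result as soon as its worker returns it (order is not guaranteed)
    print(finished)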
def main():
    parser = get_parser()
    args = parser.parse_args()
    mode = args.mode

    if mode == 'gridding_only':
        tr = args.tr
        tsrs = args.tsrs
        point2dem_opts = asp.get_point2dem_opts(tr=tr, tsrs=tsrs)
        pc_list = args.point_cloud_list
        job_list = [point2dem_opts + [pc] for pc in pc_list]
        p2dem_log = p_map(asp.run_cmd, ['point2dem'] * len(job_list), job_list, num_cpus=cpu_count())
        print(p2dem_log)

    if mode == 'classic_dem_align':
        ref_dem = args.refdem
        source_dem = args.source_dem
        max_displacement = args.max_displacement
        outprefix = args.outprefix
        align = args.align
        if args.trans_only == 0:
            trans_only = False
        else:
            trans_only = True
        asp.dem_align(ref_dem, source_dem, max_displacement, outprefix, align, trans_only)

    if mode == 'multi_align':
        """
        Align multiple DEMs to a single source DEM
        """
        ref_dem = args.refdem
        source_dem_list = args.source_dem_list
        max_displacement = args.max_displacement
        outprefix_list = [
            f'{os.path.splitext(source_dem)[0]}_aligned_to{os.path.splitext(os.path.basename(ref_dem))[0]}'
            for source_dem in source_dem_list
        ]
        align = args.align
        if args.trans_only == 0:
            trans_only = False
        else:
            trans_only = True
        n_source = len(source_dem_list)
        ref_dem_list = [ref_dem] * n_source
        max_disp_list = [max_displacement] * n_source
        align_list = [align] * n_source
        trans_list = [trans_only] * n_source
        p_umap(asp.dem_align, ref_dem_list, source_dem_list, max_disp_list, outprefix_list,
               align_list, trans_list, num_cpus=cpu_count())

    if mode == 'align_cameras':
        transform_txt = args.transform
        input_camera_list = args.cam_list
        n_cam = len(input_camera_list)
        if (args.rpc == 1) & (args.dem != 'None'):
            print("will also write rpc files")
            dem = args.dem
            img_list = args.img_list
            rpc = True
        else:
            dem = None
            img_list = [None] * n_cam
            rpc = False
        transform_list = [transform_txt] * n_cam
        outfolder = args.outfol
        if not os.path.exists(outfolder):
            os.makedirs(outfolder)
        outfolder = [outfolder] * n_cam
        write = [True] * n_cam
        rpc = [rpc] * n_cam
        dem = [dem] * n_cam
        p_umap(asp.align_cameras, input_camera_list, transform_list, outfolder, write, rpc, dem,
               img_list, num_cpus=cpu_count())
from fund_list import get_fund_list
from fund_info import FuncInfo
# from tqdm import tqdm
import os
from p_tqdm import p_umap

csv_data_dir = "./output/csv_data"


def get_fund(fund):
    code = fund.get("code")
    # name = fund.get("name")
    file_name = os.path.join(csv_data_dir, u"%s.csv" % code)
    if not os.path.exists(csv_data_dir):
        os.mkdir(csv_data_dir)
    if os.path.isfile(file_name):
        return
    info = FuncInfo(code=code)
    info.load_net_value_info(start_date, end_date)
    df = info.get_data_frame()
    df.to_csv(file_name)


if __name__ == '__main__':
    start_date, end_date = "2000-01-01", "2020-01-09"
    fund_list = get_fund_list()
    fund_num = len(fund_list)
    print("total fund: %s" % fund_num)
    p_umap(get_fund, fund_list)
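# Recap of the p_umap call pattern seen throughout these examples: the function is applied
# element-wise across one or more equal-length iterables, num_cpus controls the size of the
# worker pool, and extra tqdm keyword arguments such as desc are forwarded to the progress bar.
# A minimal sketch with a hypothetical save_pair function:
import numpy as np
from p_tqdm import p_umap

def save_pair(array, path):
    np.save(path, array)

arrays = [np.zeros(3), np.ones(3)]
paths = ['a.npy', 'b.npy']
p_umap(save_pair, arrays, paths, num_cpus=2, desc="Saving arrays ...")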