from multiprocessing import Pool
from tqdm import tqdm


def build_clusters(self):
    # one (k, n) task per requested cluster count and iteration
    set_list = [(k, n) for k in self.k_list for n in range(self.num_iter)]
    p = Pool()
    p.starmap(self.prepare_directory, tqdm(set_list))
    p.close()
    self.write_dirlist()
    self.print_face()
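# Hedged note on the method above: Pool.starmap submits the whole input up
# front, so wrapping set_list in tqdm tracks task *submission* (nearly
# instant), not completion. A sketch of a completion-tracking variant using
# imap; _prepare_one is a hypothetical wrapper, not part of the original
# class:
#
#     def _prepare_one(self, args):
#         k, n = args
#         return self.prepare_directory(k, n)
#
#     for _ in tqdm(p.imap(self._prepare_one, set_list), total=len(set_list)):
#         pass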
from multiprocessing import Pool, cpu_count


class Parallel(Executor):  # Executor is the project's base class, imported elsewhere
    def __init__(self, pool_size=cpu_count() - 1):
        self.workers = pool_size
        self.pool = Pool(processes=pool_size)

    def run(self, func, arglist):
        return self.pool.starmap(func, arglist)

    def num_workers(self):
        return self.workers
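# A minimal usage sketch for the Parallel executor above, run under the
# usual __main__ guard; `add` is a hypothetical worker function, not part
# of the original snippet.
def add(a, b):
    return a + b


if __name__ == '__main__':
    executor = Parallel(pool_size=2)
    print(executor.run(add, [(1, 2), (3, 4)]))  # -> [3, 7]
    executor.pool.close()  # the class exposes no shutdown method of its own
    executor.pool.join()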
def all_countries(base_path, multiprocess=True, overwrite=True, savefig=False, report=False):
    """
    Main function to estimate the length of all the roads and countries we are interested in.

    Args:
        *base_path* : Base path to the location of all files and directories in this project.

        *multiprocess* : Set to True by default. Set to False in the case of limited processing power.

        *overwrite* : Set to True by default. This relates to all input data (i.e. .poly files, .osm.pbf files and shapefiles).

        *savefig* : Set to False by default. When set to True, it will return a figure with the roads of a country.

        *report* : Set to False by default. Passed on to the country-level function.

    Returns:
        An Excel file with the length of all **Primary**, **Secondary**, **Tertiary**,
        **Track** and **Other** roads for each country.
    """
    print('The calculation of road lengths has started!')
    start = time.time()

    # =============================================================================
    #     """ Set path to dirs """
    # =============================================================================
    dir_out = os.path.join(base_path, 'output_data')
    poly_dir = os.path.join(base_path, 'poly_files')
    osm_path_in = os.path.join(base_path, 'osm_continent')
    fig_dir = os.path.join(base_path, 'Figures')

    # =============================================================================
    #     """ create directories if they are not created yet """
    # =============================================================================
    if not os.path.exists(dir_out):
        os.makedirs(dir_out)
    if not os.path.exists(poly_dir):
        os.makedirs(poly_dir)
    if not os.path.exists(osm_path_in):
        os.makedirs(osm_path_in)
    if savefig and not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    # =============================================================================
    #     """ Set path to files we use """
    # =============================================================================
    wb_country_in = os.path.join(base_path, 'input_data', 'wbccodes2014.csv')
    global_shape = os.path.join(base_path, 'input_data', '2015_GAUL_Dataset_Mod.gdb')

    # =============================================================================
    #     """ Load country shapes and list and only save the required countries """
    # =============================================================================
    wb_country = pd.read_csv(wb_country_in, header=0, index_col=0)

    # filter high-income countries from country file
    country_list = wb_country[['country', 'continent']].loc[wb_country['wbregion'] != 'YHI']

    # add column to country list so we can easily look up the required
    # continental osm file for that continent
    map_continent = {
        'MA': 'central-america',
        'SA': 'south-america',
        'EU': 'europe',
        'AS': 'asia',
        'AU': 'australia-oceania',
        'AF': 'africa',
        'AM': 'north-america'
    }
    country_list['osm-cont'] = country_list['continent'].map(lambda x: map_continent[x])

    # =============================================================================
    #     """ create .poly files to clip countries from osm.pbf files """
    # =============================================================================
    if not os.listdir(poly_dir):
        create_poly_files(base_path, global_shape, save_shapefile=overwrite)

    # =============================================================================
    #     """ check if we have actually downloaded the openstreetmap input files.
    #     If not, let's download them. Note: this will take a while! """
    # =============================================================================
    continent_list = [
        'central-america', 'south-america', 'europe', 'asia',
        'australia-oceania', 'africa', 'north-america'
    ]

    for continent in continent_list:
        url = 'http://download.geofabrik.de/%s-latest.osm.pbf' % continent
        if '%s-latest.osm.pbf' % continent not in os.listdir(osm_path_in):
            # urlretrieve needs a target file path, not a directory
            urllib.request.urlretrieve(
                url, os.path.join(osm_path_in, '%s-latest.osm.pbf' % continent))

    # =============================================================================
    #     """ create extracted osm files for each country per continent """
    # =============================================================================
    countries = []
    continent_osms = []
    base_paths = []
    overwrites = []
    savefigs = []
    reporting = []
    for country in country_list.iterrows():
        country = country[1]
        continent_osm = os.path.join(osm_path_in, '%s-latest.osm.pbf' % country['osm-cont'])
        countries.append(country['country'])
        continent_osms.append(continent_osm)
        base_paths.append(base_path)
        overwrites.append(overwrite)
        savefigs.append(savefig)
        reporting.append(report)

    # multiprocessing will start if set to True. Set to False with limited processing capacity
    if multiprocess:
        pool = Pool(cpu_count() - 1)
        out = pool.starmap(
            single_country,
            zip(countries, continent_osms, base_paths, overwrites, savefigs, reporting))
    # when multiprocessing is set to False, we just loop over the countries
    else:
        out = []
        i = 0
        for country in country_list.iterrows():
            country = country[1]
            continent_osm = os.path.join(osm_path_in, '%s-latest.osm.pbf' % country['osm-cont'])
            out.append(single_country(country['country'], continent_osm, base_path,
                                      overwrites[i], savefigs[i], reporting[i]))
            i += 1

    df = pd.concat(out, axis=1).T

    map_country = dict(zip(wb_country['country'], wb_country['country_name']))
    df['Country'] = df.index.to_series().map(map_country)
    df.set_index('Country', inplace=True, drop=True)

    writer = pd.ExcelWriter(os.path.join(dir_out, 'dist_roads.xlsx'))
    df.to_excel(writer, 'output')
    writer.save()

    end = time.time()
    print('It took ' + str(np.float16(end - start)) + ' seconds to finish!')
from multiprocess import Pool
from time import sleep
import time


def f(x):
    sleep(1)
    return x + 1


def g(s, x):
    sleep(s)
    return x + 1


# [f(i) for i in range(5)]  # slow: runs serially

pool = Pool(4)
starttime = time.time()
# res = pool.map(f, range(12))
# res2 = pool.map(lambda a: g(1, a), range(12))
res3 = pool.starmap(g, [[1, 1], [1, 2], [1, 3], [1, 4]])
print(time.time() - starttime)
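# Hedged note: the commented-out lambda above works with the third-party
# `multiprocess` package (which serializes with dill); with the standard
# library's multiprocessing, a lambda cannot be pickled, but
# functools.partial over a module-level function can:
from functools import partial

res2 = pool.map(partial(g, 1), range(12))  # same effect as lambda a: g(1, a)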
# generating data for neural net with model as input and grid as output
input1 = model_input

start = time.time()

# going parallel
cpu_cores = cpu_count()
parallel_set = np.array_split(model_input, cpu_cores, axis=0)
parallel_list = []

# generating list of datasets for parallel
for i in range(cpu_cores):
    parallel_list.append((parallel_set[i], someOptionList))

# parallel
pool = Pool(cpu_cores)
res = pool.starmap(priceGenerator, parallel_list)
output1 = np.concatenate(res, axis=0)
stop = time.time()
print("time: ", stop - start)

# saving dataset1
np.savetxt("Data/hestonPriceGridInput.csv", input1, delimiter=",")
np.savetxt("Data/hestonPriceGridOutput.csv", output1, delimiter=",")

# generating data for nn with all inputs and 1 output price
total_comb = np.shape(model_input)[0] * np.shape(output1)[1]
total_cols = np.shape(model_input)[1] + np.shape(option_input)[1]
total_options = np.shape(option_input)[0]

input2 = np.empty((total_comb, total_cols))
output2 = np.empty((total_comb, 1))

for i in range(np.shape(model_input)[0]):
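# Hedged aside on the split/concatenate pattern above (the loop body itself
# is truncated in the source): np.array_split, unlike np.split, accepts a
# row count that is not divisible by cpu_cores, so chunks may differ in
# length by one row while np.concatenate restores the original order:
#
#     chunks = np.array_split(np.arange(10).reshape(10, 1), 4, axis=0)
#     # chunk lengths: 3, 3, 2, 2
#     assert np.concatenate(chunks, axis=0).shape == (10, 1)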
# load dataframe with regions
regions_shape = os.path.join(base_path, 'regions', 'Tanzania Regions.shp')
tza_regions = gpd.read_file(regions_shape)

regions = list(filter(None.__ne__, tza_regions['REGION']))
# hardcoded region list overrides the one read from the shapefile
regions = ['Arusha', 'Dar-Es-Salaam', 'Dodoma', 'Iringa', 'Kagera', 'Kigoma', 'Kilimanjaro', 'Manyara',
           'Tabora', 'Mbeya', 'Morogoro', 'Mtwara', 'Mwanza', 'Pwani', 'Ruvuma', 'Singida', 'Rukwa',
           'Lindi', 'Tanga', 'Shinyanga', 'Manyara']

base_paths = [base_path] * len(regions)
region_shapes = [regions_shape] * len(regions)

pool = Pool(cpu_count() - 1)
pool.starmap(dist_junction, zip(regions, region_shapes, base_paths))

# =============================================================================
#     # merge output of distance to junction
# =============================================================================
shp_network = os.path.join(base_path, 'output_closest_jct', 'dist_to_jct_tza.shp')

df_list_regions = []
for region in regions:
    if len([i for i in os.listdir(os.path.join(base_path, 'output_closest_jct'))
            if i.endswith('%s.shp' % region)]) != 0:
        try:
            country_path = os.path.join(base_path, 'output_closest_jct', '%s.shp' % region)
            inb = gpd.read_file(country_path)
            df_list_regions.append(inb)
        except Exception:
            print("%s doesn't seem to have been finished" % region)
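# A hedged sketch of the merge step the loop above collects frames for,
# assuming the combined per-region output is meant to be written to the
# shp_network path defined earlier (pandas imported as pd):
import pandas as pd

if df_list_regions:
    merged = gpd.GeoDataFrame(pd.concat(df_list_regions, ignore_index=True))
    merged.to_file(shp_network)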
        train_input, train_output, test_input, test_output, 1000, 100, 0.1,
        "sabr5.h5", 1000, 0.9, 0.1, 0, False, True, 15
    ], [
        train_input, train_output, test_input, test_output, 1000, 100, 0.1,
        "sabr6.h5", 1000, 0.9, 0.1, 0.2, False, True, 15
    ], [
        train_input, train_output, test_input, test_output, 1000, 100, 0.1,
        "sabr7.h5", 1000, 0.9, 0.1, 0, False, False, 15
    ], [
        train_input, train_output, test_input, test_output, 1000, 100, 0.1,
        "sabr8.h5", 1000, 0.9, 0.1, 0.2, False, False, 15
    ]]

res = pool.starmap(ms.model_train, input_set)
print(res)
"""

# Number of nodes
nodes = 1000

# Model creation
model = Sequential()

# Layer 1
model.add(Dense(nodes, input_shape=(14,)))
model.add(Activation('softplus'))  # softplus, f(x) = ln(1 + e^x), a smooth variant of ReLU
model.add(Dropout(0.2))

# Layer 2, output
model.add(Dense(10))
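# A minimal sketch of how a regression model like the one above is
# typically compiled and trained; the optimizer, loss, epochs and batch
# size here are assumptions, not taken from the original script:
model.compile(optimizer='adam', loss='mean_squared_error')
# model.fit(train_input, train_output, epochs=100, batch_size=1024)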
def run():
    parser = create_parser()
    args = parser.parse_args()

    if args.num_threads is not None:
        threads = int(args.num_threads)
    else:
        threads = 1

    print("Parsing BLAST output.\n")
    num_blast = count_line(args.input_blast_file)
    batch_blast = split_file(args.input_blast_file, num_blast, threads)
    print("\nSplit BLAST input into " + str(len(batch_blast)) + " subunit(s).\n")
    #print(batch_blast)

    num_seq = count_seq(args.input_fasta_file)
    batch_seq = split_file(args.input_fasta_file, num_seq, threads)
    print("\nSplit sequence input into " + str(len(batch_seq)) + " subunit(s).\n")
    #print(batch_seq)

    ### Parse BLAST output file
    if args.keep_term is not None:
        print("\nKeeping sequences belonging to sskingdom: " + str(args.keep_term) + "\n")
        keepage = []
        p = Pool(threads)
        with open(args.input_blast_file + "_tmp", "w") as out:
            for i, j in p.starmap(parse_blast, [(batch_blast, args.keep_term)]):
                for line in j:  ### fix nested for loop
                    out.write(line)
                keepage = i
        blast = args.input_blast_file + "_tmp"
    else:
        p = Pool(threads)
        print("\nNo parsing of BLAST output.\n")
        keepage = []
        blast = args.input_blast_file

    ### Using BLAST output file (parsed or otherwise), figure out which baits need to be removed
    print("\nDetermining baits to remove.\n")
    for i in p.starmap(to_remove, [(batch_seq, blast)]):
        removal = i
    #print("Removed: " + str(removal))
    #print("Kept: " + str(keepage))

    ### Off-target filter using above list of baits to remove (or ignore when checking each bait)
    print("\nFiltering baits.\n")
    count_off_target = 0
    with open(args.output_fasta_file, "w") as out2:
        for i in p.starmap(off_target, [(batch_seq, removal, blast)]):
            for t in i:
                out2.write(">" + str(t[0]) + "\n" + str(t[1]) + "\n")
                count_off_target = count_off_target + 1
    count_off_target = num_seq - count_off_target
    return count_off_target
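# Hedged note on the pool calls above: each starmap is given a
# single-element argument list (e.g. [(batch_seq, blast)]), so the pool
# executes exactly one task regardless of how many workers it has. If the
# batches are meant to be processed in parallel, one tuple per batch would
# be needed, e.g. (assuming to_remove accepts a single batch):
#
#     results = p.starmap(to_remove, [(b, blast) for b in batch_seq])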