def distances1():
    """Build a driving-time table for every pair of locations and save it as CSV.

    Coordinates are read from ``./simulation/locations1.csv`` (``|``-delimited,
    first row skipped, first two columns parsed as floats). A fixed coordinate
    is prepended, every 2-element combination of the resulting list is formed,
    and the Google Distance Matrix client is queried for each pair. The rows
    ``[origin, destination, duration value]`` are written to ``matlab1.csv``.
    """
    gmaps = googlemaps.Client(key=config.api_key_distance_matrix)
    document_name = 'matlab1.csv'  # file of connected nodes, i.e. the result

    # The csv module needs a text-mode file opened with newline='' on Python 3.
    with open('./simulation/locations1.csv', newline='') as csvfile:
        item_reader = csv.reader(csvfile, delimiter='|')
        next(item_reader, None)  # skip the first (header) row
        rows = [[float(row[0]), float(row[1])] for row in item_reader]

    rows.insert(0, [19.434940, -99.195697])
    temp_array = list(itertools.combinations(rows, 2))
    print(temp_array)

    origins = [pair[0] for pair in temp_array]
    destinations = [pair[1] for pair in temp_array]

    # divide() needs an integer part count of at least 1.
    n_chunks = max(1, len(origins) // 5)
    origins_divided = [list(c) for c in mit.divide(n_chunks, origins)]
    destinations_divided = [list(c) for c in mit.divide(n_chunks, destinations)]

    distance_matrix = []
    for chunk_origins, chunk_destinations in zip(origins_divided,
                                                 destinations_divided):
        if not chunk_origins:
            continue  # nothing to ask the service for
        d_mat = gmaps.distance_matrix(chunk_origins, chunk_destinations,
                                      mode='driving')
        # The response holds one row per origin and, inside it, one element
        # per destination. Origin j belongs with destination j, so the value
        # wanted is on the diagonal, not in the first row.
        for j, api_row in enumerate(d_mat['rows']):
            distance_matrix.append(api_row['elements'][j]['duration']['value'])

    final_temp_array = [
        [pair[0], pair[1], duration]
        for pair, duration in zip(temp_array, distance_matrix)
    ]
    df = pd.DataFrame(final_temp_array)
    df.to_csv(document_name)
def set_templates(self, templates_data):
    """Hand ``templates_data`` out to the worker processes and wait for them.

    The data is split into ``self.n_process`` parts. A worker whose part is
    non-empty is started and flagged active; the others are flagged inactive.
    The exit status of every active worker is then collected.

    Raises:
        TemplateMatchingException: if any collected exit status is falsy.
    """
    parts = [
        list(part)
        for part in more_itertools.divide(self.n_process, templates_data)
    ]

    for worker, part in zip(self.processes, parts):
        if not part:
            worker.is_active = False
            continue
        worker.start_set_templates(part)
        worker.is_active = True

    statuses = []
    for worker in self.processes:
        if not worker.is_active:
            continue
        exit_status, _ = worker.get_from_return_queue()
        statuses.append(exit_status)

    if all(statuses):
        return

    msg = 'MultiprocessMatching.set_templates() is faild.'
    print(msg)
    raise TemplateMatchingException(msg)
def process(files_path, num_zip, num_xml):
    """Run the map / tree-reduce pipeline and move the resulting files.

    Documents produced by ``ReaderWriter`` are mapped in chunks of four, the
    mapped results are divided into ``2 ** 4`` groups, and the groups are
    reduced repeatedly (pairing results at each level) until one final
    reduction remains. Each final result is a mapping whose values are moved
    onto its keys with ``shutil.move``.
    """
    docs_per_task = 4
    tree_depth = 4
    workers = cpu_count() * 2

    with Pool(workers) as pool:
        # Map stage
        documents = ReaderWriter(files_path, num_zip, num_xml)
        mapped = pool.imap_unordered(mapper_process,
                                     chunked(documents, docs_per_task))

        # First reduce level: one group per leaf of the reduce tree
        leaves = divide(1 << tree_depth, mapped)
        stage = pool.imap_unordered(reducer_process,
                                    flatten(map(partition, leaves)))

        # Intermediate levels: combine results two at a time
        for _ in range(tree_depth - 1):
            pairs = chunked(stage, 2)
            stage = pool.imap_unordered(reducer_process,
                                        flatten(map(partition, pairs)))

        # Final level
        stage = pool.imap_unordered(reducer_process, partition(stage))

        # DEBUG:
        # result = list(stage); print(result); return;
        outcome = list(stage)

        if not outcome:
            click.secho('Empty output', fg='red')

        for res in outcome:
            for k, v in res.items():
                click.secho('Ok %s' % k, fg='green')
                shutil.move(v, k)
def transform(files, columns=2):
    """Re-flow the list items of each non-first column into ``columns`` columns.

    For every file, each ``.page`` is scanned for ``.column`` elements. The
    first column of a page is left alone. Every following column has its
    ``li`` items split into ``columns`` groups, each group is placed in a new
    ``div.column > ul`` inserted after the previously handled column, and the
    original column is removed from the document. The ``style`` element is
    then replaced with ``STYLE`` and the document is written next to the
    input as ``*.modified.html``.

    :param files: open file objects exposing ``name`` and ``read()``
    :param columns: number of new columns created per original column
    """
    for file in files:
        print("File %s " % file.name)
        source_html = bs4.BeautifulSoup(file.read(), features="html.parser")

        for ind_page, page in enumerate(source_html.select(".page")):
            print(SEP_1 + "Reading page %s " % ind_page)

            for ind_col, col in enumerate(page.select(".column")):
                if ind_col == 0:
                    last_col = [col]
                    continue

                lines = col.select("li")
                print(SEP_2 + "Number of lines %s " % len(lines))
                # Report against the requested column count, not a fixed 2
                lines_per_column = len(lines) // columns
                print(SEP_2 + "Number of lines per column: %s"
                      % lines_per_column)

                for group in more_itertools.divide(columns, lines):
                    # Create new tags
                    new_col = source_html.new_tag(
                        "div", attrs={"class": "column"})
                    new_ul = source_html.new_tag("ul")
                    # Insert new tags
                    for line in group:
                        new_ul.append(line)
                    new_col.append(new_ul)
                    last_col[-1].insert_after(new_col)
                    # Register last column
                    last_col.append(new_col)

                # Remove original column. `del col` only unbound the local
                # name and left the now-empty column in the tree.
                col.extract()

        output_name = file.name.replace(".html", ".modified.html")
        print(output_name)
        style = source_html.find("style")
        style.string = STYLE
        with open(output_name, "w") as f:
            f.write(str(source_html))
def map_async(
    self,
    func: Callable[[_InputType], _OutputType],
    iterable: Iterable[_InputType],
    chunksize: Optional[int] = 1,
    callback: Optional[Callable[[_OutputType], None]] = None,
    error_callback: Optional[Callable[[BaseException], None]] = None,
) -> MapResult[_OutputType]:
    """Queue one map task per worker and return a combined async result.

    ``iterable`` is divided into ``self.n_worker`` parts. For each part a
    ``_RayMapTask`` is created, its async result is registered in
    ``self.processed_results`` under the task id, and the task is put on
    ``self.task_queue``. ``chunksize`` is accepted but not used here.
    """
    __doc__ = super().apply_async.__doc__  # noqa: F841

    pending: List[AsyncResult[List[_OutputType]]] = []
    for part in mitertools.divide(self.n_worker, iterable=iterable):
        task = _RayMapTask(
            task_id=uuid4(),
            func=func,
            args=part,
            callback=callback,
            error_callback=error_callback,
        )
        part_result = _RayAsyncResult[List[_OutputType]](task.task_id)
        pending.append(part_result)
        self.processed_results[task.task_id] = part_result
        self.task_queue.put(task)

    return _RayAsyncMapResult[_OutputType](async_results=pending)
def set_templates(self, templates_data):
    """Hand ``templates_data`` out to the worker processes and wait for them.

    The data is split into ``self.n_process`` parts. A worker whose part is
    non-empty is started and flagged active; the others are flagged inactive.
    Active workers are then polled in order. On the first falsy exit status
    all workers are terminated, a message is logged and the interpreter exits
    with status 1.
    """
    parts = [
        list(part)
        for part in more_itertools.divide(self.n_process, templates_data)
    ]

    for worker, part in zip(self.processes, parts):
        has_work = bool(part)
        if has_work:
            worker.start_set_templates(part)
        worker.is_active = has_work

    for worker in self.processes:
        if not worker.is_active:
            continue
        exit_status, _ = worker.get_from_return_queue()
        if exit_status:
            continue
        self.terminate_all()
        log.print(
            'MultiprocessMatching.set_templates() is faild. Kill all child processes and system exit.'
        )
        sys.exit(1)
    return
def fetch_sequences(self, download_dir: str = 'dataset/raw',
                    download_name: str = 'UniProt_seqs.fasta',
                    n_threads: t.Optional[int] = None) -> t.Tuple[SeqRec, ...]:
    """Download the sequences listed in ``combined_resources['Acc']``.

    Accessions already present in ``{download_dir}/{download_name}`` are
    skipped. The remaining ones are split into chunks, fetched concurrently
    with ``fetch_seqs`` and appended to that file. The whole file is then
    parsed into ``self.seqs``.

    :param download_dir: directory holding the FASTA file
    :param download_name: name of the FASTA file
    :param n_threads: ``max_workers`` for the thread pool
    :return: all sequences parsed from the FASTA file
    :raises RequiresAttributeError: if ``combined_resources`` is ``None``
    """
    if self.combined_resources is None:
        raise RequiresAttributeError('combined_resources')

    acc = set(self.combined_resources['Acc'])
    logging.info(f'Total sequences to fetch: {len(acc)}')
    path = f'{download_dir}/{download_name}'

    if Path(path).is_file():
        acc_exist = acc & {s.id.split('|')[1]
                           for s in SeqIO.parse(path, 'fasta')}
        acc -= acc_exist
        logging.info(f'Found {len(acc_exist)} sequences in file {path}. '
                     f'Remaining sequences to fetch: {len(acc)}')
        if not acc:
            logging.info('No new sequences to fetch: returning existing ones.')
            self.seqs = tuple(SeqIO.parse(path, 'fasta'))
            return self.seqs

    if not acc:
        # Nothing requested and nothing on disk: there is no work to split.
        self.seqs = ()
        return self.seqs

    # `len(acc) // 500` is 0 for fewer than 500 accessions and divide()
    # rejects a part count below 1, so always use at least one chunk.
    num_chunks = max(1, len(acc) // 500)
    chunks = divide(num_chunks, acc)
    logging.info(f'Split downloading list into {num_chunks} chunks')

    with open(path, 'a+') as f, \
            ThreadPoolExecutor(max_workers=n_threads) as executor:
        futures = [executor.submit(fetch_seqs, chunk) for chunk in chunks]
        for future in tqdm(as_completed(futures),
                           desc='Fetching chunks of sequences',
                           total=num_chunks):
            print(future.result(), file=f)

    self.seqs = tuple(SeqIO.parse(path, 'fasta'))
    logging.info(f'Fetched {len(self.seqs)} sequences to {path}')
    return self.seqs
def get_from_to_dates(db_tool, date_arg):
    """
    Return the ``[from, to]`` date pair a back-testing task should cover.

    The full range runs from the earliest ``Signal.date`` in the database to
    the current time (both are ``None`` when no database tool or no signal is
    available). Back-testing is scaled by date only, so the range can be cut
    into pieces for multiprocessing.

    :param db_tool: database helper exposing ``session``, or ``None``
    :param date_arg: when it holds exactly two items that are both ints or
        digit strings, the first is the zero-based index of the piece to
        return and the second is the number of pieces, e.g. ``8 18`` splits
        all dates into 18 pieces and selects the piece at index 8. Any other
        value selects the full range.
    :return: ``[first, last]`` of the selected piece when that piece holds
        more than one date, otherwise the full ``[from_date, to_date]``
    """

    def _is_number(value):
        # Exact type comparison is kept on purpose (subclasses are excluded).
        return type(value) == int or (type(value) == str and value.isdigit())

    earliest = None
    if db_tool is not None:
        earliest = db_tool.session.query(Signal.date).order_by(
            Signal.date).first()

    if earliest is None:
        from_date, to_date = None, None
    else:
        from_date, to_date = earliest[0], datetime.datetime.now()

    split_requested = (date_arg and len(date_arg) == 2
                       and _is_number(date_arg[0])
                       and _is_number(date_arg[1]))
    if split_requested:
        all_dates = list(BackTestingStrategy.date_range(from_date, to_date))
        pieces = divide(int(date_arg[1]), all_dates)
        selected = list(pieces[int(date_arg[0])])
        if len(selected) > 1:
            return [selected[0], selected[-1]]

    return [from_date, to_date]
def main(args):
    """Process the input file batch by batch and append the output.

    The 1-based line numbers of ``args.input`` are split into
    ``args.batch_num`` batches, and each batch into ``args.cpu`` sub-ranges
    represented as ``[first_line, last_line]``. For each batch a process pool
    maps ``caller`` over its sub-ranges, and every non-empty sentence returned
    is joined and appended to ``args.output``.
    """
    # Count lines with a context manager so the input file is closed.
    with open(args.input) as source:
        line_count = sum(1 for _ in source)
    indexList = list(range(1, line_count + 1))

    batches = [list(c) for c in mit.divide(args.batch_num, indexList)]
    del indexList

    cpu = args.cpu
    print('cpu count: ', cpu)

    truncBatches = []
    for batch in batches:
        truncBatch = []
        for c in mit.divide(cpu, batch):
            temp = list(c)
            # divide() yields empty parts when there are fewer lines than
            # workers; those have no first/last element to record.
            if temp:
                truncBatch.append([temp[0], temp[-1]])
        truncBatches.append(truncBatch)
    del batches
    gc.collect()

    for count, truncBatch in enumerate(truncBatches, start=1):
        if not truncBatch:
            continue  # nothing to do for an empty batch

        prod_x = partial(caller,
                         file=args.input,
                         num_walks=args.num_walks,
                         walk_length=args.walk_length,
                         decay=args.decay,
                         q=args.q,
                         p=args.p,
                         batch_num=count)
        pool = mp.Pool(processes=cpu)
        try:
            res = pool.map(prod_x, truncBatch)
        finally:
            # Release the workers even when a task raised.
            pool.close()
            pool.join()

        with open(args.output, 'a') as f:
            for content in res:
                for sentence in content:
                    if len(sentence) > 0:
                        f.write(''.join(sentence))
        del res
        gc.collect()
def test_get_cpu_core_mapping():
    """Check that the mapping is a list made of runs of consecutive ids."""
    mapping = get_cpu_core_mapping()
    assert isinstance(mapping, list)

    # Assert cpu submappings are sequential
    for group in divide(THREADS // CORES, mapping):
        ids = list(group)
        for previous, current in zip(ids, ids[1:]):
            assert current - previous == 1
def more_itertools_divide():
    """Print each of the three parts ``divide`` makes of a seven-item list."""
    sample = [1, 2, 3, 4, 5, 10, 8]
    for part in divide(3, sample):
        print(list(part))
def splitter(img, n_row, n_col):
    """Cut a 2-D image into an ``n_row`` x ``n_col`` grid of tiles.

    Rows and columns are distributed as evenly as possible; when the size is
    not a multiple of the part count, the leading parts get one extra row or
    column. Tiles are returned in row-major order and together cover the
    whole image.

    :param img: array whose ``shape`` unpacks to ``(height, width)``
    :param n_row: number of horizontal bands
    :param n_col: number of vertical bands
    :return: list of ``n_row * n_col`` slices of ``img``
    :raises ValueError: if ``n_row`` or ``n_col`` is smaller than 1
    """
    if n_row < 1 or n_col < 1:
        raise ValueError('n must be at least 1')

    def _edges(length, parts):
        # Boundaries of `parts` contiguous ranges covering range(length).
        base, extra = divmod(length, parts)
        edges = [0]
        for k in range(parts):
            edges.append(edges[-1] + base + (1 if k < extra else 0))
        return edges

    img_height, img_width = img.shape
    row_edges = _edges(img_height, n_row)
    col_edges = _edges(img_width, n_col)

    # Slice ends are exclusive, so the upper bound is the next part's start.
    # Using the last index of a part as the end dropped one row and one
    # column from every tile.
    img_parts = [
        img[row_edges[r]:row_edges[r + 1], col_edges[c]:col_edges[c + 1]]
        for r in range(n_row)
        for c in range(n_col)
    ]
    return img_parts
def services_resolved(self):
    """Finish set-up once the device's services are known.

    Looks up the controller data service and its two characteristics by
    UUID, issues four ``write`` calls, initialises the instance state,
    creates the ``uinput`` device, splits the rotated wheel positions into
    four lists (top, right, bottom, left) and enables notifications on the
    data characteristic.
    """
    super().services_resolved()

    data_service = next(
        s for s in self.services
        if s.uuid == '4f63756c-7573-2054-6872-65656d6f7465')
    setup_char = next(
        c for c in data_service.characteristics
        if c.uuid == 'c8c51726-81bc-483b-a052-f7a14ea3d282')
    sensor_char = next(
        c for c in data_service.characteristics
        if c.uuid == 'c8c51726-81bc-483b-a052-f7a14ea3d281')
    self.__setup_characteristic = setup_char
    self.__sensor_characteristic = sensor_char

    # Same four writes, in the same order, as (payload, second argument).
    startup_writes = (
        (b'\x01\x00', 3),
        (b'\x06\x00', 1),
        (b'\x07\x00', 1),
        (b'\x08\x00', 3),
    )
    for payload, second_arg in startup_writes:
        self.write(bytearray(payload), second_arg)

    self.__max = 315
    self.__r = self.__max / 2
    self.__axisX = 0
    self.__axisY = 0
    self.__altX = 0
    self.__altY = 0

    capabilities = [
        uinput.REL_X, uinput.REL_Y, uinput.BTN_LEFT, uinput.BTN_RIGHT,
        uinput.KEY_LEFTCTRL, uinput.KEY_LEFTALT, uinput.KEY_HOME,
        uinput.KEY_UP, uinput.KEY_DOWN, uinput.KEY_LEFT, uinput.KEY_RIGHT,
        uinput.KEY_VOLUMEUP, uinput.KEY_VOLUMEDOWN, uinput.KEY_KPPLUS,
        uinput.KEY_KPMINUS, uinput.KEY_PAGEUP, uinput.KEY_PAGEDOWN,
        uinput.KEY_KP0, uinput.KEY_SCROLLDOWN, uinput.KEY_SCROLLUP,
    ]  # , uinput.BTN_TOUCH, uinput.ABS_PRESSURE
    self.__device = uinput.Device(capabilities)

    self.__reset = True
    self.__volbtn = True
    self.__tchbtn = True
    self.__trig = True
    self.__time = round(time.time()) + 10
    self.__lastupdated = 0
    self.__updatecounts = 0
    self.__wheelPos = -1
    self.__useWheel = False

    self.__c_numberOfWheelPositions = 64
    positions = list(range(self.__c_numberOfWheelPositions))
    rotated = ror(positions, self.__c_numberOfWheelPositions // 8)
    top, right, bottom, left = (list(part)
                                for part in mit.divide(4, rotated))
    self.__l_top = top
    self.__l_right = right
    self.__l_bottom = bottom
    self.__l_left = left

    self.__wheelMultiplier = 2
    self.__useTouch = False
    self.__dirUp = False
    self.__dirDown = False
    self.__VR = False

    sensor_char.enable_notifications()
    print("setup done")
def _calculate_function_with_batch_size(args, f, max_batch):
    """Call ``f`` on ``args`` in several smaller batches and regroup the outputs.

    ``args`` is a sequence of equally long argument sequences. Their rows are
    split into ``ceil(len(args[0]) / max_batch)`` parts, ``f`` is called once
    per part with the part's columns as positional arguments, and the
    per-call outputs are transposed with ``zip``.

    :param args: argument sequences; ``len(args[0])`` is taken as batch size
    :param f: callable invoked once per part
    :param max_batch: divisor used to derive the number of parts
    :return: list of tuples, one per output position of ``f``
    """
    batch_size = len(args[0])
    batch_number = math.ceil(batch_size / max_batch)

    # divide() takes the number of parts first and the iterable second; the
    # arguments were passed the other way round.
    outputs = [
        f(*zip(*part))
        for part in more_itertools.divide(batch_number, zip(*args))
    ]
    return list(zip(*outputs))
def _encode_batch_multi(tokenized_batch, models, token_type=0):
    """Encode a tokenized batch by spreading it over several models.

    The batch is divided into ``len(models)`` parts. Each non-empty part is
    turned into a ``torch.long`` tensor on its model's device and passed to
    that model's ``forward``. The outputs are moved to CPU, concatenated and
    returned as a NumPy array.
    """
    outputs = []
    for model, part in zip(models, divide(len(models), tokenized_batch)):
        tokenized_chunk = list(part)
        if not tokenized_chunk:
            continue
        token_ids = torch.tensor(tokenized_chunk,
                                 dtype=torch.long,
                                 device=model.device)
        outputs.append(model.forward(token_ids, token_type=token_type))

    on_cpu = [out.cpu() for out in outputs]
    return torch.cat(on_cpu).numpy()
def Process(self, FeatureList):
    """Map ``TabixObject.TabixThread`` over ``FeatureList`` using a pool.

    The features are divided into one part per thread, and each worker call
    receives an ``(index, part)`` pair from ``enumerate``.
    """
    indexed_parts = enumerate(
        more_itertools.divide(self.__threads, FeatureList))

    with Threading(self.__name, self.__logger, self.__threads) as pool:
        worker = functools.partial(TabixObject.TabixThread,
                                   FileName=self.__filename,
                                   Header=self.__header,
                                   Logger=self.__logger)
        Results = pool.map(worker, indexed_parts)
    return Results
def splitCities(cities, numOfSalesman, originCity, maps):
    """Split the non-origin cities among the salesmen.

    The entries from ``CreateListCoordinate`` are sorted by their second
    item, the origin's entry is removed, and the remaining city names are
    divided into ``numOfSalesman`` groups. Each returned group starts with
    ``originCity``.
    """
    coordinates = CreateListCoordinate(cities, maps)
    coordinates.sort(key=lambda entry: entry[1])
    coordinates.remove([originCity, maps[originCity]])

    other_cities = [entry[0] for entry in coordinates]
    groups = mit.divide(numOfSalesman, other_cities)
    return [[originCity] + list(group) for group in groups]
def wrap_multiple_col(s, n):
    """Lay the lines of ``s`` out in ``n`` side-by-side columns.

    The first two lines of ``s`` are repeated at the top of every column.
    The remaining lines are divided into ``n`` groups, shorter groups are
    padded with empty strings, and corresponding lines are joined with two
    tab characters.

    :param s: newline-separated text
    :param n: number of columns; ``s`` is returned unchanged when ``n <= 1``
    :return: the re-laid-out text
    """
    if n <= 1:
        return s

    s_list = s.split('\n')
    header, body = s_list[:2], s_list[2:]
    group_list = [
        list(itertools.chain(header, part))
        for part in more_itertools.divide(n, body)
    ]

    max_len = max(len(group) for group in group_list)
    for group in group_list:
        # extend() replaces the per-item loop, which relied on the
        # Python-2-only `xrange`.
        group.extend([''] * (max_len - len(group)))

    return '\n'.join('\t\t'.join(row) for row in zip(*group_list))
def row_rec_magic(row_definitions: Iterable[str], r: Iterable):
    """Select one item of ``r`` by repeatedly halving it.

    ``r`` is divided into a front and a back half. The first entry of
    ``row_definitions`` picks the half (``'F'`` for front, ``'B'`` for back).
    When it is the last entry, the first item of the chosen half is returned;
    otherwise the remaining entries are applied to that half.

    :param row_definitions: string or sequence of ``'F'`` / ``'B'`` entries
    :param r: items to choose from
    :return: the selected item, or ``None`` when an entry is neither
        ``'F'`` nor ``'B'``
    """
    front, back = more_itertools.divide(2, r)
    head, *tail = row_definitions

    if head == 'F':
        half = front
    elif head == 'B':
        half = back
    else:
        return None

    # Test the unpacked head rather than comparing the whole argument with
    # ['F'] / ['B'], so a one-character string works like a one-item list.
    if not tail:
        return next(half)
    return row_rec_magic(tail, half)
def column_rec_magic(column_definition, r: Iterable):
    """Select one item of ``r`` by repeatedly halving it.

    ``r`` is divided into a left and a right half. The first entry of
    ``column_definition`` picks the half (``'L'`` for left, ``'R'`` for
    right). When it is the last entry, the first item of the chosen half is
    returned; otherwise the remaining entries are applied to that half.

    :param column_definition: string or sequence of ``'L'`` / ``'R'`` entries
    :param r: items to choose from
    :return: the selected item, or ``None`` when an entry is neither
        ``'L'`` nor ``'R'``
    """
    left, right = more_itertools.divide(2, r)
    head, *tail = column_definition

    if head == 'L':
        half = left
    elif head == 'R':
        half = right
    else:
        return None

    # Test the unpacked head rather than comparing the whole argument with
    # ['L'] / ['R'], so a one-character string works like a one-item list.
    if not tail:
        return next(half)
    return column_rec_magic(tail, half)
def write_edgelist_to_file(args):
    """Filter edges in parallel and write the result to a numbered file.

    :param args: tuple ``(edges, filtered_edges_filepath, nodes_subsample, i)``.
        ``nodes_subsample`` is divided into one part per CPU, and
        ``filter_edges`` is mapped over ``(edges, filtered_edges, part)``
        tuples. All returned edges are written, one ``"a b"`` pair per line,
        to ``"{filtered_edges_filepath}-{i}"``.
    """
    edges, filtered_edges_filepath, nodes_subsample, i = args
    filtered_edges = []

    # os.cpu_count() may return None; fall back to a single part then.
    n_parts = os.cpu_count() or 1
    jobs = [
        (edges, filtered_edges, list(part))
        for part in divide(n_parts, nodes_subsample)
    ]

    # The context manager shuts the pool down once the results are in.
    with Pool() as pool:
        results = pool.map(filter_edges, jobs)

    for result in results:
        filtered_edges.extend(result)

    with open("{}-{}".format(filtered_edges_filepath, i), 'w') as all_edges_file:
        for edge in filtered_edges:
            all_edges_file.write("{} {}\n".format(edge[0], edge[1]))
def evaluate_all(self, solutions):
    """Evaluate ``solutions`` in chunks through ``self.evaluator``.

    All solutions are evaluated (no ``evaluated`` filter is applied). They
    are divided into at most ``self.evaluator.n_executors`` chunks, each
    wrapped in a ``_BuildJob``. Where a returned solution differs from the
    original at the same position, its attributes (except ``problem``) are
    copied back onto the original. ``self.nfe`` grows by the number of
    solutions processed.
    """
    # forcing evaluation of all solutions for now (no `not s.evaluated` filter)
    unevaluated = list(solutions)
    if not unevaluated:
        # divide() rejects a part count of 0, and there is nothing to do.
        return

    num_chunks = min(len(unevaluated), self.evaluator.n_executors)
    jobs_build = [
        _BuildJob(list(chunk)) for chunk in divide(num_chunks, unevaluated)
    ]
    results_build = self.evaluator.evaluate_all(jobs_build)
    results = itertools.chain.from_iterable(
        [job.solutions for job in results_build])

    # if needed, update the original solution with the results
    for i, solution in enumerate(results):
        original = unevaluated[i]
        if original != solution:
            for attr, val in solution.__dict__.items():
                if attr != 'problem':
                    setattr(original, attr, val)

    self.nfe += len(unevaluated)
def __init__(self, templates_data, image_path, n_process=8):
    """Create one ``MatchingProcess`` per part of ``templates_data``.

    ``templates_data`` must hold more items than ``n_process``. It is divided
    into ``n_process`` parts, and each resulting process is flagged active
    and stored in ``self.processes``.
    """
    assert len(templates_data) > n_process
    self.n_process = n_process
    self.processes = []

    for part in more_itertools.divide(self.n_process, templates_data):
        worker = MatchingProcess(list(part), image_path)
        worker.is_active = True
        self.processes.append(worker)
def query_loop(self, df: pd.DataFrame, query: str):
    """Run ``self.run_query`` over the symbols of ``df`` in up to 8 chunks.

    For ``query == 'standard'`` each value of ``df.symbol`` is wrapped in
    ``^...$``. For ``query == 'by_name'`` each value of ``df.title`` is
    stripped and runs of spaces are collapsed. The symbols of a chunk are
    joined with ``|`` and passed to ``run_query``.

    :param df: frame providing the ``symbol`` or ``title`` column
    :param query: ``'standard'`` or ``'by_name'``
    :return: the concatenated query results
    :raises ValueError: for any other ``query`` value
    """
    # Compare strings by value; `is` tests object identity.
    if query == 'standard':
        symbols = ['^' + s + '$' for s in df.symbol.tolist()]
    elif query == 'by_name':
        symbols = [str(s).strip() for s in df.title.tolist()]
        symbols = [re.sub(' +', ' ', s) for s in symbols]
    else:
        raise ValueError("Unknown query type: {!r}".format(query))

    # Drop empty chunks (fewer than 8 symbols) so no empty pattern is sent.
    symbols_split = [
        chunk for chunk in (list(c) for c in mit.divide(8, symbols)) if chunk
    ]

    frames = []
    total_rows = 0
    for i, s in enumerate(symbols_split):
        print("Parsing symbol list of size: {}, List: {}/{}".format(
            len(s), i + 1, len(symbols_split)))
        # Assumes run_query returns a DataFrame, as pd.concat requires
        # pandas objects - TODO confirm.
        query_results = self.run_query('|'.join(s), query)
        frames.append(query_results)
        total_rows += len(query_results)
        print("Size of results returned: {}".format(total_rows))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames) if frames else pd.DataFrame()
def generate_grid_search_shell():
    """Write one shell script per GPU covering the whole parameter grid.

    A command is rendered from ``cmd_template`` for every setting in
    ``ParameterGrid(param_grid)``. The commands are shuffled, divided into
    one bucket per entry of ``gpus``, and each bucket is written to
    ``grid/grid.<gpu_id>.sh`` after ``cmd_env``, with every command prefixed
    by ``CUDA_VISIBLE_DEVICES=<gpu_id>``.
    """
    cmd_list = [cmd_template % setting for setting in ParameterGrid(param_grid)]
    print('#cmd:', len(cmd_list))

    # shuffle and divide cmds to different gpus
    rnd.shuffle(cmd_list)
    buckets = more_itertools.divide(len(gpus), cmd_list)

    for gpu_id, bucket in zip(gpus, buckets):
        script_path = 'grid/grid.%s.sh' % (gpu_id, )
        with codecs.open(script_path, 'w', 'utf-8') as f_out:
            f_out.write(cmd_env)
            prefixed = ('CUDA_VISIBLE_DEVICES=%d %s' % (gpu_id, cmd)
                        for cmd in bucket)
            f_out.write('\n'.join(prefixed) + '\n')
def create_midi(genome, note_sequence, key, scale):
    """Write ``note_sequence`` to a MIDI file spread over several tracks.

    The notes are divided into ``tracks`` consecutive groups, with ``tracks``
    taken from ``track_preprocessing()``. Each group goes onto its own track,
    every note starting where the previous one on that track ended.

    :param genome: first part of the file name
    :param note_sequence: notes with ``'pitch'`` and ``'duration'`` keys
    :param key: second part of the file name
    :param scale: third part of the file name
    :return: name of the file written
    """
    midi, channel, tracks = track_preprocessing()

    # enumerate() supplies the track number instead of a manual counter.
    for track, notes in enumerate(mit.divide(tracks, note_sequence)):
        start = 0
        for note in notes:
            midi.addNote(track, channel, note['pitch'], start,
                         note['duration'], VOLUME)
            start += note['duration']

    file_name = MIDI_FILE_PREFIX + '-'.join([genome, key, scale]) + ".mid"
    # The with-block closes the file; no explicit close() is needed.
    with open(file_name, "wb") as output:
        midi.writeFile(output)
    return file_name
def build_index_record(mode='update'):
    """Collect record ids from ``unload.TIT`` and process them in parallel.

    The title file is read line by line: ``0002:`` lines carry the creation
    date, ``0010.001:`` lines the record id, and a ``9999:`` line ends a
    record. In ``'update'`` mode only records created at or after yesterday's
    midnight are kept; in ``'full'`` mode every record is kept. The ids are
    divided into 1000 partitions and ``get_data`` is mapped over them with a
    pool of 4 worker processes. Notation counts are printed per partition
    and in total.

    :param mode: ``'update'`` or ``'full'``; any other value selects nothing
    """
    # TODO read hbz-IDs into list
    number_of_pool_workers = 4
    number_of_partitions = 1000

    # Midnight at the start of yesterday. Computed once instead of once per
    # record; same value as the strftime/strptime round trip it replaces.
    cutoff = datetime.datetime.combine(
        datetime.date.today() - datetime.timedelta(days=1),
        datetime.time.min)

    record_ids = []
    with open('%s/unload.TIT' % config.SUNRISE_DATA_DIR, 'r') as title_data:
        creation_date = ''
        hbz_id = ''
        for line in title_data:
            if line.startswith('0002:'):
                creation_date = line.strip().replace('0002:', '')
            elif line.startswith('0010.001:'):
                hbz_id = line.strip().replace('0010.001:', '')
            elif line.startswith('9999:'):
                if mode == 'update':
                    created = datetime.datetime.strptime(creation_date,
                                                         '%d.%m.%Y')
                    if created >= cutoff:
                        record_ids.append(hbz_id)
                elif mode == 'full':
                    record_ids.append(hbz_id)
                # reset for the next record
                creation_date = ''
                hbz_id = ''

    print('%s record to enrich.' % len(record_ids))

    # split record_ids into number_of_partitions partitions
    partitions = [list(x) for x in divide(number_of_partitions, record_ids)]

    # init process pool over the partitions and get data for them
    total_rvk_notation_counter = 0
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=number_of_pool_workers) as executor:
        for rvk_notation_counter, hbz_more_than_50_rvk in executor.map(
                get_data, partitions):
            total_rvk_notation_counter += rvk_notation_counter
            print('%s notations; %s records with > 50 notations.' %
                  (rvk_notation_counter, hbz_more_than_50_rvk))

    print('ubdo_rvk_index.json ready.')
    print('%s notations.' % total_rvk_notation_counter)
def split_file(filename: str, nparts: int) -> List[str]:
    """
    Split the content of a text file into a given number of temporary files.

    The lines are distributed as evenly as possible and keep their order.
    The temporary files are not deleted automatically.

    :param filename: the path to the file to split
    :param nparts: the number of parts to create
    :return: list of filenames of the parts
    """
    with open(filename, "r") as fileobj:
        lines = fileobj.read().splitlines()

    filenames = []
    for chunk in more_itertools.divide(nparts, lines):
        # NamedTemporaryFile creates and opens the file in one step, which
        # avoids the race of mktemp() handing out a name that another
        # process can claim first.
        with tempfile.NamedTemporaryFile("w", delete=False) as part:
            part.write("\n".join(chunk))
            filenames.append(part.name)
    return filenames
def clusterer(dests, sales, start):
    """Cluster the destinations among the salespeople.

    The destinations are sorted, ``start`` is removed, and the rest is
    divided into ``sales`` consecutive groups. Every returned cluster begins
    with ``start``.

    :param dests: iterable of destinations; must contain ``start``
    :param sales: number of salespeople (clusters)
    :param start: starting point shared by all clusters
    :return: list of ``sales`` clusters (empty when ``sales < 1``)
    :raises ValueError: if ``start`` is not among ``dests``
    """
    # removing start point
    destList = sorted(dests)
    destList.remove(start)

    if sales < 1:
        return []

    # Partition once instead of recomputing it for every salesperson.
    parts = mit.divide(sales, destList)
    return [[start] + list(part) for part in parts]
def reduce_async(
    self,
    func: Callable[[_InputType, _InputType], _InputType],
    iterable: Iterable[_InputType],
    callback: Optional[Callable[[_InputType], None]] = None,
    error_callback: Optional[Callable[[BaseException], None]] = None,
) -> ReduceResult[_InputType]:
    """Reduce ``iterable`` with ``func`` in parallel, returning an async result.

    The iterable is divided into ``self.n_worker`` chunks. Each non-empty
    chunk becomes one task that folds its items with ``functools.reduce``.
    The per-chunk results are wrapped, together with ``func``, in a
    ``ReduceResult``.

    ``callback`` and ``error_callback`` are attached to every chunk task.

    :param func: the callable to be run, func must be associative
        (func(func(x, y), z) == func(x, func(y, z)))
    :type func: Callable[[_InputType, _InputType], _InputType]
    :param iterable: The values to reduce
    :type iterable: Iterable[_InputType]
    :param callback: a callable passed to each chunk task
    :type callback: Optional[Callable[[_InputType], None]]
    :param error_callback: a callable passed to each chunk task
    :type error_callback: Optional[Callable[[BaseException], None]]
    :return: An async handle built from the per-chunk results and ``func``
    :rtype: ReduceResult[_InputType]
    """
    chunks = mitertools.divide(n=self.n_worker, iterable=iterable)

    def reduce(*chunk):
        return functools.reduce(func, chunk)

    tasks = []
    for c in chunks:
        items = tuple(c)
        if not items:
            # With fewer items than workers some chunks are empty, and
            # functools.reduce raises TypeError on an empty sequence
            # without an initial value.
            continue
        tasks.append(
            PoolTask(
                func=reduce,
                args=items,
                callback=callback,
                error_callback=error_callback,
            ))

    result = self.parallel_async(tasks=tasks)
    return ReduceResult[_InputType](func=func, map_result=result)