def has_next_day(dates_dict, year, month, day):
    """Return next day found in nested dates_dict or False if can't find one."""
    # Check current month for next days
    days = sorted(dates_dict[year][month].keys())
    if day != last(days):
        di = days.index(day)
        next_day = days[di + 1]
        return {"year": year, "month": month, "day": next_day}

    # Check current year for next months
    months = sorted(dates_dict[year].keys())
    if month != last(months):
        mi = months.index(month)
        next_month = months[mi + 1]
        next_day = first(sorted(dates_dict[year][next_month].keys()))
        return {"year": year, "month": next_month, "day": next_day}

    # Check for next years
    years = sorted(dates_dict.keys())
    if year != last(years):
        yi = years.index(year)
        next_year = years[yi + 1]
        next_month = first(sorted(dates_dict[next_year].keys()))
        next_day = first(sorted(dates_dict[next_year][next_month].keys()))
        return {"year": next_year, "month": next_month, "day": next_day}

    return False
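# A minimal usage sketch (hypothetical data, not from the original source):
# has_next_day assumes dates_dict is a nested mapping of year -> month -> day,
# with `first` and `last` taken from toolz.
dates_dict = {2020: {1: {30: [], 31: []}, 2: {1: []}}}
assert has_next_day(dates_dict, 2020, 1, 30) == {"year": 2020, "month": 1, "day": 31}
assert has_next_day(dates_dict, 2020, 1, 31) == {"year": 2020, "month": 2, "day": 1}
assert has_next_day(dates_dict, 2020, 2, 1) is False  # no later day recorded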
def single_partition_join(left, right, **kwargs):
    # If the merge is performed on_index, divisions can be kept; otherwise the
    # new index will not necessarily correspond to the current divisions.
    meta = left._meta_nonempty.merge(right._meta_nonempty, **kwargs)
    kwargs['empty_index_dtype'] = meta.index.dtype
    name = 'merge-' + tokenize(left, right, **kwargs)
    if left.npartitions == 1 and kwargs['how'] in ('inner', 'right'):
        left_key = first(left.__dask_keys__())
        dsk = {(name, i): (apply, merge_chunk, [left_key, right_key], kwargs)
               for i, right_key in enumerate(right.__dask_keys__())}

        if kwargs.get('right_index') or right._contains_index_name(
                kwargs.get('right_on')):
            divisions = right.divisions
        else:
            divisions = [None for _ in right.divisions]
    elif right.npartitions == 1 and kwargs['how'] in ('inner', 'left'):
        right_key = first(right.__dask_keys__())
        dsk = {(name, i): (apply, merge_chunk, [left_key, right_key], kwargs)
               for i, left_key in enumerate(left.__dask_keys__())}

        if kwargs.get('left_index') or left._contains_index_name(
                kwargs.get('left_on')):
            divisions = left.divisions
        else:
            divisions = [None for _ in left.divisions]
    else:
        raise NotImplementedError(
            "single_partition_join has no fallback for invalid calls")

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[left, right])
    return new_dd_object(graph, name, meta, divisions)
def has_previous_day(dates_dict, year, month, day):
    """Return previous day found in nested dates_dict or False if can't find one."""
    # Check current month
    days = sorted(dates_dict[year][month].keys())
    if day != first(days):
        di = days.index(day)
        prev_day = days[di - 1]
        return {"year": year, "month": month, "day": prev_day}

    # Check current year
    months = sorted(dates_dict[year].keys())
    if month != first(months):
        mi = months.index(month)
        prev_month = months[mi - 1]
        last_day = last(sorted(dates_dict[year][prev_month].keys()))
        return {"year": year, "month": prev_month, "day": last_day}

    # Check other years
    years = sorted(dates_dict.keys())
    if year != first(years):
        yi = years.index(year)
        prev_year = years[yi - 1]
        prev_month = last(sorted(dates_dict[prev_year].keys()))
        last_day = last(sorted(dates_dict[prev_year][prev_month].keys()))
        return {"year": prev_year, "month": prev_month, "day": last_day}

    return False
def _get_larger_chroms(ref_file):
    """Retrieve larger chromosomes, avoiding the smaller ones for plotting.
    """
    from scipy.cluster.vq import kmeans, vq
    all_sizes = []
    for c in ref.file_contigs(ref_file):
        all_sizes.append(float(c.size))
    all_sizes.sort()
    # separate out smaller chromosomes and haplotypes with kmeans
    centroids, _ = kmeans(np.array(all_sizes), 2)
    idx, _ = vq(np.array(all_sizes), centroids)
    little_sizes = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, all_sizes)))
    little_sizes = [x[1] for x in little_sizes]
    # create one more cluster with the smaller, removing the haplotypes
    centroids2, _ = kmeans(np.array(little_sizes), 2)
    idx2, _ = vq(np.array(little_sizes), centroids2)
    little_sizes2 = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx2, little_sizes)))
    little_sizes2 = [x[1] for x in little_sizes2]
    # get any chromosomes not in haplotype/random bin
    thresh = max(little_sizes2)
    larger_chroms = []
    for c in ref.file_contigs(ref_file):
        if c.size > thresh:
            larger_chroms.append(c.name)
    return larger_chroms
def str_cat_sql(expr, lhs, rhs, **kwargs):
    left, right = first(lhs.inner_columns), first(rhs.inner_columns)
    if expr.sep:
        result = (left + expr.sep + right).label(expr.lhs._name)
    else:
        result = (left + right).label(expr.lhs._name)
    return reconstruct_select([result], lhs)
def single_partition_join(left, right, **kwargs):
    # If the merge is performed on_index, divisions can be kept; otherwise the
    # new index will not necessarily correspond to the current divisions.
    meta = pd.merge(left._meta_nonempty, right._meta_nonempty, **kwargs)
    name = 'merge-' + tokenize(left, right, **kwargs)
    if left.npartitions == 1:
        left_key = first(left.__dask_keys__())
        dsk = {(name, i): (apply, pd.merge, [left_key, right_key], kwargs)
               for i, right_key in enumerate(right.__dask_keys__())}

        if kwargs.get('right_index') or right._contains_index_name(
                kwargs.get('right_on')):
            divisions = right.divisions
        else:
            divisions = [None for _ in right.divisions]
    elif right.npartitions == 1:
        right_key = first(right.__dask_keys__())
        dsk = {(name, i): (apply, pd.merge, [left_key, right_key], kwargs)
               for i, left_key in enumerate(left.__dask_keys__())}

        if kwargs.get('left_index') or left._contains_index_name(
                kwargs.get('left_on')):
            divisions = left.divisions
        else:
            divisions = [None for _ in left.divisions]

    return new_dd_object(toolz.merge(dsk, left.dask, right.dask),
                         name, meta, divisions)
def test_live_migrate_anti_affinity(self):
    """
    Make sure that if we have an anti-affinity group set, and we try to live
    migrate to a host with the anti-affinity group, it will fail.

    :return:
    """
    data = self.setup_affinities(self.sanity)

    # Make sure that the affinity and anti-affinity instances are booted up
    aff_inst = data["aff_instance"]
    anti_inst = data["anti_instance"]
    smog.nova.poll_status(aff_inst, "ACTIVE")
    smog.nova.poll_status(anti_inst, "ACTIVE")

    # Now, perform a live migration for the anti_inst. This should fail.
    # Get what host the instance is currently on, and compare before/after.
    discovered = self.sanity.discover()
    fltrfn = lambda x: x.instance.name == "aa-test"

    # In functional-speak, find the instance object among our discovered
    # Instance objects whose name is 'aa-test'. There should only be one of
    # these, so take the first one. Use toolz.first rather than indexing
    # ([0]); in the general case this is better (for example, what if we use
    # a generator or iterator instead of a list or tuple). Remember,
    # functional programming rulez!
    before_inst = toolz.first(filter(fltrfn, [inst for inst in discovered]))
    before_host = before_inst.host

    anti_inst.live_migrate()

    discovered = self.sanity.discover()
    after_inst = toolz.first(filter(fltrfn, [inst for inst in discovered]))
    after_host = after_inst.host
    self.assertTrue(before_host.hostname == after_host.hostname)
def binop_sql(t, lhs, rhs, **kwargs):
    if isinstance(lhs, Select):
        assert len(lhs.c) == 1, (
            "Select cannot have more than a single column when doing"
            " arithmetic, got %r" % lhs
        )
        lhs = first(lhs.inner_columns)
    if isinstance(rhs, Select):
        assert len(rhs.c) == 1, (
            "Select cannot have more than a single column when doing"
            " arithmetic, got %r" % rhs
        )
        rhs = first(rhs.inner_columns)
    return t.op(lhs, rhs)
def coalesce_sql_select(expr, lhs, rhs, **kwargs):
    if isinstance(lhs, Select):
        orig = lhs
        lhs = first(lhs.inner_columns)
    else:
        orig = rhs
        rhs = first(rhs.inner_columns)
    result = sa.sql.functions.coalesce(lhs, rhs).label(expr._name)
    return reconstruct_select([result], orig)
def str_cat_sql(expr, lhs, rhs, **kwargs):
    if isinstance(lhs, Select):
        orig = lhs
        lhs = first(lhs.inner_columns)
    else:
        orig = rhs
        rhs = first(rhs.inner_columns)
    if expr.sep:
        result = (lhs + expr.sep + rhs).label(expr.lhs._name)
    else:
        result = (lhs + rhs).label(expr.lhs._name)
    return reconstruct_select([result], orig)
def compute_up(expr, data, **kwargs):
    name = expr._name
    try:
        inner_columns = list(data.inner_columns)
        names = list(c.name for c in data.inner_columns)
        column = inner_columns[names.index(name)]
    except (KeyError, ValueError):
        single_column_select = compute(expr,
                                       first(data.inner_columns),
                                       post_compute=False,
                                       return_type="native")
        column = first(single_column_select.inner_columns)
        result = unify_froms(sa.select([column]),
                             data.froms + single_column_select.froms)
        return result.where(unify_wheres([data, single_column_select]))
    else:
        return data.with_only_columns([column])
def binop_sql(t, lhs, rhs, **kwargs):
    if isinstance(lhs, Select):
        assert len(lhs.c) == 1, (
            'Select cannot have more than a single column when doing'
            ' arithmetic, got %r' % lhs
        )
        lhs = first(lhs.inner_columns)
    if isinstance(rhs, Select):
        assert len(rhs.c) == 1, (
            'Select cannot have more than a single column when doing'
            ' arithmetic, got %r' % rhs
        )
        rhs = first(rhs.inner_columns)
    return f(t, lhs, rhs)
def compute_down(expr, data, chunksize=2**20, map=map, **kwargs):
    leaf = expr._leaves()[0]

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr)
                   and any(i is expr._leaves()[0] for i in e._inputs))
    if len(children) == 1 and isinstance(first(children), (Field, Projection)):
        raise NotImplementedError()

    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize,))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                     data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError("Don't know how to concatenate objects of type %s" %
                        type(parts[0]))

    return compute(agg_expr, {agg: intermediate})
def dshape(self):
    if self.child.columns and len(self.child.columns) == 1:
        name = self.child.columns[0] + '_' + type(self).__name__
        dtype = self.dtype or first(self.child.schema[0].fields.values()[0])
        return DataShape(Record([[name, dtype]]))
    else:
        return DataShape(Record([[type(self).__name__, self.dtype]]))
def execute_string_group_by_find_in_set(op, needle, haystack, **kwargs):
    # `list` could contain series, series groupbys, or scalars
    # mixing series and series groupbys is not allowed
    series_in_haystack = [
        type(piece)
        for piece in haystack
        if isinstance(piece, (pd.Series, SeriesGroupBy))
    ]

    if not series_in_haystack:
        return ibis.util.safe_index(haystack, needle)

    try:
        collection_type, = frozenset(map(type, series_in_haystack))
    except ValueError:
        raise ValueError('Mixing Series and SeriesGroupBy is not allowed')

    pieces = haystack_to_series_of_lists(
        [getattr(piece, 'obj', piece) for piece in haystack]
    )

    result = pieces.map(toolz.flip(ibis.util.safe_index)(needle))
    if issubclass(collection_type, pd.Series):
        return result

    assert issubclass(collection_type, SeriesGroupBy)

    return result.groupby(
        toolz.first(
            piece.grouper.groupings
            for piece in haystack
            if hasattr(piece, 'grouper')
        )
    )
def discover_chunks(c, **kwargs):
    data = c.data
    if isinstance(data, Iterator):
        fst, c.data = peek(data)
    else:
        fst = first(c)
    return var * discover(fst).subshape[0]
def test_pre_compute_with_projection_projects_on_data_frames():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    result = pre_compute(s[['sepal_length', 'sepal_width']].distinct(),
                         csv, comfortable_memory=10)
    assert set(first(result).columns) == \
        set(['sepal_length', 'sepal_width'])
def udf(func):
    llvm_module = first(func._compileinfos.values()).library._final_module
    engine = ee.EngineBuilder.new(llvm_module).create()
    functions = [
        func for func in llvm_module.functions
        if not func.name.startswith('_') and not func.is_declaration
    ]
    addr = engine.get_function_address(functions[1].name)
    assert addr > 0, 'addr == %d' % addr

    # Declare the ctypes function prototype
    # functype = cfunctype(c_double, c_double)
    path = os.path.expanduser(
        os.path.join('~', 'ibis-data', 'ibis-testing-data', 'ibis-testing.db')
    )
    con = sqlite3_connection(path.encode('utf8'))
    result = register(
        con,
        addr,
        func.__name__.encode('utf8'),
        len(func.nopython_signatures[0].args)
    )
    con.execute("select mysin(1.0230923)".encode('utf8'))
def _schema(self):
    schema = self._child.schema[0]
    if isinstance(schema, Record) and len(schema.types) == 1:
        result = toolz.first(schema.types)
    else:
        result = schema
    return DataShape(result)
def test_pre_compute_calls_lean_projection():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    result = pre_compute(s.sort('sepal_length').species,
                         csv, comfortable_memory=10)
    assert set(first(result).columns) == \
        set(['sepal_length', 'species'])
def post_compute(expr, query, scope=None):
    """ Execute SQLAlchemy query against SQLAlchemy engines

    If the result of compute is a SQLAlchemy query then it is likely that the
    data elements are themselves SQL objects which contain SQLAlchemy engines.
    We find these engines and, if they are all the same, run the query against
    these engines and return the result.
    """
    if not all(isinstance(val, (MetaData, Engine, Table))
               for val in scope.values()):
        return query

    engines = set(filter(None, map(engine_of, scope.values())))

    if not engines:
        return query

    if len(set(map(str, engines))) != 1:
        raise NotImplementedError("Expected single SQLAlchemy engine")

    engine = first(engines)

    with engine.connect() as conn:  # Perform query
        result = conn.execute(select(query)).fetchall()

    if isscalar(expr.dshape):
        return result[0][0]
    if isscalar(expr.dshape.measure):
        return [x[0] for x in result]
    return result
def port(self):
    if not self._port:
        try:
            self._port = first(self._sockets.values()).getsockname()[1]
        except StopIteration:
            raise OSError("Server has no port.  Please call .listen first")
    return self._port
def test_context_manager(self, dt_tb, dt_data):
    """ check the context manager auto-closes the resources """
    with Data("{0}::dt".format(dt_tb)) as t:
        f = first(t._resources().values())
        assert f.isopen
    assert not f.isopen
def prepare_exclude_file(items, base_file, chrom=None):
    """Prepare a BED file for exclusion, incorporating variant regions and chromosome.

    Excludes locally repetitive regions (if `remove_lcr` is set) and
    centromere regions, both of which contribute to long run times and
    false positive structural variant calls.
    """
    out_file = "%s-exclude.bed" % utils.splitext_plus(base_file)[0]
    all_vrs = _get_variant_regions(items)
    ready_region = (shared.subset_variant_regions(tz.first(all_vrs), chrom, base_file, items)
                    if len(all_vrs) > 0 else chrom)
    with shared.bedtools_tmpdir(items[0]):
        # Get a bedtool for the full region if no variant regions
        if ready_region == chrom:
            want_bedtool = callable.get_ref_bedtool(tz.get_in(["reference", "fasta", "base"], items[0]),
                                                    items[0]["config"], chrom)
            lcr_bed = shared.get_lcr_bed(items)
            if lcr_bed:
                want_bedtool = want_bedtool.subtract(pybedtools.BedTool(lcr_bed))
        else:
            want_bedtool = pybedtools.BedTool(ready_region).saveas()
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(want_bedtool) > 0:
            want_bedtool = want_bedtool.subtract(sv_exclude_bed).saveas()
        if not utils.file_exists(out_file) and not utils.file_exists(out_file + ".gz"):
            with file_transaction(out_file) as tx_out_file:
                full_bedtool = callable.get_ref_bedtool(tz.get_in(["reference", "fasta", "base"], items[0]),
                                                        items[0]["config"])
                if len(want_bedtool) > 0:
                    full_bedtool.subtract(want_bedtool).saveas(tx_out_file)
                else:
                    full_bedtool.saveas(tx_out_file)
    return out_file
def scalar_coerce(rec, val):
    if len(rec.fields) == 1:
        return scalar_coerce(first(rec.types), val)
    else:
        raise TypeError("Trying to coerce complex datashape\n"
                        "got dshape: %s\n"
                        "scalar_coerce only intended for scalar values" % rec)
def compute_up(t, data, **kwargs):
    assert len(data.c) == 1, \
        'Select cannot have more than a single column when doing arithmetic'
    column = first(data.inner_columns)
    if isinstance(t.lhs, Expr):
        return sa.func.pow(column, t.rhs)
    else:
        return sa.func.pow(t.lhs, column)
def test_basic():
    def test_g():
        time.sleep(0.01)

    def test_h():
        time.sleep(0.02)

    def test_f():
        for i in range(100):
            test_g()
            test_h()

    thread = threading.Thread(target=test_f)
    thread.daemon = True
    thread.start()

    state = create()

    for i in range(100):
        time.sleep(0.02)
        frame = sys._current_frames()[thread.ident]
        process(frame, None, state)

    assert state['count'] == 100

    d = state
    while len(d['children']) == 1:
        d = first(d['children'].values())

    assert d['count'] == 100
    assert 'test_f' in str(d['description'])
    g = [c for c in d['children'].values()
         if 'test_g' in str(c['description'])][0]
    h = [c for c in d['children'].values()
         if 'test_h' in str(c['description'])][0]

    assert g['count'] < h['count']
    assert 95 < g['count'] + h['count'] <= 100
def read_header(self, stream):
    stream.seek(0)
    if stream.read(len(self.MAGIC)) != self.MAGIC:
        raise FormatException('Not a Daybreak database')
    version = first(unpack('!H', stream.read(2)))
    if version != self.VERSION:
        raise FormatException(
            "Expected database version {}, got {}".format(self.VERSION, version))
def persist(self, collections):
    """ Persist dask collections on cluster

    Starts computation of the collection on the cluster in the background.
    Provides a new dask collection that is semantically identical to the
    previous one, but now based off of futures currently in execution.

    Parameters
    ----------
    collections: sequence or single dask object
        Collections like dask.array or dataframe or dask.value objects

    Returns
    -------
    List of collections, or single collection, depending on type of input.

    Examples
    --------
    >>> xx = executor.persist(x)  # doctest: +SKIP
    >>> xx, yy = executor.persist([x, y])  # doctest: +SKIP

    See Also
    --------
    Executor.compute
    """
    if isinstance(collections, (tuple, list, set, frozenset)):
        singleton = False
    else:
        singleton = True
        collections = [collections]

    assert all(isinstance(c, Base) for c in collections)

    groups = groupby(lambda x: x._optimize, collections)
    dsk = merge([opt(merge([v.dask for v in val]),
                     [v._keys() for v in val])
                 for opt, val in groups.items()])
    d = {k: unpack_remotedata(v) for k, v in dsk.items()}
    dsk2 = {k: v[0] for k, v in d.items()}
    dependencies = {k: v[1] for k, v in d.items()}

    for k, v in dsk2.items():
        dependencies[k] |= set(_deps(dsk, v))

    names = list({k for c in collections for k in flatten(c._keys())})

    self._send_to_scheduler({'op': 'update-graph',
                             'tasks': valmap(dumps_task, dsk2),
                             'dependencies': dependencies,
                             'keys': names,
                             'client': self.id})
    result = [redict_collection(c, {k: Future(k, self)
                                    for k in flatten(c._keys())})
              for c in collections]
    if singleton:
        return first(result)
    else:
        return result
def compute_up(t, data, **kwargs):
    assert len(data.c) == 1, \
        'Select cannot have more than a single column when doing arithmetic'
    column = first(data.inner_columns)
    op = getattr(sa.func, type(t).__name__)
    if isinstance(t.lhs, Expr):
        return op(column, t.rhs)
    else:
        return op(t.lhs, column)
def shuffle(
    df: DataFrame,
    column_names: List[str],
    npartitions: Optional[int] = None,
    ignore_index: bool = False,
) -> DataFrame:
    """Order divisions of DataFrame so that all values within column(s) align

    This enacts a task-based shuffle using explicit-comms. It requires a full
    dataset read, serialization and shuffle. This is expensive. If possible
    you should avoid shuffles.

    This does not preserve a meaningful index/partitioning scheme. This is not
    deterministic if done in parallel.

    Requires an active client.

    Parameters
    ----------
    df: dask.dataframe.DataFrame
        Dataframe to shuffle
    column_names: list of strings
        List of column names on which we want to split.
    npartitions: int or None
        The desired number of output partitions. If None, the number of output
        partitions equals `df.npartitions`
    ignore_index: bool
        Ignore index during shuffle. If True, performance may improve, but
        index values will not be preserved.

    Returns
    -------
    df: dask.dataframe.DataFrame
        Shuffled dataframe

    Developer Notes
    ---------------
    The implementation consists of three steps:
      (a) Extend the dask graph of `df` with a call to `shuffle_group()` for
          each dataframe partition and submit the graph.
      (b) Submit a task on each worker that shuffles (all-to-all communicates)
          the groups from (a) and returns a list of dataframe-partitions.
      (c) Submit a dask graph that extracts (using `getitem()`) individual
          dataframe-partitions from (b).
    """
    c = comms.default_comms()

    # As default we preserve number of partitions
    if npartitions is None:
        npartitions = df.npartitions

    # Step (a): partition/group each dataframe-partition
    name = ("explicit-comms-shuffle-group-"
            f"{tokenize(df, column_names, npartitions, ignore_index)}")
    df = df.persist()  # Make sure optimizations are applied on the existing graph
    dsk = dict(df.__dask_graph__())
    output_keys = []
    for input_key in df.__dask_keys__():
        output_key = (name, input_key[1])
        dsk[output_key] = (
            shuffle_group,
            input_key,
            column_names,
            0,
            npartitions,
            npartitions,
            ignore_index,
            npartitions,
        )
        output_keys.append(output_key)

    # Compute `df_groups`, which is a list of futures, one future per partition in `df`.
    # Each future points to a dict of length `df.npartitions` that maps each
    # partition-id to a DataFrame.
    df_groups = compute_as_if_collection(type(df), dsk, output_keys, sync=False)
    wait(df_groups)
    for f in df_groups:  # Check for errors
        if f.status == "error":
            f.result()  # raise exception

    # Step (b): find out which workers have what part of `df_groups`,
    #           find the number of output partitions each worker should have,
    #           and submit `local_shuffle()` on each worker.
    key_to_part = {str(part.key): part for part in df_groups}
    in_parts = defaultdict(list)  # Map worker -> [list of futures]
    for key, workers in c.client.who_has(df_groups).items():
        # Note, if multiple workers have the part, we pick the first worker
        in_parts[first(workers)].append(key_to_part[key])

    # Let's create a dict that specifies the number of partitions each worker has
    in_nparts = {}
    workers = set()  # All ranks that have a partition of `df`
    for rank, worker in enumerate(c.worker_addresses):
        nparts = len(in_parts.get(worker, ()))
        if nparts > 0:
            in_nparts[rank] = nparts
            workers.add(rank)
    workers_sorted = sorted(workers)

    # Find the output partitions for each worker
    div = npartitions // len(workers)
    rank_to_out_part_ids = {}  # rank -> [list of partition id]
    for i, rank in enumerate(workers_sorted):
        rank_to_out_part_ids[rank] = list(range(div * i, div * (i + 1)))
    for rank, i in zip(workers_sorted, range(div * len(workers), npartitions)):
        rank_to_out_part_ids[rank].append(i)

    # Run `local_shuffle()` on each worker
    result_futures = {}
    for rank, worker in enumerate(c.worker_addresses):
        if rank in workers:
            result_futures[rank] = c.submit(
                worker,
                local_shuffle,
                in_nparts,
                in_parts[worker],
                rank_to_out_part_ids,
                ignore_index,
            )
    distributed.wait(list(result_futures.values()))
    del df_groups

    # Step (c): extract individual dataframe-partitions
    name = f"explicit-comms-shuffle-getitem-{tokenize(name)}"
    dsk = {}
    meta = None
    for rank, parts in rank_to_out_part_ids.items():
        for i, part_id in enumerate(parts):
            dsk[(name, part_id)] = (getitem, result_futures[rank], i)
            if meta is None:
                # Get the meta from the first output partition
                meta = delayed(make_meta)(
                    delayed(getitem)(result_futures[rank], i)).compute()
    assert meta is not None

    divs = [None] * (len(dsk) + 1)
    return new_dd_object(dsk, name, meta, divs).persist()
def until_convergence(it: Iterator[Params],
                      eq: Callable = lambda x: x[0] != x[1]) -> Params:
    it2 = tz.drop(1, it)
    pairs = zip(it, it2)
    return tz.first(itertools.dropwhile(eq, pairs))[0]
state = i['state']
nsteps = snakemake.params.get('nsteps', 1)

files = [
    (i.tend, ('FQT', 'FSL')),
    (i.cent, ('QV', 'TABS', 'QN', 'QP', 'QRAD')),
    (i.stat, ('p', 'RHO')),
    (i['2d'], ('LHF', 'SHF', 'SOLIN')),
]

data = TrainingData.from_var_files(files)
nt, ny, nx, nz = data.FQT.shape
loader = data.get_loader(nt, batch_size=ny * nx * nz, shuffle=False)
input_data = first(loader)

model = ForcedStepper.from_file(state)
model.eval()
model.nsteps = 1
print("nsteps", nsteps)

with torch.no_grad():
    out = model(input_data)


def unstackdiag(x):
    shape = (nt - 1, ny, nx)
    return x.data.numpy().reshape(shape)
def _do_predict(self, X_df, coefs, loc_dict, intercept, dtype):
    client = default_client()

    part_size = ceil(X_df.shape[1] / X_df.npartitions)

    # We scatter delayed operations to gather columns on the workers
    scattered = []
    for i in range(X_df.npartitions):
        up_limit = min((i + 1) * part_size, X_df.shape[1])
        cols = X_df.columns.values[i * part_size:up_limit]
        loc_cudf = X_df[cols]
        yield wait(loc_cudf)
        scattered.append(
            client.submit(preprocess_predict,
                          loc_cudf,
                          workers=[loc_dict[i]]))
        yield wait(scattered)
        del (loc_cudf)

    # Break apart Dask.array/dataframe into chunks/parts
    data_parts = scattered
    coef_parts = coefs.to_delayed()

    # Arrange parts into pairs.  This enforces co-locality
    parts = list(map(delayed, zip(data_parts, coef_parts)))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # A dict in the form of { part_key: part }
    key_to_part_dict = dict([(str(part.key), part) for part in parts])

    who_has = yield client.who_has(parts)

    worker_parts = {}
    for key, workers in who_has.items():
        worker = parse_host_port(first(workers))
        if worker not in worker_parts:
            worker_parts[worker] = []
        worker_parts[worker].append(key_to_part_dict[key])

    """
    Create IP Handles on each worker hosting input data
    """
    # Format of input_devarrays = ([(X, y)..], dev)
    input_devarrays = [(worker, client.submit(predict_to_device_arrays,
                                              part,
                                              worker,
                                              loc_dict,
                                              X_df.npartitions,
                                              dtype=dtype,
                                              workers=[worker]))
                       for worker, part in worker_parts.items()]

    yield wait(input_devarrays)

    """
    Gather IPC handles for each worker and call _fit() on each worker
    containing data.
    """
    exec_node = loc_dict[X_df.npartitions - 1]

    # Need to fetch parts on worker
    on_worker = list(filter(lambda x: x[0] == exec_node, input_devarrays))
    not_on_worker = list(
        filter(lambda x: x[0] != exec_node, input_devarrays))

    ipc_handles = [
        client.submit(get_input_ipc_handles,
                      future,
                      unique=np.random.randint(0, 1e6),
                      workers=[a_worker])
        for a_worker, future in not_on_worker
    ]

    raw_arrays = [future for a_worker, future in on_worker]

    # IPC Handles are loaded in separate threads on worker so they can be
    # used to make calls through cython
    # Calls _predict_on_worker defined in the bottom
    ret = client.submit(_predict_on_worker, (ipc_handles, raw_arrays),
                        self.intercept,
                        self._build_params_map(),
                        workers=[exec_node])

    yield wait(ret)

    dfs = [
        client.submit(series_on_worker,
                      f,
                      worker,
                      loc_dict,
                      X_df.npartitions,
                      X_df,
                      workers=[worker])
        for worker, f in input_devarrays
    ]

    return dfs
def test():
    patients = _get_patients('Animal 2', 'Customer 2')
    if len(patients) == 1:
        patient = first(patients)
        print(patient)
def map(self, func, *iterables, **kwargs):
    """ Map a function on a sequence of arguments

    Arguments can be normal objects or Futures

    Parameters
    ----------
    func: callable
    iterables: Iterables, Iterators, or Queues
    pure: bool (defaults to True)
        Whether or not the function is pure.  Set ``pure=False`` for
        impure functions like ``np.random.random``.
    workers: set, iterable of sets
        A set of worker hostnames on which computations may be performed.
        Leave empty to default to all workers (common case)

    Examples
    --------
    >>> L = executor.map(func, sequence)  # doctest: +SKIP

    Returns
    -------
    List, iterator, or Queue of futures, depending on the type of the
    inputs.

    See also
    --------
    Executor.submit: Submit a single function
    """
    if not callable(func):
        raise TypeError("First input to map must be a callable function")

    if (all(map(isqueue, iterables)) or
            all(isinstance(i, Iterator) for i in iterables)):
        q_out = pyQueue()
        t = Thread(target=self._threaded_map,
                   args=(q_out, func, iterables),
                   kwargs=kwargs)
        t.daemon = True
        t.start()
        if isqueue(iterables[0]):
            return q_out
        else:
            return queue_to_iterator(q_out)

    pure = kwargs.pop('pure', True)
    workers = kwargs.pop('workers', None)
    allow_other_workers = kwargs.pop('allow_other_workers', False)

    if allow_other_workers and workers is None:
        raise ValueError("Only use allow_other_workers= if using workers=")

    iterables = list(zip(*zip(*iterables)))
    if pure:
        keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                for args in zip(*iterables)]
    else:
        uid = str(uuid.uuid4())
        keys = [funcname(func) + '-' + uid + '-' + str(uuid.uuid4())
                for i in range(min(map(len, iterables)))]

    if not kwargs:
        dsk = {key: (func,) + args
               for key, args in zip(keys, zip(*iterables))}
    else:
        dsk = {key: (apply, func, (tuple, list(args)), kwargs)
               for key, args in zip(keys, zip(*iterables))}

    d = {key: unpack_remotedata(task) for key, task in dsk.items()}
    dsk = {k: v[0] for k, v in d.items()}
    dependencies = {k: v[1] for k, v in d.items()}

    if isinstance(workers, str):
        workers = [workers]
    if isinstance(workers, (list, set)):
        if workers and isinstance(first(workers), (list, set)):
            if len(workers) != len(keys):
                raise ValueError("You only provided %d worker restrictions"
                                 " for a sequence of length %d"
                                 % (len(workers), len(keys)))
            restrictions = dict(zip(keys, workers))
        else:
            restrictions = {key: workers for key in keys}
    elif workers is None:
        restrictions = {}
    else:
        raise TypeError("Workers must be a list or set of workers or None")
    if allow_other_workers not in (True, False, None):
        raise TypeError("allow_other_workers= must be True or False")
    if allow_other_workers is True:
        loose_restrictions = set(keys)
    else:
        loose_restrictions = set()

    logger.debug("map(%s, ...)", funcname(func))
    self._send_to_scheduler({'op': 'update-graph',
                             'tasks': valmap(dumps_task, dsk),
                             'dependencies': dependencies,
                             'keys': keys,
                             'restrictions': restrictions,
                             'loose_restrictions': loose_restrictions,
                             'client': self.id})

    return [Future(key, self) for key in keys]
def _load_dataset(self, dates, data_query_cutoff_times, assets, mask,
                  columns):
    try:
        (expr_data,) = {self._table_expressions[c] for c in columns}
    except ValueError:
        raise AssertionError(
            'all columns must share the same expression data',
        )

    expr, deltas, checkpoints, odo_kwargs = expr_data
    odo_kwargs = dict(odo_kwargs)

    have_sids = (first(columns).dataset.ndim == 2)
    added_query_fields = {AD_FIELD_NAME, TS_FIELD_NAME} | (
        {SID_FIELD_NAME} if have_sids else set()
    )
    requested_columns = set(map(getname, columns))
    colnames = sorted(added_query_fields | requested_columns)

    lower_dt, upper_dt = data_query_cutoff_times[[0, -1]]

    def collect_expr(e, lower):
        """Materialize the expression as a dataframe.

        Parameters
        ----------
        e : Expr
            The baseline or deltas expression.
        lower : datetime
            The lower time bound to query.

        Returns
        -------
        result : pd.DataFrame
            The resulting dataframe.

        Notes
        -----
        This can return more data than needed. The in memory reindex will
        handle this.
        """
        predicate = e[TS_FIELD_NAME] < upper_dt
        if lower is not None:
            predicate &= e[TS_FIELD_NAME] >= lower

        return odo(e[predicate][colnames], pd.DataFrame, **odo_kwargs)

    lower, materialized_checkpoints = get_materialized_checkpoints(
        checkpoints, colnames, lower_dt, odo_kwargs)

    materialized_expr_deferred = self.pool.apply_async(
        collect_expr,
        (expr, lower),
    )
    materialized_deltas = (
        self.pool.apply(collect_expr, (deltas, lower))
        if deltas is not None else
        None
    )

    # If the rows that come back from the blaze backend are constructed
    # from LabelArrays with Nones in the categories, pandas
    # complains. Ignore those warnings for now until we have a story for
    # updating our categorical missing values to NaN.
    with ignore_pandas_nan_categorical_warning():
        all_rows = pd.concat(
            filter(
                lambda df: df is not None,
                (
                    materialized_checkpoints,
                    materialized_expr_deferred.get(),
                    materialized_deltas,
                ),
            ),
            ignore_index=True,
            copy=False,
        )

    all_rows[TS_FIELD_NAME] = all_rows[TS_FIELD_NAME].astype(
        'datetime64[ns]',
    )
    all_rows.sort_values([TS_FIELD_NAME, AD_FIELD_NAME], inplace=True)

    if have_sids:
        return adjusted_arrays_from_rows_with_assets(
            dates,
            data_query_cutoff_times,
            assets,
            columns,
            all_rows,
        )
    else:
        return adjusted_arrays_from_rows_without_assets(
            dates,
            data_query_cutoff_times,
            columns,
            all_rows,
        )
def _load_dataset(self, dates, assets, mask, columns):
    try:
        (expr_data,) = {self._table_expressions[c] for c in columns}
    except ValueError:
        raise AssertionError(
            'all columns must share the same expression data',
        )

    expr, deltas, checkpoints, odo_kwargs = expr_data

    have_sids = (first(columns).dataset.ndim == 2)
    added_query_fields = {AD_FIELD_NAME, TS_FIELD_NAME} | (
        {SID_FIELD_NAME} if have_sids else set()
    )
    requested_columns = set(map(getname, columns))
    colnames = sorted(added_query_fields | requested_columns)

    data_query_time = self._data_query_time
    data_query_tz = self._data_query_tz
    lower_dt, upper_dt = normalize_data_query_bounds(
        dates[0],
        dates[-1],
        data_query_time,
        data_query_tz,
    )

    def collect_expr(e, lower):
        """Materialize the expression as a dataframe.

        Parameters
        ----------
        e : Expr
            The baseline or deltas expression.
        lower : datetime
            The lower time bound to query.

        Returns
        -------
        result : pd.DataFrame
            The resulting dataframe.

        Notes
        -----
        This can return more data than needed. The in memory reindex will
        handle this.
        """
        predicate = e[TS_FIELD_NAME] < upper_dt
        if lower is not None:
            predicate &= e[TS_FIELD_NAME] >= lower

        return odo(e[predicate][colnames], pd.DataFrame, **odo_kwargs)

    lower, materialized_checkpoints = get_materialized_checkpoints(
        checkpoints, colnames, lower_dt, odo_kwargs)

    materialized_expr_deferred = self.pool.apply_async(
        collect_expr,
        (expr, lower),
    )
    materialized_deltas = (
        self.pool.apply(collect_expr, (deltas, lower))
        if deltas is not None else
        None
    )

    all_rows = pd.concat(
        filter(
            lambda df: df is not None,
            (
                materialized_checkpoints,
                materialized_expr_deferred.get(),
                materialized_deltas,
            ),
        ),
        ignore_index=True,
        copy=False,
    )

    all_rows[TS_FIELD_NAME] = all_rows[TS_FIELD_NAME].astype(
        'datetime64[ns]',
    )
    all_rows.sort_values([TS_FIELD_NAME, AD_FIELD_NAME], inplace=True)

    if have_sids:
        return adjusted_arrays_from_rows_with_assets(
            dates,
            data_query_time,
            data_query_tz,
            assets,
            columns,
            all_rows,
        )
    else:
        return adjusted_arrays_from_rows_without_assets(
            dates,
            data_query_time,
            data_query_tz,
            columns,
            all_rows,
        )
def test_global_workers(s, a, b):
    n = len(Worker._instances)
    w = first(Worker._instances)
    assert w is a or w is b
def _get_tenant(customer):
    tenant = frappe.get_all("Tenant Master",
                            fields=["*"],
                            filters={"customer": customer})
    return first(tenant) if tenant else None
tags.label("Target identity:", fr=key) with tags.select( cls="form-control target-identity", id=key, data_scenario="unseen"): for t, _ in col["audio-paths-ours"]: tags.option( t, data_target=t, data_speaker=col["speaker"], data_sample=col["sample-id"]) with tags.audio(controls=True, cls="embed-responsive", id=key + "-audio", data_scenario="unseen"): _, p = first(col["audio-paths-ours"]) tags.source(src=p, type="audio/wav") tags.script(type="text/javascript", src="script.js") raw(r""" <!-- Global site tag (gtag.js) - Google Analytics --> <script async src="https://www.googletagmanager.com/gtag/js?id=UA-71565185-2"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-71565185-2'); </script> """)
def test_first():
    for p in pairs:
        first(p)
def find_id_in_single_index(ind: Index, id: int) -> str:
    try:
        return t.first(key for key, value in ind.items() if id in value)
    except StopIteration:
        return None
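# A minimal usage sketch (hypothetical data, not from the original source):
# the index is assumed to behave like a plain dict mapping a key to a
# collection of ids; toolz.first raises StopIteration on an empty generator,
# which the function turns into None.
index = {"batch-a": {1, 2, 3}, "batch-b": {4, 5}}
assert find_id_in_single_index(index, 4) == "batch-b"
assert find_id_in_single_index(index, 99) is None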
def test_create_index_unique(sql):
    create_index(sql, 'y', name='y_idx', unique=True)
    assert len(sql.data.indexes) == 1
    idx = first(sql.data.indexes)
    assert idx.unique
    assert idx.columns.y == sql.data.c.y
def compute_up(t, s, **kwargs):
    assert len(s.c) == 1, \
        'Select cannot have more than a single column when filtering with `like`'
    return compute_up(t, first(s.inner_columns), **kwargs)
def compute_up(expr, data, **kwargs):
    column = first(data.inner_columns)
    cast = sa.cast(column, dshape_to_alchemy(expr.to)).label(expr._name)
    return reconstruct_select([cast], data)
def compute(self, args, sync=False):
    """ Compute dask collections on cluster

    Parameters
    ----------
    args: iterable of dask objects or single dask object
        Collections like dask.array or dataframe or dask.value objects
    sync: bool (optional)
        Returns Futures if False (default) or concrete values if True

    Returns
    -------
    List of Futures if input is a sequence, or a single future otherwise

    Examples
    --------
    >>> from dask import do, value
    >>> from operator import add
    >>> x = dask.do(add)(1, 2)
    >>> y = dask.do(add)(x, x)
    >>> xx, yy = executor.compute([x, y])  # doctest: +SKIP
    >>> xx  # doctest: +SKIP
    <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e>
    >>> xx.result()  # doctest: +SKIP
    3
    >>> yy.result()  # doctest: +SKIP
    6

    Also support single arguments

    >>> xx = executor.compute(x)  # doctest: +SKIP

    See Also
    --------
    Executor.get: Normal synchronous dask.get function
    """
    if isinstance(args, (list, tuple, set, frozenset)):
        singleton = False
    else:
        args = [args]
        singleton = True

    variables = [a for a in args if isinstance(a, Base)]

    groups = groupby(lambda x: x._optimize, variables)
    dsk = merge([opt(merge([v.dask for v in val]),
                     [v._keys() for v in val])
                 for opt, val in groups.items()])
    names = ['finalize-%s' % tokenize(v) for v in variables]
    dsk2 = {name: (v._finalize, v._keys())
            for name, v in zip(names, variables)}

    d = {k: unpack_remotedata(v) for k, v in merge(dsk, dsk2).items()}
    dsk3 = {k: v[0] for k, v in d.items()}
    dependencies = {k: v[1] for k, v in d.items()}

    for k, v in dsk3.items():
        dependencies[k] |= set(_deps(dsk, v))

    self._send_to_scheduler({'op': 'update-graph',
                             'tasks': valmap(dumps_task, dsk3),
                             'dependencies': dependencies,
                             'keys': names,
                             'client': self.id})

    i = 0
    futures = []
    for arg in args:
        if isinstance(arg, Base):
            futures.append(Future(names[i], self))
            i += 1
        else:
            futures.append(arg)

    if sync:
        result = self.gather(futures)
    else:
        result = futures

    if singleton:
        return first(result)
    else:
        return result
def compute_up(expr, data, **kwargs):
    return data.with_only_columns(
        first(compute(expr._child[field],
                      data,
                      post_compute=False).inner_columns)
        for field in expr.fields
    )
def persist(self, collections):
    """ Persist dask collections on cluster

    Starts computation of the collection on the cluster in the background.
    Provides a new dask collection that is semantically identical to the
    previous one, but now based off of futures currently in execution.

    Parameters
    ----------
    collections: sequence or single dask object
        Collections like dask.array or dataframe or dask.value objects

    Returns
    -------
    List of collections, or single collection, depending on type of input.

    Examples
    --------
    >>> xx = executor.persist(x)  # doctest: +SKIP
    >>> xx, yy = executor.persist([x, y])  # doctest: +SKIP

    See Also
    --------
    Executor.compute
    """
    if isinstance(collections, (tuple, list, set, frozenset)):
        singleton = False
    else:
        singleton = True
        collections = [collections]

    assert all(isinstance(c, Base) for c in collections)

    groups = groupby(lambda x: x._optimize, collections)
    dsk = merge([opt(merge([v.dask for v in val]),
                     [v._keys() for v in val])
                 for opt, val in groups.items()])

    d = {k: unpack_remotedata(v) for k, v in dsk.items()}
    dsk2 = {k: v[0] for k, v in d.items()}
    dependencies = {k: v[1] for k, v in d.items()}

    for k, v in dsk2.items():
        dependencies[k] |= set(_deps(dsk, v))

    names = list({k for c in collections for k in flatten(c._keys())})

    self._send_to_scheduler({'op': 'update-graph',
                             'tasks': valmap(dumps_task, dsk2),
                             'dependencies': dependencies,
                             'keys': names,
                             'client': self.id})
    result = [redict_collection(c, {k: Future(k, self)
                                    for k in flatten(c._keys())})
              for c in collections]
    if singleton:
        return first(result)
    else:
        return result
def compute_up(t, s, **kwargs):
    assert len(s.foreign_keys) == 1, 'exactly one foreign key allowed'
    key_col = first(s.foreign_keys).column
    return sa.select([key_col.table.c[t._name]]).where(s == key_col)
def _do_fit(self, X_df, y_df, dtype):
    client = default_client()

    # Finding location of parts of y_df to distribute columns of X_df
    loc_dict = {}
    yield wait(y_df)
    tt = yield client.who_has(y_df)
    location = tuple(tt.values())
    for i in range(X_df.npartitions):
        part_number = eval(list(tt.keys())[i])[1]
        loc_dict[part_number] = parse_host_port(str(location[i])[:-3])

    # Lets divide the columns evenly, matching the order of the labels
    part_size = ceil(X_df.shape[1] / X_df.npartitions)

    # We scatter delayed operations to gather columns on the workers
    scattered = []
    coefs = []
    for i in range(X_df.npartitions):
        up_limit = min((i + 1) * part_size, X_df.shape[1])
        cols = X_df.columns.values[i * part_size:up_limit]
        loc_cudf = X_df[cols]
        yield wait(loc_cudf)
        scattered.append(
            client.submit(preprocess_on_worker,
                          loc_cudf,
                          workers=[loc_dict[i]]))
        yield wait(scattered)
        coefs.append(
            client.submit(dev_array_on_worker,
                          up_limit - i * part_size,
                          dtype=dtype,
                          unique=np.random.randint(0, 1e6),
                          workers=[loc_dict[i]]))
        yield wait(coefs)
        del (loc_cudf)

    # Break apart Dask.array/dataframe into chunks/parts
    # data_parts = map(delayed, scattered)
    data_parts = scattered
    label_parts = y_df.to_delayed()
    coef_parts = coefs

    # Arrange parts into pairs.  This enforces co-locality
    parts = list(map(delayed, zip(data_parts, label_parts, coef_parts)))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # A dict in the form of { part_key: part }
    key_to_part_dict = dict([(str(part.key), part) for part in parts])

    who_has = yield client.who_has(parts)

    worker_parts = {}
    for key, workers in who_has.items():
        worker = parse_host_port(first(workers))
        if worker not in worker_parts:
            worker_parts[worker] = []
        worker_parts[worker].append(key_to_part_dict[key])

    """
    Create IP Handles on each worker hosting input data
    """
    # Format of input_devarrays = ([(X, y)..], dev)
    input_devarrays = [(worker, client.submit(fit_to_device_arrays,
                                              part,
                                              workers=[worker]))
                       for worker, part in worker_parts.items()]

    yield wait(input_devarrays)

    """
    Gather IPC handles for each worker and call _fit() on each worker
    containing data.
    """
    # Last worker is the only one that can have less items.
    exec_node = loc_dict[X_df.npartitions - 1]

    # Need to fetch parts on worker
    on_worker = list(filter(lambda x: x[0] == exec_node, input_devarrays))
    not_on_worker = list(
        filter(lambda x: x[0] != exec_node, input_devarrays))

    ipc_handles = [
        client.submit(get_input_ipc_handles, future, workers=[a_worker])
        for a_worker, future in not_on_worker
    ]

    raw_arrays = [future for a_worker, future in on_worker]

    # IPC Handles are loaded in separate threads on worker so they can be
    # used to make calls through cython
    # Calls _fit_on_worker defined in the bottom
    intercept = client.submit(_fit_on_worker, (ipc_handles, raw_arrays),
                              self._build_params_map(),
                              workers=[exec_node])

    yield wait(intercept)

    coef_series = [
        client.submit(coef_on_worker,
                      coefs[i],
                      i,
                      X_df.shape[1],
                      X_df.npartitions,
                      loc_dict[i],
                      workers=[loc_dict[i]])
        for i in range(len(loc_dict))
    ]

    # coef_on_worker(self, coef, locations, ncols, nparts, worker):
    raise gen.Return((coef_series, intercept, loc_dict))
def binary_math_sql_select(t, lhs, rhs, **kwargs):
    left, right = first(lhs.inner_columns), first(rhs.inner_columns)
    result = getattr(sa.func, type(t).__name__)(left, right)
    assert lhs.table == rhs.table
    return reconstruct_select([result], lhs.table)
def _get_tenant(customer):
    tenant = frappe.get_all(
        "Tenant Master",
        filters={"customer": customer}
    )
    return first(tenant).get("name") if tenant else None
def _key(self):
    return first(self._dasks[0])
def test_callables():
    cl = CL(lambda: (list(range(3)) for i in range(3)))
    assert first(cl) == [0, 1, 2]
    assert first(cl) == [0, 1, 2]
def read_parquet(path, columns=None, filters=None, categories=None,
                 index=None, **kwargs):
    """ Read Dask DataFrame from ParquetFile

    This reads a directory of Parquet data into a Dask.dataframe, one file per
    partition.  It selects the index among the sorted columns if any exist.

    Parameters
    ----------
    path : string
        Source directory for data.
        Prepend with protocol like ``s3://`` or ``hdfs://`` for remote data.
    columns: list or None
        List of column names to load
    filters: list
        List of filters to apply, like ``[('x', '>', 0), ...]``
    index: string or None
        Name of index column to use if that column is sorted
    categories: list or None
        For any fields listed here, if the parquet encoding is Dictionary,
        the column will be created with dtype category. Use only if it is
        guaranteed that the column is encoded as dictionary in all row-groups.

    Examples
    --------
    >>> df = read_parquet('s3://bucket/my-parquet-data')  # doctest: +SKIP

    See Also
    --------
    to_parquet
    """
    if fastparquet is False:
        raise ImportError("fastparquet not installed")
    if filters is None:
        filters = []
    myopen = OpenFileCreator(path, compression=None, text=False)

    try:
        pf = fastparquet.ParquetFile(path + myopen.fs.sep + '_metadata',
                                     open_with=myopen,
                                     sep=myopen.fs.sep)
    except:
        pf = fastparquet.ParquetFile(path, open_with=myopen, sep=myopen.fs.sep)

    columns = columns or (pf.columns + list(pf.cats))
    rgs = [rg for rg in pf.row_groups
           if not (fastparquet.api.filter_out_stats(rg, filters, pf.helper))
           and not (fastparquet.api.filter_out_cats(rg, filters))]

    parts = [delayed(pf.read_row_group_file)(rg, columns, categories, **kwargs)
             for rg in rgs]

    # TODO: if categories vary from one rg to next, need to cope
    dtypes = {k: ('category' if k in (categories or []) else v)
              for k, v in pf.dtypes.items()
              if k in columns}

    df = dd.from_delayed(parts, meta=dtypes)

    # Find an index among the partially sorted columns
    minmax = fastparquet.api.sorted_partitioned_columns(pf)

    if index is False:
        index_col = None
    elif len(minmax) > 1:
        if index:
            index_col = index
        else:
            raise ValueError("Multiple possible indexes exist: %s.  "
                             "Please select one with index='index-name'"
                             % sorted(minmax))
    elif len(minmax) == 1:
        index_col = first(minmax)
    else:
        index_col = None

    if index_col:
        divisions = (list(minmax[index_col]['min']) +
                     [minmax[index_col]['max'][-1]])
        df = df.set_index(index_col, sorted=True, divisions=divisions)

    return df
def merge_tables(target, tables, columns=None):
    """
    Merge a number of tables onto a target table. Tables must have
    registered merge rules via the `broadcast` function.

    Parameters
    ----------
    target : str, DataFrameWrapper, or TableFuncWrapper
        Name of the table (or wrapped table) onto which tables will be merged.
    tables : list of `DataFrameWrapper`, `TableFuncWrapper`, or str
        All of the tables to merge. Should include the target table.
    columns : list of str, optional
        If given, columns will be mapped to `tables` and only those columns
        will be requested from each table. The final merged table will have
        only these columns. By default all columns are used from every table.

    Returns
    -------
    merged : pandas.DataFrame

    """
    # allow target to be string or table wrapper
    if isinstance(target, (DataFrameWrapper, TableFuncWrapper)):
        target = target.name

    # allow tables to be strings or table wrappers
    tables = [get_table(t)
              if not isinstance(t, (DataFrameWrapper, TableFuncWrapper))
              else t
              for t in tables]

    merges = {t.name: {} for t in tables}
    tables = {t.name: t for t in tables}
    casts = _get_broadcasts(tables.keys())
    logger.debug(
        'attempting to merge tables {} to target table {}'.format(
            tables.keys(), target))

    # relate all the tables by registered broadcasts
    for table, onto in casts:
        merges[onto][table] = merges[table]
    merges = {target: merges[target]}

    # verify that all the tables can be merged to the target
    all_tables = set(_all_reachable_tables(merges))

    if all_tables != set(tables.keys()):
        raise RuntimeError(
            ('Not all tables can be merged to target "{}". Unlinked tables: {}'
             ).format(target, list(set(tables.keys()) - all_tables)))

    # add any columns necessary for indexing into other tables
    # during merges
    if columns:
        columns = list(columns)
        for c in casts.values():
            if c.onto_on:
                columns.append(c.onto_on)
            if c.cast_on:
                columns.append(c.cast_on)

    # get column map for which columns go with which table
    colmap = column_map(tables.values(), columns)

    # get frames
    frames = {name: t.to_frame(columns=colmap[name])
              for name, t in tables.items()}

    # perform merges until there's only one table left
    while merges[target]:
        nm = _next_merge(merges)
        onto = toolz.first(nm)
        onto_table = frames[onto]

        # loop over all the tables that can be broadcast onto
        # the onto_table and merge them all in.
        for cast in nm[onto]:
            cast_table = frames[cast]
            bc = casts[(cast, onto)]
            with log_start_finish(
                    'merge tables {} and {}'.format(onto, cast), logger):
                onto_table = pd.merge(
                    onto_table, cast_table,
                    left_on=bc.onto_on,
                    right_on=bc.cast_on,
                    left_index=bc.onto_index,
                    right_index=bc.cast_index)

            # replace the existing table with the merged one
            frames[onto] = onto_table

            # free up space by dropping the cast table
            del frames[cast]

        # mark the onto table as having no more things to broadcast
        # onto it.
        _recursive_getitem(merges, onto)[onto] = {}

    logger.debug('finished merge')
    return frames[target]
import os
import pathlib
import sys
from importlib import metadata

import toml
import toolz
from appdirs import AppDirs
from kivy.factory import Factory

try:
    META = dict(metadata.metadata(__name__))
    __author__ = META["Author"]
    __version__ = META["Version"]
except metadata.PackageNotFoundError:
    pyproject_toml_path = toolz.first(
        pathlib.Path(__file__).parent.parent.glob("**/pyproject.toml"))
    with open(pyproject_toml_path) as file:
        pyproject_toml = toml.load(file)
    __author__ = pyproject_toml["tool"]["poetry"]["authors"][0]
    __version__ = pyproject_toml["tool"]["poetry"]["version"]

dirs = AppDirs(appname=__name__, appauthor=__author__, version=__version__)

sys.path.append(os.path.dirname(__file__))

CONFIG_DIR = pathlib.Path(dirs.user_config_dir)
CONFIG_PATH = CONFIG_DIR / "config.ini"
APP_DIR = pathlib.Path(dirs.user_data_dir)
HOME = pathlib.Path.home()
USER = HOME.stem
BASE_PATH = pathlib.Path(__file__).parent.absolute()
def _data(self):
    return first(self._dasks[0].values())
def _finalize(self, args):
    if len(self._keys()) > 1:
        return args
    else:
        return first(args)