def _init_buffers_async(self, new_buffer_ids): with _futures.ThreadPoolExecutor(max_workers=len(new_buffer_ids)) as e: futs = { e.submit(self._init_buffer, t, i): (t, i) for t, i in new_buffer_ids } for f in _futures.as_completed(futs): t, i = futs[f] dat = f.result() laste = 0 b = self._get_buffer_deque(t, i) cb_count = 0 # does groupby guarantee sorted ?? for e, grp in _groupby( dat, lambda t: int(t[1].mktime // self._isec)): # fill in gaps while laste and (e - laste > 1): obj = NULL(laste + 1, self._isec, self._tfunc) b.append(obj) cb_count += 1 laste += 1 d = [i[0] for i in grp] obj = self._iobj(d, e, self._isec, self._tfunc) b.append(obj) cb_count += 1 laste = e # we sort new to old so need do all callbacks after appends for n in range(cb_count - 1, 0, -1): self._callback(i, t, b, n)
def _init_buffers_async(self, new_buffer_ids): with _futures.ThreadPoolExecutor(max_workers = len(new_buffer_ids)) as e: futs = {e.submit(self._init_buffer,t,i):(t,i) for t,i in new_buffer_ids} for f in _futures.as_completed(futs): t, i = futs[f] dat = f.result() laste = 0 b = self._get_buffer_deque(t,i) cb_count = 0 # does groupby guarantee sorted ?? for e, grp in _groupby(dat, lambda t: int(t[1].mktime // self._isec)): # fill in gaps while laste and (e - laste > 1): obj = NULL(laste + 1, self._isec, self._tfunc) b.append(obj) cb_count += 1 laste += 1 d = [i[0] for i in grp] obj = self._iobj(d, e, self._isec, self._tfunc) b.append(obj) cb_count += 1 laste = e # we sort new to old so need do all callbacks after appends for n in range(cb_count-1,0,-1): self._callback(i,t,b,n)
def _gap_sizes_1D(y): z = [] for a, b in _groupby(_np.isnan(y).astype(int), lambda x: x == 0): if a: z.extend(list(b)) else: # Where the value is one, replace 1 with the number of sequential 1's l = len(list(b)) z.extend([l] * l) return _np.asarray(z)
def load_vpc(self, source):
    """Populate ``self.cohorts`` from a ``(headers, data)`` pair.

    ``source[0]`` is indexed with the keys ``"column"``, ``"cohort_names"``,
    ``"cohort_times"`` and ``"embryospergene"``; ``source[1]`` is split
    column-wise (``axis=1``) into one chunk per cohort.  Column names (all
    but the first) are grouped by the integer after their last ``"__"``, and
    that suffix is stripped from the names handed to each cohort.  The raw
    headers and data are kept on ``self.all_headers`` / ``self.all_data``.
    """
    # TODO: handle file-like, filename, and an already read dataset.
    headers = source[0]

    def cohort_index(column):
        return int(column.split("__")[-1])

    cohort_colnames = []
    for _, columns in _groupby(headers["column"][1:], cohort_index):
        cohort_colnames.append(
            [column.rsplit("__", 1)[0] for column in columns])

    # Split points are the cumulative widths of all cohorts but the last.
    widths = [len(names) for names in cohort_colnames]
    boundaries = _np.cumsum(widths[:-1])
    split_columns = _np.split(source[1], boundaries, axis=1)

    for idx, cohort_name in enumerate(headers["cohort_names"]):
        cohort = PointCloud_Cohort(
            cohort_name,
            headers["cohort_times"][idx],
            cohort_colnames[idx],
            split_columns[idx],
            headers["embryospergene"][idx])
        self.cohorts.append(cohort)

    self.all_headers = source[0]
    self.all_data = source[1]
def groupby(value, attribute, full_alphabet=False):
    """Group alphabetically a sequence of objects by a common attribute.

    Items are sorted and grouped by ``first_letter(getattr(item, attribute))``.

    :param value: iterable of objects to group.
    :param attribute: name of the attribute whose first letter is the key.
    :param full_alphabet: when true, the result has one entry for every
        letter yielded by ``alphabet()`` as well as every letter actually
        used; letters without items get an empty list.
    :return: sorted list of ``_GroupTuple`` built from ``(letter, items)``.
    """
    def attr_getter(item):
        return first_letter(getattr(item, attribute))

    grouped = _groupby(sorted(value, key=attr_getter), attr_getter)
    if not full_alphabet:
        return sorted(map(_GroupTuple, grouped))

    # Materialise each group right away: groupby invalidates a group's
    # iterator as soon as it advances to the next key.
    by_letter = {letter: list(items) for letter, items in grouped}
    # Set union instead of ``keys() + list(...)``: dict views cannot be
    # concatenated with ``+`` on Python 3.
    all_letters = sorted(set(by_letter) | set(alphabet()))
    # An empty list is used if there are no items for a given letter.
    return [
        _GroupTuple((letter, by_letter.get(letter, [])))
        for letter in all_letters
    ]
def nangdok(data_dir, batch_size, test_max_size, **kwargs):
    """Load Nangdok corpus data.

    Reads ``script_nmbd_by_sentence.txt`` (UTF-16-LE) from ``data_dir``,
    splitting it into sections at lines starting with ``"<"`` and keeping
    the numbered sentence lines of each section.  Every existing file
    ``<participant>/<participant>_tNN_sMM.wav`` is then paired with its
    sentence and assigned to the train or the test pool.

    Args:
        data_dir: corpus root directory.
        batch_size: number of shuffled test items split off as validation.
        test_max_size: cap on the remaining test items; falsy for no cap.
        **kwargs: optional ``test_sentences`` (sentences held out for
            testing; default: one random sentence per section) and
            ``test_participants`` (participants held out for testing;
            default: one random participant per two-letter prefix).

    Returns:
        ``(train, valid, test)`` lists of ``(wav_path, sentence)`` tuples.
    """
    def join(name):
        return _path.join(data_dir, name)

    texts = []
    with open(join("script_nmbd_by_sentence.txt"),
              encoding="utf-16-le") as script:
        section = []
        for line in script:
            if line.startswith("<"):
                texts.append(section)
                section = []
            elif _re.match(r"^\d+\..*", line):
                section.append(line)
        texts.append(section)
    # Drop whatever was collected before the first "<" header line.
    del texts[0]

    participants = sorted(
        name for name in _os.listdir(data_dir)
        if _re.match("^[fm][v-z][0-9]+", name))

    test_sentences = kwargs.get(
        "test_sentences", [_random.choice(ts) for ts in texts])
    test_participants = kwargs.get("test_participants", [
        _random.choice(list(group))
        for _, group in _groupby(participants, lambda p: p[:2])
    ])

    train = []
    test = []
    for participant in participants:
        for i, sentences in enumerate(texts):
            for j, text in enumerate(sentences):
                wav = join("{0}/{0}_t{1:0>2}_s{2:0>2}.wav".format(
                    participant, i + 1, j + 1))
                if not _path.isfile(wav):
                    continue
                # Compare the *current* participant; testing the whole
                # `participants` list here could never match.
                if text in test_sentences or participant in test_participants:
                    test.append((wav, text))
                else:
                    train.append((wav, text))

    _random.shuffle(test)
    valid = test[:batch_size]
    if test_max_size and batch_size + test_max_size < len(test):
        test = test[batch_size:(batch_size + test_max_size)]
    else:
        test = test[batch_size:]
    return train, valid, test
def groupby(value, attribute, full_alphabet=False):
    """Group alphabetically a sequence of objects by a common attribute.

    Items are sorted and grouped by ``first_letter(getattr(item, attribute))``.

    :param value: iterable of objects to group.
    :param attribute: name of the attribute whose first letter is the key.
    :param full_alphabet: when true, the result has one entry for every
        letter yielded by ``alphabet()`` as well as every letter actually
        used; letters without items get an empty list.
    :return: sorted list of ``_GroupTuple`` built from ``(letter, items)``.
    """
    def attr_getter(item):
        return first_letter(getattr(item, attribute))

    grouped = _groupby(sorted(value, key=attr_getter), attr_getter)
    if not full_alphabet:
        return sorted(map(_GroupTuple, grouped))

    # Materialise each group right away: groupby invalidates a group's
    # iterator as soon as it advances to the next key.
    by_letter = {letter: list(items) for letter, items in grouped}
    # Set union instead of ``keys() + list(...)``: dict views cannot be
    # concatenated with ``+`` on Python 3.
    all_letters = sorted(set(by_letter) | set(alphabet()))
    # An empty list is used if there are no items for a given letter.
    return [
        _GroupTuple((letter, by_letter.get(letter, [])))
        for letter in all_letters
    ]
def contiguous_ranges(list_in):
    r"""
    Map every unique entry of :obj:`list_in` to the index ranges of its runs

    Parameters
    ----------
    list_in : list

    Returns
    -------
    ranges : dict
        Keys are the unique entries of list_in; each value is a list with
        one integer index array per run of consecutive occurrences of
        that entry
    """
    ranges_by_key = _defdict(list)
    start = 0
    for key, run in _groupby(list_in):
        # Count the run without keeping its members around.
        stop = start + sum(1 for _ in run)
        ranges_by_key[key].append(_np.arange(start, stop))
        start = stop
    return dict(ranges_by_key)
def group_by(items, key=None):
    """Group adjacent ``items`` and wrap each group in ``_GroupTuple``.

    The grouping key function is obtained from ``make_getter(key)``.  The
    input is not sorted here, so only consecutive items with equal keys end
    up in the same group.  Returns the result of mapping ``_GroupTuple``
    over the ``(key, group)`` pairs.
    """
    groups = _groupby(items, make_getter(key))
    return map(_GroupTuple, groups)
def groupby(iterable, key, reverse=False):
    """Sort ``iterable`` by ``key`` and then group it by the same key.

    Returns the ``itertools.groupby`` iterator of ``(key, group)`` pairs
    over the sorted items; ``reverse`` is forwarded to ``sorted``.
    """
    ordered = sorted(iterable, key=key, reverse=reverse)
    return _groupby(ordered, key=key)
def check_arguments(kwargs):
    """Make sure all keywords are allowed.

    Raises OptionsError on error, returns sanitized dictionary on success.

    Note: Checks in SYNONYMS if argument is not recognized, raises
          OptionsError if it is not found there either.

    Besides the name/type check, two options are normalised:

    * ``time`` is rewritten as a zero-padded ``HH:MM:SS`` string; it may be
      given as ``D-HH:MM:SS`` or a fragment (``MM:SS``, ``SS``).  Days are
      folded into hours and overflowing seconds/minutes are carried.
    * ``mem`` given as a string is converted to an integer number of MB
      (suffixes b, k/kb, m/mb, g/gb, t/tb) with a floor of 5.

    Raises:
        OptionsError: unknown keyword or unparsable ``time``.
        TypeError:    value of the wrong type that cannot be coerced.
        ValueError:   unparsable ``mem`` string.
    """
    new_kwds = {}

    # Make sure types are correct
    for arg, opt in kwargs.items():
        if arg not in ALLOWED_KWDS:
            if arg not in SYNONYMS:
                raise OptionsError('Unrecognized argument {}'.format(arg))
            arg = SYNONYMS[arg]
            assert arg in ALLOWED_KWDS
        if opt is not None and not isinstance(opt, ALLOWED_KWDS[arg]):
            newtype = ALLOWED_KWDS[arg]
            # NOTE(review): the second test inspects `arg` (the keyword
            # name), not `opt`, so it is effectively always true.  Kept as
            # is because changing it would alter which values get listified.
            if (newtype is list or newtype is tuple) \
                    and not isinstance(arg, (list, tuple)):
                opt = run.listify(opt)
            elif newtype is int and isinstance(opt, str) and opt.isdigit():
                opt = int(opt)
            else:
                raise TypeError('arg "{}" must be {}, is {} ({})'.format(
                    arg, ALLOWED_KWDS[arg], opt, type(opt)))
        new_kwds[arg] = opt

    # Parse individual complex options.  Only existing keys are reassigned
    # below, so mutating new_kwds while iterating over it is safe.
    for arg, opt in new_kwds.items():
        if arg == 'time':
            try:
                if '-' in opt:
                    day, time = opt.split('-')
                else:
                    day = 0
                    time = opt
                time = [int(i) for i in time.split(':')]
                if len(time) == 3:
                    hours, mins, secs = time
                elif len(time) == 2:
                    hours = 0
                    mins, secs = time
                elif len(time) == 1:
                    hours = mins = 0
                    secs = time[0]
                else:
                    raise ValueError('too many time fields')
                hours = (int(day) * 24) + hours
                # Carry every full minute/hour, including exactly 60 and
                # values spanning more than one unit (e.g. 150 seconds).
                carry, secs = divmod(secs, 60)
                mins += carry
                carry, mins = divmod(mins, 60)
                hours += carry
                new_kwds[arg] = '{}:{}:{}'.format(
                    str(hours).rjust(2, '0'),
                    str(mins).rjust(2, '0'),
                    str(secs).rjust(2, '0'))
            except Exception:
                raise OptionsError('time must be formatted as D-HH:MM:SS ' +
                                   'or a fragment of that (e.g. MM:SS) ' +
                                   'it is formatted as {}'.format(opt))

        # Force memory into an integer of megabytes
        elif arg == 'mem' and isinstance(opt, str):
            if opt.isdigit():
                opt = int(opt)
            else:
                # Try to guess unit by suffix
                memerror = ('mem is malformatted, should be a number '
                            'of MB or a string like 24MB or 10GB, '
                            'it is: {}'.format(opt))
                # Split into runs of digits / non-digits: a valid value is
                # exactly one digit run followed by one unit run.
                groups = _groupby(opt, key=str.isdigit)
                try:
                    svalk, svalg = next(groups)
                    sval = int(''.join(svalg))
                    sunitk, sunitg = next(groups)
                    sunit = ''.join(sunitg).lower()
                except (ValueError, StopIteration):
                    # StopIteration: the string has fewer than two runs
                    # (e.g. it is empty); report it like any other
                    # malformed value, keeping the original traceback.
                    err = list(_sys.exc_info())
                    err[0] = ValueError
                    err[1] = ValueError(memerror)
                    _raise(*err)
                if list(groups) or not svalk or sunitk:
                    raise ValueError(memerror)
                if sunit == 'b':
                    opt = int(float(sval) / float(1024) / float(1024))
                elif sunit in ('kb', 'k'):
                    opt = int(float(sval) / float(1024))
                elif sunit in ('mb', 'm'):
                    opt = sval
                elif sunit in ('gb', 'g'):
                    opt = sval * 1024
                elif sunit in ('tb', 't'):
                    # Crazy people
                    opt = sval * 1024 * 1024
                else:
                    raise ValueError(
                        'Unknown memory unit opt {}'.format(sunit))
            # Don't allow 0, minimum memory req is 5MB
            if opt < 5:
                opt = 5
            new_kwds[arg] = opt

    return new_kwds
def groupby(iterable: Iterable[V], key: Callable[[V], K]) -> Dict[K, List[V]]:
    """Return a dict mapping each key to the list of items producing it.

    The items are sorted by ``key`` first, so each list holds every item
    with that key, in sorted order.
    """
    buckets = {}
    ordered = sorted(iterable, key=key)  # type: ignore
    for group_key, members in _groupby(ordered, key):
        buckets[group_key] = list(members)
    return buckets