def buffer_logic(state):
    '''
    Buffer create/replace/reuse logic. The function name is not very good :(

        new_state  | old_state  | same sources | action
        -----------|------------|--------------|--------------------------------
        replace    | replace    | True         | reuse buffer
        replace    | replace    | False        | replace buffer
        replace    | no-replace | True         | create buffer (copy candidates)
        replace    | no-replace | False        | create buffer
        no-replace | replace    | True         | create buffer (copy candidates)
        no-replace | replace    | False        | create buffer
        no-replace | no-replace | True         | reuse buffer
        no-replace | no-replace | False        | create buffer

    A reusable buffer will be looked for, then a replacement buffer and as a
    last resort a new one will be created.

    Returns:
        old_state (dict): In case a state was reused/replaced it is returned
            because it will be needed later on to compare it with the current
            state and determine whether the window should be resized/moved,
            etc...
    '''
    # We are only interested in buffers which are in the same container.
    # That's where the interesting reuse/replace logic is at.
    states = fn.where(variables.states, container=state['container'])

    with_same_sources = partial(same_sources, state)

    reusable_state = fn.first(fn.where(
        ifilter(with_same_sources, states),
        replace=state['replace'],
    ))
    replaceable_state = fn.first(fn.where(
        ifilter(lambda x: not with_same_sources(x), states),
        replace=True,
    ))

    old_state = None
    if reusable_state:
        state.update(fn.project(reusable_state, ['uid', 'buffer', 'sources']))
        old_state = reusable_state
        variables.states.remove(reusable_state)
    elif replaceable_state:
        state.update(fn.project(replaceable_state, ['uid', 'buffer']))
        state['sources'] = populated_candidates(state)
        set_buffer_contents(state['buffer'], aggregate_candidates(state))
        old_state = replaceable_state
        variables.states.remove(replaceable_state)
    else:
        same = find(with_same_sources, states)
        state['sources'] = (same and same['sources']) or populated_candidates(state)
        state['buffer'] = make_pyunite_buffer(state)
    return old_state
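# Hedged aside: the docstring's decision table above, encoded as data. The
# tuple key is (new_state_replaces, old_state_replaces, same_sources); the
# name BUFFER_ACTIONS is illustrative, not part of pyunite itself.
BUFFER_ACTIONS = {
    (True,  True,  True):  'reuse buffer',
    (True,  True,  False): 'replace buffer',
    (True,  False, True):  'create buffer (copy candidates)',
    (True,  False, False): 'create buffer',
    (False, True,  True):  'create buffer (copy candidates)',
    (False, True,  False): 'create buffer',
    (False, False, True):  'reuse buffer',
    (False, False, False): 'create buffer',
}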
def get_matrix_filename(series_id, platform_id):
    filenames = list(matrix_filenames(series_id, platform_id))
    mirror_filenames = (os.path.join(conf.SERIES_MATRIX_MIRROR, filename)
                        for filename in filenames)
    mirror_filename = first(filename for filename in mirror_filenames
                            if os.path.isfile(filename))
    if mirror_filename:
        return mirror_filename

    for filename in filenames:
        print('Loading URL', conf.SERIES_MATRIX_URL + filename, '...')
        try:
            res = urllib.request.urlopen(conf.SERIES_MATRIX_URL + filename)
        except urllib.error.URLError:
            pass
        else:
            mirror_filename = os.path.join(conf.SERIES_MATRIX_MIRROR, filename)
            print('Cache to', mirror_filename)
            directory = os.path.dirname(mirror_filename)
            if not os.path.exists(directory):
                os.makedirs(directory)
            with open(mirror_filename, 'wb') as f:
                shutil.copyfileobj(res, f)
            return mirror_filename

    raise LookupError("Can't find matrix file for series %s, platform %s"
                      % (series_id, platform_id))
def help(self, source, target, args): """Display help for the given command or plugin. Syntax: HELP [<command>] | [<plugin>] """ plugins = self.parent.bot.plugins command = self.parent.bot.command if not args: q = "help" else: q = first(args.split(" ", 1)) if q in plugins: msg = get_plugin_help(plugins[q]) elif q in command: msg = get_command_help(command[q], q) else: msg = None if msg is None: msg = ( "No help available for: {0:s}. " "To get a list of plugins, type: plugins" ).format(q) return format_msg(msg)
def test_scope_of_global(): """Make sure the scope of a global is the entire program.""" js = """a = 0;""" ast = parse(js) raise SkipTest assignment = first(assignments(ast)) eq_(assignment.scope_of(assignment["left"]["name"])["type"], "Program")
def info(self, source, target, args):
    """Display info for the given plugin.

    Syntax: INFO <plugin>
    """

    plugins = self.parent.bot.plugins

    if not args:
        return "No plugin specified."

    name = first(args.split(" ", 1)).lower()

    if name in plugins:
        plugin = plugins[name]

        # __doc__ exists but may be None; fall back to the plugin name.
        description = (getattr(plugin, "__doc__", None) or name).split("\n", 1)[0]
        version = getattr(plugin, "__version__", "Unknown")
        author = getattr(plugin, "__author__", "Unknown")

        msg = "{0:s} - {1:s} v{2:s} by {3:s}".format(
            name, description, version, author
        )
    else:
        msg = (
            "No info available for: {0:s}. "
            "To get a list of plugins, type: plugins"
        ).format(name)

    return format_msg(msg)
def host(self, source, target, args): """Resolve a hostname or ip address. Syntax: HOST [<hostname>] | [<ip>] """ if not args: return "No hostname or ip address specified." tokens = args.split(" ", 1) host = first(tokens) isip = all( c.isdigit() for c in host.replace(".", "") ) if isip: try: name, aliases, addresses = gethostbyaddr(host) msg = "{0:s} -> {1:s}".format(host, name) except Exception as error: msg = log("ERROR: {0:s}", error) log(format_exc()) else: try: address = gethostbyname(host) msg = "{0:s} -> {1:s}".format(host, address) except Exception as error: msg = log("ERROR: {0:s}", error) log(format_exc()) return msg
def register_api_resource(api, resource): if funcy.is_seqcoll(resource): cls, url, endpoint = ( funcy.first(resource), funcy.second(resource), funcy.nth(2, resource), ) api.add_resource(cls, url, endpoint=endpoint)
def test_scope_of_initialized_variable(): js = """function smoo() { var a = 0; }""" ast = parse(js) raise SkipTest assignment = first(assignments(ast)) eq_(assignment.scope_of(assignment["id"]["name"])["id"]["name"], "smoo")
def test_scope_of_global_function(): js = """function smoo() { var a; a = 0; }""" ast = parse(js) raise SkipTest assignment = first(assignments(ast)) eq_(assignment.scope_of(assignment["left"]["name"])["type"], "FunctionDeclaration")
def nearest_scope_holder(self): """Return the nearest node that can have its own scope, potentially including myself. This will be either a FunctionDeclaration or a Program (for now). """ return first(n for n in self.walk_up() if isinstance(n, (FunctionDeclaration, Program)))
def mergesort(filename, output=None, key=None, maxitems=1e6, progress=True):
    """Given an input file sort it by performing a merge sort on disk.

    :param filename: Either a filename as a ``str`` or a
                     ``py._path.local.LocalPath`` instance.
    :type filename:  ``str`` or ``py._path.local.LocalPath``

    :param output: An optional output filename as a ``str`` or a
                   ``py._path.local.LocalPath`` instance.
    :type output:  ``str`` or ``py._path.local.LocalPath`` or ``None``

    :param key: An optional key to sort the data on.
    :type key:  ``function`` or ``None``

    :param maxitems: Maximum number of items to hold in memory at a time.
    :type maxitems:  ``int``

    :param progress: Whether or not to display a progress bar
    :type progress:  ``bool``

    This uses ``py._path.local.LocalPath.make_numbered_dir`` to create
    temporary scratch space to work with when splitting the input file into
    sorted chunks. The mergesort is processed iteratively in-memory using
    the ``~merge`` function, which is almost identical to ``~heapq.merge``
    but adds support for an optional key function.
    """

    p = filename if isinstance(filename, LocalPath) else LocalPath(filename)
    output = p if output is None else output
    key = key if key is not None else lambda x: x

    scratch = LocalPath.make_numbered_dir(prefix="mergesort-")

    nlines = sum(1 for line in p.open("r"))

    # Compute a reasonable chunksize < maxitems
    chunksize = first(ifilter(lambda x: x < maxitems,
                              imap(lambda x: nlines / (2 ** x), count(1))))

    # Split the file up into n sorted files
    if progress:
        bar = ProgressBar("Split/Sorting Data", max=(nlines / chunksize))

    for i, items in enumerate(ichunks(chunksize, jsonstream(p))):
        with scratch.ensure("{0:d}.json".format(i)).open("w") as f:
            f.write("\n".join(map(dumps, sorted(items, key=key))))
        if progress:
            bar.next()

    if progress:
        bar.finish()

    q = scratch.listdir("*.json")

    with output.open("w") as f:
        if progress:
            bar = ProgressBar("Merge/Sorting Data", max=nlines)
        for item in merge(*imap(jsonstream, q)):
            f.write("{0:s}\n".format(dumps(item)))
            if progress:
                bar.next()
        if progress:
            bar.finish()
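# A minimal sketch of the key-aware ``merge`` the docstring above refers to;
# this is my reconstruction, not the library's implementation. On Python 3.5+
# the standard heapq.merge already accepts a ``key`` argument directly.
import heapq

def merge(*iterables, key=None):
    if key is None:
        for item in heapq.merge(*iterables):
            yield item
        return

    def decorate(it, i):
        # Tag each item with (sort_key, tiebreaker, item) so heapq.merge
        # orders by key and never compares the items themselves.
        for item in it:
            yield (key(item), i, item)

    decorated = [decorate(it, i) for i, it in enumerate(iterables)]
    for _, _, item in heapq.merge(*decorated):
        yield item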
def test_scope_of_inner_function(): js = """function smoo() { function bar() { a = 0; } }""" ast = parse(js) raise SkipTest assignment = first(assignments(ast)) eq_(assignment.scope_of(assignment["left"]["name"])["type"], "Program")
def transform_event(kudago_event, parent_id, children_count): tags = kudago_event['tags'] categories = kudago_event['categories'] place = kudago_event['place'] kind = find_first(('festival', 'exhibition', 'theater'), categories) dates = filter(is_date_finite, kudago_event['dates']) if kind not in ('festival', 'exhibition'): dates = flatten(map(split_date, dates)) dates = list(sorted(map(transform_date, dates), key=itemgetter('start'))) participants = [ {'role': p['role']['slug'], 'agent': p['agent']['id']} for p in kudago_event['participants'] ] return { '_id': kudago_event['id'], '_type': 'event', 'kind': kind, 'is_for_kids': 'kids' in categories, 'is_premiere': 'премьера' in tags, 'name': kudago_event['short_title'], 'full_name': kudago_event['title'], 'tagline': kudago_event['tagline'], 'lead': strip_links(kudago_event['description']), 'description': strip_links(kudago_event['body_text']), 'location': kudago_event['location']['slug'], 'place': place['id'] if place else None, 'parent': parent_id, 'participants': participants, 'age_restriction': kudago_event['age_restriction'], 'price': transform_price(kudago_event['price'], kudago_event['is_free']), 'dates_count': len(dates), 'children_count': children_count, 'favorites_count': kudago_event['favorites_count'], 'comments_count': kudago_event['comments_count'], 'start': dates[0]['start'] if dates else None, 'end': dates[-1]['end'] if dates else None, 'first_image': first(kudago_event['images']), 'images': kudago_event['images'], 'dates': dates, 'source': { 'name': 'kudago.com', 'url': kudago_event['site_url'], } }
def test_scope_of_inner_reference(): js = """function smoo() { var a; function bar() { a = 0; } }""" ast = parse(js) raise SkipTest assignment = first(assignments(ast)) eq_(assignment.scope_of(assignment["left"]["name"])["id"]["name"], "smoo")
def auth(self, source, target, args): """Authorize a Remote Plugin Syntax: AUTH <plugin> <password> """ if not args: yield "No plugin specified." return tokens = args.split(" ", 2) plugin = first(tokens) password = second(tokens) data = self.parent.data.rplugins config = self.parent.config["rplugins"] if password != config["password"]: yield "Authorization failed." return if plugin in data["pending"]: url = data["pending"][plugin] del data["pending"][plugin] data["allowed"][plugin] = True value = yield self.call( task( verify_plugin, url, config["path"], data["allowed"], ), "workerprocesses" ) allowed, plugin = value.value if allowed: msg = log( "Remote Plugin {0:s} ({1:s}) successfully authorized.", url, plugin ) yield msg else: del data["allowed"][plugin] msg = log( "Remote Plugin {0:s} ({1:s}) failed authorization.", url, plugin ) yield msg else: yield log("Remote Plugin {0:s} not found.", plugin)
def get_supervised_dataset(race_data, race_factors):
    race_bins = get_bins(race_data)
    race_bin_groups = pd.DataFrame.from_dict(race_bins).groupby('race_id')

    # Input, output
    data_set = SupervisedDataSet(6, 15)
    for race_id, race_bin in race_bin_groups:
        # Skip races where 10 or fewer bins actually hold racers
        # (i.e. have a nonzero population percentage).
        if not np.count_nonzero(race_bin.population_pct) > 10:
            continue

        race_factor = race_factors[race_factors.race_id == race_id]

        # If race has missing factor data then skip
        if race_factor.empty:
            continue

        input_factors = [first(race_factor.high_temp) / 100.0,
                         first(race_factor.low_temp) / 100.0,
                         first(race_factor.high_humidity) / 100.0,
                         first(race_factor.low_humidity) / 100.0,
                         first(race_factor.starting_elevation) / 10000.0,
                         first(race_factor.gross_elevation_gain) / 10000.0]
        output_factors = race_bin.population_pct.tolist()
        data_set.appendLinked(input_factors, output_factors)

    return data_set
def part(self, source, target, args): """Leave the specified channel Syntax: PART <channel> [<message>] """ if not args: return "No channel specified." tokens = args.split(" ", 1) channel, message = first(tokens), second(tokens) or "Leaving" self.fire(PART(channel, message), "bot")
def get_bins(race_data):
    """
    Group races and create bins (time ranges) of BIN_SIZE. For each bin,
    compute the percentage of racers falling into it and its average time,
    and assign a bin number so racers can be matched to their bin later on.
    """
    bin_data = []
    race_groups = race_data.groupby('race_id')
    for race_id, race_group in race_groups:
        top_75_percentile = race_group[
            race_group.final_time < race_group.final_time.quantile(.75)]

        # Skip races with missing data.
        if len(top_75_percentile) == 0:
            continue

        bins = pd.cut(top_75_percentile.final_time, BIN_SIZE, right=False)

        # fastest = time.strftime(
        #     '%H:%M:%S', time.gmtime(min(top_75_percentile.final_time)))
        # slowest = time.strftime(
        #     '%H:%M:%S', time.gmtime(max(top_75_percentile.final_time)))
        # print "fastest =>", fastest
        # print "slowest =>", slowest

        bin_number = 0
        for bin_key, bin_group in top_75_percentile.groupby(bins):
            bin_number += 1
            population_pct = len(bin_group) / float(len(top_75_percentile))
            bin_avg_time = bin_group.final_time.mean()
            if math.isnan(bin_avg_time):
                # Ugly, but the pandas bin key is a string like '[a, b)';
                # splitting it gives us the bin's lower/upper range time.
                lower_range = float(first(bin_key.split(',')).strip('['))
                upper_range = float(last(bin_key.split(',')).strip(')'))
                bin_avg_time = np.mean([lower_range, upper_range])
            bin_data.append({'race_id': int(race_id),
                             'bin_number': bin_number,
                             'population_pct': population_pct,
                             'bin_avg_time': bin_avg_time})
    return bin_data
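# Sketch of the binning primitive used above, with made-up finish times in
# seconds (3 bins here stands in for BIN_SIZE): pd.cut splits the values
# into equal-width, left-closed intervals, and grouping on the result
# yields one group per interval.
import pandas as pd

times = pd.Series([3600.0, 3700.0, 3900.0, 4200.0])
bins = pd.cut(times, 3, right=False)
print(times.groupby(bins).mean())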
def get_editor(cls, editor_id):
    """Get the editor matching ``editor_id``.

    ``editor_id`` is the index of the entry in the tuple returned by
    ``cls.get_choices()``.

    Usage:
        >>> EditorTypesEnum.get_editor(editor_id=0)
        (<class 'redactor.widgets.RedactorEditor'>, {})

    :param editor_id: int
    :return: tuple
    """
    return first(
        (e for i, e in enumerate(cls.EDITOR_TYPES) if i == editor_id))
def remove(self, source, target, args):
    """Remove a channel from startup join list.

    Syntax: REMOVE <channel>
    """

    if not args:
        return "No channels specified."

    channel = first(args.split(" ", 1))

    if channel in self.parent.parent.channels:
        self.parent.parent.channels.remove(channel)
        return "{0:s} removed from startup join list".format(channel)

    return "{0:s} not in startup join list".format(channel)
def add(self, source, target, args): """Add a channel to startup join list. Syntax: ADD <channel> """ if not args: return "No channels specified." channel = first(args.split(" ", 1)) if channel not in self.parent.parent.channels: self.parent.parent.channels.append(channel) return "Added {0:s} to startup join list".format(channel) return "{0:s} already in startup join list".format(channel)
def swap_alias(alias, index, es):
    """Point an ES alias to a new index, and delete the old index.

    :arg alias: The alias to repoint
    :arg index: The new index name
    :arg es: An ElasticSearch client
    """
    # Get the index the alias currently points to.
    old_index = first(es.aliases(alias))

    # Make the alias point to the new index.
    removal = ([{'remove': {'index': old_index, 'alias': alias}}]
               if old_index else [])
    es.update_aliases(removal + [{'add': {'index': index, 'alias': alias}}])  # atomic

    # Delete the old index.
    if old_index:
        es.delete_index(old_index)
def load(self, source, target, args):
    """Load a plugin

    Syntax: LOAD <plugin>
    """

    if not args:
        yield "No plugin specified."
        return  # don't fall through with empty args

    plugin = first(args.split(" ", 1))

    plugins = self.parent.bot.plugins
    if plugin in plugins:
        yield log("Plugin {0:s} already loaded!", plugin)
    else:
        yield self.fire(load(plugin), "plugins")
def join(self, source, target, args): """Join the specified channel. Syntax: JOIN <channel> """ if not args: return "No channel specified." channel = first(args.split(" ", 1)) if channel: msg = log("Joining channel: {0:s}", channel) self.fire(JOIN(channel), "bot") else: msg = log("No channel specified.") return msg
def transform_place(kudago_place, events_count): categories = kudago_place['categories'] kind = find_first(('park', 'cafe', 'museums', 'theatre'), categories) if kind == 'theatre': kind = 'theater' elif kind == 'museums': kind = 'museum' return { '_id': kudago_place['id'], '_type': 'place', 'kind': kind, 'is_for_kids': 'kids' in categories, 'is_stub': kudago_place['is_stub'], 'name': kudago_place['short_title'], 'full_name': kudago_place['title'], 'lead': strip_links(kudago_place['description']), 'description': strip_links(kudago_place['body_text']), 'location': kudago_place['location'], 'address': kudago_place['address'], 'subway': kudago_place['subway'], 'coords': transform_coords(kudago_place['coords']), 'age_restriction': kudago_place['age_restriction'], 'phone_numbers': transform_phone(kudago_place['phone']), 'working_hours': kudago_place['timetable'], 'url': kudago_place['foreign_url'], 'events_count': events_count, 'favorites_count': kudago_place['favorites_count'], 'comments_count': kudago_place['comments_count'], 'first_image': first(kudago_place['images']), 'images': kudago_place['images'], 'source': { 'name': 'kudago.com', 'url': kudago_place['site_url'], } }
def reload(self, source, target, args):
    """Reload an already loaded plugin

    Syntax: RELOAD <plugin>
    """

    if not args:
        yield "No plugin specified."
        return  # don't fall through with empty args

    plugin = first(args.split(" ", 1))

    plugins = self.parent.bot.plugins
    if plugin not in plugins:
        yield log("Plugin {0:s} is not loaded!", plugin)
    else:
        yield self.fire(unload(plugin), "plugins")
        yield
        yield self.fire(load(plugin), "plugins")
def add(self, source, target, args):
    """Add a Remote Plugin

    Syntax: ADD <url>
    """

    if not args:
        yield "No URL specified."
        return  # don't fall through with empty args

    url = first(args.split(" ", 1))

    data = self.parent.data.rplugins
    config = self.parent.config["rplugins"]

    if url in data["enabled"]:
        yield log("Remote Plugin {0:s} already loaded!", url)
    else:
        value = yield self.call(
            task(
                verify_plugin,
                url,
                config["path"],
                data["allowed"],
            ),
            "workerprocesses"
        )
        allowed, plugin = value.value
        if allowed:
            msg = log(
                "Remote Plugin {0:s} ({1:s}) is already authorized.",
                url, plugin
            )
            yield msg
        else:
            data["pending"][plugin] = url
            msg = log(
                "Remote Plugin {0:s} ({1:s}) pending authorization.",
                url, plugin
            )
            yield msg
def greetings(self, source, target, args):
    """Manage greetings

    Syntax: GREETINGS <sub-command>

    See: COMMANDS greetings
    """

    if not args:
        yield "No command specified."
        return  # don't fall through with empty args

    tokens = args.split(" ", 1)
    command, args = first(tokens), (second(tokens) or "")

    event = cmd.create(command, source, target, args)
    try:
        yield self.call(event, "commands:greetings")
    except Exception as error:
        yield "ERROR: {0:s}".format(error)
def radd(self, source, target, args):
    """Add a new RSS feed to be checked at the given interval.
    Interval is in minutes.

    Syntax: RADD <url> [<interval>]
    """

    if not args:
        yield "No URL specified."
        return  # don't fall through with empty args

    tokens = args.split(" ", 2)

    url = first(tokens)
    interval = second(tokens) or "60"

    try:
        interval = int(interval)
    except Exception as error:
        log("ERROR: {0:s}\n{1:s}", error, format_exc())
        yield "Invalid interval specified."
def rplugins(self, source, target, args):
    """Manage Remote Plugins

    Syntax: RPLUGINS <command>

    See: COMMANDS rplugins
    """

    if not args:
        yield "No command specified."
        return  # don't fall through with empty args

    tokens = args.split(" ", 1)
    command, args = first(tokens), (second(tokens) or "")
    command = command.encode("utf-8")

    event = cmd.create(command, source, target, args)
    try:
        yield (yield self.call(event, "commands:rplugins"))
    except Exception as error:
        yield "ERROR: {0:s}".format(error)
def test_live_checkpoints_resume(tmp_dir, scm, dvc, live_checkpoint_stage): results = dvc.experiments.run( live_checkpoint_stage.addressing, params=["foo=2"], tmp_dir=False ) checkpoint_resume = first(results) dvc.experiments.run( live_checkpoint_stage.addressing, checkpoint_resume=checkpoint_resume, tmp_dir=False, ) results = dvc.experiments.show() assert checkpoints_metric(results, "logs.json", "step") == [ 3, 3, 2, 1, 1, 0, ] assert checkpoints_metric(results, "logs.json", "metric1") == [ 4, 4, 3, 2, 2, 1, ] assert checkpoints_metric(results, "logs.json", "metric2") == [ 8, 8, 6, 4, 4, 2, ]
def test_pull_imported_stage_from_subrepos( tmp_dir, dvc, erepo_dir, is_dvc, files ): subrepo = erepo_dir / "subrepo" make_subrepo(subrepo, erepo_dir.scm) gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen with subrepo.chdir(): gen(files, commit="files in subrepo") key = first(files) path = os.path.join("subrepo", key) dvc.imp(os.fspath(erepo_dir), path, out="out") # clean everything remove(dvc.odb.local.cache_dir) remove("out") makedirs(dvc.odb.local.cache_dir) stats = dvc.pull(["out.dvc"]) expected = [f"out{os.sep}"] if isinstance(files[key], dict) else ["out"] assert stats["added"] == expected assert (tmp_dir / "out").read_text() == files[key]
def test_reset_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, caplog): from dvc.repo.experiments.base import CheckpointExistsError dvc.experiments.run(checkpoint_stage.addressing, name="foo") scm.gitpython.repo.git.reset(hard=True) scm.gitpython.repo.git.clean(force=True) with pytest.raises(CheckpointExistsError): dvc.experiments.run(checkpoint_stage.addressing, name="foo", params=["foo=2"]) results = dvc.experiments.run(checkpoint_stage.addressing, params=["foo=2"], name="foo", force=True) exp = first(results) tree = scm.get_tree(exp) with tree.open(tmp_dir / "foo") as fobj: assert fobj.read().strip() == "5" with tree.open(tmp_dir / "metrics.yaml") as fobj: assert fobj.read().strip() == "foo: 2"
def _get_file_path(kwargs): from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX out = first( concat( kwargs.get("outs", []), kwargs.get("outs_no_cache", []), kwargs.get("metrics", []), kwargs.get("metrics_no_cache", []), kwargs.get("plots", []), kwargs.get("plots_no_cache", []), kwargs.get("outs_persist", []), kwargs.get("outs_persist_no_cache", []), kwargs.get("checkpoints", []), without([kwargs.get("live", None)], None), ) ) return ( os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX if out else DVC_FILE )
def test_get_used_cache(exists, expected_message, mocker, caplog): stage = mocker.MagicMock() mocker.patch.object(stage, "__str__", return_value="stage: 'stage.dvc'") mocker.patch.object(stage, "addressing", "stage.dvc") mocker.patch.object( stage.repo.tree.dvcignore, "check_ignore", return_value=_no_match("path"), ) output = BaseOutput(stage, "path") mocker.patch.object(output, "use_cache", True) mocker.patch.object(stage, "is_repo_import", False) mocker.patch.object(BaseOutput, "checksum", new_callable=mocker.PropertyMock).return_value = None mocker.patch.object(BaseOutput, "exists", new_callable=mocker.PropertyMock).return_value = exists with caplog.at_level(logging.WARNING, logger="dvc"): assert isinstance(output.get_used_cache(), NamedCache) assert first(caplog.messages) == expected_message
def prepare_file_path(kwargs): """Determine file path from the first output name. Used in creating .dvc files. """ from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX out = first( concat( kwargs.get("outs", []), kwargs.get("outs_no_cache", []), kwargs.get("metrics", []), kwargs.get("metrics_no_cache", []), kwargs.get("plots", []), kwargs.get("plots_no_cache", []), kwargs.get("outs_persist", []), kwargs.get("outs_persist_no_cache", []), kwargs.get("checkpoints", []), without([kwargs.get("live", None)], None), )) return (os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX if out else DVC_FILE)
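# Illustration with hypothetical kwargs: given outs=["data/model.pkl"], the
# first output wins and the generated name is "model.pkl" plus the .dvc
# suffix; with no outputs at all, the default DVC_FILE name is used instead.
#
#   prepare_file_path({"outs": ["data/model.pkl"]})  # -> "model.pkl.dvc"
#   prepare_file_path({})                            # -> DVC_FILE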
def test_resume_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker, last): with pytest.raises(DvcException): if last: dvc.experiments.run( checkpoint_stage.addressing, checkpoint_resume=Experiments.LAST_CHECKPOINT, ) else: dvc.experiments.run(checkpoint_stage.addressing, checkpoint_resume="foo") results = dvc.experiments.run(checkpoint_stage.addressing, params=["foo=2"]) if last: exp_rev = Experiments.LAST_CHECKPOINT else: exp_rev = first(results) dvc.experiments.run(checkpoint_stage.addressing, checkpoint_resume=exp_rev) assert (tmp_dir / "foo").read_text() == "10" assert (tmp_dir / ".dvc" / "experiments" / "metrics.yaml").read_text().strip() == "foo: 2"
def _transfer(func, from_remote, to_remote): ret = [] runs = from_remote.fs.path.join(from_remote.fs_path, "runs") if not from_remote.fs.exists(runs): return [] from_path = from_remote.fs.path for src in from_remote.fs.find(runs): rel = from_path.relpath(src, from_remote.fs_path) dst = to_remote.fs.path.join(to_remote.fs_path, rel) key = to_remote.fs.path.parent(dst) # check if any build cache already exists for this key # TODO: check if MaxKeys=1 or something like that applies # or otherwise this will take a lot of time! if to_remote.fs.exists(key) and first(to_remote.fs.find(key)): continue func(src, dst) ret.append( (from_path.name(from_path.parent(src)), from_path.name(src)) ) return ret
def _resolve_stage(self, context: Context, name: str, definition) -> dict: definition = deepcopy(definition) wdir = self._resolve_wdir(context, definition.get(WDIR_KWD)) if self.wdir != wdir: logger.debug("Stage %s has different wdir than dvc.yaml file", name) contexts = [] params_yaml_file = wdir / DEFAULT_PARAMS_FILE if self.global_ctx_source != params_yaml_file: if os.path.exists(params_yaml_file): contexts.append( Context.load_from(self.repo.tree, str(params_yaml_file))) else: logger.debug("%s does not exist for stage %s", params_yaml_file, name) params_file = definition.get(PARAMS_KWD, []) for item in params_file: if item and isinstance(item, dict): contexts.append( Context.load_from(self.repo.tree, str(wdir / first(item)))) context.merge_update(*contexts) logger.trace( # pytype: disable=attribute-error "Context during resolution of stage %s:\n%s", name, context) with context.track(): stage_d = resolve(definition, context) params = stage_d.get(PARAMS_KWD, []) + self._resolve_params( context, wdir) if params: stage_d[PARAMS_KWD] = params return {name: stage_d}
def _load_outs(cls, stage, data, typ=None): from dvc.output.base import BaseOutput d = [] for key in data: if isinstance(key, str): entry = {BaseOutput.PARAM_PATH: key} if typ: entry[typ] = True d.append(entry) continue assert isinstance(key, dict) assert len(key) == 1 path = first(key) extra = key[path] if not typ: d.append({BaseOutput.PARAM_PATH: path, **extra}) continue entry = {BaseOutput.PARAM_PATH: path} persist = extra.pop(BaseOutput.PARAM_PERSIST, False) if persist: entry[BaseOutput.PARAM_PERSIST] = persist cache = extra.pop(BaseOutput.PARAM_CACHE, True) if not cache: entry[BaseOutput.PARAM_CACHE] = cache entry[typ] = extra or True d.append(entry) stage.outs.extend(output.loadd_from(stage, d))
def test_continue_checkpoint(tmp_dir, scm, dvc, mocker, last): tmp_dir.gen("checkpoint.py", CHECKPOINT_SCRIPT) tmp_dir.gen("params.yaml", "foo: 1") stage = dvc.run( cmd="python checkpoint.py foo 5 params.yaml metrics.yaml", metrics_no_cache=["metrics.yaml"], params=["foo"], outs_persist=["foo"], always_changed=True, name="checkpoint-file", ) scm.add([ "dvc.yaml", "dvc.lock", "checkpoint.py", "params.yaml", "metrics.yaml", ]) scm.commit("init") results = dvc.experiments.run(stage.addressing, checkpoint=True, params=["foo=2"]) if last: exp_rev = ":last" else: exp_rev = first(results) dvc.experiments.run( stage.addressing, checkpoint=True, checkpoint_continue=exp_rev, ) assert (tmp_dir / "foo").read_text() == "10" assert (tmp_dir / ".dvc" / "experiments" / "metrics.yaml").read_text().strip() == "foo: 2"
def test_diff_head(tmp_dir, scm, dvc, exp_stage): results = dvc.experiments.run(exp_stage.addressing, params=["foo=2"]) exp = first(results) assert dvc.experiments.diff(a_rev="HEAD", b_rev=exp) == { "params": { "params.yaml": { "foo": { "diff": 1, "old": 1, "new": 2 } } }, "metrics": { "metrics.yaml": { "foo": { "diff": 1, "old": 1, "new": 2 } } }, }
def distribution_settings(): error = None form = DistributionSettingsForm() entries = db.session.query(DistributionSettings).order_by( desc(DistributionSettings.id), ).limit(25).all() last_entry = first(entries) if last_entry: if not form.creator_rewards_pool.data: form.creator_rewards_pool.data = last_entry.creator_rewards_pool if not form.voter_rewards_pool.data: form.voter_rewards_pool.data = last_entry.voter_rewards_pool if not form.votes_per_user.data: form.votes_per_user.data = last_entry.votes_per_user if not form.min_reward.data: form.min_reward.data = last_entry.min_reward if form.validate_on_submit(): setting = DistributionSettings( creator_rewards_pool=int(form.creator_rewards_pool.data), voter_rewards_pool=int(form.voter_rewards_pool.data), votes_per_user=int(form.votes_per_user.data), min_reward=int(form.min_reward.data), created_at=dt.datetime.utcnow()) db.session.add(setting) db.session.commit() return redirect(url_for('.distribution_settings')) return render_template( 'distribution_settings.html', form=form, error=error, entries=entries, )
def test_new_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker, workspace): new_mock = mocker.spy(dvc.experiments, "new") results = dvc.experiments.run(checkpoint_stage.addressing, params=["foo=2"], tmp_dir=not workspace) exp = first(results) new_mock.assert_called_once() for rev in dvc.brancher([exp]): if rev == "workspace": continue tree = dvc.repo_tree with tree.open(tmp_dir / "foo") as fobj: assert fobj.read().strip() == "5" with tree.open(tmp_dir / "metrics.yaml") as fobj: assert fobj.read().strip() == "foo: 2" if workspace: assert scm.get_ref(EXEC_APPLY) == exp assert scm.get_ref(EXEC_CHECKPOINT) == exp if workspace: assert (tmp_dir / "foo").read_text().strip() == "5" assert (tmp_dir / "metrics.yaml").read_text().strip() == "foo: 2"
def build_outs_trie(stages): outs = Trie() for stage in filter(bool, stages): # bug? not using it later for out in stage.outs: out_key = out.path_info.parts # Check for dup outs if out_key in outs: dup_stages = [stage, outs[out_key].stage] raise OutputDuplicationError(str(out), dup_stages) # Check for overlapping outs if outs.has_subtrie(out_key): parent = out overlapping = first(outs.values(prefix=out_key)) else: parent = outs.shortest_prefix(out_key).value overlapping = out if parent and overlapping: msg = ( "The output paths:\n'{}'('{}')\n'{}'('{}')\n" "overlap and are thus in the same tracked directory.\n" "To keep reproducibility, outputs should be in separate " "tracked directories or tracked individually." ).format( str(parent), parent.stage.addressing, str(overlapping), overlapping.stage.addressing, ) raise OverlappingOutputPathsError(parent, overlapping, msg) outs[out_key] = out return outs
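# Hedged aside on the trie lookups above, with made-up paths: once
# ('data', 'raw') is stored, has_subtrie(('data',)) is True (a new out at
# 'data' would contain an existing one), while
# shortest_prefix(('data', 'raw', 'x')).value finds the out stored at
# ('data', 'raw') (the new out would live inside it), so nesting is caught
# in both directions.
from pygtrie import Trie

t = Trie()
t[('data', 'raw')] = 'out-A'
assert t.has_subtrie(('data',))
assert t.shortest_prefix(('data', 'raw', 'x')).value == 'out-A'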
def test_scope_building(): """Make sure we find all the declarations within a function but don't stray into inner functions.""" js = """ function smoo() { var w, x; if (true) { var y; } function bar() { var z; } } function barbar() { } """ ast = parse(js) function = first(node for node in walk_down(ast) if node['type'] == FUNC_DECL) raise SkipTest("Need to reimplement scope") eq_(set(function.scope().keys()), set(['w', 'x', 'y', 'smoo', 'bar'])) eq_(set(ast.scope().keys()), set(['smoo', 'barbar']))
def build_sqoop_partition_options( self, relation: RelationDescription, partition_key: Optional[str], table_size: int ) -> List[str]: """ Build the partitioning-related arguments for Sqoop. """ if partition_key: column = fy.first(fy.where(relation.table_design["columns"], name=partition_key)) if column["sql_type"] in ("timestamp", "timestamp without time zone"): quoted_key_arg = """CAST(TO_CHAR("{}", 'YYYYMMDDHH24MISS') AS BIGINT)""".format(partition_key) else: quoted_key_arg = '"{}"'.format(partition_key) if relation.num_partitions: # num_partitions explicitly set in the design file overrides the dynamic determination. num_mappers = min(relation.num_partitions, self.max_partitions) else: num_mappers = self.maximize_partitions(table_size) if num_mappers > 1: return ["--split-by", quoted_key_arg, "--num-mappers", str(num_mappers)] # Use 1 mapper if either there is no partition key, or if the partitioner returns only one partition return ["--num-mappers", "1"]
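# Illustration (hypothetical values): for a bigint partition key "id" on a
# table large enough for 8 partitions this would return
# ["--split-by", '"id"', "--num-mappers", "8"]; with no partition key, or a
# single partition, it falls back to ["--num-mappers", "1"].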
def parse_schedule(body: str) -> Tuple[Item, ...]: def selector(css_class: str) -> Callable: return compose( partial(lmap, str.strip), partial(select, notnone), partial(lmap, attrgetter("text")), partial(HtmlElement.cssselect, expr=css_class), ) tree: HtmlElement = html.fromstring(body) select_day_containers = partial(HtmlElement.cssselect, expr=config.MAI_DAY_CONTAINER_SELECTOR) select_item_containers = partial(HtmlElement.cssselect, expr=config.MAI_ITEM_CONTAINER_SELECTOR) select_dates = selector(config.MAI_DATE_SELECTOR) select_days_of_week = selector(config.MAI_DAY_OF_WEEK_SELECTOR) select_times = selector(config.MAI_TIME_SELECTOR) select_types = selector(config.MAI_TYPE_SELECTOR) select_titles = selector(config.MAI_TITLE_SELECTOR) select_teachers = selector(config.MAI_TEACHER_SELECTOR) select_places = compose( lkeep, partial(lmap, str.strip), partial(select, notnone), partial(HtmlElement.xpath, _path=config.MAI_PLACE_XPATH_SELECTOR), ) return tuple( Item( date=first(select_dates(day_container)) or "", week_day=first(select_days_of_week(day_container)) or "", time=first(select_times(item_container)) or "", type=first(select_types(item_container)) or "", title=first(select_titles(item_container)) or "", place=first(select_places(item_container)) or "", teachers=select_teachers(item_container), ) for day_container in select_day_containers(tree) for item_container in select_item_containers(day_container))
def _git_client_args(fs): kwargs = { "password": fs.fs_args.get("password"), "key_filename": first(fs.fs_args.get("client_keys", [])), } return kwargs
def test_clear(dvc, index): index.update(["1234.dir"], ["5678"]) index.clear() assert first(index.hashes()) is None
def dir(self): return first(self.backends.values()).dir
def output(self) -> str: return fn.first(self.pcirc.outputs)
def get_only_element_from_collection(one_element_collection): """Assert that the collection has exactly one element, then return that element.""" if len(one_element_collection) != 1: raise AssertionError(u'Expected a collection with exactly one element, but got: {}' .format(one_element_collection)) return funcy.first(one_element_collection)
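# Usage sketch (values are illustrative): this helper is a stricter cousin
# of funcy.first, which would silently return None on an empty collection.
#
#   get_only_element_from_collection([42])    # -> 42
#   get_only_element_from_collection([])      # raises AssertionError
#   get_only_element_from_collection([1, 2])  # raises AssertionError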
def test_update_py_params(tmp_dir, scm, dvc): tmp_dir.gen("copy.py", COPY_SCRIPT) tmp_dir.gen("params.py", "INT = 1\n") stage = dvc.run( cmd="python copy.py params.py metrics.py", metrics_no_cache=["metrics.py"], params=["params.py:INT"], name="copy-file", ) scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.py", "metrics.py"]) scm.commit("init") results = dvc.experiments.run(stage.addressing, params=["params.py:INT=2"], tmp_dir=True) exp_a = first(results) tree = scm.get_tree(exp_a) with tree.open(tmp_dir / "params.py") as fobj: assert fobj.read().strip() == "INT = 2" with tree.open(tmp_dir / "metrics.py") as fobj: assert fobj.read().strip() == "INT = 2" tmp_dir.gen( "params.py", "INT = 1\nFLOAT = 0.001\nDICT = {'a': 1}\n\n" "class Train:\n seed = 2020\n\n" "class Klass:\n def __init__(self):\n self.a = 111\n", ) stage = dvc.run( cmd="python copy.py params.py metrics.py", metrics_no_cache=["metrics.py"], params=["params.py:INT,FLOAT,DICT,Train,Klass"], name="copy-file", ) scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.py", "metrics.py"]) scm.commit("init") results = dvc.experiments.run( stage.addressing, params=["params.py:FLOAT=0.1,Train.seed=2121,Klass.a=222"], tmp_dir=True, ) exp_a = first(results) result = ("INT = 1\nFLOAT = 0.1\nDICT = {'a': 1}\n\n" "class Train:\n seed = 2121\n\n" "class Klass:\n def __init__(self):\n self.a = 222") def _dos2unix(text): if os.name != "nt": return text # NOTE: git on windows will use CRLF, so we have to convert it to LF # in order to compare with the original return text.replace("\r\n", "\n") tree = scm.get_tree(exp_a) with tree.open(tmp_dir / "params.py") as fobj: assert _dos2unix(fobj.read().strip()) == result with tree.open(tmp_dir / "metrics.py") as fobj: assert _dos2unix(fobj.read().strip()) == result tmp_dir.gen("params.py", "INT = 1\n") stage = dvc.run( cmd="python copy.py params.py metrics.py", metrics_no_cache=["metrics.py"], params=["params.py:INT"], name="copy-file", ) scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.py", "metrics.py"]) scm.commit("init") with pytest.raises(PythonFileCorruptedError): dvc.experiments.run(stage.addressing, params=["params.py:INT=2a"], tmp_dir=True)
def condition_action(txt, loc, toks): return make_filter(first(toks))
def _collect_graph(self, stages): """Generate a graph by using the given stages on the given directory The nodes of the graph are the stage's path relative to the root. Edges are created when the output of one stage is used as a dependency in other stage. The direction of the edges goes from the stage to its dependency: For example, running the following: $ dvc run -o A "echo A > A" $ dvc run -d A -o B "echo B > B" $ dvc run -d B -o C "echo C > C" Will create the following graph: ancestors <-- | C.dvc -> B.dvc -> A.dvc | | | --> descendants | ------- pipeline ------> | v (weakly connected components) Args: stages (list): used to build a graph, if None given, collect stages in the repository. Raises: OutputDuplicationError: two outputs with the same path StagePathAsOutputError: stage inside an output directory OverlappingOutputPathsError: output inside output directory CyclicGraphError: resulting graph has cycles """ import networkx as nx from pygtrie import Trie from dvc.exceptions import ( OutputDuplicationError, OverlappingOutputPathsError, StagePathAsOutputError, ) G = nx.DiGraph() stages = stages or self.stages outs = Trie() # Use trie to efficiently find overlapping outs and deps for stage in filter(bool, stages): # bug? not using it later for out in stage.outs: out_key = out.path_info.parts # Check for dup outs if out_key in outs: dup_stages = [stage, outs[out_key].stage] raise OutputDuplicationError(str(out), dup_stages) # Check for overlapping outs if outs.has_subtrie(out_key): parent = out overlapping = first(outs.values(prefix=out_key)) else: parent = outs.shortest_prefix(out_key).value overlapping = out if parent and overlapping: msg = ("Paths for outs:\n'{}'('{}')\n'{}'('{}')\n" "overlap. To avoid unpredictable behaviour, " "rerun command with non overlapping outs paths." ).format( str(parent), parent.stage.addressing, str(overlapping), overlapping.stage.addressing, ) raise OverlappingOutputPathsError(parent, overlapping, msg) outs[out_key] = out for stage in stages: out = outs.shortest_prefix(PathInfo(stage.path).parts).value if out: raise StagePathAsOutputError(stage, str(out)) # Building graph G.add_nodes_from(stages) for stage in stages: for dep in stage.deps: if dep.path_info is None: continue dep_key = dep.path_info.parts overlapping = [n.value for n in outs.prefixes(dep_key)] if outs.has_subtrie(dep_key): overlapping.extend(outs.values(prefix=dep_key)) G.add_edges_from((stage, out.stage) for out in overlapping) check_acyclic(G) return G
def __next__(self):
    # ``first`` returns None when the iterable is exhausted; compare against
    # None explicitly so falsy items (0, "", etc.) don't end iteration early.
    next_item = first(self.take(1))
    if next_item is None:
        raise StopIteration
    return next_item
def query(query, node): matcher = first(parser.parseString(query)) return matcher(node)
def split_foreach_name(name): group, *keys = name.rsplit(JOIN, maxsplit=1) return group, first(keys)
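# Illustration, assuming JOIN is the "@" separator dvc uses in
# foreach-generated stage names:
#
#   split_foreach_name("build@linux")  # -> ("build", "linux")
#   split_foreach_name("build")        # -> ("build", None), as first([]) is None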