Example #1
0
def buffer_logic(state):
    '''
    Buffer create/replace/reuse logic. The function name is not very good :(

       new_state    |   old_state   | same sources |     action
    ----------------|---------------|--------------|-----------------
        replace     |    replace    |    True      |  reuse buffer
        replace     |    replace    |    False     |  replace buffer
        replace     |   no-replace  |    True      |  create buffer (copy candidates)
        replace     |   no-replace  |    False     |  create buffer
       no-replace   |    replace    |    True      |  create buffer (copy candidates)
       no-replace   |    replace    |    False     |  create buffer
       no-replace   |   no-replace  |    True      |  reuse buffer
       no-replace   |   no-replace  |    False     |  create buffer

    A reusable buffer is looked for first, then a replaceable one, and as a
    last resort a new buffer is created.

    Returns:
        old_state (dict): In case a state was reused/replaced it is returned
        because it will be needed later on to compare it with the current
        state and determine whether the window should be resized/moved, etc...
    '''
    # We are only interested in buffers which are in the same container.
    # That's where the interesting reuse/replace logic is at.
    states = fn.where(variables.states, container=state['container'])

    with_same_sources = partial(same_sources, state)

    reusable_state = fn.first(fn.where(
        ifilter(with_same_sources, states),
        replace = state['replace']
    ))

    replaceable_state = fn.first(fn.where(
        ifilter(lambda x: not with_same_sources(x), states),
        replace = True
    ))

    old_state = None

    if reusable_state:
        state.update(fn.project(reusable_state, ['uid', 'buffer', 'sources']))
        old_state = reusable_state
        variables.states.remove(reusable_state)

    elif replaceable_state:
        state.update(fn.project(replaceable_state, ['uid', 'buffer']))
        state['sources'] = populated_candidates(state)
        set_buffer_contents(state['buffer'], aggregate_candidates(state))
        old_state = replaceable_state
        variables.states.remove(replaceable_state)

    else:
        same = find(with_same_sources, states)
        state['sources'] = (same and same['sources']) or populated_candidates(state)
        state['buffer'] = make_pyunite_buffer(state)

    return old_state
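For readers unfamiliar with the helpers, `fn` here is presumably the funcy module: `where` filters a sequence of dicts by key/value equality, `first` returns the first item (or None when nothing matches), and `project` copies a subset of keys. A minimal, self-contained sketch with made-up states:

import funcy as fn

# Illustrative state dicts only; the real ones also carry buffers, sources, etc.
states = [
    {'container': 'vertical', 'replace': True,  'uid': 1},
    {'container': 'vertical', 'replace': False, 'uid': 2},
]

match = fn.first(fn.where(states, container='vertical', replace=True))
print(match)                                      # {'container': 'vertical', 'replace': True, 'uid': 1}
print(fn.project(match, ['uid']))                 # {'uid': 1}
print(fn.first(fn.where(states, container='x')))  # None -- no candidate found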
Example #2
0
def get_matrix_filename(series_id, platform_id):
    filenames = list(matrix_filenames(series_id, platform_id))
    mirror_filenames = (os.path.join(conf.SERIES_MATRIX_MIRROR, filename) for filename in filenames)
    mirror_filename = first(filename for filename in mirror_filenames if os.path.isfile(filename))
    if mirror_filename:
        return mirror_filename

    for filename in filenames:
        print 'Loading URL', conf.SERIES_MATRIX_URL + filename, '...'
        try:
            res = urllib2.urlopen(conf.SERIES_MATRIX_URL + filename)
        except urllib2.URLError:
            pass
        else:
            mirror_filename = os.path.join(conf.SERIES_MATRIX_MIRROR, filename)
            print 'Cache to', mirror_filename

            directory = os.path.dirname(mirror_filename)
            if not os.path.exists(directory):
                os.makedirs(directory)
            with open(mirror_filename, 'wb') as f:
                shutil.copyfileobj(res, f)

            return mirror_filename

    raise LookupError("Can't find matrix file for series %s, platform %s"
                      % (series_id, platform_id))
Example #3
0
File: help.py Project: prologic/kdb
    def help(self, source, target, args):
        """Display help for the given command or plugin.

        Syntax: HELP [<command>] | [<plugin>]
        """

        plugins = self.parent.bot.plugins
        command = self.parent.bot.command

        if not args:
            q = "help"
        else:
            q = first(args.split(" ", 1))

        if q in plugins:
            msg = get_plugin_help(plugins[q])
        elif q in command:
            msg = get_command_help(command[q], q)
        else:
            msg = None

        if msg is None:
            msg = (
                "No help available for: {0:s}. "
                "To get a list of plugins, type: plugins"
            ).format(q)

        return format_msg(msg)
Example #4
0
def test_scope_of_global():
    """Make sure the scope of a global is the entire program."""
    js = """a = 0;"""
    ast = parse(js)
    raise SkipTest
    assignment = first(assignments(ast))
    eq_(assignment.scope_of(assignment["left"]["name"])["type"], "Program")
Example #5
0
File: help.py Project: prologic/kdb
    def info(self, source, target, args):
        """Display info for the given plugin.

        Syntax: INFO <plugin>
        """

        plugins = self.parent.bot.plugins

        if not args:
            return "No plugin specified."

        name = first(args.split(" ", 1)).lower()

        if name in plugins:
            plugin = plugins[name]
            description = getattr(plugin, "__doc__", name)
            description = description.split("\n", 1)[0]
            version = getattr(plugin, "__version__", "Unknown")
            author = getattr(plugin, "__author__", "Unknown")
            msg = "{0:s} - {1:s} v{2:s} by {3:s}".format(
                name, description, version, author
            )
        else:
            msg = (
                "No info available for: {0:s}. "
                "To get a list of plugins, type: plugins"
            ).format(name)

        return format_msg(msg)
Example #6
0
    def host(self, source, target, args):
        """Resolve a hostname or ip address.

        Syntax: HOST [<hostname>] | [<ip>]
        """

        if not args:
            return "No hostname or ip address specified."

        tokens = args.split(" ", 1)
        host = first(tokens)

        isip = all(
            c.isdigit()
            for c in host.replace(".", "")
        )

        if isip:
            try:
                name, aliases, addresses = gethostbyaddr(host)
                msg = "{0:s} -> {1:s}".format(host, name)
            except Exception as error:
                msg = log("ERROR: {0:s}", error)
                log(format_exc())
        else:
            try:
                address = gethostbyname(host)
                msg = "{0:s} -> {1:s}".format(host, address)
            except Exception as error:
                msg = log("ERROR: {0:s}", error)
                log(format_exc())

        return msg
Example #7
0
def register_api_resource(api, resource):
  if funcy.is_seqcoll(resource):
    cls, url, endpoint = (
        funcy.first(resource),
        funcy.second(resource),
        funcy.nth(2, resource),
      )
    api.add_resource(cls, url, endpoint=endpoint)
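For reference, a quick check of the funcy accessors used above, with an illustrative resource triple (the real call also needs a Flask-RESTful `Api` instance):

import funcy

resource = ("UserResource", "/users/<int:id>", "user_detail")  # illustrative values
print(funcy.is_seqcoll(resource))  # True -- lists and tuples qualify
print(funcy.first(resource))       # 'UserResource'
print(funcy.second(resource))      # '/users/<int:id>'
print(funcy.nth(2, resource))      # 'user_detail'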
Example #8
0
def test_scope_of_initialized_variable():
    js = """function smoo() {
                var a = 0;
            }"""
    ast = parse(js)
    raise SkipTest
    assignment = first(assignments(ast))
    eq_(assignment.scope_of(assignment["id"]["name"])["id"]["name"], "smoo")
Example #9
0
def test_scope_of_global_function():
    js = """function smoo() {
                var a;
                a = 0;
            }"""
    ast = parse(js)
    raise SkipTest
    assignment = first(assignments(ast))
    eq_(assignment.scope_of(assignment["left"]["name"])["type"], "FunctionDeclaration")
Example #10
0
    def nearest_scope_holder(self):
        """Return the nearest node that can have its own scope, potentially
        including myself.

        This will be either a FunctionDeclaration or a Program (for now).

        """
        return first(n for n in self.walk_up() if
                     isinstance(n, (FunctionDeclaration, Program)))
Example #11
0
def mergesort(filename, output=None, key=None, maxitems=1e6, progress=True):
    """Given an input file sort it by performing a merge sort on disk.

    :param filename: Either a filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type filename:  ``str`` or ``py._path.local.LocalPath``

    :param output: An optional output filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type output:  ``str`` or ``py._path.local.LocalPath`` or ``None``

    :param key: An optional key to sort the data on.
    :type key:  ``function`` or ``None``

    :param maxitems: Maximum number of items to hold in memory at a time.
    :type maxitems:  ``int``

    :param progress: Whether or not to display a progress bar
    :type progress: ``bool``

    This uses ``py._path.local.LocalPath.make_numbered_dir`` to create temporary scratch space to work
    with when splitting the input file into sorted chunks. The mergesort is processed iteratively in-memory
    using the ``~merge`` function, which is almost identical to ``~heapq.merge`` but adds support for
    an optional key function.
    """

    p = filename if isinstance(filename, LocalPath) else LocalPath(filename)
    output = p if output is None else output
    key = key if key is not None else lambda x: x

    scratch = LocalPath.make_numbered_dir(prefix="mergesort-")

    nlines = sum(1 for line in p.open("r"))

    # Compute a reasonable chunksize < maxitems
    chunksize = first(ifilter(lambda x: x < maxitems, imap(lambda x: nlines / (2**x), count(1))))

    # Split the file up into n sorted files
    if progress:
        bar = ProgressBar("Split/Sorting Data", max=(nlines / chunksize))
    for i, items in enumerate(ichunks(chunksize, jsonstream(p))):
        with scratch.ensure("{0:d}.json".format(i)).open("w") as f:
            f.write("\n".join(map(dumps, sorted(items, key=key))))
        if progress:
            bar.next()
    if progress:
        bar.finish()

    q = scratch.listdir("*.json")
    with output.open("w") as f:
        if progress:
            bar = ProgressBar("Merge/Sorting Data", max=nlines)
        for item in merge(*imap(jsonstream, q)):
            f.write("{0:s}\n".format(dumps(item)))
            if progress:
                bar.next()
        if progress:
            bar.finish()
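The docstring mentions a ``~merge`` helper that mirrors ``~heapq.merge`` with an optional key. On Python 3.5+ ``heapq.merge(*iterables, key=...)`` already supports this; a rough sketch of the same idea (not the project's actual implementation) could look like:

import heapq

def merge_with_key(*iterables, **kwargs):
    # Rough sketch only: pop the smallest head by key, then refill from the
    # iterator it came from. The index i breaks ties between equal keys.
    key = kwargs.get("key") or (lambda x: x)
    heap, iters = [], [iter(it) for it in iterables]
    for i, it in enumerate(iters):
        for item in it:
            heapq.heappush(heap, (key(item), i, item))
            break
    while heap:
        _, i, item = heapq.heappop(heap)
        yield item
        for nxt in iters[i]:
            heapq.heappush(heap, (key(nxt), i, nxt))
            break

print(list(merge_with_key([1, 4, 7], [2, 3, 9], key=lambda x: x)))  # [1, 2, 3, 4, 7, 9]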
Example #12
0
def test_scope_of_inner_function():
    js = """function smoo() {
                function bar() {
                    a = 0;
                }
            }"""
    ast = parse(js)
    raise SkipTest
    assignment = first(assignments(ast))
    eq_(assignment.scope_of(assignment["left"]["name"])["type"], "Program")
Example #13
0
def transform_event(kudago_event, parent_id, children_count):
    tags = kudago_event['tags']
    categories = kudago_event['categories']
    place = kudago_event['place']

    kind = find_first(('festival', 'exhibition', 'theater'), categories)
    dates = filter(is_date_finite, kudago_event['dates'])
    if kind not in ('festival', 'exhibition'):
        dates = flatten(map(split_date, dates))
    dates = list(sorted(map(transform_date, dates), key=itemgetter('start')))

    participants = [
        {'role': p['role']['slug'], 'agent': p['agent']['id']}
        for p in kudago_event['participants']
    ]

    return {
        '_id': kudago_event['id'],
        '_type': 'event',

        'kind': kind,
        'is_for_kids': 'kids' in categories,
        'is_premiere': 'премьера' in tags,

        'name': kudago_event['short_title'],
        'full_name': kudago_event['title'],
        'tagline': kudago_event['tagline'],
        'lead': strip_links(kudago_event['description']),
        'description': strip_links(kudago_event['body_text']),

        'location': kudago_event['location']['slug'],
        'place': place['id'] if place else None,
        'parent': parent_id,
        'participants': participants,

        'age_restriction': kudago_event['age_restriction'],
        'price': transform_price(kudago_event['price'], kudago_event['is_free']),

        'dates_count': len(dates),
        'children_count': children_count,
        'favorites_count': kudago_event['favorites_count'],
        'comments_count': kudago_event['comments_count'],

        'start': dates[0]['start'] if dates else None,
        'end': dates[-1]['end'] if dates else None,

        'first_image': first(kudago_event['images']),
        'images': kudago_event['images'],
        'dates': dates,

        'source': {
            'name': 'kudago.com',
            'url': kudago_event['site_url'],
        }
    }
Example #14
0
def test_scope_of_inner_reference():
    js = """function smoo() {
                var a;

                function bar() {
                    a = 0;
                }
            }"""
    ast = parse(js)
    raise SkipTest
    assignment = first(assignments(ast))
    eq_(assignment.scope_of(assignment["left"]["name"])["id"]["name"], "smoo")
Example #15
0
    def auth(self, source, target, args):
        """Authorize a Remote Plugin

        Syntax: AUTH <plugin> <password>
        """

        if not args:
            yield "No plugin specified."
            return

        tokens = args.split(" ", 2)
        plugin = first(tokens)
        password = second(tokens)

        data = self.parent.data.rplugins
        config = self.parent.config["rplugins"]

        if password != config["password"]:
            yield "Authorization failed."
            return

        if plugin in data["pending"]:
            url = data["pending"][plugin]
            del data["pending"][plugin]
            data["allowed"][plugin] = True

            value = yield self.call(
                task(
                    verify_plugin,
                    url,
                    config["path"],
                    data["allowed"],
                ),
                "workerprocesses"
            )

            allowed, plugin = value.value
            if allowed:
                msg = log(
                    "Remote Plugin {0:s} ({1:s}) successfully authorized.",
                    url, plugin
                )
                yield msg
            else:
                del data["allowed"][plugin]

                msg = log(
                    "Remote Plugin {0:s} ({1:s}) failed authorization.",
                    url, plugin
                )
                yield msg
        else:
            yield log("Remote Plugin {0:s} not found.", plugin)
Example #16
0
def get_supervised_dataset(race_data, race_factors):

    race_bins = get_bins(race_data)
    race_bin_groups = pd.DataFrame.from_dict(race_bins).groupby('race_id')

    # Input, output
    data_set = SupervisedDataSet(6, 15)

    for race_id, race_bin in race_bin_groups:

        # Skip races with 10 or fewer populated bins
        if not np.count_nonzero(race_bin.population_pct) > 10:
            continue

        race_factor = race_factors[race_factors.race_id == race_id]

        # If race has missing factor data then skip
        if race_factor.empty:
            continue

        input_factors = [first(race_factor.high_temp) / 100.0,
                         first(race_factor.low_temp) / 100.0,
                         first(race_factor.high_humidity) / 100.0,
                         first(race_factor.low_humidity) / 100.0,
                         first(race_factor.starting_elevation) / 10000.0,
                         first(race_factor.gross_elevation_gain) / 10000.0
                         ]

        output_factors = race_bin.population_pct.tolist()

        data_set.appendLinked(input_factors, output_factors)

    return data_set
Example #17
0
    def part(self, source, target, args):
        """Leave the specified channel

        Syntax: PART <channel> [<message>]
        """

        if not args:
            return "No channel specified."

        tokens = args.split(" ", 1)
        channel, message = first(tokens), second(tokens) or "Leaving"

        self.fire(PART(channel, message), "bot")
Example #18
0
def get_bins(race_data):
    """ Group races and create bins (time ranges) of BIN_SIZE. For each
        bin find out pct of racers in that bin and avg time of that bin.
        Also assign bin number to identify racers and their bin they fall
        into later on.
    """
    bin_data = []
    race_groups = race_data.groupby('race_id')

    for race_id, race_group in race_groups:

        top_75_percentile = race_group[
            race_group.final_time < race_group.final_time.quantile(.75)]

        # Skip races with missing data.
        if len(top_75_percentile) == 0:
            continue

        bins = pd.cut(top_75_percentile.final_time, BIN_SIZE, right=False)

        # fastest = time.strftime(
        #     '%H:%M:%S', time.gmtime(min(top_75_percentile.final_time)))
        # slowest = time.strftime(
        #     '%H:%M:%S', time.gmtime(max(top_75_percentile.final_time)))

        # print "fastest =>", fastest
        # print "slowest =>", slowest

        bin_number = 0

        for bin_key, bin_group in top_75_percentile.groupby(bins):

            bin_number += 1

            population_pct = len(bin_group) / float(len(top_75_percentile))
            bin_avg_time = bin_group.final_time.mean()

            if math.isnan(bin_avg_time):
                # Yes Ugly. Pandas bin key is a string.
                # This split gives us bin's lower/upper range time.
                lower_range = float(first(bin_key.split(',')).strip('['))
                upper_range = float(last(bin_key.split(',')).strip(')'))

                bin_avg_time = np.mean([lower_range, upper_range])

            bin_data.append({'race_id': int(race_id),
                             'bin_number': bin_number,
                             'population_pct': population_pct,
                             'bin_avg_time': bin_avg_time
                             })
    return bin_data
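The string parsing of `bin_key` above can be checked in isolation; with older pandas versions, `pd.cut(..., right=False)` labels bins with strings such as '[1800.0, 1900.0)':

from funcy import first, last

bin_key = "[1800.0, 1900.0)"                               # example pandas bin label
lower_range = float(first(bin_key.split(',')).strip('['))
upper_range = float(last(bin_key.split(',')).strip(')'))
print(lower_range, upper_range)                            # 1800.0 1900.0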
Example #19
0
    def get_editor(cls, editor_id):
        """Getting editor by `editor_id`. Param editor_id is index in tuple
        from method cls.get_choices()

        Usage:

        >>> EditorTypesEnum.get_editor(editor_id=0)
        (<class 'redactor.widgets.RedactorEditor'>, {})

        :param editor_id: int
        :return: tuple
        """
        return first(
            (e for i, e in enumerate(cls.EDITOR_TYPES) if i == editor_id))
Example #20
0
    def remove(self, source, target, args):
        """Remove a channel from startup join list.

        Syntax: REMOVE <channel>
        """

        if not args:
            return "No channels specified."

        channel = first(args.split(" ", 1))

        if channel in self.parent.parent.channels:
            self.parent.parent.channels.remove(channel)
            return "{0:s} removed from startup join list".format(channel)
        return "{0:s} not in join startup list".format(channel)
Example #21
0
    def add(self, source, target, args):
        """Add a channel to startup join list.

        Syntax: ADD <channel>
        """

        if not args:
            return "No channels specified."

        channel = first(args.split(" ", 1))

        if channel not in self.parent.parent.channels:
            self.parent.parent.channels.append(channel)
            return "Added {0:s} to startup join list".format(channel)
        return "{0:s} already in startup join list".format(channel)
Example #22
0
File: build.py Project: bitslab/dxr
def swap_alias(alias, index, es):
    """Point an ES alias to a new index, and delete the old index.

    :arg index: The new index name

    """
    # Get the index the alias currently points to.
    old_index = first(es.aliases(alias))

    # Make the alias point to the new index.
    removal = ([{'remove': {'index': old_index, 'alias': alias}}] if
               old_index else [])
    es.update_aliases(removal + [{'add': {'index': index, 'alias': alias}}])  # atomic

    # Delete the old index.
    if old_index:
        es.delete_index(old_index)
Example #23
0
File: core.py Project: prologic/kdb
    def load(self, source, target, args):
        """Load a plugin

        Syntax: LOAD <plugin>
        """

        if not args:
            yield "No plugin specified."

        plugin = first(args.split(" ", 1))

        plugins = self.parent.bot.plugins

        if plugin in plugins:
            yield log("Plugin {0:s} already loaded!", plugin)
        else:
            yield self.fire(load(plugin), "plugins")
Example #24
0
    def join(self, source, target, args):
        """Join the specified channel.

        Syntax: JOIN <channel>
        """

        if not args:
            return "No channel specified."

        channel = first(args.split(" ", 1))

        if channel:
            msg = log("Joining channel: {0:s}", channel)
            self.fire(JOIN(channel), "bot")
        else:
            msg = log("No channel specified.")

        return msg
Example #25
0
def transform_place(kudago_place, events_count):
    categories = kudago_place['categories']

    kind = find_first(('park', 'cafe', 'museums', 'theatre'), categories)
    if kind == 'theatre':
        kind = 'theater'
    elif kind == 'museums':
        kind = 'museum'

    return {
        '_id': kudago_place['id'],
        '_type': 'place',

        'kind': kind,
        'is_for_kids': 'kids' in categories,
        'is_stub': kudago_place['is_stub'],

        'name': kudago_place['short_title'],
        'full_name': kudago_place['title'],
        'lead': strip_links(kudago_place['description']),
        'description': strip_links(kudago_place['body_text']),

        'location': kudago_place['location'],
        'address': kudago_place['address'],
        'subway': kudago_place['subway'],
        'coords': transform_coords(kudago_place['coords']),

        'age_restriction': kudago_place['age_restriction'],
        'phone_numbers': transform_phone(kudago_place['phone']),
        'working_hours': kudago_place['timetable'],
        'url': kudago_place['foreign_url'],

        'events_count': events_count,
        'favorites_count': kudago_place['favorites_count'],
        'comments_count': kudago_place['comments_count'],

        'first_image': first(kudago_place['images']),
        'images': kudago_place['images'],

        'source': {
            'name': 'kudago.com',
            'url': kudago_place['site_url'],
        }
    }
Example #26
0
File: core.py Project: prologic/kdb
    def reload(self, source, target, args):
        """Reload an already loaded plugin

        Syntax: RELOAD <plugin>
        """

        if not args:
            yield "No plugin specified."

        plugin = first(args.split(" ", 1))

        plugins = self.parent.bot.plugins

        if plugin not in plugins:
            yield log("Plugin {0:s} is not loaded!", plugin)
        else:
            yield self.fire(unload(plugin), "plugins")
            yield
            yield self.fire(load(plugin), "plugins")
Example #27
0
    def add(self, source, target, args):
        """Add a Remote Plugin

        Syntax: ADD <url>
        """

        if not args:
            yield "No URL specified."

        url = first(args.split(" ", 1))

        data = self.parent.data.rplugins
        config = self.parent.config["rplugins"]

        if url in data["enabled"]:
            yield log("Remote Plugin {0:s} already loaded!", url)
        else:
            value = yield self.call(
                task(
                    verify_plugin,
                    url,
                    config["path"],
                    data["allowed"],
                ),
                "workerprocesses"
            )

            allowed, plugin = value.value
            if allowed:
                msg = log(
                    "Remote Plugin {0:s} ({1:s}) is already authorized.",
                    url, plugin
                )
                yield msg
            else:
                data["pending"][plugin] = url

                msg = log(
                    "Remote Plugin {0:s} ({1:s}) pending authorization.",
                    url, plugin
                )
                yield msg
Example #28
0
    def greetings(self, source, target, args):
        """Manage greetings

        Syntax: GREETINGS <sub-command>

        See: COMMANDS greetings
        """

        if not args:
            yield "No command specified."

        tokens = args.split(" ", 1)
        command, args = first(tokens), (second(tokens) or "")

        event = cmd.create(command, source, target, args)

        try:
            yield self.call(event, "commands:greetings")
        except Exception as error:
            yield "ERROR: {0:s}".format(error)
Example #29
0
File: rss.py Project: prologic/kdb
    def radd(self, source, target, args):
        """Add a new RSS feed to be checked at the given interval.

        Interval is in minutes.

        Syntax: RADD <url> [<interval>]
        """

        if not args:
            yield "No URL specified."

        tokens = args.split(" ", 2)
        url = first(tokens)
        interval = second(tokens) or "60"

        try:
            interval = int(interval)
        except Exception as error:
            log("ERROR: {0:s}\n{1:s}", error, format_exc())
            yield "Invalid interval specified."
Example #30
0
    def rplugins(self, source, target, args):
        """Manage Remote Plugins

        Syntax: RPLUGINS <command>

        See: COMMANDS rplugins
        """

        if not args:
            yield "No command specified."

        tokens = args.split(" ", 1)
        command, args = first(tokens), (second(tokens) or "")
        command = command.encode("utf-8")

        event = cmd.create(command, source, target, args)

        try:
            yield (yield self.call(event, "commands:rplugins"))
        except Exception as error:
            yield "ERROR: {0:s}".format(error)
Example #31
0
def test_live_checkpoints_resume(tmp_dir, scm, dvc, live_checkpoint_stage):
    results = dvc.experiments.run(
        live_checkpoint_stage.addressing, params=["foo=2"], tmp_dir=False
    )

    checkpoint_resume = first(results)

    dvc.experiments.run(
        live_checkpoint_stage.addressing,
        checkpoint_resume=checkpoint_resume,
        tmp_dir=False,
    )

    results = dvc.experiments.show()
    assert checkpoints_metric(results, "logs.json", "step") == [
        3,
        3,
        2,
        1,
        1,
        0,
    ]
    assert checkpoints_metric(results, "logs.json", "metric1") == [
        4,
        4,
        3,
        2,
        2,
        1,
    ]
    assert checkpoints_metric(results, "logs.json", "metric2") == [
        8,
        8,
        6,
        4,
        4,
        2,
    ]
Example #32
0
def test_pull_imported_stage_from_subrepos(
    tmp_dir, dvc, erepo_dir, is_dvc, files
):
    subrepo = erepo_dir / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen
    with subrepo.chdir():
        gen(files, commit="files in subrepo")

    key = first(files)
    path = os.path.join("subrepo", key)
    dvc.imp(os.fspath(erepo_dir), path, out="out")

    # clean everything
    remove(dvc.odb.local.cache_dir)
    remove("out")
    makedirs(dvc.odb.local.cache_dir)

    stats = dvc.pull(["out.dvc"])

    expected = [f"out{os.sep}"] if isinstance(files[key], dict) else ["out"]
    assert stats["added"] == expected
    assert (tmp_dir / "out").read_text() == files[key]
Example #33
0
def test_reset_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, caplog):
    from dvc.repo.experiments.base import CheckpointExistsError

    dvc.experiments.run(checkpoint_stage.addressing, name="foo")
    scm.gitpython.repo.git.reset(hard=True)
    scm.gitpython.repo.git.clean(force=True)

    with pytest.raises(CheckpointExistsError):
        dvc.experiments.run(checkpoint_stage.addressing,
                            name="foo",
                            params=["foo=2"])

    results = dvc.experiments.run(checkpoint_stage.addressing,
                                  params=["foo=2"],
                                  name="foo",
                                  force=True)
    exp = first(results)

    tree = scm.get_tree(exp)
    with tree.open(tmp_dir / "foo") as fobj:
        assert fobj.read().strip() == "5"
    with tree.open(tmp_dir / "metrics.yaml") as fobj:
        assert fobj.read().strip() == "foo: 2"
Example #34
0
def _get_file_path(kwargs):
    from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("plots", []),
            kwargs.get("plots_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
            kwargs.get("checkpoints", []),
            without([kwargs.get("live", None)], None),
        )
    )

    return (
        os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX
        if out
        else DVC_FILE
    )
Example #35
0
def test_get_used_cache(exists, expected_message, mocker, caplog):
    stage = mocker.MagicMock()
    mocker.patch.object(stage, "__str__", return_value="stage: 'stage.dvc'")
    mocker.patch.object(stage, "addressing", "stage.dvc")
    mocker.patch.object(
        stage.repo.tree.dvcignore,
        "check_ignore",
        return_value=_no_match("path"),
    )

    output = BaseOutput(stage, "path")

    mocker.patch.object(output, "use_cache", True)
    mocker.patch.object(stage, "is_repo_import", False)
    mocker.patch.object(BaseOutput,
                        "checksum",
                        new_callable=mocker.PropertyMock).return_value = None
    mocker.patch.object(BaseOutput, "exists",
                        new_callable=mocker.PropertyMock).return_value = exists

    with caplog.at_level(logging.WARNING, logger="dvc"):
        assert isinstance(output.get_used_cache(), NamedCache)
    assert first(caplog.messages) == expected_message
Example #36
0
def prepare_file_path(kwargs):
    """Determine file path from the first output name.

    Used in creating .dvc files.
    """
    from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("plots", []),
            kwargs.get("plots_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
            kwargs.get("checkpoints", []),
            without([kwargs.get("live", None)], None),
        ))

    return (os.path.basename(os.path.normpath(out)) +
            DVC_FILE_SUFFIX if out else DVC_FILE)
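A hypothetical call, assuming the usual dvc constants (DVC_FILE_SUFFIX being ".dvc" and DVC_FILE the default single-stage file name):

print(prepare_file_path({"outs": ["data/model.pkl"]}))  # e.g. 'model.pkl.dvc'
print(prepare_file_path({}))                            # falls back to DVC_FILE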
Example #37
0
def test_resume_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker, last):
    with pytest.raises(DvcException):
        if last:
            dvc.experiments.run(
                checkpoint_stage.addressing,
                checkpoint_resume=Experiments.LAST_CHECKPOINT,
            )
        else:
            dvc.experiments.run(checkpoint_stage.addressing,
                                checkpoint_resume="foo")

    results = dvc.experiments.run(checkpoint_stage.addressing,
                                  params=["foo=2"])
    if last:
        exp_rev = Experiments.LAST_CHECKPOINT
    else:
        exp_rev = first(results)

    dvc.experiments.run(checkpoint_stage.addressing, checkpoint_resume=exp_rev)

    assert (tmp_dir / "foo").read_text() == "10"
    assert (tmp_dir / ".dvc" / "experiments" /
            "metrics.yaml").read_text().strip() == "foo: 2"
Example #38
0
    def _transfer(func, from_remote, to_remote):
        ret = []

        runs = from_remote.fs.path.join(from_remote.fs_path, "runs")
        if not from_remote.fs.exists(runs):
            return []

        from_path = from_remote.fs.path
        for src in from_remote.fs.find(runs):
            rel = from_path.relpath(src, from_remote.fs_path)
            dst = to_remote.fs.path.join(to_remote.fs_path, rel)
            key = to_remote.fs.path.parent(dst)
            # check if any build cache already exists for this key
            # TODO: check if MaxKeys=1 or something like that applies
            # or otherwise this will take a lot of time!
            if to_remote.fs.exists(key) and first(to_remote.fs.find(key)):
                continue
            func(src, dst)
            ret.append(
                (from_path.name(from_path.parent(src)), from_path.name(src))
            )

        return ret
Example #39
0
def transform_place(kudago_place, events_count):
    categories = kudago_place['categories']

    kind = find_first(('park', 'cafe', 'museums', 'theatre'), categories)
    if kind == 'theatre':
        kind = 'theater'
    elif kind == 'museums':
        kind = 'museum'

    return {
        '_id': kudago_place['id'],
        '_type': 'place',
        'kind': kind,
        'is_for_kids': 'kids' in categories,
        'is_stub': kudago_place['is_stub'],
        'name': kudago_place['short_title'],
        'full_name': kudago_place['title'],
        'lead': strip_links(kudago_place['description']),
        'description': strip_links(kudago_place['body_text']),
        'location': kudago_place['location'],
        'address': kudago_place['address'],
        'subway': kudago_place['subway'],
        'coords': transform_coords(kudago_place['coords']),
        'age_restriction': kudago_place['age_restriction'],
        'phone_numbers': transform_phone(kudago_place['phone']),
        'working_hours': kudago_place['timetable'],
        'url': kudago_place['foreign_url'],
        'events_count': events_count,
        'favorites_count': kudago_place['favorites_count'],
        'comments_count': kudago_place['comments_count'],
        'first_image': first(kudago_place['images']),
        'images': kudago_place['images'],
        'source': {
            'name': 'kudago.com',
            'url': kudago_place['site_url'],
        }
    }
Example #40
0
    def _resolve_stage(self, context: Context, name: str, definition) -> dict:
        definition = deepcopy(definition)
        wdir = self._resolve_wdir(context, definition.get(WDIR_KWD))
        if self.wdir != wdir:
            logger.debug("Stage %s has different wdir than dvc.yaml file",
                         name)

        contexts = []
        params_yaml_file = wdir / DEFAULT_PARAMS_FILE
        if self.global_ctx_source != params_yaml_file:
            if os.path.exists(params_yaml_file):
                contexts.append(
                    Context.load_from(self.repo.tree, str(params_yaml_file)))
            else:
                logger.debug("%s does not exist for stage %s",
                             params_yaml_file, name)

        params_file = definition.get(PARAMS_KWD, [])
        for item in params_file:
            if item and isinstance(item, dict):
                contexts.append(
                    Context.load_from(self.repo.tree, str(wdir / first(item))))

        context.merge_update(*contexts)

        logger.trace(  # pytype: disable=attribute-error
            "Context during resolution of stage %s:\n%s", name, context)

        with context.track():
            stage_d = resolve(definition, context)

        params = stage_d.get(PARAMS_KWD, []) + self._resolve_params(
            context, wdir)

        if params:
            stage_d[PARAMS_KWD] = params
        return {name: stage_d}
Example #41
0
    def _load_outs(cls, stage, data, typ=None):
        from dvc.output.base import BaseOutput

        d = []
        for key in data:
            if isinstance(key, str):
                entry = {BaseOutput.PARAM_PATH: key}
                if typ:
                    entry[typ] = True
                d.append(entry)
                continue

            assert isinstance(key, dict)
            assert len(key) == 1

            path = first(key)
            extra = key[path]

            if not typ:
                d.append({BaseOutput.PARAM_PATH: path, **extra})
                continue

            entry = {BaseOutput.PARAM_PATH: path}

            persist = extra.pop(BaseOutput.PARAM_PERSIST, False)
            if persist:
                entry[BaseOutput.PARAM_PERSIST] = persist

            cache = extra.pop(BaseOutput.PARAM_CACHE, True)
            if not cache:
                entry[BaseOutput.PARAM_CACHE] = cache

            entry[typ] = extra or True

            d.append(entry)

        stage.outs.extend(output.loadd_from(stage, d))
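The `first(key)` call leans on the fact that iterating a mapping yields its keys, so for the asserted one-key entries it returns the output path; for example:

from funcy import first

key = {"model.pkl": {"cache": False, "persist": True}}  # illustrative one-key out entry
path = first(key)    # 'model.pkl'
extra = key[path]    # {'cache': False, 'persist': True}
print(path, extra)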
Example #42
0
def test_continue_checkpoint(tmp_dir, scm, dvc, mocker, last):
    tmp_dir.gen("checkpoint.py", CHECKPOINT_SCRIPT)
    tmp_dir.gen("params.yaml", "foo: 1")
    stage = dvc.run(
        cmd="python checkpoint.py foo 5 params.yaml metrics.yaml",
        metrics_no_cache=["metrics.yaml"],
        params=["foo"],
        outs_persist=["foo"],
        always_changed=True,
        name="checkpoint-file",
    )
    scm.add([
        "dvc.yaml",
        "dvc.lock",
        "checkpoint.py",
        "params.yaml",
        "metrics.yaml",
    ])
    scm.commit("init")

    results = dvc.experiments.run(stage.addressing,
                                  checkpoint=True,
                                  params=["foo=2"])
    if last:
        exp_rev = ":last"
    else:
        exp_rev = first(results)

    dvc.experiments.run(
        stage.addressing,
        checkpoint=True,
        checkpoint_continue=exp_rev,
    )

    assert (tmp_dir / "foo").read_text() == "10"
    assert (tmp_dir / ".dvc" / "experiments" /
            "metrics.yaml").read_text().strip() == "foo: 2"
Example #43
0
def test_diff_head(tmp_dir, scm, dvc, exp_stage):
    results = dvc.experiments.run(exp_stage.addressing, params=["foo=2"])
    exp = first(results)

    assert dvc.experiments.diff(a_rev="HEAD", b_rev=exp) == {
        "params": {
            "params.yaml": {
                "foo": {
                    "diff": 1,
                    "old": 1,
                    "new": 2
                }
            }
        },
        "metrics": {
            "metrics.yaml": {
                "foo": {
                    "diff": 1,
                    "old": 1,
                    "new": 2
                }
            }
        },
    }
Example #44
0
def distribution_settings():
    error = None
    form = DistributionSettingsForm()

    entries = db.session.query(DistributionSettings).order_by(
        desc(DistributionSettings.id), ).limit(25).all()

    last_entry = first(entries)

    if last_entry:
        if not form.creator_rewards_pool.data:
            form.creator_rewards_pool.data = last_entry.creator_rewards_pool
        if not form.voter_rewards_pool.data:
            form.voter_rewards_pool.data = last_entry.voter_rewards_pool
        if not form.votes_per_user.data:
            form.votes_per_user.data = last_entry.votes_per_user
        if not form.min_reward.data:
            form.min_reward.data = last_entry.min_reward

    if form.validate_on_submit():
        setting = DistributionSettings(
            creator_rewards_pool=int(form.creator_rewards_pool.data),
            voter_rewards_pool=int(form.voter_rewards_pool.data),
            votes_per_user=int(form.votes_per_user.data),
            min_reward=int(form.min_reward.data),
            created_at=dt.datetime.utcnow())
        db.session.add(setting)
        db.session.commit()
        return redirect(url_for('.distribution_settings'))

    return render_template(
        'distribution_settings.html',
        form=form,
        error=error,
        entries=entries,
    )
Example #45
0
def test_new_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker,
                        workspace):
    new_mock = mocker.spy(dvc.experiments, "new")
    results = dvc.experiments.run(checkpoint_stage.addressing,
                                  params=["foo=2"],
                                  tmp_dir=not workspace)
    exp = first(results)

    new_mock.assert_called_once()
    for rev in dvc.brancher([exp]):
        if rev == "workspace":
            continue
        tree = dvc.repo_tree
        with tree.open(tmp_dir / "foo") as fobj:
            assert fobj.read().strip() == "5"
        with tree.open(tmp_dir / "metrics.yaml") as fobj:
            assert fobj.read().strip() == "foo: 2"

    if workspace:
        assert scm.get_ref(EXEC_APPLY) == exp
    assert scm.get_ref(EXEC_CHECKPOINT) == exp
    if workspace:
        assert (tmp_dir / "foo").read_text().strip() == "5"
        assert (tmp_dir / "metrics.yaml").read_text().strip() == "foo: 2"
Example #46
0
File: trie.py Project: zivzone/dvc
def build_outs_trie(stages):
    outs = Trie()

    for stage in filter(bool, stages):  # bug? not using it later
        for out in stage.outs:
            out_key = out.path_info.parts

            # Check for dup outs
            if out_key in outs:
                dup_stages = [stage, outs[out_key].stage]
                raise OutputDuplicationError(str(out), dup_stages)

            # Check for overlapping outs
            if outs.has_subtrie(out_key):
                parent = out
                overlapping = first(outs.values(prefix=out_key))
            else:
                parent = outs.shortest_prefix(out_key).value
                overlapping = out
            if parent and overlapping:
                msg = (
                    "The output paths:\n'{}'('{}')\n'{}'('{}')\n"
                    "overlap and are thus in the same tracked directory.\n"
                    "To keep reproducibility, outputs should be in separate "
                    "tracked directories or tracked individually."
                ).format(
                    str(parent),
                    parent.stage.addressing,
                    str(overlapping),
                    overlapping.stage.addressing,
                )
                raise OverlappingOutputPathsError(parent, overlapping, msg)

            outs[out_key] = out

    return outs
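The two overlap directions hinge on pygtrie: `has_subtrie` means an existing out lives under the new key, while `shortest_prefix` means the new key lives inside an existing out. A small illustration with made-up paths:

from pygtrie import Trie

outs = Trie()
outs[("data", "raw")] = "out: data/raw"

new_key = ("data",)                           # new out would contain an existing one
print(outs.has_subtrie(new_key))              # True
print(list(outs.values(prefix=new_key)))      # ['out: data/raw']

new_key = ("data", "raw", "file.csv")         # new out sits inside an existing one
print(outs.shortest_prefix(new_key).value)    # 'out: data/raw'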
Example #47
0
def test_scope_building():
    """Make sure we find all the declarations within a function but don't stray
    into inner functions."""
    js = """
    function smoo() {
        var w, x;
        if (true) {
            var y;
        }
        function bar() {
            var z;
        }
    }
    function barbar() {

    }
    """
    ast = parse(js)
    function = first(node for node in walk_down(ast)
                     if node['type'] == FUNC_DECL)
    raise SkipTest("Need to reimplement scope")
    eq_(set(function.scope().keys()), set(['w', 'x', 'y', 'smoo', 'bar']))

    eq_(set(ast.scope().keys()), set(['smoo', 'barbar']))
Example #48
0
    def build_sqoop_partition_options(
        self, relation: RelationDescription, partition_key: Optional[str], table_size: int
    ) -> List[str]:
        """
        Build the partitioning-related arguments for Sqoop.
        """
        if partition_key:
            column = fy.first(fy.where(relation.table_design["columns"], name=partition_key))
            if column["sql_type"] in ("timestamp", "timestamp without time zone"):
                quoted_key_arg = """CAST(TO_CHAR("{}", 'YYYYMMDDHH24MISS') AS BIGINT)""".format(partition_key)
            else:
                quoted_key_arg = '"{}"'.format(partition_key)

            if relation.num_partitions:
                # num_partitions explicitly set in the design file overrides the dynamic determination.
                num_mappers = min(relation.num_partitions, self.max_partitions)
            else:
                num_mappers = self.maximize_partitions(table_size)

            if num_mappers > 1:
                return ["--split-by", quoted_key_arg, "--num-mappers", str(num_mappers)]

        # Use 1 mapper if either there is no partition key, or if the partitioner returns only one partition
        return ["--num-mappers", "1"]
Example #49
0
def parse_schedule(body: str) -> Tuple[Item, ...]:
    def selector(css_class: str) -> Callable:
        return compose(
            partial(lmap, str.strip),
            partial(select, notnone),
            partial(lmap, attrgetter("text")),
            partial(HtmlElement.cssselect, expr=css_class),
        )

    tree: HtmlElement = html.fromstring(body)
    select_day_containers = partial(HtmlElement.cssselect,
                                    expr=config.MAI_DAY_CONTAINER_SELECTOR)
    select_item_containers = partial(HtmlElement.cssselect,
                                     expr=config.MAI_ITEM_CONTAINER_SELECTOR)
    select_dates = selector(config.MAI_DATE_SELECTOR)
    select_days_of_week = selector(config.MAI_DAY_OF_WEEK_SELECTOR)
    select_times = selector(config.MAI_TIME_SELECTOR)
    select_types = selector(config.MAI_TYPE_SELECTOR)
    select_titles = selector(config.MAI_TITLE_SELECTOR)
    select_teachers = selector(config.MAI_TEACHER_SELECTOR)
    select_places = compose(
        lkeep,
        partial(lmap, str.strip),
        partial(select, notnone),
        partial(HtmlElement.xpath, _path=config.MAI_PLACE_XPATH_SELECTOR),
    )
    return tuple(
        Item(
            date=first(select_dates(day_container)) or "",
            week_day=first(select_days_of_week(day_container)) or "",
            time=first(select_times(item_container)) or "",
            type=first(select_types(item_container)) or "",
            title=first(select_titles(item_container)) or "",
            place=first(select_places(item_container)) or "",
            teachers=select_teachers(item_container),
        ) for day_container in select_day_containers(tree)
        for item_container in select_item_containers(day_container))
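Note that funcy's `compose` applies its arguments right-to-left, so each selector runs the CSS select first, then text extraction, then None-filtering and stripping. A self-contained illustration of that ordering:

from functools import partial
from funcy import compose, lmap, select, notnone

clean = compose(
    partial(lmap, str.strip),   # applied last: strip whitespace
    partial(select, notnone),   # applied second: drop None entries
    list,                       # applied first: stand-in for the CSS select step
)
print(clean(iter(["  10:00 ", None, "LK 501"])))  # ['10:00', 'LK 501']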
Example #50
0
def _git_client_args(fs):
    kwargs = {
        "password": fs.fs_args.get("password"),
        "key_filename": first(fs.fs_args.get("client_keys", [])),
    }
    return kwargs
Example #51
0
def test_clear(dvc, index):
    index.update(["1234.dir"], ["5678"])
    index.clear()
    assert first(index.hashes()) is None
Example #52
0
    def dir(self):
        return first(self.backends.values()).dir
Example #53
0
    def output(self) -> str:
        return fn.first(self.pcirc.outputs)
Example #54
0
def get_only_element_from_collection(one_element_collection):
    """Assert that the collection has exactly one element, then return that element."""
    if len(one_element_collection) != 1:
        raise AssertionError(u'Expected a collection with exactly one element, but got: {}'
                             .format(one_element_collection))
    return funcy.first(one_element_collection)
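Illustrative usage of the helper above:

assert get_only_element_from_collection([42]) == 42
try:
    get_only_element_from_collection([1, 2])
except AssertionError as exc:
    print(exc)  # Expected a collection with exactly one element, but got: [1, 2]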
Example #55
0
def test_update_py_params(tmp_dir, scm, dvc):
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("params.py", "INT = 1\n")
    stage = dvc.run(
        cmd="python copy.py params.py metrics.py",
        metrics_no_cache=["metrics.py"],
        params=["params.py:INT"],
        name="copy-file",
    )
    scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.py", "metrics.py"])
    scm.commit("init")

    results = dvc.experiments.run(stage.addressing,
                                  params=["params.py:INT=2"],
                                  tmp_dir=True)
    exp_a = first(results)

    tree = scm.get_tree(exp_a)
    with tree.open(tmp_dir / "params.py") as fobj:
        assert fobj.read().strip() == "INT = 2"
    with tree.open(tmp_dir / "metrics.py") as fobj:
        assert fobj.read().strip() == "INT = 2"

    tmp_dir.gen(
        "params.py",
        "INT = 1\nFLOAT = 0.001\nDICT = {'a': 1}\n\n"
        "class Train:\n    seed = 2020\n\n"
        "class Klass:\n    def __init__(self):\n        self.a = 111\n",
    )
    stage = dvc.run(
        cmd="python copy.py params.py metrics.py",
        metrics_no_cache=["metrics.py"],
        params=["params.py:INT,FLOAT,DICT,Train,Klass"],
        name="copy-file",
    )
    scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.py", "metrics.py"])
    scm.commit("init")

    results = dvc.experiments.run(
        stage.addressing,
        params=["params.py:FLOAT=0.1,Train.seed=2121,Klass.a=222"],
        tmp_dir=True,
    )
    exp_a = first(results)

    result = ("INT = 1\nFLOAT = 0.1\nDICT = {'a': 1}\n\n"
              "class Train:\n    seed = 2121\n\n"
              "class Klass:\n    def __init__(self):\n        self.a = 222")

    def _dos2unix(text):
        if os.name != "nt":
            return text

        # NOTE: git on windows will use CRLF, so we have to convert it to LF
        # in order to compare with the original
        return text.replace("\r\n", "\n")

    tree = scm.get_tree(exp_a)
    with tree.open(tmp_dir / "params.py") as fobj:
        assert _dos2unix(fobj.read().strip()) == result
    with tree.open(tmp_dir / "metrics.py") as fobj:
        assert _dos2unix(fobj.read().strip()) == result

    tmp_dir.gen("params.py", "INT = 1\n")
    stage = dvc.run(
        cmd="python copy.py params.py metrics.py",
        metrics_no_cache=["metrics.py"],
        params=["params.py:INT"],
        name="copy-file",
    )
    scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.py", "metrics.py"])
    scm.commit("init")

    with pytest.raises(PythonFileCorruptedError):
        dvc.experiments.run(stage.addressing,
                            params=["params.py:INT=2a"],
                            tmp_dir=True)
Example #56
0
def condition_action(txt, loc, toks):
    return make_filter(first(toks))
Example #57
0
    def _collect_graph(self, stages):
        """Generate a graph by using the given stages on the given directory

        The nodes of the graph are the stage's path relative to the root.

        Edges are created when the output of one stage is used as a
        dependency in another stage.

        The direction of the edges goes from the stage to its dependency:

        For example, running the following:

            $ dvc run -o A "echo A > A"
            $ dvc run -d A -o B "echo B > B"
            $ dvc run -d B -o C "echo C > C"

        Will create the following graph:

               ancestors <--
                           |
                C.dvc -> B.dvc -> A.dvc
                |          |
                |          --> descendants
                |
                ------- pipeline ------>
                           |
                           v
              (weakly connected components)

        Args:
            stages (list): used to build a graph; if None is given, collect
                stages in the repository.

        Raises:
            OutputDuplicationError: two outputs with the same path
            StagePathAsOutputError: stage inside an output directory
            OverlappingOutputPathsError: output inside output directory
            CyclicGraphError: resulting graph has cycles
        """
        import networkx as nx
        from pygtrie import Trie

        from dvc.exceptions import (
            OutputDuplicationError,
            OverlappingOutputPathsError,
            StagePathAsOutputError,
        )

        G = nx.DiGraph()
        stages = stages or self.stages
        outs = Trie()  # Use trie to efficiently find overlapping outs and deps

        for stage in filter(bool, stages):  # bug? not using it later
            for out in stage.outs:
                out_key = out.path_info.parts

                # Check for dup outs
                if out_key in outs:
                    dup_stages = [stage, outs[out_key].stage]
                    raise OutputDuplicationError(str(out), dup_stages)

                # Check for overlapping outs
                if outs.has_subtrie(out_key):
                    parent = out
                    overlapping = first(outs.values(prefix=out_key))
                else:
                    parent = outs.shortest_prefix(out_key).value
                    overlapping = out
                if parent and overlapping:
                    msg = ("Paths for outs:\n'{}'('{}')\n'{}'('{}')\n"
                           "overlap. To avoid unpredictable behaviour, "
                           "rerun command with non overlapping outs paths."
                           ).format(
                               str(parent),
                               parent.stage.addressing,
                               str(overlapping),
                               overlapping.stage.addressing,
                           )
                    raise OverlappingOutputPathsError(parent, overlapping, msg)

                outs[out_key] = out

        for stage in stages:
            out = outs.shortest_prefix(PathInfo(stage.path).parts).value
            if out:
                raise StagePathAsOutputError(stage, str(out))

        # Building graph
        G.add_nodes_from(stages)
        for stage in stages:
            for dep in stage.deps:
                if dep.path_info is None:
                    continue

                dep_key = dep.path_info.parts
                overlapping = [n.value for n in outs.prefixes(dep_key)]
                if outs.has_subtrie(dep_key):
                    overlapping.extend(outs.values(prefix=dep_key))

                G.add_edges_from((stage, out.stage) for out in overlapping)
        check_acyclic(G)

        return G
Example #58
0
    def __next__(self):
        next_item = first(self.take(1))
        if not next_item:
            raise StopIteration

        return next_item
Example #59
0
def query(query, node):
    matcher = first(parser.parseString(query))
    return matcher(node)
Example #60
0
def split_foreach_name(name):
    group, *keys = name.rsplit(JOIN, maxsplit=1)
    return group, first(keys)
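Hypothetical usage, assuming JOIN is the "@" separator dvc uses for foreach stage names; `first([])` is None, so names without a key come back as (name, None):

JOIN = "@"  # assumption for this sketch

print(split_foreach_name("build@linux"))  # ('build', 'linux')
print(split_foreach_name("build"))        # ('build', None)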