Example #1
0
    def _merge(self, matches):
        # get matches up to and including first important_match
        #   but if no important_match, then all matches are important_matches
        relevant_matches = self._first_important_matches(matches)

        # get individual lines from important_matches that were marked important
        # these will be prepended to the final result
        def get_marked_lines(match, marker):
            return tuple(line
                         for line, flag in zip(match.value(self.__class__),
                                               match.valueflags(self.__class__))
                         if flag is marker)
        top_lines = concat(get_marked_lines(m, ParameterFlag.top) for m in relevant_matches)

        # also get lines that were marked as bottom, but reverse the match order so that lines
        # coming earlier will ultimately be last
        bottom_lines = concat(get_marked_lines(m, ParameterFlag.bottom) for m in
                              reversed(relevant_matches))

        # now, concat all lines, while reversing the matches
        #   reverse because elements closer to the end of search path take precedence
        all_lines = concat(m.value(self.__class__) for m in reversed(relevant_matches))

        # stack top_lines + all_lines, then de-dupe
        top_deduped = tuple(unique(concatv(top_lines, all_lines)))

        # take the top-deduped lines, reverse them, and concat with reversed bottom_lines
        # this gives us the reverse of the order we want, so we're almost there
        # NOTE: for a line value marked both top and bottom, the bottom marker will win out
        #       for the top marker to win out, we'd need one additional de-dupe step
        bottom_deduped = unique(concatv(reversed(tuple(bottom_lines)), reversed(top_deduped)))

        # just reverse, and we're good to go
        return tuple(reversed(tuple(bottom_deduped)))
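A standalone sketch of the merge ordering described in the comments above, using toolz's concatv and unique with hypothetical line tuples (the sample values are illustrative, not conda data). It shows that a line marked both top and bottom ends up at the bottom, as the NOTE warns.

from toolz import concatv, unique

# hypothetical marked lines; 'shared' carries both a top and a bottom marker
top_lines = ('t1', 'shared')
all_lines = ('a1', 'shared', 't1', 'b1')
bottom_lines = ('b1', 'shared')

top_deduped = tuple(unique(concatv(top_lines, all_lines)))
bottom_deduped = unique(concatv(reversed(tuple(bottom_lines)), reversed(top_deduped)))
merged = tuple(reversed(tuple(bottom_deduped)))
# merged == ('t1', 'a1', 'b1', 'shared') -- the bottom marker wins for 'shared'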
Example #2
0
 def _verify(cls, prefix_setups, prefix_action_groups):
     exceptions = tuple(exc for exc in concatv(
         concat(cls._verify_individual_level(prefix_group)
                for prefix_group in itervalues(prefix_action_groups)),
         concat(cls._verify_prefix_level(target_prefix, prefix_group)
                for target_prefix, prefix_group in iteritems(prefix_action_groups)),
         cls._verify_transaction_level(prefix_setups),
     ) if exc)
     return exceptions
Example #3
0
 def custom_channels(self):
     from ..models.channel import Channel
     custom_channels = (Channel.make_simple_channel(self.channel_alias, url, name)
                        for name, url in iteritems(self._custom_channels))
     all_sources = self.default_channels, (self.local_build_root_channel,), custom_channels
     all_channels = (ch for ch in concat(all_sources))
     return odict((x.name, x) for x in all_channels)
Example #4
0
 def query(self, package_ref_or_match_spec):
     if not self._loaded:
         self.load()
     param = package_ref_or_match_spec
     if isinstance(param, string_types):
         param = MatchSpec(param)
     if isinstance(param, MatchSpec):
         if param.get_exact_value('name'):
             package_name = param.get_exact_value('name')
             for prec in self._names_index[package_name]:
                 if param.match(prec):
                     yield prec
         elif param.get_exact_value('track_features'):
             track_features = param.get_exact_value('track_features') or ()
             candidates = concat(self._track_features_index[feature_name]
                                 for feature_name in track_features)
             for prec in candidates:
                 if param.match(prec):
                     yield prec
         else:
             for prec in self._package_records:
                 if param.match(prec):
                     yield prec
     else:
         assert isinstance(param, PackageRef)
         for prec in self._names_index[param.name]:
             if prec == param:
                 yield prec
Example #5
0
 def custom_channels(self):
     from ..models.channel import Channel
     custom_channels = (Channel.make_simple_channel(self.channel_alias, url, name)
                        for name, url in iteritems(self._custom_channels))
     all_sources = self.default_channels, (self.local_build_root_channel,), custom_channels
     all_channels = (ch for ch in concat(all_sources))
     return odict((x.name, x) for x in all_channels)
Example #6
0
 def query(self, package_ref_or_match_spec):
     if not self._loaded:
         self.load()
     param = package_ref_or_match_spec
     if isinstance(param, string_types):
         param = MatchSpec(param)
     if isinstance(param, MatchSpec):
         if param.get_exact_value('name'):
             package_name = param.get_exact_value('name')
             for prec in self._names_index[package_name]:
                 if param.match(prec):
                     yield prec
         elif param.get_exact_value('track_features'):
             track_features = param.get_exact_value('track_features') or ()
             candidates = concat(self._track_features_index[feature_name]
                                 for feature_name in track_features)
             for prec in candidates:
                 if param.match(prec):
                     yield prec
         else:
             for prec in self._package_records:
                 if param.match(prec):
                     yield prec
     else:
         assert isinstance(param, PackageRef)
         for prec in self._names_index[param.name]:
             if prec == param:
                 yield prec
Example #7
0
    def custom_multichannels(self):
        from ..models.channel import Channel

        reserved_multichannel_urls = odict((
            (DEFAULTS_CHANNEL_NAME, self._default_channels),
            ('local', self.conda_build_local_urls),
        ))
        reserved_multichannels = odict(
            (name, tuple(
                Channel.make_simple_channel(self.channel_alias, url) for url in urls)
             ) for name, urls in iteritems(reserved_multichannel_urls)
        )
        custom_multichannels = odict(
            (name, tuple(
                Channel.make_simple_channel(self.channel_alias, url) for url in urls)
             ) for name, urls in iteritems(self._custom_multichannels)
        )
        all_multichannels = odict(
            (name, channels)
            for name, channels in concat(map(iteritems, (
                custom_multichannels,
                reserved_multichannels,  # reserved comes last, so reserved overrides custom
            )))
        )
        return all_multichannels
Example #8
0
    def custom_multichannels(self):
        from ..models.channel import Channel

        reserved_multichannel_urls = odict((
            (DEFAULTS_CHANNEL_NAME, self._default_channels),
            ('local', self.conda_build_local_urls),
        ))
        reserved_multichannels = odict(
            (name, tuple(
                Channel.make_simple_channel(self.channel_alias, url) for url in urls)
             ) for name, urls in iteritems(reserved_multichannel_urls)
        )
        custom_multichannels = odict(
            (name, tuple(
                Channel.make_simple_channel(self.channel_alias, url) for url in urls)
             ) for name, urls in iteritems(self._custom_multichannels)
        )
        all_multichannels = odict(
            (name, channels)
            for name, channels in concat(map(iteritems, (
                custom_multichannels,
                reserved_multichannels,  # reserved comes last, so reserved overrides custom
            )))
        )
        return all_multichannels
Example #9
0
 def get_pfe(self):
     from .package_cache import ProgressiveFetchExtract
     if not self.prefix_setups:
         return ProgressiveFetchExtract(())
     else:
         link_precs = set(concat(stp.link_precs for stp in itervalues(self.prefix_setups)))
         return ProgressiveFetchExtract(link_precs)
Example #10
0
    def query_all(cls, package_ref_or_match_spec, pkgs_dirs=None):
        if pkgs_dirs is None:
            pkgs_dirs = context.pkgs_dirs

        return concat(
            pcache.query(package_ref_or_match_spec)
            for pcache in cls.all_caches_writable_first(pkgs_dirs))
Example #11
0
def terms(
    doclike: types.DocLike,
    *,
    ngs: Optional[int | Collection[int] | types.DocLikeToSpans] = None,
    ents: Optional[bool | types.DocLikeToSpans] = None,
    ncs: Optional[bool | types.DocLikeToSpans] = None,
    dedupe: bool = True,
) -> Iterable[Span]:
    """
    Extract one or multiple types of terms -- ngrams, entities, and/or noun chunks --
    from ``doclike`` as a single, concatenated collection, with optional deduplication
    of spans extracted by more than one type.

    .. code-block:: pycon

        >>> extract.terms(doc, ngs=2, ents=True, ncs=True)
        >>> extract.terms(doc, ngs=lambda doc: extract.ngrams(doc, n=2))
        >>> extract.terms(doc, ents=extract.entities)
        >>> extract.terms(doc, ents=partial(extract.entities, include_types="PERSON"))

    Args:
        doclike
        ngs: N-gram terms to be extracted.
            If one or multiple ints, :func:`textacy.extract.ngrams(doclike, n=ngs)` is
            used to extract terms; if a callable, ``ngs(doclike)`` is used to extract
            terms; if None, no n-gram terms are extracted.
        ents: Entity terms to be extracted.
            If True, :func:`textacy.extract.entities(doclike)` is used to extract terms;
            if a callable, ``ents(doclike)`` is used to extract terms;
            if None, no entity terms are extracted.
        ncs: Noun chunk terms to be extracted.
            If True, :func:`textacy.extract.noun_chunks(doclike)` is used to extract
            terms; if a callable, ``ncs(doclike)`` is used to extract terms;
            if None, no noun chunk terms are extracted.
        dedupe: If True, deduplicate terms whose spans are extracted by multiple types
            (e.g. a span that is both an n-gram and an entity), as identified by
            identical (start, stop) indexes in ``doclike``; otherwise, don't.

    Returns:
        Next term from ``doclike``, in order of n-grams then entities then noun chunks,
        with each collection's terms given in order of appearance.

    Note:
        This function is *not* to be confused with keyterm extraction, which leverages
        statistics and algorithms to quantify the "key"-ness of terms before returning
        the top-ranking terms. There is no such scoring or ranking here.

    See Also:
        - :func:`textacy.extract.ngrams()`
        - :func:`textacy.extract.entities()`
        - :func:`textacy.extract.noun_chunks()`
        - :mod:`textacy.extract.keyterms`
    """
    extractors = _get_extractors(ngs, ents, ncs)
    terms_ = itertoolz.concat(extractor(doclike) for extractor in extractors)
    if dedupe is True:
        terms_ = itertoolz.unique(terms_, lambda span: (span.start, span.end))
    for term in terms_:
        yield term
Example #12
0
    def query_all(cls, package_ref_or_match_spec, pkgs_dirs=None):
        if pkgs_dirs is None:
            pkgs_dirs = context.pkgs_dirs

        return concat(pcache.query(package_ref_or_match_spec) for pcache in concatv(
            cls.writable_caches(pkgs_dirs),
            cls.read_only_caches(pkgs_dirs),
        ))
Example #13
0
def explode_directories(child_directories, already_split=False):
    # get all directories including parents
    # use already_split=True for the result of get_all_directories()
    maybe_split = lambda x: x if already_split else x.split('/')
    return set(
        concat(
            accumulate(join, maybe_split(directory))
            for directory in child_directories))
Example #14
0
    def query_all(cls, package_ref_or_match_spec, pkgs_dirs=None):
        if pkgs_dirs is None:
            pkgs_dirs = context.pkgs_dirs

        return concat(pcache.query(package_ref_or_match_spec) for pcache in concatv(
            cls.writable_caches(pkgs_dirs),
            cls.read_only_caches(pkgs_dirs),
        ))
Example #15
0
    def custom_channels(self):
        from ..models.channel import Channel

        custom_channels = (
            Channel.make_simple_channel(self.channel_alias, url, name) for name, url in iteritems(self._custom_channels)
        )
        channels_from_multichannels = concat(channel for channel in itervalues(self.custom_multichannels))
        all_channels = odict((x.name, x) for x in (ch for ch in concatv(channels_from_multichannels, custom_channels)))
        return all_channels
Example #16
0
 def custom_multichannels(self):
     from ..models.channel import Channel
     default_custom_multichannels = {
         'defaults': self.default_channels,
         'local': (self.local_build_root_channel,),
     }
     all_channels = default_custom_multichannels, self._custom_multichannels
     return odict((name, tuple(Channel(v) for v in c))
                  for name, c in concat(map(iteritems, all_channels)))
Example #17
0
 def query_all(channels, subdirs, package_ref_or_match_spec):
     from .index import check_whitelist  # TODO: fix in-line import
     channel_urls = all_channel_urls(channels, subdirs=subdirs)
     check_whitelist(channel_urls)
     with ThreadLimitedThreadPoolExecutor() as executor:
         futures = tuple(executor.submit(
             SubdirData(Channel(url)).query, package_ref_or_match_spec
         ) for url in channel_urls)
         return tuple(concat(future.result() for future in as_completed(futures)))
Example #18
0
 def custom_multichannels(self):
     from ..models.channel import Channel
     default_custom_multichannels = {
         'defaults': self.default_channels,
         'local': (self.local_build_root_channel, ),
     }
     all_channels = default_custom_multichannels, self._custom_multichannels
     return odict((name, tuple(Channel(v) for v in c))
                  for name, c in concat(map(iteritems, all_channels)))
Example #19
0
def check_whitelist(channel_urls):
    if context.whitelist_channels:
        whitelist_channel_urls = tuple(
            concat(Channel(c).base_urls for c in context.whitelist_channels))
        for url in channel_urls:
            these_urls = Channel(url).base_urls
            if not all(this_url in whitelist_channel_urls
                       for this_url in these_urls):
                raise ChannelNotAllowed(Channel(url))
Example #20
0
    def _merge(self, matches):
        # get matches up to and including first important_match
        #   but if no important_match, then all matches are important_matches
        relevant_matches_and_values = tuple(
            (match, match.value(self))
            for match in self._first_important_matches(matches))
        for match, value in relevant_matches_and_values:
            if not isinstance(value, tuple):
                raise InvalidTypeError(self.name, value, match.source,
                                       value.__class__.__name__,
                                       self._type.__name__)

        # get individual lines from important_matches that were marked important
        # these will be prepended to the final result
        def get_marked_lines(match, marker, parameter_obj):
            return tuple(line for line, flag in zip(
                match.value(parameter_obj), match.valueflags(parameter_obj))
                         if flag is marker) if match else ()

        top_lines = concat(
            get_marked_lines(m, ParameterFlag.top, self)
            for m, _ in relevant_matches_and_values)

        # also get lines that were marked as bottom, but reverse the match order so that lines
        # coming earlier will ultimately be last
        bottom_lines = concat(
            get_marked_lines(m, ParameterFlag.bottom, self)
            for m, _ in reversed(relevant_matches_and_values))

        # now, concat all lines, while reversing the matches
        #   reverse because elements closer to the end of search path take precedence
        all_lines = concat(v for _, v in reversed(relevant_matches_and_values))

        # stack top_lines + all_lines, then de-dupe
        top_deduped = tuple(unique(concatv(top_lines, all_lines)))

        # take the top-deduped lines, reverse them, and concat with reversed bottom_lines
        # this gives us the reverse of the order we want, so we're almost there
        # NOTE: for a line value marked both top and bottom, the bottom marker will win out
        #       for the top marker to win out, we'd need one additional de-dupe step
        bottom_deduped = unique(
            concatv(reversed(tuple(bottom_lines)), reversed(top_deduped)))
        # just reverse, and we're good to go
        return tuple(reversed(tuple(bottom_deduped)))
Example #21
0
 def get_pfe(self):
     from .package_cache import ProgressiveFetchExtract
     if not self.prefix_setups:
         return ProgressiveFetchExtract({}, ())
     else:
         index = next(itervalues(self.prefix_setups)).index
         link_dists = set(
             concat(stp.link_dists
                    for stp in itervalues(self.prefix_setups)))
         return ProgressiveFetchExtract(index, link_dists)
Example #22
0
    def execute(self):
        if not self._verified:
            self.verify()

        assert not context.dry_run

        try:
            self._execute(tuple(concat(interleave(itervalues(self.prefix_action_groups)))))
        finally:
            rm_rf(self.transaction_context['temp_dir'])
Example #23
0
 def _get_pfe(self):
     from .package_cache_data import ProgressiveFetchExtract
     if self._pfe is not None:
         pfe = self._pfe
     elif not self.prefix_setups:
         self._pfe = pfe = ProgressiveFetchExtract(())
     else:
         link_precs = set(concat(stp.link_precs for stp in itervalues(self.prefix_setups)))
         self._pfe = pfe = ProgressiveFetchExtract(link_precs)
     return pfe
Example #24
0
 def _make_channel_priorities(channels):
     priorities_map = odict()
     for priority_counter, chn in enumerate(concat(
         (Channel(cc) for cc in c._channels) if isinstance(c, MultiChannel) else (c,)
         for c in (Channel(c) for c in channels)
     )):
         channel_name = chn.name
         if channel_name in priorities_map:
             continue
         priorities_map[channel_name] = min(priority_counter, MAX_CHANNEL_PRIORITY - 1)
     return priorities_map
Example #25
0
 def custom_channels(self):
     from ..models.channel import Channel
     custom_channels = (Channel.make_simple_channel(self.channel_alias, url, name)
                        for name, url in iteritems(self._custom_channels))
     channels_from_multichannels = concat(channel for channel
                                          in itervalues(self.custom_multichannels))
     all_channels = odict((x.name, x) for x in (ch for ch in concatv(
         channels_from_multichannels,
         custom_channels,
     )))
     return all_channels
Example #26
0
 def _make_channel_priorities(channels):
     priorities_map = odict()
     for priority_counter, chn in enumerate(concat(
         (Channel(cc) for cc in c._channels) if isinstance(c, MultiChannel) else (c,)
         for c in (Channel(c) for c in channels)
     )):
         channel_name = chn.name
         if channel_name in priorities_map:
             continue
         priorities_map[channel_name] = min(priority_counter, MAX_CHANNEL_PRIORITY - 1)
     return priorities_map
Example #27
0
def _collect_repodatas_serial_as_index(use_cache, tasks):
    session = CondaSession()
    results = (fetch_repodata(url,
                              schan,
                              pri,
                              use_cache=use_cache,
                              session=session) for url, schan, pri in tasks)
    index = dict(
        concat(
            iteritems(result.get('packages', {})) for result in results
            if result))
    return index
Example #28
0
def check_whitelist(channel_urls):
    if context.whitelist_channels:
        whitelist_channel_urls = tuple(concat(
            Channel(c).base_urls for c in context.whitelist_channels
        ))
        for url in channel_urls:
            these_urls = Channel(url).base_urls
            if not all(this_url in whitelist_channel_urls for this_url in these_urls):
                bad_channel = Channel(url)
                raise OperationNotAllowed("Channel not included in whitelist:\n"
                                          "  location: %s\n"
                                          "  canonical name: %s\n"
                                          % (bad_channel.location, bad_channel.canonical_name))
Example #29
0
 def query_all(package_ref_or_match_spec, channels=None, subdirs=None):
     from .index import check_whitelist  # TODO: fix in-line import
     if channels is None:
         channels = context.channels
     if subdirs is None:
         subdirs = context.subdirs
     channel_urls = all_channel_urls(channels, subdirs=subdirs)
     check_whitelist(channel_urls)
     with ThreadLimitedThreadPoolExecutor() as executor:
         futures = tuple(executor.submit(
             SubdirData(Channel(url)).query, package_ref_or_match_spec
         ) for url in channel_urls)
         return tuple(concat(future.result() for future in as_completed(futures)))
Example #30
0
def check_whitelist(channel_urls):
    if context.whitelist_channels:
        whitelist_channel_urls = tuple(concat(
            Channel(c).base_urls for c in context.whitelist_channels
        ))
        for url in channel_urls:
            these_urls = Channel(url).base_urls
            if not all(this_url in whitelist_channel_urls for this_url in these_urls):
                bad_channel = Channel(url)
                raise OperationNotAllowed("Channel not included in whitelist:\n"
                                          "  location: %s\n"
                                          "  canonical name: %s\n"
                                          % (bad_channel.location, bad_channel.canonical_name))
Example #31
0
def describe_all_parameters():
    builder = []
    skip_categories = ('CLI-only', 'Hidden and Undocumented')
    for category, parameter_names in iteritems(context.category_map):
        if category in skip_categories:
            continue
        builder.append('# ######################################################')
        builder.append('# ## {:^48} ##'.format(category))
        builder.append('# ######################################################')
        builder.append('')
        builder.extend(concat(parameter_description_builder(name)
                              for name in parameter_names))
        builder.append('')
    return '\n'.join(builder)
Example #32
0
def _collect_repodatas_concurrent_as_index(executor, use_cache, tasks):
    futures = (executor.submit(fetch_repodata,
                               url,
                               schan,
                               pri,
                               use_cache=use_cache,
                               session=CondaSession())
               for url, schan, pri in tasks)
    results = (future.result() for future in futures)
    index = dict(
        concat(
            iteritems(result.get('packages', {})) for result in results
            if result))
    return index
Example #33
0
def prioritize_channels(channels, with_credentials=True, subdirs=None):
    # prioritize_channels returns an OrderedDict with platform-specific channel
    #   urls as the key, and a tuple of canonical channel name and channel priority
    #   number as the value
    # ('https://conda.anaconda.org/conda-forge/osx-64/', ('conda-forge', 1))
    channels = concat((Channel(cc) for cc in c._channels) if isinstance(c, MultiChannel) else (c,)
                      for c in (Channel(c) for c in channels))
    result = odict()
    for priority_counter, chn in enumerate(channels):
        channel = Channel(chn)
        for url in channel.urls(with_credentials, subdirs):
            if url in result:
                continue
            result[url] = channel.canonical_name, min(priority_counter, MAX_CHANNEL_PRIORITY - 1)
    return result
Example #34
0
def prioritize_channels(channels, with_credentials=True, subdirs=None):
    # prioritize_channels returns an OrderedDict with platform-specific channel
    #   urls as the key, and a tuple of canonical channel name and channel priority
    #   number as the value
    # ('https://conda.anaconda.org/conda-forge/osx-64/', ('conda-forge', 1))
    channels = concat((Channel(cc) for cc in c._channels) if isinstance(c, MultiChannel) else (c,)
                      for c in (Channel(c) for c in channels))
    result = odict()
    for priority_counter, chn in enumerate(channels):
        channel = Channel(chn)
        for url in channel.urls(with_credentials, subdirs):
            if url in result:
                continue
            result[url] = channel.canonical_name, min(priority_counter, MAX_CHANNEL_PRIORITY - 1)
    return result
Example #35
0
def query_all(channels, subdirs, package_ref_or_match_spec):
    channel_urls = all_channel_urls(channels, subdirs=subdirs)

    result = executor = None
    if context.concurrent:
        try:
            from concurrent.futures import ThreadPoolExecutor, as_completed
            executor = ThreadPoolExecutor(10)
            futures = (executor.submit(
                SubdirData(Channel(url)).query, package_ref_or_match_spec
            ) for url in channel_urls)
            result = tuple(concat(future.result() for future in as_completed(futures)))
        except (ImportError, RuntimeError) as e:
            # concurrent.futures is only available in Python >= 3.2 or if futures is installed
            # RuntimeError is thrown if the number of threads is limited by the OS
            log.debug(repr(e))
    if executor:
        executor.shutdown(wait=True)

    if result is None:
        subdir_datas = (SubdirData(Channel(url)) for url in channel_urls)
        result = tuple(concat(sd.query(package_ref_or_match_spec) for sd in subdir_datas))

    return result
Example #36
0
    def _verify_individual_level(prefix_action_group):
        all_actions = concat(axngroup.actions
                             for action_groups in prefix_action_group
                             for axngroup in action_groups)

        # run all per-action verify methods
        #   one of the more important of these checks is to verify that a file listed in
        #   the packages manifest (i.e. info/files) is actually contained within the package
        for axn in all_actions:
            if axn.verified:
                continue
            error_result = axn.verify()
            if error_result:
                formatted_error = ''.join(format_exception_only(type(error_result), error_result))
                log.debug("Verification error in action %s\n%s", axn, formatted_error)
                yield error_result
Example #37
0
def describe_all_parameters():
    builder = []
    skip_categories = ('CLI-only', 'Hidden and Undocumented')
    for category, parameter_names in iteritems(context.category_map):
        if category in skip_categories:
            continue
        builder.append(
            '# ######################################################')
        builder.append('# ## {:^48} ##'.format(category))
        builder.append(
            '# ######################################################')
        builder.append('')
        builder.extend(
            concat(
                parameter_description_builder(name)
                for name in parameter_names))
        builder.append('')
    return '\n'.join(builder)
Example #38
0
    def query_all(channels, subdirs, package_ref_or_match_spec):
        channel_urls = all_channel_urls(channels, subdirs=subdirs)

        executor = None
        try:
            from concurrent.futures import ThreadPoolExecutor, as_completed
            executor = ThreadPoolExecutor(10)
            futures = (executor.submit(
                SubdirData(Channel(url)).query, package_ref_or_match_spec
            ) for url in channel_urls)
            return tuple(concat(future.result() for future in as_completed(futures)))
        except RuntimeError as e:  # pragma: no cover
            # concurrent.futures is only available in Python >= 3.2 or if futures is installed
            # RuntimeError is thrown if the number of threads is limited by the OS
            raise
        finally:
            if executor:
                executor.shutdown(wait=True)
Example #39
0
 def __init__(
         self,
         dimensions: List[int],
         activation: torch.nn.Module = nn.ReLU(),
         final_activation: Optional[torch.nn.Module] = nn.ReLU(),
         weight_init: Callable[[torch.Tensor, torch.Tensor, float],
                               None] = default_initialise_weight_bias_,
         gain: float = nn.init.calculate_gain("relu"),
 ):
     """
     Autoencoder composed of a symmetric decoder and encoder components accessible via the encoder and decoder
     attributes. The dimensions input is the list of dimensions occurring in a single stack
     e.g. [100, 10, 10, 5] will make the embedding_dimension 100 and the hidden dimension 5, with the
     autoencoder shape [100, 10, 10, 5, 10, 10, 100].
     :param dimensions: list of dimensions occurring in a single stack
     :param activation: activation layer to use for all but final activation, default torch.nn.ReLU
     :param final_activation: final activation layer to use, set to None to disable, default torch.nn.ReLU
     :param weight_init: function for initialising weight and bias via mutation, defaults to default_initialise_weight_bias_
     :param gain: gain parameter to pass to weight_init
     """
     super(StackedAutoEncoderModel, self).__init__()
     self.dimensions = dimensions
     self.embedding_dimension = dimensions[0]
     self.hidden_dimension = dimensions[-1]
     # construct the encoder
     encoder_units = build_units(self.dimensions[:-1], activation)
     encoder_units.extend(
         build_units([self.dimensions[-2], self.dimensions[-1]], None))
     self.encoder = nn.Sequential(*encoder_units)
     # construct the decoder
     decoder_units = build_units(reversed(self.dimensions[1:]), activation)
     decoder_units.extend(
         build_units([self.dimensions[1], self.dimensions[0]],
                     final_activation))
     self.decoder = nn.Sequential(*decoder_units)
     # construct the softmax layer
     self.softmax_layer = nn.Linear(self.dimensions[-1], 2)
     # loss & optimizer
     self.criterion = nn.CrossEntropyLoss()
     self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
     # initialise the weights and biases in the layers
     for layer in concat([self.encoder, self.decoder]):
         weight_init(layer[0].weight, layer[0].bias, gain)
Example #40
0
    def merge(cls, match_specs):
        match_specs = tuple(cls(s) for s in match_specs if s)
        name_groups = groupby(attrgetter('name'), match_specs)
        unmergeable = name_groups.pop('*', []) + name_groups.pop(None, [])

        merged_specs = []
        mergeable_groups = tuple(
            concat(
                itervalues(groupby(lambda s: s.optional, group))
                for group in itervalues(name_groups)))
        for group in mergeable_groups:
            target_groups = groupby(attrgetter('target'), group)
            target_groups.pop(None, None)
            if len(target_groups) > 1:
                raise ValueError("Incompatible MatchSpec merge:%s" %
                                 dashlist(group))
            merged_specs.append(
                reduce(lambda x, y: x._merge(y), group
                       ) if len(group) > 1 else group[0])
        return tuple(concatv(merged_specs, unmergeable))
Example #41
0
    def transform(
            self,
            doclikes: Iterable[types.DocLike]) -> Iterable[Tuple[str, ...]]:
        """
        Convert a sequence of spaCy Docs or Spans into an ordered, nested sequence
        of terms as strings.

        Args:
            doclikes

        Yields:
            Ordered sequence of terms as strings for next Doc or Span.
        """
        normalize_ = self.normalize
        for doclike in doclikes:
            terms = itertoolz.concat(
                tokenizer(doclike) for tokenizer in self.tokenizers)
            if self.dedupe is True:
                terms = itertoolz.unique(terms, lambda span:
                                         (span.start, span.end))
            yield tuple(normalize_(term) for term in terms)
Example #42
0
def get_ngram_candidates(
    doc: Doc,
    ns: int | Collection[int],
    *,
    include_pos: Optional[str | Collection[str]] = ("NOUN", "PROPN", "ADJ"),
) -> Iterable[Tuple[Token, ...]]:
    """
    Get candidate keyterms from ``doc``, where candidates are n-length sequences
    of tokens (for all n in ``ns``) that don't start/end with a stop word or
    contain punctuation tokens, and whose constituent tokens are filtered by POS tag.

    Args:
        doc
        ns: One or more n values for which to generate n-grams. For example,
            ``2`` gets bigrams; ``(2, 3)`` gets bigrams and trigrams.
        include_pos: One or more POS tags with which to filter ngrams.
            If None, include tokens of all POS tags.

    Yields:
        Next ngram candidate, as a tuple of constituent Tokens.

    See Also:
        :func:`textacy.extract.ngrams()`
    """
    ns = utils.to_collection(ns, int, tuple)
    include_pos = utils.to_collection(include_pos, str, set)
    ngrams = itertoolz.concat(itertoolz.sliding_window(n, doc) for n in ns)
    ngrams = (
        ngram
        for ngram in ngrams
        if not (ngram[0].is_stop or ngram[-1].is_stop)
        and not any(word.is_punct or word.is_space for word in ngram)
    )
    if include_pos:
        ngrams = (
            ngram for ngram in ngrams if all(word.pos_ in include_pos for word in ngram)
        )
    for ngram in ngrams:
        yield ngram
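A brief usage sketch for the function above, assuming spaCy and its en_core_web_sm model are installed; the sentence is illustrative.

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Statistical keyphrase extraction finds salient noun phrases.")
candidates = list(get_ngram_candidates(doc, (2, 3)))
# each candidate is a tuple of 2 or 3 Tokens that survived the stop-word,
# punctuation, and POS filters described in the docstring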
Example #43
0
def get_random_sample(seq, n, stratify=True, random_state=None):
    """
    Args:
        seq (Sequence)
        n (int)
        stratify (bool)
        random_state (int)

    Returns:
        list
    """
    random.seed(a=random_state)
    if stratify is True:
        grped = itertoolz.groupby(operator.itemgetter(1), seq)
        n_per_grp = max(int(round(n / len(grped))), 1)
        sample = list(
            itertoolz.concat(
                random.sample(examples, min(len(examples), n_per_grp))
                for examples in grped.values()))
        random.shuffle(sample)
        return sample[:n]
    else:
        return random.sample(seq, min(len(seq), n))
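A hypothetical usage of the stratified branch above; since groupby keys on element [1], seq is assumed to hold (item, label) pairs.

labeled = [("a", 0), ("b", 0), ("c", 1), ("d", 1), ("e", 1), ("f", 2)]
sample = get_random_sample(labeled, n=3, stratify=True, random_state=42)
# roughly n / n_groups items are drawn per label, shuffled, then truncated to n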
Example #44
0
    def to_terms_list(self, ngrams=(1, 2, 3), named_entities=True,
                      lemmatize=True, lowercase=False, as_strings=False,
                      **kwargs):
        """
        Transform ``Doc`` into a sequence of ngrams and/or named entities, which
        aren't necessarily in order of appearance, where each term appears in
        the list with the same frequency that it appears in ``Doc``.

        Args:
            ngrams (int or Set[int]): n of which n-grams to include; ``(1, 2, 3)``
                (default) includes unigrams (words), bigrams, and trigrams; `2`
                if only bigrams are wanted; falsy (e.g. False) to not include any
            named_entities (bool): if True (default), include named entities
                in the terms list; note: if ngrams are also included, named
                entities are added *first*, and any ngrams that exactly overlap
                with an entity are skipped to prevent double-counting
            lemmatize (bool): if True (default), lemmatize all terms
            lowercase (bool): if True and `lemmatize` is False, words are lower-
                cased
            as_strings (bool): if True, terms are returned as strings; if False
                (default), terms are returned as their unique integer ids
            kwargs:
                - filter_stops (bool)
                - filter_punct (bool)
                - filter_nums (bool)
                - include_pos (str or Set[str])
                - exclude_pos (str or Set[str])
                - min_freq (int)
                - include_types (str or Set[str])
                - exclude_types (str or Set[str])
                - drop_determiners (bool)
                see :func:`extract.words <textacy.extract.words>`,
                :func:`extract.ngrams <textacy.extract.ngrams>`,
                and :func:`extract.named_entities <textacy.extract.named_entities>`
                for more information on these parameters

        Yields:
            int or str: the next term in the terms list, either as a unique
                integer id or as a string

        Raises:
            ValueError: if neither ``named_entities`` nor ``ngrams`` is included

        .. note:: Despite the name, this is a generator function; to get an
            actual list of terms, call ``list(doc.to_terms_list())``.
        """
        if not named_entities and not ngrams:
            raise ValueError('either `named_entities` or `ngrams` must be included')
        if isinstance(ngrams, int):
            ngrams = (ngrams,)
        if named_entities is True:
            ne_kwargs = {
                'include_types': kwargs.get('include_types'),
                'exclude_types': kwargs.get('exclude_types'),
                'drop_determiners': kwargs.get('drop_determiners', True),
                'min_freq': kwargs.get('min_freq', 1)}
        if ngrams:
            ngram_kwargs = {
                'filter_stops': kwargs.get('filter_stops', True),
                'filter_punct': kwargs.get('filter_punct', True),
                'filter_nums': kwargs.get('filter_nums', False),
                'include_pos': kwargs.get('include_pos'),
                'exclude_pos': kwargs.get('exclude_pos'),
                'min_freq': kwargs.get('min_freq', 1)}

        terms = []
        # special case: ensure that named entities aren't double-counted when
        # adding words or ngrams that were already added as named entities
        if named_entities is True and ngrams:
            ents = tuple(textacy.extract.named_entities(self, **ne_kwargs))
            ent_idxs = {(ent.start, ent.end) for ent in ents}
            terms.append(ents)
            for n in ngrams:
                if n == 1:
                    terms.append(
                        (word for word in textacy.extract.words(self, **ngram_kwargs)
                         if (word.idx, word.idx + 1) not in ent_idxs))
                else:
                    terms.append(
                        (ngram for ngram in textacy.extract.ngrams(self, n, **ngram_kwargs)
                         if (ngram.start, ngram.end) not in ent_idxs))
        # otherwise, no need to check for overlaps
        else:
            if named_entities is True:
                terms.append(textacy.extract.named_entities(self, **ne_kwargs))
            else:
                for n in ngrams:
                    if n == 1:
                        terms.append(textacy.extract.words(self, **ngram_kwargs))
                    else:
                        terms.append(textacy.extract.ngrams(self, n, **ngram_kwargs))

        terms = itertoolz.concat(terms)

        # convert token and span objects into integer ids
        if as_strings is False:
            if lemmatize is True:
                for term in terms:
                    try:
                        yield term.lemma
                    except AttributeError:
                        yield self.spacy_stringstore[term.lemma_]
            elif lowercase is True:
                for term in terms:
                    try:
                        yield term.lower
                    except AttributeError:
                        yield self.spacy_stringstore[term.orth_.lower()]
            else:
                for term in terms:
                    try:
                        yield term.orth
                    except AttributeError:
                        yield self.spacy_stringstore[term.orth_]
        # convert token and span objects into strings
        else:
            if lemmatize is True:
                for term in terms:
                    yield term.lemma_
            elif lowercase is True:
                for term in terms:
                    try:
                        yield term.lower_
                    except AttributeError:
                        yield term.orth_.lower()
            else:
                for term in terms:
                    yield term.orth_
Example #45
0
def explode_directories(child_directories, already_split=False):
    # get all directories including parents
    # use already_split=True for the result of get_all_directories()
    maybe_split = lambda x: x if already_split else x.split('/')
    return set(concat(accumulate(join, maybe_split(directory)) for directory in child_directories))
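For reference, a small self-contained sketch of what the function produces, assuming `join` is os.path.join and `accumulate`/`concat` come from toolz, as in the surrounding code; output shown for POSIX-style separators.

from os.path import join
from toolz import accumulate, concat

def explode_directories(child_directories, already_split=False):
    maybe_split = lambda x: x if already_split else x.split('/')
    return set(concat(accumulate(join, maybe_split(d)) for d in child_directories))

print(explode_directories(['lib/python3.7/site-packages']))
# {'lib', 'lib/python3.7', 'lib/python3.7/site-packages'}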
Example #46
0
def sgrank(doc, window_width=1500, n_keyterms=10, idf=None):
    """
    Extract key terms from a document using the [SGRank]_ algorithm.

    Args:
        doc (``spacy.Doc``)
        window_width (int, optional): width of sliding window in which term
            co-occurrences are said to occur
        n_keyterms (int or float, optional): if int, number of top-ranked terms
            to return as keyterms; if float, must be in the interval (0.0, 1.0],
            representing the fraction of top-ranked terms to return as keyterms
        idf (dict, optional): mapping of
            {`normalized_str(term) <textacy.spacy_utils.normalized_str>`: inverse document frequency}
            for re-weighting of unigrams (n-grams with n > 1 have df assumed = 1);
            NOTE: results are better with idf information

    Returns:
        list[(str, float)]: sorted list of top ``n_keyterms`` key terms and their
            corresponding SGRank scores

    Raises:
        ValueError: if ``n_keyterms`` is a float but not in (0.0, 1.0]

    References:
        .. [SGRank] Danesh, Sumner, and Martin. "SGRank: Combining Statistical and
           Graphical Methods to Improve the State of the Art in Unsupervised Keyphrase
           Extraction". Lexical and Computational Semantics (* SEM 2015) (2015): 117.
    """
    if isinstance(n_keyterms, float):
        if not 0.0 < n_keyterms <= 1.0:
            raise ValueError('`n_keyterms` must be an int, or a float between 0.0 and 1.0')
    n_toks = len(doc)
    min_term_freq = min(n_toks // 1500, 4)

    # build full list of candidate terms
    terms = list(itertoolz.concat(
        extract.ngrams(doc, n, filter_stops=True, filter_punct=True, filter_nums=False,
                       good_pos_tags={'NOUN', 'ADJ'}, min_freq=min_term_freq)
        for n in range(1, 7)))
    # if inverse document frequencies available, also add verbs
    # verbs without IDF downweighting dominate the results, and not in a good way
    if idf:
        terms.extend(itertoolz.concat(
            extract.ngrams(doc, n, filter_stops=True, filter_punct=True, filter_nums=False,
                           good_pos_tags={'VERB'}, min_freq=min_term_freq)
            for n in range(1, 7)))

    terms_as_strs = {id(term): spacy_utils.normalized_str(term)
                     for term in terms}

    # pre-filter terms to the top 20% ranked by TF or modified TF*IDF, if available
    n_top_20pct = int(len(terms) * 0.2)
    term_counts = Counter(terms_as_strs[id(term)] for term in terms)
    if idf:
        mod_tfidfs = {term: count * idf[term] if ' ' not in term else count
                      for term, count in term_counts.items()}
        top_term_texts = {term for term, _ in sorted(
            mod_tfidfs.items(), key=itemgetter(1), reverse=True)[:n_top_20pct]}
    else:
        top_term_texts = {term for term, _ in term_counts.most_common(n_top_20pct)}

    terms = [term for term in terms
             if terms_as_strs[id(term)] in top_term_texts]

    # compute term weights from statistical attributes
    term_weights = {}
    set_terms_as_str = {terms_as_strs[id(term)] for term in terms}
    n_toks_plus_1 = n_toks + 1
    for term in terms:
        term_str = terms_as_strs[id(term)]
        pos_first_occ_factor = math.log(n_toks_plus_1 / (term.start + 1))
        # TODO: assess if len(t) puts too much emphasis on long terms
        # alternative: term_len = 1 if ' ' not in term else math.sqrt(len(term))
        term_len = 1 if ' ' not in term else len(term)
        term_count = term_counts[term_str]
        subsum_count = sum(term_counts[t2] for t2 in set_terms_as_str
                           if t2 != term_str and term_str in t2)
        term_freq_factor = (term_count - subsum_count)
        if idf and ' ' not in term_str:
            term_freq_factor *= idf[term_str]
        term_weights[term_str] = term_freq_factor * pos_first_occ_factor * term_len

    # filter terms to only those with positive weights
    terms = [term for term in terms
             if term_weights[terms_as_strs[id(term)]] > 0]

    n_coocs = defaultdict(lambda: defaultdict(int))
    sum_logdists = defaultdict(lambda: defaultdict(float))

    # iterate over windows
    for start_ind in range(n_toks):
        end_ind = start_ind + window_width
        window_terms = (term for term in terms
                        if start_ind <= term.start <= end_ind)
        # get all term combinations within the window
        for t1, t2 in itertools.combinations(window_terms, 2):
            if t1 is t2:
                continue
            n_coocs[terms_as_strs[id(t1)]][terms_as_strs[id(t2)]] += 1
            try:
                sum_logdists[terms_as_strs[id(t1)]][terms_as_strs[id(t2)]] += \
                    math.log(window_width / abs(t1.start - t2.start))
            except ZeroDivisionError:  # HACK: pretend that they're 1 token apart
                sum_logdists[terms_as_strs[id(t1)]][terms_as_strs[id(t2)]] += \
                    math.log(window_width)
        if end_ind > n_toks:
            break

    # compute edge weights between co-occurring terms (nodes)
    edge_weights = defaultdict(lambda: defaultdict(float))
    for t1, t2s in sum_logdists.items():
        for t2 in t2s:
            edge_weights[t1][t2] = (sum_logdists[t1][t2] / n_coocs[t1][t2]) * term_weights[t1] * term_weights[t2]
    # normalize edge weights by sum of outgoing edge weights per term (node)
    norm_edge_weights = []
    for t1, t2s in edge_weights.items():
        sum_edge_weights = sum(t2s.values())
        norm_edge_weights.extend((t1, t2, {'weight': weight / sum_edge_weights})
                                 for t2, weight in t2s.items())

    # build the weighted directed graph from edges, rank nodes by pagerank
    graph = nx.DiGraph()
    graph.add_edges_from(norm_edge_weights)
    term_ranks = nx.pagerank_scipy(graph)

    if isinstance(n_keyterms, float):
        n_keyterms = int(len(term_ranks) * n_keyterms)

    return sorted(term_ranks.items(), key=itemgetter(1), reverse=True)[:n_keyterms]
Example #47
0
    def query_all(cls, package_ref_or_match_spec, pkgs_dirs=None):
        if pkgs_dirs is None:
            pkgs_dirs = context.pkgs_dirs

        return concat(pcache.query(package_ref_or_match_spec)
                      for pcache in cls.all_caches_writable_first(pkgs_dirs))
Example #48
0
 def get_all_extracted_entries(cls):
     package_caches = (cls(pd) for pd in context.pkgs_dirs)
     return tuple(pc_entry for pc_entry in concat(map(itervalues, package_caches))
                  if pc_entry.is_extracted)
Example #49
0
    def as_terms_list(self, words=True, ngrams=(2, 3), named_entities=True,
                      dedupe=True, lemmatize=True, **kwargs):
        """
        Represent doc as a sequence of terms -- which aren't necessarily in order --
        including words (unigrams), ngrams (for a range of n), and named entities.
        NOTE: Despite the name, this is a generator function; to get a *list* of terms,
        just wrap the call like ``list(doc.as_terms_list())``.

        Args:
            words (bool, optional): if True (default), include words in the terms list
            ngrams (tuple(int), optional): include a range of ngrams in the terms list;
                default is ``(2, 3)``, i.e. bigrams and trigrams are included; if
                ngrams aren't wanted, set to False-y

                NOTE: if n=1 (words) is included here and ``words`` is True, n=1 is skipped
            named_entities (bool, optional): if True (default), include named entities
                in the terms list
            dedupe (bool, optional): if True (default), named entities are added first
                to the terms list, and any words or ngrams that exactly overlap with
                previously added entities are skipped to prevent double-counting;
                since words and ngrams (n > 1) are inherently exclusive, this only
                applies to entities; you almost certainly want this to be True
            lemmatize (bool, optional): if True (default), lemmatize all terms;
                otherwise, return the text as it appeared
            kwargs:
                filter_stops (bool)
                filter_punct (bool)
                filter_nums (bool)
                good_pos_tags (set(str))
                bad_pos_tags (set(str))
                min_freq (int)
                good_ne_types (set(str))
                bad_ne_types (set(str))
                drop_determiners (bool)

        Yields:
            str: the next term in the terms list
        """
        all_terms = []
        # special case: ensure that named entities aren't double-counted when
        # adding words or ngrams that were already added as named entities
        if dedupe is True and named_entities is True and (words is True or ngrams):
            ents = list(self.named_entities(**kwargs))
            ent_idxs = {(ent.start, ent.end) for ent in ents}
            all_terms.append(ents)
            if words is True:
                all_terms.append((word for word in self.words(**kwargs)
                                  if (word.idx, word.idx + 1) not in ent_idxs))
            if ngrams:
                for n in range(ngrams[0], ngrams[1] + 1):
                    if n == 1 and words is True:
                        continue
                    all_terms.append((ngram for ngram in self.ngrams(n, **kwargs)
                                      if (ngram.start, ngram.end) not in ent_idxs))
        # otherwise add everything in, duplicates and all
        else:
            if named_entities is True:
                all_terms.append(self.named_entities(**kwargs))
            if words is True:
                all_terms.append(self.words(**kwargs))
            if ngrams:
                for n in range(ngrams[0], ngrams[1] + 1):
                    if n == 1 and words is True:
                        continue
                    all_terms.append(self.ngrams(n, **kwargs))

        if lemmatize is True:
            for term in itertoolz.concat(all_terms):
                yield term.lemma_
        else:
            for term in itertoolz.concat(all_terms):
                yield term.text
Example #50
0
    def make_actions_for_record(pref_or_spec):
        assert pref_or_spec is not None
        # returns a cache_action and extract_action

        # if the pref or spec has an md5 value
        # look in all caches for package cache record that is
        #   (1) already extracted, and
        #   (2) matches the md5
        # If one exists, no actions are needed.
        md5 = pref_or_spec.get('md5')
        if md5:
            extracted_pcrec = next((
                pcrec for pcrec in concat(PackageCacheData(pkgs_dir).query(pref_or_spec)
                                          for pkgs_dir in context.pkgs_dirs)
                if pcrec.is_extracted
            ), None)
            if extracted_pcrec:
                return None, None

        # there is no extracted dist that can work, so now we look for tarballs that
        #   aren't extracted
        # first we look in all writable caches, and if we find a match, we extract in place
        # otherwise, if we find a match in a non-writable cache, we link it to the first writable
        #   cache, and then extract
        first_writable_cache = PackageCacheData.first_writable()
        pcrec_from_writable_cache = next((
            pcrec for pcrec in concat(pcache.query(pref_or_spec)
                                      for pcache in PackageCacheData.writable_caches())
            if pcrec.is_fetched
        ), None)
        if pcrec_from_writable_cache:
            # extract in place
            extract_axn = ExtractPackageAction(
                source_full_path=pcrec_from_writable_cache.package_tarball_full_path,
                target_pkgs_dir=dirname(pcrec_from_writable_cache.package_tarball_full_path),
                target_extracted_dirname=basename(pcrec_from_writable_cache.extracted_package_dir),
                record_or_spec=pcrec_from_writable_cache,
                md5sum=pcrec_from_writable_cache.md5,
            )
            return None, extract_axn

        pcrec_from_read_only_cache = next((
            pcrec for pcrec in concat(pcache.query(pref_or_spec)
                                      for pcache in PackageCacheData.read_only_caches())
            if pcrec.is_fetched
        ), None)

        if pcrec_from_read_only_cache:
            # we found a tarball, but it's in a read-only package cache
            # we need to link the tarball into the first writable package cache,
            #   and then extract
            try:
                expected_size_in_bytes = pref_or_spec.size
            except AttributeError:
                expected_size_in_bytes = None
            cache_axn = CacheUrlAction(
                url=path_to_url(pcrec_from_read_only_cache.package_tarball_full_path),
                target_pkgs_dir=first_writable_cache.pkgs_dir,
                target_package_basename=pcrec_from_read_only_cache.fn,
                md5sum=md5,
                expected_size_in_bytes=expected_size_in_bytes,
            )
            trgt_extracted_dirname = pcrec_from_read_only_cache.fn[:-len(CONDA_TARBALL_EXTENSION)]
            extract_axn = ExtractPackageAction(
                source_full_path=cache_axn.target_full_path,
                target_pkgs_dir=first_writable_cache.pkgs_dir,
                target_extracted_dirname=trgt_extracted_dirname,
                record_or_spec=pcrec_from_read_only_cache,
                md5sum=pcrec_from_read_only_cache.md5,
            )
            return cache_axn, extract_axn

        # if we got here, we couldn't find a matching package in the caches
        #   we'll have to download one; fetch and extract
        url = pref_or_spec.get('url')
        assert url
        try:
            expected_size_in_bytes = pref_or_spec.size
        except AttributeError:
            expected_size_in_bytes = None
        cache_axn = CacheUrlAction(
            url=url,
            target_pkgs_dir=first_writable_cache.pkgs_dir,
            target_package_basename=pref_or_spec.fn,
            md5sum=md5,
            expected_size_in_bytes=expected_size_in_bytes,
        )
        extract_axn = ExtractPackageAction(
            source_full_path=cache_axn.target_full_path,
            target_pkgs_dir=first_writable_cache.pkgs_dir,
            target_extracted_dirname=pref_or_spec.fn[:-len(CONDA_TARBALL_EXTENSION)],
            record_or_spec=pref_or_spec,
            md5sum=md5,
        )
        return cache_axn, extract_axn
Example #51
0
def test_concat():
    assert list(concat([[], [], []])) == []
    assert (list(take(5, concat([['a', 'b'], range(1000000000)]))) ==
            ['a', 'b', 0, 1, 2])
Example #52
0
def execute_config(args, parser):

    json_warnings = []
    json_get = {}

    if args.show_sources:
        if context.json:
            print(json.dumps(context.collect_all(), sort_keys=True,
                             indent=2, separators=(',', ': ')))
        else:
            lines = []
            for source, reprs in iteritems(context.collect_all()):
                lines.append("==> %s <==" % source)
                lines.extend(format_dict(reprs))
                lines.append('')
            print('\n'.join(lines))
        return

    if args.show is not None:
        if args.show:
            paramater_names = args.show
            all_names = context.list_parameters()
            not_params = set(paramater_names) - set(all_names)
            if not_params:
                from ..exceptions import ArgumentError
                from ..resolve import dashlist
                raise ArgumentError("Invalid configuration parameters: %s" % dashlist(not_params))
        else:
            parameter_names = context.list_parameters()

        from collections import OrderedDict

        d = OrderedDict((key, getattr(context, key)) for key in parameter_names)
        if context.json:
            print(json.dumps(d, sort_keys=True, indent=2, separators=(',', ': '),
                  cls=EntityEncoder))
        else:
            # Add in custom formatting
            if 'custom_channels' in d:
                d['custom_channels'] = {
                    channel.name: "%s://%s" % (channel.scheme, channel.location)
                    for channel in itervalues(d['custom_channels'])
                }
            if 'custom_multichannels' in d:
                from ..resolve import dashlist
                d['custom_multichannels'] = {
                    multichannel_name: dashlist(channels, indent=4)
                    for multichannel_name, channels in iteritems(d['custom_multichannels'])
                }

            print('\n'.join(format_dict(d)))
        context.validate_configuration()
        return

    if args.describe is not None:
        if args.describe:
            parameter_names = args.describe
            all_names = context.list_parameters()
            not_params = set(parameter_names) - set(all_names)
            if not_params:
                from ..exceptions import ArgumentError
                from ..resolve import dashlist
                raise ArgumentError("Invalid configuration parameters: %s" % dashlist(not_params))
            if context.json:
                print(json.dumps([context.describe_parameter(name) for name in parameter_names],
                                 sort_keys=True, indent=2, separators=(',', ': '),
                                 cls=EntityEncoder))
            else:
                builder = []
                builder.extend(concat(parameter_description_builder(name)
                                      for name in parameter_names))
                print('\n'.join(builder))
        else:
            if context.json:
                skip_categories = ('CLI-only', 'Hidden and Undocumented')
                parameter_names = sorted(concat(
                    parameter_names for category, parameter_names in context.category_map.items()
                    if category not in skip_categories
                ))
                print(json.dumps([context.describe_parameter(name) for name in parameter_names],
                                 sort_keys=True, indent=2, separators=(',', ': '),
                                 cls=EntityEncoder))
            else:
                print(describe_all_parameters())
        return

    if args.validate:
        context.validate_all()
        return

    if args.system:
        rc_path = sys_rc_path
    elif args.env:
        if 'CONDA_PREFIX' in os.environ:
            rc_path = join(os.environ['CONDA_PREFIX'], '.condarc')
        else:
            rc_path = user_rc_path
    elif args.file:
        rc_path = args.file
    else:
        rc_path = user_rc_path

    if args.write_default:
        if isfile(rc_path):
            with open(rc_path) as fh:
                data = fh.read().strip()
            if data:
                raise CondaError("The file '%s' "
                                 "already contains configuration information.\n"
                                 "Remove the file to proceed.\n"
                                 "Use `conda config --describe` to display default configuration."
                                 % rc_path)

        with open(rc_path, 'w') as fh:
            fh.write(describe_all_parameters())
        return

    # read existing condarc
    if os.path.exists(rc_path):
        with open(rc_path, 'r') as fh:
            rc_config = yaml_load(fh) or {}
    else:
        rc_config = {}

    grouped_parameters = groupby(lambda p: context.describe_parameter(p)['parameter_type'],
                                 context.list_parameters())
    primitive_parameters = grouped_parameters['primitive']
    sequence_parameters = grouped_parameters['sequence']
    map_parameters = grouped_parameters['map']

    # Get
    if args.get is not None:
        context.validate_all()
        if args.get == []:
            args.get = sorted(rc_config.keys())
        for key in args.get:
            if key not in primitive_parameters + sequence_parameters:
                message = "unknown key %s" % key
                if not context.json:
                    print(message, file=sys.stderr)
                else:
                    json_warnings.append(message)
                continue
            if key not in rc_config:
                continue

            if context.json:
                json_get[key] = rc_config[key]
                continue

            if isinstance(rc_config[key], (bool, string_types)):
                print("--set", key, rc_config[key])
            else:  # assume the key is a list-type
                # Note, since conda config --add prepends, these are printed in
                # the reverse order so that entering them in this order will
                # recreate the same file
                items = rc_config.get(key, [])
                numitems = len(items)
                for q, item in enumerate(reversed(items)):
                    # Use repr so that it can be pasted back in to conda config --add
                    if key == "channels" and q in (0, numitems-1):
                        print("--add", key, repr(item),
                              "  # lowest priority" if q == 0 else "  # highest priority")
                    else:
                        print("--add", key, repr(item))

    if args.stdin:
        content = timeout(5, sys.stdin.read)
        if not content:
            return
        try:
            parsed = yaml_load(content)
            rc_config.update(parsed)
        except Exception:  # pragma: no cover
            from ..exceptions import ParseError
            raise ParseError("invalid yaml content:\n%s" % content)

    # prepend, append, add
    for arg, prepend in zip((args.prepend, args.append), (True, False)):
        for key, item in arg:
            if key == 'channels' and key not in rc_config:
                rc_config[key] = ['defaults']
            if key not in sequence_parameters:
                from ..exceptions import CondaValueError
                raise CondaValueError("Key '%s' is not a known sequence parameter." % key)
            if not isinstance(rc_config.get(key, []), list):
                from ..exceptions import CouldntParseError
                bad = rc_config[key].__class__.__name__
                raise CouldntParseError("key %r should be a list, not %s." % (key, bad))
            arglist = rc_config.setdefault(key, [])
            if item in arglist:
                # Right now, no list key is allowed to contain duplicates
                message = "Warning: '%s' already in '%s' list, moving to the %s" % (
                    item, key, "top" if prepend else "bottom")
                arglist = rc_config[key] = [p for p in arglist if p != item]
                if not context.json:
                    print(message, file=sys.stderr)
                else:
                    json_warnings.append(message)
            arglist.insert(0 if prepend else len(arglist), item)

    # Set
    for key, item in args.set:
        key, subkey = key.split('.', 1) if '.' in key else (key, None)
        if key in primitive_parameters:
            value = context.typify_parameter(key, item)
            rc_config[key] = value
        elif key in map_parameters:
            argmap = rc_config.setdefault(key, {})
            argmap[subkey] = item
        else:
            from ..exceptions import CondaValueError
            raise CondaValueError("Key '%s' is not a known primitive parameter." % key)

    # Remove
    for key, item in args.remove:
        key, subkey = key.split('.', 1) if '.' in key else (key, None)
        if key not in rc_config:
            if key != 'channels':
                from ..exceptions import CondaKeyError
                raise CondaKeyError(key, "key %r is not in the config file" % key)
            rc_config[key] = ['defaults']
        if item not in rc_config[key]:
            from ..exceptions import CondaKeyError
            raise CondaKeyError(key, "%r is not in the %r key of the config file" %
                                (item, key))
        rc_config[key] = [i for i in rc_config[key] if i != item]

    # Remove Key
    for key, in args.remove_key:
        key, subkey = key.split('.', 1) if '.' in key else (key, None)
        if key not in rc_config:
            from ..exceptions import CondaKeyError
            raise CondaKeyError(key, "key %r is not in the config file" %
                                key)
        del rc_config[key]

    # config.rc_keys
    if not args.get:

        # Add representers for enums.
        # Because a representer cannot be added for the base Enum class (it must be added for
        # each specific Enum subclass), and because of import rules, I don't know of a better
        # location to do this.
        def enum_representer(dumper, data):
            return dumper.represent_str(str(data))

        yaml.representer.RoundTripRepresenter.add_representer(SafetyChecks, enum_representer)
        yaml.representer.RoundTripRepresenter.add_representer(PathConflict, enum_representer)

        try:
            with open(rc_path, 'w') as rc:
                rc.write(yaml_dump(rc_config))
        except (IOError, OSError) as e:
            raise CondaError('Cannot write to condarc file at %s\n'
                             'Caused by %r' % (rc_path, e))

    if context.json:
        from .common import stdout_json_success
        stdout_json_success(
            rc_path=rc_path,
            warnings=json_warnings,
            get=json_get
        )
    return
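`execute_config` leans on `groupby` to bucket configuration parameter names by their type and on `concat` to flatten per-category name lists. A small self-contained sketch of both calls follows; the `descriptions` and `category_map` dicts are hypothetical stand-ins for `context.describe_parameter` and `context.category_map`.

from toolz import concat, groupby

# hypothetical parameter metadata standing in for context.describe_parameter()
descriptions = {
    'channels': {'parameter_type': 'sequence'},
    'always_yes': {'parameter_type': 'primitive'},
    'proxy_servers': {'parameter_type': 'map'},
}

# bucket names by their parameter type, just like grouped_parameters above
grouped = groupby(lambda name: descriptions[name]['parameter_type'], descriptions)
print(grouped)
# {'sequence': ['channels'], 'primitive': ['always_yes'], 'map': ['proxy_servers']}

# flatten per-category name tuples into one sorted list, as the --describe branch does
category_map = {'Basic': ('channels', 'always_yes'), 'Network': ('proxy_servers',)}
print(sorted(concat(category_map.values())))
# ['always_yes', 'channels', 'proxy_servers']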
Example #53
0
def direct_quotations(doc):
    """
    Baseline, not-great attempt at direct quotation extraction (no indirect
    or mixed quotations) using rules and patterns. English only.

    Args:
        doc (``spacy.Doc``)

    Yields:
        (``spacy.Span``, ``spacy.Token``, ``spacy.Span``): next quotation in ``doc``
            represented as a (speaker, reporting verb, quotation) 3-tuple

    Notes:
        Loosely inspired by Krestel, Bergler, Witte. "Minding the Source: Automatic
        Tagging of Reported Speech in Newspaper Articles".

    TODO: Better approach would use ML, but needs a training dataset.
    """
    quote_end_punct = {',', '.', '?', '!'}
    quote_indexes = set(itertoolz.concat(
        (m.start(), m.end() - 1) for m in re.finditer(r"(\".*?\")|(''.*?'')|(``.*?'')", doc.string)))
    quote_positions = list(itertoolz.partition(
        2, sorted(tok.i for tok in doc if tok.idx in quote_indexes)))
    sents = list(doc.sents)
    sent_positions = [(sent.start, sent.end) for sent in sents]

    for q0, q1 in quote_positions:
        quote = doc[q0: q1 + 1]

        # we're only looking for direct quotes, not indirect or mixed
        if not any(char in quote_end_punct for char in quote.text[-4:]):
            continue

        # get adjacent sentences
        candidate_sent_indexes = []
        for i, (s0, s1) in enumerate(sent_positions):

            if s0 <= q1 + 1 and s1 > q1:
                candidate_sent_indexes.append(i)
            elif s0 < q0 and s1 >= q0 - 1:
                candidate_sent_indexes.append(i)

        for si in candidate_sent_indexes:
            sent = sents[si]

            # get any reporting verbs
            rvs = [tok for tok in sent
                   if spacy_utils.preserve_case(tok) is False
                   and tok.lemma_ in REPORTING_VERBS
                   and tok.pos_ == 'VERB'
                   and not any(oq0 <= tok.i <= oq1 for oq0, oq1 in quote_positions)]

            # get target offset against which to measure distances of NEs
            if rvs:
                if len(rvs) == 1:
                    rv = rvs[0]
                else:
                    min_rv_dist = 1000
                    for rv_candidate in rvs:
                        rv_dist = min(abs(rv_candidate.i - qp) for qp in (q0, q1))
                        if rv_dist < min_rv_dist:
                            rv = rv_candidate
                            min_rv_dist = rv_dist
                        else:
                            break
            else:
                # TODO: do we have no other recourse?!
                continue

            try:
                rv_subj = get_subjects_of_verb(rv)[0]
            except IndexError:
                continue

            span = get_span_for_compound_noun(rv_subj)
            speaker = doc[span[0]: span[1] + 1]

            yield (speaker, rv, quote)
            break
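The function above pairs `itertoolz.concat` with `itertoolz.partition(2, ...)` to turn regex match boundaries into (start, end) quote pairs. A reduced sketch of that pairing, using plain character offsets instead of spaCy token indexes so it runs without a language model:

import re
from toolz import itertoolz

text = 'She said, "It works." Then he replied, "Prove it."'

# collect the opening and closing offsets of every quoted span, flattened into one sequence
quote_indexes = sorted(itertoolz.concat(
    (m.start(), m.end() - 1) for m in re.finditer(r'".*?"', text)))

# pair the offsets back up into (start, end) tuples, one per quotation
quote_positions = list(itertoolz.partition(2, quote_indexes))
print(quote_positions)             # [(10, 20), (39, 49)]
for start, end in quote_positions:
    print(text[start:end + 1])     # "It works."  then  "Prove it."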
Example #54
0
    def solve_final_state(self, update_modifier=NULL, deps_modifier=NULL, prune=NULL,
                          ignore_pinned=NULL, force_remove=NULL):
        """Gives the final, solved state of the environment.

        Args:
            update_modifier (UpdateModifier):
                An optional flag directing how updates are handled regarding packages already
                existing in the environment.

            deps_modifier (DepsModifier):
                An optional flag indicating special solver handling for dependencies. The
                default solver behavior is to be as conservative as possible with dependency
                updates (in the case the dependency already exists in the environment), while
                still ensuring all dependencies are satisfied.  Options include
                    * NO_DEPS
                    * ONLY_DEPS
                    * UPDATE_DEPS
                    * UPDATE_DEPS_ONLY_DEPS
                    * FREEZE_INSTALLED
            prune (bool):
                If ``True``, the solution will not contain packages that were
                previously brought into the environment as dependencies but are no longer
                required as dependencies and are not user-requested.
            ignore_pinned (bool):
                If ``True``, the solution will ignore pinned package configuration
                for the prefix.
            force_remove (bool):
                Forces removal of a package without removing packages that depend on it.

        Returns:
            Tuple[PackageRef]:
                In sorted dependency order from roots to leaves, the package references for
                the solved state of the environment.

        """
        if update_modifier is NULL:
            update_modifier = context.update_modifier
        else:
            update_modifier = UpdateModifier(text_type(update_modifier).lower())
        if deps_modifier is NULL:
            deps_modifier = context.deps_modifier
        else:
            deps_modifier = DepsModifier(text_type(deps_modifier).lower())
        prune = context.prune if prune is NULL else prune
        ignore_pinned = context.ignore_pinned if ignore_pinned is NULL else ignore_pinned
        force_remove = context.force_remove if force_remove is NULL else force_remove
        specs_to_remove = self.specs_to_remove
        specs_to_add = self.specs_to_add

        # force_remove is a special case where we return early
        if specs_to_remove and force_remove:
            if specs_to_add:
                raise NotImplementedError()
            solution = tuple(prec for prec in PrefixData(self.prefix).iter_records()
                             if not any(spec.match(prec) for spec in specs_to_remove))
            return IndexedSet(PrefixGraph(solution).graph)

        log.debug("solving prefix %s\n"
                  "  specs_to_remove: %s\n"
                  "  specs_to_add: %s\n"
                  "  prune: %s", self.prefix, specs_to_remove, specs_to_add, prune)

        # declare starting point, the initial state of the environment
        # `solution` and `specs_map` are mutated throughout this method
        prefix_data = PrefixData(self.prefix)
        solution = tuple(prec for prec in prefix_data.iter_records())

        # Check if specs are satisfied by current environment. If they are, exit early.
        if (update_modifier == UpdateModifier.SPECS_SATISFIED_SKIP_SOLVE
                and not specs_to_remove and not prune):
            for spec in specs_to_add:
                if not next(prefix_data.query(spec), None):
                    break
            else:
                # All specs match a package in the current environment.
                # Return early, with a solution that should just be PrefixData().iter_records()
                return IndexedSet(PrefixGraph(solution).graph)

        specs_from_history_map = History(self.prefix).get_requested_specs_map()
        if prune:  # or update_modifier == UpdateModifier.UPDATE_ALL  # pending conda/constructor#138  # NOQA
            # Users are struggling with the prune functionality in --update-all, due to
            # https://github.com/conda/constructor/issues/138.  Until that issue is resolved,
            # and for the foreseeable future, it's best to be more conservative with --update-all.

            # Start with empty specs map for UPDATE_ALL because we're optimizing the update
            # only for specs the user has requested; it's ok to remove dependencies.
            specs_map = odict()

            # However, because of https://github.com/conda/constructor/issues/138, we need
            # to hard-code keeping conda, conda-build, and anaconda, if they're already in
            # the environment.
            solution_pkg_names = set(d.name for d in solution)
            ensure_these = (pkg_name for pkg_name in {
                'anaconda', 'conda', 'conda-build',
            } if pkg_name not in specs_from_history_map and pkg_name in solution_pkg_names)
            for pkg_name in ensure_these:
                specs_from_history_map[pkg_name] = MatchSpec(pkg_name)
        else:
            specs_map = odict((d.name, MatchSpec(d.name)) for d in solution)

        # add in historically-requested specs
        specs_map.update(specs_from_history_map)

        # let's pretend for now that this is the right place to build the index
        prepared_specs = set(concatv(
            specs_to_remove,
            specs_to_add,
            itervalues(specs_from_history_map),
        ))

        index, r = self._prepare(prepared_specs)

        if specs_to_remove:
            # In a previous implementation, we invoked SAT here via `r.remove()` to help with
            # spec removal, and then later invoking SAT again via `r.solve()`. Rather than invoking
            # SAT for spec removal determination, we can use the PrefixGraph and simple tree
            # traversal if we're careful about how we handle features. We still invoke sat via
            # `r.solve()` later.
            _track_fts_specs = (spec for spec in specs_to_remove if 'track_features' in spec)
            feature_names = set(concat(spec.get_raw_value('track_features')
                                       for spec in _track_fts_specs))
            graph = PrefixGraph(solution, itervalues(specs_map))

            all_removed_records = []
            no_removed_records_specs = []
            for spec in specs_to_remove:
                # If the spec was a track_features spec, then we need to also remove every
                # package with a feature that matches the track_feature. The
                # `graph.remove_spec()` method handles that for us.
                log.trace("using PrefixGraph to remove records for %s", spec)
                removed_records = graph.remove_spec(spec)
                if removed_records:
                    all_removed_records.extend(removed_records)
                else:
                    no_removed_records_specs.append(spec)

            # ensure that each spec in specs_to_remove is actually associated with removed records
            unmatched_specs_to_remove = tuple(
                spec for spec in no_removed_records_specs
                if not any(spec.match(rec) for rec in all_removed_records)
            )
            if unmatched_specs_to_remove:
                raise PackagesNotFoundError(
                    tuple(sorted(str(s) for s in unmatched_specs_to_remove))
                )

            for rec in all_removed_records:
                # We keep specs (minus the feature part) for the non provides_features packages
                # if they're in the history specs.  Otherwise, we pop them from the specs_map.
                rec_has_a_feature = set(rec.features or ()) & feature_names
                if rec_has_a_feature and rec.name in specs_from_history_map:
                    spec = specs_map.get(rec.name, MatchSpec(rec.name))
                    spec._match_components.pop('features', None)
                    specs_map[spec.name] = spec
                else:
                    specs_map.pop(rec.name, None)

            solution = tuple(graph.graph)

        # We handle as best as possible environments in inconsistent states. To do this,
        # we remove now from consideration the set of packages causing inconsistencies,
        # and then we add them back in following the main SAT call.
        _, inconsistent_precs = r.bad_installed(solution, ())
        add_back_map = {}  # name: (prec, spec)
        if log.isEnabledFor(DEBUG):
            log.debug("inconsistent precs: %s",
                      dashlist(inconsistent_precs) if inconsistent_precs else 'None')
        if inconsistent_precs:
            for prec in inconsistent_precs:
                # pop and save matching spec in specs_map
                add_back_map[prec.name] = (prec, specs_map.pop(prec.name, None))
            solution = tuple(prec for prec in solution if prec not in inconsistent_precs)

        # For the remaining specs in specs_map, add target to each spec. `target` is a reference
        # to the package currently existing in the environment. Setting target instructs the
        # solver to not disturb that package if it's not necessary.
        # If the spec.name is being modified by inclusion in specs_to_add, we don't set `target`,
        # since we *want* the solver to modify/update that package.
        #
        # TLDR: when working with MatchSpec objects,
        #  - to minimize the version change, set MatchSpec(name=name, target=prec.dist_str())
        #  - to freeze the package, set all the components of MatchSpec individually
        for pkg_name, spec in iteritems(specs_map):
            matches_for_spec = tuple(prec for prec in solution if spec.match(prec))
            if matches_for_spec:
                if len(matches_for_spec) != 1:
                    raise CondaError(dals("""
                    Conda encountered an error with your environment.  Please report an issue
                    at https://github.com/conda/conda/issues/new.  In your report, please include
                    the output of 'conda info' and 'conda list' for the active environment, along
                    with the command you invoked that resulted in this error.
                      pkg_name: %s
                      spec: %s
                      matches_for_spec: %s
                    """) % (pkg_name, spec,
                            dashlist((text_type(s) for s in matches_for_spec), indent=4)))
                target_prec = matches_for_spec[0]
                if update_modifier == UpdateModifier.FREEZE_INSTALLED:
                    new_spec = MatchSpec(target_prec)
                else:
                    target = target_prec.dist_str()
                    new_spec = MatchSpec(spec, target=target)
                specs_map[pkg_name] = new_spec
        if log.isEnabledFor(TRACE):
            log.trace("specs_map with targets: %s", specs_map)

        # If we're in UPDATE_ALL mode, we need to drop all the constraints attached to specs,
        # so they can all float and the solver can find the most up-to-date solution. In the case
        # of UPDATE_ALL, `specs_map` wasn't initialized with packages from the current environment,
        # but *only* historically-requested specs.  This lets UPDATE_ALL drop dependencies if
        # they're no longer needed, and their presence would otherwise prevent the updated solution
        # the user most likely wants.
        if update_modifier == UpdateModifier.UPDATE_ALL:
            specs_map = {pkg_name: MatchSpec(spec.name, optional=spec.optional)
                         for pkg_name, spec in iteritems(specs_map)}

        # As a business rule, we never want to update python beyond the current minor version,
        # unless that's requested explicitly by the user (which we actively discourage).
        if 'python' in specs_map:
            python_prefix_rec = prefix_data.get('python')
            if python_prefix_rec:
                python_spec = specs_map['python']
                if not python_spec.get('version'):
                    pinned_version = get_major_minor_version(python_prefix_rec.version) + '.*'
                    specs_map['python'] = MatchSpec(python_spec, version=pinned_version)

        # For the aggressive_update_packages configuration parameter, we strip any target
        # that's been set.
        if not context.offline:
            for spec in context.aggressive_update_packages:
                if spec.name in specs_map:
                    specs_map[spec.name] = spec
            if (context.auto_update_conda and paths_equal(self.prefix, context.root_prefix)
                    and any(prec.name == "conda" for prec in solution)):
                specs_map["conda"] = MatchSpec("conda")

        # add in explicitly requested specs from specs_to_add
        # this overrides any name-matching spec already in the spec map
        specs_map.update((s.name, s) for s in specs_to_add)

        # collect additional specs to add to the solution
        track_features_specs = pinned_specs = ()
        if context.track_features:
            track_features_specs = tuple(MatchSpec(x + '@') for x in context.track_features)
        if not ignore_pinned:
            pinned_specs = get_pinned_specs(self.prefix)

        final_environment_specs = IndexedSet(concatv(
            itervalues(specs_map),
            track_features_specs,
            pinned_specs,
        ))

        # We've previously checked `solution` for consistency (which at that point was the
        # pre-solve state of the environment). Now we check our compiled set of
        # `final_environment_specs` for the possibility of a solution.  If there are conflicts,
        # we can often avoid them by neutering specs that have a target (e.g. removing version
        # constraint) and also making them optional. The result here will be less cases of
        # `UnsatisfiableError` handed to users, at the cost of more packages being modified
        # or removed from the environment.
        conflicting_specs = r.get_conflicting_specs(tuple(final_environment_specs))
        if log.isEnabledFor(DEBUG):
            log.debug("conflicting specs: %s", dashlist(conflicting_specs))
        for spec in conflicting_specs:
            if spec.target:
                final_environment_specs.remove(spec)
                neutered_spec = MatchSpec(spec.name, target=spec.target, optional=True)
                final_environment_specs.add(neutered_spec)

        # Finally! We get to call SAT.
        if log.isEnabledFor(DEBUG):
            log.debug("final specs to add: %s",
                      dashlist(sorted(text_type(s) for s in final_environment_specs)))
        solution = r.solve(tuple(final_environment_specs))  # return value is List[PackageRecord]

        # add back inconsistent packages to solution
        if add_back_map:
            for name, (prec, spec) in iteritems(add_back_map):
                if not any(d.name == name for d in solution):
                    solution.append(prec)
                    if spec:
                        final_environment_specs.add(spec)

        # Special case handling for various DepsModifier flags. Maybe this block could be pulled
        # out into its own non-public helper method?
        if deps_modifier == DepsModifier.NO_DEPS:
            # In the NO_DEPS case, we need to start with the original list of packages in the
            # environment, and then only modify packages that match specs_to_add or
            # specs_to_remove.
            _no_deps_solution = IndexedSet(prefix_data.iter_records())
            only_remove_these = set(prec
                                    for spec in specs_to_remove
                                    for prec in _no_deps_solution
                                    if spec.match(prec))
            _no_deps_solution -= only_remove_these

            only_add_these = set(prec
                                 for spec in specs_to_add
                                 for prec in solution
                                 if spec.match(prec))
            remove_before_adding_back = set(prec.name for prec in only_add_these)
            _no_deps_solution = IndexedSet(prec for prec in _no_deps_solution
                                           if prec.name not in remove_before_adding_back)
            _no_deps_solution |= only_add_these
            solution = _no_deps_solution
        elif (deps_modifier == DepsModifier.ONLY_DEPS
                and update_modifier != UpdateModifier.UPDATE_DEPS):
            # Using a special instance of PrefixGraph to remove youngest child nodes that match
            # the original specs_to_add.  It's important to remove only the *youngest* child nodes,
            # because a typical use might be `conda install --only-deps python=2 flask`, and in
            # that case we'd want to keep python.
            graph = PrefixGraph(solution, specs_to_add)
            graph.remove_youngest_descendant_nodes_with_specs()
            solution = tuple(graph.graph)

        elif update_modifier == UpdateModifier.UPDATE_DEPS:
            # Here we have to SAT solve again :(  It's only now that we know the dependency
            # chain of specs_to_add.
            specs_to_add_names = set(spec.name for spec in specs_to_add)
            update_names = set()
            graph = PrefixGraph(solution, final_environment_specs)
            for spec in specs_to_add:
                node = graph.get_node_by_name(spec.name)
                for ancestor_record in graph.all_ancestors(node):
                    ancestor_name = ancestor_record.name
                    if ancestor_name not in specs_to_add_names:
                        update_names.add(ancestor_name)
            grouped_specs = groupby(lambda s: s.name in update_names, final_environment_specs)
            new_final_environment_specs = set(grouped_specs.get(False, ()))
            update_specs = set(MatchSpec(spec.name, optional=spec.optional)
                               for spec in grouped_specs.get(True, ()))
            final_environment_specs = new_final_environment_specs | update_specs
            solution = r.solve(final_environment_specs)

            if deps_modifier == DepsModifier.ONLY_DEPS:
                # duplicated from DepsModifier.ONLY_DEPS
                graph = PrefixGraph(solution, specs_to_add)
                graph.remove_youngest_descendant_nodes_with_specs()
                solution = tuple(graph.graph)

        if prune:
            graph = PrefixGraph(solution, final_environment_specs)
            graph.prune()
            solution = tuple(graph.graph)

        self._check_solution(solution, pinned_specs)

        solution = IndexedSet(PrefixGraph(solution).graph)
        log.debug("solved prefix %s\n"
                  "  solved_linked_dists:\n"
                  "    %s\n",
                  self.prefix, "\n    ".join(prec.dist_str() for prec in solution))
        return solution
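Several steps in the method above assemble one spec collection from multiple sources with `concatv`, which simply chains its arguments in order before the result is de-duplicated. A tiny sketch of that assembly, with plain strings standing in for MatchSpec objects and `toolz.unique` standing in for the order-preserving dedupe that IndexedSet provides:

from toolz import concatv, unique

specs_from_map = ['python 3.6.*', 'numpy']
track_features_specs = ['mkl@']
pinned_specs = ['numpy', 'openssl 1.0.*']

# chain the three sources in order, then de-duplicate while preserving order,
# roughly what IndexedSet(concatv(...)) does in solve_final_state
final_specs = tuple(unique(concatv(specs_from_map, track_features_specs, pinned_specs)))
print(final_specs)
# ('python 3.6.*', 'numpy', 'mkl@', 'openssl 1.0.*')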
Example #55
0
 def query_all(spec):
     futures = tuple(executor.submit(sd.query, spec) for sd in subdir_datas)
     return tuple(concat(future.result() for future in as_completed(futures)))
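Example #55 fans a query out across several subdir objects via an executor and flattens the completed results with `concat`. A self-contained sketch of the same fan-out/flatten pattern, with a trivial stand-in class instead of conda's SubdirData:

from concurrent.futures import ThreadPoolExecutor, as_completed
from toolz import concat

class FakeSubdirData:
    """Hypothetical stand-in for a repodata source with a .query(spec) method."""
    def __init__(self, records):
        self._records = records

    def query(self, spec):
        return [rec for rec in self._records if spec in rec]

subdir_datas = [FakeSubdirData(['numpy-1.15', 'scipy-1.1']),
                FakeSubdirData(['numpy-1.14'])]

with ThreadPoolExecutor(max_workers=4) as executor:
    # submit one query per source, then flatten each result list as its future completes
    futures = tuple(executor.submit(sd.query, 'numpy') for sd in subdir_datas)
    results = tuple(concat(future.result() for future in as_completed(futures)))

print(sorted(results))  # ['numpy-1.14', 'numpy-1.15']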
Example #56
0
 def iterations(self) -> int:
     return len(list(concat(self.epoch)))
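Example #56 counts every item across the nested `self.epoch` iterable by flattening it with `concat`. Note that `len(list(...))` materializes the whole flattened sequence; `toolz.count` gives the same number without building an intermediate list (a suggested alternative, not part of the original code):

from toolz import concat, count

epochs = [[1, 2, 3], [4, 5], [6]]

# equivalent results; count() consumes the iterator without storing it
assert len(list(concat(epochs))) == 6
assert count(concat(epochs)) == 6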
Example #57
0
def test_concat():
    assert list(concat([[], [], []])) == []
    assert list(take(5, concat([["a", "b"], range(1000000000)]))) == ["a", "b", 0, 1, 2]