Example #1
def line_length(line, ellipsoid='WGS-84', shipping=True):
    """Length of a line in kilometers, given in geographic coordinates

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    Arguments:
        line {Shapely LineString} -- a shapely LineString object with WGS-84 coordinates
        ellipsoid {String} -- string name of an ellipsoid that `geopy` understands (see
            http://geopy.readthedocs.io/en/latest/#module-geopy.distance)
        shipping {bool} -- if True, coordinates are (lon, lat) and each pair is
            reversed into the (lat, lon) order that `geopy` expects

    Returns:
        Length of line in kilometers
    """
    if line.geometryType() == 'MultiLineString':
        return sum(line_length(segment, ellipsoid=ellipsoid, shipping=shipping)
                   for segment in line)

    if shipping:
        return sum(
            vincenty(tuple(reversed(a)),
                     tuple(reversed(b)),
                     ellipsoid=ellipsoid).kilometers
            for a, b in pairwise(line.coords))

    # WARNING: vincenty is deprecated in geopy; prefer geopy.distance.geodesic.
    return sum(
        vincenty(a, b, ellipsoid=ellipsoid).kilometers
        for a, b in pairwise(line.coords))
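All of the snippets on this page lean on the same `pairwise` helper, usually imported from `boltons.iterutils` (one example below names that import explicitly). For reference, a minimal equivalent definition based on the classic itertools recipe (boltons' version eagerly returns a list, this one yields lazily):

from itertools import tee

def pairwise(iterable):
    """Yield overlapping pairs: s -> (s[0], s[1]), (s[1], s[2]), ..."""
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

print(list(pairwise([1, 2, 3, 4])))  # [(1, 2), (2, 3), (3, 4)]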
Example #2
def _cumulative_residual_entropy(dist, generalized=False):
    """
    The cumulative residual entropy is an alternative to the Shannon
    differential entropy with several advantageous properties.

    Parameters
    ----------
    dist : ScalarDistribution
        The distribution to compute the cumulative residual entropy of.
    generalized : bool
        Whether to integrate from zero over the CDF or to integrate from zero
        over the CDF of the absolute value.

    Returns
    -------
    CRE : float
        The (generalized) cumulative residual entropy.

    """
    numerical_test(dist)
    eps = ((e if generalized else abs(e), p) for e, p in dist.zipped())
    events, probs = zip(*sorted(eps))
    cdf = {a: p for a, p in zip(events, np.cumsum(probs))}
    terms = []
    for a, b in pairwise(events):
        pgx = cdf[a]
        term = (b - a) * pgx * np.log2(pgx)
        terms.append(term)
    return -np.nansum(terms)
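A minimal sanity check for the snippet above, assuming the surrounding `dit` package (which provides `ScalarDistribution` and `numerical_test`) is available. For a fair coin on {0, 1} the sum has a single term, (1 - 0) * F(0) * log2(F(0)) = 0.5 * log2(0.5), so the result should be 0.5:

from dit import ScalarDistribution

d = ScalarDistribution([0, 1], [0.5, 0.5])   # a fair coin on {0, 1}
print(_cumulative_residual_entropy(d))       # expected: 0.5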
Example #3
def coupling_metric(dists, p=1.0):
    """
    Compute the minimum possible residual entropy of a joint distribution
    with `dists` as marginals.

    Parameters
    ----------
    dists : list of Distributions
        The distributions to consider as marginals.
    p : float
        The p-norm.

    Returns
    -------
    cm : float
        The minimum residual entropy over all possible distributions with
        `dists` as marginals.
    """
    d = dists[0]
    for d2 in dists[1:]:
        d = d @ d2  # join the marginals into a single product distribution

    lengths = [0] + [len(dist.rvs) for dist in dists]
    dist_ids = [list(range(a, b)) for a, b in pairwise(np.cumsum(lengths))]

    meo = MinEntOptimizer(d, dist_ids)
    meo.optimize(niter=25)

    od = meo.construct_dist()
    re = residual_entropy(od, rvs=dist_ids, p=p)

    return re
Example #4
    def _construct_auxvars(self, auxvars):
        """
        Register the auxiliary variables.

        Parameters
        ----------
        auxvars : [(tuple, int)]
            The bases and bounds for each auxiliary variable.
        """
        self._aux_vars = []

        for bases, bound in auxvars:
            shape = [self._shape[i] for i in bases] + [bound]
            mask = np.ones(shape) / bound
            self._aux_vars.append(AuxVar(bases, bound, shape, mask, prod(shape)))
            self._shape += (bound,)
            self._full_shape += (bound,)
            self._all_vars |= {len(self._all_vars)}

        self._arvs = self._all_vars - (self._rvs | self._crvs)
        self._aux_bounds = [av.bound for av in self._aux_vars]
        self._optvec_size = sum(av.size for av in self._aux_vars)
        self._default_hops = prod(self._aux_bounds)
        self._parts = list(pairwise(np.cumsum([0] + [av.size for av in self._aux_vars])))
        self._construct_slices()
        if len(self._aux_vars) == 1:
            self.construct_joint = self._construct_joint_single
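The `pairwise(np.cumsum([0] + sizes))` idiom near the end is worth isolating: it converts per-variable block sizes into contiguous (start, stop) pairs for slicing one flat optimization vector. A self-contained illustration (the sizes below are made up for the demo):

import numpy as np
from boltons.iterutils import pairwise

sizes = [4, 6, 2]                               # hypothetical per-variable block sizes
parts = list(pairwise(np.cumsum([0] + sizes)))  # [(0, 4), (4, 10), (10, 12)]

flat = np.arange(sum(sizes))
blocks = [flat[a:b] for a, b in parts]
print([len(b) for b in blocks])                 # [4, 6, 2]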
Example #5
def maxent_binning(ts, bins):
    """

    Parameters
    ----------
    ts : ndarray
        The real-valued array to bin
    bins : int
        The number of bins to map the data into.

    Returns
    -------
    symb : ndarray
        The discretized time-series.
    """
    symb = np.full_like(ts, np.nan)

    percentiles = np.percentile(ts, [100 * i / bins for i in range(bins + 1)])

    # Sometimes with large-magnitude values things get weird. This helps:
    percentiles[0] = -np.inf
    percentiles[-1] = np.inf

    for i, (a, b) in enumerate(pairwise(percentiles)):
        symb[(a <= ts) & (ts < b)] = i

    symb = symb.astype(int)

    return symb
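A quick check of the binning above, assuming `pairwise` comes from `boltons.iterutils` as elsewhere on this page. With enough samples every bin should receive roughly the same count, which is what makes the discretization maximum-entropy:

import numpy as np

rng = np.random.default_rng(0)
ts = rng.normal(size=10_000)
symb = maxent_binning(ts, bins=4)
print(np.bincount(symb))   # four counts, each close to 2500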
Example #7
def collect_news_fragments(
    repo: Repo,
    include_unreleased: bool,
    version_tag_pattern: Pattern,
    news_fragment_dir: str,
    last_tag: str,
    submodules: List[SubmoduleConfig],
) -> List[VersionNews]:
    tags = get_tags(repo, version_tag_pattern)

    # drop tags older than `last_tag`, keeping `last_tag` itself as the boundary
    last_tag = repo.tags[last_tag] if last_tag in repo.tags else None
    if last_tag:
        tags = list(takewhile(lambda tag: tag != last_tag, tags))
        tags.append(last_tag)

    commits = get_commits(repo,
                          tags,
                          include_unreleased,
                          include_root=not last_tag)

    result = [
        VersionNews(
            news=get_news_between_commits(commit_from.commit, commit_to.commit,
                                          news_fragment_dir),
            tag=commit_to.name,
            version=get_version(commit_to.name, version_tag_pattern),
            date=commit_to.date,
            submodule_news=get_submodule_news(commit_from.commit,
                                              commit_to.commit, submodules),
        ) for commit_to, commit_from in pairwise(commits)
    ]

    return result
Example #8
def get_submodule_news(
        commit_from: Commit, commit_to: Commit,
        submodules: List[SubmoduleConfig]) -> List[SubmoduleNews]:
    news = []
    for submodule in submodules:
        submodule_from = get_submodule_commit(commit_from, submodule.name)
        submodule_to = get_submodule_commit(commit_to, submodule.name)
        srepo = submodule_from.repo
        tag_commits = [
            tag.commit
            for tag in get_tags(srepo, submodule.version_tag_pattern)
            if srepo.is_ancestor(submodule_from, tag.commit)
            and srepo.is_ancestor(tag.commit, submodule_to)
        ]

        commits = [submodule_to, *tag_commits, submodule_from]

        snews = SubmoduleNews(name=submodule.name,
                              display_name=submodule.display_name)

        for c_to, c_from in pairwise(commits):
            snews.news.extend(
                get_news_between_commits(c_from, c_to,
                                         submodule.news_fragments_directory))
        news.append(snews)
    return news
Example #9
def maxent_binning(ts, bins):
    """

    Parameters
    ----------
    ts : ndarray
        The real-valued array to bin
    bins : int
        The number of bins to map the data into.

    Returns
    -------
    symb : ndarray
        The discretized time-series.
    """
    symb = np.full_like(ts, np.nan)

    percentiles = np.percentile(ts, [100*i/bins for i in range(bins+1)])

    # Sometimes with large-magnitude values things get weird. This helps:
    percentiles[0] = -np.inf
    percentiles[-1] = np.inf

    for i, (a, b) in enumerate(pairwise(percentiles)):
        symb[(a <= ts) & (ts < b)] = i

    symb = symb.astype(int)

    return symb
Example #10
    def _construct_auxvars(self, auxvars):
        """
        Register the auxiliary variables.

        Parameters
        ----------
        auxvars : [(tuple, int)]
            The bases and bounds for each auxiliary variable.
        """
        self._aux_vars = []

        for bases, bound in auxvars:
            shape = [self._shape[i] for i in bases] + [bound]
            mask = np.ones(shape) / bound
            self._aux_vars.append(
                AuxVar(bases, bound, shape, mask, prod(shape)))
            self._shape += (bound, )
            self._full_shape += (bound, )
            self._all_vars |= {len(self._all_vars)}

        self._arvs = self._all_vars - (self._rvs | self._crvs)
        self._aux_bounds = [av.bound for av in self._aux_vars]
        self._optvec_size = sum(av.size for av in self._aux_vars)
        self._default_hops = prod(self._aux_bounds)
        self._parts = list(
            pairwise(np.cumsum([0] + [av.size for av in self._aux_vars])))
        self._construct_slices()
        if len(self._aux_vars) == 1:
            self.construct_joint = self._construct_joint_single
Example #11
def append_subdoc_chain(doc_parts):
    doc_chain = list(doc_parts)
    doc_chain = [
        modify("/".join(doc_chain[:i + 1]))
        for i in range(len(doc_chain))
    ]
    # FIXME: subdocs ignore double-slashes or final slash!
    doc_chain = [p for p in doc_chain if p]
    graph.add_edges_from(unseen_subdoc_edges(pairwise(doc_chain)))
Example #12
    def sents(self):
        """ Regroup raw_text into sentences """

        # Get sentence boundaries
        sent_idx = [idx+1
                    for idx, token in enumerate(self.tokens)
                    if token in ['.', '?', '!']]

        # Regroup (returns list of lists)
        return [self.tokens[i1:i2] for i1, i2 in pairwise([0] + sent_idx)]
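The boundary trick is compact: `sent_idx` stores the index just past each sentence terminator, so `pairwise([0] + sent_idx)` yields a (start, end) slice per sentence. The same idea, standalone, with tokens hardcoded for the demo:

from boltons.iterutils import pairwise

tokens = ['Hello', 'world', '.', 'How', 'are', 'you', '?']
sent_idx = [i + 1 for i, tok in enumerate(tokens) if tok in ['.', '?', '!']]
print([tokens[a:b] for a, b in pairwise([0] + sent_idx)])
# [['Hello', 'world', '.'], ['How', 'are', 'you', '?']]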
Example #13
def markov_chains(draw, alphabets=((2, 4), (2, 4), (2, 4))):
    """
    Generate Markov chains for use with hypothesis.

    Parameters
    ----------
    draw : function
        A sampling function passed in by hypothesis.
    alphabets : int, tuple of ints, tuple of pairs of ints
        If an int, it is the length of the chain and each variable is assumed to be binary.
        If a tuple of ints, the ints are assumed to be the size of each variable. If a tuple
        of pairs of ints, each pair represents the min and max alphabet size of each variable.

    Returns
    -------
    dist : Distribution
        A Markov chain with variable sizes.
    """
    try:
        len(alphabets)
        try:
            len(alphabets[0])
        except TypeError:
            alphabets = tuple((alpha, alpha) for alpha in alphabets)
    except TypeError:
        alphabets = ((2, 2), ) * alphabets

    alphabets = [int(draw(integers(*alpha))) for alpha in alphabets]

    # plain `float` here: the np.float alias was removed in NumPy 1.24
    px = draw(arrays(float, shape=alphabets[0], elements=floats(0, 1)))
    cds = [
        draw(arrays(float, shape=(a, b), elements=floats(0, 1)))
        for a, b in pairwise(alphabets)
    ]

    # assume things
    assume(px.sum() > 0)
    for cd in cds:
        for row in cd:
            assume(row.sum() > 0)

    px /= px.sum()

    # construct dist
    for cd in cds:
        cd /= cd.sum(axis=1, keepdims=True)
        # `colon` is a module-level alias for slice(None) in the source library
        slc = (np.newaxis, ) * (len(px.shape) - 1) + (colon, colon)
        px = px[..., np.newaxis] * cd[slc]

    dist = Distribution.from_ndarray(px)
    dist.normalize()
    return dist
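The `draw` parameter marks this as a `hypothesis` composite strategy, so in its source it is presumably wrapped with `@composite`. A hedged usage sketch under that assumption:

from hypothesis import given

@given(dist=markov_chains(alphabets=(2, 2, 2)))
def test_markov_chain_is_normalized(dist):
    # every generated Distribution should have a normalized pmf
    assert abs(sum(dist.pmf) - 1) < 1e-9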
Example #14
    def sents(self):
        """ Regroup raw_text into sentences """

        # Get sentence boundaries
        sent_idx = [
            idx + 1 for idx, token in enumerate(self.tokens)
            if token in ['.', '?', '!']
        ]

        # Regroup (returns list of lists)
        return [
            self.tokens[start:end] for start, end in pairwise([0] + sent_idx)
        ]
Example #15
def line_length(line, ellipsoid='WGS-84'):
    """Length of a line in kilometers, given in geographic coordinates.
    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285
    Args:
        *line* : A shapely LineString object with WGS-84 coordinates.

        *ellipsoid* : The string name of an ellipsoid that `geopy` understands (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance).
    Returns:
        The length of the line in kilometers.
    """
    if line.geometryType() == 'MultiLineString':
        return sum(line_length(segment, ellipsoid=ellipsoid)
                   for segment in line)

    try:
        return sum(
            vincenty(a, b, ellipsoid=ellipsoid).kilometers
            for a, b in pairwise(line.coords)
        )
    except ValueError:
        # The coordinates were likely (lon, lat); retry with each pair reversed.
        return sum(
            vincenty(a, b, ellipsoid=ellipsoid).kilometers
            for a, b in pairwise([t[::-1] for t in line.coords])
        )
Example #16
def markov_chains(draw, alphabets=((2, 4), (2, 4), (2, 4))):
    """
    Generate Markov chains for use with hypothesis.

    Parameters
    ----------
    draw : function
        A sampling function passed in by hypothesis.
    alphabets : int, tuple of ints, tuple of pairs of ints
        If an int, it is the length of the chain and each variable is assumed to be binary.
        If a tuple of ints, the ints are assumed to be the size of each variable. If a tuple
        of pairs of ints, each pair represents the min and max alphabet size of each variable.

    Returns
    -------
    dist : Distribution
        A Markov chain with variable sizes.
    """
    try:
        len(alphabets)
        try:
            len(alphabets[0])
        except TypeError:
            alphabets = tuple((alpha, alpha) for alpha in alphabets)
    except TypeError:
        alphabets = ((2, 2),)*alphabets

    alphabets = [int(draw(integers(*alpha))) for alpha in alphabets]

    # plain `float` here: the np.float alias was removed in NumPy 1.24
    px = draw(arrays(float, shape=alphabets[0], elements=floats(0, 1)))
    cds = [draw(arrays(float, shape=(a, b), elements=floats(0, 1))) for a, b in pairwise(alphabets)]

    # assume things
    assume(px.sum() > 0)
    for cd in cds:
        for row in cd:
            assume(row.sum() > 0)

    px /= px.sum()

    # construct dist
    for cd in cds:
        cd /= cd.sum(axis=1, keepdims=True)
        slc = (np.newaxis,)*(len(px.shape)-1) + (colon, colon)
        px = px[..., np.newaxis] * cd[slc]

    dist = Distribution.from_ndarray(px)
    dist.normalize()
    return dist
Example #17
def conll_ner(sents, pred, true, tag_enc=None, outfile=None):
    if tag_enc is not None:
        pred = tag_enc.inverse_transform(pred)
        true = tag_enc.inverse_transform(true)
    token_lines = list(map(" ".join, zip(flatten(sents), true, pred)))
    sent_offsets = np.cumsum([0] + list(map(len, sents)))
    sent_lines = "\n\n".join(
        map(lambda p: "\n".join(token_lines[slice(*p)]),
            pairwise(sent_offsets)))
    if outfile:
        with outfile.open("w", encoding="utf8") as out:
            out.write(sent_lines)
    eval_out, eval_parsed = run_conll_eval(sent_lines)
    print(eval_out)
    return eval_parsed
Example #18
    def line_length(self, line, ellipsoid='WGS-84'):
        """
        Returns length of a line in kilometers, given in geographic coordinates. Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

        :param line: a shapely LineString object with WGS-84 coordinates
        :param string ellipsoid: string name of an ellipsoid that `geopy` understands (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance)
        :returns: Length of line in kilometers
        """

        if line.geometryType() == 'MultiLineString':
            return sum(self.line_length(segment, ellipsoid=ellipsoid)
                       for segment in line)

        return sum(
            distance.geodesic(
                tuple(reversed(a)), tuple(reversed(b)), ellipsoid=ellipsoid).km
            for a, b in pairwise(line.coords))
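Of the several line_length variants on this page, this is the one using the maintained geopy API (distance.geodesic replaced the deprecated vincenty). A minimal end-to-end check, assuming shapely and geopy are installed:

from shapely.geometry import LineString
from geopy import distance
from boltons.iterutils import pairwise

# One degree of longitude along the equator, in (lon, lat) order.
line = LineString([(0.0, 0.0), (1.0, 0.0)])
km = sum(
    distance.geodesic(tuple(reversed(a)), tuple(reversed(b))).km
    for a, b in pairwise(line.coords)
)
print(round(km, 1))  # ~111.3 km on the WGS-84 ellipsoid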
Example #19
def add_path(graph,
             result,
             atlas_src,
             measurement_id,
             poisons=None,
             first=False):
    new_path = False
    path_asns, _, path_rtts, success = get_traceroute_path(
        atlas_src, result['result'])
    if not success:
        log_message("Lost connectivity! Traceroute failed")
        if poisons is None:
            poisons = set()
        write_frrp_entry(
            "2|src,{},atlas,{},poisons,[{}],path,[{}],rtts,[{}]".format(
                atlas_src.asn, measurement_id,
                ",".join(list([str(x) for x in poisons])),
                ",".join([str(x) for x in path_asns]),
                ",".join(list([str(x) for x in get_kv_string(path_rtts)]))))
        return None, None, None, False

    for lhs_asn, rhs_asn in pairwise(path_asns):
        lhs_asn_rtt = path_rtts.get(lhs_asn, None)
        rhs_asn_rtt = path_rtts.get(rhs_asn, None)

        lhs_as, rhs_as = AS(lhs_asn), AS(rhs_asn)

        if lhs_asn_rtt:
            lhs_as.set_rtt(lhs_asn_rtt)
        if rhs_asn_rtt:
            rhs_as.set_rtt(rhs_asn_rtt)

        lhs_as.add_preference(rhs_as)
        if first:
            lhs_as.set_preferred(rhs_as)

        if graph.has_edge(lhs_as, rhs_as):
            continue
        else:
            new_path = True
            graph.add_edge(lhs_as, rhs_as)

    log_message("Current total observed ASes: {}".format(
        str(graph.number_of_nodes())))

    return new_path, path_asns, path_rtts, False
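The core move in this add_path (and the variant in the next example) is turning an ordered AS path into graph edges by pairing adjacent hops. With networkx the same idea is a one-liner (the ASNs below are made-up documentation values, not real data):

import networkx as nx
from boltons.iterutils import pairwise

path_asns = [64496, 64511, 65536, 65551]   # hypothetical AS path
graph = nx.Graph()
graph.add_edges_from(pairwise(path_asns))
print(graph.number_of_nodes(), graph.number_of_edges())  # 4 3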
Example #20
def add_path(frrp_run,
             graph,
             result,
             atlas_src,
             measurement_id,
             poisons=None,
             first=False):
    new_path = False
    path_asns, _, path_rtts, success = get_traceroute_path(
        atlas_src, result['result'])
    if not success:
        log_message("Lost connectivity! Traceroute failed")
        if poisons is None:
            poisons = set()
        write_frrp_entry("2|src,{},atlas,{},poisons,{},path,{},rtts,{}".format(
            atlas_src.asn, measurement_id,
            "-".join(list([str(x) for x in poisons])),
            "-".join([str(x) for x in path_asns]),
            "-".join(list([str(x) for x in path_rtts]))))
        as_path = AnnotatedASPath()
        as_path.poisons = poisons
        for _as, _rtt in zip(path_asns, path_rtts):
            as_path.add_as(_as, _rtt)
        as_path.atlas_m_id = measurement_id

        frrp_run.add_lost_connectivity_path(as_path)

        return None, None, None, False

    for lhs_asn, rhs_asn in pairwise(path_asns):
        lhs_as, rhs_as = AS(lhs_asn), AS(rhs_asn)

        lhs_as.add_preference(rhs_as)
        if first:
            lhs_as.set_preferred(rhs_as)

        if graph.has_edge(lhs_as, rhs_as):
            continue
        else:
            new_path = True
            graph.add_edge(lhs_as, rhs_as)

    log_message("Current total observed ASes: {}".format(
        str(graph.number_of_nodes())))

    return new_path, path_asns, path_rtts, False
Example #21
def collect_news_fragments(
    repo: Repo,
    pm: PluginManager,
) -> List[VersionNews]:

    tags: List[Tag] = []
    pm.hook.process_tags(repo=repo, tags=tags)

    commits: List[Tag] = []
    pm.hook.process_commits(repo=repo, tags=tags, commits=commits)

    version_news = [
        get_version_news(pm, repo, commit_from, commit_to)
        for commit_to, commit_from in pairwise(commits)
    ]

    pm.hook.process_version_news(version_news=version_news)

    return version_news
Example #22
def maxent_binning(ts, bins):
    """

    Parameters
    ----------
    ts : ndarray
        The real-valued array to bin
    bins : int
        The number of bins to map the data into.

    Returns
    -------
    symb : ndarray
        The discretized time-series.
    """
    symb = ts.copy()
    percentiles = np.percentile(symb, [100*i/bins for i in range(bins+1)])
    percentiles[-1] += 1e-12  # nudge the top edge so the maximum value lands inside the last bin
    for i, (a, b) in enumerate(pairwise(percentiles)):
        symb[(a <= ts) & (ts < b)] = i
    symb = symb.astype(int)
    return symb
Example #23
def train_batch(batch, s_encoder, classifier):
    """Train the batch.
    """
    x, reorder = batch.packed_sentence_tensor()

    # Encode sentences.
    sents = s_encoder(x, reorder)

    # Generate x / y pairs.
    x, y = [], []
    for ab in batch.unpack_sentences(sents):
        for s1, s2 in pairwise(ab):

            x.append(torch.cat([s1, s2]))
            y.append(0)

            x.append(torch.cat([s2, s1]))
            y.append(1)

    x = torch.stack(x)
    # `Variable` is legacy pre-0.4 PyTorch; torch.tensor(y, dtype=torch.long) is the modern equivalent
    y = Variable(torch.LongTensor(y)).type(itype)

    return classifier(x), y
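The pairwise loop above builds a sentence-ordering task: each adjacent pair of sentence encodings contributes one correctly ordered concatenation (label 0) and one swapped concatenation (label 1). A tiny tensor-level sketch of that pairing, assuming a recent PyTorch (plain tensors replace the legacy `Variable`):

import torch
from boltons.iterutils import pairwise

sents = [torch.randn(8) for _ in range(3)]   # three fake sentence encodings

x, y = [], []
for s1, s2 in pairwise(sents):
    x.append(torch.cat([s1, s2])); y.append(0)   # original order
    x.append(torch.cat([s2, s1])); y.append(1)   # swapped order

x = torch.stack(x)    # shape (4, 16)
y = torch.tensor(y)   # tensor([0, 1, 0, 1])
print(x.shape, y)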
Example #24
def line_length(line, ellipsoid='WGS-84'):
    """Length of a line in meters, given in geographic coordinates.

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    Args:
        line: a shapely LineString object with WGS-84 coordinates.
        ellipsoid: string name of an ellipsoid that `geopy` understands
            (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance).
    Returns:
        Length of line in kilometers.

    Depends on:
        from geopy.distance import vincenty
        from boltons.iterutils import pairwise
    """

    if line.geometryType() == 'MultiLineString':
        return sum(line_length(segment, ellipsoid=ellipsoid)
                   for segment in line)

    return sum(
        vincenty(tuple(reversed(a)), tuple(reversed(b)),
                 ellipsoid=ellipsoid).kilometers
        for a, b in pairwise(line.coords))
Example #25
def pairwise_indexes(spans):
    """ Get indices for indexing into pairwise_scores """
    indexes = [0] + [len(s.antecedent_spans) for s in spans]
    # running totals: [0, n1, n1 + n2, ...], i.e. offsets into the flat score array
    indexes = [sum(indexes[:idx + 1]) for idx, _ in enumerate(indexes)]
    return pairwise(indexes)
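The two list operations build a cumulative-offset table, so the pairs returned are (start, end) slices into a flat array of pairwise scores. A toy illustration with hypothetical per-span antecedent counts:

from boltons.iterutils import pairwise

counts = [2, 3, 1]   # hypothetical antecedent counts per span
offsets = [0] + counts
offsets = [sum(offsets[:i + 1]) for i in range(len(offsets))]
print(offsets)                  # [0, 2, 5, 6]
print(list(pairwise(offsets)))  # [(0, 2), (2, 5), (5, 6)]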
Example #26
def get_graph(levels=1):
    graph = nx.Graph()

    points = {}
    letters = set()
    x, y = 0, 0
    width, height = 0, 0

    for line in utils.get_input(__file__, delimiter='', cast=str):
        for point in line:
            points[(x, y)] = point
            if PointType(point) == PointType.LETTER:
                letters.add((x, y, 0))

            x += 1
            width = max(width, x)

        x = 0
        y -= 1
        height = max(height, abs(y))

    for coordinate, point in points.items():
        if PointType(point) not in [PointType.WALL, PointType.SPACE]:
            for i in range(levels):
                graph.add_node(coordinate + (i, ), value=point)

    labels = collections.defaultdict(list)
    while letters:
        first_letter = letters.pop()
        label = get_label(graph, first_letter)

        if not label:
            # This is not actually a first letter
            continue

        labels[label.name].append(label.position)

        for letter_position in label.letter_positions:
            letters.discard(letter_position)

        for i in range(levels):
            for letter_position in label.letter_positions:
                graph.remove_node(letter_position[:-1] + (i, ))

            graph.add_node(label.position[:-1] + (i, ), value=label.name)

    for positions in labels.values():
        for pad_1, pad_2 in itertools.combinations(positions, 2):
            if levels == 1:
                graph.add_edge(pad_1, pad_2)
            else:
                if is_outer_portal(width, height, pad_1):
                    outer_portal, inner_portal = pad_1, pad_2
                else:
                    outer_portal, inner_portal = pad_2, pad_1

                for prev_level, next_level in iterutils.pairwise(
                        range(levels)):
                    graph.add_edge(
                        inner_portal[:-1] + (prev_level, ),
                        outer_portal[:-1] + (next_level, ),
                    )

    for coordinate in graph.nodes:
        for vector in VECTORS:
            neighbor = tuple(np.array(coordinate) + vector)
            if neighbor in graph.nodes:
                graph.add_edge(coordinate, neighbor)

    start, end = labels['AA'][0], labels['ZZ'][0]

    return graph, start, end
Example #27
def spans(doc):
    """Pull apart separator-delimited spans.
    """
    return [doc[i1+1:i2] for i1, i2 in pairwise(doc._.break_idxs)]
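A sketch of how this might be wired up, assuming a custom spaCy extension `break_idxs` holding the token indices of separator tokens (the attribute name comes from the snippet; the registration and the sentinel convention below are our assumptions):

import spacy
from spacy.tokens import Doc
from boltons.iterutils import pairwise

# Hypothetical setup: register the custom attribute the snippet relies on.
Doc.set_extension("break_idxs", default=[])

nlp = spacy.blank("en")
doc = nlp("one | two three | four")
seps = [i for i, tok in enumerate(doc) if tok.text == "|"]
doc._.break_idxs = [-1] + seps + [len(doc)]   # sentinels so the edge spans survive

print([span.text for span in spans(doc)])     # ['one', 'two three', 'four']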