def line_length(line, ellipsoid='WGS-84', shipping=True):
    """Length of a line in kilometers, given in geographic coordinates.

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    Arguments:
        line {Shapely LineString} -- a shapely LineString object with WGS-84 coordinates
        ellipsoid {String} -- string name of an ellipsoid that `geopy` understands
            (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance)
        shipping {bool} -- if True, treat coordinates as (lon, lat) and reverse
            each pair into the (lat, lon) order that `geopy` expects

    Returns:
        Length of line in kilometers
    """
    if shipping:
        if line.geometryType() == 'MultiLineString':
            return sum(line_length(segment) for segment in line)
        return sum(
            vincenty(tuple(reversed(a)), tuple(reversed(b)), ellipsoid=ellipsoid).kilometers
            for a, b in pairwise(line.coords))
    else:
        if line.geometryType() == 'MultiLineString':
            return sum(line_length(segment) for segment in line)
        return sum(
            vincenty(a, b, ellipsoid=ellipsoid).kilometers  # WARNING TODO: vincenty is deprecated
            for a, b in pairwise(line.coords))
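# Since geopy 2.0 removed `vincenty`, here is a minimal sketch of the same
# computation with `geopy.distance.geodesic` (a variant further below already
# uses it). The sample coordinates are illustrative only.
from boltons.iterutils import pairwise
from geopy.distance import geodesic
from shapely.geometry import LineString

line = LineString([(4.89, 52.37), (13.40, 52.52)])  # (lon, lat) pairs
length_km = sum(
    geodesic(tuple(reversed(a)), tuple(reversed(b)), ellipsoid='WGS-84').kilometers
    for a, b in pairwise(line.coords))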
def _cumulative_residual_entropy(dist, generalized=False):
    """
    The cumulative residual entropy is an alternative to the Shannon
    differential entropy with several advantageous properties.

    Parameters
    ----------
    dist : ScalarDistribution
        The distribution to compute the cumulative residual entropy of.
    generalized : bool
        Whether to integrate from zero over the CDF or to integrate from zero
        over the CDF of the absolute value.

    Returns
    -------
    CRE : float
        The (generalized) cumulative residual entropy.
    """
    numerical_test(dist)
    eps = ((e if generalized else abs(e), p) for e, p in dist.zipped())
    events, probs = zip(*sorted(eps))
    cdf = dict((a, p) for a, p in zip(events, np.cumsum(probs)))
    terms = []
    for a, b in pairwise(events):
        pgx = cdf[a]
        term = (b - a) * pgx * np.log2(pgx)
        terms.append(term)
    return -np.nansum(terms)
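# The pairwise() call above turns the sorted event values into consecutive
# integration intervals; a quick illustration with boltons:
from boltons.iterutils import pairwise
print(pairwise((1, 2, 3, 5)))  # [(1, 2), (2, 3), (3, 5)]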
def coupling_metric(dists, p=1.0):
    """
    Compute the minimum possible residual entropy of a joint distribution
    with `dists` as marginals.

    Parameters
    ----------
    dists : list of Distributions
        The distributions to consider as marginals.
    p : float
        The p-norm.

    Returns
    -------
    cm : float
        The minimum residual entropy over all possible distributions with
        `dists` as marginals.
    """
    d = dists[0]
    for d2 in dists[1:]:
        d = d @ d2
    lengths = [0] + [len(dist.rvs) for dist in dists]
    dist_ids = [list(range(a, b)) for a, b in pairwise(np.cumsum(lengths))]
    meo = MinEntOptimizer(d, dist_ids)
    meo.optimize(niter=25)
    od = meo.construct_dist()
    re = residual_entropy(od, rvs=dist_ids, p=p)
    return re
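# How the cumulative-sum / pairwise idiom above partitions the joint's
# variable indices per marginal, e.g. marginals with 2, 1, and 3 random
# variables:
import numpy as np
from boltons.iterutils import pairwise
lengths = [0, 2, 1, 3]
print([list(range(a, b)) for a, b in pairwise(np.cumsum(lengths))])
# [[0, 1], [2], [3, 4, 5]]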
def _construct_auxvars(self, auxvars):
    """
    Register the auxiliary variables.

    Parameters
    ----------
    auxvars : [(tuple, int)]
        The bases and bounds for each auxiliary variable.
    """
    self._aux_vars = []

    for bases, bound in auxvars:
        shape = [self._shape[i] for i in bases] + [bound]
        mask = np.ones(shape) / bound
        self._aux_vars.append(AuxVar(bases, bound, shape, mask, prod(shape)))
        self._shape += (bound,)
        self._full_shape += (bound,)
        self._all_vars |= {len(self._all_vars)}

    self._arvs = self._all_vars - (self._rvs | self._crvs)
    self._aux_bounds = [av.bound for av in self._aux_vars]
    self._optvec_size = sum(av.size for av in self._aux_vars)
    self._default_hops = prod(self._aux_bounds)
    self._parts = list(pairwise(np.cumsum([0] + [av.size for av in self._aux_vars])))
    self._construct_slices()
    if len(self._aux_vars) == 1:
        self.construct_joint = self._construct_joint_single
def maxent_binning(ts, bins):
    """
    Parameters
    ----------
    ts : ndarray
        The real-valued array to bin.
    bins : int
        The number of bins to map the data into.

    Returns
    -------
    symb : ndarray
        The discretized time-series.
    """
    symb = np.full_like(ts, np.nan)
    percentiles = np.percentile(ts, [100 * i / bins for i in range(bins + 1)])
    # Sometimes with large-magnitude values things get weird. This helps:
    percentiles[0] = -np.inf
    percentiles[-1] = np.inf
    for i, (a, b) in enumerate(pairwise(percentiles)):
        symb[(a <= ts) & (ts < b)] = i
    symb = symb.astype(int)
    return symb
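# Usage sketch: with equiprobable (maximum-entropy) bins, each symbol should
# cover roughly the same number of samples:
import numpy as np
ts = np.random.default_rng(0).normal(size=1000)
symb = maxent_binning(ts, bins=4)
print(np.bincount(symb))  # roughly 250 samples per bin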
def collect_news_fragments(
    repo: Repo,
    include_unreleased: bool,
    version_tag_pattern: Pattern,
    news_fragment_dir: str,
    last_tag: str,
    submodules: List[SubmoduleConfig],
) -> List[VersionNews]:
    tags = get_tags(repo, version_tag_pattern)

    # Truncate the tag list at the last released tag.
    last_tag = repo.tags[last_tag] if last_tag in repo.tags else None
    if last_tag:
        tags = list(takewhile(lambda tag: tag != last_tag, tags))
        tags.append(last_tag)

    commits = get_commits(repo, tags, include_unreleased, include_root=not last_tag)

    result = [
        VersionNews(
            news=get_news_between_commits(commit_from.commit, commit_to.commit, news_fragment_dir),
            tag=commit_to.name,
            version=get_version(commit_to.name, version_tag_pattern),
            date=commit_to.date,
            submodule_news=get_submodule_news(commit_from.commit, commit_to.commit, submodules),
        )
        for commit_to, commit_from in pairwise(commits)
    ]
    return result
def get_submodule_news(
        commit_from: Commit,
        commit_to: Commit,
        submodules: List[SubmoduleConfig]) -> List[SubmoduleNews]:
    news = []
    for submodule in submodules:
        submodule_from = get_submodule_commit(commit_from, submodule.name)
        submodule_to = get_submodule_commit(commit_to, submodule.name)
        srepo = submodule_from.repo
        # Submodule tags lying between the two pinned commits.
        tag_commits = [
            tag.commit for tag in get_tags(srepo, submodule.version_tag_pattern)
            if srepo.is_ancestor(submodule_from, tag.commit)
            and srepo.is_ancestor(tag.commit, submodule_to)
        ]
        commits = [submodule_to, *tag_commits, submodule_from]
        snews = SubmoduleNews(name=submodule.name, display_name=submodule.display_name)
        for c_to, c_from in pairwise(commits):
            snews.news.extend(
                get_news_between_commits(c_from, c_to, submodule.news_fragments_directory))
        news.append(snews)
    return news
def append_subdoc_chain(doc_parts):
    doc_chain = list(doc_parts)
    # Successive "/"-joined prefixes, e.g. ['a', 'b'] -> ['a', 'a/b'].
    doc_chain = [
        modify("/".join(doc_chain[:i + 1])) for i in range(len(doc_chain))
    ]
    # FIXME: subdocs ignore double-slashes or final slash!
    doc_chain = [p for p in doc_chain if p]
    graph.add_edges_from(unseen_subdoc_edges(pairwise(doc_chain)))
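# The prefix-chain idiom in isolation: successive "/"-joined prefixes of a
# document path, whose pairwise edges link each subdocument to its parent:
from boltons.iterutils import pairwise
parts = ['a', 'b', 'c']
chain = ['/'.join(parts[:i + 1]) for i in range(len(parts))]
print(chain)            # ['a', 'a/b', 'a/b/c']
print(pairwise(chain))  # [('a', 'a/b'), ('a/b', 'a/b/c')]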
def sents(self):
    """Regroup raw_text into sentences."""
    # Get sentence boundaries.
    sent_idx = [idx + 1 for idx, token in enumerate(self.tokens)
                if token in ['.', '?', '!']]
    # Regroup (returns list of lists).
    return [self.tokens[i1:i2] for i1, i2 in pairwise([0] + sent_idx)]
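# The boundary idiom at a glance: prepending 0 to the punctuation indices
# makes pairwise() produce the (start, end) slice for each sentence:
from boltons.iterutils import pairwise
tokens = ['Hi', '.', 'Bye', '!']
sent_idx = [i + 1 for i, t in enumerate(tokens) if t in ['.', '?', '!']]
print([tokens[a:b] for a, b in pairwise([0] + sent_idx)])
# [['Hi', '.'], ['Bye', '!']]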
def markov_chains(draw, alphabets=((2, 4), (2, 4), (2, 4))):
    """
    Generate Markov chains for use with hypothesis.

    Parameters
    ----------
    draw : function
        A sampling function passed in by hypothesis.
    alphabets : int, tuple of ints, tuple of pairs of ints
        If an int, it is the length of the chain and each variable is assumed
        to be binary. If a tuple of ints, the ints are assumed to be the size
        of each variable. If a tuple of pairs of ints, each pair represents
        the min and max alphabet size of each variable.

    Returns
    -------
    dist : Distribution
        A Markov chain with variable sizes.
    """
    try:
        len(alphabets)
        try:
            len(alphabets[0])
        except TypeError:
            alphabets = tuple((alpha, alpha) for alpha in alphabets)
    except TypeError:
        alphabets = ((2, 2),) * alphabets

    alphabets = [int(draw(integers(*alpha))) for alpha in alphabets]

    # `np.float` was removed from NumPy; the builtin `float` is the
    # equivalent dtype.
    px = draw(arrays(float, shape=alphabets[0], elements=floats(0, 1)))
    cds = [
        draw(arrays(float, shape=(a, b), elements=floats(0, 1)))
        for a, b in pairwise(alphabets)
    ]

    # assume things
    assume(px.sum() > 0)
    for cd in cds:
        for row in cd:
            assume(row.sum() > 0)

    px /= px.sum()

    # construct dist
    for cd in cds:
        cd /= cd.sum(axis=1, keepdims=True)
        slc = (np.newaxis,) * (len(px.shape) - 1) + (colon, colon)
        px = px[..., np.newaxis] * cd[slc]
    dist = Distribution.from_ndarray(px)
    dist.normalize()
    return dist
def line_length(line, ellipsoid='WGS-84'):
    """Length of a line in kilometers, given in geographic coordinates.

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    Args:
        *line* : A shapely LineString object with WGS-84 coordinates.
        *ellipsoid* : The string name of an ellipsoid that `geopy` understands
            (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance).

    Returns:
        The length of the line in kilometers.
    """
    if line.geometryType() == 'MultiLineString':
        return sum(line_length(segment) for segment in line)
    try:
        # Assume coordinates are already in the (lat, lon) order geopy expects.
        return sum(
            vincenty(a, b, ellipsoid=ellipsoid).kilometers
            for a, b in pairwise(line.coords)
        )
    except ValueError:
        # Coordinates were (lon, lat); reverse each pair and retry.
        return sum(
            vincenty(a, b, ellipsoid=ellipsoid).kilometers
            for a, b in pairwise([t[::-1] for t in line.coords])
        )
def conll_ner(sents, pred, true, tag_enc=None, outfile=None):
    if tag_enc is not None:
        pred = tag_enc.inverse_transform(pred)
        true = tag_enc.inverse_transform(true)
    token_lines = list(map(" ".join, zip(flatten(sents), true, pred)))
    # Offsets of each sentence within the flat token list.
    sent_offsets = np.cumsum([0] + list(map(len, sents)))
    sent_lines = "\n\n".join(
        map(lambda p: "\n".join(token_lines[slice(*p)]), pairwise(sent_offsets)))
    if outfile:
        with outfile.open("w", encoding="utf8") as out:
            out.write(sent_lines)
    eval_out, eval_parsed = run_conll_eval(sent_lines)
    print(eval_out)
    return eval_parsed
def line_length(self, line, ellipsoid='WGS-84'):
    """
    Returns length of a line in kilometers, given in geographic coordinates.

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    :param line: a shapely LineString object with WGS-84 coordinates
    :param string ellipsoid: string name of an ellipsoid that `geopy`
        understands (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance)
    :returns: Length of line in kilometers
    """
    if line.geometryType() == 'MultiLineString':
        return sum(self.line_length(segment) for segment in line)

    return sum(
        distance.geodesic(
            tuple(reversed(a)), tuple(reversed(b)), ellipsoid=ellipsoid).km
        for a, b in pairwise(line.coords))
def add_path(graph, result, atlas_src, measurement_id, poisons=None, first=False):
    new_path = False
    path_asns, _, path_rtts, success = get_traceroute_path(
        atlas_src, result['result'])
    if not success:
        log_message("Lost connectivity! Traceroute failed")
        if poisons is None:
            poisons = set()
        write_frrp_entry(
            "2|src,{},atlas,{},poisons,[{}],path,[{}],rtts,[{}]".format(
                atlas_src.asn, measurement_id,
                ",".join([str(x) for x in poisons]),
                ",".join([str(x) for x in path_asns]),
                ",".join([str(x) for x in get_kv_string(path_rtts)])))
        return None, None, None, False
    for lhs_asn, rhs_asn in pairwise(path_asns):
        lhs_asn_rtt = path_rtts.get(lhs_asn, None)
        rhs_asn_rtt = path_rtts.get(rhs_asn, None)
        lhs_as, rhs_as = AS(lhs_asn), AS(rhs_asn)
        if lhs_asn_rtt:
            lhs_as.set_rtt(lhs_asn_rtt)
        if rhs_asn_rtt:
            rhs_as.set_rtt(rhs_asn_rtt)
        lhs_as.add_preference(rhs_as)
        if first:
            lhs_as.set_preferred(rhs_as)
        if graph.has_edge(lhs_as, rhs_as):
            continue
        new_path = True
        graph.add_edge(lhs_as, rhs_as)
    log_message("Current total observed ASes: {}".format(graph.number_of_nodes()))
    return new_path, path_asns, path_rtts, False
def add_path(frrp_run, graph, result, atlas_src, measurement_id,
             poisons=None, first=False):
    new_path = False
    path_asns, _, path_rtts, success = get_traceroute_path(
        atlas_src, result['result'])
    if not success:
        log_message("Lost connectivity! Traceroute failed")
        if poisons is None:
            poisons = set()
        write_frrp_entry("2|src,{},atlas,{},poisons,{},path,{},rtts,{}".format(
            atlas_src.asn, measurement_id,
            "-".join([str(x) for x in poisons]),
            "-".join([str(x) for x in path_asns]),
            "-".join([str(x) for x in path_rtts])))
        as_path = AnnotatedASPath()
        as_path.poisons = poisons
        for _as, _rtt in zip(path_asns, path_rtts):
            as_path.add_as(_as, _rtt)
        as_path.atlas_m_id = measurement_id
        frrp_run.add_lost_connectivity_path(as_path)
        return None, None, None, False
    for lhs_asn, rhs_asn in pairwise(path_asns):
        lhs_as, rhs_as = AS(lhs_asn), AS(rhs_asn)
        lhs_as.add_preference(rhs_as)
        if first:
            lhs_as.set_preferred(rhs_as)
        if graph.has_edge(lhs_as, rhs_as):
            continue
        new_path = True
        graph.add_edge(lhs_as, rhs_as)
    log_message("Current total observed ASes: {}".format(graph.number_of_nodes()))
    return new_path, path_asns, path_rtts, False
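# A sketch of the edge construction in the two add_path variants above, in
# isolation: pairwise over an AS-level path yields the adjacent-AS links that
# get added to the graph (plain ints here stand in for the AS objects, and
# the path is hypothetical):
import networkx as nx
from boltons.iterutils import pairwise

g = nx.Graph()
g.add_edges_from(pairwise([3356, 174, 7018]))
print(list(g.edges()))  # [(3356, 174), (174, 7018)]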
def collect_news_fragments(
    repo: Repo,
    pm: PluginManager,
) -> List[VersionNews]:
    tags: List[Tag] = []
    pm.hook.process_tags(repo=repo, tags=tags)

    commits: List[Tag] = []
    pm.hook.process_commits(repo=repo, tags=tags, commits=commits)

    version_news = [
        get_version_news(pm, repo, commit_from, commit_to)
        for commit_to, commit_from in pairwise(commits)
    ]
    pm.hook.process_version_news(version_news=version_news)
    return version_news
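# In both collect_news_fragments variants, pairwise over a newest-first
# commit list yields (commit_to, commit_from) pairs, i.e. each release
# matched with its immediate predecessor:
from boltons.iterutils import pairwise
print(pairwise(['v3.0', 'v2.0', 'v1.0']))
# [('v3.0', 'v2.0'), ('v2.0', 'v1.0')]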
def maxent_binning(ts, bins):
    """
    Parameters
    ----------
    ts : ndarray
        The real-valued array to bin.
    bins : int
        The number of bins to map the data into.

    Returns
    -------
    symb : ndarray
        The discretized time-series.
    """
    symb = ts.copy()
    percentiles = np.percentile(symb, [100 * i / bins for i in range(bins + 1)])
    # Nudge the top edge so the maximum value falls inside the last bin.
    percentiles[-1] += 1e-12
    for i, (a, b) in enumerate(pairwise(percentiles)):
        symb[(a <= ts) & (ts < b)] = i
    symb = symb.astype(int)
    return symb
def train_batch(batch, s_encoder, classifier):
    """Train the batch."""
    x, reorder = batch.packed_sentence_tensor()

    # Encode sentences.
    sents = s_encoder(x, reorder)

    # Generate x / y pairs.
    x, y = [], []
    for ab in batch.unpack_sentences(sents):
        for s1, s2 in pairwise(ab):
            # Correctly ordered pair -> label 0; swapped pair -> label 1.
            x.append(torch.cat([s1, s2]))
            y.append(0)
            x.append(torch.cat([s2, s1]))
            y.append(1)

    x = torch.stack(x)
    y = Variable(torch.LongTensor(y)).type(itype)

    return classifier(x), y
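# The pair construction in isolation, without the torch machinery: every
# adjacent sentence pair yields a correctly ordered example (label 0) and a
# swapped one (label 1); strings stand in for sentence encodings:
from boltons.iterutils import pairwise
encoded = ['s1', 's2', 's3']
pairs = []
for s1, s2 in pairwise(encoded):
    pairs.append((s1 + s2, 0))
    pairs.append((s2 + s1, 1))
print(pairs)  # [('s1s2', 0), ('s2s1', 1), ('s2s3', 0), ('s3s2', 1)]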
def line_length(line, ellipsoid='WGS-84'):
    """Length of a line in kilometers, given in geographic coordinates.

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    Args:
        line: a shapely LineString object with WGS-84 coordinates.
        ellipsoid: string name of an ellipsoid that `geopy` understands
            (see http://geopy.readthedocs.io/en/latest/#module-geopy.distance).

    Returns:
        Length of line in kilometers.

    Depends on:
        from geopy.distance import vincenty
        from boltons.iterutils import pairwise
    """
    if line.geometryType() == 'MultiLineString':
        return sum(line_length(segment) for segment in line)
    return sum(
        vincenty(tuple(reversed(a)), tuple(reversed(b)),
                 ellipsoid=ellipsoid).kilometers
        for a, b in pairwise(line.coords))
def pairwise_indexes(spans):
    """Get (start, end) indices for indexing into pairwise_scores."""
    indexes = [0] + [len(s.antecedent_spans) for s in spans]
    indexes = [sum(indexes[:idx + 1]) for idx, _ in enumerate(indexes)]
    return pairwise(indexes)
def get_graph(levels=1):
    graph = nx.Graph()
    points = {}
    letters = set()
    x, y = 0, 0
    width, height = 0, 0
    for line in utils.get_input(__file__, delimiter='', cast=str):
        for point in line:
            points[(x, y)] = point
            if PointType(point) == PointType.LETTER:
                letters.add((x, y, 0))
            x += 1
        width = max(width, x)
        x = 0
        y -= 1
        height = max(height, abs(y))

    for coordinate, point in points.items():
        if PointType(point) not in [PointType.WALL, PointType.SPACE]:
            for i in range(levels):
                graph.add_node(coordinate + (i,), value=point)

    labels = collections.defaultdict(list)
    while letters:
        first_letter = letters.pop()
        label = get_label(graph, first_letter)
        if not label:
            # This is not actually a first letter
            continue
        second_letter = label.name[1]
        labels[label.name].append(label.position)
        for letter_position in label.letter_positions:
            letters.discard(letter_position)
        for i in range(levels):
            for letter_position in label.letter_positions:
                graph.remove_node(letter_position[:-1] + (i,))
            graph.add_node(label.position[:-1] + (i,), value=label.name)

    for positions in labels.values():
        for pad_1, pad_2 in itertools.combinations(positions, 2):
            if levels == 1:
                graph.add_edge(pad_1, pad_2)
            else:
                if is_outer_portal(width, height, pad_1):
                    outer_portal, inner_portal = pad_1, pad_2
                else:
                    outer_portal, inner_portal = pad_2, pad_1
                # An inner portal on level n leads to the matching outer
                # portal on level n + 1.
                for prev_level, next_level in iterutils.pairwise(range(levels)):
                    graph.add_edge(
                        inner_portal[:-1] + (prev_level,),
                        outer_portal[:-1] + (next_level,),
                    )

    for coordinate in graph.nodes:
        for vector in VECTORS:
            neighbor = tuple(np.array(coordinate) + vector)
            if neighbor in graph.nodes:
                graph.add_edge(coordinate, neighbor)

    start, end = labels['AA'][0], labels['ZZ'][0]
    return graph, start, end
def spans(doc):
    """Pull apart separator-delimited spans."""
    return [doc[i1 + 1:i2] for i1, i2 in pairwise(doc._.break_idxs)]
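# A minimal usage sketch, assuming a spaCy pipeline that records
# separator-token indices in a custom `break_idxs` extension (hypothetical
# setup); a leading -1 makes the first span start at token 0:
import spacy
from spacy.tokens import Doc
from boltons.iterutils import pairwise

Doc.set_extension('break_idxs', default=None)
nlp = spacy.blank('en')
doc = nlp('a b | c d')
doc._.break_idxs = [-1, 2, len(doc)]
print([doc[i1 + 1:i2].text for i1, i2 in pairwise(doc._.break_idxs)])
# ['a b', 'c d']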