def build(self):
    '''Fit the global model to the pooled points of every token.

    Flattens every point collection stored in self.tokens into a single
    MultiPoint tagged with self.srid, fits it with gmm_fit_tokenpoints()
    under the name '_all_tweets_', and stores element [1] of the returned
    value in self.global_model.'''
    self.warn_if_parallel()
    # values() instead of the Python-2-only itervalues(): the same points are
    # visited in the same order, and the call also works on Python 3.
    allpoints = geos.MultiPoint(
        [pt for pts in self.tokens.values() for pt in pts],
        srid=self.srid)
    l.debug('fitting All_Tweets to %d points...' % len(allpoints))
    self.global_model = gmm_fit_tokenpoints('_all_tweets_', allpoints)[1]
def group_tokens(self, tweets, trim_head_frac, min_instance_ct):
    '''Group tweet geometries by token and return a dict {token: MultiPoint}.

    tweets            indexable sequence of objects exposing .tokens (an
                      iterable of tokens) and .geom (a geometry with .srid);
                      the SRID of the first tweet is used for every group
    trim_head_frac    fraction, in [0, 1), of the most frequent tokens to
                      discard
    min_instance_ct   tokens occurring fewer than this many times are
                      discarded

    Processing order: group points per token, drop infrequent tokens,
    convert each group to a MultiPoint, then sort by descending point count
    and drop the leading trim_head_frac share. Raises AssertionError if
    trim_head_frac is out of range or if no tokens survive.'''
    by_token = operator.itemgetter(0)
    # One (token, point) pair per token occurrence, sorted by token so that
    # groupby() sees every token as a single contiguous run.
    pairs = sorted(((tok, tw.geom) for tw in tweets for tok in tw.tokens),
                   key=by_token)
    # grouped
    tokens = [(tok, [pair[1] for pair in run])
              for (tok, run) in itertools.groupby(pairs, key=by_token)]
    l.debug('%d tokens total' % (len(tokens)))
    # remove infrequent (a list, not filter(): filter() is lazy on Python 3
    # and would break the len() and sort() calls below)
    tokens = [t for t in tokens if len(t[1]) >= min_instance_ct]
    l.debug('%d tokens appear >= %d times' % (len(tokens), min_instance_ct))
    # convert to multipoint; the SRID lookup is only evaluated when there is
    # at least one token, so empty input still reaches the assertion below
    # instead of failing on tweets[0]
    srid = tweets[0].geom.srid if tokens else None
    tokens = [(tok, geos.MultiPoint(pts, srid=srid)) for (tok, pts) in tokens]
    l.info('created %d multipoint token groups, %d total points'
           % (len(tokens), sum(len(t[1]) for t in tokens)))
    # remove frequent
    assert (0 <= trim_head_frac < 1)
    tokens.sort(key=lambda t: len(t[1]), reverse=True)
    tokens = tokens[int(round(trim_head_frac * len(tokens))):]
    l.debug('%d tokens after head trim' % (len(tokens)))
    assert (len(tokens) > 0)
    # done
    return dict(tokens)
def geodesic_distance_sph(a, b):
    '''Return the spherical geodesic distance in kilometers from geos.Point a
    to geos.Point b.

    b is wrapped in a one-element MultiPoint carrying b's SRID, handed to
    geodesic_distance_mp_sph(), and the first element of that result is
    returned. E.g. (this is in error by about 0.2%):

    >>> geodesic_distance_sph(geos.Point(-86.67, 36.12, srid=4326),
    ...                       geos.Point(-118.40, 33.94, srid=4326))
    2886.44...'''
    target = geos.MultiPoint([b], srid=b.srid)
    distances = geodesic_distance_mp_sph(a, target)
    return distances[0]
def geodesic_distance_ell(a, b):
    '''Return the ellipsoidal geodesic distance in kilometers from geos.Point
    a to geos.Point b, which can be in any SRS.

    b is wrapped in a one-element MultiPoint carrying b's SRID, handed to
    geodesic_distance_mp_ell(), and the first element of that result is
    returned. For example, to compute distance from BNA to LAX
    (http://en.wikipedia.org/wiki/Great-circle_distance):

    >>> geodesic_distance_ell(geos.Point(-86.67, 36.12, srid=4326),
    ...                       geos.Point(-118.40, 33.94, srid=4326))
    2892.77...'''
    target = geos.MultiPoint([b], srid=b.srid)
    distances = geodesic_distance_mp_ell(a, target)
    return distances[0]
def locate(self, tokens, confidence):
    '''Estimate a location from the given tokens.

    Pools the stored points of every token that is present in self.tokens.
    Returns None when none of the tokens are known (or they contribute no
    points); otherwise fits the pooled points with gmm_fit_tokenpoints()
    under the name 'all' and returns a Location_Estimate built from element
    [1] of that result, confidence, and self.srid.'''
    pooled = [pt
              for tok in tokens if tok in self.tokens
              for pt in self.tokens[tok]]
    if not pooled:
        return None
    model = gmm_fit_tokenpoints('all', geos.MultiPoint(pooled))[1]
    return Location_Estimate(model, confidence, self.srid)
def populate_samples(self, sample_ct):
    '''Draw sample_ct samples and cache them, best first.

    Points come from self.sample(sample_ct, u.rand_np) and are evaluated
    with self.eval(). Sets:

      self.samples             list of (point, evals[0] value, argmax of the
                               matching evals[1] row), sorted by descending
                               evals[0] value (named logprobs here)
      self.samples_inbound_mp  srs.trim() applied to a MultiPoint of the
                               sorted sample points'''
    sraw = [geos.Point(tuple(xy), srid=self.srid)
            for xy in self.sample(sample_ct, u.rand_np)]
    evals = self.eval([pt.coords for pt in sraw])
    logprobs = evals[0]
    component_is = [np.argmax(row) for row in evals[1]]
    # sorted() rather than zip(...).sort(): zip() returns an iterator on
    # Python 3. Both are stable, so the resulting order is unchanged.
    self.samples = sorted(zip(sraw, logprobs, component_is),
                          key=operator.itemgetter(1), reverse=True)
    mp = geos.MultiPoint([s[0] for s in self.samples], srid=self.srid)
    self.samples_inbound_mp = srs.trim(mp)
def sample_points(rs, components, ct):
    '''Sample points from a random gaussian mixture model with the specified
    number of components and return them as a WGS84 MultiPoint.

    Each component contributes ct // components points, so the total is
    below ct when ct is not a multiple of components. Component parameters
    come from sample_gaussian(rs, mean), where mean starts at 0 and is the
    previous component's mean plus 5 afterwards.'''
    per_component = ct // components
    # Start from an empty (0, 2) array so the joined result is 2-column.
    chunks = [np.array([]).reshape(0, 2)]
    mean = 0
    for _ in range(components):
        (mean, covar) = sample_gaussian(rs, mean)
        chunks.append(rs.multivariate_normal(mean, covar, per_component))
        mean += 5
    samples = np.concatenate(chunks, 0)
    points = [geos.Point(*xy) for xy in samples]
    return geos.MultiPoint(points, srid=srs.SRID_WGS84)
def populate_pred_region_real(self, trim=True):
    '''Build the prediction region from the best-scoring samples.

    Takes the first round(pred_coverage * len(samples)) entries of
    self.samples, groups them by component index (element [2]), and unions
    the convex hulls of every group that has at least 3 points.

    trim  when true, the unioned region is passed through srs.trim()

    Sets self.pred_region_threshold (element [1] of the sample at the cutoff
    index) and self.pred_region (always a MultiPolygon). Raises
    AssertionError if no component has enough points or the final geometry
    is not a MultiPolygon.'''
    # what's the contour value?
    threshold_idx = int(round(self.pred_coverage * len(self.samples)))
    # The cutoff index equals len(samples) when the coverage rounds up to
    # the full sample set; use the last sample then rather than raising
    # IndexError.
    cutoff = min(threshold_idx, len(self.samples) - 1)
    self.pred_region_threshold = self.samples[cutoff][1]
    bests = self.samples[:threshold_idx]
    # compute contours
    regions = []
    for comp in range(self.n_components):
        members = [s[0] for s in bests if s[2] == comp]
        if len(members) < 3:
            # can't make a polygon with less than 3 vertices, skip
            continue
        hull = geos.MultiPoint(members, srid=self.srid).convex_hull
        regions.append(hull)
    # create a multipolygon and clean up
    assert (len(regions) > 0)
    pr = geos.MultiPolygon(regions, srid=self.srid).cascaded_union
    if trim:
        pr = srs.trim(pr)
    if pr.geom_type == 'Polygon':
        # cascaded_union can collapse a MultiPolygon into a single Polygon
        pr = geos.MultiPolygon([pr], srid=self.srid)
    assert (pr.geom_type == 'MultiPolygon')
    self.pred_region = pr
def test_fitting(coverage, seed, sample_ct):
    '''Fit a 2-component Geo_GMM to synthetic points and report its accuracy.

    coverage   coverage passed to g.prepare() and compared against
    seed       seed for the numpy RandomState generating the points
    sample_ct  stored as the 'mc_sample_ct' model parameter; also the number
               of Los Alamos points (1.5x as many Alert points are drawn)

    Returns a dict with keys: all_xys, inbounds_xys, g, coverage_req,
    coverage_obs, coverage_error, coverage_fudge, contour, coveraget_obs,
    coveraget_error, coveraget_fudge, msae, mcae, pra.'''
    result = {}
    rs = np.random.RandomState(seed)
    Model.parms_init({'mc_sample_ct': sample_ct})  # FIXME: kludge ugly here
    # Create and fit a GMM. We fit random points centered on Alert, Nunavut (83
    # degrees north) as well as Los Alamos in order to test clamping for sampled
    # points that are too far north. The two places are roughly 5,447 km apart.
    ct = sample_ct
    # numpy needs an integer size; ct * 1.5 is a float
    alert_ct = int(ct * 1.5)
    # list(zip(...)) so the pairs can be concatenated, re-iterated and
    # measured with len() on Python 3 as well
    alert_xys = list(zip(-62.33 + rs.normal(scale=4.0, size=alert_ct),
                         82.50 + rs.normal(scale=8.0, size=alert_ct)))
    # make sure we are indeed slushing over the northern boundary of the world
    assert (len([xy for xy in alert_xys if xy[1] >= 90]) > 8)
    la_xys = list(zip(-106.30 + rs.normal(scale=3.0, size=ct),
                      35.89 + rs.normal(scale=2.0, size=ct)))
    all_xys = alert_xys + la_xys
    inbounds_xys = [xy for xy in all_xys if xy[1] < 90]
    # NOTE(review): the ratios below only make sense with true division;
    # presumably the file has `from __future__ import division` -- confirm.
    l.info('true points in bounds = %d/%d = %g'
           % (len(inbounds_xys), len(all_xys),
              len(inbounds_xys) / len(all_xys)))
    result['all_xys'] = all_xys
    result['inbounds_xys'] = inbounds_xys
    all_mp = geos.MultiPoint([geos.Point(xy) for xy in all_xys],
                             srid=srs.SRID_WGS84)
    t1 = time.time()
    g = Geo_GMM.from_fit(all_mp, 2)
    result['g'] = g
    l.info('fitted %d components in %gs' % (len(g.weights_), time.time() - t1))
    t1 = time.time()
    g.prepare(coverage)
    l.info("prepare()'d %d points in %gs" % (len(g.samples), time.time() - t1))
    l.info('component weights: %s' % ([g.weights_], ))
    l.info('component assignments: %s'
           % ([len([i for i in g.samples if i[2] == 0]),
               len([i for i in g.samples if i[2] == 1])], ))
    # coverage
    covers_ct = sum(g.covers_p(geos.Point(xy, srid=srs.SRID_WGS84))
                    for xy in inbounds_xys)
    result['coverage_req'] = coverage
    result['coverage_obs'] = covers_ct / len(inbounds_xys)
    result['coverage_error'] = result['coverage_obs'] - coverage
    result['coverage_fudge'] = coverage / result['coverage_obs']
    l.info('observed coverage (in-bounds) = %d/%d = %g'
           % (covers_ct, len(inbounds_xys), result['coverage_obs']))
    t1 = time.time()
    result['contour'] = sum(g.contour(geos.Point(xy, srid=srs.SRID_WGS84))
                            for xy in inbounds_xys) / len(inbounds_xys)
    l.info('computed contour() in %gs per point'
           % ((time.time() - t1) / len(inbounds_xys)))
    covers_ct = sum(g.coverst_p(geos.Point(xy, srid=srs.SRID_WGS84))
                    for xy in inbounds_xys)
    result['coveraget_obs'] = covers_ct / len(inbounds_xys)
    result['coveraget_error'] = result['coveraget_obs'] - coverage
    result['coveraget_fudge'] = coverage / result['coveraget_obs']
    l.info('observed coverage (in-bounds, coverst) = %d/%d = %g'
           % (covers_ct, len(inbounds_xys), result['coveraget_obs']))
    # absolute error for a random true point
    inb_sample = inbounds_xys[:1]
    t1 = time.time()
    sae = [g.sae(geos.Point(p, srid=srs.SRID_WGS84)) for p in inb_sample]
    l.info('computed SAE in %gs per point'
           % ((time.time() - t1) / len(inb_sample)))
    result['msae'] = np.mean(sae)
    t1 = time.time()
    cae = [g.cae(geos.Point(p, srid=srs.SRID_WGS84)) for p in inb_sample]
    l.info('computed CAE in %gs per point'
           % ((time.time() - t1) / len(inb_sample)))
    result['mcae'] = np.mean(cae)
    # area of confidence region
    result['pra'] = g.pred_area
    return result
def test_multicentroid(self):
    # Set-up: store two Feature rows whose geom_point values are the points
    # (10, 10) and (11, 11), and build a MultiPoint from the same two points.
    # NOTE(review): no assertion is visible after this set-up and f1, f2 and
    # mp are unused here -- the rest of the test body may be cut off; confirm.
    p1, p2 = geos.Point(10, 10), geos.Point(11, 11)
    f1 = Feature.objects.create(geom_point=p1)
    f2 = Feature.objects.create(geom_point=p2)
    mp = geos.MultiPoint([p1, p2])