def test_cartocontext_map_geom_type(self):
    """Check the default label placement CartoContext.map picks per geometry.

    Each case renders a map and matches the named-map name embedded in its
    HTML against ``baseid<N>_labels<M>``. Per the notes carried by this
    test, ``baseid2`` is the voyager basemap (``baseid1`` is dark),
    ``labels1`` means labels drawn on top and ``labels0`` labels drawn on
    the bottom.
    """
    from cartoframes import Layer, QueryLayer

    context = cartoframes.CartoContext(base_url=self.baseurl,
                                       api_key=self.apikey)

    def expect_named_map(layers, pattern, message):
        # Render the map, then match its HTML against the expected name.
        rendered = context.map(layers=layers)
        self.assertRegexpMatches(rendered.__html__(), pattern, msg=message)

    point_present = 'labels should be on bottom because a point layer is present'

    # polygon-only map: labels on top
    expect_named_map(
        Layer(self.test_read_table),
        '.*baseid2_labels1.*',
        'labels should be on top since only a polygon layer is present')

    # point-only map: labels on bottom
    expect_named_map(
        Layer(self.test_point_table),
        '.*baseid2_labels0.*',
        point_present)

    # points + polygons as separate layers: a point layer is present, so
    # labels stay on the bottom
    expect_named_map(
        [Layer(self.test_point_table), Layer(self.test_read_table)],
        '.*baseid2_labels0.*',
        point_present)

    # a single layer mixing points and polys, with more polys than points,
    # should default to the polygon behaviour (labels on top)
    mixed_sql = (
        ' (SELECT the_geom, the_geom_webmercator, '
        'row_number() OVER () AS cartodb_id '
        'FROM "{polys}" WHERE the_geom IS NOT null LIMIT 10) '
        'UNION ALL '
        '(SELECT the_geom, the_geom_webmercator, '
        '(row_number() OVER ()) + 10 AS cartodb_id '
        'FROM "{points}" WHERE the_geom IS NOT null LIMIT 5) '
    ).format(polys=self.test_read_table, points=self.test_point_table)
    expect_named_map(
        QueryLayer(mixed_sql),
        '.*baseid2_labels1.*',
        'layer has more polys than points, so it '
        'should default to polys labels (on top)')
def test_cartocontext_map_time(self):
    """context.CartoContext.map time options

    Covers:
      * a time (torque) layer on the point table renders as an HTML map
      * a time layer combined with a category ``color`` column produces
        CartoCSS containing ``CDB_Math_Mode(cf_value_twitter_lang)``
      * ValueError is raised for a static time map, for a time layer
        combined with another layer, and for a time layer on the polygon
        table
    """
    from cartoframes import Layer

    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)
    html_map = cc.map(
        layers=Layer(self.test_point_table, time='cartodb_id'))
    self.assertIsInstance(html_map, IPython.core.display.HTML)

    # category map
    cat_map = cc.map(layers=Layer(self.test_point_table,
                                  time='actor_postedtime',
                                  color='twitter_lang'))
    # Raw string: `\(` is not a valid escape in a normal string literal and
    # emits an invalid-escape warning on current Python versions.
    self.assertRegexpMatches(
        cat_map.__html__(),
        r'.*CDB_Math_Mode\(cf_value_twitter_lang\).*')

    with self.assertRaises(
            ValueError,
            msg='cannot create static torque maps currently'):
        cc.map(layers=Layer(self.test_point_table, time='cartodb_id'),
               interactive=False)

    with self.assertRaises(ValueError,
                           msg='cannot have more than one torque layer'):
        cc.map(layers=[
            Layer(self.test_point_table, time='cartodb_id'),
            Layer(self.test_point_table, color='cartodb_id')
        ])

    with self.assertRaises(
            ValueError,
            msg='cannot do a torque map off a polygon dataset'):
        cc.map(layers=Layer(self.test_read_table, time='cartodb_id'))
def kmeans():
    """k-means analysis

    All inputs are read from the query string of the current request
    (``request.args``).

    Params:
        cols (str): Comma-separated list of columns in `table`.
        table (str): Name of table for data with columns `cols`.
        n_clusters (int): Number of clusters for the analysis. Defaults
          to 5.
        user (str): Username for CARTO account.
        key (str): User's CARTO API Key
        outtable (str): Name of the table the labelled data is written
          to. Defaults to ``<table>_kmeans_out_<suffix>`` where the suffix
          is the last five characters of the current timestamp.
        format (str): ``html`` (default) renders ``kmeans.html``; any
          other value returns a JSON success message instead.
        debug: Any non-empty value makes the output table name be passed
          to ``debug_print``.

    Returns:
        The JSON string ``{"result": "error"}`` when `cols`, `table`,
        `user` or `key` is missing or empty; a JSON success message when
        `format` is not ``html``; otherwise the rendered ``kmeans.html``
        template.
    """
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import StandardScaler, Imputer
    from cartoframes import Layer
    from cartoframes.styling import bold
    import time

    n_clusters = int(request.args.get('n_clusters', 5))
    # Build the column list without assuming the parameter is present:
    # calling .split() on a missing value would raise before the
    # validation below can answer with the error payload, and an empty
    # value would otherwise become [''] and slip through the check.
    raw_cols = request.args.get('cols') or ''
    cols = [name.strip() for name in raw_cols.split(',') if name.strip()]
    table = request.args.get('table')
    user = request.args.get('user')
    key = request.args.get('key')
    debug = request.args.get('debug', False)
    outtable = request.args.get(
        'outtable',
        '{0}_kmeans_out_{1}'.format(table, str(time.time())[-5:]))
    if debug:
        debug_print(outtable=outtable)
    out_format = request.args.get('format', 'html')

    if not all((cols, table, user, key)):
        return json.dumps({'result': 'error'})

    cc = cartoframes.CartoContext(
        base_url='https://{}.carto.com/'.format(user), api_key=key)

    # gather the data
    # NOTE(review): `table` comes straight from the query string and is
    # interpolated into the SQL unescaped; validate or quote it upstream.
    dataframe = cc.query('''
        SELECT *
          FROM {table}
    '''.format(table=table))

    # fill missing values with the column mean, then standardize
    scaler = StandardScaler()
    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
    imp.fit(dataframe[cols].values)
    data = imp.transform(dataframe[cols].values)
    data = scaler.fit_transform(data)

    km = KMeans(n_clusters=n_clusters).fit(data)
    # store the cluster labels as strings before writing the table
    dataframe['labels'] = km.labels_
    dataframe['labels'] = dataframe['labels'].astype(str)
    warnings.warn(str(dataframe.dtypes))

    cc.write(dataframe, outtable, overwrite=True)
    if out_format != 'html':
        return json.dumps(
            {'result': {
                'success': 'Table written to ' + outtable
            }})

    msg = ('Performing <b>k-means</b> on columns {cols} from {table} '
           'using {n} clusters.').format(cols=', '.join(cols),
                                         table=table,
                                         n=n_clusters)
    map_html = cc.map(layers=Layer(outtable, color={
        'column': 'labels',
        'scheme': bold(n_clusters)
    })).data
    table_link = '{0}/dataset/{1}'.format(cc.creds.base_url(), outtable)
    return render_template('kmeans.html',
                           map_html=map_html,
                           table=outtable,
                           table_link=table_link,
                           user=user,
                           msg=msg,
                           plot=plot(dataframe, cols, hue='labels'))
def test_cartocontext_map(self):
    """context.CartoContext.map normal usage

    Exercises basemap-only maps (static and interactive), maps built from
    Layer / QueryLayer / BaseMap combinations, and the argument
    combinations that are expected to raise ValueError.
    """
    from cartoframes import Layer, QueryLayer, BaseMap
    try:
        import matplotlib
        matplotlib.use('agg')
        import matplotlib.pyplot as plt
    except ImportError:
        plt = None

    cc = cartoframes.CartoContext(base_url=self.baseurl,
                                  api_key=self.apikey)

    # test with no layers - should produce basemap
    if plt:
        basemap_only_static_mpl = cc.map(interactive=False)
    # The flag is flipped on the shared module object; register a cleanup
    # that restores the previous value so other tests are not affected.
    self.addCleanup(setattr, cartoframes.context, 'HAS_MATPLOTLIB',
                    cartoframes.context.HAS_MATPLOTLIB)
    cartoframes.context.HAS_MATPLOTLIB = False
    basemap_only_static = cc.map(interactive=False)
    basemap_only_interactive = cc.map(interactive=True)

    # are of the correct type instances
    if plt:
        self.assertIsInstance(basemap_only_static_mpl, plt.Axes)
    self.assertIsInstance(basemap_only_static,
                          IPython.core.display.Image)
    self.assertIsInstance(basemap_only_interactive,
                          IPython.core.display.HTML)

    # have the HTML innards that are to be expected
    # sys.version is a str, so it can never equal an int; the major
    # version has to be read from sys.version_info.
    # NOTE(review): the static map is asserted above to be an IPython
    # Image while the pattern below expects an HTML <img> string in
    # `.data` - confirm this expectation if the assertion fails.
    if sys.version_info[0] == 3:
        self.assertRegex(basemap_only_static.data,
                         ('^<img src="https://.*api/v1/map/static/named/'
                          'cartoframes_ver.*" />$'))
        self.assertRegex(basemap_only_interactive.data,
                         '^<iframe srcdoc="<!DOCTYPE html>.*')
    elif sys.version_info[0] == 2:
        # Python 2 spells the regex assertion `assertRegexpMatches`
        self.assertRegexpMatches(
            basemap_only_static.data,
            ('^<img src="https://.*api/v1/map/static/named/'
             'cartoframes_ver.*" />$'))
        self.assertRegexpMatches(basemap_only_interactive.data,
                                 '^<iframe srcdoc="<!DOCTYPE html>.*')

    # test with labels on front
    labels_front = cc.map(layers=BaseMap('light', labels='front'))
    self.assertIsInstance(labels_front, IPython.core.display.HTML)

    # test with one Layer
    one_layer = cc.map(layers=Layer('tweets_obama'))
    self.assertIsInstance(one_layer, IPython.core.display.HTML)

    # test with two Layers
    two_layers = cc.map(
        layers=[Layer('tweets_obama'), Layer(self.test_read_table)])
    self.assertIsInstance(two_layers, IPython.core.display.HTML)

    # test with one Layer, one QueryLayer
    onelayer_onequery = cc.map(layers=[
        QueryLayer(''' SELECT * FROM tweets_obama LIMIT 100'''),
        Layer(self.test_read_table)
    ])
    self.assertIsInstance(onelayer_onequery, IPython.core.display.HTML)

    # test with BaseMap, Layer, QueryLayer
    cc.map(layers=[
        BaseMap('light'),
        QueryLayer(''' SELECT * FROM tweets_obama LIMIT 100''',
                   color='favoritescount'),
        Layer(self.test_read_table)
    ])

    # Errors
    # too many layers
    with self.assertRaises(ValueError):
        layers = [Layer('tweets_obama')] * 9
        cc.map(layers=layers)

    # zoom needs to be specified with lng/lat
    with self.assertRaises(ValueError):
        cc.map(lng=44.3386, lat=68.2733)

    # only one basemap layer can be added
    with self.assertRaises(ValueError):
        cc.map(layers=[BaseMap('dark'), BaseMap('light')])

    # only one time layer can be added
    with self.assertRaises(ValueError):
        cc.map(layers=[
            Layer(self.test_read_table, time='cartodb_id'),
            Layer(self.test_read_table, time='cartodb_id')
        ])

    # no geometry
    with self.assertRaises(ValueError):
        cc.map(layers=QueryLayer(
            ' SELECT null::geometry as the_geom, '
            'null::geometry as the_geom_webmercator, '
            'row_number() OVER () as cartodb_id '
            'FROM generate_series(1, 10) as m(i) '))
# Write the task 3 result as a single text file, replacing any output
# directory left over from a previous run.
resultsPath = 'results/result_3.tsv'
if os.path.isdir(resultsPath):
    shutil.rmtree(resultsPath)
result_task3.coalesce(1).saveAsTextFile(resultsPath)


# #### Using cartoframes library to visualize cartoDB map in notebook

# In[40]:

import cartoframes
from cartoframes import Layer, BaseMap, styling
BASEURL = 'https://larshbj.carto.com'
# NOTE(review): this API key is committed in source. Move it to an
# environment variable or untracked config and rotate the key.
APIKEY = '299d2d825191b9879da6fc859d1064930f28d061'
cc = cartoframes.CartoContext(base_url=BASEURL, api_key=APIKEY)
cc.map(layers=Layer('result_task3_carto_4', size=7), interactive=False)


# ## Task 4

# #### Method for calculating local time by converting timestamp to UTC and adding timezone offset. Outputs time rounded to the hour

# In[41]:

def getLocalTimeHour(timestamp, offset):
    """Return the local hour of day for an epoch timestamp, as a string.

    Args:
        timestamp: Epoch time in milliseconds (it is divided by 1000
            before use).
        offset: Timezone offset added to the epoch seconds; presumably
            expressed in seconds relative to UTC - confirm against the
            dataset.

    Returns:
        str: The hour component (0-23) of the shifted time, e.g. ``'13'``.
    """
    s = timestamp / 1000.0 + offset
    # Do the arithmetic from the UTC epoch: datetime.fromtimestamp()
    # converts to the timezone of the machine running the code, which
    # would shift the hour by the host's own UTC offset.
    shifted = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=s)
    return str(shifted.hour)


# #### Method using Python Counter class to calculate 1-hour interval with most tweets.