def test_ubuntu4(self):
    import tempfile
    import os
    import shutil
    dirpath = tempfile.mkdtemp()
    try:
        # Test setup
        stacktrace_path = os.path.join(dirpath, "Stacktrace.txt")
        with open(stacktrace_path, 'w') as stacktrace_file:
            stacktrace_file.write(self.example_ubuntu_stacktrace4)
        post_path = os.path.join(dirpath, "Post.txt")
        with open(post_path, 'w') as post_file:
            post_file.write(self.example_ubuntu_post4)
        # Test crash loader
        crash = Crash.load_from_file(dirpath)
        # Test that contents are loaded correctly
        assert isinstance(crash, Crash)
        stacktrace = crash['stacktrace']
        assert stacktrace is crash.stacktrace
        assert isinstance(stacktrace, Stacktrace)
        assert isinstance(stacktrace[0], Stackframe)
        assert stacktrace[0]['function'] is None
    finally:
        shutil.rmtree(dirpath)
def test_ubuntu2(self):
    import tempfile
    import os
    import shutil
    import datetime
    dirpath = tempfile.mkdtemp()
    try:
        # Test setup
        stacktrace_path = os.path.join(dirpath, "Stacktrace.txt (retraced)")
        with open(stacktrace_path, 'w') as stacktrace_file:
            stacktrace_file.write(self.example_ubuntu_stacktrace2)
        post_path = os.path.join(dirpath, "Post.txt")
        with open(post_path, 'w') as post_file:
            post_file.write(self.example_ubuntu_post2)
        # Test crash loader
        crash = Crash.load_from_file(dirpath)
        # Test that contents are loaded correctly
        assert isinstance(crash, Crash)
        assert crash['cpu'] == 'amd64'
        assert crash['date'] == datetime.datetime(2007, 6, 20, 10, 27, 6)
        stacktrace = crash['stacktrace']
        assert stacktrace is crash.stacktrace
        assert isinstance(stacktrace, Stacktrace)
        assert isinstance(stacktrace[0], Stackframe)
        assert stacktrace[0]['depth'] == 0
        assert stacktrace[0]['function'] == 'cairo_transform'
        assert 'address' not in stacktrace[0]
        assert stacktrace[1]['depth'] == 1
        assert stacktrace[1]['address'] == '0x00002b344498a150'
    finally:
        shutil.rmtree(dirpath)
def test_ubuntu3(self):
    import tempfile
    import os
    import shutil
    dirpath = tempfile.mkdtemp()
    try:
        # Test setup
        stacktrace_path = os.path.join(dirpath, "Stacktrace.txt (retraced)")
        with open(stacktrace_path, 'w') as stacktrace_file:
            stacktrace_file.write(self.example_ubuntu_stacktrace3)
        post_path = os.path.join(dirpath, "Post.txt")
        with open(post_path, 'w') as post_file:
            post_file.write(self.example_ubuntu_post3)
        # Test crash loader
        crash = Crash.load_from_file(dirpath)
        # Test that contents are loaded correctly
        assert isinstance(crash, Crash)
        stacktrace = crash['stacktrace']
        assert stacktrace is crash.stacktrace
        assert isinstance(stacktrace, Stacktrace)
        assert isinstance(stacktrace[0], Stackframe)
        assert stacktrace[0]['depth'] == 0
        assert stacktrace[1]['depth'] == 1
        assert stacktrace[6]['function'] == '<signal handler called>'
        assert stacktrace[4]['extra'][0] == 'No locals.'
        assert stacktrace[0]['file'] == '../../src/QuExt.c'
        assert stacktrace[0]['fileline'] == '46'
        assert len(stacktrace[0]['extra']) == 2
    finally:
        shutil.rmtree(dirpath)
def test_ubuntu(self):
    import tempfile
    import os
    import shutil
    import datetime
    dirpath = tempfile.mkdtemp()
    try:
        # Test setup
        stacktrace_path = os.path.join(dirpath, "Stacktrace.txt")
        with open(stacktrace_path, 'w') as stacktrace_file:
            stacktrace_file.write(self.example_ubuntu_stacktrace)
        post_path = os.path.join(dirpath, "Post.txt")
        with open(post_path, 'w') as post_file:
            post_file.write(self.example_ubuntu_post)
        # Test crash loader
        crash = Crash.load_from_file(dirpath)
        # Test that contents are loaded correctly
        assert isinstance(crash, Crash)
        assert crash['cpu'] == 'i386'
        assert crash['date'] == datetime.datetime(2008, 4, 11, 22, 12, 11)
        stacktrace = crash['stacktrace']
        assert stacktrace is crash.stacktrace
        assert isinstance(stacktrace, Stacktrace)
        assert isinstance(stacktrace[0], Stackframe)
        assert stacktrace[0]['depth'] == 0
        assert stacktrace[0]['function'] == 'free'
        assert stacktrace[1]['depth'] == 1
        assert stacktrace[1]['address'] == '0xb78e78b1'
    finally:
        shutil.rmtree(dirpath)
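# The four loader tests above repeat the same temp-directory scaffolding.
# A sketch of a helper that could factor it out; the name
# _load_crash_from_strings is hypothetical and not part of the original
# test suite.
def _load_crash_from_strings(self, stacktrace_name, stacktrace_text,
                             post_text):
    import tempfile
    import os
    import shutil
    dirpath = tempfile.mkdtemp()
    try:
        # Write the two attachments the loader expects, then parse them.
        with open(os.path.join(dirpath, stacktrace_name), 'w') as f:
            f.write(stacktrace_text)
        with open(os.path.join(dirpath, "Post.txt"), 'w') as f:
            f.write(post_text)
        return Crash.load_from_file(dirpath)
    finally:
        shutil.rmtree(dirpath)

# Hypothetical usage, e.g. in test_ubuntu:
#     crash = self._load_crash_from_strings(
#         "Stacktrace.txt", self.example_ubuntu_stacktrace,
#         self.example_ubuntu_post)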
def rg():
    # Imports added; the original snippet used pygame and sys without them.
    import sys
    import pygame

    pygame.init()
    screen = pygame.display.set_mode((1200, 800))
    pygame.display.set_caption('blue sky')
    bg_color = (0, 0, 230)
    crash = Crash(screen)
    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
        screen.fill(bg_color)
        crash.blitme()
        pygame.display.flip()
def test_no_function(self):
    should_contain = 'launchpad:122451#0'
    crash = Crash.fromjson(self.exampleJson1)
    topn = TopN(3)
    topna = TopNAddress(3)
    topnf = TopNFile(3)
    signature = topn.get_signature(crash)
    assert should_contain in signature
    print(repr(signature))
    assert signature == \
        u'launchpad:122451#0 ≻ launchpad:122451#1 ≻ launchpad:122451#2'
    assert topna.get_signature(crash) == \
        u'0x0805f92c ≻ 0x085e5618 ≻ 0x085e5618'
    crash2 = Crash.fromjson(self.exampleJson2)
    assert not topn.compare(crash, crash2)
    assert topn.compare(crash, crash)
    assert not topna.compare(crash, crash2)
    assert topna.compare(crash, crash)
    assert not topnf.compare(crash, crash2)
    assert topnf.compare(crash, crash)
def getrawbyid(cls, database_id, index=None):
    if index is None:
        raise ValueError('No ElasticSearch index specified!')
    if cls.es is None:
        raise RuntimeError(
            'Forgot to monkey-patch ES connection to ESCrash!')
    # Serve from the per-index in-memory cache when possible.
    if index in cls.crashes:
        if database_id in cls.crashes[index]:
            return cls.crashes[index][database_id]
    try:
        response = cls.es.get(index=index, id=database_id)
    except elasticsearch.exceptions.NotFoundError:
        return None
    return Crash(response['_source'])
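# Minimal usage sketch for getrawbyid (not in the original). It assumes
# ESCrash.es has already been monkey-patched with a live Elasticsearch
# client, as the RuntimeError above demands, and that 'crashes' is the
# target index.
def _example_getrawbyid_usage():
    raw = ESCrash.getrawbyid('exampleCrash1', index='crashes')
    if raw is None:
        # Callers treat None as "not found" rather than an error.
        print('No crash stored under that id')
    return raw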
def __call__(cls, crash=None, index=None, unsafe=False):
    if index is None:
        raise ValueError('No ElasticSearch index specified!')
    if index not in cls._cached:
        cls._cached[index] = WeakValueDictionary()
    # The case that the constructor was called with a whole crash
    # datastructure:
    if isinstance(crash, Crash):
        if 'database_id' in crash:
            # The case that we already have it in memory:
            if crash['database_id'] in cls._cached[index]:
                already = cls._cached[index][crash['database_id']]
                assert Crash(already) == crash
                return already
            # We don't have it in memory, so see if it's already in ES.
            if not unsafe:
                existing = cls.getrawbyid(crash['database_id'], index=index)
            else:
                existing = None
            if existing is not None:
                if existing != crash:
                    # We already know of it! Raise an identical-report error.
                    # TODO: not resilient to multiple running instances of
                    # PartyCrasher :c
                    raise IdenticalReportError(existing)
                # It is already in ElasticSearch with the same data.
                newish = super(ESCrashMeta, cls).__call__(crash=existing,
                                                          index=index)
                # Cache it as a weak reference.
                cls._cached[index][crash['database_id']] = newish
                return newish
            # It's not in ES, so add it.
            else:
                # Ensure the date is in UTC ISO format.
                now = datetime.datetime.utcnow()
                crash.setdefault('date', now.isoformat())
                if 'stacktrace' in crash and crash['stacktrace'] is not None:
                    for frame in crash['stacktrace']:
                        if 'logdf' in frame:
                            raise ValueError(
                                "logdf should not be stored in ElasticSearch")
                crash_es_safe = crash.copy()
                try:
                    response = cls.es.create(index=index,
                                             doc_type='crash',
                                             body=json.dumps(
                                                 crash, cls=ESCrashEncoder),
                                             id=crash['database_id'],
                                             refresh=True)
                    assert response['created']
                except elasticsearch.exceptions.ConflictError as e:
                    if (('DocumentAlreadyExistsException' in e.error) or
                            ('document_already_exists_exception' in e.error)):
                        print("Got DocumentAlreadyExistsException on create!",
                              file=sys.stderr)
                        already = None
                        while already is None:
                            print("Waiting for ElasticSearch to catch up...",
                                  file=sys.stderr)
                            time.sleep(1)  # Let ES think about its life...
                            already = cls.getrawbyid(crash['database_id'],
                                                     index=index)
                        # The document got added after all. What seems to
                        # happen is that the Python client library retries
                        # after the create times out, but ES did receive the
                        # first create and created the document; it just
                        # didn't respond in time.
                    else:
                        raise
                new = super(ESCrashMeta, cls).__call__(crash=crash,
                                                       index=index)
                # Cache it as a weak reference.
                cls._cached[index][crash['database_id']] = new
                return new
        else:
            raise Exception("Crash with no database_id!")
    # The case where the constructor is called with a database id only:
    elif isinstance(crash, str) or isinstance(crash, unicode):
        if crash in cls._cached[index]:
            return cls._cached[index][crash]
        existing = cls.getrawbyid(crash, index=index)
        if existing is not None:
            # Found it in ElasticSearch!
            newish = super(ESCrashMeta, cls).__call__(crash=existing,
                                                      index=index)
            cls._cached[index][crash] = newish
            return newish
        else:
            raise ReportNotFoundError(crash)
    else:
        raise ValueError()
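# Sketch (not in the original) of the three paths through
# ESCrashMeta.__call__ above. 'some_crash' stands in for a Crash carrying a
# 'database_id'; the 'crashes' index name is an assumption.
def _example_escrash_call_paths(some_crash):
    # Path 1: constructing from a full Crash inserts it (or dedupes it).
    crash = ESCrash(some_crash, index='crashes')
    # Path 2: constructing from the bare database id hits the weak-value
    # cache and returns the very same object.
    assert ESCrash(some_crash['database_id'], index='crashes') is crash
    # Path 3: an id that is neither cached nor in ES raises
    # ReportNotFoundError.
    try:
        ESCrash('no-such-id', index='crashes')
    except ReportNotFoundError:
        pass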
class TestCrash(unittest.TestCase):
    exampleCrash1 = Crash({
        'database_id': 'exampleCrash1',
        'project': 'Ubuntu',
        'CrashCounter': '1',
        'ExecutablePath': '/bin/nbd-server',
        'NonfreeKernelModules': 'fglrx',
        'Package': 'nbd-server 1:2.9.3-3ubuntu1',
        'PackageArchitecture': 'i386',
        'ProcCmdline': '/bin/nbd-server',
        'ProcCwd': '/',
        'ProcEnviron': 'PATH=/sbin:/bin:/usr/sbin:/usr/bin',
        'Signal': '11',
        'SourcePackage': 'nbd',
        'StacktraceTop': '\xa0?? ()',
        'Title': 'nbd-server crashed with SIGSEGV',
        'Uname': ('Linux mlcochff 2.6.22-7-generic #1 SMP '
                  'Mon Jun 25 17:33:14 GMT 2007 i686 GNU/Linux'),
        'cpu': 'i386',
        'date': datetime.datetime(2007, 6, 27, 12, 4, 43),
        'os': 'Ubuntu 7.10',
        'stacktrace': Stacktrace([
            Stackframe({
                'address': u'0x0804cbd3',
                'args': u'argc=',
                'depth': 0,
                'extra': [
                    u'\tserve = (SERVER *) 0x0',
                    u'\tservers = (GArray *) 0x8051418',
                    u'\terr = (GError *) 0x0'
                ],
                'file': u'nbd-server.c:1546',
                'function': u'main'
            }),
            Stackframe({
                'address': u'0xb7cfcebc',
                'args': u'',
                'depth': 1,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0x00000001',
                'args': u'',
                'depth': 2,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xbfeff544',
                'args': u'',
                'depth': 3,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xbfeff54c',
                'args': u'',
                'depth': 4,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xb7f1b898',
                'args': u'',
                'depth': 5,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0x00000000',
                'args': u'',
                'depth': 6,
                'function': u'??'
            })
        ]),
        'type': 'Crash'
    })

    def test_es_reachable_working(self):
        es = Elasticsearch(hosts=['localhost'])
        es.indices.create(index='test-index', ignore=400)
        es.indices.delete(index='test-index', ignore=[400, 404])

    def test_es_add(self):
        import gc
        es = ESCrash.es
        es.indices.delete(index='crashes', ignore=[400, 404])
        mycrash = ESCrash(self.exampleCrash1)
        mycrash_dupe = ESCrash(self.exampleCrash1)
        assert mycrash is mycrash_dupe
        mycrash_another = ESCrash('exampleCrash1')
        assert mycrash is mycrash_another
        del mycrash
        del mycrash_another
        del mycrash_dupe
        gc.collect()
        es.indices.flush(index='crashes')
        time.sleep(1)
        fetched_from_es = ESCrash('exampleCrash1')
        fetched_from_es_undone = Crash(fetched_from_es)
        assert fetched_from_es_undone == self.exampleCrash1
        fetched_from_es['cpu'] = 'amd64'
def main():
    """
    *********************************************
    Extract and clean data from nyc open data
    *********************************************
    """
    APP_TOKEN = app_token()
    base_url = ("https://data.cityofnewyork.us/resource/h9gi-nx95.json"
                "?$$app_token={}".format(APP_TOKEN))
    url = base_url + "{}"
    cnt_url = base_url + "{}{}"

    # SoQL $where filters
    where_inj = "&$where=number_of_cyclist_injured>0.0&$limit=50000"
    where_kill = "&$where=number_of_cyclist_killed>0.0"
    inj_df = pd.read_json(url.format(where_inj))
    killed_df = pd.read_json(url.format(where_kill))

    def dt(date, time):
        date = pd.to_datetime(date).dt.date
        time = pd.to_datetime(time).dt.time
        return date, time

    # NYC Open Data renamed these columns from "accident" to "crash",
    # so rename them back for consistency with the rest of this script.
    killed_df.crash_date, killed_df.crash_time = dt(killed_df.crash_date,
                                                    killed_df.crash_time)
    inj_df.crash_date, inj_df.crash_time = dt(inj_df.crash_date,
                                              inj_df.crash_time)
    killed_df = killed_df.rename(columns={
        'crash_date': 'accident_date',
        'crash_time': 'accident_time'
    })
    inj_df = inj_df.rename(columns={
        'crash_date': 'accident_date',
        'crash_time': 'accident_time'
    })
    df = (pd.concat([inj_df, killed_df])
          .drop(columns='location')
          .drop_duplicates()
          .reset_index(drop=True))
    df.vehicle_type_code1 = df.vehicle_type_code1.apply(
        lambda x: str(x).upper())
    df.vehicle_type_code2 = df.vehicle_type_code2.apply(
        lambda x: str(x).upper())
    df['Accident Year'] = df.accident_date.apply(lambda x: x.year)
    df['Accident Month'] = df.accident_date.apply(lambda x: x.month)
    df['Accident Hour'] = df.accident_time.apply(lambda x: x.hour)

    def create_df(group):
        return (df.groupby(group).collision_id.count().reset_index().rename(
            columns={'collision_id': 'Number of Accidents'}))

    """
    *********************************************
    Create figures for month and hour data
    *********************************************
    """
    crash_mo_yr = create_df(['Accident Year', 'Accident Month'])
    crash_hr = create_df('Accident Hour')
    crash_mo_hr = create_df(['Accident Month', 'Accident Hour'])
    killed_df['accident_year'] = killed_df.accident_date.apply(
        lambda x: x.year)
    killed_df['accident_month'] = killed_df.accident_date.apply(
        lambda x: x.month)
    killed_df['accident_hr'] = killed_df.accident_time.apply(lambda x: x.hour)

    mo_fig = px.area(crash_mo_yr,
                     x="Accident Month",
                     y="Number of Accidents",
                     animation_frame="Accident Year",
                     range_y=[0, 800],
                     range_x=[1, 12])
    mo_fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1000
    mo_fig.layout.title = "Bicycle Accidents by Month for Each Year"
    pio.write_html(mo_fig, file="app/static/mo_fig.html", auto_play=False)

    hr_fig = px.area(crash_mo_hr,
                     x="Accident Hour",
                     y="Number of Accidents",
                     animation_frame="Accident Month",
                     range_y=[0, 400],
                     range_x=[0, 23])
    hr_fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1000
    hr_fig.layout.title = "Bicycle Accidents by Hour for Each Month"
    pio.write_html(hr_fig, file="app/static/hr_fig.html", auto_play=False)

    """
    *********************************************
    Extract data from citibike files - all trips
    *********************************************
    """
    fdir = './agg_trip'
    agg_files = os.listdir(fdir)
    agg_df = pd.read_csv(fdir + '/' + agg_files[0]).iloc[:, [0, 1]]
    for i in range(1, len(agg_files)):
        agg_df = agg_df.append(
            pd.read_csv(fdir + '/' + agg_files[i]).iloc[:, [0, 1]])
    agg_df.Date = pd.to_datetime(agg_df.Date).dt.date
    agg_df = agg_df.rename(columns={
        'Trips over the past 24-hours (midnight to 11:59pm)':
        'Number of Trips'
    })
    agg_df = agg_df.sort_values('Date')
    fig = px.line(agg_df,
                  x='Date',
                  y='Number of Trips',
                  title="Number of CitiBike Trips by Day",
                  hover_name='Date')
    pio.write_html(fig, file="app/static/fig.html", auto_play=False)

    """
    *********************************************
    Using 9/25/2019 to map common citibike routes
    *********************************************
    """
    high_day = pd.read_csv('./app/static/high_day.csv')
    coord092519 = high_day[[
        'start station name', 'start station id', 'start station latitude',
        'start station longitude', 'end station name', 'end station id',
        'end station latitude', 'end station longitude'
    ]].copy()
    coord092519['id'] = (coord092519['start station name'] +
                         coord092519['end station name'])
    coord092519 = coord092519.groupby([
        'start station name', 'start station id', 'start station latitude',
        'start station longitude', 'end station name', 'end station id',
        'end station latitude', 'end station longitude'
    ]).id.count().reset_index()
    # Drop round trips (same start and end station).
    coord092519['filt'] = coord092519.apply(
        lambda x: 'y'
        if x['start station name'] == x['end station name'] else '',
        axis=1)
    coord092519 = coord092519[coord092519.filt != 'y'].reset_index(drop=True)
    cohort = coord092519[coord092519.id >= 4]
    cohort = cohort.rename(columns={'id': 'count'})
    cohort['id'] = (cohort['start station id'].apply(str) + '-' +
                    cohort['end station id'].apply(str))
    routes = pd.read_csv('./app/static/backup_route_file.csv')
    routes = routes[
        routes.geojson != '{"message":"Too Many Requests"}'].reset_index(
            drop=True)
    cohort_df = pd.merge(cohort,
                         routes[['id', 'geojson']],
                         on='id',
                         how='inner')
    cohort_df = cohort_df[['geojson']].drop_duplicates()
    geojson = list(cohort_df.geojson)
    gjson = []
    for i in range(len(geojson)):
        gjson.append(
            json.loads(geojson[i])['routes'][0]['geometry']['coordinates'])
    # Flip (lon, lat) pairs to (lat, lon) for folium.
    for i in gjson:
        for j in i:
            j.reverse()

    """
    *********************************************
    mapping the accidents
    *********************************************
    """
    loc_df = df[[
        'borough', 'latitude', 'longitude', 'on_street_name',
        'off_street_name', 'accident_date'
    ]].copy()
    loc_df = loc_df[(pd.isna(loc_df.latitude) == False)
                    & (loc_df.latitude != 0) & (loc_df.longitude != 0)]
    loc_df.on_street_name = loc_df.on_street_name.str.strip()
    loc_df.off_street_name = loc_df.off_street_name.str.strip()
    loc_df.accident_date = loc_df.accident_date.apply(str)
    loc_df['lat_lon_list'] = loc_df.apply(lambda x: [x.longitude, x.latitude],
                                          axis=1)
    loc_df = loc_df.sort_values('accident_date').reset_index(drop=True)

    intersect_df = loc_df.copy()
    intersect_df['intersection'] = (intersect_df.on_street_name + ';' +
                                    intersect_df.off_street_name)
    intersect_df.intersection = intersect_df.intersection.apply(
        lambda x: ' & '.join(sorted(x.split(';')))
        if pd.isna(x) == False else x)
    dang_int = (intersect_df.groupby(
        ['borough', 'intersection'])['accident_date'].count().reset_index().
                sort_values('accident_date', ascending=False).rename(
                    columns={'accident_date': 'Number of Bike Accidents'}))

    # For the table
    dang_int_viz = (dang_int[dang_int['Number of Bike Accidents'] >= 10].copy(
    ).reset_index(drop=True).rename(columns={
        'borough': 'Borough',
        'intersection': 'Intersection'
    }))
    for i in dang_int_viz.index:
        Crash(dang_int_viz.iloc[i].Borough,
              dang_int_viz.iloc[i].Intersection).create_map().save(
                  'app/static/crash_maps/' + dang_int_viz.iloc[i].Borough +
                  dang_int_viz.iloc[i].Intersection.replace(' ', '_') +
                  '.html')
    dang_int_viz.Intersection = dang_int_viz.apply(
        lambda x: '<a href={} target="iframe_map">{}</a>'.format(
            '../static/crash_maps/' + x.Borough + x.Intersection.replace(
                ' ', '_') + '.html', x.Intersection),
        axis=1)
    html = """<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Borough</th>
      <th>Intersection</th>
      <th>Number of Bike Accidents</th>
    </tr>
  </thead>
  <tbody>
"""
    for i in dang_int_viz.index:
        html = (html + '<tr><td>' + dang_int_viz.iloc[i].Borough +
                '</td><td>' + dang_int_viz.iloc[i].Intersection +
                '</td><td>' +
                str(dang_int_viz.iloc[i]['Number of Bike Accidents']) +
                '</td></tr>')
    html = html + "</tbody></table>"
    html = BeautifulSoup(html, "lxml")
    html.body.insert(
        0,
        BeautifulSoup('<link rel="stylesheet" href="/static/style.css">',
                      "lxml"))
    with open('app/static/crash_table.html', 'w') as f:
        f.write(str(html))

    lat_lon = intersect_df[['intersection', 'lat_lon_list']].copy()
    lat_lon.lat_lon_list = lat_lon.lat_lon_list.apply(
        lambda x: str(round(x[0], 5)) + ';' + str(round(x[1], 5)))
    lat_lon = lat_lon.drop_duplicates().reset_index(drop=True)
    lat_lon.lat_lon_list = lat_lon.lat_lon_list.apply(
        lambda x: [float(i) for i in x.split(';')])
    for i in lat_lon.index:
        lat_lon.lat_lon_list[i].reverse()
    dang_int = pd.merge(dang_int, lat_lon, on='intersection', how='left')
    dang_int.to_csv('app/static/dang_int.csv', index=False)
    dang_int_10 = (
        dang_int[(dang_int['Number of Bike Accidents'] >= 10)
                 & (dang_int['Number of Bike Accidents'] < 15)].reset_index(
                     drop=True))
    dang_int_15 = (
        dang_int[(dang_int['Number of Bike Accidents'] >= 15)
                 & (dang_int['Number of Bike Accidents'] < 20)].reset_index(
                     drop=True))
    dang_int_20 = (
        dang_int[dang_int['Number of Bike Accidents'] >= 20].reset_index(
            drop=True))
    features = [{
        'type': 'Feature',
        'geometry': {
            'type': 'MultiPoint',
            'coordinates': list(loc_df.lat_lon_list),
        },
        'properties': {
            'times': list(loc_df.accident_date),
            'icon': 'circle',
            'iconstyle': {
                'fillColor': 'red',
                'fillOpacity': 0.5,
                'stroke': 'false',
                'radius': 5
            },
            'style': {
                'weight': 0.5
            }
        }
    }]

    """
    *********************************************
    Getting the bike lanes and formatting the data
    *********************************************
    """
    bike_lanes = pd.read_json('./app/static/Bicycle Routes.geojson')
    bl_prot_json = []
    bl_stand_json = []
    # facilitycl == 'I' marks protected (Class I) lanes.
    for i in bike_lanes.index:
        if bike_lanes.iloc[i].features['properties']['facilitycl'] == 'I':
            for j in range(
                    len(bike_lanes.iloc[i].features['geometry']
                        ['coordinates'])):
                bl_prot_json.append(
                    bike_lanes.iloc[i].features['geometry']['coordinates'][j])
        else:
            for j in range(
                    len(bike_lanes.iloc[i].features['geometry']
                        ['coordinates'])):
                bl_stand_json.append(
                    bike_lanes.iloc[i].features['geometry']['coordinates'][j])
    for i in bl_prot_json:
        for j in i:
            j.reverse()
    for i in bl_stand_json:
        for j in i:
            j.reverse()

    """
    *********************************************
    Creating the map and interactive features
    *********************************************
    """
    nyc_map = folium.Map(location=[40.735, -73.95],
                         zoom_start=11.5,
                         tiles=None)
    folium.TileLayer('cartodbdark_matter', control=False).add_to(nyc_map)

    # Add bike lanes
    folium.PolyLine(bl_prot_json, weight=1, opacity=0.9, color='lime').add_to(
        folium.FeatureGroup(name='Protected Bike Lanes').add_to(nyc_map))
    folium.PolyLine(bl_stand_json, weight=1, opacity=0.9,
                    color='yellow').add_to(
                        folium.FeatureGroup(
                            name='Non-Protected Bike Lanes').add_to(nyc_map))

    # Add citibike routes
    folium.PolyLine(gjson, weight=1, opacity=0.2).add_to(
        folium.FeatureGroup(name='Commonly Used Citibike Routes',
                            overlay=False).add_to(nyc_map))

    # Add dangerous-intersection markers
    over10 = folium.FeatureGroup(name='Intersections w/10-14 Accidents',
                                 overlay=False)
    for i in dang_int_10.index:
        over10.add_child(
            folium.Marker(
                dang_int_10.lat_lon_list[i],
                tooltip=(dang_int_10.intersection[i] + ':\t' +
                         str(dang_int_10['Number of Bike Accidents'][i]) +
                         ' Accidents'),
                icon=folium.Icon(color='red',
                                 prefix='fa',
                                 icon='fas fa-bicycle')))
    over15 = folium.FeatureGroup(name='Intersections w/15-19 Accidents',
                                 overlay=False)
    for i in dang_int_15.index:
        over15.add_child(
            folium.Marker(
                dang_int_15.lat_lon_list[i],
                tooltip=(dang_int_15.intersection[i] + ':\t' +
                         str(dang_int_15['Number of Bike Accidents'][i]) +
                         ' Accidents'),
                icon=folium.Icon(color='red',
                                 prefix='fa',
                                 icon='fas fa-bicycle')))
    over20 = folium.FeatureGroup(name='Intersections w/20 or More Accidents',
                                 overlay=False)
    for i in dang_int_20.index:
        over20.add_child(
            folium.Marker(
                dang_int_20.lat_lon_list[i],
                tooltip=(dang_int_20.intersection[i] + ':\t' +
                         str(dang_int_20['Number of Bike Accidents'][i]) +
                         ' Accidents'),
                icon=folium.Icon(color='red',
                                 prefix='fa',
                                 icon='fas fa-bicycle')))
    nyc_map.add_child(over10)
    nyc_map.add_child(over15)
    nyc_map.add_child(over20)

    plugins.TimestampedGeoJson(
        {
            'type': 'FeatureCollection',
            'features': features
        },
        period='P1M',
        add_last_point=True,
        auto_play=True,
        loop=False,
        max_speed=2,
        loop_button=True,
        date_options='YYYY-MM-DD',
        time_slider_drag_update=True,
        duration='P1M').add_to(nyc_map)
    folium.LayerControl().add_to(nyc_map)
    nyc_map.save('app/static/map_nyc.html')

    """
    *********************************************
    Bike crash causes
    *********************************************
    """
    # Decided not to use the below for now. Could use it in the future...
    bike_list = ['BIKE', 'BICYCLE', 'E-BIK', 'BYCIC']
    cause_df = df[((pd.isna(df.contributing_factor_vehicle_3) == True)
                   & ((df.vehicle_type_code1.isin(bike_list) == True)
                      | (df.vehicle_type_code2.isin(bike_list) == True)))]
    cause_df = cause_df[(cause_df.vehicle_type_code1.isin(bike_list) == False)
                        |
                        (cause_df.vehicle_type_code2.isin(bike_list) == False)]

    def bike_cause(x):
        if x.vehicle_type_code1 in bike_list:
            return x.contributing_factor_vehicle_1
        else:
            return x.contributing_factor_vehicle_2

    def veh_cause(x):
        if x.vehicle_type_code1 not in bike_list:
            return x.contributing_factor_vehicle_1
        else:
            return x.contributing_factor_vehicle_2

    cause_df['bike_cause'] = cause_df.apply(bike_cause, axis=1)
    cause_df['veh_cause'] = cause_df.apply(veh_cause, axis=1)

    # Remove 'Unspecified' from the dataset; it is not useful.
    bike_cause_df = (cause_df.groupby(
        'bike_cause').collision_id.count().reset_index().sort_values(
            'collision_id', ascending=False).head(15).reset_index(drop=True))
    bike_cause_df = bike_cause_df[bike_cause_df.bike_cause != 'Unspecified']
    veh_cause_df = (cause_df.groupby(
        'veh_cause').collision_id.count().reset_index().sort_values(
            'collision_id', ascending=False).head(15).reset_index(drop=True))
    veh_cause_df = veh_cause_df[veh_cause_df.veh_cause != 'Unspecified']
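# If this script is meant to be run directly, a conventional entry-point
# guard (not present in the original) would be:
if __name__ == '__main__':
    main()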
def make_mlt_query_explain(self,
                           crash,
                           index=None,
                           use_existing=None,
                           max_query_terms=None,
                           terminate_after=None,
                           dont_explain=False):
    """
    Builds the more_like_this query.
    """
    if index is None:
        index = self.index
    if isinstance(crash, basestring):
        crash = Crash(ESCrash(crash, index=index))
        del crash['buckets']
        if use_existing is None:
            # This should be True, but ES seems to be broken?
            use_existing = False
    else:
        if use_existing is None:
            use_existing = False
    body = self.make_more_like_this_query(crash, max_query_terms,
                                          terminate_after)
    body["explain"] = (not dont_explain)
    if use_existing:
        # Prevent the crash from being its own highest match when it's
        # already in ES.
        del body["query"]["more_like_this"]["like"][0]["doc"]
        body["query"]["more_like_this"]["like"][0]["_id"] = crash[
            "database_id"]
    skip_fields = Set([
        'database_id',
        'buckets',
        'force_bucket',
        'depth',
        'date',
    ])

    def all_but_skip_fields(c, prefix=""):
        fields = Set()
        if isinstance(c, dict):
            for k, v in c.iteritems():
                if k not in skip_fields:
                    fields.add(prefix + k)
                    subfields = all_but_skip_fields(v, prefix + k + ".")
                    fields.update(subfields)
        elif isinstance(c, list):
            for i in c:
                subfields = all_but_skip_fields(i, prefix)
                fields.update(subfields)
        elif isinstance(c, basestring) or c is None:
            pass
        elif isinstance(c, datetime):
            pass
        else:
            raise NotImplementedError("all_but_skip_fields can't handle " +
                                      c.__class__.__name__)
        return fields

    fields = list(all_but_skip_fields(crash))
    # Including the extra field seems to improve recall at the expense of
    # precision; the overall F-score seems to go down, so it's not clearly
    # worthwhile:
    # if 'stacktrace.function' in fields:
    #     fields.append('stacktrace.function.whole')
    for field in fields:
        assert "buckets" not in field
    body["query"]["more_like_this"]["fields"] = fields
    # self.ensure_field_mappings(fields)
    return body
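# Hedged usage sketch (not in the original). 'searcher' is assumed to be the
# object defining make_mlt_query_explain above and to expose an Elasticsearch
# client as searcher.es; only searcher.index and make_more_like_this_query
# are actually visible in the method, so both names are assumptions.
def _example_mlt_query(searcher):
    # Build the more_like_this body from a stored report's database id,
    # then run it as an ordinary search.
    body = searcher.make_mlt_query_explain('some-database-id',
                                           max_query_terms=25)
    return searcher.es.search(index=searcher.index, body=body)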