def get_311(data_id):
    '''
    Get 2012 and 2017 311 data from the Chicago data portal.

    Input:
        data_id: id of the report type

    Return:
        pandas DataFrame with the columns and dtypes given by COL_TYPES
    '''
    '''COL_TYPES = {'sr_type': str,
                    #'created_date': str,
                    'zip_code': int,
                    'longitude': float,
                    'latitude': float,
                    'legacy_record': bool}
    '''
    #cols = [item for item in COL_TYPES.keys()]
    client = Socrata('data.cityofchicago.org',
                     'E0eO5nY1aKuEY1pVrunfqFhDz',
                     username='******',
                     password='******')
    conds = '''legacy_sr_number IS NULL'''
    res = client.get(data_id,
                     #where=conds,
                     limit=1000000)
    client.close()
    df = pd.DataFrame.from_records(res)
    # df['created_date'] = pd.to_datetime(df['created_date'])
    # df = df[df['created_date'] > start_year]
    # df = df[df['created_date'] < end_year]
    return df
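# A minimal usage sketch for get_311 above; the dataset id is a placeholder,
# not a confirmed Chicago data-portal id, and pandas (pd) plus sodapy's
# Socrata are assumed to be imported at module level.
data_id = 'xxxx-xxxx'  # hypothetical 311 dataset id
df_311 = get_311(data_id)
print(df_311.shape)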
def get_driver_license_info(self):
    client = Socrata("www.datossct.gob.mx", None)
    for rec in self:
        try:
            driver_license = client.get('3qhi-59v6',
                                        licencia=rec.driver_license)
            license_valid_from = datetime.strptime(
                driver_license[0]['fecha_inicio_vigencia'],
                '%Y-%m-%dT%H:%M:%S.%f')
            license_expiration = datetime.strptime(
                driver_license[0]['fecha_fin_vigencia'],
                '%Y-%m-%dT%H:%M:%S.%f')
            rec.write({
                'license_type': driver_license[0]['categoria_de_la_licencia'],
                'license_valid_from': license_valid_from,
                'license_expiration': license_expiration,
            })
            client.close()
        except Exception:
            client.close()
            raise ValidationError(
                _('The driver license is not in SCT database'))
def get_crime_data(name_of_borough):
    # connect to NYC Crime Database
    client = Socrata("data.cityofnewyork.us", None)

    # get past days
    last_date = (datetime.datetime.now() +
                 datetime.timedelta(days=-250)).isoformat()

    if name_of_borough == 'staten':
        upper_borough = 'STATEN ISLAND'
    else:
        upper_borough = name_of_borough.upper()

    # get all felonies without being throttled
    num_crimes = 0
    for i in range(15):
        offset = 1000 * i
        # build query to get crimes that are within x days and happened in name_of_borough
        q = "SELECT * WHERE cmplnt_fr_dt > '%s' and boro_nm = '%s' and law_cat_cd = '%s' OFFSET %d" % (
            last_date, upper_borough, "FELONY", offset)
        # run query
        num_crimes += len(client.get("7x9x-zpz6", query=q))
        print(num_crimes)

    client.close()
    return num_crimes
def insert(connection, city):
    client = Socrata(urls[city], "FXdTHdcGpX9fqDNp9wnN9DGTq",
                     username="******", password="******")
    lim = 50000
    inc = 0
    results = ['']
    while len(results) > 0:
        results = client.get(datasets[city], content_type="json",
                             limit=lim, offset=lim * inc)
        rows = []
        for i in xrange(len(results)):
            try:
                rows.append(createRow(results[i], city))
            except:
                pass
        cursor = connection.cursor()
        query = "INSERT INTO %s " % city
        query = query + "(`city_id`, `crime_description`, `crime_code`, \
            `datetime_occured`, `location_description`, \
            `latitude`, `longitude`) \
            VALUES (%s, %s, %s, %s, %s, %s, %s)"
        cursor.executemany(query, rows)
        connection.commit()
        inc += 1
    client.close()
def test_replace():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "replace_songs.txt"
    data = [
        {
            "theme": "Surfing",
            "artist": "Wavves",
            "title": "King of the Beach",
            "year": "2010"
        },
        {
            "theme": "History",
            "artist": "Best Friends Forever",
            "title": "Abe Lincoln",
            "year": "2008"
        },
    ]
    setup_mock(adapter, "PUT", response_data, 200)
    response = client.replace(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 2
    client.close()
def get_crime_data():
    # call crime data api
    client = Socrata(settings.SOURCE_DOMAIN, settings.SOCRATA_APP_TOKEN)
    limit = 20000
    offset = 0
    tries = 10

    # return the headers
    yield ['cad_cdw_id', 'event_clearance_date', 'event_clearance_group']

    for _ in xrange(tries):
        # sodapy throws an exception if we get a bad response
        # no need to handle it here, we'll catch it on the outside
        response = client.get(
            settings.DATASET_ID,
            content_type="csv",
            select='cad_cdw_id, event_clearance_group, event_clearance_date',
            where='event_clearance_date is not null and \
                within_circle(incident_location, {0}, {1}, {2})'.format(
                settings.CLINK_LAT, settings.CLINK_LON, settings.RADIUS),
            order='event_clearance_date DESC',
            limit=limit,
            offset=offset
        )

        offset += len(response) - 1  # don't count the header
        if len(response) == 1:  # if it's just the header
            break  # then there's nothing left to ask for

        # trim off the header row
        # using itertools so we don't make a shallow copy of the giant list
        response_iter = islice(response, 1, None)
        for row in imap(_convert_row, response_iter):
            yield row

    client.close()
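# A short consumption sketch for the generator above: the first yielded row
# is the header, so the whole stream can go straight through csv.writer.
# Assumes the same Django-style `settings` module is configured; the output
# filename is hypothetical.
import csv

with open('crimes.csv', 'w') as fout:
    writer = csv.writer(fout)
    for row in get_crime_data():
        writer.writerow(row)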
class SocrataClient:
    def __init__(self):
        self.client = Socrata(conf.DOMAIN, conf.TOKEN, timeout=conf.TIMEOUT)

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return conf.DATASET_IDS[year]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(conf.ATTEMPTS):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < conf.ATTEMPTS - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)

    def get_datasets(self):
        '''
        Search for "MyLA311 Service Request Data" within the response to
        get the dataset ids for each year.
        '''
        return self.client.datasets()
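# A hedged usage sketch for the wrapper above; the year key and keyword
# arguments are illustrative and rely on conf.DATASET_IDS / conf.ATTEMPTS
# being defined in the real conf module.
client = SocrataClient()
rows = client.get(2019, limit=100)  # retried up to conf.ATTEMPTS times
meta = client.get_metadata(2019)
del client  # __del__ closes the underlying Socrata session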
def test_get_all():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    setup_mock(adapter, "GET", "bike_counts_page_1.json", 200,
               query="$offset=0")
    setup_mock(adapter, "GET", "bike_counts_page_2.json", 200,
               query="$offset=1000")
    response = client.get_all(DATASET_IDENTIFIER)

    assert inspect.isgenerator(response)
    data = list(response)
    assert len(data) == 1001
    assert data[0]["date"] == "2016-09-21T15:45:00.000"
    assert data[-1]["date"] == "2016-10-02T01:45:00.000"
    client.close()
def test_create():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None)

    columns = [
        {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"},
        {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"}
    ]
    tags = ["foo", "bar"]
    response = client.create("Foo Bar", description="test dataset",
                             columns=columns, tags=tags, row_identifier="bar")

    request = adapter.request_history[0]
    request_payload = json.loads(request.text)  # can't figure out how to use .json

    # Test request payload
    for dataset_key in ["name", "description", "columns", "tags"]:
        assert dataset_key in request_payload
    for column_key in ["fieldName", "name", "dataTypeName"]:
        assert column_key in request_payload["columns"][0]

    # Test response
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
def get_comps_sdat(address, miles=1, save_csv=False):
    meta_df = property_metadata(address)
    query = get_query(meta_df, miles)
    client = Socrata(url_md, app_id)
    response = client.get(id_md, query=query)
    df = pd.DataFrame(response)

    num = []
    non_num = []
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
            num.append(col)
        except:
            non_num.append(col)

    df['id'] = meta_df.address.iloc[0]
    df.date = pd.to_datetime(df['date'])
    df.date2 = pd.to_datetime(df['date2'])
    df['basement'] = df['style_code'].apply(
        lambda x: 'with basement' in x.lower())
    df['price_change'] = df['price'].astype(float) - df['price2'].astype(float)
    df['date_change'] = (df.date - df.date2).dt.days
    # pd.Timestamp.today() replaces the deprecated pd.datetime.today()
    df['norm_price'] = (
        (1 + (0.03 / 365))**(pd.Timestamp.today() - df['date']).dt.days) * df.price
    client.close()

    if save_csv:
        df.to_csv(meta_df.address.iloc[0] + ".csv", index=False)

    results = (meta_df, df)
    return results
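# A hypothetical call sketch for get_comps_sdat above, which chains
# property_metadata (defined later in this collection) with a comps query.
# The address is made up, and url_md / app_id / id_md are module globals
# assumed to be configured elsewhere.
meta_df, comps_df = get_comps_sdat("123 Main St, Baltimore MD", miles=0.5)
print(comps_df[['id', 'price_change', 'norm_price']].head())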
def fillCDC500CitiesData(self):
    client = Socrata(
        "chronicdata.cdc.gov",
        'QoQet97KEDYpMW4x4Manaflkp'
    )  # My (John Pham's) access token is QoQet97KEDYpMW4x4Manaflkp
    censusTractRetrievalString = ("place_tractid='3651000-" +
                                  self.censusTractNum + "'")
    censusTract = client.get("47z2-4wuh",
                             where=censusTractRetrievalString,
                             content_type="json",
                             order="place_tractid ASC",
                             limit=1000)
    client.close()

    # Model-based estimate for crude prevalence of older adult men aged >=65
    # years who are up to date on a core set of clinical preventive services:
    # Flu shot past year, PPV shot ever, Colorectal cancer screening, 2016
    self.coreMenCrudePrev = float(censusTract[0]["corem_crudeprev"])
    # Model-based estimate for crude prevalence of older adult women aged
    # >=65 years who are up to date on a core set of clinical preventive
    # services: Flu shot past year, PPV shot ever, Colorectal cancer
    # screening, and Mammogram past 2 years, 2016
    self.coreWomenCrudePrev = float(censusTract[0]["corew_crudeprev"])
    # Model-based estimate for crude prevalence of fecal occult blood test,
    # sigmoidoscopy, or colonoscopy among adults aged 50–75 years, 2016
    self.colonScreenCrudePrev = float(censusTract[0]["colon_screen_crudeprev"])
    # Model-based estimate for crude prevalence of mammography use among
    # women aged 50–74 years, 2016
    self.mammoUseCrudePrev = float(censusTract[0]["mammouse_crudeprev"])
    # Model-based estimate for crude prevalence of all teeth lost among
    # adults aged >=65 years, 2016
    self.teethLostCrudePrev = float(censusTract[0]["teethlost_crudeprev"])
    # Latitude, longitude of census tract centroid
    self.geolocation = censusTract[0]["geolocation"]

    # save these values as numbers in the range of 0-100
    self.coreMenPercentage = self.coreMenCrudePrev / self.totalMales65Plus * 100
    self.coreWomenPercentage = self.coreWomenCrudePrev / self.totalFemales65Plus * 100
    self.mammoUsePercentage = self.mammoUseCrudePrev / self.totalFemales50To74 * 100
    self.colonScreenPercentage = self.colonScreenCrudePrev / self.totalOlderAdults50To74 * 100
    self.teethLostPercentage = self.teethLostCrudePrev / self.totalOlderAdults65Plus * 100
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(
        DOMAIN,
        APPTOKEN,
        username=USERNAME,
        password=PASSWORD,
        session_adapter=mock_adapter,
    )

    uri = "{0}{1}{2}/{3}.json".format(PREFIX, DOMAIN, OLD_API_PATH,
                                      DATASET_IDENTIFIER)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(DATASET_IDENTIFIER)
    assert response.status_code == 200

    try:
        client.delete("foobar")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
    finally:
        client.close()
def test_set_permission():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "empty.txt"
    setup_old_api_mock(adapter, "PUT", response_data, 200)

    # Test response
    response = client.set_permission(DATASET_IDENTIFIER, "public")
    assert response.status_code == 200

    # Test request
    request = adapter.request_history[0]
    query_string = request.url.split("?")[-1]
    params = query_string.split("&")
    assert len(params) == 2
    assert "method=setPermission" in params
    assert "value=public.read" in params
    client.close()
def getDataset(dataset_id):
    table = ''
    vistas = ''  # initialized so the return below is safe if the request fails
    try:
        # Creating Socrata client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        data = client.get(dataset_id, content_type="json")
        data = str(data)
        data = data.replace("'", "\"")
        data = data.upper()
        # getting data to compare with the uploaded data
        #print(type(data))
        #print(data)
        table = pd.read_json(data)
        # Replacing NaN with ''
        table = table.replace(pd.np.nan, '', regex=True)
        table = table.to_html(classes='table-striped " id = "my_table',
                              index=False)
        vistas = client.get_metadata(dataset_id)
        vistas = str(vistas.get("viewCount"))
        client.close()
    except BaseException as e:
        # if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        client.close()
    return table, vistas
def DatasetColumns(dataset_id):
    try:
        # Creating Socrata client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        data = client.get(dataset_id, limit=2)
        data = sorted(data[0].keys())
        # getting columns that could be used in the dataframe
        columns = str(data).upper()
        columns = columns.replace("[", "")
        columns = columns.replace("]", "")
        columns = columns.replace("'", "")
        columns = columns.replace(" ", "")
        columns = columns.split(",")
        # getting data to compare with the uploaded data
        for i in range(0, len(data)):
            data[i] = (str(data[i]).upper(), str(data[i]).upper())
        client.close()
    except BaseException as e:
        # if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        data = None
        columns = None
        client.close()
    return data, columns
def test_upsert():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(
        DOMAIN,
        APPTOKEN,
        username=USERNAME,
        password=PASSWORD,
        session_adapter=mock_adapter,
    )

    response_data = "upsert_songs.txt"
    data = [{
        "theme": "Surfing",
        "artist": "Wavves",
        "title": "King of the Beach",
        "year": "2010",
    }]
    setup_mock(adapter, "POST", response_data, 200)
    response = client.upsert(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 1
    client.close()
class SocrataClient:
    def __init__(self, config=None):
        config = config['Socrata']
        domain = config['DOMAIN']
        token = None if config['TOKEN'] == 'None' else config['TOKEN']
        timeout = int(config['TIMEOUT'])
        self.client = Socrata(domain, token, timeout=timeout)
        self.attempts = int(config['ATTEMPTS'])
        self.config = config

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return self.config['AP' + str(year)]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(self.attempts):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < self.attempts - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)
def property_metadata(address, save_csv=False, select=select_statement):
    '''
    Query SDAT.

    Return a DataFrame with metadata for the closest matching property.
    '''
    client = Socrata(url_md, app_id)
    address = clean_addresses(address)
    results = client.get(
        id_md,
        select=select,
        where="mdp_street_address_mdp_field_address='{}'".format(address))
    df = pd.DataFrame(results)
    closest = difflib.get_close_matches(address.upper(), df.address, n=1)
    #print('Closest Property:', closest[0])
    df = df[df.address == closest[0]]
    #df['lon'] = df.location.apply(lambda x: x['coordinates'][0])
    #df['lat'] = df.location.apply(lambda x: x['coordinates'][1])
    client.close()

    if save_csv:
        sdat = pd.read_csv('sdat_properties.csv')
        #sdat = pd.concat([sdat, df], ignore_index=True)
        result = pd.concat([sdat, df])
        result.to_csv('sdat_properties.csv', index=False)

    return df
def test_replace_non_data_file():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(
        DOMAIN,
        APPTOKEN,
        username=USERNAME,
        password=PASSWORD,
        session_adapter=mock_adapter,
    )

    response_data = "successblobres.txt"
    nondatasetfile_path = "tests/test_data/nondatasetfile.zip"
    setup_replace_non_data_file(adapter, "POST", response_data, 200)

    with open(nondatasetfile_path, "rb") as fin:
        file = {"file": ("nondatasetfile.zip", fin)}
        response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file)

    assert isinstance(response, dict)
    assert response.get("blobFileSize") == 496
    client.close()
def get_exposed():
    # Exposed individuals ("expuestos")
    client = Socrata("www.datos.gov.co", "6aek14sky6N2pVL12sw1qfzoQ")
    results = client.get("8835-5baf", limit=5000000)
    df_expuestos_col = pd.DataFrame.from_records(results)
    df_expuestos_col = df_expuestos_col[["fecha", "acumuladas"]]
    df_expuestos_col = df_expuestos_col.iloc[1:].reset_index(drop=True)
    df_expuestos_col["fecha"] = df_expuestos_col["fecha"].astype(str)
    df_expuestos_col["fecha"] = df_expuestos_col["fecha"].transform(
        lambda x: x[:10])
    df_expuestos_col["acumuladas"] = df_expuestos_col["acumuladas"].astype(
        float)
    df_expuestos_col = df_expuestos_col.sort_values(by=["fecha"]).reset_index(
        drop=True)

    # converting cumulative counts to per-day counts
    df_expuestos_col["expuestos"] = 0
    for i, row in df_expuestos_col.iterrows():
        if i == 0:
            df_expuestos_col.loc[i, "expuestos"] = df_expuestos_col.loc[
                i, "acumuladas"]
        else:
            df_expuestos_col.loc[i, "expuestos"] = (
                df_expuestos_col.loc[i, "acumuladas"] -
                df_expuestos_col.loc[i - 1, "acumuladas"])
    client.close()
    return df_expuestos_col
def update_crime_table():
    """
    Updates the crime reports table and inserts it into the database file
    (replacing an existing table of the same name).
    """
    logging.warn("UPDATING CRIME REPORTS TABLE")
    results = None

    # authenticated client with api token
    try:
        client = Socrata(CRIME_DATABASE_URL, SOCRATA_API_KEY)
        if not isinstance(client, Socrata):
            logging.error(
                "Failed to create Socrata object to receive crime database")
            client.close()
            return
        # First 10000000 results, as a python dictionary version of the JSON from API
        results = client.get(CRIME_DATABASE_UID, limit=10000000)
        client.close()
    except Exception as e:
        logging.error(
            'Fatal error when attempting to retrieve crime database\n' +
            traceback.format_exception_only(*sys.exc_info()[0:2])[0])

    if results is None or len(results) < 1:
        logging.error("received no data from the crime database")
        return

    # Convert to pandas DataFrame
    database = pd.DataFrame.from_records(results)

    # CLEAN DATA
    # manually converting all instances of "NA" in the "weapon" column to NaN
    # so that the program can properly handle it for any calculations
    database['weapon'] = database['weapon'].replace("NA", np.nan)

    # fixing some formatting inconsistencies
    database['inside_outside'] = database['inside_outside'].replace(
        "(?i)outside", "O", regex=True)
    database['inside_outside'] = database['inside_outside'].replace(
        "(?i)inside", "I", regex=True)
    database['crimedate'] = pd.to_datetime(database['crimedate'],
                                           format="%Y-%m-%dT%H:%M:%S.%f")

    # clear out any special funny chars so we can clean the requested data
    database = database.replace("[#\"\'_;]", "", regex=True)
    database = database.astype({
        "longitude": float,
        "latitude": float,
        "total_incidents": float
    })

    database.to_sql(CRIME_TABLE_NAME,
                    con=DATABASE,
                    index=True,
                    index_label='id',
                    if_exists='replace')
    logging.info([
        " ".join([str(x['name']), str(x['type'])])
        for x in inspect(DATABASE).get_columns(CRIME_TABLE_NAME)
    ])
class Service(object):
    def __init__(self, app_token, domain="data.cityofnewyork.us"):
        # print(f"domain={domain}, app_token={app_token}")
        self.client = Socrata(domain, app_token)

    def __enter__(self):
        return self

    def get_info(self, location="nc67-uf89", limit=10):
        try:
            print(f"location={location}, limit={str(limit)}")
            return self.client.get(location, limit=limit)
        except HTTPError as e:
            print(f"Failed to make API call: {e}")
            raise
        except KeyError as e:
            print(f"Failed to get rates from response: {e}")
            raise
        except Exception as e:
            print(f"Something went wrong: {e}")
            raise

    def get_next_info(self, location="nc67-uf89", limit=10, offset=10):
        print(f"limit={str(limit)}, offset={str(offset)}")
        try:
            return self.client.get(location, limit=limit, offset=offset)
        except HTTPError as e:
            print(f"Failed to make API call: {e}")
            raise
        except KeyError as e:
            print(f"Failed to get rates from response: {e}")
            raise
        except Exception as e:
            print(f"Something went wrong: {e}")
            raise

    def get_size(self, location="nc67-uf89"):
        try:
            ret = self.client.get(location, select='COUNT(*)')
            return int(ret[0]['COUNT'])
        except HTTPError as e:
            print(f"Failed to make API call: {e}")
            raise
        except KeyError as e:
            print(f"Failed to get rates from response: {e}")
            raise
        except Exception as e:
            print(f"Something went wrong: {e}")
            raise

    def close(self):
        self.client.close()

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
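# A usage sketch for the Service class above: since it implements
# __enter__/__exit__, the Socrata session can be managed with a `with`
# block. A None app token works for public datasets; "nc67-uf89" is the
# class's own default dataset id.
with Service(app_token=None) as svc:
    total = svc.get_size()
    page1 = svc.get_info(limit=10)
    page2 = svc.get_next_info(limit=10, offset=10)
# the underlying client is closed automatically on exit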
def main():
    arguments = docopt(__doc__, version=__version__)

    client = Socrata(arguments['<site>'], arguments['-a'])

    try:
        if arguments['ls']:
            datasets = list_datasets(client, arguments['<site>'])
            print(tabulate(datasets, headers='keys', tablefmt='psql'))
        elif arguments['insert']:
            dataset_id = arguments['<dataset_id>']
            metadata = client.get_metadata(dataset_id)
            engine, session, geo = get_connection(arguments['-d'], metadata)
            Binding = get_binding(client, dataset_id, metadata, geo,
                                  arguments['-t'])

            # Create the table
            try:
                Binding.__table__.create(engine)
            except ProgrammingError as e:
                # Catch these here because this is our first attempt to
                # actually use the DB
                if 'already exists' in str(e):
                    raise CLIError(
                        'Destination table already exists. Specify a new table'
                        ' name with -t.')
                raise CLIError('Error creating destination table: %s' % str(e))

            num_rows = get_row_count(client, dataset_id)
            bar = FillingCirclesBar(' ▶ Loading from API', max=num_rows)

            # Iterate the dataset and INSERT each page
            for page in get_dataset(client, dataset_id):
                to_insert = []
                for row in page:
                    to_insert.append(Binding(**parse_row(row, Binding)))
                session.add_all(to_insert)
                session.flush()
                bar.next(n=len(to_insert))

            bar.finish()
            ui.item(
                'Committing rows (this can take a bit for large datasets).')
            session.commit()

            success = 'Successfully imported %s rows from "%s".' % (
                num_rows, metadata['name'])
            ui.header(success, color='\033[92m')
        client.close()
    except CLIError as e:
        ui.header(str(e), color='\033[91m')
def datavis(request, dataset_id):
    ods = OpenDataSource.objects.get(pk=dataset_id)
    client = Socrata(ods.website, ods.token, ods.user, ods.password)
    dataset = DataSet.objects.get(pk=dataset_id)
    data = client.get(dataset.identifier)
    metadata = client.get_metadata(dataset.identifier)
    client.close()

    template = loader.get_template('datavis/datavis.html')
    data = json.dumps(data, indent=4, sort_keys=True)
    context = {'data': data, 'metadata': metadata, 'dataset': dataset}
    return HttpResponse(template.render(context, request))
def main(startdate):
    # Unauthenticated client only works with public data sets.
    # SODAPYAPPTOKEN is defined in the local bash_profile
    app_token = os.environ.get("SODAPYAPPTOKEN")
    client = Socrata("data.cityofchicago.org", app_token)
    download_food_inspections(client, startdate)
    download_crime_data(client, startdate)
    download_service_request_data_after_july01_2018(client)
    download_service_request_data_before_july01_2018(client, startdate)
    download_business_data(client, startdate)
    client.close()
def get_owners_data():
    client = Socrata('data.cityofchicago.org', 'your-password-here')
    data_set = 'ezma-pppn'
    data = client.get(data_set, query="""
        SELECT account_number,
               owner_first_name,
               owner_last_name,
               owner_title
        LIMIT 300000
    """)
    client.close()
    return data
def CreateETL(view_data, title, description, category, tags):
    # preparing data
    cols = list(view_data)
    columns = [{
        "fieldName": cols[0].lower(),
        "name": cols[0],
        "dataTypeName": "text"
    }]
    for i in range(1, len(cols)):
        x = {
            "fieldName": cols[i].lower(),
            "name": cols[i],
            "dataTypeName": "text"
        }
        columns.append(x)
    tags = tags.split(",")

    # Uploading data
    try:
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        print(tags)
        print(category)
        print(description)
        print(columns)
        print(cols)
        NewDataSet = client.create(title,
                                   description=description,
                                   columns=columns,
                                   tags=tags,
                                   category=category)
        client.publish(NewDataSet.get('id'))
        client.set_permission(NewDataSet.get('id'), "private")

        # Conversion to JSON
        datajson = view_data.to_json(None, orient='records')
        # JSON to list
        datajson = json.loads(datajson)
        client.replace(NewDataSet.get('id'), datajson)
        print('Socrata done')
        error = 'OK'
        dataset_id = NewDataSet.get('id')
        client.close()
    except BaseException as e:
        # if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        dataset_id = 'NoData'
    return error, dataset_id
def main():
    global DATASET_ID

    # define mapping
    index_mapping()

    # arguments
    parser = argparse.ArgumentParser(
        description="Load data into elasticsearch.")
    parser.add_argument('--page_size',
                        type=int,
                        help='specify the number of records on each page',
                        required=True,
                        metavar='')
    parser.add_argument('--num_pages',
                        type=int,
                        help='specify the number of pages you want to load in',
                        metavar='')
    args = parser.parse_args()

    # socrata
    API_KEY = os.environ['APP_TOKEN']
    socrata_domain = "data.cityofnewyork.us"
    client = Socrata(socrata_domain, API_KEY, timeout=120)

    counter = 0
    if args.num_pages is not None:
        for num in range(args.num_pages):
            parse_and_push(client, args.page_size, num * args.page_size)
    else:
        edge = int(client.get(DATASET_ID, select='COUNT(*)')[0]['COUNT'])
        offset1 = 0
        while offset1 < edge:
            threads = []
            for i in range(10):
                offset1 = counter * args.page_size
                counter += 1
                t = threading.Thread(
                    target=parse_and_push,
                    args=(
                        client,
                        args.page_size,
                        offset1,
                    ),
                )
                threads.append(t)
                t.start()
            for th in threads:
                th.join()
    client.close()
def get_bus_lic_data():
    client = Socrata('data.cityofchicago.org', 'your-password-here')
    data_set = 'uupf-x98q'
    data = client.get(data_set, query="""
        SELECT license_id,
               account_number,
               legal_name,
               doing_business_as_name,
               business_activity,
               address,
               city,
               state,
               zip_code,
               license_description,
               license_start_date,
               expiration_date
        LIMIT 100000
    """)
    client.close()
    return data
def get_grouped_crime_data(start, end):
    seattle_time = pytz.timezone('America/Los_Angeles')
    start_date = pytz.utc.localize(
        datetime.utcfromtimestamp(int(start) / 1000))
    local_start_date = start_date.astimezone(seattle_time).replace(tzinfo=None)
    end_date = pytz.utc.localize(datetime.utcfromtimestamp(int(end) / 1000))
    local_end_date = end_date.astimezone(seattle_time).replace(tzinfo=None)

    # call crime data api
    client = Socrata(settings.SOURCE_DOMAIN, settings.SOCRATA_APP_TOKEN)
    limit = 2000
    offset = 0
    tries = 10

    # return the headers
    yield ['count', 'date', 'event_clearance_group']

    for _ in xrange(tries):
        # sodapy throws an exception if we get a bad response
        # no need to handle it here, we'll catch it on the outside
        response = client.get(
            settings.DATASET_ID,
            content_type="csv",
            select='date_trunc_ymd(event_clearance_date) as date, event_clearance_group, count(*)',
            where='event_clearance_date is not null \
                and event_clearance_date >= \'{0}\' \
                and event_clearance_date <= \'{1}\' \
                and within_circle(incident_location, {2}, {3}, {4})'.format(
                local_start_date.isoformat().split('+')[0],
                local_end_date.isoformat().split('+')[0],
                settings.CLINK_LAT, settings.CLINK_LON, settings.RADIUS),
            group='date, event_clearance_group',
            order='date DESC, count DESC',
            limit=limit,
            offset=offset
        )

        offset += len(response) - 1  # don't count the header
        if len(response) == 1:  # if it's just the header
            break  # then there's nothing left to ask for

        # trim off the header row
        # using itertools so we don't make a shallow copy of the giant list
        response_iter = islice(response, 1, None)
        for row in imap(_convert_grouped_row, response_iter):
            yield row

    client.close()
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_publish_mock(adapter, "POST", response_data, 200)
    response = client.publish(DATASET_IDENTIFIER)

    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
def test_get():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_songs.txt"
    setup_mock(adapter, "GET", response_data, 200)
    response = client.get(DATASET_IDENTIFIER)

    assert isinstance(response, list)
    assert len(response) == 10
    client.close()
def test_get_metadata():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_song_metadata.txt"
    setup_old_api_mock(adapter, "GET", response_data, 200)
    response = client.get_metadata(DATASET_IDENTIFIER)

    assert isinstance(response, dict)
    assert "newBackend" in response
    assert "attachments" in response["metadata"]
    client.close()
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    resource = "/api/views/songs/publication.json"  # publish() removes .json
    set_up_mock(adapter, "POST", response_data, 200, resource=resource)
    # hard-coded so the request uri is matched
    response = client.publish("/resource/songs.json")

    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
def test_upsert():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "upsert_songs.txt"
    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    setup_mock(adapter, "POST", response_data, 200)
    response = client.upsert(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 1
    client.close()
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    uri = "{0}{1}/api/views/{2}.json".format(PREFIX, DOMAIN,
                                             DATASET_IDENTIFIER)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(DATASET_IDENTIFIER)
    assert response.status_code == 200

    try:
        client.delete("foobar")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
    finally:
        client.close()
def test_upsert_exception():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "403_response_json.txt"
    setup_mock(adapter, "POST", response_data, 403, reason="Forbidden")
    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]

    try:
        client.upsert(DATASET_IDENTIFIER, data)
    except Exception as e:
        assert isinstance(e, requests.exceptions.HTTPError)
    else:
        raise AssertionError("No exception raised for bad request.")
    finally:
        client.close()
def test_set_permission():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "empty.txt"
    resource = "/api/views" + PATH
    set_up_mock(adapter, "PUT", response_data, 200, resource=resource)

    # Test response
    response = client.set_permission(PATH, "public")
    assert response.status_code == 200

    # Test request
    request = adapter.request_history[0]
    qs = request.url.split("?")[-1]
    assert qs == "method=setPermission&value=public.read"
    client.close()
def test_replace():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "replace_songs.txt"
    data = [
        {"theme": "Surfing", "artist": "Wavves",
         "title": "King of the Beach", "year": "2010"},
        {"theme": "History", "artist": "Best Friends Forever",
         "title": "Abe Lincoln", "year": "2008"},
    ]
    setup_mock(adapter, "PUT", response_data, 200)
    response = client.replace(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 2
    client.close()
def test_replace_non_data_file():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "successblobres.txt"
    nondatasetfile_path = 'tests/test_data/nondatasetfile.zip'
    setup_replace_non_data_file(adapter, "POST", response_data, 200)

    with open(nondatasetfile_path, 'rb') as fin:
        file = {'file': ("nondatasetfile.zip", fin)}
        response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file)

    assert isinstance(response, dict)
    assert response.get("blobFileSize") == 496
    client.close()
def get(self, request, *args, **kwargs):
    '''Get criminal records from the dataset and save them to the
    database as CriminalRecord objects'''
    ids = CriminalRecord.objects.values_list('case_id', flat=True)
    domain = settings.SOCRATA_DOMAIN
    token = settings.SOCRATA_APP_TOKEN
    endpoint = settings.SOCRATA_DATASET_ENDPOINT
    client = Socrata(domain, token)
    order = "date"
    where = 'latitude IS NOT NULL'
    offset = 0
    limit = 1000
    status = 200
    try:
        data = client.get(endpoint, order=order, where=where,
                          offset=offset, limit=limit)
        while data:
            for record in data:
                if self.to_int(record.get('id')) not in ids:
                    attrs = {
                        'case_id': self.to_int(
                            self.get_from_dict(record, 'id')),
                        'case_number': self.get_from_dict(
                            record, 'case_number'),
                        'date': datetime.strptime(
                            self.get_from_dict(record, 'date'),
                            '%Y-%m-%dT%H:%M:%S'),
                        'block': self.get_from_dict(record, 'block'),
                        'iucr': self.get_from_dict(record, 'iucr'),
                        'primary_type': self.get_from_dict(
                            record, 'primary_type'),
                        'crime_description': self.get_from_dict(
                            record, 'description'),
                        'location_description': self.get_from_dict(
                            record, 'location_description'),
                        'has_arrested': self.get_from_dict(
                            record, 'arrest'),
                        'is_domestic': self.get_from_dict(
                            record, 'domestic'),
                        'beat': self.get_from_dict(record, 'beat'),
                        'district': self.get_from_dict(record, 'district'),
                        'ward': self.to_int(
                            self.get_from_dict(record, 'ward')),
                        'community_area': self.get_from_dict(
                            record, 'community_area'),
                        'fbi_code': self.get_from_dict(record, 'fbi_code'),
                        'x_coordinate': self.to_int(
                            self.get_from_dict(record, 'x_coordinate')),
                        'y_coordinate': self.to_int(
                            self.get_from_dict(record, 'y_coordinate')),
                        'year': self.to_int(
                            self.get_from_dict(record, 'year')),
                        'updated_on': datetime.strptime(
                            self.get_from_dict(record, 'updated_on'),
                            '%Y-%m-%dT%H:%M:%S'),
                        'latitude': float(
                            self.get_from_dict(record, 'latitude')),
                        'longitude': float(
                            self.get_from_dict(record, 'longitude')),
                        'location': Point(
                            float(self.get_from_dict(record, 'longitude')),
                            float(self.get_from_dict(record, 'latitude'))),
                    }
                    CriminalRecord.objects.create(**attrs)
            offset += limit
            data = client.get(endpoint, order=order, where=where,
                              offset=offset, limit=limit)
        client.close()
    except Exception as e:
        print(e)
        status = 400
theft = client.get("cuks-n6tp", select="category,time,location",
                   where="category='VEHICLE THEFT'", limit=10)
print("vehicle theft data")
print(theft)

vandalism = client.get("cuks-n6tp", select="category,time,location",
                       where="category='VANDALISM'", limit=10)
print("VANDALISM data")
print(vandalism)

kidnapping = client.get("cuks-n6tp", select="category,time,location",
                        where="category='KIDNAPPING'", limit=10)
print("KIDNAPPING data")
print(kidnapping)

sex = client.get("cuks-n6tp", select="category,time,location",
                 where="category='SEX OFFENSES, FORCIBLE'", limit=10)
print("SEX OFFENSES, FORCIBLE data")
print(sex)

dui = client.get("cuks-n6tp", select="category,time,location",
                 where="category='DRIVING UNDER THE INFLUENCE'", limit=10)
print("DUI data")
print(dui)

client.close()

# search radius
# https://data.sfgov.org/resource/cuks-n6tp.json?$where=within_circle(location,%2037.78,%20-122.41,%201000)

# https://data.cms.gov/resource/97k6-zzx3.json?$limit=5
# client = Socrata("https://data.cms.gov/resource/", None)
# print client.get("http://data.cms.gov/resource/97k6-zzx3.json?$order=average_covered_charges$limit=1", limit=2)

# app token - need it eventually
# https://data.seattle.gov/resource/3k2p-39jp.json?$$app_token=APP_TOKEN
# https://dev.socrata.com/docs/app-tokens.html
# 8gffbg1meMZ1e2Z0yOz2OpwZq
# secret token at https://data.sfgov.org/profile/alex-wap/7wfp-cp8m/app_tokens

''' SF DATA FORMAT
def test_client():
    client = Socrata(DOMAIN, APPTOKEN)
    assert isinstance(client, Socrata)
    client.close()