def test_create(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "create_foobar.txt" setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None) columns = [ {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"}, {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"} ] tags = ["foo", "bar"] response = client.create("Foo Bar", description="test dataset", columns=columns, tags=tags, row_identifier="bar") request = adapter.request_history[0] request_payload = json.loads(request.text) # can't figure out how to use .json # Test request payload for dataset_key in ["name", "description", "columns", "tags"]: assert dataset_key in request_payload for column_key in ["fieldName", "name", "dataTypeName"]: assert column_key in request_payload["columns"][0] # Test response assert isinstance(response, dict) assert len(response.get("id")) == 9 client.close()
def test_set_permission(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "empty.txt" setup_old_api_mock(adapter, "PUT", response_data, 200) # Test response response = client.set_permission(DATASET_IDENTIFIER, "public") assert response.status_code == 200 # Test request request = adapter.request_history[0] query_string = request.url.split("?")[-1] params = query_string.split("&") assert len(params) == 2 assert "method=setPermission" in params assert "value=public.read" in params client.close()
def getData(self): # Get crime records via API if self.storage == "remote": print("Getting crime data from remote source.\nThis takes a while (approx. 5 mins)! Please be patient.") # API request information client_crime = Socrata('data.lacity.org','7pTgt6f2oTY53aDI1jXNJoNZD') offset_temp = 0 conn = sq.connect("ReferralCrimeMap.db") cur = conn.cursor() cur.execute("DROP TABLE IF EXISTS 'Crime2016'") # Getting data in dataframe then manipulate before storing in ReferralCrimeMap.db while True: results = client_crime.get("7fvc-faax", limit=10000, offset=offset_temp) crime_df_temp = pd.DataFrame.from_records(results) # This loop stops when the next block of dataframe is empty if crime_df_temp.empty == True: break # Split location_1 into lat and long # Create 'year_rptd' to filter cases reported in 2016 # Create 'count' for later data analysis crime_df_temp['location_1'] = crime_df_temp['location_1'].astype('str') crime_df_temp['long'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[0]) crime_df_temp['lat'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[-1]) crime_df_temp['year_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[0]) crime_df_temp['month_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[1]) crime_df_temp['count'] = 1 crime_df_temp = crime_df_temp[crime_df_temp['year_rptd']=='2016'] # Insert dataframe into ReferralCrimeMap.db pd_sql.to_sql(crime_df_temp, 'Crime2016', conn, if_exists='append', index=False) offset_temp+=10000 # Shows the percentage of data if offset_temp % 100000 == 0: print(offset_temp/2000000*100,"%") else: continue cur.execute("SELECT * FROM Crime2016") print(cur.fetchone()) conn.close() # Load local data if -source is set to local else: print("Getting crime data from local source.") conn = sq.connect("ReferralCrimeMap.db") cur = conn.cursor() query = "SELECT * FROM Crime" try: crime = pd.read_sql(query, conn) conn.close() print(crime.head()) except Exception as e: print("There is an error:", e) print("Please set data source as remote.") exit()
def gen_data(filepath, api_key, username=None, password=None, output='json'): api = Socrata('data.seattle.gov', api_key, username=username, password=password) with open(filepath) as fp: uid = set([i.strip() for i in fp.readlines()]) | set([i.strip() for i in open('completed.json')]) for dataset in uid: print(dataset, file=open('completed.json', 'a')) yield {dataset: api.get('/resource/' + dataset + '.' + output)}
def createBarChart(charttype): with open("database_charts/chartconfig.json") as config: jsonData = json.load(config) config.close() chart = jsonData[charttype] chartTitle = chart['chart-title'] dataTitle = chart['data-title'] yFormat = chart['y-axis-format'] xaxis = chart['x-axis'] items = chart['y-axis'] colors = chart['color'] with open("database_charts/url_info.json") as urlConfig: urlInfo = json.load(urlConfig) client = Socrata(urlInfo['url'], None, username=urlInfo['username'], password=urlInfo['password']) colorIndex = 0 data = [] for item in items: dict = { "type": chart["chart-type"], "legendText": item.replace("_", " "), "cursor": "pointer" if len(items) > 1 else "default", "showInLegend": True, 'legendMarkerColor': colors[colorIndex], "toolTipContent": item.replace("_", " ") + " in year " + "{label}: {y}" } dataPoints = [] request = client.get(dataTitle, select=xaxis + ", " + item) for r in request: if len(r) > 1: d = { "label": int(r[xaxis]), "y": int(r[item]), "color": colors[colorIndex], } if d['label'] is not None: dataPoints.append(d) colorIndex += 1 dict["dataPoints"] = dataPoints data.append(dict) # Create Chart Information chartInfo = {} chartInfo["chartTitle"] = chartTitle chartInfo["data"] = data chartInfo["json"] = json.dumps(data) chartInfo["valueFormat"] = yFormat chartInfo['addClick'] = False if len(items) == 1 else True return chartInfo
def createPieChart(self): with open("database_charts/chartconfig.json") as config: data = json.load(config) chartconfig = {} for key, value in data.items(): if key == self.chartType: chartconfig = value # The position index from the excel file xaxis = chartconfig['x-axis'] # An array of dictionary with Name of the category and index in excel file yaxis = chartconfig['y-axis'] charttitle = chartconfig['chart-title'] datatitle = chartconfig['data-title'] colors = chartconfig['color'] colorIndex = 0 with open("database_charts/url_info.json") as urlConfig: urlInfo = json.load(urlConfig) client = Socrata(urlInfo['url'], None, username=urlInfo['username'], password=urlInfo['password']) request = client.get(datatitle, select=",".join(yaxis), where=xaxis + "=" + str(self.year))[0] newData = [] dic = { "type": "pie", "showInLegend": True, "toolTipContent": "{y} - #percent %", "yValueFormatString": "#0.#,,. Million", "legendText": "{indexLabel}", } dataPoints = {} for cat in yaxis: if cat in request.keys() and int(request[cat]) > 0: if cat not in dataPoints.keys(): dataPoints[cat] = {'y': int(request[cat]), 'indexLabel': cat.replace("_", " "), 'legendMarkerColor': colors[colorIndex], 'color': colors[colorIndex]} else: dataPoints[cat]['y'] += int(request[cat]) colorIndex += 1 for item in dataPoints: newData.append(dataPoints[item]) dic['dataPoints'] = newData pieChartInfo = {} pieChartInfo['title'] = charttitle + str(self.year) pieChartInfo['data'] = dic pieChartInfo['json'] = json.dumps(dic) return pieChartInfo
def test_get(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter) response_data = "get_songs.txt" setup_mock(adapter, "GET", response_data, 200) response = client.get(DATASET_IDENTIFIER) assert isinstance(response, list) assert len(response) == 10 client.close()
def test_publish(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "create_foobar.txt" setup_publish_mock(adapter, "POST", response_data, 200) response = client.publish(DATASET_IDENTIFIER) assert isinstance(response, dict) assert len(response.get("id")) == 9 client.close()
def test_publish(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "create_foobar.txt" resource = "/api/views/songs/publication.json" # publish() removes .json set_up_mock(adapter, "POST", response_data, 200, resource=resource) response = client.publish("/resource/songs.json") # hard-coded so request uri is matched assert isinstance(response, dict) assert len(response.get("id")) == 9 client.close()
def test_get_metadata(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter) response_data = "get_song_metadata.txt" setup_old_api_mock(adapter, "GET", response_data, 200) response = client.get_metadata(DATASET_IDENTIFIER) assert isinstance(response, dict) assert "newBackend" in response assert "attachments" in response["metadata"] client.close()
def getData(self): if self.storage == "remote": print("Getting DCFS referral data from remote source.") # API request client_dcfs = Socrata('data.lacounty.gov','7pTgt6f2oTY53aDI1jXNJoNZD') offset_temp = 0 dcfs_df=pd.DataFrame() conn = sq.connect("ReferralCrimeMap.db") cur = conn.cursor() cur.execute('DROP TABLE IF EXISTS dcfs') print('Inserting DCFS referral dataframe into ReferralCrimeMap.db.') while True: results = client_dcfs.get("8vmx-hhtu", limit=5000, offset=offset_temp) dcfs_df = pd.DataFrame.from_records(results) # Break the loop and stop requesting if the block is empty if dcfs_df.empty == True: break # Insert dataframe into ReferralCrimeMap.db dcfs_df['location'] = dcfs_df['location'].astype('str') pd_sql.to_sql(dcfs_df, 'dcfs', conn, if_exists='append', index=False) offset_temp+=5000 # I didn't use time.sleep as this API is unlimited # time.sleep(1) conn.close() else: print ("Getting DCFS referral data from local source.") conn = sq.connect("ReferralCrimeMap.db") cur = conn.cursor() query = ''' SELECT * FROM dcfs ''' try: dcfs = pd.read_sql(query, conn) conn.close() print(dcfs.head()) # If the table does not exist it will throw an error. except Exception as e: print('There is an error:', e) print('Please enter remote source.') exit()
def test_upsert(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "upsert_songs.txt" data = [{"theme": "Surfing", "artist": "Wavves", "title": "King of the Beach", "year": "2010"}] setup_mock(adapter, "POST", response_data, 200) response = client.upsert(DATASET_IDENTIFIER, data) assert isinstance(response, dict) assert response.get("Rows Created") == 1 client.close()
def test_delete(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) uri = "{0}{1}{2}".format(PREFIX, DOMAIN, PATH) adapter.register_uri("DELETE", uri, status_code=200) response = client.delete(PATH) assert response.status_code == 200 try: client.delete("/foobar.json") except Exception as e: assert isinstance(e, requests_mock.exceptions.NoMockAddress)
def test_upsert_exception(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter) path = "/songs.json" response_data = "403_response_json.txt" set_up_mock(adapter, "POST", response_data, 403, reason="Forbidden") data = [{"theme": "Surfing", "artist": "Wavves", "title": "King of the Beach", "year": "2010"}] try: response = client.upsert(PATH, data) except Exception as e: assert isinstance(e, requests.exceptions.HTTPError)
def test_upsert_exception(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter) response_data = "403_response_json.txt" setup_mock(adapter, "POST", response_data, 403, reason="Forbidden") data = [{"theme": "Surfing", "artist": "Wavves", "title": "King of the Beach", "year": "2010"}] try: client.upsert(DATASET_IDENTIFIER, data) except Exception as e: assert isinstance(e, requests.exceptions.HTTPError) else: raise AssertionError("No exception raised for bad request.")
def log_request(): print('log_request', request.url) print(socrata_app_token and socrata_username and socrata_password and socrata_access_log_domain and socrata_access_log_datasetid) if socrata_app_token and socrata_username and socrata_password and socrata_access_log_domain and socrata_access_log_datasetid: client = Socrata(socrata_access_log_domain, socrata_app_token, username=socrata_username, password=socrata_password) # fix this, see http://esd.io/blog/flask-apps-heroku-real-ip-spoofing.html if not request.headers.getlist("X-Forwarded-For"): ip = request.remote_addr else: ip = request.headers.getlist("X-Forwarded-For")[0] # for some reason a space and a * is causing an upsert error so am replacing space with %20 url = str(request.url).replace(" ", "%20") # See Socrata's time format https://support.socrata.com/hc/en-us/articles/202949918-Importing-Data-Types-and-You- dtnow = datetime.utcnow().isoformat() dtnow = dtnow[:dtnow.index('.')]+'Z' data = [{'datetime': dtnow, 'ip_address': str(ip), 'url': url}] print(data) print('upsert', client.upsert(socrata_access_log_datasetid, data))
def fetch_data(): # Make a connection conn = r.connect(host="localhost", port=28015, db="test") # You need to register the App Token (manually?) client = Socrata("data.sunshinecoast.qld.gov.au", "6MbT9NoWolynKM1ooRzrvm7Fs") # API Endpoint endpoint = "/resource/mn3m-fqri.json" off = 0 while True: data = client.get(endpoint, limit=50000,offset=off) if len(data) == 0: break for elem in data: try: # Only store if we have a date cur = elem['d_date_rec'] r.table("planning").insert(elem).run(conn) except KeyError: pass off = off+50000
def test_set_permission(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "empty.txt" resource = "/api/views" + PATH set_up_mock(adapter, "PUT", response_data, 200, resource=resource) # Test response response = client.set_permission(PATH, "public") assert response.status_code == 200 # Test request request = adapter.request_history[0] qs = request.url.split("?")[-1] assert qs == "method=setPermission&value=public.read" client.close()
def test_replace(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "replace_songs.txt" data = [ {"theme": "Surfing", "artist": "Wavves", "title": "King of the Beach", "year": "2010"}, {"theme": "History", "artist": "Best Friends Forever", "title": "Abe Lincoln", "year": "2008"}, ] setup_mock(adapter, "PUT", response_data, 200) response = client.replace(DATASET_IDENTIFIER, data) assert isinstance(response, dict) assert response.get("Rows Created") == 2 client.close()
def get_resource_data(socrata_resource, since=None, limit=1000): client = Socrata(socrata_resource.domain, socrata_resource.token) kwargs = { 'limit': limit, 'where': socrata_resource.conditions, } if socrata_resource.unique_key: kwargs['order'] = socrata_resource.unique_key while True: if since: kwargs['where'] = "{} and {} > '{}'".format( socrata_resource.conditions, socrata_resource.unique_key, since ) batch = client.get(socrata_resource.endpoint, **kwargs) if len(batch) > 0: since = batch[-1][socrata_resource.unique_key] yield batch else: return
def test_replace_non_data_file(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "successblobres.txt" nondatasetfile_path = 'tests/test_data/nondatasetfile.zip' setup_replace_non_data_file(adapter, "POST", response_data, 200) with open(nondatasetfile_path, 'rb') as fin: file = ( {'file': ("nondatasetfile.zip", fin)} ) response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file) assert isinstance(response, dict) assert response.get("blobFileSize") == 496 client.close()
def run_script(): client = Socrata("data.sfgov.org", "wvRAyq5wvCnf9YKGmiuZ7T9y3") fetched_data = client.get("/resource/rqzj-sfat.json", select="objectid,latitude,longitude,fooditems,expirationdate") data = [] for i in fetched_data: a = Node( i.get("objectid", None), i.get("latitude", None), i.get("longitude", None), i.get("fooditems", None), i.get("expirationdate", None), ) print(repr(a)) data.append(a._asdict()) urlparse.uses_netloc.append("postgres") try: url = urlparse.urlparse(os.environ["DATABASE_URL"]) conn = psycopg2.connect( database=url.path[1:], user=url.username, password=url.password, host=url.hostname, port=url.port ) except: url = urlparse.urlparse("postgresql://localhost/mydb") conn = psycopg2.connect( database=url.path[1:], user=url.username, password=url.password, host=url.hostname, port=url.port ) print("I am unable to connect to the database") cur = conn.cursor() try: cur.executemany( "insert into foodtrucks(objectid,latitude, longitude,fooditems,expirationdate) select %(objectid)s,%(latitude)s,%(longitude)s,%(fooditems)s,%(expirationdate)s where not exists (select 1 from foodtrucks where objectid=%(objectid)s)", data, ) except Exception as e: print("unable to query postgres =>", e) conn.commit() return render_template("successScript.html")
def pull(self): """ Utilities for spatio temporal analysis @author zed.uchicago.edu Pulls new entries from datasource NOTE: should make flexible but for now use city of Chicago data """ socrata_domain = "data.cityofchicago.org" socrata_dataset_identifier = "crimes" socrata_token = "ZIgqoPrBu0rsvhRr7WfjyPOzW" client = Socrata(socrata_domain, socrata_token)
def run(self): ''' Query the data from the 311 API ''' # Authenticate to S3 ses = boto3.session.Session(profile_name='luigi_dpa', region_name='us-west-2') s3_resource = ses.resource('s3') obj = s3_resource.Bucket(self.bucket) print(ses) # Authenticate the client: client = Socrata("data.cityofnewyork.us", "N2WpW61JnP5RoT5mrYGUaSUg9", username="******", password="******") # results are returned as a JSON file from the API / # converted to a Python list using sodapy client.timeout = 1000 results = client.get("erm2-nwe9", limit=100) with self.output().open('w') as json_file: json.dump(results, json_file)
def test_replace_non_data_file(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "successblobres.txt" nondatasetfile_path = 'tests/test_data/nondatasetfile.zip' setup_replace_non_data_file(adapter, "POST", response_data, 200) with open(nondatasetfile_path, 'rb') as fin: file = ({'file': ("nondatasetfile.zip", fin)}) response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file) assert isinstance(response, dict) assert response.get("blobFileSize") == 496 client.close()
def execute(self, context): # Authenticate Socrata client self.log.info('Authenticate Socrata client') client = Socrata(self.socrata_domain, self.socrata_token) rendered_socrata_query_filters = copy.deepcopy(self.socrata_query_filters) if rendered_socrata_query_filters is not None: for filter, filter_value in self.socrata_query_filters.items(): if isinstance(filter_value, str): rendered_socrata_query_filters[filter] = filter_value.format(**context) # Get JSON results from API endpoint self.log.info('Query API') results = client.get(self.socrata_dataset_identifier, **rendered_socrata_query_filters) self.log.info('Got {} results'.format(len(results))) self.log.info('Write JSON to file') rendered_json_output_filepath = self.json_output_filepath.format(**context) with open(rendered_json_output_filepath, 'w') as outfile: json.dump(results, outfile) self.log.info('Write JSON to {}'.format(rendered_json_output_filepath))
def __init__(self, dataset_id, socrata_client=None, socrata_params=None, float_fields=None, logger=None): """ Initialization function of the SocrataDataset class. Parameters: dataset_id: 4x4 ID of the Socrata draft (e.g. x123-bc12) client: Optional parameter if the user chooses to pass in the socrata_params parameter. If user chooses not to pass in socrata_params, they can also pass in an sodapy.Socrata object that has been initialized with the proper socrata credentials. socrata_params: Optional parameter if the user choose to pass in the socrata_client parameter. Dictionary object containing Socrata credentials. Must include the following fields: 'username', 'password', 'app_token', 'domain'. float_fields: An array of Socrata field names that should be of float types (numbers with decimals). logger: Optional parameter. Could pass in a logger object or not pass in anything. If a logger object is passed in, information will be logged instead of printed. If not, information will be printed. """ self.socrata_params = {} self.float_fields = [] self.dataset_id = dataset_id self.client = socrata_client if not socrata_client and socrata_params: self.client = Socrata(**socrata_params) self.socrata_params = socrata_params self.col_dtype_dict = self.get_col_dtype_dict() self.float_fields = float_fields self.print_func = print if logger: self.print_func = logger.info
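# Hypothetical usage sketch for the SocrataDataset initializer documented above; the domain,
# credentials, dataset ID, and field names below are illustrative placeholders, not values
# taken from this project.
# params = {"domain": "data.example.gov", "app_token": "FakeToken", "username": "me@example.com", "password": "secret"}
# dataset = SocrataDataset("x123-bc12", socrata_params=params, float_fields=["latitude", "longitude"])
# dataset.col_dtype_dict  # column name -> Socrata datatype mapping built during initialization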
def get_data(APP_KEY, page_size, num_pages, output): try: client = Socrata('data.cityofnewyork.us', APP_KEY) es = create_and_update_index('violationparking-index', 'violations') if num_pages == '': num_row = client.get('nc67-uf89', select='COUNT(*)') #count num of rows in dataset print("SOMETHING") total = int(num_row[0]['COUNT']) num_pages = total // page_size + 1 #get the num of pages (whole number, rounded up) return num_pages else: for i in range (num_pages): data = client.get('nc67-uf89', limit=page_size, offset=i*page_size) if output != '': with open(output, 'a') as fout: for result in data: fout.write(json.dumps(result) + '\n') print("HERE") load_ES(result, es) else: for result in data: print(result) except Exception as e: print(f'Something went wrong {e}') raise
def execute(trial=False): startTime = datetime.datetime.now() # ---[ Connect to Database ]--------------------------------- client = dml.pymongo.MongoClient() repo = client.repo repo.authenticate('smithnj', 'smithnj') repo_name = 'smithnj.ctastats' # ---[ Grab Data ]------------------------------------------- client = Socrata("data.cityofchicago.org", "xbEYuk5XxkYsIaXl3hn79XIoR") if (trial): results = client.get( "t2rn-p8d7", select="station_id,stationame,month_beginning,monthtotal", where= "month_beginning > '2012-12-01T00:00:00.000' AND month_beginning < '2019-01-01T00:00:00.000'", limit=100) if (trial == False): results = client.get( "t2rn-p8d7", select="station_id,stationame,month_beginning,monthtotal", where= "month_beginning > '2012-12-01T00:00:00.000' AND month_beginning < '2019-01-01T00:00:00.000'", limit=30000) df = pd.DataFrame.from_records(results).to_json(orient="records") loaded = json.loads(df) # ---[ MongoDB Insertion ]------------------------------------------- repo.dropCollection(repo_name) repo.createCollection(repo_name) print('done') repo[repo_name].insert_many(loaded) repo[repo_name].metadata({'complete': True}) # ---[ Finishing Up ]------------------------------------------- print(repo[repo_name].metadata()) repo.logout() endTime = datetime.datetime.now()
def send_crab_data(self): CONVERSION_RATE = .00000701549 oocytes = Oocyte.objects.filter(crab=self).filter(chosen_count=10) client = Socrata("noaa-fisheries-afsc.data.socrata.com", "q3DhSQxvyWbtq1kLPs5q7jwQp", username="******", password="******") data = { 'area_2': '', 'area_5': '', 'calibration_5x': 0.00028, 'area_4': '', 'area_7': '', 'area_10': '', 'calibration_10x': 0.00056, 'area_9': '', 'year': '', 'sample': '', 'area_3': '', 'area_8': '', 'area_1': '', 'area_6': '' } data['area_1'] = oocytes[0].area * CONVERSION_RATE data['area_2'] = oocytes[1].area * CONVERSION_RATE data['area_3'] = oocytes[2].area * CONVERSION_RATE data['area_4'] = oocytes[3].area * CONVERSION_RATE data['area_5'] = oocytes[4].area * CONVERSION_RATE data['area_6'] = oocytes[5].area * CONVERSION_RATE data['area_7'] = oocytes[6].area * CONVERSION_RATE data['area_8'] = oocytes[7].area * CONVERSION_RATE data['area_9'] = oocytes[8].area * CONVERSION_RATE data['area_10'] = oocytes[9].area * CONVERSION_RATE data['year'] = datetime.datetime.now().year data['sample'] = self.sample_num payload = [data] client.upsert("km2u-hwjw", payload)
def get_crime(start_year, end_year): ''' Get 2013 to 2018 crime data from chicago data portal Return: pandas dataframe with the columns and dtypes as COL_TYPES ''' crime_type = [ "HOMICIDE", "CRIM SEXUAL ASSAULT", "ROBBERY", "ASSAULT", "BATTERY", "BURGLARY", "ARSON", "MOTOR VEHICLE THEFT", "THEFT" ] COL_TYPES = { 'block': str, 'case_number': str, 'primary_type': 'category', 'date': str, 'latitude': float, 'longitude': float, 'year': int } MAX_ROWS = 100000000 # the total rows of the original data CRIME_DATA_ID = "6zsd-86xi" cols = [item for item in COL_TYPES.keys()] client = Socrata('data.cityofchicago.org', 'E0eO5nY1aKuEY1pVrunfqFhDz', username='******', password='******') conds = "year >= {} AND year <= {}".format(start_year, end_year) res = client.get(CRIME_DATA_ID, select=",".join(cols), where=conds, limit=MAX_ROWS) client.close() df = pd.DataFrame.from_records(res) #df['date'] = pd.to_datetime(df['date']) df = df[df.primary_type.isin(crime_type)] df = df.astype(COL_TYPES) return df
def api_to_s3(self, filename, table_name, limit=2000): """ Loads data via api, uploads data file to s3. Parameters ---------- filename : str file name with extension table_name : str name of dataset fetched from api limit : int number of records to download """ # load api module from sodapy import Socrata api_client = Socrata(self.url, self.api_token, self.api_user_email, self.api_user_pwd) # get records logging.info(f'Fetching data from {self.url} API.') records = api_client.get(table_name, limit=limit) df = pd.DataFrame.from_records(records) # write csv csv_buf = StringIO() df.to_csv(csv_buf, header=True, index=False) csv_buf.seek(0) content = csv_buf.getvalue() # add "_current" to filename filename = self.__rename_file(filename) # copy "_current" file in bucket to "_previous" self.__archive_file(filename) # create s3 object obj = self.s3_conn.Object(self.bucket_name, filename) obj.put(Body=content) logging.info(f'{filename} uploaded to s3 bucket.')
def get_scooters(url=None, key=None, save=True): ''' # must input key use the Socrata API call to dataset 2kfw-zvte return dataframe of individual scooter trips over Chicago 2019 pilot ''' # check to see if weather pickle exists if os.path.exists('scooter_df.pickle'): print('loading pickle!') scooter_df = pd.read_pickle('scooter_df.pickle') else: print('calling API!') from sodapy import Socrata #client if key == None: key = json.load(open('hidden.json', 'r'))['CHI'] client = Socrata(key['url'], key['key']) # First 2000 results, returned as JSON from API / converted to Python list of # dictionaries by sodapy. results = client.get("2kfw-zvte", limit=712_000) # Convert to pandas DataFrame scooter_df = pd.DataFrame(results) scooter_df.drop(columns=[ ':@computed_region_bdys_3d7i', ':@computed_region_vrxf_vc4k' ], inplace=True) if save: # saving, so you don't have to make the request again :praise: scooter_df.to_pickle('scooter_df.pickle') print('saved to pickle!') return scooter_df
def PersonToCorreoDireccionTelefono_mk5f_bdwx(m): TRX = MaltegoTransform() #m.parseArguments(sys.argv) #nombre=sys.argv[1] nombre = m.Value try: client = Socrata("www.datos.gov.co", None) r = client.get("u5mc-hpr6", limit=2000) #for key, value in data.items(): #print key, value for i in range(len(r)): if (r[i]['nombre'] == nombre): celular = r[i]['celular'] correo_electronico = r[i]['correo_electronico'] direccion = r[i]['direccion'] celular2 = r[i]['telefonos'] barrio = r[i]['municipio'] break ent1 = TRX.addEntity('maltego.EmailAddress', correo_electronico) ent2 = TRX.addEntity('maltego.PhoneNumber', celular) ent2.addAdditionalFields("phonenumber.countrycode", "Country Code", True, "57") ent3 = TRX.addEntity('maltego.PhoneNumber', celular2) ent3.addAdditionalFields("phonenumber.countrycode", "Country Code", True, "57") ent4 = TRX.addEntity('maltego.Location', direccion) ent4.addAdditionalFields("country", "Country", True, "Colombia") ent4.addAdditionalFields("location.area", "Area", True, barrio) ent4.addAdditionalFields("streetaddress", "Street Address", True, direccion) except Exception as e: TRX.addUIMessage("Cedula no encontrada en la base de datos") TRX.returnOutput()
def create_vacc_df(): columns = [ 'date, fips, recip_county, recip_state, series_complete_pop_pct, series_complete_yes, booster_doses, booster_doses_vax_pct' ] client = Socrata("data.cdc.gov", app_token="SMDNVaBjBRb2aY7ZjRLbnLpZc", username="******", password="******") results = client.get("8xkx-amqh", limit=100000000) df_vacc_1 = pd.DataFrame.from_records(results) df_vacc_1['date'] = df_vacc_1['date'].astype('datetime64') df_vacc_1['recip_county'] = df_vacc_1['recip_county'].astype('string') df_vacc_1['recip_state'] = df_vacc_1['recip_state'].astype('string') df_vacc_1['series_complete_pop_pct'] = df_vacc_1[ 'series_complete_pop_pct'].astype('float') df_vacc_1['series_complete_yes'] = df_vacc_1['series_complete_yes'].astype( 'float') df_vacc_1['booster_doses'] = df_vacc_1['booster_doses'].astype('float') df_vacc_1['booster_doses_vax_pct'] = df_vacc_1[ 'booster_doses_vax_pct'].astype('float') date_min = df_vacc_1['date'].min() date_max = df_vacc_1['date'].max() date_list = [] for mon in pd.date_range(date_min, date_max, freq='MS'): date_list.append(mon.strftime("%Y-%m-%d")) df_vacc_animated = df_vacc_1[df_vacc_1['date'].isin(date_list)] df_vacc_static = df_vacc_1[df_vacc_1['date'] == date_max] df_vacc_animated = df_vacc_animated.sort_values('date', ascending=True) df_vacc_animated['date'] = df_vacc_animated['date'].astype('string') with urlopen( 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json' ) as response: counties = json.load(response) return counties, df_vacc_static, df_vacc_animated
def TelefonoToCorreoPerson_u5mc_hpr6(m): TRX = MaltegoTransform() #m.parseArguments(sys.argv) #telefono=sys.argv[1] telefono = m.Value try: client = Socrata("www.datos.gov.co", None) r = client.get("u5mc-hpr6", limit=2000) #for key, value in data.items(): #print key, value for i in range(len(r)): if (r[i]['celular'] == telefono): nombre = r[i]['nombre'] email = r[i]['correo_electr_nico'] break nombre = nombre.split(" ") if (len(nombre) == 4): firts = nombre[0] + " " + nombre[1] last = nombre[2] + " " + nombre[3] full = nombre[0] + " " + nombre[1] + " " + nombre[ 2] + " " + nombre[3] else: firts = nombre[0] last = nombre[1] + " " + nombre[2] full = nombre[0] + " " + nombre[1] + " " + nombre[2] ent = TRX.addEntity('maltego.Person', full) ent.addAdditionalFields("person.firtsnames", "Firts Names", True, firts) ent.addAdditionalFields("person.lastname", "Surname", True, last) ent2 = TRX.addEntity('maltego.EmailAddress', email) except Exception as e: TRX.addUIMessage("Cedula no encontrada en la base de datos") TRX.returnOutput()
def load_data(): client = Socrata("data.cincinnati-oh.gov", None) crimes = client.get("k59e-2pvf", limit=500000) crimes_na = pd.DataFrame.from_records(crimes) crimes_df = crimes_na.replace(r'^\s*$', np.nan, regex=True) # convert dates to datetime crimes_df.date_reported = pd.to_datetime(crimes_df.date_reported, errors='coerce').dt.tz_localize( 'US/Eastern', ambiguous='NaT', nonexistent='NaT') crimes_df.date_from = pd.to_datetime( crimes_df.date_from, errors='coerce').dt.tz_localize('US/Eastern', ambiguous='NaT', nonexistent='NaT') crimes_df.date_to = pd.to_datetime( crimes_df.date_to, errors='coerce').dt.tz_localize('US/Eastern', ambiguous='NaT', nonexistent='NaT') crimes_df.date_of_clearance = pd.to_datetime( crimes_df.date_of_clearance, errors='coerce').dt.tz_localize('US/Eastern', ambiguous='NaT', nonexistent='NaT') crimes = crimes_df.rename(columns={ 'longitude_x': 'lon', 'latitude_x': 'lat' }) crimes['lon'] = crimes['lon'].astype(float) crimes['lat'] = crimes['lat'].astype(float) crimestats = crimes return crimestats
def update_vac_data(self): vac_df = pd.DataFrame() # 51 US states state_str = ', '.join( ['"' + x + '"' for x in list(self.state_name_abbr.values())]) # socrata API client = Socrata("data.cdc.gov", None) # API query dates = client.get("8xkx-amqh", select='distinct date') # for date in dates: # * for testing purpose only * # for date in dates[:1]: for date in dates: date = date['date'] visited = self.vac_visited.get_item(Key={'date_visited': date}) # if not visited if 'Item' not in visited.keys(): self.vac_visited.put_item(Item={'date_visited': date}) results = client.get( "8xkx-amqh", select= 'date, recip_county, recip_state, series_complete_yes, series_complete_pop_pct', where=f'date = "{date}" AND recip_state in ({state_str})', limit=2000000) vac_df = vac_df.append(pd.DataFrame.from_records(results)) else: pass if vac_df.shape[0] > 0: vac_df = self.preprocess(vac_df, 'vac_df') # push preprocessed data to danamodb with self.vac_data_con.batch_writer() as batch: for index, row in vac_df.iterrows(): batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
def downloadata(domain, dataset_id, app_token): start_time = time.time() client = Socrata(domain, app_token) # Example authenticated client (needed for non-public datasets): # client = Socrata("www.dati.lombardia.it", # "MyAppToken", # username="*****@*****.**", # password="******") print('Querying opendata emails...') results = downloader(client, dataset_id, taglist) if results is not None: # Convert to pandas DataFrame results_df = pd.DataFrame.from_records(results) print('converting mail format') tmp = results_df.iloc[:, 0] # Convert to list mail_list = list(tmp[tmp.notna()]) # Add subscribers to the newsletter print('Saving to db') db.addiscritti(mail_list) print('Done') else: print('No data written') client.close() print('----------- Time elapsed : ', round(time.time() - start_time, 6), '----------') return
def PersonToTelefono(m): TRX = MaltegoTransform() #m.parseArguments(sys.argv) #nombre=sys.argv[1] nombre = m.Value try: client = Socrata("www.datos.gov.co", None) r = client.get("3ard-sj8g", limit=2000) #for key, value in data.items(): #print key, value for i in range(len(r)): if (r[i]['nombre_prestador'] == nombre): telefono = r[i]['telefono'] break ent = TRX.addEntity('maltego.PhoneNumber', telefono) ent.addAdditionalFields("phonenumber.countrycode", "Country Code", True, "57") except Exception as e: TRX.addUIMessage("Cedula no encontrada en la base de datos") TRX.returnOutput()
def connectToSocrata(self): clientConfigFile = self.inputdir + self.configItems['socrata_client_config_fname'] with open(clientConfigFile, 'r') as stream: try: client_items = yaml.safe_load(stream) client = Socrata(client_items['url'], client_items['app_token'], username=client_items['username'], password=base64.b64decode(client_items['password'])) return client except yaml.YAMLError as exc: self._logger.error('Failed to open yaml file', exc_info=True) return 0
def download_crime_reports(start_year, end_year): ''' Imports crime reports data from the Chicago open data portal using the SODA API. Inputs: start_year (int): the first year to download crime reports from (valid input is 2001-2018) end_year (int): the last year to dowload crime reports from (valid input is 2001-2018) Returns: pandas dataframe where each row is a crime report ''' coltypes = {'latitude': float, 'longitude': float, 'year': int} client = Socrata('data.cityofchicago.org', APP_TOKEN) where_clause = 'year between {} and {}'.format(start_year, end_year) max_size = int(6.85 * 10**6) results = client.get('6zsd-86xi', where=where_clause, limit=max_size) results_df = pd.DataFrame.from_records(results)\ .astype(coltypes) results_df.date = pd.to_datetime(results_df.date) return results_df
def test_upsert(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "upsert_songs.txt" data = [{ "theme": "Surfing", "artist": "Wavves", "title": "King of the Beach", "year": "2010" }] setup_mock(adapter, "POST", response_data, 200) response = client.upsert(DATASET_IDENTIFIER, data) assert isinstance(response, dict) assert response.get("Rows Created") == 1 client.close()
def crawl_website_socrata(website_link, dataset_identifier, crawl_limit, token=None): """ Simple crawler for a specific file. Code snippet is from https://dev.socrata.com/foundry/data.cityofchicago.org/jaif-n74j """ # Unauthenticated client only works with public data sets. Note 'None' # in place of application token, and no username or password: client = Socrata(website_link, token) # Example authenticated client (needed for non-public datasets): # client = Socrata("data.cms.gov", # "MyAppToken", # username="*****@*****.**", # password="******") # First 2000 results, returned as JSON from API / converted to Python list of # dictionaries by sodapy. results = client.get(dataset_identifier, limit=crawl_limit) # Convert to pandas DataFrame results_df = pd.DataFrame.from_records(results) return results_df
def updateDatabase(self): f = open("./text/last_update_deaths.txt").read().strip() last_update = datetime.strptime(f, "%Y-%m-%dT%H:%M:%S") if datetime.now()-last_update > timedelta(hours=8): f = open("./text/last_update_deaths.txt", "w") f.write((datetime.now()).strftime("%Y-%m-%dT%H:%M:%S")) f.close() print("updated") client = Socrata(self.dataset_link, None) data = client.get(self.dataset_id, limit=self.limit) df = pd.DataFrame.from_dict(data) # the dataset contains extra characters on counties ending in 'à' try: df['comarcadescripcio'] = df['comarcadescripcio'].str.replace("\xa0", "") df.to_pickle("./text/deaths_backup.pkl") return df except KeyError: df = pd.read_pickle("./text/deaths_backup.pkl") print("Something went wrong, falling back to the backup") return df else: df = pd.read_pickle("./text/deaths_backup.pkl") return df
def collect_all(): #initialize client client = Socrata("data.cityofchicago.org", None) #get all requests #results = [y for x in (client.get("a9u4-3dwb", limit=2000, offset=i*1000, where="complaints IS NOT NULL") for i in range(0,100,2)) for y in x] results = [] for i in tqdm(range(0, 100, 2)): try: sub_list = client.get("a9u4-3dwb", limit=2000, offset=i * 1000, where="complaints IS NOT NULL") if len(sub_list) > 0: results.extend(sub_list) #returns empty list when offset > database size else: break except: print('err') #this is based on no rate limit errors, #at this scale none have been encountered #thus this will suffice for current use #save to df and print number of entries results_df = pd.DataFrame.from_records(results) print(f'number of complaints found: {results_df.shape[0]}') #save this to our pickle file for analysis f = '../pickle/reports.pkl' results_df.to_pickle(f) print(f'Initial Reports DataFrame has been saved to {f}') return results_df
def execute(trial = False): startTime = datetime.datetime.now() #Setup starts here DOMAIN = "data.cityofboston.gov" client = dml.pymongo.MongoClient() repo = client.repo repo.authenticate('ll0406_siboz', 'll0406_siboz') #Socrata API setup and raw data retrieval socrataClient = Socrata(DOMAIN, None) """
def get_data(app_key, page_size, num_pages, output_file): client = Socrata("data.cityofnewyork.us", app_key) offset = 0 res = [] # If num_pages is not provided, calculate the num_pages so that we can read the entire content. if num_pages is None: total_records = int( client.get("nc67-uf89", select="COUNT(*)")[0]["COUNT"]) num_pages = total_records // page_size + 1 #load Json_file is output is provided if output_file is not None: f = open(output_file, 'a') # Get records for i in range(num_pages): one_page = client.get("nc67-uf89", limit=page_size, offset=offset) if output_file is None: print(one_page) else: for item in one_page: f.write(json.dumps(item) + '\n') offset += page_size if output_file is not None: f.close()
def run_script(): client = Socrata("data.sfgov.org","wvRAyq5wvCnf9YKGmiuZ7T9y3") fetched_data = client.get("/resource/rqzj-sfat.json",select="objectid,latitude,longitude,fooditems,expirationdate") for row in fetched_data: print(str(row)) print("fetched_data", str(fetched_data)) data = [] for i in fetched_data: a = Node(i.get('objectid',None),i.get('latitude',None),i.get('longitude',None),i.get('fooditems',None),i.get('expirationdate',None)) print(repr(a)) data.append(a._asdict()) print(data) try: conn = psycopg2.connect("dbname='mydb'") except: print("I am unable to connect to the database") cur = conn.cursor() try: cur.executemany("insert into foodtrucks(objectid,latitude, longitude,fooditems,expirationdate) select %(objectid)s,%(latitude)s,%(longitude)s,%(fooditems)s,%(expirationdate)s where not exists (select 1 from foodtrucks where objectid=%(objectid)s)",data) except Exception as e: print("unable to query postgres =>", e) conn.commit()
def fetch_res_data(zip, max_query_results=20, num_entries_to_search=10000, t_out=10) -> Dict[str, any]: nyc_res_dataset_domain = "data.cityofnewyork.us" nyc_res_dataset_identifier = "43nn-pn8j" nyc_res_dataset_token = ( None # works with None but lower number of requests can be made ) client = Socrata(nyc_res_dataset_domain, nyc_res_dataset_token) client.timeout = t_out try: return client.get( nyc_res_dataset_identifier, select="dba, boro, zipcode, violation_description", # q=str(zip), #uncomment if want to query directly on the server side (may lead to timeout) order="score DESC", limit=num_entries_to_search, ) except requests.exceptions.Timeout: raise TimeoutError
def test_create(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) response_data = "create_foobar.txt" setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None) columns = [ {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"}, {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"} ] tags = ["foo", "bar"] response = client.create("Foo Bar", description="test dataset", columns=columns, tags=tags, row_identifier="bar", display_type = "map") request = adapter.request_history[0] request_payload = json.loads(request.text) # can't figure out how to use .json # Test request payload for dataset_key in ["name", "description", "columns", "tags"]: assert dataset_key in request_payload for column_key in ["fieldName", "name", "dataTypeName"]: assert column_key in request_payload["columns"][0] assert "displayType" in request_payload assert request_payload["displayType"] == "map" # Test response assert isinstance(response, dict) assert len(response.get("id")) == 9 client.close()
def execute(trial=False): startTime = datetime.datetime.now() # ---[ Connect to Database ]--------------------------------- client = dml.pymongo.MongoClient() repo = client.repo repo.authenticate('smithnj', 'smithnj') repo_name = 'smithnj.commareas' # ---[ Grab Data ]------------------------------------------- client = Socrata("data.cityofchicago.org", "xbEYuk5XxkYsIaXl3hn79XIoR") results = client.get("74p9-q2aq", limit=500) df = pd.DataFrame.from_records(results).to_json(orient="records") loaded = json.loads(df) # ---[ MongoDB Insertion ]------------------------------------------- repo.dropCollection(repo_name) repo.createCollection(repo_name) print('done') repo[repo_name].insert_many(loaded) repo[repo_name].metadata({'complete': True}) # ---[ Finishing Up ]------------------------------------------- print(repo[repo_name].metadata()) repo.logout() endTime = datetime.datetime.now() return {"start": startTime, "end": endTime}
def datavis(request, dataset_id): ods = OpenDataSource.objects.get(pk=dataset_id) client = Socrata(ods.website, ods.token, ods.user, ods.password) dataset = DataSet.objects.get(pk=dataset_id) data = client.get(dataset.identifier) metadata = client.get_metadata(dataset.identifier) client.close() template = loader.get_template('datavis/datavis.html') data = json.dumps(data, indent=4, sort_keys=True) context = {'data': data, 'metadata': metadata, 'dataset': dataset} return HttpResponse(template.render(context, request))
def test_delete(): mock_adapter = {} mock_adapter["prefix"] = PREFIX adapter = requests_mock.Adapter() mock_adapter["adapter"] = adapter client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD, session_adapter=mock_adapter) uri = "{0}{1}/api/views/{2}.json".format(PREFIX, DOMAIN, DATASET_IDENTIFIER) adapter.register_uri("DELETE", uri, status_code=200) response = client.delete(DATASET_IDENTIFIER) assert response.status_code == 200 try: client.delete("foobar") except Exception as e: assert isinstance(e, requests_mock.exceptions.NoMockAddress) finally: client.close()
import os import sys sitepackage = os.path.join( os.environ['HOME'], "site", "wwwroot", "pyenv", "lib", "python3.4", "site-packages" ) sys.path.append(sitepackage) import json import re from datetime import datetime, timedelta from sodapy import Socrata soda_client = Socrata( os.environ["SOCRATA_HOST"], os.environ["SOCRATA_APPTOKEN"], username=os.environ["SOCRATA_USER"], password=os.environ["SOCRATA_PASS"] ) soda_batch_size = 950 # The location where each agency's individual data is stored; e.g. each agency has its own folder if len(sys.argv) > 1: report_folder = sys.argv[1] else: report_folder = os.path.join( os.environ['HOME'], "site", "wwwroot", os.environ["ANALYTICS_DATA_PATH"] )
def test_client(): client = Socrata(DOMAIN, APPTOKEN) assert isinstance(client, Socrata) client.close()
from sodapy import Socrata import csv client = Socrata(site, app_token, username=user, password=passw) dset = "/resource/xb7i-cvg2.json" filepath = "grants-trunc.csv" client.get(dset) rowlist = [] with open(filepath, "rb") as csvfile: reader = csv.DictReader(csvfile) for row in reader: rowlist.append(row) client.replace(dset, rowlist)
import os os.environ.setdefault("DJANGO_SETTINGS_MODULE", "petfinder.settings") import django django.setup() from pets.models import Pet from datetime import datetime from django.utils import timezone import time import requests from django.db.models import Max import googlemaps from sodapy import Socrata time_start = time.time() #Timer start for script #Setup API clients and initial variables gmaps = googlemaps.Client(key=g_api_key) api_setup = Socrata(socrata_api, socrata_app_token, socrata_id, socrata_pw) max_retry = 3 #Setup Dictionary of found locations and coordinates location_dict = {} coord_pets = Pet.objects.filter(loc_lat__isnull=False).order_by('-intake_at') for x in coord_pets: if x.found_location not in location_dict: location_dict[x.found_location] = {"lat": x.loc_lat, "lon": x.loc_lon} print(coord_pets.count()) #Get record count from AAC record_count = api_setup.get(intake_endpoint, select = "count('')") record_count_out = api_setup.get(outcome_endpoint, select = "count('')") count = record_count[0]['count'].encode('ascii','ignore') count_out = record_count_out[0]['count'].encode('ascii','ignore')
from sodapy import Socrata # client = Socrata("sandbox.demo.socrata.com", None) # print(client.get("nimj-3ivp", limit=10)) # client = Socrata("data.cms.gov/", None) # print(client.get("97k6-zzx3", limit=10)) # https://resource/97k6-zzx3.json?$limit=5 client = Socrata("data.sfgov.org", "8gffbg1meMZ1e2Z0yOz2OpwZq") #client.get("cuks-n6tp", limit=1) assault = client.get("cuks-n6tp", select ="category,time,location", where ="category='ASSAULT'",limit=10) print("assault data") print(assault) theft = client.get("cuks-n6tp", select ="category,time,location", where ="category='VEHICLE THEFT'",limit=10) print("vehicle theft data") print(theft) vandalism = client.get("cuks-n6tp", select ="category,time,location", where ="category='VANDALISM'",limit=10) print("VANDALISM data") print(vandalism) kidnapping = client.get("cuks-n6tp", select ="category,time,location", where ="category='KIDNAPPING'",limit=10) print("KIDNAPPING data") print(kidnapping) sex = client.get("cuks-n6tp", select ="category,time,location", where ="category='SEX OFFENSES, FORCIBLE'",limit=10) print("SEX OFFENSES, FORCIBLE data") print(sex) dui = client.get("cuks-n6tp", select ="category,time,location", where ="category='DRIVING UNDER THE INFLUENCE'",limit=10) print("DUI data") print(dui) client.close()
import requests from sodapy import Socrata import json # store api credentials in creds.json with open('creds.json', 'r') as f: creds = json.loads(f.read()) client = Socrata("odn.data.socrata.com", creds["token"], username=creds['username'], password=creds['password']) data = requests.get('https://odn.data.socrata.com/resource/jwbj-xtgt.json?$where=region_name%20IS%20NOT%20NULL%20AND%20location%20IS%20NULL').json() for row in data: if len(row['region_name'].split(',')) > 2: location = requests.get('http://nominatim.openstreetmap.org/search/?q=%s&format=json' % (row['region_name'])).json() else: location = requests.get('http://nominatim.openstreetmap.org/search/?q=%s,usa&format=json' % (row['region_name'])).json() if location: location = location[0] row['location'] = "(%s, %s)" % (location['lat'], location['lon']) client.upsert("k53q-ytmx", data)