def test_create():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None)

    columns = [
        {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"},
        {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"}
    ]
    tags = ["foo", "bar"]
    response = client.create("Foo Bar", description="test dataset",
                             columns=columns, tags=tags, row_identifier="bar")

    request = adapter.request_history[0]
    request_payload = json.loads(request.text)  # can't figure out how to use .json

    # Test request payload
    for dataset_key in ["name", "description", "columns", "tags"]:
        assert dataset_key in request_payload
    for column_key in ["fieldName", "name", "dataTypeName"]:
        assert column_key in request_payload["columns"][0]

    # Test response
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
def load_data(api_endpoint, limit=10000):
    '''
    Load data from the Chicago Open Data portal using the Socrata API and
    the given api_endpoint. If limit is specified, load no more than limit
    observations.

    Input:
        api_endpoint: str
        limit: int
    Output:
        Pandas DataFrame
    '''
    client = Socrata(CHICAGO_OPEN_DATA, None)
    data_dict = client.get(api_endpoint, limit=limit)
    data_df = pd.DataFrame.from_dict(data_dict)
    if 'the_geom' in data_df.columns:
        data_df.rename(columns={'the_geom': 'location'}, inplace=True)
    return data_df
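# A minimal usage sketch for load_data, not part of the original source. It
# assumes CHICAGO_OPEN_DATA ("data.cityofchicago.org") and the pandas/sodapy
# imports are in scope as in the function above; "igwz-8jzy" is the public
# community-areas endpoint used elsewhere in this file.
def _demo_load_data():
    # Pull at most 100 rows from the community-areas endpoint
    areas_df = load_data('igwz-8jzy', limit=100)
    print(areas_df.head())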
def download_community_areas():
    '''
    Imports names, numbers, and shapes of Chicago community areas from the
    Chicago Open Data Portal using the SODA API.

    Returns: geopandas GeoDataFrame where each row is a community area
    '''
    client = Socrata('data.cityofchicago.org', APP_TOKEN)
    max_size = 100
    results = client.get('igwz-8jzy', limit=max_size)
    results_df = pd.DataFrame.from_records(results)
    results_df.rename({'area_numbe': 'area_number'}, axis=1, inplace=True)
    results_df['the_geom'] = results_df.the_geom\
                                       .apply(shapely.geometry.shape)
    results_df = geopd.GeoDataFrame(results_df, geometry='the_geom')
    results_df.crs = {'init': 'epsg:4326'}
    return results_df
def fetch_meteorite_landings_full_dataset():
    # Unauthenticated client only works with public data sets. Note 'None'
    # in place of application token, and no username or password:
    client = Socrata("data.nasa.gov", None)

    # Example authenticated client (needed for non-public datasets):
    # client = Socrata("data.nasa.gov",
    #                  MyAppToken,
    #                  username="*****@*****.**",
    #                  password="******")

    # First 50000 results, returned as JSON from the API and converted to a
    # Python list of dictionaries by sodapy.
    results = client.get("gh4g-9sfh", limit=50000)

    # Convert to pandas DataFrame
    meteorite_landings_data = pd.DataFrame.from_records(results)
    return meteorite_landings_data
def pull_agg_time_to_closure_statistics_created_since_closed_only(
        since, client=None, timeout=120, group_key=['agency']):
    if client is None:
        client = Socrata(settings.APP_NYC_API_DOMAIN,
                         settings.APP_TOKEN_311,
                         timeout=timeout)
    group_key_str = ','.join(group_key)
    # Day-index arithmetic in SoQL: week-of-year * 7 minus the days remaining
    # in that week gives a day-of-year index; the year difference adds 365
    # days per year crossed.
    query = (
        "select " + group_key_str + ","
        " avg(((date_extract_woy(closed_date)*7)"
        " - (7 - case(date_extract_dow(closed_date)=0,7,true,date_extract_dow(closed_date))))"
        " - ((date_extract_woy(created_date)*7)"
        " - (7 - case(date_extract_dow(created_date)=0,7,true,date_extract_dow(created_date))))"
        " + ((date_extract_y(closed_date) - date_extract_y(created_date)) * 365)"
        ") as days_to_closure"
        " where created_date >= '" + str(since) + "'"
        " and closed_date IS NOT NULL and status = 'Closed'"
        " group by " + group_key_str
    )
    data = client.get(settings.APP_NYC_DATASET, query=query)
    dataFrame = pd.DataFrame.from_dict(data)
    dataFrame['days_to_closure'] = dataFrame['days_to_closure'].astype('float')
    return dataFrame
def link_block(acs_df):
    '''
    Get blocks using the API from the Chicago data portal

    Return: geo dataframe of Chicago merged with acs_df
    '''
    # Credentials were elided in the source; USERNAME and PASSWORD are
    # placeholders for the account used to authenticate.
    client = Socrata('data.cityofchicago.org', app_token,
                     username=USERNAME, password=PASSWORD)
    res = client.get("74p9-q2aq")
    df = pd.DataFrame.from_records(res)
    df.rename(index=str, columns={"geoid10": "geoid"}, inplace=True)
    client.close()
    return pd.merge(df, acs_df, on='geoid')
def fireworks_data_loader():
    '''
    Loads in fireworks data from NYC 311.
    https://data.cityofnewyork.us/Social-Services/311-Fireworks-Complaints/g4u2-tvag
    '''
    client = Socrata('data.cityofnewyork.us', None)
    results = client.get_all('g4u2-tvag')
    df = pd.DataFrame.from_records(results)
    df['created_date'] = pd.to_datetime(df['created_date'], errors='coerce')
    df['fireworks'] = [
        1 if complaint == 'Illegal Fireworks' else 0
        for complaint in df['complaint_type']
    ]
    return df
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_publish_mock(adapter, "POST", response_data, 200)

    response = client.publish(DATASET_IDENTIFIER)
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
def get_business(start_date, end_date):
    '''
    Get 2013 to 2018 business data.

    start_date: "'2019-12-18T20:00:05'"
    end_date: "'2019-12-18T20:00:05'"
    '''
    DATA_ID = "xqx5-8hwx"
    client = Socrata('data.cityofchicago.org', 'E0eO5nY1aKuEY1pVrunfqFhDz',
                     username='******', password='******')
    conds = '''date_issued between "{}" and "{}"'''.format(start_date,
                                                           end_date)
    res = client.get(DATA_ID, where=conds, limit=1000000)
    client.close()
    df = pd.DataFrame.from_records(res)
    return df
def test_upsert_exception():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "403_response_json.txt"
    setup_mock(adapter, "POST", response_data, 403, reason="Forbidden")

    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    try:
        client.upsert(DATASET_IDENTIFIER, data)
    except Exception as e:
        assert isinstance(e, requests.exceptions.HTTPError)
    else:
        raise AssertionError("No exception raised for bad request.")
def get_food_trucks(page_number):
    """
    Gets a page of food trucks from the Socrata client.

    :param page_number: int
    :return: list of food truck records
    """
    weekday_index = get_current_day_index()
    current_time_string = get_current_time_string()
    client = Socrata(SOCRATA_DOMAIN, SOCRATA_TOKEN)
    food_trucks = client.get(FOOD_TRUCKS_RESOURCE_IDENTIFIER,
                             select="applicant, location",
                             where=get_query_string(weekday_index,
                                                    current_time_string),
                             limit=LIMIT,
                             offset=page_number * LIMIT,
                             order="applicant ASC")
    return food_trucks
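# Hedged usage sketch (not in the original source): iterate pages until the
# API returns fewer than LIMIT rows, the usual Socrata offset-pagination
# pattern. Assumes the constants used by get_food_trucks are defined.
def _demo_food_truck_pages():
    page = 0
    while True:
        trucks = get_food_trucks(page)
        for truck in trucks:
            print(truck.get('applicant'))
        if len(trucks) < LIMIT:
            break  # last page reached
        page += 1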
def get_community():
    '''
    Get the community name for each of the 77 community area numbers from
    the Chicago data portal.

    Return: dataframe with community num and community name
    '''
    # Credentials were elided in the source; USERNAME and PASSWORD are
    # placeholders for the account used to authenticate.
    client = Socrata('data.cityofchicago.org', app_token,
                     username=USERNAME, password=PASSWORD)
    res = client.get("igwz-8jzy", select='area_numbe, community')
    df = pd.DataFrame.from_records(res)
    df.rename(index=str, columns={"area_numbe": "community_area"},
              inplace=True)
    client.close()
    return df
def __init__(self, provider_name, mds_config, mds_gql):
    """
    Constructor for the init class.
    :param str provider_name: The name of the provider
    :param MDSConfig mds_config: The configuration class where we can gather our endpoint
    :param MDSGraphQLRequest mds_gql: The http graphql class we need to make requests
    :return:
    """
    self.provider_name = provider_name
    self.mds_config = mds_config
    self.mds_http_graphql = mds_gql
    self.mds_socrata_dataset = self.mds_config.get_setting(
        "SOCRATA_DATASET", None)
    self.client = Socrata(
        self.mds_config.get_setting("SOCRATA_DATA_ENDPOINT", None),
        self.mds_config.get_setting("SOCRATA_APP_TOKEN", None),
        username=self.mds_config.get_setting("SOCRATA_KEY_ID", None),
        password=self.mds_config.get_setting("SOCRATA_KEY_SECRET", None),
        timeout=20,
    )
    self.query = Template("""
        query getTrips {
            api_trips(
                where: {
                    provider: { provider_name: { _eq: "$provider_name" }}
                    end_time: { _gte: "$time_min" },
                    _and: { end_time: { _lt: "$time_max" }}
                }
            ) {
                trip_id: id
                device_id: device { id }
                vehicle_type
                trip_duration
                trip_distance
                start_time
                end_time
                modified_date
                council_district_start
                council_district_end
                census_geoid_start
                census_geoid_end
            }
        }
    """)
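# Hedged sketch (not part of the original class): how the Template above is
# typically filled in. string.Template.substitute replaces the $provider_name,
# $time_min, and $time_max placeholders before the GraphQL request is sent;
# the timestamps and the helper name here are illustrative assumptions.
def _demo_build_trips_query(loader):
    graphql_query = loader.query.substitute(
        provider_name=loader.provider_name,
        time_min="2020-01-01 00:00:00",
        time_max="2020-01-02 00:00:00",
    )
    # The rendered query would then be sent via the GraphQL client,
    # e.g. loader.mds_http_graphql
    return graphql_query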
def TelefonoToCorreoDireccionPerson_mk5f_bdwx(m):
    TRX = MaltegoTransform()
    # m.parseArguments(sys.argv)
    # telefono = sys.argv[1]
    telefono = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("u5mc-hpr6", limit=2000)
        # Find the record whose cell or landline number matches the input
        for i in range(len(r)):
            if r[i]['celular'] == telefono or r[i]['telefonos'] == telefono:
                nombre = r[i]['nombre']
                correo_electronico = r[i]['correo_electronico']
                direccion = r[i]['direccion']
                barrio = r[i]['municipio']
                break
        # Split the full name into first names and surnames
        nombre = nombre.split(" ")
        if len(nombre) == 4:
            first = nombre[0] + " " + nombre[1]
            last = nombre[2] + " " + nombre[3]
            full = nombre[0] + " " + nombre[1] + " " + nombre[2] + " " + nombre[3]
        else:
            first = nombre[0]
            last = nombre[1] + " " + nombre[2]
            full = nombre[0] + " " + nombre[1] + " " + nombre[2]
        ent = TRX.addEntity('maltego.Person', full)
        ent.addAdditionalFields("person.firstnames", "First Names", True, first)
        ent.addAdditionalFields("person.lastname", "Surname", True, last)
        ent1 = TRX.addEntity('maltego.EmailAddress', correo_electronico)
        ent4 = TRX.addEntity('maltego.Location', direccion)
        ent4.addAdditionalFields("country", "Country", True, "Colombia")
        ent4.addAdditionalFields("location.area", "Area", True, barrio)
        ent4.addAdditionalFields("streetaddress", "Street Address", True, direccion)
    except Exception as e:
        # "ID number not found in the database"
        TRX.addUIMessage("Cedula no encontrada en la base de datos")
    TRX.returnOutput()
def get_parking_tickets(page_size, num_pages=None, output=None):
    """Request parking ticket data and save it to output."""
    # Start connection to data API
    client = Socrata(API_BASE, APP_KEY)

    # Get the maximum number of rows in the data set
    max_rows = client.get(dataSetID, select='COUNT(*)')
    max_rows = int(max_rows[0]['COUNT'])

    # Find the number of pages required to read all of the data
    # (integer division, rounding up for a partial final page)
    max_num_pages = max_rows // page_size
    if max_rows % page_size > 0:
        max_num_pages += 1

    # If num_pages is not given, read max_num_pages
    if num_pages is None:
        num_pages = max_num_pages

    data = []
    for i in range(num_pages):
        # A try/except is used because of timeout errors when the offset
        # is very large
        try:
            data += client.get(dataSetID, limit=page_size,
                               offset=i * page_size)
        except Exception:
            break

    # Output the data
    if output is None:
        # Print to stdout
        print(data)
    else:
        with open(output, 'w') as outputFile:
            outputFile.write('[')
            for line in data[:-1]:
                outputFile.write(str(line) + ',\n')
            # Write the last line without a comma
            outputFile.write(str(data[-1]) + ']')
    return data
def TelefonoToCorreoDireccionPerson_6kcx_kbuk(m):
    TRX = MaltegoTransform()
    # m.parseArguments(sys.argv)
    # telefono = sys.argv[1]
    telefono = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("6kcx-kbuk", limit=2000)
        # Find the record whose cell number matches the input
        for i in range(len(r)):
            if r[i]['celular'] == telefono:
                cc = r[i]['doc_identidad']
                nombre = r[i]['nombre_concejal']
                partido = r[i]['partido_politico']
                correo_electronico = r[i]['correo_electronico']
                break
        # Split the full name into first names and surnames
        nombre = nombre.split(" ")
        if len(nombre) == 4:
            first = nombre[0] + " " + nombre[1]
            last = nombre[2] + " " + nombre[3]
            full = nombre[0] + " " + nombre[1] + " " + nombre[2] + " " + nombre[3]
        else:
            first = nombre[0]
            last = nombre[1] + " " + nombre[2]
            full = nombre[0] + " " + nombre[1] + " " + nombre[2]
        ent = TRX.addEntity('maltego.Person', full)
        ent.addAdditionalFields("person.firstnames", "First Names", True, first)
        ent.addAdditionalFields("person.lastname", "Surname", True, last)
        ent1 = TRX.addEntity('maltego.EmailAddress', correo_electronico)
        ent2 = TRX.addEntity('eciescuelaing.PartidoPolitico', partido)
        ent3 = TRX.addEntity('eci.Cedula', cc)
    except Exception as e:
        # "ID number not found in the database"
        TRX.addUIMessage("Cedula no encontrada en la base de datos")
    TRX.returnOutput()
def lambda_handler(event, context):
    '''
    Method called by Amazon Web Services when the lambda trigger fires.
    This lambda is configured to be triggered by file creation in the ITS
    DataHub Sandbox s3 bucket ("usdot-its-cvpilot-public-data" or
    "test-usdot-its-cvpilot-public-data"). When a new file is added to the
    Sandbox s3 bucket, this lambda function will read the new JSON newline
    file, perform data transformation, upsert the new data records to the
    corresponding Socrata data set on data.transportation.gov, and remove
    the oldest records from the Socrata data set to keep the data set at a
    manageable size.

    Parameters:
        event, context: Amazon Web Services required parameters. Describes
        triggering event.
    '''
    # Read data from the newly deposited file and
    # perform data transformation on the records
    out_recs = []
    for bucket, key in lambda_to_socrata_util.get_fps_from_event(event):
        raw_recs = lambda_to_socrata_util.process_s3_file(bucket, key)
        out_recs += [process_bsm(i) for i in raw_recs]

    if len(out_recs) == 0:
        logger.info("No new data found. Exit script")
        return

    # Upsert the new records to the corresponding Socrata data set
    logger.info("Connecting to Socrata")
    client = Socrata("data.transportation.gov", SOCRATA_API_KEY,
                     SOCRATA_USERNAME, SOCRATA_PASSWORD, timeout=400)

    logger.info("Transform record dtypes according to Socrata data set")
    col_dtype_dict = lambda_to_socrata_util.get_col_dtype_dict(
        client, SOCRATA_DATASET_ID)
    float_fields = ['randomNum', 'metadata_generatedAt_timeOfDay']
    out_recs = [
        lambda_to_socrata_util.mod_dtype(r, col_dtype_dict, float_fields)
        for r in out_recs
    ]

    logger.info("Uploading {} new records".format(len(out_recs)))
    uploadResponse = client.upsert(SOCRATA_DATASET_ID, out_recs)
    logger.info(uploadResponse)
def trafficDataIngestion(datalimit, start_datetime, end_datetime):
    # Unauthenticated client only works with public data sets. Note 'None'
    # in place of application token, and no username or password:
    client = Socrata("data.cityofnewyork.us", None)

    # First datalimit results, returned as JSON from the API and converted
    # to a Python list of dictionaries by sodapy.
    # date = "data_as_of > " + "'" + date + "'"  # to filter from a given date onward
    date = f"data_as_of between '{start_datetime}' and '{end_datetime}'"
    print(date)
    columns = "data_as_of, id, speed, travel_time, link_name"
    results = client.get("i4gi-tjb9", limit=datalimit, borough="Manhattan",
                         where=date, select=columns)

    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)

    # ---------------------- datetime / time_hour ----------------------
    results_df["datetime"] = results_df["data_as_of"].str[:-9] + "00:00"
    results_df["datetime_traffic"] = results_df["data_as_of"].str[:-4]
    results_df["datetime"] = pd.to_datetime(results_df["datetime"])
    results_df["datetime_traffic"] = pd.to_datetime(
        results_df["datetime_traffic"])
    results_df["weekday"] = results_df['datetime'].dt.day_name()
    results_df = results_df[[
        "datetime", "datetime_traffic", "weekday", "id", "speed",
        "travel_time", "link_name"
    ]]

    # ---------------------- save to CSV ----------------------
    current_dir = os.getcwd().split("\\TFG")[0]
    file_name = (current_dir +
                 f"/TFG/apis_data/traffic_historical/traffic_dataIngestion_"
                 f"{start_datetime[0:13]}_to_{end_datetime[0:13]}.csv")
    results_df.to_csv(file_name, index=False)
    print(f"TrafficApi: {file_name}")
def choose_dataset(x, limit):
    """
    Take in a table of API endpoints, prompt the user to pick one, and
    return the results of that dataset.
    """
    # Set up a basic client
    client = Socrata("opendata.mass-cannabis-control.com", None)

    # Get columns
    cols = x.columns
    # Store api keys in a list
    list_of_endpts = x[cols[0]].to_list()

    # Store user input
    user_input = input(
        "Which dataset are you interested in viewing?\n"
        "Please choose an index (i.e. row number) from the table above: "
    )
    time.sleep(2)
    limit = 2000  # note: overrides the limit argument
    # Transform string to int
    user_input = int(user_input)

    # Endpoint selection
    try:
        submit = list_of_endpts[user_input]
    except (KeyError, IndexError):
        print('\n\nYou did not choose a number listed in the table above; '
              'Try again...\n\n')

    # Pull data via api endpoint
    results = client.get(f"{submit}", limit=limit)

    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)

    # Return final output
    return results_df
def update_db(dbname='bites', collection_name='permit'):
    """
    This function pulls food truck permit data from the SFGOV API and saves
    it into MongoDB. It should be run daily; the previous day's permit
    records are deleted.
    """
    try:
        client = Socrata("data.sfgov.org", "oBNrfX91YumclMO5wQlQKv0f0")
        # Results are converted to a list of dictionaries by sodapy.
        results = client.get("rqzj-sfat", limit=5000)
    except Exception:
        print("Error: Could not connect to sfgov API")
        return

    try:
        mc = MongoClient("mongodb://*****:*****@34.212.27.178/" + dbname)
    except Exception:
        print("Error: Could not connect to MongoDB")
        return

    # Connect to database
    db = mc[dbname]

    # Drop the existing collection
    try:
        db[collection_name].drop()
    except errors.ServerSelectionTimeoutError:
        print("Error: MongoDB connection time out")
        return
    except errors.OperationFailure:
        print("Error: Not authorized to access the database")
        return

    approved_json = [
        x for x in results
        if x['status'] == 'APPROVED' and (x['longitude'] != '0')
    ]
    for record in approved_json:
        db[collection_name].insert_one(record)

    mc.close()
    client.close()
    print("Database updated (%i records)" % len(approved_json))
def get_pm25_data(yr, st):
    """
    Get pm2.5 data from the CDC API

    yr - year of interest - string
    st - fips code for the state of interest - string
    """
    # Establish connection to the CDC's data via Socrata
    client = Socrata("data.cdc.gov",
                     parsed_yaml['cdc_key'],
                     parsed_yaml['cdc_username'],
                     parsed_yaml['cdc_password'])
    # Set timeout to 300 seconds
    client.timeout = 300

    # Get the number of records in the dataset
    record_count = client.get("qjju-smys",
                              where=f"year = '{yr}' AND statefips = '{st}'",
                              select="COUNT(*)")
    print("The record count is", record_count)

    print(f"Getting {yr} data from the Socrata API...")
    # Get data from the dataset
    start = 0           # starting at page 0
    chunk_size = 50000  # fetching 50,000 rows at a time
    results = []        # empty list to store data
    while True:
        # Add data to the list
        results.extend(
            client.get("qjju-smys",
                       where=f"year = '{yr}' AND statefips = '{st}'",
                       select="year, date, countyfips, pm_mean_pred",
                       offset=start,
                       limit=chunk_size))
        # Pagination
        start = start + chunk_size
        print("At record number", start)
        # Stop adding to the list once all the data is fetched
        if start > int(record_count[0]['COUNT']):
            break

    # Return the list so that it can be stored in a dataframe
    return results
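# Hedged usage sketch (not in the original source): pull one state-year of
# PM2.5 records and load them into pandas. The FIPS code "17" (Illinois) is
# an illustrative assumption; parsed_yaml must hold valid CDC credentials.
def _demo_pm25():
    import pandas as pd
    records = get_pm25_data("2016", "17")
    pm25_df = pd.DataFrame.from_records(records)
    print(pm25_df.head())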
def _onchange_driver_license(self):
    client = Socrata("www.datossct.gob.mx", None)
    try:
        driver_license = client.get('3qhi-59v6',
                                    licencia=self.driver_license)
        license_valid_from = datetime.strptime(
            driver_license[0]['fecha_inicio_vigencia'],
            '%Y-%m-%dT%H:%M:%S.%f')
        license_expiration = datetime.strptime(
            driver_license[0]['fecha_fin_vigencia'],
            '%Y-%m-%dT%H:%M:%S.%f')
        self.license_type = driver_license[0]['categoria_de_la_licencia']
        self.license_valid_from = license_valid_from
        self.license_expiration = license_expiration
        client.close()
    except Exception:
        client.close()
        raise ValidationError(
            _('The driver license is not in SCT database'))
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME,
                     password=PASSWORD, session_adapter=mock_adapter)

    uri = "{0}{1}/api/views/{2}.json".format(PREFIX, DOMAIN,
                                             DATASET_IDENTIFIER)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(DATASET_IDENTIFIER)
    assert response.status_code == 200

    try:
        client.delete("foobar")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
    finally:
        client.close()
def get_data(app_key: str, page_size: int, page=DEFAULT_PAGE) -> list:
    if not isinstance(page_size, int):
        page_size = int(page_size)
    if not isinstance(page, int):
        page = int(page)
    client = Socrata('data.cityofnewyork.us', app_key)
    result = []
    for i in range(0, page):
        try:
            r = client.get(dataset_identifier=DATA_SET,
                           limit=page_size,
                           offset=i * page_size)
            result += r
        except requests.exceptions.ConnectionError as err:
            raise err
        except requests.exceptions.HTTPError:
            raise Exception('Invalid app_token specified')
    return result
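# Hedged usage sketch (not in the original source): fetch three pages of
# 1,000 rows each. "MY_APP_KEY" is a placeholder; DATA_SET and DEFAULT_PAGE
# are assumed module-level constants as in get_data above.
def _demo_get_data():
    rows = get_data(app_key="MY_APP_KEY", page_size=1000, page=3)
    print(f"Fetched {len(rows)} rows")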
def get_data(database_id, location_file, use_cache):
    client = Socrata("www.datos.gov.co", api_key)
    results = client.get(database_id, limit=100000)
    df = pd.DataFrame.from_records(results)
    df["location"] = df.apply(
        lambda row: f"{row.ciudad_de_ubicaci_n} {row.departamento}", axis=1
    )
    # Remove accents
    df["location"] = df.location.apply(
        lambda x: str(unidecode.unidecode(x)).upper())
    cities = pd.unique(df.location)

    print("Checking location file...")
    if use_cache and file_exists(location_file):
        print("File exists, now reading")
        location_df = pd.read_csv(location_file)
        print("Done")
    else:
        print("Could not locate file, calculating from scratch")
        maybe_mkdirs(location_file)
        print("Calculating...")
        location_df = get_location_df(cities)
        print("Ready! Writing to remote...")
        location_df.to_csv(location_file, index=False)
        print("Done")

    print("Check if cities match")
    if len(location_df.location.values) != len(cities):
        new_cities = set(cities) - set(location_df.location.values)
        print("Difference", new_cities)
        print("Calculating new cities")
        _location_df = get_location_df(list(new_cities))
        location_df = location_df.append(_location_df)
        print("Done!")
        # Save updated version
        print("Ready! Writing to remote...")
        location_df.to_csv(location_file, index=False)
        print("Done")

    return df, location_df
def upload_open_data_to_Elasticsearch(url, endpoint, api_key,
                                      query=None, kwargs={}):
    # Input: Socrata url, endpoint, API key, optional query, and ES bulk
    # upload kwargs
    # Output: uploads data to the ES index
    client = Socrata(url, api_key)

    idx = 0
    time.sleep(5)  # sleep 5 seconds, to allow time to connect
    docs = client.get(endpoint, limit=10000, offset=0, where=query)
    upload_to_Elasticsearch.bulk_upload_docs_to_ES_cURL(docs, **kwargs)

    while len(docs) > 0:
        # Page through the results, uploading each batch
        idx += 10000
        docs = client.get(endpoint, limit=10000, offset=idx, where=query)
        upload_to_Elasticsearch.update_ES_records_curl(docs, **kwargs)

    client.close()
def pull_socrata():
    """
    Checks the connection to the API.
    If the connection status code is 200: pulls the data from the specified
    url and inserts each record into the collection.
    If the connection status code is not 200: prints out the status code.
    """
    if check_connection() == "Successful Connection":
        with Socrata("data.sfgov.org", API_Token) as c:
            data = c.get_all("g8m3-pdis")
            try:
                col.insert_many(data, ordered=False)
            except pymongo.errors.BulkWriteError as e:
                # Ignore duplicate-key errors (code 11000); anything else
                # is a real failure.
                panic = [err for err in e.details["writeErrors"]
                         if err["code"] != 11000]
                if len(panic) > 0:
                    print("really panic")
    else:
        print(check_connection())
def download_chiopdat_data(api_endpoint, year_from=None, year_to=None,
                           date_column='year', timestamp=False, limit=10000):
    '''
    Load data from the Chicago Open Data portal using the Socrata API and
    the given api_endpoint. If limit is specified, load no more than limit
    observations. To limit the dates, it needs the date_column and whether
    it is a timestamp column or an integer. Default is integer.

    Input:
        api_endpoint: str
        year_from: int
        year_to: int
        date_column: str
        timestamp: bool
        limit: int
    Output:
        Pandas DataFrame
    '''
    client = Socrata(CHICAGO_OPEN_DATA, None)
    if not year_from:
        data_dict = client.get(api_endpoint, limit=limit)
    else:
        if timestamp:
            data_dict = client.get(
                api_endpoint,
                where=("date_extract_y({}) BETWEEN {} and {}".format(
                    date_column, year_from, year_to)),
                limit=limit)
        else:
            data_dict = client.get(
                api_endpoint,
                where=("{} BETWEEN {} and {}".format(
                    date_column, year_from, year_to)),
                limit=limit)
    data_df = pd.DataFrame.from_dict(data_dict)
    if 'the_geom' in data_df.columns:
        data_df.rename(columns={'the_geom': 'location'}, inplace=True)
    return data_df
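# Hedged usage sketch (not in the original source) showing both date-filter
# branches of download_chiopdat_data. The crimes dataset id "ijzp-q8t2" and
# its "date" timestamp column are illustrative assumptions.
def _demo_download_chiopdat_data():
    # Integer year column: where clause becomes "year BETWEEN 2016 and 2017"
    crimes_by_year = download_chiopdat_data('ijzp-q8t2', 2016, 2017)
    # Timestamp column: where clause becomes
    # "date_extract_y(date) BETWEEN 2016 and 2017"
    crimes_by_date = download_chiopdat_data('ijzp-q8t2', 2016, 2017,
                                            date_column='date',
                                            timestamp=True)
    print(len(crimes_by_year), len(crimes_by_date))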
def DeleteDataset(dataset_id):
    print('The id:')
    print(dataset_id)
    client = None
    try:
        # Create the Socrata client
        client = Socrata(cfg["web"], cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        client.delete(dataset_id)
        error = 'OK'
    except BaseException as e:
        # If there is an error, reload the login page with an error message
        error = str(e)
        print('Error description:')
        print(error)
    finally:
        # Guard the close: the client may never have been created
        if client is not None:
            client.close()
    return error
def get_trip_records(limit=100000):
    client = Socrata('data.cityofchicago.org', 'Tk6RhuGAFvF9P4ehsysybj3IW',
                     username="******", password="******")
    client.timeout = 10000
    results = client.get(
        "m6dm-c72p",
        limit=limit,
        select='''trip_id, trip_start_timestamp, trip_end_timestamp,
                  trip_seconds, trip_miles, pickup_community_area,
                  dropoff_community_area, fare, tip, additional_charges,
                  trip_total'''
    )
    return pd.DataFrame.from_records(results)