def fetchSocrata(self, year=2019, querySize=10000):
    '''Fetch data from Socrata connection and return pandas dataframe'''
    # Load config files
    socrata_domain = self.config['Socrata']['DOMAIN']
    socrata_dataset_identifier = self.config['Socrata']['AP' + str(year)]
    socrata_token = self.token

    # Establish connection to Socrata resource
    client = Socrata(socrata_domain, socrata_token)

    # Fetch dataset metadata
    metadata = client.get_metadata(socrata_dataset_identifier)

    # Page through the dataset in 1,000-row chunks (the API's default page size)
    queryDf = None
    for i in range(0, querySize, 1000):
        print(i)
        results = client.get(socrata_dataset_identifier, offset=i,
                             select="*", order="updateddate DESC")
        tempDf = pd.DataFrame.from_dict(results)
        if queryDf is None:
            queryDf = tempDf.copy()
        else:
            # DataFrame.append is deprecated; concatenate the pages instead
            queryDf = pd.concat([queryDf, tempDf], ignore_index=True)

    self.data = queryDf
def getDataset(dataset_id):
    table = ''
    vistas = ''
    client = None
    try:
        # Create the Socrata client
        client = Socrata(cfg["web"], cfg["token"],
                         username=cfg["email"], password=cfg["password"])
        # Get the data to compare with the uploaded data
        data = client.get(dataset_id, content_type="json")
        data = str(data)
        data = data.replace("'", "\"")
        data = data.upper()
        table = pd.read_json(data)
        # Replace NaN with '' (pd.np was removed from pandas; fillna is equivalent here)
        table = table.fillna('')
        table = table.to_html(classes='table-striped " id = "my_table', index=False)
        vistas = client.get_metadata(dataset_id)
        vistas = str(vistas.get("viewCount"))
        client.close()
    except BaseException as e:
        # If there is an error, reload the login page with an error message
        error = str(e)
        print('Error description:')
        print(error)
        if client:
            client.close()
    return table, vistas
class SocrataClient:

    def __init__(self, config=None):
        config = config['Socrata']
        domain = config['DOMAIN']
        token = None if config['TOKEN'] == 'None' else config['TOKEN']
        timeout = int(config['TIMEOUT'])
        self.client = Socrata(domain, token, timeout=timeout)
        self.attempts = int(config['ATTEMPTS'])
        self.config = config

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return self.config['AP' + str(year)]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(self.attempts):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < self.attempts - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)
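# A minimal usage sketch for the wrapper above, assuming a ConfigParser-style
# config with a [Socrata] section holding DOMAIN, TOKEN, TIMEOUT, ATTEMPTS, and
# AP2019 keys; the config filename and key values are illustrative assumptions,
# not taken from the original code.
import configparser

config = configparser.ConfigParser()
config.read('settings.cfg')  # hypothetical config file

client = SocrataClient(config)
rows = client.get(2019, limit=100, order='updateddate DESC')  # retried on failure
meta = client.get_metadata(2019)
print(meta['name'], len(rows))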
class SocrataClient:

    def __init__(self):
        self.client = Socrata(conf.DOMAIN, conf.TOKEN, timeout=conf.TIMEOUT)

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return conf.DATASET_IDS[year]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(conf.ATTEMPTS):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < conf.ATTEMPTS - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)

    def get_datasets(self):
        '''
        Search for "MyLA311 Service Request Data" within the response to
        get the dataset ids for each year.
        '''
        return self.client.datasets()
def check_for_new_data(app_token):
    """Test the Austin API to see if there is new data"""
    client = Socrata("data.austintexas.gov", app_token)
    austin_res = client.get_metadata(dataset_identifier="7d8e-dm7r")
    austin_max = datetime.datetime.fromtimestamp(austin_res['rowsUpdatedAt'])
    austin_max_pretty = austin_max.strftime('%Y-%m-%d %H:%M')
    return (austin_max, austin_max_pretty)
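# A small usage sketch: compare the dataset's last-updated time against a
# previously stored value to decide whether to re-download. The stored
# timestamp and the token lookup are illustrative assumptions.
import os
import datetime

last_seen = datetime.datetime(2019, 1, 1)  # e.g. loaded from a state file
austin_max, austin_max_pretty = check_for_new_data(os.environ.get("SODAPY_APPTOKEN"))
if austin_max > last_seen:
    print(f"New data available as of {austin_max_pretty}")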
def get_dataset_name(x):
    """
    Create a function to pull in the name of the dataset
    for the corresponding api endpoint
    """
    # Formatting for the final table: display the full name of the dataset
    pd.options.display.max_colwidth = 200

    # Set up a basic client
    # (an authenticated client is needed for non-public datasets)
    client = Socrata("opendata.mass-cannabis-control.com", None)

    # List comprehension to capture the relevant metadata (i.e. Name)
    dataset_name = [client.get_metadata(y)['name'] for y in x]

    # Combine the api endpoints with the name of the associated dataset
    data = list(zip(x, dataset_name))

    # Store the final result in a dataframe
    api_table = pd.DataFrame(
        data,
        columns=['api_endpoints', 'Name']).drop_duplicates().reset_index(drop=True)
    return api_table
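# Usage sketch for the helper above; the endpoint ids below are placeholders,
# not real dataset identifiers from the portal.
endpoints = ["aaaa-1111", "bbbb-2222"]  # hypothetical 4x4 dataset ids
api_table = get_dataset_name(endpoints)
print(api_table)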
def main(): arguments = docopt(__doc__, version=__version__) client = Socrata(arguments['<site>'], arguments['-a']) try: if arguments['ls']: datasets = list_datasets(client, arguments['<site>']) print(tabulate(datasets, headers='keys', tablefmt='psql')) elif arguments['insert']: dataset_id = arguments['<dataset_id>'] metadata = client.get_metadata(dataset_id) engine, session, geo = get_connection(arguments['-d'], metadata) Binding = get_binding(client, dataset_id, metadata, geo, arguments['-t']) # Create the table try: Binding.__table__.create(engine) except ProgrammingError as e: # Catch these here because this is our first attempt to # actually use the DB if 'already exists' in str(e): raise CLIError( 'Destination table already exists. Specify a new table' ' name with -t.') raise CLIError('Error creating destination table: %s' % str(e)) num_rows = get_row_count(client, dataset_id) bar = FillingCirclesBar(' ▶ Loading from API', max=num_rows) # Iterate the dataset and INSERT each page for page in get_dataset(client, dataset_id): to_insert = [] for row in page: to_insert.append(Binding(**parse_row(row, Binding))) session.add_all(to_insert) session.flush() bar.next(n=len(to_insert)) bar.finish() ui.item( 'Committing rows (this can take a bit for large datasets).') session.commit() success = 'Successfully imported %s rows from "%s".' % ( num_rows, metadata['name']) ui.header(success, color='\033[92m') client.close() except CLIError as e: ui.header(str(e), color='\033[91m')
def datavis(request, dataset_id):
    ods = OpenDataSource.objects.get(pk=dataset_id)
    client = Socrata(ods.website, ods.token, ods.user, ods.password)
    dataset = DataSet.objects.get(pk=dataset_id)
    data = client.get(dataset.identifier)
    metadata = client.get_metadata(dataset.identifier)
    client.close()
    template = loader.get_template('datavis/datavis.html')
    data = json.dumps(data, indent=4, sort_keys=True)
    context = {'data': data, 'metadata': metadata, 'dataset': dataset}
    return HttpResponse(template.render(context, request))
def test_get_metadata():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_song_metadata.txt"
    setup_old_api_mock(adapter, "GET", response_data, 200)
    response = client.get_metadata(DATASET_IDENTIFIER)
    assert isinstance(response, dict)
    assert "newBackend" in response
    assert "attachments" in response["metadata"]

    client.close()
def cargar_base(self):
    """
    Connects to the Socrata API and returns the dataset downloaded from the
    Open Data Portal (Portal de Datos Abiertos) as a dataframe.

    :ref:`See example <datos_gov.cargarself._base>`

    .. warning::
        When downloading a dataset through the Socrata API, any column that
        contains no records is omitted, which can create inconsistencies with
        the information described on the open data portal.

    :param api_id: (str) identifier of the dataset associated with the Socrata API.
    :param token: (str) optional - Socrata API user token.
    :param limite_filas: (int) (value greater than 0) maximum number of rows to
        download from the dataset associated with api_id. The limit is intended
        for large datasets that exceed the computer's capacity.
    :return: dataset as a dataframe.
    """
    client = Socrata(self.dominio_datos_gov, app_token=self.token)
    results = client.get(self.api_id, limit=self.limite_filas)
    self._base = pd.DataFrame.from_records(results)
    self.metadata = client.get_metadata(self.api_id)
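# Usage sketch, assuming the surrounding class accepts the dataset id, token,
# and row limit in its constructor and exposes them as the attributes used above;
# the class name and constructor signature are assumptions, not from the source.
datos = DatosGov(api_id="xxxx-xxxx", token=None, limite_filas=10000)  # hypothetical
datos.cargar_base()
print(datos._base.shape)
print(datos.metadata["name"])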
class SocrataClient:

    def __init__(self):
        conf = config['Socrata']
        domain = conf['DOMAIN']
        token = conf['TOKEN']
        # Config values may come back as strings, so cast the numeric ones
        timeout = int(conf['TIMEOUT'])
        self.client = Socrata(domain, token, timeout=timeout)
        self.attempts = int(conf['ATTEMPTS'])
        self.years = conf

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return self.years['AP' + str(year)]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(self.attempts):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < self.attempts - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)

    def get_datasets(self):
        '''
        Search for "MyLA311 Service Request Data" within the response to
        get the dataset ids for each year.
        '''
        return self.client.datasets()
def scrape_datasets(dataset):
    dataportal = dataset.dataportal
    domain = dataportal.domain
    client = Socrata(domain, None)
    dataset.sourced_meta_data = client.get_metadata(dataset.identifier)
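# Usage sketch, assuming a Django-style model where each dataset row carries a
# `dataportal` relation and an `identifier` field (the model name and queryset
# are assumptions); note the helper neither saves the model nor closes the
# client, so the caller handles both.
for dataset in Dataset.objects.all():  # hypothetical queryset
    scrape_datasets(dataset)
    dataset.save()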
class SocrataPortal(Portal): ''' Stores SODA data. ''' def __init__(self, site, dataset_id, app_token, tbl_name=None): Portal.__init__(self, site) self.col_mappings = { 'checkbox': Boolean, 'url': Text, 'text': Text, 'number': Numeric, 'calendar_date': DateTime, 'point': Geometry(geometry_type='POINT', srid=4326), 'location': Geometry(geometry_type='POINT', srid=4326), 'multipolygon': Geometry(geometry_type='MULTIPOLYGON', srid=4326) } self.site = site self.name = "Socrata" self.dataset_id = dataset_id self.app_token = app_token self.client = Socrata(self.site, self.app_token) self.tbl_name = utils.get_table_name( self.client.get_metadata( self.dataset_id)['name']).lower() if not tbl_name else tbl_name self.metadata = self.__get_metadata() self.srid = 4326 self.num_rows = int( self.client.get(self.dataset_id, select='COUNT(*) AS count'))[0]['count'] self.data = self.__get_socrata_data(5000) def __get_metadata(self): ''' Uses provided metadata to map column types to SQLAlchemy. ''' ui.item("Gathering metadata") print() metadata = [] for col in self.client.get_metadata(self.dataset_id)['columns']: print(col['fieldName'], ":", col['dataTypeName']) try: metadata.append( (col['fieldName'], self.col_mappings[col['dataTypeName']])) except KeyError: warnings.warn('Unable to map "%s" to a SQL type.' % col['fieldName']) continue return metadata def __get_socrata_data(self, page_size=5000): ''' Iterate over a datasets pages using the Socrata API ''' ui.item("Gathering data (this can take a bit for large datasets).") page_num = 0 more_pages = True while more_pages: try: api_data = self.client.get( self.dataset_id, limit=page_size, offset=page_size * page_num, ) if len(api_data) < page_size: more_pages = False page_num += 1 yield api_data except: ui.item("Sleeping for 10 seconds to avoid timeout") time.sleep(10) def insert(self, circle_bar): for page in self.data: utils.insert_data( page, self.session, circle_bar, self.binding, srid=self.srid, \ socrata=True) pass
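# Usage sketch for SocrataPortal above; the site, dataset id, and table name are
# placeholders, and the Portal base class is assumed to provide the SQLAlchemy
# session and binding that insert() relies on.
portal = SocrataPortal("data.example.gov", "xxxx-xxxx", app_token=None,
                       tbl_name="my_destination_table")
print(portal.tbl_name)
# portal.data is a generator of pages; portal.insert(progress_bar) walks it and
# writes each page through the configured session.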
class SocrataRepository(HarvestRepository): """ Socrata Repository """ def setRepoParams(self, repoParams): self.metadataprefix = "socrata" super(SocrataRepository, self).setRepoParams(repoParams) # sodapy doesn't like http/https preceding URLs self.socratarepo = Socrata(self.url, self.socrata_app_token) self.domain_metadata = [] def _crawl(self): kwargs = { "repo_id": self.repository_id, "repo_url": self.url, "repo_set": self.set, "repo_name": self.name, "repo_type": "socrata", "enabled": self.enabled, "repo_thumbnail": self.thumbnail, "item_url_pattern": self.item_url_pattern, "abort_after_numerrors": self.abort_after_numerrors, "max_records_updated_per_run": self.max_records_updated_per_run, "update_log_after_numitems": self.update_log_after_numitems, "record_refresh_days": self.record_refresh_days, "repo_refresh_days": self.repo_refresh_days, "homepage_url": self.homepage_url } self.repository_id = self.db.update_repo(**kwargs) records = self.socratarepo.datasets() item_count = 0 for rec in records: result = self.db.write_header(rec["resource"]["id"], self.repository_id) item_count = item_count + 1 if (item_count % self.update_log_after_numitems == 0): tdelta = time.time() - self.tstart + 0.1 self.logger.info("Done {} item headers after {} ({:.1f} items/sec)".format(item_count, self.formatter.humanize(tdelta), item_count/tdelta) ) self.logger.info("Found {} items in feed".format(item_count) ) def format_socrata_to_oai(self, socrata_record, local_identifier): record = {} record["title"] = socrata_record["name"] record["description"] = socrata_record.get("description", "") record["tags"] = socrata_record.get("tags", "") record["identifier"] = local_identifier record["creator"] = socrata_record.get("attribution", self.name) record["pub_date"] = datetime.fromtimestamp(socrata_record["publicationDate"]).strftime('%Y-%m-%d') record["contact"] = self.contact record["series"] = socrata_record.get("category", "") return record def _rate_limited(max_per_second): """ Decorator that make functions not be called faster than a set rate """ threading = __import__('threading') lock = threading.Lock() min_interval = 1.0 / float(max_per_second) def decorate(func): last_time_called = [0.0] @wraps(func) def rate_limited_function(*args, **kwargs): lock.acquire() elapsed = time.clock() - last_time_called[0] left_to_wait = min_interval - elapsed if left_to_wait > 0: time.sleep(left_to_wait) lock.release() ret = func(*args, **kwargs) last_time_called[0] = time.clock() return ret return rate_limited_function return decorate @_rate_limited(5) def _update_record(self,record): try: socrata_record = self.socratarepo.get_metadata(record['local_identifier']) oai_record = self.format_socrata_to_oai(socrata_record,record['local_identifier']) if oai_record: self.db.write_record(oai_record, self.repository_id, self.metadataprefix.lower(), self.domain_metadata) return True except Exception as e: self.logger.error("Updating record {} failed: {}".format(record['local_identifier'], e)) # Touch the record so we do not keep requesting it on every run self.db.touch_record(record) self.error_count = self.error_count + 1 if self.error_count < self.abort_after_numerrors: return True return False
def main( dataset_id, table_name, database, socrata_username, socrata_password, where_clause, existing_table_rows="drop", ): """ Read in dataset from Socrata and write output to Platform Parameters -------- dataset_id: str Socrata dataset identifier table_name: str, optional destination table in Platform (schema.table) database: str, optional destination database in Platform socrata_username: str, optional username for socrata account, required for private data sets socrata_password: str, optional password for socrata account, required for private data sets where_clause: str, optional SoQL for filtering dataset existing_table_rows: str, optional options to pass to dataframe_to_civis command Outputs ------ Adds data as file output and, if table_name and database are specified, writes data to Platform """ socrata_client = Socrata( "data.lacity.org", None, username=socrata_username, password=socrata_password ) socrata_client.timeout = 50 raw_metadata = socrata_client.get_metadata(dataset_id) dataset = _read_paginated(socrata_client, dataset_id, where=where_clause) civis_client = civis.APIClient() if dataset.empty: msg = f"No rows returned for dataset {dataset_id}." LOG.warning(msg) write_and_attach_jsonvalue(json_value=msg, name="Error", client=civis_client) else: data_file_name = ( f"{dataset_id}_extract_{datetime.now().strftime('%Y-%m-%d')}.csv" ) file_id = _store_and_attach_dataset( client=civis_client, df=dataset, filename=data_file_name ) LOG.info(f"add the {file_id}") if table_name: # Optionally start table upload LOG.info(f"Storing data in table {table_name} on database {database}") print("writing table") run_id = os.environ["CIVIS_RUN_ID"] job_id = os.environ["CIVIS_JOB_ID"] dataset["civis_job_id"] = job_id dataset["civis_run_id"] = run_id table_upload = civis.io.dataframe_to_civis( dataset, database=database, table=table_name, existing_table_rows=existing_table_rows, ).result() LOG.info(f"using {table_upload}") # Parse raw_metadata to extract useful fields and attach both raw and # cleaned metadata as script outputs metadata_file_name = ( f"{dataset_id}_metadata_{datetime.now().strftime('%Y-%m-%d')}.json" ) metadata_paths = { "Proposed access level": "metadata.custom_fields.Proposed Access Level.Proposed Access Level", # noqa: E501 "Description": "description", "Data updated at": "rowsUpdatedAt", "Data provided by": "tableAuthor.screenName", } _, clean_metadata = _store_and_attach_metadata( client=civis_client, metadata=raw_metadata, metadata_paths=metadata_paths, filename=metadata_file_name, ) if table_name: sql = f'COMMENT ON TABLE {table_name} IS \'{clean_metadata["Description"]}\'' civis.io.query_civis( sql, database=database, polling_interval=2, client=civis_client ).result()
def main(): arguments = docopt(__doc__) site = arguments['<site>'] if arguments['--HUD']: source = "HUD" dataset_id = site client = None if arguments['--Socrata']: source = "Socrata" client = Socrata(site, arguments.get('-a')) try: if arguments.get('ls'): datasets = list_datasets(client, site) print(tabulate(datasets, headers='keys', tablefmt='psql')) elif arguments.get('insert'): if source == "Socrata": dataset_id = arguments['<dataset_id>'] metadata = client.get_metadata(dataset_id)['columns'] if source == "HUD": metadata = json.loads( urllib.request.urlopen(site).read())['fields'] engine, session, geo = \ get_connection(arguments['-d'], metadata, source) if arguments['-t']: Binding = get_binding( metadata, geo, arguments['-t'], source ) else: Binding = get_binding( metadata, geo, dataset_id, source ) # Create the table try: Binding.__table__.create(engine) except ProgrammingError as e: # Catch these here because this is our first attempt to # actually use the DB if 'already exists' in str(e): raise CLIError( 'Destination table already exists. Specify a new table' ' name with -t.' ) raise CLIError('Error creating destination table: %s' % str(e)) num_rows, data = get_data(source, dataset_id, client) bar = FillingCirclesBar(' ▶ Loading from source', max=num_rows) # Iterate the dataset and INSERT each page if source == "Socrata": for page in data: insert_data(page, session, bar, Binding) if source == "HUD": insert_data(data, session, bar, Binding) bar.finish() ui.item( 'Committing rows (this can take a bit for large datasets).' ) session.commit() success = 'Successfully imported %s rows.' % ( num_rows ) ui.header(success, color='\033[92m') if client: client.close() except CLIError as e: ui.header(str(e), color='\033[91m')
class SocrataDataset(object): def __init__(self, dataset_id, socrata_client=None, socrata_params={}, float_fields=[]): self.dataset_id = dataset_id self.client = socrata_client if not socrata_client and socrata_params: self.client = Socrata(**socrata_params) self.socrata_params = socrata_params self.col_dtype_dict = self.get_col_dtype_dict() self.float_fields = float_fields def get_col_dtype_dict(self): ''' Retrieve data dictionary of a Socrata data set in the form of a dictionary, with the key being the column name and the value being the column data type Returns: data dictionary of a Socrata data set in the form of a dictionary, with the key being the column name and the value being the column data type ''' dataset_col_meta = self.client.get_metadata(self.dataset_id)['columns'] col_dtype_dict = { col['name']: col['dataTypeName'] for col in dataset_col_meta } return col_dtype_dict def mod_dtype(self, rec, col_dtype_dict=None, float_fields=None): ''' Make sure the data type of each field in the data record matches the data type of the field in the Socrata data set. Parameters: rec: dictionary object of the data record col_dtype_dict: data dictionary of a Socrata data set in the form of a dictionary, with the key being the column name and the value being the column data type float_fields: list of fields that should be a float Returns: dictionary object of the data record, with number, string, and boolean fields modified to align with the data type of the corresponding Socrata data set ''' col_dtype_dict = col_dtype_dict or self.col_dtype_dict float_fields = float_fields or self.float_fields identity = lambda x: x dtype_func = {'number': float, 'text': str, 'checkbox': bool} out = {} for k, v in rec.items(): if k in float_fields and k in col_dtype_dict: out[k] = float(v) elif k in col_dtype_dict: if v is not None and v is not '': out[k] = dtype_func.get( col_dtype_dict.get(k, 'nonexistentKey'), identity)(v) out = {k: v for k, v in out.items() if k in col_dtype_dict} return out def create_new_draft(self): draftDataset = requests.post( 'https://{}/api/views/{}/publication.json'.format( self.client.domain, self.dataset_id), auth=(self.socrata_params['username'], self.socrata_params['password']), params={'method': 'copySchema'}) logger.info(draftDataset.json()) draftId = draftDataset.json()['id'] return draftId def publish_draft(self, draftId): time.sleep(5) publishResponse = requests.post( 'https://{}/api/views/{}/publication.json'.format( self.client.domain, draftId), auth=(self.socrata_params['username'], self.socrata_params['password'])) logger.info(publishResponse.json()) return publishResponse def delete_draft(self, draftId): time.sleep(5) deleteResponse = self.client.delete(draftId) if deleteResponse.status_code == 200: logger.info('Empty draft {} has been discarded.'.format(draftId)) return deleteResponse def clean_and_upsert(self, recs, dataset_id=None): dataset_id = dataset_id or self.dataset_id out_recs = [self.mod_dtype(r) for r in recs] uploadResponse = self.client.upsert(dataset_id, out_recs) return uploadResponse
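# Usage sketch for SocrataDataset.mod_dtype above: coerce an incoming record to
# the column types Socrata reports before upserting. The socrata_params values,
# the dataset id, and the field names are placeholders; the example assumes the
# dataset has a number column "count", a text column "name", and a checkbox
# column "flag".
params = {
    "domain": "data.example.gov",   # placeholder domain
    "app_token": "...",
    "username": "...",
    "password": "...",
}
ds = SocrataDataset("xxxx-xxxx", socrata_params=params)  # placeholder 4x4 id
rec = {"count": "42", "name": 123, "flag": "True"}
clean = ds.mod_dtype(rec)  # e.g. {"count": 42.0, "name": "123", "flag": True}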
def main( socrata_client_url: str, dataset_id: str, civis_table_name: str, civis_database: str, database_type: str, socrata_username: str, socrata_password: str, grant_group: str, varchar_len: str = None, action_existing_table_rows: str = "drop", ): """ Read in dataset from Socrata and write output to Platform Parameters -------- socrata_client_url: str url of socrata portal being referenced dataset_id: str Socrata dataset identifier civis_table_name: str destination table in Platform (schema.table) civis_database: str destination database in Platform database_type: str type of destination database socrata_username: str, optional username for socrata account, required for private data sets socrata_password: str, optional password for socrata account, required for private data sets grant_group: str string of group(s) that are passed to civis API to be granted select table access varchar_len: str sets the varchar length when datatypes are passed to civis API, 256 is defualt action_existing_table_rows: str, optional options to pass to dataframe_to_civis command Outputs ------ Adds data as file output and, if table_name and database are specified, writes data to Platform """ socrata_client = Socrata(socrata_client_url, None, username=socrata_username, password=socrata_password) # define socrata cleint civis_client = civis.APIClient() # define civis cleint socrata_client.timeout = 50 sample_data = socrata_client.get(dataset_id, limit=5, content_type="csv", exclude_system_fields=False, offset=0) # collects sample data from dataset sample_data_df = results_to_df(sample_data) # writes sample data to dataframe if sample_data_df.empty: msg = f"No rows returned for dataset {dataset_id}." LOG.warning(msg) write_and_attach_jsonvalue(json_value=msg, name="Error", client=civis_client) os._exit(1) # provides exit if no rows avalible in dataset raw_metadata = socrata_client.get_metadata(dataset_id) # calls for raw metadata sql_type = select_sql_map(database_type, varchar_len) # defines apropriate sql types for datatype mapping depending on # specifications ( civis_table_columns, point_columns, pandas_column_order, extra_columns, ) = create_col_type_dict(raw_metadata, sample_data_df, sql_type) # creates civis specific array of dicts that maps column name to # datatype using socrata metadata as guidence. Also, provides point # columns that are used to clean point column formatting during import. # And, provides array of columns that corresponds to order of the mapping # dict (civis_file_to_table is sensitive to order. 
print("Columns present in Metadata but not in data:", extra_columns) consolidated_csv_path = _read_paginated( client=socrata_client, dataset_id=dataset_id, point_columns=point_columns, column_order=pandas_column_order, ) # reads in socrata data in chunks (using offset and page_limit), and # appenda all to one csv and outputs path here data_file_name = f"{dataset_id}_extract_{datetime.now().strftime('%Y-%m-%d')}.csv" uploaded_file_id = _store_and_attach_dataset_csv( client=civis_client, csv_path=consolidated_csv_path, filename=data_file_name) print("file_id:", uploaded_file_id) LOG.info(f"add the {uploaded_file_id}") LOG.info( f"Storing data in table {civis_table_name} on database {civis_database}" ) table_upload = civis.io.civis_file_to_table( file_id=uploaded_file_id, database=civis_database, table=civis_table_name, table_columns=civis_table_columns, existing_table_rows=action_existing_table_rows, headers=True, ).result() LOG.info(f"using {table_upload}") # takes in file id and writes to table metadata_file_name = ( f"{dataset_id}_metadata_{datetime.now().strftime('%Y-%m-%d')}.json") # parse raw_metadata to extract useful fields and attach both raw and # cleaned metadata as script outputs upload_metadata_paths = { "Description": "description", "Data updated at": "rowsUpdatedAt", "Data provided by": "tableAuthor.screenName", } _, clean_metadata = _store_and_attach_metadata( client=civis_client, metadata=raw_metadata, metadata_paths=upload_metadata_paths, filename=metadata_file_name, ) if civis_table_name: sql = f""" COMMENT ON TABLE {civis_table_name} IS \'{clean_metadata["Description"]}\' """ civis.io.query_civis(sql, database=civis_database, polling_interval=2, client=civis_client).result() if grant_group: sql = f"GRANT ALL ON {civis_table_name} TO GROUP {grant_group}" civis.io.query_civis(sql, database=civis_database, polling_interval=2, client=civis_client).result()
from sodapy import Socrata
import os
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
import plotly
import collections
import plotly.graph_objs as go

client = Socrata("data.cityofchicago.org",
                 "9ugcPuahbyTpHmzfeCefy30Ni",
                 username="******",
                 password="******")
metadata = client.get_metadata("cwig-ma7x")
plotly.tools.set_credentials_file(username='******', api_key='jINELDLWjEHsJLDibvbI')
socrata_token = os.environ.get("SODAPY_APPTOKEN")

results = client.get("cwig-ma7x", limit=170000)
df = pd.DataFrame.from_dict(results)

mapbox_access_token = 'pk.eyJ1IjoiYWxleHRpZmE3IiwiYSI6ImNqZ2x5aDR1NDF1cGgyd21qNW5kcWp0NzUifQ.5qMfl1OmJPDwIonDZergiA'

lan = []
lon = []
unique_name = []
u = []
for i in range(len(df['dba_name'])):
    if [df['dba_name'][i], df['address'][i]] in u:
        if df['results'][i] == 'Fail':
def main (args): if len(args) == 0: print ('args: list of dataset id, state/province, city, country') exit(0) ids = [] with open(args[0],'r') as id_file: for line in id_file: ids.append(line.strip('\n\r')) print (ids) graphql = config(section='graphql') rebloc = config(section='rebloc') headers = { 'X-Hasura-Access-Key': graphql['apitoken']} _transport = RequestsHTTPTransport( url=graphql['endpoint'], headers=headers, use_json=True ) graphql_client = Client( transport=_transport, fetch_schema_from_transport=True ) my_marketplace = ReblocMarketplace(rebloc['endpoint'],graphql_client) ownerid = my_marketplace.look_up_user_id(rebloc['registeremail']) api_config = config(section='sourceapi') domain_client = Socrata(api_config['domain'], api_config['token']) for dataset_identifier in ids: metadata = domain_client.get_metadata(dataset_identifier) open_data = MyOpenData(domain_client,metadata,dataset_identifier) try: schema = open_data.data_schema() print (schema) server_config = config(section='ipfs') gen = DocumentGenerator() seed = gen.sentence() print(seed) # 32 bytes encryption keys sample_key = hashlib.sha256(seed.encode('utf-8')).hexdigest()[:32].encode('utf8') print("key = %s" % sample_key) seed = gen.sentence() print(seed) # 32 bytes encryption keys data_key = hashlib.sha256(seed.encode('utf-8')).hexdigest()[:32].encode('utf8') print("key = %s" % data_key) # publish sample print ('publishing sample....') sample_info = open_data.publish_sample_data( sample_key, server_config['endpoint'], server_config['port'], sample_size=300 ) # publish full data print('publishing all data....') data_info = open_data.publish_all_data( data_key, server_config['endpoint'], server_config['port'] ) current_date_time = datetime.datetime.utcnow().strftime("%a %b %d %H:%M:%S %Y") search_terms = "{property,taxes}" if metadata.get('tags') is not None: search_terms = "{" + ",".join(metadata['tags']) + "}" default_ipfs_gateway = "http://demo-app.rebloc.io:8080/ipfs/" default_price = 0.5 if 0.00001 * data_info['num_of_rows'] > default_price: default_price = round (0.01 * data_info['num_of_rows'],2) dataset = { "id": str(uuid.uuid1()), "name": metadata['name'], "table_name": metadata['id'], "description": metadata['description'], "country": "united states", "state_province": "california", "city": "{san mateo}", "topic": "{" + "assessment" + "}", "date_created": current_date_time, "date_modified": current_date_time, "dataset_owner_id": ownerid, "delivery_method": "IPFS/CSV", "enc_data_key": data_key.decode(), "enc_sample_key": sample_key.decode(), "sample_access_url": default_ipfs_gateway + sample_info['ipfs_hash'], "sample_hash": sample_info['md5_file_hash'], "access_url": default_ipfs_gateway + data_info['ipfs_hash'], 'data_hash': data_info['md5_file_hash'], "num_of_records": data_info['num_of_rows'], "search_terms": search_terms, "price_high": default_price, "price_low": 0.5, "stage": 3, "schema": schema, "json_schema": json.dumps(schema) } print (dataset) # list draft datasets to marketplace result = my_marketplace.post_draft_dataset(dataset) print (result) print (dataset_identifier + ' completed') except Exception as err: print("error occurs:%s" % err) print ('done')
print("Updating covid data")

if not (token := os.environ.get("SODAPY_APPTOKEN")):
    raise EnvironmentError("SODAPY_APPTOKEN not set")

domain = "data.ny.gov"
covid_id = "xdss-u53e"
client = Socrata(domain, token)

site_last_updated = 0
if "site-last-updated" in os.listdir("."):
    with open("site-last-updated", "r") as f:
        site_last_updated = int(f.read())

metadata = client.get_metadata(covid_id)
if data_last_updated := metadata.get("rowsUpdatedAt"):
    data_last_updated = int(data_last_updated)
    if site_last_updated >= data_last_updated:
        sys.exit(0)

testing_data = client.get_all(covid_id, select="county, test_date, new_positives")

print("Cleaning data")
testing_data = [{
    "county": c["county"].lower(),
    "test_date": datetime.strptime(c["test_date"], "%Y-%m-%dT%H:%M:%S.%f"),
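# A small follow-on sketch (not part of the original snippet, which is truncated
# above): once the site has been rebuilt from the fresh data, write the dataset's
# rowsUpdatedAt value back to the "site-last-updated" file so the next run can
# skip unchanged data.
with open("site-last-updated", "w") as f:
    f.write(str(data_last_updated))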
def controllerCenter(allIDS): flagCounter = 0 limit = 10 token = 'sC4N6wXghMXaL2C3uUxVMphf0' client = Socrata('www.datos.gov.co', token, username="******", password="******") client.timeout = 180 print("Conjuntos de datos a evaluar", len(allIDS)) for i in allIDS: resultado = "" flagCounter += 1 try: try: # Generando los insumos del proceso. datasetURL = "https://www.datos.gov.co/resource/{}.json".format(i) print(flagCounter) print(datasetURL) # Valida URL y si es accesible statusDataset = requests.get(datasetURL, timeout=60) # Obtiene información del Dataset datosDataSet = client.get(i, limit=limit) # Obteniendo Metadatos del Dataset metaDataset = client.get_metadata(i) # Reviando que el Dataset no esté vacio frametoValidate = pd.DataFrame.from_records(datosDataSet) # Validando DataSet metaData = metaDataset['metadata'] except KeyError as error: print(error) logging.error(str(error)) indexCompletitud = 0 indexCredibilidad = 0 indexActualidad = 0 indexTrazabilidad = 0 indexDisponibildiad = 0 indexConformidad = 0 indexComprensibilidad = 0 indexPortabilidad = 0 indexConsistencia = 0 indexExactitud = 0 except TimeoutError as error: logging.error(str(error)) except requests.exceptions.ConnectionError as error: logging.error(str(error)) else: # Cálculo de indicadores para cada conjunto de datos evaluado if statusDataset.status_code == 200 and frametoValidate.empty == False: # Se genera primer indicador de disponibilidad resultado = str(i) # Se genera primer indicador de disponibilidad resultado = resultado + ';' + str(10) # Creación de la Instancia evaluation = Evaluation() # Indicador Completitud indexCompletitud = evaluation.indicadorCompletitud(frametoValidate) resultado = resultado + ',' + str(indexCompletitud) # Indicador Actualidad indexActualidad = evaluation.indicadorActualidad(metaDataset, metaData) resultado = resultado + ',' + str(indexActualidad) # Indicador Credibilidad indexCredibilidad = evaluation.indicadorCredibilidad(metaDataset) resultado = resultado + ',' + str(indexCredibilidad) # Indicador Trazabilidad indexTrazabilidad = evaluation.indicadorTrazabilidad(metaDataset) resultado = resultado + ',' + str(indexTrazabilidad) # Indicador Conformidad indexConformidad = evaluation.indicadorConformidad(metaDataset) resultado = resultado + ',' + str(indexConformidad) # Indicador Comprensibilidad indexComprensibilidad = evaluation.indicadorComprensibilidad(metaDataset, frametoValidate) resultado = resultado + ',' + str(indexComprensibilidad) # Indicador Portabilidad indexPortabilidad = evaluation.indicadorPortabilidad(datosDataSet) resultado = resultado + ',' + str(indexPortabilidad) # Indicador Consistencia indexConsistencia = evaluation.indicadorConsisetencia(frametoValidate) resultado = resultado + ',' + str(indexConsistencia) # INdicador Exactitud indexExactitud = evaluation.indicadorExactitud(metaDataset, frametoValidate) resultado = resultado + ',' + str(indexExactitud) else: print("error") finally: with open("Quality_Indicators.csv", 'a', encoding='UTF-8') as qIndicators: qIndicators.write(str(resultado)) qIndicators.write('\n') except BrokenPipeError as errorBroken: logging.error(str(errorBroken)) resultado = str(i) + "0,0,0,0,0,0,0,0,0,0" with open("Quality_Indicators.csv", 'a', encoding='UTF-8') as qIndicators: qIndicators.write(str(resultado)) qIndicators.write('\n')
class SocrataDataset(object): """ Helper class for interacting with datasets in Socrata. """ logger = None def __init__(self, dataset_id, socrata_client=None, socrata_params=None, float_fields=None, logger=None): """ Initialization function of the SocrataDataset class. Parameters: dataset_id: 4x4 ID of the Socrata draft (e.g. x123-bc12) client: Optional parameter if the user chooses to pass in the socrata_params parameter. If user chooses not to pass in socrata_params, they can also pass in an sodapy.Socrata object that has been initialized with the proper socrata credentials. socrata_params: Optional parameter if the user choose to pass in the socrata_client parameter. Dictionary object containing Socrata credentials. Must include the following fields: 'username', 'password', 'app_token', 'domain'. float_fields: An array of Socrata field names that should be of float types (numbers with decimals). logger: Optional parameter. Could pass in a logger object or not pass in anything. If a logger object is passed in, information will be logged instead of printed. If not, information will be printed. """ self.socrata_params = {} self.float_fields = [] self.dataset_id = dataset_id self.client = socrata_client if not socrata_client and socrata_params: self.client = Socrata(**socrata_params) self.socrata_params = socrata_params self.col_dtype_dict = self.get_col_dtype_dict() self.float_fields = float_fields self.print_func = print if logger: self.print_func = logger.info def get_col_dtype_dict(self): """ Retrieve data dictionary of a Socrata data set in the form of a dictionary, with the key being the column name and the value being the column data type Returns: Data dictionary of a Socrata data set in the form of a dictionary, with the key being the column name and the value being the column data type. """ dataset_col_meta = self.client.get_metadata(self.dataset_id)['columns'] col_dtype_dict = { col['name']: col['dataTypeName'] for col in dataset_col_meta } return col_dtype_dict def mod_dtype(self, rec, col_dtype_dict=None, float_fields=None): """ Make sure the data type of each field in the data record matches the data type of the field in the Socrata data set. Parameters: rec: dictionary object of the data record col_dtype_dict: data dictionary of a Socrata data set in the form of a dictionary, with the key being the column name and the value being the column data type float_fields: list of fields that should be a float Returns: Dictionary object of the data record, with number, string, and boolean fields modified to align with the data type of the corresponding Socrata data set. """ col_dtype_dict = col_dtype_dict or self.col_dtype_dict float_fields = float_fields or self.float_fields identity = lambda x: x dtype_func = {'number': float, 'text': str, 'checkbox': bool} out = {} for k, v in rec.items(): if k in float_fields and k in col_dtype_dict: out[k] = float(v) elif (k in col_dtype_dict and v not in [None, '']): out[k] = dtype_func.get( col_dtype_dict.get(k, 'nonexistentKey'), identity)(v) out = {k: v for k, v in out.items() if k in col_dtype_dict} return out def create_new_draft(self): """ Create a new draft of the current dataset. Returns: Draft ID of the new draft. 
""" draft_dataset = requests.post( 'https://{}/api/views/{}/publication.json'.format( self.client.domain, self.dataset_id), auth=(self.socrata_params['username'], self.socrata_params['password']), params={'method': 'copySchema'}) logger.info(draft_dataset.json()) draft_id = draft_dataset.json()['id'] return draft_id def publish_draft(self, draft_id): """ Publish the Socrata draft specified. Parameters: draft_id: 4x4 ID of the Socrata draft (e.g. x123-bc12) Returns: Response of the publish draft request. """ time.sleep(5) publish_response = requests.post( 'https://{}/api/views/{}/publication.json'.format( self.client.domain, draft_id), auth=(self.socrata_params['username'], self.socrata_params['password'])) logger.info(publish_response.json()) return publish_response def delete_draft(self, draft_id): """ Delete the Socrata draft specified. Parameters: draft_id: 4x4 ID of the Socrata draft (e.g. x123-bc12) Returns: Response of the delete draft request. """ time.sleep(5) delete_response = self.client.delete(draft_id) if delete_response.status_code == 200: logger.info('Empty draft {} has been discarded.'.format(draft_id)) return delete_response def clean_and_upsert(self, recs, dataset_id=None): """ Publish the Socrata draft specified. Parameters: recs: an array of dictionary objects of the data to upsert. dataset_id: 4x4 ID of the Socrata dataset (e.g. x123-bc12) to perform upserts to. This parameter is not required if you are performing upserts to the dataset you've initialized this class with. Returns: A dictionary object with the following fields: 'Rows Deleted' - number of rows deleted due to the upsert request 'Rows Updated' - number of rows updated due to the upsert request 'Rows Created' - number of rows created due to the upsert request """ dataset_id = dataset_id or self.dataset_id out_recs = [self.mod_dtype(r) for r in recs] upload_response = self.client.upsert(dataset_id, out_recs) return upload_response
import os
from sodapy import Socrata
import json

socrata_token = os.environ.get("SODAPY_APPTOKEN")

nyc_realestate_domain = 'data.cityofnewyork.us'
nyc_realestate_dataset_identifier = 'm8p6-tp4b'
nyc_realestate_domain_client = Socrata(nyc_realestate_domain, socrata_token)

metadata = nyc_realestate_domain_client.get_metadata(
    nyc_realestate_dataset_identifier)
# print(metadata)

dataCols = []
for x in metadata['columns']:
    col = dict()
    col['name'] = x['fieldName']
    if x['dataTypeName'] == "calendar_date":
        col['type'] = "timestamp without timezone"
    elif x['dataTypeName'] == "number":
        col['type'] = "integer"
    else:
        col['type'] = x['dataTypeName']
    col['label'] = x['name'].replace('_', ' ')
    # Not every column carries a description, so fall back to an empty string
    col['description'] = x.get('description', '')
    dataCols.append(col)

# print(json.dumps(dataCols, indent=4, sort_keys=False, default=str))
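# One possible follow-up (not in the original snippet): persist the column
# mapping so a downstream loader can build its schema from it; the output
# filename is a placeholder, and json is already imported above.
with open("m8p6-tp4b_columns.json", "w") as fp:
    json.dump(dataCols, fp, indent=4, default=str)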
    , 'resolution_description': sqlalchemy.types.Text()
    , 'resolution_action_updated_date': sqlalchemy.DateTime()
    , 'created_date': sqlalchemy.DateTime()
    , 'due_date': sqlalchemy.DateTime()
    , 'closed_date': sqlalchemy.DateTime()
    , 'descriptor': sqlalchemy.types.Text()
    , 'location': sqlalchemy.types.Text()
}

#### connect w/ Socrata
endpoint = 'https://data.cityofnewyork.us/resource/p5f6-bkga.json'
domain = 'data.cityofnewyork.us'
data_id = 'p5f6-bkga'
token = myToken

client = Socrata(domain, token)
metadata = client.get_metadata(data_id)
columns = [x['name'] for x in metadata['columns']]
meta_amount = [x for x in metadata['columns'] if x['name'] == 'Agency'][0]

retries = 3

#### Start Operation
# 3006411 as of 2019-03-31
batches = int(3006411 // 50000) + 1

i = 0
dropped_cols = []
while i < batches:
    fail_cnt = 0
    query = """
    select *
    where
class SocrataRepository(HarvestRepository): """ Socrata Repository """ def setRepoParams(self, repoParams): self.metadataprefix = "socrata" super(SocrataRepository, self).setRepoParams(repoParams) # sodapy doesn't like http/https preceding URLs self.socratarepo = Socrata(self.url, self.socrata_app_token) self.domain_metadata = [] def _crawl(self): kwargs = { "repo_id": self.repository_id, "repo_url": self.url, "repo_set": self.set, "repo_name": self.name, "repo_type": "socrata", "enabled": self.enabled, "repo_thumbnail": self.thumbnail, "item_url_pattern": self.item_url_pattern, "abort_after_numerrors": self.abort_after_numerrors, "max_records_updated_per_run": self.max_records_updated_per_run, "update_log_after_numitems": self.update_log_after_numitems, "record_refresh_days": self.record_refresh_days, "repo_refresh_days": self.repo_refresh_days, "homepage_url": self.homepage_url, "repo_oai_name": self.repo_oai_name } self.repository_id = self.db.update_repo(**kwargs) records = self.socratarepo.datasets() item_count = 0 for rec in records: result = self.db.write_header(rec["resource"]["id"], self.repository_id) item_count = item_count + 1 if (item_count % self.update_log_after_numitems == 0): tdelta = time.time() - self.tstart + 0.1 self.logger.info("Done {} item headers after {} ({:.1f} items/sec)".format(item_count, self.formatter.humanize(tdelta), item_count/tdelta) ) self.logger.info("Found {} items in feed".format(item_count) ) def format_socrata_to_oai(self, socrata_record, local_identifier): record = {} record["title"] = socrata_record.get("name","").strip() record["description"] = socrata_record.get("description", "") record["tags"] = socrata_record.get("tags", "") record["identifier"] = local_identifier record["creator"] = socrata_record.get("attribution", self.name) record["pub_date"] = datetime.fromtimestamp(socrata_record["publicationDate"]).strftime('%Y-%m-%d') record["subject"] = socrata_record.get("category", "") record["title_fr"] = "" record["series"] = "" record["rights"] = [] if ('license' in socrata_record) and socrata_record['license']: # Winnipeg, Nova Scotia, PEI record["rights"].append(socrata_record['license'].get("name", "")) record["rights"].append(socrata_record['license'].get("termsLink", "")) record["rights"] = "\n".join(record["rights"]) record["rights"] = record["rights"].strip() if record["rights"] == "See Terms of Use": # Calgary, Edmonton record["rights"] = [] if ('metadata' in socrata_record) and socrata_record['metadata']: if ('custom_fields' in socrata_record['metadata']) and socrata_record['metadata']['custom_fields']: if ('License/Attribution' in socrata_record['metadata']['custom_fields']) and socrata_record['metadata']['custom_fields']['License/Attribution']: if ('License URL' in socrata_record['metadata']['custom_fields']['License/Attribution'] and socrata_record['metadata']['custom_fields']['License/Attribution']['License URL']): if record["rights"] == "" or record["rights"] == []: # Calgary record["rights"] = socrata_record['metadata']['custom_fields']['License/Attribution']['License URL'] if ('License-URL' in socrata_record['metadata']['custom_fields']['License/Attribution'] and socrata_record['metadata']['custom_fields']['License/Attribution']['License-URL']): if record["rights"] == "" or record["rights"] == []: # Calgary record["rights"] = socrata_record['metadata']['custom_fields']['License/Attribution']['License-URL'] elif ('Licence' in socrata_record['metadata']['custom_fields']) and socrata_record['metadata']['custom_fields']['Licence']: 
if ('Licence' in socrata_record['metadata']['custom_fields']['Licence']) and socrata_record['metadata']['custom_fields']['Licence']['Licence']: if record["rights"] == "" or record["rights"] == []: # Winnipeg record["rights"] = socrata_record['metadata']['custom_fields']['Licence']['Licence'] elif ('Attributes' in socrata_record['metadata']['custom_fields']) and socrata_record['metadata']['custom_fields']['Attributes']: if ('Licence' in socrata_record['metadata']['custom_fields']['Attributes']) and socrata_record['metadata']['custom_fields']['Attributes']['Licence']: if record["rights"] == "" or record["rights"] == []: # Strathcona record["rights"] = socrata_record['metadata']['custom_fields']['Attributes']['Licence'] if record["rights"] == "" or record["rights"] == []: record.pop("rights") # Continue to default to English for our current Socrata repositories. # For Nova Scoatia, "fra" language refers to the dataset, not the metadata. # language = self.default_language # if "metadata" in socrata_record: # if "custom_fields" in socrata_record["metadata"]: # if "Detailed Metadata" in socrata_record["metadata"]["custom_fields"]: # if "Language" in socrata_record["metadata"]["custom_fields"]["Detailed Metadata"]: # # Nova Scotia # language = socrata_record["metadata"]["custom_fields"]["Detailed Metadata"]["Language"] # elif "Dataset Information" in socrata_record["metadata"]["custom_fields"]: # if "Language" in socrata_record["metadata"]["custom_fields"]["Dataset Information"]: # # Prince Edward Island # language = socrata_record["metadata"]["custom_fields"]["Dataset Information"]["Language"] # language = language.lower() # # if language in ["fr", "fre", "fra", "french"]: # language = "fr" # else: # language = "en" return record @rate_limited(5) def _update_record(self,record): try: socrata_record = self.socratarepo.get_metadata(record['local_identifier']) oai_record = self.format_socrata_to_oai(socrata_record,record['local_identifier']) if oai_record: self.db.write_record(oai_record, self) return True except Exception as e: self.logger.error("Updating record {} failed: {}".format(record['local_identifier'], e)) if self.dump_on_failure == True: try: print(socrata_record) except: pass # Touch the record so we do not keep requesting it on every run self.db.touch_record(record) self.error_count = self.error_count + 1 if self.error_count < self.abort_after_numerrors: return True return False
# def createDaemon():
#     try:
#         # Store the Fork PID
#         pid = os.fork()
#         if pid > 0:
#             print('PID: ', pid)
#             os._exit(0)
#     except error:
#         print('Unable to fork. Error: ', error.errno, error.strerror)
#         os._exit(1)
#     runTask()


if __name__ == '__main__':
    # API config (Do not change)
    app_token = "1CKHfUB8qIpEQKUM1JNdiEK1N"
    socrata_dataset_identifier = "xdss-u53e"
    client = Socrata("health.data.ny.gov", app_token)
    metadata = client.get_metadata(socrata_dataset_identifier)

    with open("Last updated.txt", 'r') as fp:
        last_updated = fp.read()
    last_updated_obj = datetime.datetime.strptime(last_updated, '%Y-%m-%dT00:00:00.000')

    while True:
        runTask()
        with open("logs.txt", "a+") as fp:
            fp.write("Last run at " + str(time.ctime()) + "\n")
        time.sleep(600)
# dataset_identifier: '9mfq-cb36'
# sample query:
# https://data.cdc.gov/resource/9mfq-cb36.json?submission_date=2021-02-20T00:00:00.000

import requests
import json
import os
from sodapy import Socrata

socrata_domain = 'data.cdc.gov'
socrata_dataset_identifier = '9mfq-cb36'
# Token lives in secrets.sh and is exported as an environment variable
socrata_token = os.environ['SOCRATA_APP_TOKEN']

client = Socrata(socrata_domain, socrata_token)
metadata = client.get_metadata('9mfq-cb36')  # this is the metadata, not the data

########### FINDING OUT ABOUT THE METADATA #############

#print(metadata.keys())
#OUTPUT: dict_keys(['id', 'name', 'assetType', 'attribution', 'attributionLink', 'averageRating', 'category', 'createdAt', 'description', 'displayType', 'downloadCount', 'hideFromCatalog', 'hideFromDataJson', 'licenseId', 'newBackend', 'numberOfComments', 'oid', 'provenance', 'publicationAppendEnabled', 'publicationDate', 'publicationGroup', 'publicationStage', 'rowsUpdatedAt', 'rowsUpdatedBy', 'tableId', 'totalTimesRated', 'viewCount', 'viewLastModified', 'viewType', 'approvals', 'columns', 'grants', 'license', 'metadata', 'owner', 'query', 'rights', 'tableAuthor', 'tags', 'flags'])

#print(metadata['name'])
#OUTPUT: United States COVID-19 Cases and Deaths by State over Time

#print([x['name'] for x in metadata['columns']], ('***'*20))
#OUTPUT: ['submission_date', 'state', 'tot_cases', 'conf_cases', 'prob_cases', 'new_case', 'pnew_case', 'tot_death', 'conf_death', 'prob_death', 'new_death', 'pnew_death', 'created_at', 'consent_cases', 'consent_deaths']

#meta_amount = [x for x in metadata['columns'] if x['name'] == 'submission_date'][0]
#print(len(meta_amount), meta_amount)
#OUTPUT: 10 {'id': 481357041, 'name': 'submission_date', 'dataTypeName': 'calendar_date', 'description': 'Date of counts', 'fieldName': 'submission_date', 'position': 1, 'renderTypeName': 'calendar_date', 'tableColumnId': 105526416, 'cachedContents': {'non_null': '24120', 'largest': '2021-02-26T00:00:00.000', 'null': '0', 'top': [{'item': '2020-03-28T00:00:00.000', 'count': '60'}, {'item': '2020-11-27T00:00:00.000', 'count': '60'}, {'item': '2021-01-07T00:00:00.000', 'count': '60'}, {'item': '2021-01-30T00:00:00.000', 'count': '60'}, {'item': '2020-05-08T00:00:00.000', 'count': '60'}, {'item': '2020-04-11T00:00:00.000', 'count': '60'}, {'item': '2021-01-17T00:00:00.000', 'count': '60'}, {'item': '2020-10-28T00:00:00.000', 'count': '60'}, {'item': '2020-09-04T00:00:00.000', 'count': '60'}, {'item': '2020-03-25T00:00:00.000', 'count': '60'}, {'item': '2020-05-01T00:00:00.000', 'count': '60'}, {'item': '2020-08-14T00:00:00.000', 'count': '60'}, {'item': '2020-03-14T00:00:00.000', 'count': '60'}, {'item': '2020-11-17T00:00:00.000', 'count': '60'}, {'item': '2020-08-02T00:00:00.000', 'count': '60'}, {'item': '2020-07-09T00:00:00.000', 'count': '60'}, {'item': '2020-01-29T00:00:00.000', 'count': '60'}, {'item': '2020-07-10T00:00:00.000', 'count': '60'}, {'item': '2020-12-25T00:00:00.000', 'count': '60'}, {'item': '2020-04-19T00:00:00.000', 'count': '60'}], 'smallest': '2020-01-22T00:00:00.000', 'cardinality': '402'}, 'format': {'view': 'date'}}
### What you can see here are the most recent and the earliest dates (largest and smallest), and that there are '24120' non-null values
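# A small follow-on sketch (not in the original): flatten the column metadata
# into a DataFrame for easier inspection; pandas is an extra dependency assumed
# here beyond the imports above.
import pandas as pd

cols = pd.DataFrame(metadata['columns'])
print(cols[['name', 'fieldName', 'dataTypeName']].head())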