def fetch_311_data(zip, max_query_results=None, num_entries_to_search=10000,
                   t_out=10) -> List[Dict[str, Any]]:
    nyc_311_dataset_domain = "data.cityofnewyork.us"
    nyc_311_dataset_identifier = "fhrw-4uyv"
    try:
        nyc_311_dataset_token = get_311_socrata_key()
    except KeyError:
        # Works with None, but fewer requests can be made without a token.
        nyc_311_dataset_token = None
    client = Socrata(nyc_311_dataset_domain, nyc_311_dataset_token)
    client.timeout = t_out
    try:
        return client.get(
            nyc_311_dataset_identifier,
            select="created_date, incident_zip, incident_address, city, "
                   "complaint_type, descriptor, status",
            # q=str(zip),  # uncomment to query directly on the server side (may lead to timeouts)
            order="created_date DESC",
            limit=num_entries_to_search,
        )
    except requests.exceptions.Timeout as exc:
        raise TimeoutError from exc
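# Hedged usage sketch (not from the original source): because the server-side
# q=str(zip) filter above is commented out, callers presumably filter the
# returned records client-side. The ZIP code and the truncation via
# max_query_results below are illustrative assumptions.
records = fetch_311_data("10001", max_query_results=20)
matches = [r for r in records if r.get("incident_zip") == "10001"][:20]
print(f"{len(matches)} recent complaints found for ZIP 10001")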
def run(self):
    # S3 authentication
    ses = boto3.session.Session(profile_name='luigi_dpa', region_name='us-west-2')
    s3_resource = ses.resource('s3')
    obj = s3_resource.Bucket(self.bucket)
    print(ses)

    # Client authentication:
    client = Socrata(settings.get('dburl'),
                     settings.get('apptoken'),
                     username=settings.get('user'),
                     password=settings.get('pass'))

    # Results are returned as JSON from the API and converted to a
    # Python list by sodapy.
    client.timeout = 1000
    limit = 1000000000

    # Query
    results = client.get(
        "erm2-nwe9",
        limit=limit,
        where=f"created_date between '{self.year}-{self.month}-{self.day}T00:00:00.000' "
              f"and '{self.year}-{self.month}-{self.day}T23:59:59.999'")

    with self.output().open('w') as json_file:
        json.dump(results, json_file)
def fetch_nycOpenData(url, timeout, row_limit):
    # Credentials are placeholders here (the original hard-coded a plaintext
    # password); in practice they should be loaded from configuration, never
    # committed to source control.
    client = Socrata("data.cityofnewyork.us",
                     "eXBsiqAwodiCMHDYEheExaF3v",
                     "*****@*****.**",
                     "********")
    client.timeout = timeout

    # Returned as JSON from the API / converted to a Python list of
    # dictionaries by sodapy.
    results = client.get(url, limit=row_limit)

    # Convert to pandas DataFrame.
    results_df = pd.DataFrame.from_records(results)
    return results_df
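# Illustrative call (assumption: 'erm2-nwe9' is the NYC 311 service-requests
# dataset, as used elsewhere in this collection): fetch the first 2,000 rows
# with a 60-second timeout.
df_311 = fetch_nycOpenData("erm2-nwe9", timeout=60, row_limit=2000)
print(df_311.shape)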
def setup_socrata_client(credentials, nadac_parameters):
    """
    Set up client to access database on Socrata.

    Args:
        credentials (dict): Socrata app token from .env file
        nadac_parameters (dict): Parameters for downloading NADAC dataset
            from .env file
            WEBSITE: url of dataset (less 'http://www.')

    Returns:
        Socrata client
    """
    client = Socrata(nadac_parameters['WEBSITE'], credentials['APP_TOKEN'])
    client.timeout = int(nadac_parameters['TIMEOUT'])
    return client
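# Minimal usage sketch, assuming the .env-derived dicts look like the
# docstring implies; the domain, token, and timeout values are placeholders.
credentials = {'APP_TOKEN': 'your-app-token'}
nadac_parameters = {'WEBSITE': 'healthdata.gov', 'TIMEOUT': '60'}
client = setup_socrata_client(credentials, nadac_parameters)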
def main():
    logger.info('Creating Spark session')
    # Create a Spark session to dump the data into Hive.
    spark = pyspark.sql.SparkSession.builder \
        .master('local[*]') \
        .config("spark.sql.warehouse.dir", HIVE_WAREHOUSE_DIR) \
        .config("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation", True) \
        .enableHiveSupport() \
        .getOrCreate()
    spark.sparkContext.setLogLevel('ERROR')

    logger.info('Initializing Hive database')
    spark.sql(f'drop database if exists {HIVE_DATABASE} cascade')
    spark.sql(f'create database {HIVE_DATABASE}')

    logger.info('Accessing remote dataset')
    soda_client = Socrata(DOMAIN, APP_TOKEN)  # client for accessing the API
    soda_client.timeout = 50  # otherwise we will get a lot of timeout errors
    dataset_size = query_size(soda_client, DATASET)
    num_batches = dataset_size // BATCH_SIZE + 1
    logger.info('Remote dataset size: %d (%d batches)', dataset_size, num_batches)
    schema = make_schema()  # Spark dataframe schema for our data

    logger.info('Starting process pool')
    with multiprocessing.Pool(NUM_WORKERS) as pool:
        fetch_batch_partial = functools.partial(fetch_batch,
                                                soda_client=soda_client,
                                                batch_size=BATCH_SIZE)
        batch_it = pool.imap_unordered(fetch_batch_partial, range(num_batches))
        logger.info('Fetching data')
        for idx, batch in tqdm(batch_it, total=num_batches):
            df = spark.createDataFrame(batch, schema=schema)
            df = enforce_types(df)
            df.createOrReplaceTempView('tmp_table')
            if spark.catalog._jcatalog.tableExists(HIVE_TABLE):
                # 'insert into ... from tmp_table' is not valid SQL;
                # select from the temp view instead.
                spark.sql(f'insert into {HIVE_TABLE} select * from tmp_table')
            else:
                spark.sql(f'create table {HIVE_TABLE} as select * from tmp_table')

    logger.info('Probing the database')
    df = spark.sql(f"select * from {HIVE_TABLE} limit 200")
    logger.info(f'Received {df.count()} rows from {HIVE_TABLE}')

    soda_client.close()
    spark.stop()
    logger.info('Fetching finished')
    logger.info(f'All data has been written to Hive table {HIVE_TABLE}')
def get_ozone_data(yr, st):
    """
    Get ozone data from the CDC API.

    yr - year of interest - string
    st - FIPS code for the state of interest - string
    """
    # Establish a connection to the CDC's data via Socrata.
    client = Socrata("data.cdc.gov",
                     parsed_yaml['cdc_key'],
                     parsed_yaml['cdc_username'],
                     parsed_yaml['cdc_password'])

    # Set timeout to 60 seconds.
    client.timeout = 60

    # Get the number of records in the dataset.
    record_count = client.get("kmf5-t9yc",
                              where=f"year2 = '{yr}' AND statefips = '{st}'",
                              select="COUNT(*)")
    print("The record count is", record_count)
    print("Getting data from the Socrata API...")

    # Get data from the dataset.
    start = 0            # start at page 0
    chunk_size = 50000   # fetch 50,000 rows at a time
    results = []         # empty list to store data
    while True:
        # Add data to the list.
        results.extend(
            client.get("kmf5-t9yc",
                       where=f"year2 = '{yr}' AND statefips = '{st}'",  # SoQL filter
                       select="year2, month, countyfips, o3_max_pred",  # columns of interest
                       offset=start,
                       limit=chunk_size))
        # Pagination.
        start = start + chunk_size
        print("At record number", start)
        # Stop adding to the list once all the data has been fetched.
        if start > int(record_count[0]['COUNT']):
            break

    # Return the list so that it can be stored in a dataframe.
    return results
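# Illustrative follow-up (assumed, not from the source): load the returned
# list of dicts into pandas. The year and state FIPS code ('36' is New York)
# are placeholder arguments.
import pandas as pd
ozone_df = pd.DataFrame.from_records(get_ozone_data('2016', '36'))
print(ozone_df.head())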
def run(self):
    '''
    Query the 311 API for data.
    '''
    # Client authentication:
    client = Socrata(settings.get('dburl'),
                     settings.get('apptoken'),
                     username=settings.get('user'),
                     password=settings.get('pass'))

    # Results are returned as JSON from the API and converted to a
    # Python list by sodapy.
    client.timeout = 50
    results = client.get("erm2-nwe9", limit=1)

    with self.output().open('w') as json_file:
        json.dump(results, json_file)
def get_trip_records(limit=100000):
    client = Socrata('data.cityofchicago.org',
                     'Tk6RhuGAFvF9P4ehsysybj3IW',
                     username="******",
                     password="******")
    client.timeout = 10000
    results = client.get(
        "m6dm-c72p",
        limit=limit,
        select='''trip_id, trip_start_timestamp, trip_end_timestamp,
                  trip_seconds, trip_miles, pickup_community_area,
                  dropoff_community_area, fare, tip, additional_charges,
                  trip_total''')
    return pd.DataFrame.from_records(results)
def run(self):
    # Client authentication:
    client = Socrata(settings.get('dburl'),
                     settings.get('apptoken'),
                     username=settings.get('user'),
                     password=settings.get('pass'))

    # Results are returned as JSON from the API and converted to a
    # Python list by sodapy.
    client.timeout = 1000
    limit = 1000000000

    # Create the raw/year/month/day folder hierarchy in one call
    # (replaces the original's repeated os.path.exists/os.mkdir checks
    # with no-op else branches).
    os.makedirs(f'{path_raw}/{self.year}/{self.month}/{self.day}', exist_ok=True)

    # Query
    results = client.get(
        "erm2-nwe9",
        limit=limit,
        where=f"created_date between '{self.year}-{self.month}-{self.day}T00:00:00.000' "
              f"and '{self.year}-{self.month}-{self.day}T23:59:59.999'")

    with self.output().open('w') as json_file:
        json.dump(results, json_file)
def queryApi311(year, month, day):
    # Used in Task1: query the API.
    # Client authentication:
    client = Socrata(settings.get('dburl'),
                     settings.get('apptoken'),
                     username=settings.get('user'),
                     password=settings.get('pass'))

    # Results are returned as JSON from the API and converted to a
    # Python list by sodapy.
    client.timeout = 1000
    limit = 1000000000

    # Query
    results = client.get(
        "erm2-nwe9",
        limit=limit,
        where=f"created_date between '{year}-{month}-{day}T00:00:00.000' "
              f"and '{year}-{month}-{day}T23:59:59.999'")
    return results
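# Hedged usage sketch: pull one day of 311 records and load them into pandas;
# the date components below are placeholders.
import pandas as pd
day_df = pd.DataFrame.from_records(queryApi311('2020', '03', '15'))
print(len(day_df), 'records on 2020-03-15')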
def get_data(chunk_size=100000, begin_date='2020-01-01'):
    # Define parameters for endpoint, dataset, and app token.
    path = '../data/'
    data_url = 'data.cityofnewyork.us'
    dataset = 'erm2-nwe9'
    with open(path + 'client_secret.json') as f:
        credentials = json.load(f)
    app_token = credentials['app_token']

    # Set up the connection; an application token is needed to override
    # throttling limits. Username and password are only required for
    # creating or modifying data.
    client = Socrata(data_url, app_token)
    client.timeout = 6000

    # Count the number of records in the desired dataset. Both this count
    # and the paging query below filter on begin_date (the original
    # hard-coded two different dates, which made the page count wrong).
    record_count = client.get(dataset,
                              select='count(*)',
                              where=f"created_date >= '{begin_date}'")
    total_count = record_count[0]['count']
    print(total_count)

    start = 0
    results = []
    # Paginate through the dataset chunk_size rows at a time to get all
    # records since begin_date.
    while True:
        print(f'{start} rows retrieved')
        results.extend(
            client.get(
                dataset,
                select="unique_key, created_date, closed_date, agency, "
                       "agency_name, complaint_type, descriptor, "
                       "location_type, incident_zip, borough, address_type, "
                       "city, status, latitude, longitude, location",
                where=f"created_date >= '{begin_date}'",
                limit=chunk_size,
                offset=start))
        start += chunk_size
        if start > int(total_count):
            break
    return results
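# Minimal sketch (assumptions: pandas is available and the columns match the
# select clause above): convert the paginated results to a DataFrame and
# parse the timestamp column for downstream analysis.
import pandas as pd
raw = get_data(chunk_size=100000, begin_date='2020-01-01')
df = pd.DataFrame.from_records(raw)
df['created_date'] = pd.to_datetime(df['created_date'])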
def run(self):
    '''
    Query the 311 API for data.
    '''
    # S3 authentication
    ses = boto3.session.Session(profile_name='luigi_dpa', region_name='us-west-2')
    s3_resource = ses.resource('s3')
    obj = s3_resource.Bucket(self.bucket)
    print(ses)

    # Client authentication:
    client = Socrata("data.cityofnewyork.us",
                     "N2WpW61JnP5RoT5mrYGUaSUg9",
                     username="******",
                     password="******")

    # Results are returned as JSON from the API and converted to a
    # Python list by sodapy.
    client.timeout = 1000
    results = client.get("erm2-nwe9", limit=100)

    with self.output().open('w') as json_file:
        json.dump(results, json_file)
def fetch_res_data(zip, max_query_results=20, num_entries_to_search=10000,
                   t_out=10) -> List[Dict[str, Any]]:
    nyc_res_dataset_domain = "data.cityofnewyork.us"
    nyc_res_dataset_identifier = "43nn-pn8j"
    # Works with None, but fewer requests can be made without a token.
    nyc_res_dataset_token = None
    client = Socrata(nyc_res_dataset_domain, nyc_res_dataset_token)
    client.timeout = t_out
    try:
        return client.get(
            nyc_res_dataset_identifier,
            select="dba, boro, zipcode, violation_description",
            # q=str(zip),  # uncomment to query directly on the server side (may lead to timeouts)
            order="score DESC",
            limit=num_entries_to_search,
        )
    except requests.exceptions.Timeout as exc:
        raise TimeoutError from exc
from Statewide_Payroll import Fraction_Statewide_Payroll
from DCP_Capital import get_DCP_capital
from Statewide_Fringe import get_statewide_fringe

helper_dir = "/Users/alexanderweinstein/Documents/Harris/Summer2020/Carceral_Budgeting/Exploratory/Agency_Classes/Agency_Helpers"
sys.path.insert(0, helper_dir)

from SOQL_Constructors import construct_expenditures_SOQL, construct_budget_SOQL, construct_payroll_SOQL, \
    construct_settlements_SOQL
from Find_Data import find_data
from CY_To_FY import convert_CY_to_FY

app_token = "2Qa1WiG8G4kj1vGVd2noK7zP0"
client = Socrata("cthru.data.socrata.com", app_token)
client.timeout = 40


class StateAgency(Agency):
    """Last updated July 10th to get revenue data into its own dataframe.

    Possible to do: return one summary dataframe instead of expenditures,
    budget, and revenue by year.
    Another to do: add client in the initialize-agencies code.
    To do: fix how year_range is set; it's getting passed from multiple
    places and creating conflicts. This really needs fixing because it is
    causing lots of bugs: set year_range in one place, when the agency
    object is created, and have it propagate from there.
    Also: something strange is happening where, once initialize_agencies has
    been run and I call it again, the objects aren't re-initialized. Should
    figure out what is going on. Actually, objects are getting initialized
    when I import initialize_agencies, which isn't what I want."""

    def __init__(self, alias, official_name, year_range, category,
                 correction_function=lambda x: x,
                 settlement_agencies=None, payroll_vendors=[],
                 payroll_official_name=None, client=None,
def get_client(self):
    client = Socrata(self.socrata_domain, self.socrata_token)
    client.timeout = self.timeout
    return client
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
from geopy.distance import geodesic
import matplotlib.pyplot as plt
import datetime as dt
import sqlite3
from sqlite3 import Error
from sqlalchemy import create_engine
from sodapy import Socrata

from prep import wrangle as wr

# Load data.
# None indicates that no credentials are required for public datasets.
client = Socrata("data.cityofchicago.org", None)
client.timeout = 120

# Get bikeshare records with sodapy.
results = client.get("fg6s-gzvg", limit=20000)

# Convert to pandas DataFrame.
results_df = pd.DataFrame.from_records(results)

# Get all stations.
stations = results_df[['from_station_id', 'from_station_name',
                       'from_latitude', 'from_longitude']].reset_index(drop=True)
stations.drop_duplicates(inplace=True)
# Strip the 'from_' prefix from the column names.
cols = stations.columns.tolist()
stations.columns = [x.split('_', 1)[1] for x in cols]
def main(
    dataset_id,
    table_name,
    database,
    socrata_username,
    socrata_password,
    where_clause,
    existing_table_rows="drop",
):
    """
    Read in dataset from Socrata and write output to Platform

    Parameters
    ----------
    dataset_id: str
        Socrata dataset identifier
    table_name: str, optional
        destination table in Platform (schema.table)
    database: str, optional
        destination database in Platform
    socrata_username: str, optional
        username for Socrata account, required for private datasets
    socrata_password: str, optional
        password for Socrata account, required for private datasets
    where_clause: str, optional
        SoQL for filtering dataset
    existing_table_rows: str, optional
        options to pass to dataframe_to_civis command

    Outputs
    -------
    Adds data as file output and, if table_name and database are specified,
    writes data to Platform
    """
    socrata_client = Socrata(
        "data.lacity.org", None, username=socrata_username, password=socrata_password
    )
    socrata_client.timeout = 50
    raw_metadata = socrata_client.get_metadata(dataset_id)
    dataset = _read_paginated(socrata_client, dataset_id, where=where_clause)
    civis_client = civis.APIClient()
    if dataset.empty:
        msg = f"No rows returned for dataset {dataset_id}."
        LOG.warning(msg)
        write_and_attach_jsonvalue(json_value=msg, name="Error", client=civis_client)
    else:
        data_file_name = (
            f"{dataset_id}_extract_{datetime.now().strftime('%Y-%m-%d')}.csv"
        )
        file_id = _store_and_attach_dataset(
            client=civis_client, df=dataset, filename=data_file_name
        )
        LOG.info(f"add the {file_id}")

        if table_name:
            # Optionally start table upload.
            LOG.info(f"Storing data in table {table_name} on database {database}")
            print("writing table")
            run_id = os.environ["CIVIS_RUN_ID"]
            job_id = os.environ["CIVIS_JOB_ID"]
            dataset["civis_job_id"] = job_id
            dataset["civis_run_id"] = run_id
            table_upload = civis.io.dataframe_to_civis(
                dataset,
                database=database,
                table=table_name,
                existing_table_rows=existing_table_rows,
            ).result()
            LOG.info(f"using {table_upload}")

        # Parse raw_metadata to extract useful fields and attach both raw
        # and cleaned metadata as script outputs.
        metadata_file_name = (
            f"{dataset_id}_metadata_{datetime.now().strftime('%Y-%m-%d')}.json"
        )
        metadata_paths = {
            "Proposed access level": "metadata.custom_fields.Proposed Access Level.Proposed Access Level",  # noqa: E501
            "Description": "description",
            "Data updated at": "rowsUpdatedAt",
            "Data provided by": "tableAuthor.screenName",
        }
        _, clean_metadata = _store_and_attach_metadata(
            client=civis_client,
            metadata=raw_metadata,
            metadata_paths=metadata_paths,
            filename=metadata_file_name,
        )

        if table_name:
            sql = f'COMMENT ON TABLE {table_name} IS \'{clean_metadata["Description"]}\''
            civis.io.query_civis(
                sql, database=database, polling_interval=2, client=civis_client
            ).result()
def main(
    socrata_client_url: str,
    dataset_id: str,
    civis_table_name: str,
    civis_database: str,
    database_type: str,
    socrata_username: str,
    socrata_password: str,
    grant_group: str,
    varchar_len: str = None,
    action_existing_table_rows: str = "drop",
):
    """
    Read in dataset from Socrata and write output to Platform

    Parameters
    ----------
    socrata_client_url: str
        url of the Socrata portal being referenced
    dataset_id: str
        Socrata dataset identifier
    civis_table_name: str
        destination table in Platform (schema.table)
    civis_database: str
        destination database in Platform
    database_type: str
        type of destination database
    socrata_username: str, optional
        username for Socrata account, required for private datasets
    socrata_password: str, optional
        password for Socrata account, required for private datasets
    grant_group: str
        group(s) passed to the Civis API to be granted select table access
    varchar_len: str
        sets the varchar length when datatypes are passed to the Civis API;
        256 is the default
    action_existing_table_rows: str, optional
        options to pass to dataframe_to_civis command

    Outputs
    -------
    Adds data as file output and, if table_name and database are specified,
    writes data to Platform
    """
    # Define the Socrata client.
    socrata_client = Socrata(socrata_client_url, None,
                             username=socrata_username,
                             password=socrata_password)
    # Define the Civis client.
    civis_client = civis.APIClient()
    socrata_client.timeout = 50

    # Collect sample data from the dataset and write it to a dataframe.
    sample_data = socrata_client.get(dataset_id, limit=5, content_type="csv",
                                     exclude_system_fields=False, offset=0)
    sample_data_df = results_to_df(sample_data)
    if sample_data_df.empty:
        msg = f"No rows returned for dataset {dataset_id}."
        LOG.warning(msg)
        write_and_attach_jsonvalue(json_value=msg, name="Error", client=civis_client)
        os._exit(1)  # exit if no rows are available in the dataset

    raw_metadata = socrata_client.get_metadata(dataset_id)  # fetch raw metadata

    # Choose the appropriate SQL types for datatype mapping, depending on
    # the destination database.
    sql_type = select_sql_map(database_type, varchar_len)

    (
        civis_table_columns,
        point_columns,
        pandas_column_order,
        extra_columns,
    ) = create_col_type_dict(raw_metadata, sample_data_df, sql_type)
    # Creates a Civis-specific array of dicts that maps column names to
    # datatypes, using Socrata metadata as guidance. Also provides the point
    # columns used to clean point-column formatting during import, and an
    # array of columns in the order of the mapping dict
    # (civis_file_to_table is sensitive to order).
print("Columns present in Metadata but not in data:", extra_columns) consolidated_csv_path = _read_paginated( client=socrata_client, dataset_id=dataset_id, point_columns=point_columns, column_order=pandas_column_order, ) # reads in socrata data in chunks (using offset and page_limit), and # appenda all to one csv and outputs path here data_file_name = f"{dataset_id}_extract_{datetime.now().strftime('%Y-%m-%d')}.csv" uploaded_file_id = _store_and_attach_dataset_csv( client=civis_client, csv_path=consolidated_csv_path, filename=data_file_name) print("file_id:", uploaded_file_id) LOG.info(f"add the {uploaded_file_id}") LOG.info( f"Storing data in table {civis_table_name} on database {civis_database}" ) table_upload = civis.io.civis_file_to_table( file_id=uploaded_file_id, database=civis_database, table=civis_table_name, table_columns=civis_table_columns, existing_table_rows=action_existing_table_rows, headers=True, ).result() LOG.info(f"using {table_upload}") # takes in file id and writes to table metadata_file_name = ( f"{dataset_id}_metadata_{datetime.now().strftime('%Y-%m-%d')}.json") # parse raw_metadata to extract useful fields and attach both raw and # cleaned metadata as script outputs upload_metadata_paths = { "Description": "description", "Data updated at": "rowsUpdatedAt", "Data provided by": "tableAuthor.screenName", } _, clean_metadata = _store_and_attach_metadata( client=civis_client, metadata=raw_metadata, metadata_paths=upload_metadata_paths, filename=metadata_file_name, ) if civis_table_name: sql = f""" COMMENT ON TABLE {civis_table_name} IS \'{clean_metadata["Description"]}\' """ civis.io.query_civis(sql, database=civis_database, polling_interval=2, client=civis_client).result() if grant_group: sql = f"GRANT ALL ON {civis_table_name} TO GROUP {grant_group}" civis.io.query_civis(sql, database=civis_database, polling_interval=2, client=civis_client).result()
def controllerCenter(allIDS):
    flagCounter = 0
    limit = 10
    token = 'sC4N6wXghMXaL2C3uUxVMphf0'
    client = Socrata('www.datos.gov.co', token,
                     username="******", password="******")
    client.timeout = 180
    print("Datasets to evaluate:", len(allIDS))

    for i in allIDS:
        resultado = ""
        flagCounter += 1
        try:
            try:
                # Build the inputs for the process.
                datasetURL = "https://www.datos.gov.co/resource/{}.json".format(i)
                print(flagCounter)
                print(datasetURL)
                # Validate the URL and check that it is reachable.
                statusDataset = requests.get(datasetURL, timeout=60)
                # Fetch the dataset contents.
                datosDataSet = client.get(i, limit=limit)
                # Fetch the dataset metadata.
                metaDataset = client.get_metadata(i)
                # Check that the dataset is not empty.
                frametoValidate = pd.DataFrame.from_records(datosDataSet)
                # Validate the dataset metadata.
                metaData = metaDataset['metadata']
            except KeyError as error:
                print(error)
                logging.error(str(error))
                indexCompletitud = 0
                indexCredibilidad = 0
                indexActualidad = 0
                indexTrazabilidad = 0
                indexDisponibildiad = 0
                indexConformidad = 0
                indexComprensibilidad = 0
                indexPortabilidad = 0
                indexConsistencia = 0
                indexExactitud = 0
            except TimeoutError as error:
                logging.error(str(error))
            except requests.exceptions.ConnectionError as error:
                logging.error(str(error))
            else:
                # Compute the indicators for each evaluated dataset.
                if statusDataset.status_code == 200 and frametoValidate.empty == False:
                    # Start the result row with the dataset id.
                    resultado = str(i)
                    # First indicator: availability.
                    resultado = resultado + ';' + str(10)
                    # Create the evaluator instance.
                    evaluation = Evaluation()
                    # Completeness indicator
                    indexCompletitud = evaluation.indicadorCompletitud(frametoValidate)
                    resultado = resultado + ',' + str(indexCompletitud)
                    # Currency indicator
                    indexActualidad = evaluation.indicadorActualidad(metaDataset, metaData)
                    resultado = resultado + ',' + str(indexActualidad)
                    # Credibility indicator
                    indexCredibilidad = evaluation.indicadorCredibilidad(metaDataset)
                    resultado = resultado + ',' + str(indexCredibilidad)
                    # Traceability indicator
                    indexTrazabilidad = evaluation.indicadorTrazabilidad(metaDataset)
                    resultado = resultado + ',' + str(indexTrazabilidad)
                    # Conformity indicator
                    indexConformidad = evaluation.indicadorConformidad(metaDataset)
                    resultado = resultado + ',' + str(indexConformidad)
                    # Understandability indicator
                    indexComprensibilidad = evaluation.indicadorComprensibilidad(metaDataset, frametoValidate)
                    resultado = resultado + ',' + str(indexComprensibilidad)
                    # Portability indicator
                    indexPortabilidad = evaluation.indicadorPortabilidad(datosDataSet)
                    resultado = resultado + ',' + str(indexPortabilidad)
                    # Consistency indicator
                    indexConsistencia = evaluation.indicadorConsisetencia(frametoValidate)
                    resultado = resultado + ',' + str(indexConsistencia)
                    # Accuracy indicator
                    indexExactitud = evaluation.indicadorExactitud(metaDataset, frametoValidate)
                    resultado = resultado + ',' + str(indexExactitud)
                else:
                    print("error")
            finally:
                with open("Quality_Indicators.csv", 'a', encoding='UTF-8') as qIndicators:
                    qIndicators.write(str(resultado))
                    qIndicators.write('\n')
        except BrokenPipeError as errorBroken:
            logging.error(str(errorBroken))
            # The ';' separator matches the row format written above
            # (the original omitted it).
            resultado = str(i) + ";0,0,0,0,0,0,0,0,0,0"
            with open("Quality_Indicators.csv", 'a', encoding='UTF-8') as qIndicators:
                qIndicators.write(str(resultado))
                qIndicators.write('\n')
args = parser.parse_args()
start_date = args.StartDate
end_date = args.EndDate
if args.SaveDir is None:
    save_dir = "/home/jtl/Dropbox (MIT)/DOE_TSMS/00_Data/raw_data/mobility/"
else:
    save_dir = args.SaveDir

apitoken = cityofchicago["apitoken"]
username = cityofchicago["username"]
pwd = cityofchicago["pwd"]

client = Socrata("data.cityofchicago.org", apitoken, username, pwd)
# The default timeout is 10 s; increase it to 2 hours.
client.timeout = 7200

for y, m in [(i.year, i.month) for i in pd.period_range(start_date, end_date, freq="M")]:
    print("Downloading %d-%d" % (y, m))
    time = datetime.datetime.now()
    # Slow:
    # results = client.get("m6dm-c72p", where="date_extract_y(trip_start_timestamp) = " + str(y) + " AND date_extract_m(trip_start_timestamp) = " + str(m), content_type="csv", limit=2000)
    pad = '0' if m < 10 else ''
    # nextmonth appears to be a project-specific helper; the stdlib calendar
    # module does not provide it.
    (y1, m1) = calendar.nextmonth(year=y, month=m)
    if m1 < 10:
        pad1 = '0'
import os
import json
from datetime import datetime
from time import sleep

from sodapy import Socrata
import requests
from requests import HTTPError
from elasticsearch import Elasticsearch

DATA_URL = "data.cityofnewyork.us"
DATA_ID = 'nc67-uf89'
app_token = os.environ.get("APP_TOKEN")

client = Socrata(DATA_URL, app_token)
client.timeout = 60


def create_and_update_index(index_name):
    es = Elasticsearch()
    try:
        es.indices.create(index=index_name)
    except Exception:
        # The index already exists (or could not be created); continue.
        pass
    return es


def data_formatting(datastring):
    # Cast any monetary fields to float in place.
    for key, value in datastring.items():
        if 'amount' in key:
            datastring[key] = float(value)
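# Hedged sketch of how data_formatting is presumably applied before indexing
# into Elasticsearch; the row limit and index name are illustrative assumptions.
results = client.get(DATA_ID, limit=10)
es = create_and_update_index('nyc-payroll')
for doc in results:
    data_formatting(doc)  # casts any '*amount*' fields to float in place
    es.index(index='nyc-payroll', body=doc)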