Esempio n. 1
0
def test_create():
    """Exercise ``client.create`` against a mocked HTTP adapter.

    Verifies that the recorded request payload carries the dataset-level
    and column-level keys, and that the parsed response is a dict whose
    ``id`` has length 9.
    """
    adapter = requests_mock.Adapter()
    session_adapter = {"prefix": PREFIX, "adapter": adapter}
    client = Socrata(DOMAIN,
                     APPTOKEN,
                     username=USERNAME,
                     password=PASSWORD,
                     session_adapter=session_adapter)

    setup_mock(adapter, "POST", "create_foobar.txt", 200,
               dataset_identifier=None)

    foo_column = {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"}
    bar_column = {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"}
    response = client.create("Foo Bar",
                             description="test dataset",
                             columns=[foo_column, bar_column],
                             tags=["foo", "bar"],
                             row_identifier="bar")

    # The adapter records every request it served; the first is the create.
    sent_payload = json.loads(adapter.request_history[0].text)

    # Request payload checks.
    for expected_key in ("name", "description", "columns", "tags"):
        assert expected_key in sent_payload

    first_column = sent_payload["columns"][0]
    for expected_key in ("fieldName", "name", "dataTypeName"):
        assert expected_key in first_column

    # Response checks.
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Esempio n. 2
0
def load_data(api_endpoint, limit=10000):
    '''
    Fetch rows of a dataset through the Socrata client for the
    CHICAGO_OPEN_DATA domain and return them as a DataFrame.

    Input:
        api_endpoint: str, dataset identifier passed to ``client.get``
        limit: int, maximum number of rows requested (default 10000)
    Output:
        Pandas Data Frame; a ``the_geom`` column, when present, is renamed
        to ``location``.
    '''
    # NOTE(review): the client is never closed here — confirm whether the
    # caller relies on process exit to release the HTTP session.
    portal = Socrata(CHICAGO_OPEN_DATA, None)
    records = portal.get(api_endpoint, limit=limit)
    frame = pd.DataFrame.from_dict(records)

    if 'the_geom' not in frame.columns:
        return frame

    frame.rename(columns={'the_geom': 'location'}, inplace=True)
    return frame
Esempio n. 3
0
def download_community_areas():
    '''
    Download dataset 'igwz-8jzy' (Chicago community areas) from
    data.cityofchicago.org and return it as a GeoDataFrame.

    At most 100 rows are requested. The ``area_numbe`` column is renamed
    to ``area_number`` and ``the_geom`` is converted with
    ``shapely.geometry.shape`` before being used as the geometry column.

    Returns: geopandas GeoDataFrame with CRS set to EPSG:4326
    '''
    row_cap = 100
    portal = Socrata('data.cityofchicago.org', APP_TOKEN)
    records = portal.get('igwz-8jzy', limit=row_cap)

    areas = pd.DataFrame.from_records(records)
    areas.rename(columns={'area_numbe': 'area_number'}, inplace=True)
    areas['the_geom'] = areas['the_geom'].apply(shapely.geometry.shape)

    areas = geopd.GeoDataFrame(areas, geometry='the_geom')
    areas.crs = {'init': 'epsg:4326'}
    return areas
Esempio n. 4
0
def fetch_meteorite_landings_full_dataset():
    """Download dataset ``gh4g-9sfh`` from data.nasa.gov as a DataFrame.

    Up to 50000 rows are requested in a single call.

    Returns:
        pandas.DataFrame built from the records returned by the client.
    """
    # Passing None as the app token creates an unauthenticated client, which
    # only works with public datasets. For non-public datasets pass an app
    # token together with the ``username=`` and ``password=`` keywords.
    nasa = Socrata("data.nasa.gov", None)
    rows = nasa.get("gh4g-9sfh", limit=50000)
    return pd.DataFrame.from_records(rows)
Esempio n. 5
0
def pull_agg_time_to_closure_statistics_created_since_closed_only(
        since, client=None, timeout=120, group_key=['agency']):
    """Query the average days-to-closure of closed requests, per group.

    Runs one SoQL query against ``settings.APP_NYC_DATASET`` selecting the
    grouping columns plus ``days_to_closure``. The query computes that value
    from week-of-year * 7, day-of-week and 365 days per year, so it is an
    approximation rather than an exact date difference.

    Args:
        since: lower bound for ``created_date``; interpolated with ``str()``.
        client: optional existing Socrata client. When omitted, one is
            created from ``settings`` and closed before returning.
        timeout: timeout passed to the client created here.
        group_key: column name or sequence of column names to group by.
            The default list is never mutated.

    Returns:
        pandas.DataFrame with the grouping columns and, when the query
        returned rows, a float ``days_to_closure`` column.
    """
    owns_client = client is None
    if owns_client:
        client = Socrata(settings.APP_NYC_API_DOMAIN,
                         settings.APP_TOKEN_311,
                         timeout=timeout)

    # A bare string would otherwise be joined character by character.
    if isinstance(group_key, str):
        group_key = [group_key]
    group_key_str = ','.join(group_key)

    days_expr = (
        "avg(((date_extract_woy(closed_date)*7) -(7 - case(date_extract_dow(closed_date)=0,7,true,date_extract_dow(closed_date)))) "
        " - ((date_extract_woy(created_date)*7) - (7 - case(date_extract_dow(created_date)=0,7,true,date_extract_dow(created_date)))) "
        " + ((date_extract_y(closed_date)  - date_extract_y(created_date))* 365) "
        ") as days_to_closure "
    )
    # NOTE(review): `since` and `group_key` are interpolated into the query
    # text unescaped — confirm they never come from untrusted input.
    query = ("select " + group_key_str + "," + days_expr +
             "where created_date >= '" + str(since) +
             "' and closed_date IS NOT NULL and status = 'Closed' "
             "group by " + group_key_str)

    try:
        data = client.get(settings.APP_NYC_DATASET, query=query)
    finally:
        # Only close what this function opened; a caller's client stays usable.
        if owns_client:
            client.close()

    dataFrame = pd.DataFrame.from_dict(data)
    # An empty result has no columns at all; skip the cast instead of raising.
    if 'days_to_closure' in dataFrame.columns:
        dataFrame['days_to_closure'] = dataFrame['days_to_closure'].astype('float')
    return dataFrame
def link_block(acs_df, username=None, password=None):
    '''
    Download dataset "74p9-q2aq" from the Chicago data portal and join it
    with ``acs_df``.

    Inputs:
        acs_df: DataFrame with a ``geoid`` column to merge on
        username, password: optional portal credentials; when omitted the
            client is created with the app token only

    Return:
        DataFrame: the downloaded rows (``geoid10`` renamed to ``geoid``,
        index labels converted to str) merged with ``acs_df`` on ``geoid``
    '''
    client = Socrata('data.cityofchicago.org',
                     app_token,
                     username=username,
                     password=password)
    try:
        # NOTE(review): no limit is passed, so the number of rows returned is
        # whatever the API defaults to — confirm this covers every block.
        res = client.get("74p9-q2aq")
    finally:
        # Release the session even when the request fails.
        client.close()

    df = pd.DataFrame.from_records(res)
    df.rename(index=str,
              columns={"geoid10": "geoid"},
              inplace=True)
    return pd.merge(df, acs_df, on='geoid')
def fireworks_data_loader():
    '''
    Download the NYC 311 fireworks complaints dataset into a DataFrame.
    https://data.cityofnewyork.us/Social-Services/311-Fireworks-Complaints/g4u2-tvag

    Returns the DataFrame with ``created_date`` parsed to datetimes
    (unparseable values are coerced) and an added ``fireworks`` column that
    is 1 where ``complaint_type`` equals 'Illegal Fireworks', else 0.
    '''
    nyc = Socrata('data.cityofnewyork.us', None)
    complaints = pd.DataFrame.from_records(nyc.get_all('g4u2-tvag'))

    complaints['created_date'] = pd.to_datetime(complaints['created_date'],
                                                errors='coerce')
    complaints['fireworks'] = [
        int(kind == 'Illegal Fireworks')
        for kind in complaints['complaint_type']
    ]
    return complaints
Esempio n. 8
0
def test_publish():
    """Exercise ``client.publish`` against a mocked HTTP adapter.

    The mocked POST answers with the contents of ``create_foobar.txt``; the
    parsed response must be a dict whose ``id`` has length 9.
    """
    adapter = requests_mock.Adapter()
    session_adapter = {"prefix": PREFIX, "adapter": adapter}
    client = Socrata(DOMAIN,
                     APPTOKEN,
                     username=USERNAME,
                     password=PASSWORD,
                     session_adapter=session_adapter)

    setup_publish_mock(adapter, "POST", "create_foobar.txt", 200)

    published = client.publish(DATASET_IDENTIFIER)
    assert isinstance(published, dict)
    assert len(published.get("id")) == 9
    client.close()
def get_business(start_date, end_date):
    '''
    Download rows of dataset "xqx5-8hwx" from the Chicago data portal whose
    ``date_issued`` lies between ``start_date`` and ``end_date``.

    start_date: "'2019-12-18T20:00:05'"
    end_date: "'2019-12-18T20:00:05'"
    (both values are substituted into the filter as given)

    Return:
        DataFrame built from the returned records (up to 1000000 rows)
    '''
    DATA_ID = "xqx5-8hwx"
    # NOTE(review): app token and credentials are hard-coded in source —
    # consider loading them from configuration instead.
    client = Socrata('data.cityofchicago.org',
                     'E0eO5nY1aKuEY1pVrunfqFhDz',
                     username='******',
                     password='******')

    conds = '''date_issued between "{}" and "{}"'''\
            .format(start_date, end_date)
    try:
        res = client.get(DATA_ID, where=conds, limit=1000000)
    finally:
        # Close the session even when the request raises.
        client.close()
    return pd.DataFrame.from_records(res)
Esempio n. 10
0
def test_upsert_exception():
    """Check that ``client.upsert`` raises ``HTTPError`` on a mocked 403."""
    adapter = requests_mock.Adapter()
    session_adapter = {"prefix": PREFIX, "adapter": adapter}
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=session_adapter)

    setup_mock(adapter, "POST", "403_response_json.txt", 403,
               reason="Forbidden")

    record = {"theme": "Surfing", "artist": "Wavves",
              "title": "King of the Beach", "year": "2010"}

    raised = None
    try:
        client.upsert(DATASET_IDENTIFIER, [record])
    except Exception as exc:
        raised = exc

    # Fail explicitly when the call went through without any error.
    if raised is None:
        raise AssertionError("No exception raised for bad request.")
    assert isinstance(raised, requests.exceptions.HTTPError)
Esempio n. 11
0
def get_food_trucks(page_number):
    """
    Fetch one page of food trucks through the Socrata client.

    Only the ``applicant`` and ``location`` fields are selected, ordered by
    applicant ascending. The filter is built by ``get_query_string`` from
    the current day index and the current time string.

    :param page_number: int, zero-based page; the offset is page_number * LIMIT
    :return: whatever ``client.get`` returns for that page
    """
    weekday_index = get_current_day_index()
    current_time_string = get_current_time_string()

    client = Socrata(SOCRATA_DOMAIN, SOCRATA_TOKEN)
    where_clause = get_query_string(weekday_index, current_time_string)
    query_options = {
        "select": "applicant, location",
        "where": where_clause,
        "limit": LIMIT,
        "offset": page_number * LIMIT,
        "order": "applicant ASC",
    }
    return client.get(FOOD_TRUCKS_RESOURCE_IDENTIFIER, **query_options)
Esempio n. 12
0
def get_community(username=None, password=None):
    '''
    Download community area numbers and names (dataset "igwz-8jzy") from the
    Chicago data portal.

    Inputs:
        username, password: optional portal credentials; when omitted the
            client is created with the app token only

    Return:
        DataFrame with columns ``community_area`` (renamed from
        ``area_numbe``) and ``community``; index labels are converted to str
    '''
    client = Socrata('data.cityofchicago.org',
                     app_token,
                     username=username,
                     password=password)
    try:
        res = client.get("igwz-8jzy", select='area_numbe, community')
    finally:
        # Release the session even when the request fails.
        client.close()

    df = pd.DataFrame.from_records(res)
    df.rename(index=str,
              columns={"area_numbe": "community_area"},
              inplace=True)
    return df
Esempio n. 13
0
 def __init__(self, provider_name, mds_config, mds_gql):
     """
     Store the collaborators, build the Socrata client and prepare the
     GraphQL trips query template.

     :param str provider_name: The name of the provider
     :param MDSConfig mds_config: Configuration object; the Socrata dataset,
         endpoint, app token and key id/secret are read from it with
         ``get_setting(name, None)``
     :param MDSGraphQLRequest mds_gql: The http graphql class used to make requests
     :return:
     """
     self.provider_name = provider_name
     self.mds_config = mds_config
     self.mds_http_graphql = mds_gql

     read_setting = self.mds_config.get_setting
     self.mds_socrata_dataset = read_setting("SOCRATA_DATASET", None)

     endpoint = read_setting("SOCRATA_DATA_ENDPOINT", None)
     app_token = read_setting("SOCRATA_APP_TOKEN", None)
     key_id = read_setting("SOCRATA_KEY_ID", None)
     key_secret = read_setting("SOCRATA_KEY_SECRET", None)
     self.client = Socrata(endpoint,
                           app_token,
                           username=key_id,
                           password=key_secret,
                           timeout=20)

     # Placeholders: $provider_name, $time_min, $time_max.
     self.query = Template("""
     query getTrips {
       api_trips(
             where: {
             provider: { provider_name: { _eq: "$provider_name" }}
             end_time: { _gte: "$time_min" },
             _and: { end_time: { _lt: "$time_max" }}
           }
       ) {
         trip_id: id
         device_id: device { id }
         vehicle_type
         trip_duration
         trip_distance
         start_time
         end_time
         modified_date
         council_district_start
         council_district_end
         census_geoid_start
         census_geoid_end
       }
     }
     """)
def TelefonoToCorreoDireccionPerson_mk5f_bdwx(m):
    """Maltego transform: look up a person by phone number.

    Fetches up to 2000 records of dataset "u5mc-hpr6" from www.datos.gov.co
    and picks the first one whose ``celular`` or ``telefonos`` field equals
    ``m.Value``. On a match it emits a Person entity (with first-names and
    surname fields), an EmailAddress entity and a Location entity. If no
    record matches, or anything fails, a UI message is emitted instead.

    Args:
        m: Maltego request object; only ``m.Value`` (the phone) is read.
    """
    TRX = MaltegoTransform()
    telefono = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("u5mc-hpr6", limit=2000)

        # Check key presence first: a record lacking a phone field must be
        # skipped rather than abort the whole search with a KeyError.
        phone_fields = ('celular', 'telefonos')
        match = next(
            (rec for rec in r
             if any(f in rec and rec[f] == telefono for f in phone_fields)),
            None)

        if match is None:
            TRX.addUIMessage("Cedula no encontrada en la base de datos")
        else:
            correo_electronico = match['correo_electronico']
            direccion = match['direccion']
            barrio = match['municipio']  # the area field is fed from 'municipio'

            nombre = match['nombre'].split(" ")
            if len(nombre) == 4:
                # Two given names followed by two surnames.
                firts = " ".join(nombre[:2])
                last = " ".join(nombre[2:])
                full = " ".join(nombre)
            else:
                # First token is the given name, the next two the surnames.
                # Slicing keeps shorter names from raising IndexError.
                firts = nombre[0]
                last = " ".join(nombre[1:3])
                full = " ".join(nombre[:3])

            ent = TRX.addEntity('maltego.Person', full)
            ent.addAdditionalFields("person.firtsnames", "Firts Names", True, firts)
            ent.addAdditionalFields("person.lastname", "Surname", True, last)
            TRX.addEntity('maltego.EmailAddress', correo_electronico)
            # The location goes on the response (TRX), like the other
            # entities, not on the incoming request object `m`.
            ent4 = TRX.addEntity('maltego.Location', direccion)
            ent4.addAdditionalFields("country", "Country", True, "Colombia")
            ent4.addAdditionalFields("location.area", "Area", True, barrio)
            ent4.addAdditionalFields("streetaddress", "Street Address", True, direccion)

    except Exception:
        # Transform boundary: always answer Maltego instead of crashing.
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Esempio n. 15
0
def get_parking_tickets(page_size, num_pages=None, output=None):
    """Download parking ticket rows page by page and print or save them.

    Args:
        page_size: number of rows requested per page.
        num_pages: number of pages to request; when None, enough pages to
            cover the row count reported by the API are requested.
        output: path of a file to write to; when None the rows are printed
            to stdout.

    Returns:
        list of the rows fetched. Fetching stops early, keeping what was
        already downloaded, if a page request fails.
    """
    # Start connection to data API
    client = Socrata(API_BASE, APP_KEY)

    # get the maximum number of rows in the data set
    max_rows = int(client.get(dataSetID, select='COUNT(*)')[0]['COUNT'])

    # Pages needed to read everything: ceiling division that stays an int,
    # so the value is usable with range() on Python 3 as well.
    max_num_pages = -(-max_rows // page_size)

    # If num_pages is blank, read max_num_pages
    if num_pages is None:
        num_pages = max_num_pages

    data = []
    for i in range(num_pages):
        try:
            # Requests with a very large offset can time out; keep what has
            # been fetched so far rather than failing the whole download.
            data += client.get(dataSetID, limit=page_size, offset=i * page_size)
        except Exception:
            break

    # Output the data
    if output is None:
        print(data)
    else:
        # NOTE(review): rows are written with str(), i.e. Python repr of each
        # dict, not JSON — confirm consumers expect that format.
        with open(output, 'w') as outputFile:
            # One row per line, comma separated, inside brackets; an empty
            # result is written as "[]".
            outputFile.write('[' + ',\n'.join(str(line) for line in data) + ']')

    return data
def TelefonoToCorreoDireccionPerson_6kcx_kbuk(m):
    """Maltego transform: look up a council member by mobile phone number.

    Fetches up to 2000 records of dataset "6kcx-kbuk" from www.datos.gov.co
    and picks the first one whose ``celular`` field equals ``m.Value``. On a
    match it emits a Person entity (with first-names and surname fields),
    an EmailAddress entity, a political-party entity and an id-document
    entity. If no record matches, or anything fails, a UI message is
    emitted instead.

    Args:
        m: Maltego request object; only ``m.Value`` (the phone) is read.
    """
    TRX = MaltegoTransform()
    telefono = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("6kcx-kbuk", limit=2000)

        # Check key presence first: a record lacking 'celular' must be
        # skipped rather than abort the whole search with a KeyError.
        match = next(
            (rec for rec in r
             if 'celular' in rec and rec['celular'] == telefono),
            None)

        if match is None:
            TRX.addUIMessage("Cedula no encontrada en la base de datos")
        else:
            cc = match['doc_identidad']
            partido = match['partido_politico']
            correo_electronico = match['correo_electronico']

            nombre = match['nombre_concejal'].split(" ")
            if len(nombre) == 4:
                # Two given names followed by two surnames.
                firts = " ".join(nombre[:2])
                last = " ".join(nombre[2:])
                full = " ".join(nombre)
            else:
                # First token is the given name, the next two the surnames.
                # Slicing keeps shorter names from raising IndexError.
                firts = nombre[0]
                last = " ".join(nombre[1:3])
                full = " ".join(nombre[:3])

            ent = TRX.addEntity('maltego.Person', full)
            ent.addAdditionalFields("person.firtsnames", "Firts Names", True, firts)
            ent.addAdditionalFields("person.lastname", "Surname", True, last)
            TRX.addEntity('maltego.EmailAddress', correo_electronico)
            TRX.addEntity('eciescuelaing.PartidoPolitico', partido)
            TRX.addEntity('eci.Cedula', cc)

    except Exception:
        # Transform boundary: always answer Maltego instead of crashing.
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Esempio n. 17
0
def lambda_handler(event, context):
    '''
    AWS Lambda entry point: transform newly deposited records and upsert
    them to a Socrata data set on data.transportation.gov.

    For every (bucket, key) pair extracted from ``event`` the file is read
    with ``lambda_to_socrata_util.process_s3_file`` and each record is passed
    through ``process_bsm``. If nothing was read the function logs and
    returns. Otherwise the record dtypes are adjusted to the column dtypes of
    SOCRATA_DATASET_ID and the records are upserted; the upsert response is
    logged.

    Parameters:
        event, context: Amazon Web Services required parameters. Only
            ``event`` is used here.
    '''
    # Read and transform the records of every file referenced by the event.
    out_recs = []
    for bucket, key in lambda_to_socrata_util.get_fps_from_event(event):
        for raw_rec in lambda_to_socrata_util.process_s3_file(bucket, key):
            out_recs.append(process_bsm(raw_rec))

    if not out_recs:
        logger.info("No new data found. Exit script")
        return

    # Upsert the new records to the corresponding Socrata data set.
    logger.info("Connecting to Socrata")
    socrata = Socrata("data.transportation.gov",
                      SOCRATA_API_KEY,
                      SOCRATA_USERNAME,
                      SOCRATA_PASSWORD,
                      timeout=400)

    logger.info("Transform record dtypes according to Socrata data set")
    column_dtypes = lambda_to_socrata_util.get_col_dtype_dict(
        socrata, SOCRATA_DATASET_ID)
    float_fields = ['randomNum', 'metadata_generatedAt_timeOfDay']
    typed_recs = []
    for rec in out_recs:
        typed_recs.append(
            lambda_to_socrata_util.mod_dtype(rec, column_dtypes, float_fields))

    logger.info("Uploading {} new records".format(len(typed_recs)))
    upload_response = socrata.upsert(SOCRATA_DATASET_ID, typed_recs)
    logger.info(upload_response)
Esempio n. 18
0
def trafficDataIngestion(datalimit, start_datetime, end_datetime):
    """Download Manhattan traffic records for a time window and save a CSV.

    Queries dataset "i4gi-tjb9" on data.cityofnewyork.us for rows whose
    ``data_as_of`` lies between ``start_datetime`` and ``end_datetime``,
    derives hour-level and record-level datetime columns plus the weekday
    name, and writes the result under ``<base>/TFG/apis_data/traffic_historical/``.

    Args:
        datalimit: maximum number of rows to request.
        start_datetime: window start as a string; its first 13 characters
            are used in the output file name.
        end_datetime: window end, same format as ``start_datetime``.

    Returns:
        None. The CSV path is printed.
    """
    # Unauthenticated client only works with public data sets. Note 'None'
    # in place of application token, and no username or password.
    client = Socrata("data.cityofnewyork.us", None)

    date = f"data_as_of between '{start_datetime}' and '{end_datetime}'"
    print(date)

    columns = "data_as_of, id, speed, travel_time, link_name"

    try:
        # Returned as JSON from the API and converted to a list of
        # dictionaries by sodapy.
        results = client.get("i4gi-tjb9",
                             limit=datalimit,
                             borough="Manhattan",
                             where=date,
                             select=columns)
    finally:
        client.close()

    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)

    # "datetime": drop the last 9 characters and append "00:00", i.e. the
    # timestamp truncated to the hour. "datetime_traffic": the timestamp
    # without its last 4 characters.
    # NOTE(review): assumes data_as_of looks like 2020-01-01T10:23:45.000 —
    # confirm against the dataset.
    results_df["datetime"] = results_df["data_as_of"].str[:-9] + "00:00"
    results_df["datetime_traffic"] = results_df["data_as_of"].str[:-4]

    results_df["datetime"] = pd.to_datetime(results_df["datetime"])
    results_df["datetime_traffic"] = pd.to_datetime(
        results_df["datetime_traffic"])
    results_df["weekday"] = results_df['datetime'].dt.day_name()

    results_df = results_df[[
        "datetime", "datetime_traffic", "weekday", "id", "speed",
        "travel_time", "link_name"
    ]]

    # Saving: the base directory is the part of the working directory that
    # precedes "\TFG". The backslash is escaped explicitly; the bare "\T"
    # was an invalid escape sequence that only worked by accident.
    current_dir = os.getcwd().split("\\TFG")[0]
    file_name = current_dir + f"/TFG/apis_data/traffic_historical/traffic_dataIngestion_{start_datetime[0:13]}_to_{end_datetime[0:13]}.csv"

    results_df.to_csv(file_name, index=False)
    print(f"TrafficApi: {file_name}")
Esempio n. 19
0
def choose_dataset(x, limit):
    """
    Ask the user to pick a dataset from the table ``x`` and download it.

    The first column of ``x`` holds the API endpoints. The user is prompted
    for a row index until a valid one is entered; the chosen endpoint is
    then queried on opendata.mass-cannabis-control.com.

    Args:
        x: DataFrame whose first column lists the API endpoints.
        limit: maximum number of rows to request; None falls back to 2000.

    Returns:
        pandas DataFrame with the downloaded records.
    """
    # setup a basic client
    client = Socrata("opendata.mass-cannabis-control.com", None)

    # endpoints live in the first column of the table
    list_of_endpts = x[x.columns[0]].to_list()

    # Honour the caller's limit; it used to be overwritten unconditionally.
    if limit is None:
        limit = 2000

    # Keep asking until the answer is a usable index, instead of printing
    # "Try again" and then failing on an unbound variable.
    while True:
        user_input = input(
            "Which dataset are you interested in viewing?\nPlease choose an index (i.e. row number) from the table above: "
        )

        time.sleep(2)

        try:
            submit = list_of_endpts[int(user_input)]
            break
        except (ValueError, KeyError, IndexError):
            print(
                '\n\nYou did not choose a number listed in the table above; Try again...\n\n'
            )

    # Pull data via api endpoint
    results = client.get(f"{submit}", limit=limit)

    # Convert to pandas DataFrame and return it
    return pd.DataFrame.from_records(results)
def update_db(dbname='bites', collection_name='permit'):
    """
    Pull food truck permit data from the SFGOV API and store the approved
    permits in MongoDB, replacing the collection's previous contents.

    Only records with status 'APPROVED' and a longitude other than '0' are
    stored. Errors are reported with a printed message and an early return.

    Args:
        dbname: MongoDB database name (also appended to the connection URI).
        collection_name: collection that is dropped and refilled.
    """
    # NOTE(review): API token and MongoDB credentials are hard-coded in
    # source — consider loading them from configuration.
    client = None
    try:
        client = Socrata("data.sfgov.org", "oBNrfX91YumclMO5wQlQKv0f0")
        results = client.get("rqzj-sfat", limit=5000)
    except Exception:
        print("Error: Could not connect to sfgov API")
        return
    finally:
        # The rows are already in memory; release the session on every path.
        if client is not None:
            client.close()

    # Filter before touching the database, so a malformed record cannot
    # leave the collection dropped and empty.
    approved_json = [
        x for x in results
        if x['status'] == 'APPROVED' and (x['longitude'] != '0')
    ]

    try:
        mc = MongoClient("mongodb://*****:*****@34.212.27.178/" +
                         dbname)
    except Exception:
        print("Error: Could not connect to MongoDB")
        return

    try:
        # Connect to database
        db = mc[dbname]
        # Drop table
        try:
            db[collection_name].drop()
        except errors.ServerSelectionTimeoutError:
            print("Error: MongoDB connection time out")
            return
        except errors.OperationFailure:
            print("Error: Not authorized to access the database")
            return

        for record in approved_json:
            db[collection_name].insert_one(record)
    finally:
        # Runs on the early returns above as well.
        mc.close()

    print("Database updated (%i records)" % len(approved_json))
Esempio n. 21
0
def get_pm25_data(yr, st):
    """
    Get pm2.5 data from the CDC API (dataset "qjju-smys").

    yr - year of interest - string
    st - fips code for the state of interest - string

    Returns a list of records with the fields year, date, countyfips and
    pm_mean_pred, fetched in pages of 50,000 rows.
    """
    # establish connection to the CDC's data via Socrata
    client = Socrata("data.cdc.gov", parsed_yaml['cdc_key'],
                     parsed_yaml['cdc_username'], parsed_yaml['cdc_password'])

    # set timeout to 300 seconds
    client.timeout = 300

    where_clause = f"year = '{yr}' AND statefips = '{st}'"
    try:
        # get number of records in the dataset
        record_count = client.get("qjju-smys",
                                  where=where_clause,
                                  select="COUNT(*)")

        print("The record count is", record_count)
        print(f"Getting {yr} data from the Socrata API...")

        # Parse the count once instead of on every loop iteration.
        total = int(record_count[0]['COUNT'])

        start = 0  # offset of the next page
        chunk_size = 50000  # fetching 50,000 rows at a time
        results = []
        while True:
            results.extend(
                client.get("qjju-smys",
                           where=where_clause,
                           select="year, date, countyfips, pm_mean_pred",
                           offset=start,
                           limit=chunk_size))
            # pagination
            start = start + chunk_size
            print("At record number", start)
            # Stop once every row is covered. ">=" avoids one extra, empty
            # request when the count is an exact multiple of chunk_size.
            if start >= total:
                break
    finally:
        client.close()

    # return list so that it can be stored in a dataframe
    return results
Esempio n. 22
0
 def _onchange_driver_license(self):
     """Fill the license fields from dataset '3qhi-59v6' on www.datossct.gob.mx.

     Looks up ``self.driver_license`` and, from the first record returned,
     sets ``license_type``, ``license_valid_from`` and
     ``license_expiration``.

     Raises:
         ValidationError: when the lookup or parsing fails for any reason.
     """
     client = Socrata("www.datossct.gob.mx", None)
     try:
         driver_license = client.get('3qhi-59v6',
                                     licencia=self.driver_license)
         record = driver_license[0]
         date_format = '%Y-%m-%dT%H:%M:%S.%f'
         license_valid_from = datetime.strptime(
             record['fecha_inicio_vigencia'], date_format)
         license_expiration = datetime.strptime(
             record['fecha_fin_vigencia'], date_format)
         self.license_type = record['categoria_de_la_licencia']
         self.license_valid_from = license_valid_from
         self.license_expiration = license_expiration
     except Exception:
         # NOTE(review): network and parsing failures are also reported as
         # "not in database" — confirm that is the intended message.
         raise ValidationError(
             _('The driver license is not in SCT database'))
     finally:
         # Single close for both the success and the failure path.
         client.close()
Esempio n. 23
0
def test_delete():
    """Exercise ``client.delete`` against a mocked HTTP adapter.

    A DELETE on the registered dataset URI must return status 200; a DELETE
    on an identifier with no registered URI must raise ``NoMockAddress``.
    """
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    # The outer try/finally closes the client even when an assertion fails.
    try:
        uri = "{0}{1}/api/views/{2}.json".format(PREFIX, DOMAIN, DATASET_IDENTIFIER)
        adapter.register_uri("DELETE", uri, status_code=200)
        response = client.delete(DATASET_IDENTIFIER)
        assert response.status_code == 200

        try:
            client.delete("foobar")
        except Exception as e:
            assert isinstance(e, requests_mock.exceptions.NoMockAddress)
        else:
            # Without this branch the test passed silently when nothing raised.
            raise AssertionError("No exception raised for unmocked dataset.")
    finally:
        client.close()
Esempio n. 24
0
def get_data(app_key: str, page_size: int, page=DEFAULT_PAGE) -> list:
    """Fetch the first ``page`` pages of DATA_SET from data.cityofnewyork.us.

    Args:
        app_key: Socrata application token.
        page_size: rows per page; converted with ``int()``.
        page: number of pages to fetch; converted with ``int()``.

    Returns:
        list with the rows of all fetched pages, in page order.

    Raises:
        requests.exceptions.ConnectionError: propagated unchanged.
        Exception: 'Invalid app_token specified' when a request fails with
            an HTTP error; the original error is attached as ``__cause__``.
    """
    page_size = int(page_size)
    page = int(page)

    client = Socrata('data.cityofnewyork.us', app_key)
    result = []
    try:
        for i in range(page):
            try:
                result += client.get(dataset_identifier=DATA_SET,
                                     limit=page_size,
                                     offset=i * page_size)
            except requests.exceptions.HTTPError as err:
                # Keep the message and type callers see, but chain the cause
                # so the real HTTP status is not lost.
                raise Exception('Invalid app_token specified') from err
    finally:
        client.close()
    return result
Esempio n. 25
0
def get_data(database_id, location_file, use_cache):
    """Download a dataset from www.datos.gov.co and resolve its locations.

    Each row gets a ``location`` string built from ``ciudad_de_ubicaci_n``
    and ``departamento``, stripped of accents and upper-cased. Location data
    for the distinct values is read from ``location_file`` when
    ``use_cache`` is set and the file exists, otherwise computed with
    ``get_location_df`` and written there. Locations missing from that
    table are computed, appended and saved.

    Args:
        database_id: dataset identifier to download (up to 100000 rows).
        location_file: path of the CSV cache of location data.
        use_cache: whether an existing ``location_file`` may be reused.

    Returns:
        tuple (df, location_df): the dataset and the location table.
    """
    client = Socrata("www.datos.gov.co", api_key)
    try:
        results = client.get(database_id, limit=100000)
    finally:
        client.close()

    df = pd.DataFrame.from_records(results)
    df["location"] = df.apply(
        lambda row: f"{row.ciudad_de_ubicaci_n} {row.departamento}", axis=1
    )
    # remove accents
    df["location"] = df.location.apply(lambda x: str(unidecode.unidecode(x)).upper())
    cities = pd.unique(df.location)

    print("Checking location file...")
    if use_cache and file_exists(location_file):
        print("File exists now reading")
        location_df = pd.read_csv(location_file)
        print("Done")

    else:
        print("Could not locate file, calculating from scratch")
        maybe_mkdirs(location_file)

        print("Calculating...")
        location_df = get_location_df(cities)
        print("Ready! Writing to remote...")
        location_df.to_csv(location_file, index=False)
        print("Done")

    print("Check if cities matches")
    # Compare contents, not lengths: equal-sized tables can still miss
    # cities, and extra cached rows need no recomputation.
    new_cities = set(cities) - set(location_df.location.values)
    if new_cities:
        print("Difference", new_cities)
        print("Calculating new cities")
        _location_df = get_location_df(sorted(new_cities))
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        location_df = pd.concat([location_df, _location_df])
        print("Done!")

        # save updated version
        print("Ready! Writing to remote...")
        location_df.to_csv(location_file, index=False)
        print("Done")

    return df, location_df
Esempio n. 26
0
def upload_open_data_to_Elasticsearch(url,
                                      endpoint,
                                      api_key,
                                      query=None,
                                      kwargs=None):
    """Page through a Socrata dataset and upload every page to Elasticsearch.

    The first page goes through
    ``upload_to_Elasticsearch.bulk_upload_docs_to_ES_cURL`` and every later
    page through ``upload_to_Elasticsearch.update_ES_records_curl``.

    Args:
        url: Socrata domain.
        endpoint: dataset identifier.
        api_key: Socrata application token.
        query: optional filter passed as ``where``.
        kwargs: optional dict of keyword arguments forwarded to the upload
            helpers; None means no extra arguments.
    """
    if kwargs is None:
        kwargs = {}
    page_size = 10000

    client = Socrata(url, api_key)
    try:
        idx = 0
        time.sleep(5)  # sleep 5 seconds, to allow time to connect
        docs = client.get(endpoint, limit=page_size, offset=0, where=query)
        upload_to_Elasticsearch.bulk_upload_docs_to_ES_cURL(docs, **kwargs)
        while len(docs) > 0:
            # page through the results until an empty page comes back
            idx += page_size
            docs = client.get(endpoint, limit=page_size, offset=idx, where=query)
            # NOTE(review): the final, empty page is also passed to the
            # helper — confirm it treats an empty list as a no-op.
            upload_to_Elasticsearch.update_ES_records_curl(docs, **kwargs)
    finally:
        # Release the session even when a request or an upload fails.
        client.close()
def pull_socrata():
    """
    Check the connection to the API and, if it succeeded, copy dataset
    "g8m3-pdis" from data.sfgov.org into the ``col`` collection.

    If ``check_connection()`` does not report "Successful Connection", its
    result is printed and nothing is pulled. Duplicate-key write errors
    (code 11000) are ignored; any other write error prints "really panic".

    :return: None
    """
    status = check_connection()
    if status != "Successful Connection":
        # Print the result already obtained rather than probing again.
        print(status)
        return

    with Socrata("data.sfgov.org", API_Token) as c:
        data = c.get_all("g8m3-pdis")
        try:
            col.insert_many(data, ordered=False)
        except pymongo.errors.BulkWriteError as e:
            # Keep only the errors that are not duplicate-key errors. The
            # previous code built a (lambda, list) tuple here, so its length
            # check was always true.
            panic = [
                err for err in e.details["writeErrors"]
                if err["code"] != 11000
            ]
            if panic:
                print("really panic")
def download_chiopdat_data(api_endpoint,
                           year_from=None,
                           year_to=None,
                           date_column='year',
                           timestamp=False,
                           limit=10000):
    '''
    Load data from the Chicago Open Data portal using the Socrata API and
    the api_endpoint, optionally restricted to a range of years.

    Input:
        api_endpoint: str, dataset identifier
        year_from: int, first year to include; when omitted no year filter
            is applied
        year_to: int, last year to include; when omitted (and year_from is
            given) everything from year_from onwards is included
        date_column: str, name of the column holding the date
        timestamp: bool, True when date_column is a timestamp (its year is
            extracted with date_extract_y), False when it already holds the
            year as a number
        limit: int, maximum number of rows requested
    Output:
        Pandas Data Frame; a 'the_geom' column, when present, is renamed to
        'location'
    '''
    query_args = {'limit': limit}
    if year_from:
        if timestamp:
            year_field = "date_extract_y({})".format(date_column)
        else:
            year_field = date_column
        if year_to is None:
            # Open-ended range; "BETWEEN x and None" is not a valid filter.
            query_args['where'] = "{} >= {}".format(year_field, year_from)
        else:
            query_args['where'] = "{} BETWEEN {} and {}".format(
                year_field, year_from, year_to)

    client = Socrata(CHICAGO_OPEN_DATA, None)
    try:
        data_dict = client.get(api_endpoint, **query_args)
    finally:
        client.close()

    data_df = pd.DataFrame.from_dict(data_dict)
    if 'the_geom' in data_df.columns:
        data_df.rename(columns={'the_geom': 'location'}, inplace=True)

    return data_df
Esempio n. 29
0
def DeleteDataset(dataset_id):
    """Delete a dataset through the Socrata client configured in ``cfg``.

    Args:
        dataset_id: identifier of the dataset to delete.

    Returns:
        str: 'OK' on success, otherwise the text of the error raised while
        creating the client or deleting the dataset.
    """
    print('El id')
    print(dataset_id)
    client = None
    try:
        # Creating Socrata Client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        client.delete(dataset_id)
        error = 'OK'
    except Exception as e:
        # Report the failure to the caller as text instead of raising.
        error = str(e)
        print('Error description:')
        print(error)
    finally:
        # The client is unset when its construction itself failed.
        if client is not None:
            client.close()
    return error
Esempio n. 30
0
def get_trip_records(limit=100000):
    """Download trip records (dataset "m6dm-c72p") from data.cityofchicago.org.

    Only the trip id, timestamps, duration, distance, pickup and dropoff
    community areas and the fare-related columns are selected.

    Args:
        limit: maximum number of rows requested (default 100000).

    Returns:
        pandas.DataFrame built from the returned records.
    """
    selected_columns = '''trip_id, trip_start_timestamp, trip_end_timestamp, trip_seconds, 
                                                        trip_miles, pickup_community_area, dropoff_community_area, fare, 
                                                        tip, additional_charges, trip_total'''

    # NOTE(review): app token and credentials are hard-coded in source.
    portal = Socrata('data.cityofchicago.org',
                     'Tk6RhuGAFvF9P4ehsysybj3IW',
                     username="******",
                     password="******")
    portal.timeout = 10000

    rows = portal.get("m6dm-c72p", limit=limit, select=selected_columns)
    return pd.DataFrame.from_records(rows)