def get_311(data_id):
    '''
    Get 2012 and 2017 311 data from chicago data portal
    Input:
        data_id: id of different type of report
    Return:
        pandas dataframe with the columns and dtypes as COL_TYPES
    '''
    '''COL_TYPES = {'sr_type': str, 
                 #'created_date': str,
                 'zip_code': int,
                 'longitude': float,
                 'latitude': float,
                  'legacy_record': bool} '''
    #cols = [item for item in COL_TYPES.keys()]
    client = Socrata('data.cityofchicago.org',
                     'E0eO5nY1aKuEY1pVrunfqFhDz',
                     username='******',
                     password='******')

    conds = '''legacy_sr_number IS NULL'''
    res = client.get(
        data_id,
        #where= conds,
        limit=1000000)
    client.close()
    df = pd.DataFrame.from_records(res)
    #     df['created_date'] = pd.to_datetime(df['created_date'])
    #     df = df[df['created_date']>start_year]
    #     df = df[df['created_date']<end_year]
    return df
 def get_driver_license_info(self):
     client = Socrata("www.datossct.gob.mx", None)
     for rec in self:
         try:
             driver_license = client.get('3qhi-59v6',
                                         licencia=rec.driver_license)
             license_valid_from = datetime.strptime(
                 driver_license[0]['fecha_inicio_vigencia'],
                 '%Y-%m-%dT%H:%M:%S.%f')
             license_expiration = datetime.strptime(
                 driver_license[0]['fecha_fin_vigencia'],
                 '%Y-%m-%dT%H:%M:%S.%f')
             rec.write({
                 'license_type':
                 driver_license[0]['categoria_de_la_licencia'],
                 'license_valid_from':
                 license_valid_from,
                 'license_expiration':
                 license_expiration,
             })
             client.close()
         except Exception:
             client.close()
             raise ValidationError(
                 _('The driver license is not in SCT database'))
Esempio n. 3
0
def get_crime_data(name_of_borough):
    #connect to NYC Crime Database
    client = Socrata("data.cityofnewyork.us", None)
    #get past days
    last_date = (datetime.datetime.now() +
                 datetime.timedelta(days=-250)).isoformat()

    if (name_of_borough == 'staten'):
        upper_borough = 'STATEN ISLAND'
    else:
        upper_borough = name_of_borough.upper()

    #get all felonies without being throttled
    num_crimes = 0
    for i in range(15):
        offset = 1000 * i
        #build query to get crimes that are within x days and happened in name_of_borough
        q = "SELECT * WHERE cmplnt_fr_dt > '%s' and boro_nm = '%s' and law_cat_cd = '%s' OFFSET %d" % (
            last_date, upper_borough, "FELONY", offset)

        #run query
        num_crimes += len(client.get("7x9x-zpz6", query=q))

    print(num_crimes)
    client.close()
    return num_crimes
Esempio n. 4
0
def insert(connection, city):
    client = Socrata(urls[city],
                     "FXdTHdcGpX9fqDNp9wnN9DGTq",
                     username="******",
                     password="******")
    lim = 50000
    inc = 0
    results = ['']
    while len(results) > 0:
        results = client.get(datasets[city],
                             content_type="json",
                             limit=lim,
                             offset=lim * inc)
        rows = []
        for i in xrange(len(results)):
            try:
                rows.append(createRow(results[i], city))
            except:
                pass
        cursor = connection.cursor()
        query = "INSERT INTO %s " % city
        query = query + "(`city_id`,`crime_description`, `crime_code`, `datetime_occured`, `location_description`, \
						  `latitude`, `longitude`) \
							VALUES (%s, %s, %s, %s, %s, %s, %s)"

        cursor.executemany(query, rows)
        connection.commit()
        inc += 1
    client.close()
Esempio n. 5
0
def test_replace():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN,
                     APPTOKEN,
                     username=USERNAME,
                     password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "replace_songs.txt"
    data = [
        {
            "theme": "Surfing",
            "artist": "Wavves",
            "title": "King of the Beach",
            "year": "2010"
        },
        {
            "theme": "History",
            "artist": "Best Friends Forever",
            "title": "Abe Lincoln",
            "year": "2008"
        },
    ]
    setup_mock(adapter, "PUT", response_data, 200)
    response = client.replace(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 2
    client.close()
Esempio n. 6
0
def get_crime_data():
    # call crime data api
    client = Socrata(settings.SOURCE_DOMAIN, settings.SOCRATA_APP_TOKEN)

    limit = 20000
    offset = 0
    tries = 10

    # return the headers
    yield ['cad_cdw_id', 'event_clearance_date', 'event_clearance_group']

    for _ in xrange(tries):
        # sodapy throws an exception if we get a bad response
        # no need to handle it here, we'll catch it on the outside
        response = client.get(
            settings.DATASET_ID,
            content_type="csv",
            select='cad_cdw_id, event_clearance_group, event_clearance_date',
            where='event_clearance_date is not null and \
                    within_circle(incident_location, {0}, {1}, {2})'.format(
                        settings.CLINK_LAT, settings.CLINK_LON, settings.RADIUS),
            order='event_clearance_date DESC',
            limit=limit,
            offset=offset
        )
        offset += len(response) - 1 # don't count the header
        if len(response) == 1:  # if it's just the header
            break   # then there's nothing left to ask for

        # trim off the header row
        # using itertools so we don't make a shallow copy of the giant list
        response_iter = islice(response, 1, None)
        for row in imap(_convert_row, response_iter):
            yield row
    client.close()
Esempio n. 7
0
class SocrataClient:
    def __init__(self):
        self.client = Socrata(conf.DOMAIN, conf.TOKEN, timeout=conf.TIMEOUT)

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return conf.DATASET_IDS[year]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(conf.ATTEMPTS):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < conf.ATTEMPTS - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)

    def get_datasets(self):
        '''
        Search for "MyLA311 Service Request Data" within the response
        to get the dataset ids for each year.
        '''
        return self.client.datasets()
Esempio n. 8
0
def test_get_all():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    setup_mock(adapter,
               "GET",
               "bike_counts_page_1.json",
               200,
               query="$offset=0")
    setup_mock(adapter,
               "GET",
               "bike_counts_page_2.json",
               200,
               query="$offset=1000")
    response = client.get_all(DATASET_IDENTIFIER)

    assert inspect.isgenerator(response)
    data = list(response)
    assert len(data) == 1001
    assert data[0]["date"] == "2016-09-21T15:45:00.000"
    assert data[-1]["date"] == "2016-10-02T01:45:00.000"

    client.close()
Esempio n. 9
0
def test_create():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None)

    columns = [
        {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"},
        {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"}
    ]
    tags = ["foo", "bar"]
    response = client.create("Foo Bar", description="test dataset",
        columns=columns, tags=tags, row_identifier="bar")

    request = adapter.request_history[0]
    request_payload = json.loads(request.text) # can't figure out how to use .json

    # Test request payload
    for dataset_key in ["name", "description", "columns", "tags"]:
        assert dataset_key in request_payload

    for column_key in ["fieldName", "name", "dataTypeName"]:
        assert column_key in request_payload["columns"][0]

    # Test response
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Esempio n. 10
0
def get_comps_sdat(address, miles=1, save_csv=False):
    meta_df = property_metadata(address)
    query = get_query(meta_df, miles)
    client = Socrata(url_md, app_id)
    response = client.get(id_md, query=query)
    df = pd.DataFrame(response)
    num = []
    non_num = []
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
            num.append(col)
        except:
            non_num.append(col)

    df['id'] = meta_df.address.iloc[0]
    df.date = pd.to_datetime(df['date'])
    df.date2 = pd.to_datetime(df['date2'])
    df['basement'] = df['style_code'].apply(
        lambda x: 'with basement' in x.lower())
    df['price_change'] = df['price'].astype(float) - df['price2'].astype(float)
    df['date_change'] = (df.date - df.date2).dt.days
    df['norm_price'] = (
        (1 +
         (0.03 / 365))**(pd.datetime.today() - df['date']).dt.days) * df.price
    client.close()
    if save_csv: df.to_csv(meta_df.address.iloc[0] + ".csv", index=False)
    results = (meta_df, df)
    return results  #df
Esempio n. 11
0
 def fillCDC500CitiesData(self):
     client = Socrata(
         "chronicdata.cdc.gov", 'QoQet97KEDYpMW4x4Manaflkp'
     )  #My (John Pham's) access token is QoQet97KEDYpMW4x4Manaflkp
     censusTractRetrievalString = "place_tractid='3651000-" + self.censusTractNum + "'"
     censusTract = client.get("47z2-4wuh",
                              where=censusTractRetrievalString,
                              content_type="json",
                              order="place_tractid ASC",
                              limit=1000)
     client.close()
     self.coreMenCrudePrev = float(
         censusTract[0]["corem_crudeprev"]
     )  #Model-based estimate for crude prevalence of older adult men aged >=65 years who are up to date on a core set of clinical preventive services: Flu shot past year, PPV shot ever, Colorectal cancer screening, 2016
     self.coreWomenCrudePrev = float(
         censusTract[0]["corew_crudeprev"]
     )  #Model-based estimate for crude prevalence of older adult women aged >=65 years who are up to date on a core set of clinical preventive services: Flu shot past year, PPV shot ever, Colorectal cancer screening, and Mammogram past 2 years, 2016
     self.colonScreenCrudePrev = float(
         censusTract[0]["colon_screen_crudeprev"]
     )  #Model-based estimate for crude prevalence of fecal occult blood test, sigmoidoscopy, or colonoscopy among adults aged 50–75 years, 2016
     self.mammoUseCrudePrev = float(
         censusTract[0]["mammouse_crudeprev"]
     )  #Model-based estimate for crude prevalence of mammography use among women aged 50–74 years, 2016
     self.teethLostCrudePrev = float(
         censusTract[0]["teethlost_crudeprev"]
     )  #Model-based estimate for crude prevalence of all teeth lost among adults aged >=65 years, 2016
     self.geolocation = censusTract[0][
         "geolocation"]  #Latitude, longitude of census tract centroid
     self.coreMenPercentage = self.coreMenCrudePrev / self.totalMales65Plus * 100  #save this value as a number in the range of 0-100
     self.coreWomenPercentage = self.coreWomenCrudePrev / self.totalFemales65Plus * 100  #save this value as a number in the range of 0-100
     self.mammoUsePercentage = self.mammoUseCrudePrev / self.totalFemales50To74 * 100  #save this value as a number in the range of 0-100
     self.colonScreenPercentage = self.colonScreenCrudePrev / self.totalOlderAdults50To74 * 100  #save this value as a number in the range of 0-100
     self.teethLostPercentage = self.teethLostCrudePrev / self.totalOlderAdults65Plus * 100  #save this value as a number in the range of 0-100
Esempio n. 12
0
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(
        DOMAIN,
        APPTOKEN,
        username=USERNAME,
        password=PASSWORD,
        session_adapter=mock_adapter,
    )

    uri = "{0}{1}{2}/{3}.json".format(PREFIX, DOMAIN, OLD_API_PATH,
                                      DATASET_IDENTIFIER)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(DATASET_IDENTIFIER)
    assert response.status_code == 200

    try:
        client.delete("foobar")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
    finally:
        client.close()
Esempio n. 13
0
def test_set_permission():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "empty.txt"
    setup_old_api_mock(adapter, "PUT", response_data, 200)

    # Test response
    response = client.set_permission(DATASET_IDENTIFIER, "public")
    assert response.status_code == 200

    # Test request
    request = adapter.request_history[0]
    query_string = request.url.split("?")[-1]
    params = query_string.split("&")

    assert len(params) == 2
    assert "method=setPermission" in params
    assert "value=public.read" in params

    client.close()
Esempio n. 14
0
def getDataset(dataset_id):
    table = ''
    try:
        # Creating Socrata Client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        data = client.get(dataset_id, content_type="json")
        data = str(data)
        data = data.replace("'", "\"")
        data = data.upper()
        #getting data to compare with the uploaded data
        #print(type(data))
        #print(data)
        table = pd.read_json(data)
        #Replacing NaN for ''
        table = table.replace(pd.np.nan, '', regex=True)
        table = table.to_html(classes='table-striped " id = "my_table',
                              index=False)
        vistas = client.get_metadata(dataset_id)
        vistas = str(vistas.get("viewCount"))
        client.close()
    except BaseException as e:
        #if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        client.close()
    return table, vistas
Esempio n. 15
0
def DatasetColumns(dataset_id):
    try:
        #Creating Socrata Client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        data = client.get(dataset_id, limit=2)
        data = sorted(data[0].keys())
        #getting columns that could be used in the dataframe
        columns = str(data).upper()
        columns = columns.replace("[", "")
        columns = columns.replace("]", "")
        columns = columns.replace("'", "")
        columns = columns.replace(" ", "")
        columns = columns.split(",")
        #getting data to compare with the uploaded data
        for i in range(0, len(data)):
            data[i] = (str(data[i]).upper(), str(data[i]).upper())
        client.close()
    except BaseException as e:
        #if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        data = None
        columns = None
        client.close()
    return data, columns
Esempio n. 16
0
def test_upsert():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(
        DOMAIN,
        APPTOKEN,
        username=USERNAME,
        password=PASSWORD,
        session_adapter=mock_adapter,
    )

    response_data = "upsert_songs.txt"
    data = [{
        "theme": "Surfing",
        "artist": "Wavves",
        "title": "King of the Beach",
        "year": "2010",
    }]
    setup_mock(adapter, "POST", response_data, 200)
    response = client.upsert(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 1
    client.close()
Esempio n. 17
0
class SocrataClient:
    def __init__(self, config=None):
        config = config['Socrata']

        domain = config['DOMAIN']
        token = None if config['TOKEN'] == 'None' else config['TOKEN']
        timeout = int(config['TIMEOUT'])

        self.client = Socrata(domain, token, timeout=timeout)
        self.attempts = int(config['ATTEMPTS'])
        self.config = config

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return self.config['AP' + str(year)]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(self.attempts):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < self.attempts - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)
Esempio n. 18
0
def property_metadata(address, save_csv=False, select=select_statement):
    '''
        Query SDAT
        Return dictionary with metadata
    '''
    client = Socrata(url_md, app_id)
    address = clean_addresses(address)
    results = client.get(
        id_md,
        select=select,
        where="mdp_street_address_mdp_field_address='{}'".format(address))

    df = pd.DataFrame(results)
    closest = difflib.get_close_matches(address.upper(), df.address, n=1)
    #print('Closest Property:', closest[0])
    df = df[df.address == closest[0]]
    #df['lon'] = df.location.apply(lambda x: x['coordinates'][0])
    #df['lat'] = df.location.apply(lambda x: x['coordinates'][1])
    client.close()
    if save_csv:
        sdat = pd.read_csv('sdat_properties.csv')
        #sdat = pd.concat([sdat, df], ignore_index=True)
        result = pd.concat([sdat, df])
        result.to_csv('sdat_properties.csv', index=False)
    return df
Esempio n. 19
0
def test_replace_non_data_file():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(
        DOMAIN,
        APPTOKEN,
        username=USERNAME,
        password=PASSWORD,
        session_adapter=mock_adapter,
    )

    response_data = "successblobres.txt"
    nondatasetfile_path = "tests/test_data/nondatasetfile.zip"

    setup_replace_non_data_file(adapter, "POST", response_data, 200)

    with open(nondatasetfile_path, "rb") as fin:
        file = {"file": ("nondatasetfile.zip", fin)}
        response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file)

    assert isinstance(response, dict)
    assert response.get("blobFileSize") == 496
    client.close()
Esempio n. 20
0
def get_exposed():

    # Expuestos
    client = Socrata("www.datos.gov.co", "6aek14sky6N2pVL12sw1qfzoQ")
    results = client.get("8835-5baf", limit=5000000)
    df_expuestos_col = pd.DataFrame.from_records(results)
    df_expuestos_col = df_expuestos_col[["fecha", "acumuladas"]]
    df_expuestos_col = df_expuestos_col.iloc[1:].reset_index(drop=True)
    df_expuestos_col["fecha"] = df_expuestos_col["fecha"].astype(str)
    df_expuestos_col["fecha"] = df_expuestos_col["fecha"].transform(
        lambda x: x[:10])
    df_expuestos_col["acumuladas"] = df_expuestos_col["acumuladas"].astype(
        float)
    df_expuestos_col = df_expuestos_col.sort_values(by=["fecha"]).reset_index(
        drop=True)

    # pasando de acumuladas a por dia
    df_expuestos_col["expuestos"] = 0
    for i, row in df_expuestos_col.iterrows():
        if i == 0:
            df_expuestos_col.loc[i, "expuestos"] = df_expuestos_col.loc[
                i, "acumuladas"]
        else:
            df_expuestos_col.loc[i, "expuestos"] = (
                df_expuestos_col.loc[i, "acumuladas"] -
                df_expuestos_col.loc[i - 1, "acumuladas"])
    client.close()
    return df_expuestos_col
Esempio n. 21
0
def update_crime_table():
    """
    Updates the crime reports table and inserts it into the database file (replacing an existing table of the same name).
    """
    logging.warn("UPDATING CRIME REPORTS TABLE")
    results = None
    # authenticated client with api token
    try:
        client = Socrata(CRIME_DATABASE_URL, SOCRATA_API_KEY)
        if not isinstance(client, Socrata):
            logging.error(
                "Failed to create Socrata object to receive crime database")
            client.close()
            return
        # First 10000000 results, as a python dictionary version of the JSON from API
        results = client.get(CRIME_DATABASE_UID, limit=10000000)
        client.close()
    except Exception as e:
        logging.error(
            'Fatal error when attempting to retreive crime database\n' +
            traceback.format_exception_only(*sys.exc_info()[0:2])[0])

    if results is None or len(results) < 1:
        logging.error("received no data from the crime database")
        return
    # Convert to pandas DataFrame
    database = pd.DataFrame.from_records(results)

    # CLEAN DATA
    # manually converting all instances of "NA" in the "Weapon" column to NaN
    # so that program can properly handle it for any calculations
    database['weapon'] = database['weapon'].replace("NA", np.nan)

    # fixing some formatting inconsistencies
    database['inside_outside'] = database['inside_outside'].replace(
        "(?i)outside", "O", regex=True)
    database['inside_outside'] = database['inside_outside'].replace(
        "(?i)inside", "I", regex=True)

    database['crimedate'] = pd.to_datetime(database['crimedate'],
                                           format="%Y-%m-%dT%H:%M:%S.%f")

    #clear out any special funny chars so we can clean the requested data
    database = database.replace("[#\"\'_;]", "", regex=True)
    database = database.astype({
        "longitude": float,
        "latitude": float,
        "total_incidents": float
    })
    database.to_sql(CRIME_TABLE_NAME,
                    con=DATABASE,
                    index=True,
                    index_label='id',
                    if_exists='replace')
    logging.info([
        " ".join([str(x['name']), str(x['type'])])
        for x in inspect(DATABASE).get_columns(CRIME_TABLE_NAME)
    ])
Esempio n. 22
0
class Service(object):
    def __init__(self, app_token, domain="data.cityofnewyork.us"):
        # print(f"domain={domain}, app_token={app_token}")
        self.client = Socrata(domain, app_token)

    def __enter__(self):
        return self

    def get_info(self, location="nc67-uf89", limit=10):
        try:
            print(f"location={location}, limit={str(limit)}")
            return self.client.get(location, limit=limit)
        except HTTPError as e:
            print(f"Failed to make API call: {e}")
            raise
        except KeyError as e:
            print(f"Failed to get rates from response: {e}")
            raise
        except Exception as e:
            print(f"Something went wrong: {e}")
            raise

    def get_next_info(self, location="nc67-uf89", limit=10, offset=10):
        print(f"limit={str(limit)}, offset={str(offset)}")
        try:
            return self.client.get(location, limit=limit, offset=offset)
        except HTTPError as e:
            print(f"Failed to make API call: {e}")
            raise
        except KeyError as e:
            print(f"Failed to get rates from response: {e}")
            raise
        except Exception as e:
            print(f"Something went wrong: {e}")
            raise

    def get_size(self, location="nc67-uf89"):
        try:
            ret = self.client.get(location, select='COUNT(*)')
            return int(ret[0]['COUNT'])
        except HTTPError as e:
            print(f"Failed to make API call: {e}")
            raise
        except KeyError as e:
            print(f"Failed to get rates from response: {e}")
            raise
        except Exception as e:
            print(f"Something went wrong: {e}")
            raise

    def close(self):
        self.client.close()

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
Esempio n. 23
0
def main():
    arguments = docopt(__doc__, version=__version__)

    client = Socrata(arguments['<site>'], arguments['-a'])

    try:
        if arguments['ls']:
            datasets = list_datasets(client, arguments['<site>'])
            print(tabulate(datasets, headers='keys', tablefmt='psql'))
        elif arguments['insert']:
            dataset_id = arguments['<dataset_id>']
            metadata = client.get_metadata(dataset_id)

            engine, session, geo = get_connection(arguments['-d'], metadata)
            Binding = get_binding(client, dataset_id, metadata, geo,
                                  arguments['-t'])

            # Create the table
            try:
                Binding.__table__.create(engine)
            except ProgrammingError as e:
                # Catch these here because this is our first attempt to
                # actually use the DB
                if 'already exists' in str(e):
                    raise CLIError(
                        'Destination table already exists. Specify a new table'
                        ' name with -t.')
                raise CLIError('Error creating destination table: %s' % str(e))

            num_rows = get_row_count(client, dataset_id)
            bar = FillingCirclesBar('  ▶ Loading from API', max=num_rows)

            # Iterate the dataset and INSERT each page
            for page in get_dataset(client, dataset_id):
                to_insert = []
                for row in page:
                    to_insert.append(Binding(**parse_row(row, Binding)))

                session.add_all(to_insert)
                session.flush()
                bar.next(n=len(to_insert))

            bar.finish()

            ui.item(
                'Committing rows (this can take a bit for large datasets).')
            session.commit()

            success = 'Successfully imported %s rows from "%s".' % (
                num_rows, metadata['name'])
            ui.header(success, color='\033[92m')

        client.close()
    except CLIError as e:
        ui.header(str(e), color='\033[91m')
Esempio n. 24
0
def datavis(request, dataset_id):
    ods = OpenDataSource.objects.get(pk=dataset_id)
    client = Socrata(ods.website, ods.token, ods.user, ods.password)
    dataset = DataSet.objects.get(pk=dataset_id)
    data = client.get(dataset.identifier)
    metadata = client.get_metadata(dataset.identifier)
    client.close()
    template = loader.get_template('datavis/datavis.html')
    data = json.dumps(data, indent=4, sort_keys=True)
    context = {'data': data, 'metadata': metadata, 'dataset': dataset}
    return HttpResponse(template.render(context, request))
Esempio n. 25
0
def main(startdate):
    # Unauthenticated client only works with public data sets.
    # SODAPYAPPTOKEN exists in defined in local bash_profile
    app_token = os.environ.get("SODAPYAPPTOKEN")
    client = Socrata("data.cityofchicago.org", app_token)
    download_food_inspections(client, startdate)
    download_crime_data(client, startdate)
    download_service_request_data_after_july01_2018(client)
    download_service_request_data_before_july01_2018(client, startdate)
    download_business_data(client, startdate)
    client.close()
Esempio n. 26
0
def get_owners_data():
    client = Socrata('data.cityofchicago.org', 'your-password-here')
    data_set = 'ezma-pppn'
    data = client.get(data_set,
                      query="""
                                            SELECT  
                                            account_number, owner_first_name, owner_last_name, owner_title                       
                                            LIMIT 300000                                          
                                            """)
    client.close()

    return data
Esempio n. 27
0
def CreateETL(view_data, title, description, category, tags):
    #preparing data
    cols = list(view_data)
    columns = [{
        "fieldName": cols[0].lower(),
        "name": cols[0],
        "dataTypeName": "text"
    }]
    for i in range(1, len(cols)):
        x = {
            "fieldName": cols[i].lower(),
            "name": cols[i],
            "dataTypeName": "text"
        }
        columns.append(x)
    tags = tags.split(",")
    #Uploadin data
    try:
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        print(tags)
        print(category)
        print(description)
        print(columns)
        print(cols)

        NewDataSet = client.create(title,
                                   description=description,
                                   columns=columns,
                                   tags=tags,
                                   category=category)

        client.publish(NewDataSet.get('id'))
        client.set_permission(NewDataSet.get('id'), "private")
        # Convertion to JSON
        datajson = view_data.to_json(None, orient='records')
        # JSON to list
        datajson = json.loads(datajson)
        client.replace(NewDataSet.get('id'), datajson)
        print('Socrata done')
        error = 'OK'
        dataset_id = NewDataSet.get('id')
        client.close()
    except BaseException as e:
        #if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        dataset_id = 'NoData'
    return error, dataset_id
Esempio n. 28
0
def main():
    global DATASET_ID
    #define mapping
    index_mapping()

    #arguments
    parser = argparse.ArgumentParser(
        description="Load data into elasticsearch. ")

    parser.add_argument('--page_size',
                        type=int,
                        help='specify the number of records on each page',
                        required=True,
                        metavar='')
    parser.add_argument('--num_pages',
                        type=int,
                        help='specify the number of pages you want to load in',
                        metavar='')
    args = parser.parse_args()

    #socrata
    API_KEY = os.environ['APP_TOKEN']
    socrata_domain = "data.cityofnewyork.us"
    client = Socrata(socrata_domain, API_KEY, timeout=120)

    counter = 0
    if args.num_pages != None:
        for num in range(args.num_pages):
            parse_and_push(client, args.page_size, num * args.page_size)
    else:
        edge = int(client.get(DATASET_ID, select='COUNT(*)')[0]['COUNT'])
        offset1 = 0
        while offset1 < edge:
            threads = []
            for i in range(10):
                offset1 = counter * args.page_size
                counter += 1
                t = threading.Thread(
                    target=parse_and_push,
                    args=(
                        client,
                        args.page_size,
                        offset1,
                    ),
                )
                threads.append(t)
                t.start()
            for th in threads:
                th.join()

    client.close()
Esempio n. 29
0
def get_bus_lic_data():
    client = Socrata('data.cityofchicago.org', 'your-password-here')
    data_set = 'uupf-x98q'
    data = client.get(data_set,
                      query="""
                            SELECT
                            license_id, account_number, legal_name, doing_business_as_name,
                            business_activity, address, city, state, zip_code, license_description,
                            license_start_date, expiration_date  
                            LIMIT 100000
                            """)
    client.close()

    return data
Esempio n. 30
0
def get_grouped_crime_data(start, end):
    seattle_time = pytz.timezone('America/Los_Angeles')

    start_date = pytz.utc.localize(datetime.utcfromtimestamp(int(start)/1000))
    local_start_date = start_date.astimezone(seattle_time).replace(tzinfo=None)

    end_date = pytz.utc.localize(datetime.utcfromtimestamp(int(end)/1000))
    local_end_date = end_date.astimezone(seattle_time).replace(tzinfo=None)

    # call crime data api
    client = Socrata(settings.SOURCE_DOMAIN, settings.SOCRATA_APP_TOKEN)

    limit = 2000
    offset = 0
    tries = 10

    # return the headers
    yield ['count', 'date', 'event_clearance_group']

    for _ in xrange(tries):
        # sodapy throws an exception if we get a bad response
        # no need to handle it here, we'll catch it on the outside
        response = client.get(
            settings.DATASET_ID,
            content_type="csv",
            select='date_trunc_ymd(event_clearance_date) as date, event_clearance_group, count(*)',
            where='event_clearance_date is not null \
                    and event_clearance_date >= \'{0}\' \
                    and event_clearance_date <= \'{1}\' \
                    and within_circle(incident_location, {2}, {3}, {4})'.format(
                        local_start_date.isoformat().split('+')[0],
                        local_end_date.isoformat().split('+')[0],
                        settings.CLINK_LAT,
                        settings.CLINK_LON,
                        settings.RADIUS),
            group='date, event_clearance_group',
            order='date DESC, count DESC',
            limit=limit,
            offset=offset
        )
        offset += len(response) - 1 # don't count the header
        if len(response) == 1:  # if it's just the header
            break   # then there's nothing left to ask for

        # trim off the header row
        # using itertools so we don't make a shallow copy of the giant list
        response_iter = islice(response, 1, None)
        for row in imap(_convert_grouped_row, response_iter):
            yield row
    client.close()
Esempio n. 31
0
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_publish_mock(adapter, "POST", response_data, 200)

    response = client.publish(DATASET_IDENTIFIER)
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Esempio n. 32
0
def test_get():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_songs.txt"
    setup_mock(adapter, "GET", response_data, 200)
    response = client.get(DATASET_IDENTIFIER)

    assert isinstance(response, list)
    assert len(response) == 10

    client.close()
Esempio n. 33
0
def test_get_metadata():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_song_metadata.txt"
    setup_old_api_mock(adapter, "GET", response_data, 200)
    response = client.get_metadata(DATASET_IDENTIFIER)

    assert isinstance(response, dict)
    assert "newBackend" in response
    assert "attachments" in response["metadata"]

    client.close()
Esempio n. 34
0
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)
    
    response_data = "create_foobar.txt"
    resource = "/api/views/songs/publication.json" # publish() removes .json
    set_up_mock(adapter, "POST", response_data, 200, resource=resource)
    
    response = client.publish("/resource/songs.json") # hard-coded so request uri is matched
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Esempio n. 35
0
def test_upsert():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "upsert_songs.txt"
    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    setup_mock(adapter, "POST", response_data, 200)
    response = client.upsert(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 1
    client.close()
Esempio n. 36
0
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    uri = "{0}{1}/api/views/{2}.json".format(PREFIX, DOMAIN, DATASET_IDENTIFIER)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(DATASET_IDENTIFIER)
    assert response.status_code == 200

    try:
        client.delete("foobar")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
    finally:
        client.close()
Esempio n. 37
0
def test_upsert_exception():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "403_response_json.txt"
    setup_mock(adapter, "POST", response_data, 403, reason="Forbidden")

    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    try:
        client.upsert(DATASET_IDENTIFIER, data)
    except Exception as e:
        assert isinstance(e, requests.exceptions.HTTPError)
    else:
        raise AssertionError("No exception raised for bad request.")
    finally:
        client.close()
Esempio n. 38
0
def test_set_permission():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "empty.txt"
    resource = "/api/views" + PATH
    set_up_mock(adapter, "PUT", response_data, 200, resource=resource)
    
    # Test response
    response = client.set_permission(PATH, "public")
    assert response.status_code == 200
    
    # Test request
    request = adapter.request_history[0]
    qs = request.url.split("?")[-1]
    assert qs == "method=setPermission&value=public.read"
    client.close()
Esempio n. 39
0
def test_replace():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "replace_songs.txt"
    data = [
        {"theme": "Surfing", "artist": "Wavves", "title": "King of the Beach",
         "year": "2010"},
        {"theme": "History", "artist": "Best Friends Forever",
         "title": "Abe Lincoln", "year": "2008"},
    ]
    setup_mock(adapter, "PUT", response_data, 200)
    response = client.replace(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 2
    client.close()
Esempio n. 40
0
def test_replace_non_data_file():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "successblobres.txt"
    nondatasetfile_path = 'tests/test_data/nondatasetfile.zip'

    setup_replace_non_data_file(adapter, "POST", response_data, 200)

    with open(nondatasetfile_path, 'rb') as fin:
        file = (
            {'file': ("nondatasetfile.zip", fin)}
        )
        response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file)

    assert isinstance(response, dict)
    assert response.get("blobFileSize") == 496
    client.close()
Esempio n. 41
0
 def get(self, request, *args, **kwargs):
     ''' Get criminal records from the dataset and save them the database as CriminalRecord objects'''
     ids = CriminalRecord.objects.values_list('case_id', flat=True)
     domain = settings.SOCRATA_DOMAIN
     token = settings.SOCRATA_APP_TOKEN
     endpoint = settings.SOCRATA_DATASET_ENDPOINT
     client = Socrata(domain, token)
     order = "date"
     where = 'latitude IS NOT NULL'
     offset = 0
     limit = 1000
     status = 200
     try:
         data = client.get(
             endpoint, order=order, where=where,
             offset=offset, limit=limit)
         while data:
             for record in data:
                 if self.to_int(record.get('id')) not in ids:
                     attrs = {
                         'case_id': self.to_int(
                             self.get_from_dict(record, 'id')),
                         'case_number': self.get_from_dict(
                             record, 'case_number'),
                         'date': datetime.strptime(
                             self.get_from_dict(record, 'date'),
                             '%Y-%m-%dT%H:%M:%S'),
                         'block': self.get_from_dict(record, 'block'),
                         'iucr': self.get_from_dict(record, 'iucr'),
                         'primary_type': self.get_from_dict(
                             record, 'primary_type'),
                         'crime_description': self.get_from_dict(
                             record, 'description'),
                         'location_description': self.get_from_dict(
                             record, 'location_description'),
                         'has_arrested': self.get_from_dict(
                             record, 'arrest'),
                         'is_domestic': self.get_from_dict(
                             record, 'domestic'),
                         'beat': self.get_from_dict(record, 'beat'),
                         'district': self.get_from_dict(
                             record, 'district'),
                         'ward': self.to_int(
                             self.get_from_dict(record, 'ward')),
                         'community_area': self.get_from_dict(
                             record, 'community_area'),
                         'fbi_code': self.get_from_dict(
                             record, 'fbi_code'),
                         'x_coordinate': self.to_int(
                             self.get_from_dict(
                                 record, 'x_coordinate')),
                         'y_coordinate': self.to_int(
                             self.get_from_dict(
                                 record, 'y_coordinate')),
                         'year': self.to_int(
                             self.get_from_dict(record, 'year')),
                         'updated_on': datetime.strptime(
                             self.get_from_dict(record, 'updated_on'),
                             '%Y-%m-%dT%H:%M:%S'),
                         'latitude': float(self.get_from_dict(
                             record, 'latitude')),
                         'longitude': float(self.get_from_dict(
                             record, 'longitude')),
                         'location': Point(
                             float(self.get_from_dict(
                                 record, 'longitude')),
                             float(self.get_from_dict(
                                 record, 'latitude')))
                     }
                     CriminalRecord.objects.create(**attrs)
             offset += limit
             data = client.get(
                 endpoint, order=order, where=where,
                 offset=offset, limit=limit)
         client.close()
     except Exception, e:
         print e
         status = 400
Esempio n. 42
0
theft = client.get("cuks-n6tp", select ="category,time,location", where ="category='VEHICLE THEFT'",limit=10)
print "vehicle theft data"
print theft
vandalism = client.get("cuks-n6tp", select ="category,time,location", where ="category='VANDALISM'",limit=10)
print "VANDALISM data"
print vandalism
kidnapping = client.get("cuks-n6tp", select ="category,time,location", where ="category='KIDNAPPING'",limit=10)
print "KIDNAPPING data"
print kidnapping
sex = client.get("cuks-n6tp", select ="category,time,location", where ="category='SEX OFFENSES, FORCIBLE'",limit=10)
print "SEX OFFENSES, FORCIBLE data"
print sex
dui = client.get("cuks-n6tp", select ="category,time,location", where ="category='DRIVING UNDER THE INFLUENCE'",limit=10)
print "DUI data"
print dui
client.close()

# search radius
# https://data.sfgov.org/resource/cuks-n6tp.json?$where=within_circle(location,%2037.78,%20-122.41,%201000)

# https://data.cms.gov/resource/97k6-zzx3.json?$limit=5
# client = Socrata("https://data.cms.gov/resource/", None)
# print client.get("http://data.cms.gov/resource/97k6-zzx3.json?$order=average_covered_charges$limit=1", limit=2)

# app token - need it eventually
# https://data.seattle.gov/resource/3k2p-39jp.json?$$app_token=APP_TOKEN
# https://dev.socrata.com/docs/app-tokens.html
# 8gffbg1meMZ1e2Z0yOz2OpwZq
# secret token at https://data.sfgov.org/profile/alex-wap/7wfp-cp8m/app_tokens
'''
SF DATA FORMAT
Esempio n. 43
0
def test_client():
    client = Socrata(DOMAIN, APPTOKEN)
    assert isinstance(client, Socrata)
    client.close()