def execute(self):
        dataset_id = '{}_{}'.format(self.dataset_url, self.dataset_filename)
        latest_status = self.redis_client.get(dataset_id)
        if latest_status is not None:
            latest_status = latest_status.decode(ENCODING)

        if latest_status == 'COMPLETED' or latest_status == 'ATTEMPT TO REFILL':
            self.redis_client.set(dataset_id, 'ATTEMPT TO REFILL')
            print('ATTEMPT TO REFILL')
            return False

        client = Socrata(self.dataset_url, None)
        self.redis_client.set(dataset_id, 'STARTED')

        for i in range(int(NUMBER_OF_MESSAGES / MESSAGES_PER_FETCH)):
            results = client.get(self.dataset_filename,
                                 limit=MESSAGES_PER_FETCH,
                                 offset=MESSAGES_PER_FETCH * i)

            results_df = pd.DataFrame.from_records(results)

            current_progress = '{} - {}'.format(
                str(i * MESSAGES_PER_FETCH + 1),
                str((i + 1) * MESSAGES_PER_FETCH))
            self.redis_client.set(dataset_id, current_progress)

            print('Progress {}'.format(current_progress))
            print(results_df)

        self.redis_client.set(self.dataset_url + "_" + self.dataset_filename,
                              'COMPLETED')
        print('COMPLETED')
Exemple #2
0
def get_population_data():
    client = Socrata("data.cityofnewyork.us",
                     app_token,
                     username=username,
                     password=password)
    result = client.get("27iv-9uub", limit=2000)
    return pd.DataFrame.from_records(result)
def get_311(data_id):
    '''
    Get 2012 and 2017 311 data from chicago data portal
    Input:
        data_id: id of different type of report
    Return:
        pandas dataframe with the columns and dtypes as COL_TYPES
    '''
    '''COL_TYPES = {'sr_type': str, 
                 #'created_date': str,
                 'zip_code': int,
                 'longitude': float,
                 'latitude': float,
                  'legacy_record': bool} '''
    #cols = [item for item in COL_TYPES.keys()]
    client = Socrata('data.cityofchicago.org',
                     'E0eO5nY1aKuEY1pVrunfqFhDz',
                     username='******',
                     password='******')

    conds = '''legacy_sr_number IS NULL'''
    res = client.get(
        data_id,
        #where= conds,
        limit=1000000)
    client.close()
    df = pd.DataFrame.from_records(res)
    #     df['created_date'] = pd.to_datetime(df['created_date'])
    #     df = df[df['created_date']>start_year]
    #     df = df[df['created_date']<end_year]
    return df
Exemple #4
0
def get_data():
    client = Socrata(socrata_domain, 'ohJoNJOw8qez7ABLZhzGUja4n')
    lmt = 1000000
    start_at = 0
    all_complaints = []

    while True:

        complaints = client.get(
            socrata_dataset_identifier,
            limit=lmt,
            offset=start_at,
            order="created_date ASC",
            select="created_date, borough, complaint_type, incident_zip",
            where=
            "created_date between '2017-01-01T00:00:00' and '2017-12-31T23:59:59'"
        )

        all_complaints.extend(complaints)
        start_at += lmt

        if len(complaints) < lmt:
            break

    headers = ['created_date', 'borough', 'complaint_type', 'incident_zip']

    with open('complaint.csv', 'w') as f:

        csvwriter = csv.DictWriter(f, headers)
        csvwriter.writeheader()
        csvwriter.writerows(all_complaints)

    print('SUCCESS. Number of complaints downloaded: {}'.format(
        len(all_complaints)))
    print('Saved to {} file'.format('complaint.csv'))
Exemple #5
0
 def fetchSocrata(self, year=2019, querySize=20000, pageSize=20000):
     '''Fetch data from Socrata connection and return pandas dataframe'''
     # Load config files
     print('Retrieving partial Socrata query...')
     fetchTimer = time.time()
     socrata_domain = self.config['Socrata']['DOMAIN']
     socrata_dataset_identifier = self.config['Socrata']['AP' + str(year)]
     socrata_token = self.token
     # Establish connection to Socrata resource
     client = Socrata(socrata_domain, socrata_token)
     # Fetch data
     # Loop for querying dataset
     queryDf = None
     for i in range(0, querySize, pageSize):
         # print(i + pageSize)
         results = client.get(socrata_dataset_identifier,
                              offset=i,
                              select="*",
                              order="updateddate DESC",
                              limit=querySize)
         tempDf = pd.DataFrame.from_dict(results)
         if queryDf is None:
             queryDf = tempDf.copy()
         else:
             queryDf = queryDf.append(tempDf)
     self.data = queryDf
     print('%d records retrieved in %.2f minutes' %
           (self.data.shape[0], self.elapsedTimer(fetchTimer)))
def fetch_311_data(zip,
                   max_query_results=None,
                   num_entries_to_search=10000,
                   t_out=10) -> Dict[str, any]:
    nyc_311_dataset_domain = "data.cityofnewyork.us"
    nyc_311_dataset_identifier = "fhrw-4uyv"
    try:
        nyc_311_dataset_token = get_311_socrata_key()
    except KeyError:
        nyc_311_dataset_token = (
            None  # works with None but lower number of requests can be made
        )

    client = Socrata(nyc_311_dataset_domain, nyc_311_dataset_token)

    client.timeout = t_out

    try:
        return client.get(
            nyc_311_dataset_identifier,
            select=
            "created_date, incident_zip, incident_address, city, complaint_type, descriptor, status",
            # q=str(zip), #uncomment if want to query directly on the server side (may lead to timeout)
            order="created_date DESC",
            limit=num_entries_to_search,
        )
    except requests.exceptions.Timeout:
        raise TimeoutError
Exemple #7
0
 def fetchSocrata(self, year=2019, querySize=10000):
     '''Fetch data from Socrata connection and return pandas dataframe'''
     # Load config files
     socrata_domain = self.config['Socrata']['DOMAIN']
     socrata_dataset_identifier = self.config['Socrata']['AP' + str(year)]
     socrata_token = self.token
     # Establish connection to Socrata resource
     client = Socrata(socrata_domain, socrata_token)
     # Fetch data
     metadata = client.get_metadata(socrata_dataset_identifier)
     # Loop for querying dataset
     queryDf = None
     for i in range(0, querySize, 1000):
         print(i)
         results = client.get(socrata_dataset_identifier,
                              offset=i,
                              select="*",
                              order="updateddate DESC")
         tempDf = pd.DataFrame.from_dict(results)
         if queryDf is None:
             queryDf = tempDf.copy()
         else:
             queryDf = queryDf.append(tempDf)
     self.data = queryDf
     # Fetch data
     metadata = client.get_metadata(socrata_dataset_identifier)
Exemple #8
0
    def execute(self):
        dataset_id = '{}_{}'.format(self.dataset_url, self.dataset_filename)
        latest_status = self.redis_client.get(dataset_id)

        kafka_server = '{}:{}'.format(
            config_reader.cfg.get('LAB', 'kafka_host'),
            config_reader.cfg.get('LAB', 'kafka_port'))
        producer = KafkaProducer(
            bootstrap_servers=[kafka_server],
            value_serializer=lambda v: json.dumps(v).encode(ENCODING))

        if latest_status == str(
                States.COMPLETED_STATUS) or latest_status == str(
                    States.ATTEMPT_TO_REFILL_STATUS):
            self.redis_client.set(dataset_id,
                                  str(States.ATTEMPT_TO_REFILL_STATUS))

        client = Socrata(self.dataset_url, None)
        self.redis_client.set(dataset_id, str(States.STARTED_STATUS))

        for i in range(NUMBER_OF_MESSAGES):
            results = client.get(self.dataset_filename, limit=1, offset=i)
            results_df = pd.DataFrame.from_records(results)

            current_progress = 'row #{}'.format(str(i))

            self.redis_client.set(dataset_id, current_progress)
            producer.send(config_reader.cfg.get('LAB', 'elastic_search_topic'),
                          results_df.to_dict())

            print("Results {}".format(current_progress))

        self.redis_client.set(dataset_id, str(States.COMPLETED_STATUS))
Exemple #9
0
class SocrataClient:
    def __init__(self, config=None):
        config = config['Socrata']

        domain = config['DOMAIN']
        token = None if config['TOKEN'] == 'None' else config['TOKEN']
        timeout = int(config['TIMEOUT'])

        self.client = Socrata(domain, token, timeout=timeout)
        self.attempts = int(config['ATTEMPTS'])
        self.config = config

    def __del__(self):
        self.client.close()

    def dataset_id(self, year):
        return self.config['AP' + str(year)]

    def get(self, year, **kwargs):
        id = self.dataset_id(year)
        for attempt in range(self.attempts):
            try:
                return self.client.get(id, **kwargs)
            except Exception as e:
                if attempt < self.attempts - 1:
                    continue
                else:
                    raise e

    def get_metadata(self, year):
        id = self.dataset_id(year)
        return self.client.get_metadata(id)
def PersonToTelefonoCorreo_u5cm_hpr6(m):
    TRX = MaltegoTransform()
    #m.parseArguments(sys.argv)
    #person=sys.argv[1]
    person = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("u5mc-hpr6", limit=2000)

        #for key, value in data.items():
        #print key, value
        for i in range(len(r)):
            if (r[i]['nombre'] == person):
                celular = r[i]['celular']
                email = r[i]['correo_electr_nico']
                break

        ent = TRX.addEntity('maltego.PhoneNumber', celular)
        ent.addAdditionalFields("phonenumber.countrycode", "Country Code",
                                True, "57")
        ent2 = TRX.addEntity('maltego.EmailAddress', email)

    except Exception as e:
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Exemple #11
0
    def get_unique_locations(self):
        # Unauthenticated client only works with public data sets. Note 'None'
        # in place of application token, and no username or password:
        client = Socrata("data.melbourne.vic.gov.au", None)

        # Example authenticated client (needed for non-public datasets):
        # client = Socrata(data.melbourne.vic.gov.au,
        #                  MyAppToken,
        #                  userame="*****@*****.**",
        #                  password="******")

        # First 2000 results, returned as JSON from API / converted to Python list of
        # dictionaries by sodapy.
        results = client.get("mxb8-wn4w", limit=1000)

        locations = set()
        for value in results:
            for key in value:
                if 'sensor_name' in key:
                    locations.add(value[key])

        """
        feature_collection = []
        for location in locations:
            print locations
            location.append
        """
        print locations
        return None
Exemple #12
0
def fetch_results(api, order=None, select=None, where=None, rowlimit=2000):
    client = Socrata(api['root'], None)
    return client.get(api['code'],
                      order=order,
                      select=select,
                      where=where,
                      limit=rowlimit)
Exemple #13
0
def import_geometries(ds_id, proj=None):
    '''
    Imports the shapefiles from a given data store on the City of Chicago data
    portal and retuns a geopandas dataframe linking geometries to different
    attributes.

    Inputs:
    ds_id (str): the data set identifier

    Returns: geodf from ds_id

    References:
    geopandas example docs: http://geopandas.org/gallery/create_geopandas_
        from_pandas.html?highlight=regular%20pandas
    '''
    if not proj:
        proj = {'init': 'epsg:4326'}

    client = Socrata('data.cityofchicago.org',
                     app_token,
                     username=,
                     password=)

    files = client.get(ds_id)
    df = pd.DataFrame(files)
    df['the_geom'] = df.the_geom.apply(shapely.geometry.shape)
    df = geo_pd.GeoDataFrame(df, geometry='the_geom')
    df.crs = proj
    df.drop(['shape_area', 'shape_len'], axis=1, inplace=True)

    return df
Exemple #14
0
def getData():

    from sodapy import Socrata
    from datetime import datetime

    global columnsList
    #apiEndpoint = "https://data.austintexas.gov/resource/hcup-htgu.csv"

    Date = datetime.now().strftime('%Y%m%d')

    # set up a connection
    client = Socrata("data.austintexas.gov", None)

    # retrieve data from the animal shelter resource
    # will retrieve as json doc from API aka Python list of dictionaries via sodapy
    results = client.get("hcup-htgu")

    # convert to pandas dataframe
    results_df = pd.DataFrame.from_records(results)

    columnsList = list(results_df)

    results_df.to_csv(fileMerged + Date + '.csv', index = False, encoding='utf-8', \
                      columns=columnsList)
    return columnsList
Exemple #15
0
 def get_driver_license_info(self):
     client = Socrata("www.datossct.gob.mx", None)
     for rec in self:
         try:
             driver_license = client.get('3qhi-59v6',
                                         licencia=rec.driver_license)
             license_valid_from = datetime.strptime(
                 driver_license[0]['fecha_inicio_vigencia'],
                 '%Y-%m-%dT%H:%M:%S.%f')
             license_expiration = datetime.strptime(
                 driver_license[0]['fecha_fin_vigencia'],
                 '%Y-%m-%dT%H:%M:%S.%f')
             rec.write({
                 'license_type':
                 driver_license[0]['categoria_de_la_licencia'],
                 'license_valid_from':
                 license_valid_from,
                 'license_expiration':
                 license_expiration,
             })
             client.close()
         except Exception:
             client.close()
             raise ValidationError(
                 _('The driver license is not in SCT database'))
Exemple #16
0
def PersonToDireccion(m):

    TRX = MaltegoTransform()
    #m.parseArguments(sys.argv)
    #nombre=sys.argv[1]
    nombre = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("3ard-sj8g", limit=2000)

        #for key, value in data.items():
        #print key, value
        for i in range(len(r)):
            if (r[i]['nombre_prestador'] == nombre):
                dir = r[i]['nombre_concejal']
                barrio = r[i]['barrio']
                l = r[i]['localizacion']['coordinates']
                break

        ent = TRX.addEntity('maltego.Location', dir)
        ent.addAdditionalFields("country", "Country", True, "Colombia")
        ent.addAdditionalFields("location.area", "Area", True, barrio)
        ent.addAdditionalFields("streetaddress", "Street Address", True, dir)
        ent.addAdditionalFields("longitude", "Longitude", True, l[0])
        ent.addAdditionalFields("latitude", "Latituded", True, l[1])

    except Exception as e:
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Exemple #17
0
    def indicator_value(self, indicator: str, day: datetime) -> list:
        """
        Populate provinces indicator

        :indicator: The indicator needed (must be a key of the indicator_map)
        :day: The day of interest
        :return: The average value of indicator
        """
        date_fmt = day.strftime('%Y-%m-%dT00:00.000')
        key = self.indicator_map[indicator]['key']
        param = self.indicator_map[indicator]['param']

        client = Socrata("www.dati.friuliveneziagiulia.it", None)
        sensors_data = client.get(key, limit=10**10, data_misura=date_fmt)

        for province in self.provinces:
            values = [
                x for x in sensors_data
                if x['rete'] == province.name and x['dati_insuff'] == 'False'
            ]

            if len(values) != 0:
                float_values = [float(x[param]) for x in values]
                setattr(province.quality, indicator,
                        round(mean(float_values), 2))
def create_results_df():
    """Create a dataframe based on JSON from the Chicago traffic API

    Args:
        None

    Returns:
        Dataframe object: ``results_df``

    """
    try:
        # First 2000 results, returned as JSON from API / converted to Python list of
        # dictionaries by sodapy.
        # Unauthenticated client only works with public data sets. Note 'None'
        # in place of application token, and no username or password:
        data_client = Socrata("data.cityofchicago.org", None)
        results = data_client.get(
            "8v9j-bter", limit=2000)  # unique id for chicago traffic data

        # Convert to pandas DataFrame
        results_df = pd.DataFrame.from_records(results)
        logger.info("Successfully created a pandas dataframe!")

        return results_df
    except Exception as e:
        logger.error("Failure to create a pandas dataframe :(")
        raise e
Exemple #19
0
def dataFrame(api_id, record):
    print("Importing dataframe from API...")
    
    res = requests.get(f'https://opendata.usac.org/resource/{api_id}.json?$select=count(*)')
    jsonRes = res.json()
    s1 = json.dumps(jsonRes)
    d2 = json.loads(s1)
    record_count = int(d2[0]['count'])
    
    fileName = str(now.strftime("%Y-%m-%d_%H-%M") + "_RHC_" + record + ".csv")
    filePath = os.path.join(os.getcwd(), fileName)
    
    call_range = int(record_count / 40000 + 1)
    offset = 0
    
    for i in range(call_range):
         
        client = Socrata("opendata.usac.org", None)
        results = client.get(api_id, limit=20000, offset=offset)
        
        results_df = pd.DataFrame.from_records(results)
        results_df.to_csv(filePath, sep="|", mode='a', header=False)
        
        if i == 0:
            print("Converting DataFrame into CSV")
        
        print(f'Offset #: {offset}')
        
        offset += 40000
Exemple #20
0
 def fillCDC500CitiesData(self):
     client = Socrata(
         "chronicdata.cdc.gov", 'QoQet97KEDYpMW4x4Manaflkp'
     )  #My (John Pham's) access token is QoQet97KEDYpMW4x4Manaflkp
     censusTractRetrievalString = "place_tractid='3651000-" + self.censusTractNum + "'"
     censusTract = client.get("47z2-4wuh",
                              where=censusTractRetrievalString,
                              content_type="json",
                              order="place_tractid ASC",
                              limit=1000)
     client.close()
     self.coreMenCrudePrev = float(
         censusTract[0]["corem_crudeprev"]
     )  #Model-based estimate for crude prevalence of older adult men aged >=65 years who are up to date on a core set of clinical preventive services: Flu shot past year, PPV shot ever, Colorectal cancer screening, 2016
     self.coreWomenCrudePrev = float(
         censusTract[0]["corew_crudeprev"]
     )  #Model-based estimate for crude prevalence of older adult women aged >=65 years who are up to date on a core set of clinical preventive services: Flu shot past year, PPV shot ever, Colorectal cancer screening, and Mammogram past 2 years, 2016
     self.colonScreenCrudePrev = float(
         censusTract[0]["colon_screen_crudeprev"]
     )  #Model-based estimate for crude prevalence of fecal occult blood test, sigmoidoscopy, or colonoscopy among adults aged 50–75 years, 2016
     self.mammoUseCrudePrev = float(
         censusTract[0]["mammouse_crudeprev"]
     )  #Model-based estimate for crude prevalence of mammography use among women aged 50–74 years, 2016
     self.teethLostCrudePrev = float(
         censusTract[0]["teethlost_crudeprev"]
     )  #Model-based estimate for crude prevalence of all teeth lost among adults aged >=65 years, 2016
     self.geolocation = censusTract[0][
         "geolocation"]  #Latitude, longitude of census tract centroid
     self.coreMenPercentage = self.coreMenCrudePrev / self.totalMales65Plus * 100  #save this value as a number in the range of 0-100
     self.coreWomenPercentage = self.coreWomenCrudePrev / self.totalFemales65Plus * 100  #save this value as a number in the range of 0-100
     self.mammoUsePercentage = self.mammoUseCrudePrev / self.totalFemales50To74 * 100  #save this value as a number in the range of 0-100
     self.colonScreenPercentage = self.colonScreenCrudePrev / self.totalOlderAdults50To74 * 100  #save this value as a number in the range of 0-100
     self.teethLostPercentage = self.teethLostCrudePrev / self.totalOlderAdults65Plus * 100  #save this value as a number in the range of 0-100
Exemple #21
0
def DatasetColumns(dataset_id):
    try:
        #Creating Socrata Client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        data = client.get(dataset_id, limit=2)
        data = sorted(data[0].keys())
        #getting columns that could be used in the dataframe
        columns = str(data).upper()
        columns = columns.replace("[", "")
        columns = columns.replace("]", "")
        columns = columns.replace("'", "")
        columns = columns.replace(" ", "")
        columns = columns.split(",")
        #getting data to compare with the uploaded data
        for i in range(0, len(data)):
            data[i] = (str(data[i]).upper(), str(data[i]).upper())
        client.close()
    except BaseException as e:
        #if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        data = None
        columns = None
        client.close()
    return data, columns
Exemple #22
0
def get_covid_data(zip):
    #Unauthenticated client only works with public data sets. Note 'None'
    # in place of application token, and no username or password:
    client = Socrata("data.sfgov.org", None)

    # Example authenticated client (needed for non-public datasets):
    # client = Socrata(data.sfgov.org,
    #                  MyAppToken,
    #                  userame="*****@*****.**",
    #                  password="******")

    # First 2000 results, returned as JSON from API / converted to Python list of
    # dictionaries by sodapy.
    results = client.get("favi-qct6", zip_code=zip, limit=2000)

    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)
    #user enters invalid zipcode
    if len(results)==0:
        label['text'] = ""
        label2['text'] = ""
        label3['text'] = "No results for ZIP code " + "'" + str(zip) + "'" + " \n\nPlease enter valid San Francisco ZIP code."
        label4['text'] = ""
        label5['text'] = ""

    else:
        label['text'] = "As of " + results[0]['data_as_of']
        label2['text'] = "For ZIP Code: " + results[0]['zip_code']
        label3['text'] = "No. of Cases: " + results[0]['count']
        label4['text'] = "Population Size (2017 Census estimate): "+ results[0]['acs_population']
        label5['text'] = "Estimated Rate of Cases (per 10k): " + results[0]['rate']
Exemple #23
0
def get_case_study_data():
    """
    Function to match complaint narratives to message ids used in case study

    """
    # Use Socrata client to collect consumer finance data
    client = Socrata("data.consumerfinance.gov", None)
    results = client.get("s6ew-h6mp", limit=99999999)

    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)
    results_df = results_df[['complaint_id', 'complaint_what_happened']]
    results_df.rename(columns={'complaint_what_happened':'text'}, inplace=True)
    results_df.complaint_id = results_df.complaint_id.astype('int64')

    # Load Case Study message ids
    case_study_df = pd.read_csv("case_study_msg_ids.csv")
    case_study_df.complaint_id = case_study_df.complaint_id.astype('int64')

    # Join by msg_id
    case_study_df = case_study_df.merge(results_df, on='complaint_id', how='left')
	
	# Drop NAs
    case_study_df.dropna(inplace=True)

    return case_study_df
def NombreToCargo(m):
    TRX = MaltegoTransform()
    #m.parseArguments(sys.argv)
    #nombre=sys.argv[1]
    nombre=m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("2gvv-khi3", limit=2000)
        #for key, value in data.items():
            #print key, value
        for i in range(len(r)):
            if ( r[i]['nombre'] ==  nombre.upper()) :
                cargo=r[i]['cargo']
                direccion = r[i]['direccion']
                email=r[i]['email']
                telefono=r[i]['telefono']
                break


        ent=TRX.addEntity('eci.Cargo', cargo)
        ent.addAdditionalFields("properity.direccion", "Direccion", True, direccion)
        ent.addAdditionalFields("properity.email", "Email", True, email)
        ent.addAdditionalFields("properity.telefono", "Telefono", True, telefono)

    except Exception as e:
        TRX.addUIMessage("Nombre no encontrado en la base de datos")

    TRX.returnOutput()
Exemple #25
0
def get_open_data(url, endpoint, api_key, limit=1000, query=''):
    #input: Socrata endpoint for data, and OPTIONAL query
    client = Socrata(url, api_key)
    out = []
    idx = 0
    time.sleep(5)  #sleep 20 seconds, to allow time to connect
    data = client.get(endpoint, limit=limit, offset=idx, where=query)
    time.sleep(20)  #sleep 20 seconds, to allow time to connect
    while len(data) > 0:
        #page through the results, appending to the out list
        out += data
        idx += limit
        data = client.get(endpoint, limit=limit, offset=idx, where=query)

    client.close()
    return out
    def run(self):
        # Autenticación en S3
        ses = boto3.session.Session(profile_name='luigi_dpa',
                                    region_name='us-west-2')
        s3_resource = ses.resource('s3')

        obj = s3_resource.Bucket(self.bucket)
        print(ses)
        # Autenticación del cliente:
        client = Socrata(settings.get('dburl'),
                         settings.get('apptoken'),
                         username=settings.get('user'),
                         password=settings.get('pass'))

        # los resultados son retornados como un archivo JSON desde la API /
        # convertida a una lista de Python usando sodapy
        client.timeout = 1000
        limit = 1000000000

        # query
        results = client.get(
            "erm2-nwe9",
            limit=limit,
            where=
            f"created_date between '{self.year}-{self.month}-{self.day}T00:00:00.000' and '{self.year}-{self.month}-{self.day}T23:59:59.999'"
        )
        with self.output().open('w') as json_file:
            json.dump(results, json_file)
Exemple #27
0
 def load_subset(self, limit, query=False):
     '''Uses the sodapy Socrata as an API to access the 
         Seattle Public Library checkouts dataset.'''
     client = Socrata(self.datasource, None)
     if query is not False:
         try:
             results = client.get(self.ID, query=query)
         except HTTPError:
             print('For queries, terms, such as select, groupby, or where,',
                   'need to be lowercase. Strings need to be in quotes',
                   'e.g. "EBOOK" and numbers need to not have quotes.')
         self.results_df = pd.DataFrame.from_records(results)
     else:
         results = client.get("tmmm-ytt6", limit=limit)
         self.results_df = pd.DataFrame.from_records(results)
     return self.results_df.head()
Exemple #28
0
def trafficDataIngestion(datalimit, start_datetime, file_dir):

    # Unauthenticated client only works with public data sets. Note 'None'
    # in place of application token, and no username or password:
    client = Socrata("data.cityofnewyork.us", None)

    date = f"data_as_of>'{start_datetime}'"  #se define la fecha de inicio de la consulta
    date = date.replace(" ", "T")
    print(date)
    
    columns = "data_as_of, id, speed, travel_time, link_name"  #columnas que se pediran a la api

    results = client.get("i4gi-tjb9", limit=datalimit, borough = "Manhattan", where = date, select = columns) 
    
    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)
    
    if results_df.empty:
        return False
    #-----------------------------------------datetime - time_hour --------------------------------------#
    results_df["datetime"] = results_df["data_as_of"].str[:-9] + "00:00"
    results_df["datetime_traffic"] = results_df["data_as_of"].str[:-4]
    
    results_df["datetime"] = pd.to_datetime(results_df["datetime"])
    results_df["weekday"] = results_df['datetime'].dt.day_name()
    results_df["datetime"] = results_df["datetime"].dt.strftime("%Y-%m-%dT%H:%M:%S")

    # se añaden nuevas columnas
    results_df = results_df[["datetime", "datetime_traffic", "weekday", "id", "speed", "travel_time", "link_name"]]

    #se guarda en la direccion del archivo pasado como parametro
    results_df.to_csv(file_dir, index=False)
    print(f"TrafficApi: {file_dir}")
    return True
Exemple #29
0
def getDataset(dataset_id):
    table = ''
    try:
        # Creating Socrata Client
        client = Socrata(cfg["web"],
                         cfg["token"],
                         username=cfg["email"],
                         password=cfg["password"])
        data = client.get(dataset_id, content_type="json")
        data = str(data)
        data = data.replace("'", "\"")
        data = data.upper()
        #getting data to compare with the uploaded data
        #print(type(data))
        #print(data)
        table = pd.read_json(data)
        #Replacing NaN for ''
        table = table.replace(pd.np.nan, '', regex=True)
        table = table.to_html(classes='table-striped " id = "my_table',
                              index=False)
        vistas = client.get_metadata(dataset_id)
        vistas = str(vistas.get("viewCount"))
        client.close()
    except BaseException as e:
        #if there is an error, reload login with error message
        error = str(e)
        print('Error description:')
        print(error)
        client.close()
    return table, vistas
def retrieve_save_data(**kwargs):
    # Using Authenticated Client:
    # API Document: https://dev.socrata.com/foundry/data.lacity.org/aub4-z9pc

    client = Socrata("data.lacity.org", MY_APP_TOKEN, USERNAME, PASSWORD)
    # Getting the total number of rows in the dataset
    # row_count = client.get("aub4-z9pc", select="count(srnumber)")
    # row_count = pd.DataFrame.from_records(row_count)
    # row_count = row_count.count_srnumber[0]

    # Grabs last 10K records for upserting
    row_count = 10000

    logging.info(
        "The number of rows is read successfully. Now it's pulling data.")

    # Retrieving dataset using API, limit=row_count is to specify we are retrieving all the rows
    dataset = client.get("aub4-z9pc", content_type="csv", limit=row_count)

    logging.info("the dataset is pulled successfully.")

    # Remove prev file if exist
    if os.path.exists(filename):
        logging.info(filename + " already exist. Overwriting it")
        os.remove(filename)

    # Writing new file
    with open(filename, 'w') as writeFile:
        writer = csv.writer(writeFile)
        writer.writerows(dataset)

    writeFile.close()

    logging.info(
        "the dataset is saved successfully, locally at {}.".format(filename))
Exemple #31
0
    def getData(self):
        # Get crime records via API
        if self.storage == "remote":
            print("Getting crime data from remote source.\nThis takes a while (approx. 5 mins)! Please be patient.")

            # API request information
            client_crime = Socrata('data.lacity.org','7pTgt6f2oTY53aDI1jXNJoNZD')
            offset_temp = 0
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            cur.execute("DROP TABLE IF EXISTS 'Crime2016'")

            # Getting data in dataframe then manipulate before storing in ReferralCrimeMap.db
            while True:
                results = client_crime.get("7fvc-faax", limit=10000, offset=offset_temp)
                crime_df_temp = pd.DataFrame.from_records(results)
                # This loop stops when the next block of dataframe is empty
                if crime_df_temp.empty == True:
                    break

                # Split location_1 into lat and long
                # Create 'year_rptd' to filter cases reported in 2016
                # Create 'count' for later data analysis
                crime_df_temp['location_1'] = crime_df_temp['location_1'].astype('str')
                crime_df_temp['long'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[0])
                crime_df_temp['lat'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[-1])
                crime_df_temp['year_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[0])
                crime_df_temp['month_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[1])
                crime_df_temp['count'] = 1
                crime_df_temp = crime_df_temp[crime_df_temp['year_rptd']=='2016']

                # Insert dataframe into ReferralCrimeMap.db
                pd_sql.to_sql(crime_df_temp, 'Crime2016', conn, if_exists='append', index=False)
                offset_temp+=10000

                # Shows the percentage of data 
                if offset_temp % 100000 == 0:
                    print(offset_temp/2000000*100,"%")
                else:
                    continue
            cur.execute("SELECT * FROM Crime2016")
            print(cur.fetchone())
            conn.close()

        # Load local data if -source is set to local
        else:
            print("Getting crime data from local source.")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = "SELECT * FROM Crime"
            try:
                crime = pd.read_sql(query, conn)
                conn.close()
                print(crime.head())
            except Exception as e:
                print("There is an error:", e)
                print("Please set data course as remote.")
                exit()
Exemple #32
0
def gen_data(filepath, api_key, username=None, password=None, output='json'):
        api = Socrata('data.seattle.gov', api_key, username=username, password=password)

        with open(filepath) as fp:
            uid = set([i.strip() for i in fp.readlines()]) | set([i.strip() for i in open('completed.json')])

        for dataset in uid:
            print(dataset, file=open('completed.json', 'a'))
            yield {dataset: api.get('/resource/' + dataset + '.' + output)}
def createBarChart(charttype):
    with open("database_charts/chartconfig.json") as config:
        jsonData = json.load(config)
    config.close()
    chart = jsonData[charttype]

    chartTitle = chart['chart-title']
    dataTitle = chart['data-title']
    yFormat = chart['y-axis-format']

    xaxis = chart['x-axis']
    items = chart['y-axis']
    colors = chart['color']

    with open("database_charts/url_info.json") as urlConfig:
        urlInfo = json.load(urlConfig)
    client = Socrata(urlInfo['url'], None, username=urlInfo['username'], password=urlInfo['password'])

    colorIndex = 0
    data = []

    for item in items:
        dict = {
            "type": chart["chart-type"],
            "legendText": item.replace("_", " "),
            "cursor": "pointer" if len(items) > 1 else "default",
            "showInLegend": True,
            'legendMarkerColor': colors[colorIndex],
            "toolTipContent": item.replace("_", " ") + " in year " + "{label}: {y}"
        }

        dataPoints = []
        request = client.get(dataTitle, select=xaxis + ", " + item)
        for r in request:
            if len(r) > 1:
                d = {
                    "label": int(r[xaxis]),
                    "y": int(r[item]),
                    "color": colors[colorIndex],
                }
            if d['label'] is not None:
                dataPoints.append(d)
        colorIndex += 1

        dict["dataPoints"] = dataPoints
        data.append(dict)

        # Create Chart Information
        chartInfo = {}
        chartInfo["chartTitle"] = chartTitle
        chartInfo["data"] = data
        chartInfo["json"] = json.dumps(data)
        chartInfo["valueFormat"] = yFormat
        chartInfo['addClick'] = False if len(items) == 1 else True
    return chartInfo
    def createPieChart(self):
        with open("database_charts/chartconfig.json") as config:
            data = json.load(config)

        chartconfig = {}
        for key, value in data.items():
            if key == self.chartType:
                chartconfig = value

        # The position index from the excel file
        xaxis = chartconfig['x-axis']
        # An array of dictionary with Name of the category and index in excel file
        yaxis = chartconfig['y-axis']

        charttitle = chartconfig['chart-title']
        datatitle = chartconfig['data-title']
        colors = chartconfig['color']
        colorIndex = 0

        with open("database_charts/url_info.json") as urlConfig:
            urlInfo = json.load(urlConfig)
        client = Socrata(urlInfo['url'], None, username=urlInfo['username'], password=urlInfo['password'])

        request = client.get(datatitle, select=",".join(yaxis), where=xaxis + "=" + str(self.year))[0]
        newData = []
        dic = {
            "type": "pie",
            "showInLegend": True,
            "toolTipContent": "{y} - #percent %",
            "yValueFormatString": "#0.#,,. Million",
            "legendText": "{indexLabel}",
        }
        dataPoints = {}
        for cat in yaxis:
            if cat in request.keys() and int(request[cat]) > 0:
                if cat not in dataPoints.keys():
                    dataPoints[cat] = {'y': int(request[cat]), 'indexLabel': cat.replace("_", " "),
                                       'legendMarkerColor': colors[colorIndex], 'color': colors[colorIndex]}
                else:
                    dataPoints[cat]['y'] += int(request[cat])
            colorIndex += 1

        for item in dataPoints:
            newData.append(dataPoints[item])

        dic['dataPoints'] = newData
        pieChartInfo = {}
        pieChartInfo['title'] = charttitle + str(self.year)
        pieChartInfo['data'] = dic
        pieChartInfo['json'] = json.dumps(dic)
        return pieChartInfo
Exemple #35
0
def test_get():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_songs.txt"
    setup_mock(adapter, "GET", response_data, 200)
    response = client.get(DATASET_IDENTIFIER)

    assert isinstance(response, list)
    assert len(response) == 10

    client.close()
Exemple #36
0
    def getData(self):
        if self.storage == "remote":
            print("Getting DCFS referral data from remote source.")

            # API request
            client_dcfs = Socrata('data.lacounty.gov','7pTgt6f2oTY53aDI1jXNJoNZD')
            offset_temp = 0
            dcfs_df=pd.DataFrame()

            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            cur.execute('DROP TABLE IF EXISTS dcfs')

            print('Inserting DCFS referral dataframe into ReferralCrimeMap.db.')
            while True:
                results = client_dcfs.get("8vmx-hhtu", limit=5000, offset=offset_temp)
                dcfs_df = pd.DataFrame.from_records(results)
                # Break the loop and stop requesting if the block is empty 
                if dcfs_df.empty == True:
                    break
                # Insert dataframe into ReferralCrimeMap.db
                dcfs_df['location'] = dcfs_df['location'].astype('str')
                pd_sql.to_sql(dcfs_df, 'dcfs', conn, if_exists='append', index=False)
                
                offset_temp+=5000
                # I didn't use time.sleep as this API is unlimited
                # time.sleep(1)
            conn.close()

        else:
            print ("Getting DCFS referral data from local source.")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = '''
                    SELECT *
                    FROM dcfs
                    '''
            try:
                dcfs = pd.read_sql(query, conn)
                conn.close()
                print(dcfs.head())
            # If the table does not exist it will throw an error.
            except Exception as e:
                print('There is an error:', e)
                print('Please enter remote source.')
                exit()
Exemple #37
0
def fetch_data():
	# Make a connection	
	conn = r.connect(host="localhost", port=28015, db="test")
	# You need to register the App Token (manually?)
	client = Socrata("data.sunshinecoast.qld.gov.au", "6MbT9NoWolynKM1ooRzrvm7Fs")
	# API Endpoint
	endpoint = "/resource/mn3m-fqri.json"
	off = 0
	while True:
		data = client.get(endpoint, limit=50000,offset=off)
		if len(data) == 0:
			break
		for elem in data:
			try:
				# Only store if we have a date
				cur = elem['d_date_rec']
				r.table("planning").insert(elem).run(conn)
			except KeyError:
				pass
		off = off+50000
def get_resource_data(socrata_resource, since=None, limit=1000):
    client = Socrata(socrata_resource.domain, socrata_resource.token)
    kwargs = {
        'limit': limit,
        'where': socrata_resource.conditions,
    }
    if socrata_resource.unique_key:
        kwargs['order'] = socrata_resource.unique_key

    while True:
        if since:
            kwargs['where'] = "{} and {} > '{}'".format(
                socrata_resource.conditions,
                socrata_resource.unique_key,
                since
            )
        batch = client.get(socrata_resource.endpoint, **kwargs)
        if len(batch) > 0:
            since = batch[-1][socrata_resource.unique_key]
            yield batch
        else:
            return
Exemple #39
0
def run_script():
    client = Socrata("data.sfgov.org", "wvRAyq5wvCnf9YKGmiuZ7T9y3")
    fetched_data = client.get("/resource/rqzj-sfat.json", select="objectid,latitude,longitude,fooditems,expirationdate")
    data = []

    for i in fetched_data:
        a = Node(
            i.get("objectid", None),
            i.get("latitude", None),
            i.get("longitude", None),
            i.get("fooditems", None),
            i.get("expirationdate", None),
        )
        print repr(a)
        data.append(a._asdict())

    urlparse.uses_netloc.append("postgres")
    try:
        url = urlparse.urlparse(os.environ["DATABASE_URL"])
        conn = psycopg2.connect(
            database=url.path[1:], user=url.username, password=url.password, host=url.hostname, port=url.port
        )
    except:
        url = urlparse.urlparse("postgresql://localhost/mydb")
        conn = psycopg2.connect(
            database=url.path[1:], user=url.username, password=url.password, host=url.hostname, port=url.port
        )
        print "I am unable to connect to the database"
    cur = conn.cursor()
    try:
        cur.executemany(
            "insert into foodtrucks(objectid,latitude, longitude,fooditems,expirationdate) select %(objectid)s,%(latitude)s,%(longitude)s,%(fooditems)s,%(expirationdate)s where not exists (select 1 from foodtrucks where objectid=%(objectid)s)",
            data,
        )
    except Exception as e:
        print "unable to query postgres =>", e
    conn.commit()
    return render_template("successScript.html")
Exemple #40
0
def run_script():
    client = Socrata("data.sfgov.org","wvRAyq5wvCnf9YKGmiuZ7T9y3")
    fetched_data = client.get("/resource/rqzj-sfat.json",select="objectid,latitude,longitude,fooditems,expirationdate")
    for row in fetched_data:
        print str(row)
    print "fetched_data", str(fetched_data)
    data = []

    for i in fetched_data:
        a = Node(i.get('objectid',None),i.get('latitude',None),i.get('longitude',None),i.get('fooditems',None),i.get('expirationdate',None))
        print repr(a)
        data.append(a._asdict())

    print data
    try:
        conn = psycopg2.connect("dbname='mydb'")
    except:
        print "I am unable to connect to the database"
    cur = conn.cursor()
    try:
        cur.executemany("insert into foodtrucks(objectid,latitude, longitude,fooditems,expirationdate) select %(objectid)s,%(latitude)s,%(longitude)s,%(fooditems)s,%(expirationdate)s where not exists (select 1 from foodtrucks where objectid=%(objectid)s)",data)
    except Exception as e:
        print "unable to query postgres =>", e
    conn.commit()
Exemple #41
0
 def get(self, request, *args, **kwargs):
     ''' Get criminal records from the dataset and save them the database as CriminalRecord objects'''
     ids = CriminalRecord.objects.values_list('case_id', flat=True)
     domain = settings.SOCRATA_DOMAIN
     token = settings.SOCRATA_APP_TOKEN
     endpoint = settings.SOCRATA_DATASET_ENDPOINT
     client = Socrata(domain, token)
     order = "date"
     where = 'latitude IS NOT NULL'
     offset = 0
     limit = 1000
     status = 200
     try:
         data = client.get(
             endpoint, order=order, where=where,
             offset=offset, limit=limit)
         while data:
             for record in data:
                 if self.to_int(record.get('id')) not in ids:
                     attrs = {
                         'case_id': self.to_int(
                             self.get_from_dict(record, 'id')),
                         'case_number': self.get_from_dict(
                             record, 'case_number'),
                         'date': datetime.strptime(
                             self.get_from_dict(record, 'date'),
                             '%Y-%m-%dT%H:%M:%S'),
                         'block': self.get_from_dict(record, 'block'),
                         'iucr': self.get_from_dict(record, 'iucr'),
                         'primary_type': self.get_from_dict(
                             record, 'primary_type'),
                         'crime_description': self.get_from_dict(
                             record, 'description'),
                         'location_description': self.get_from_dict(
                             record, 'location_description'),
                         'has_arrested': self.get_from_dict(
                             record, 'arrest'),
                         'is_domestic': self.get_from_dict(
                             record, 'domestic'),
                         'beat': self.get_from_dict(record, 'beat'),
                         'district': self.get_from_dict(
                             record, 'district'),
                         'ward': self.to_int(
                             self.get_from_dict(record, 'ward')),
                         'community_area': self.get_from_dict(
                             record, 'community_area'),
                         'fbi_code': self.get_from_dict(
                             record, 'fbi_code'),
                         'x_coordinate': self.to_int(
                             self.get_from_dict(
                                 record, 'x_coordinate')),
                         'y_coordinate': self.to_int(
                             self.get_from_dict(
                                 record, 'y_coordinate')),
                         'year': self.to_int(
                             self.get_from_dict(record, 'year')),
                         'updated_on': datetime.strptime(
                             self.get_from_dict(record, 'updated_on'),
                             '%Y-%m-%dT%H:%M:%S'),
                         'latitude': float(self.get_from_dict(
                             record, 'latitude')),
                         'longitude': float(self.get_from_dict(
                             record, 'longitude')),
                         'location': Point(
                             float(self.get_from_dict(
                                 record, 'longitude')),
                             float(self.get_from_dict(
                                 record, 'latitude')))
                     }
                     CriminalRecord.objects.create(**attrs)
             offset += limit
             data = client.get(
                 endpoint, order=order, where=where,
                 offset=offset, limit=limit)
         client.close()
     except Exception, e:
         print e
         status = 400
Exemple #42
0
from sodapy import Socrata

# client = Socrata("sandbox.demo.socrata.com", None)
# print client.get("nimj-3ivp", limit=10)

# client = Socrata("data.cms.gov/", None)
# print client.get("97k6-zzx3", limit=10)
# https://resource/97k6-zzx3.json?$limit=5

client = Socrata("data.sfgov.org", "8gffbg1meMZ1e2Z0yOz2OpwZq")
#client.get("cuks-n6tp", limit=1)
assault = client.get("cuks-n6tp", select ="category,time,location", where ="category='ASSAULT'",limit=10)
print "assault data"
print assault
theft = client.get("cuks-n6tp", select ="category,time,location", where ="category='VEHICLE THEFT'",limit=10)
print "vehicle theft data"
print theft
vandalism = client.get("cuks-n6tp", select ="category,time,location", where ="category='VANDALISM'",limit=10)
print "VANDALISM data"
print vandalism
kidnapping = client.get("cuks-n6tp", select ="category,time,location", where ="category='KIDNAPPING'",limit=10)
print "KIDNAPPING data"
print kidnapping
sex = client.get("cuks-n6tp", select ="category,time,location", where ="category='SEX OFFENSES, FORCIBLE'",limit=10)
print "SEX OFFENSES, FORCIBLE data"
print sex
dui = client.get("cuks-n6tp", select ="category,time,location", where ="category='DRIVING UNDER THE INFLUENCE'",limit=10)
print "DUI data"
print dui
client.close()
def call_api():
    """Request data from socrata api and get back JSON."""
    client = Socrata("data.seattle.gov", 'SOCRATA_TOKEN')
    data = client.get("ih58-ykqj", content_type="json", limit=49998)
    return data
Exemple #44
0
time_start = time.time() #Timer start for script
#Setup APIs call and initial variables
gmaps = googlemaps.Client(key=g_api_key)
api_setup = Socrata(socrata_api, socrata_app_token, socrata_id, socrata_pw)
max_retry = 3

#Setup Dictionary of found locations and coordinates
location_dict = {}
coord_pets = Pet.objects.filter(loc_lat__isnull=False).order_by('-intake_at')
for x in coord_pets: 
    if not location_dict.has_key(x.found_location):
        location_dict[x.found_location] = {"lat": x.loc_lat, "lon": x.loc_lon}
print coord_pets.count()

#Get record count from AAC
record_count = api_setup.get(intake_endpoint, select = "count('')")
record_count_out = api_setup.get(outcome_endpoint, select = "count('')")
count = record_count[0]['count'].encode('ascii','ignore')
count_out = record_count_out[0]['count'].encode('ascii','ignore')

#Functions to Update Database
def pull_in_intakes(intakes):
    for x in intakes:
        intime = datetime.strptime(x["intake_date"], '%m/%d/%y')
        if len(Pet.objects.filter(animal_id = x["animal_id"])) == 0: #Create a new pet
            new_pet = Pet()
            if x.has_key("name"):
                new_pet.name = x["name"]
            else:
                new_pet.name = "No Name"
            new_pet.animal_id = x["animal_id"]
Exemple #45
0
from sodapy import Socrata
import csv

client = Socrata(site, app_token, username=user, password=passw)


dset = "/resource/xb7i-cvg2.json"
filepath = "grants-trunc.csv"

client.get(dset)
rowlist = []
with open(filepath, "rb") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        rowlist.append(row)

client.replace(dset, rowlist)