Example #1
def test_create():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None)

    columns = [
        {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"},
        {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"}
    ]
    tags = ["foo", "bar"]
    response = client.create("Foo Bar", description="test dataset",
        columns=columns, tags=tags, row_identifier="bar")

    request = adapter.request_history[0]
    request_payload = json.loads(request.text) # can't figure out how to use .json

    # Test request payload
    for dataset_key in ["name", "description", "columns", "tags"]:
        assert dataset_key in request_payload

    for column_key in ["fieldName", "name", "dataTypeName"]:
        assert column_key in request_payload["columns"][0]

    # Test response
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Example #2
def test_set_permission():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "empty.txt"
    setup_old_api_mock(adapter, "PUT", response_data, 200)

    # Test response
    response = client.set_permission(DATASET_IDENTIFIER, "public")
    assert response.status_code == 200

    # Test request
    request = adapter.request_history[0]
    query_string = request.url.split("?")[-1]
    params = query_string.split("&")

    assert len(params) == 2
    assert "method=setPermission" in params
    assert "value=public.read" in params

    client.close()
Example #3
    def getData(self):
        # Get crime records via API
        if self.storage == "remote":
            print("Getting crime data from remote source.\nThis takes a while (approx. 5 mins)! Please be patient.")

            # API request information
            client_crime = Socrata('data.lacity.org','7pTgt6f2oTY53aDI1jXNJoNZD')
            offset_temp = 0
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            cur.execute("DROP TABLE IF EXISTS 'Crime2016'")

            # Getting data in dataframe then manipulate before storing in ReferralCrimeMap.db
            while True:
                results = client_crime.get("7fvc-faax", limit=10000, offset=offset_temp)
                crime_df_temp = pd.DataFrame.from_records(results)
                # This loop stops when the next block of dataframe is empty
                if crime_df_temp.empty == True:
                    break

                # Split location_1 into lat and long
                # Create 'year_rptd' to filter cases reported in 2016
                # Create 'count' for later data analysis
                crime_df_temp['location_1'] = crime_df_temp['location_1'].astype('str')
                crime_df_temp['long'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[0])
                crime_df_temp['lat'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[-1])
                crime_df_temp['year_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[0])
                crime_df_temp['month_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[1])
                crime_df_temp['count'] = 1
                crime_df_temp = crime_df_temp[crime_df_temp['year_rptd']=='2016']

                # Insert dataframe into ReferralCrimeMap.db
                pd_sql.to_sql(crime_df_temp, 'Crime2016', conn, if_exists='append', index=False)
                offset_temp+=10000

                # Shows the percentage of data 
                if offset_temp % 100000 == 0:
                    print(offset_temp/2000000*100,"%")
                else:
                    continue
            cur.execute("SELECT * FROM Crime2016")
            print(cur.fetchone())
            conn.close()

        # Load local data if -source is set to local
        else:
            print("Getting crime data from local source.")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = "SELECT * FROM Crime"
            try:
                crime = pd.read_sql(query, conn)
                conn.close()
                print(crime.head())
            except Exception as e:
                print("There is an error:", e)
                print("Please set data course as remote.")
                exit()
Example #4
def gen_data(filepath, api_key, username=None, password=None, output='json'):
        api = Socrata('data.seattle.gov', api_key, username=username, password=password)

        with open(filepath) as fp:
            uid = set([i.strip() for i in fp.readlines()]) | set([i.strip() for i in open('completed.json')])

        for dataset in uid:
            print(dataset, file=open('completed.json', 'a'))
            yield {dataset: api.get('/resource/' + dataset + '.' + output)}
Example #5
def createBarChart(charttype):
    with open("database_charts/chartconfig.json") as config:
        jsonData = json.load(config)
    config.close()
    chart = jsonData[charttype]

    chartTitle = chart['chart-title']
    dataTitle = chart['data-title']
    yFormat = chart['y-axis-format']

    xaxis = chart['x-axis']
    items = chart['y-axis']
    colors = chart['color']

    with open("database_charts/url_info.json") as urlConfig:
        urlInfo = json.load(urlConfig)
    client = Socrata(urlInfo['url'], None, username=urlInfo['username'], password=urlInfo['password'])

    colorIndex = 0
    data = []

    for item in items:
        series = {
            "type": chart["chart-type"],
            "legendText": item.replace("_", " "),
            "cursor": "pointer" if len(items) > 1 else "default",
            "showInLegend": True,
            'legendMarkerColor': colors[colorIndex],
            "toolTipContent": item.replace("_", " ") + " in year " + "{label}: {y}"
        }

        dataPoints = []
        request = client.get(dataTitle, select=xaxis + ", " + item)
        for r in request:
            if len(r) > 1:
                d = {
                    "label": int(r[xaxis]),
                    "y": int(r[item]),
                    "color": colors[colorIndex],
                }
                if d['label'] is not None:
                    dataPoints.append(d)
        colorIndex += 1

        dict["dataPoints"] = dataPoints
        data.append(dict)

        # Create Chart Information
        chartInfo = {}
        chartInfo["chartTitle"] = chartTitle
        chartInfo["data"] = data
        chartInfo["json"] = json.dumps(data)
        chartInfo["valueFormat"] = yFormat
        chartInfo['addClick'] = False if len(items) == 1 else True
    return chartInfo
Example #6
    def createPieChart(self):
        with open("database_charts/chartconfig.json") as config:
            data = json.load(config)

        chartconfig = {}
        for key, value in data.items():
            if key == self.chartType:
                chartconfig = value

        # The position index from the excel file
        xaxis = chartconfig['x-axis']
        # An array of dictionary with Name of the category and index in excel file
        yaxis = chartconfig['y-axis']

        charttitle = chartconfig['chart-title']
        datatitle = chartconfig['data-title']
        colors = chartconfig['color']
        colorIndex = 0

        with open("database_charts/url_info.json") as urlConfig:
            urlInfo = json.load(urlConfig)
        client = Socrata(urlInfo['url'], None, username=urlInfo['username'], password=urlInfo['password'])

        request = client.get(datatitle, select=",".join(yaxis), where=xaxis + "=" + str(self.year))[0]
        newData = []
        dic = {
            "type": "pie",
            "showInLegend": True,
            "toolTipContent": "{y} - #percent %",
            "yValueFormatString": "#0.#,,. Million",
            "legendText": "{indexLabel}",
        }
        dataPoints = {}
        for cat in yaxis:
            if cat in request.keys() and int(request[cat]) > 0:
                if cat not in dataPoints.keys():
                    dataPoints[cat] = {'y': int(request[cat]), 'indexLabel': cat.replace("_", " "),
                                       'legendMarkerColor': colors[colorIndex], 'color': colors[colorIndex]}
                else:
                    dataPoints[cat]['y'] += int(request[cat])
            colorIndex += 1

        for item in dataPoints:
            newData.append(dataPoints[item])

        dic['dataPoints'] = newData
        pieChartInfo = {}
        pieChartInfo['title'] = charttitle + str(self.year)
        pieChartInfo['data'] = dic
        pieChartInfo['json'] = json.dumps(dic)
        return pieChartInfo
Example #7
def test_get():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_songs.txt"
    setup_mock(adapter, "GET", response_data, 200)
    response = client.get(DATASET_IDENTIFIER)

    assert isinstance(response, list)
    assert len(response) == 10

    client.close()
Example #8
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_publish_mock(adapter, "POST", response_data, 200)

    response = client.publish(DATASET_IDENTIFIER)
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Example #9
def test_publish():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)
    
    response_data = "create_foobar.txt"
    resource = "/api/views/songs/publication.json" # publish() removes .json
    set_up_mock(adapter, "POST", response_data, 200, resource=resource)
    
    response = client.publish("/resource/songs.json") # hard-coded so request uri is matched
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Example #10
def test_get_metadata():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "get_song_metadata.txt"
    setup_old_api_mock(adapter, "GET", response_data, 200)
    response = client.get_metadata(DATASET_IDENTIFIER)

    assert isinstance(response, dict)
    assert "newBackend" in response
    assert "attachments" in response["metadata"]

    client.close()
Example #11
    def getData(self):
        if self.storage == "remote":
            print("Getting DCFS referral data from remote source.")

            # API request
            client_dcfs = Socrata('data.lacounty.gov','7pTgt6f2oTY53aDI1jXNJoNZD')
            offset_temp = 0
            dcfs_df=pd.DataFrame()

            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            cur.execute('DROP TABLE IF EXISTS dcfs')

            print('Inserting DCFS referral dataframe into ReferralCrimeMap.db.')
            while True:
                results = client_dcfs.get("8vmx-hhtu", limit=5000, offset=offset_temp)
                dcfs_df = pd.DataFrame.from_records(results)
                # Break the loop and stop requesting if the block is empty 
                if dcfs_df.empty == True:
                    break
                # Insert dataframe into ReferralCrimeMap.db
                dcfs_df['location'] = dcfs_df['location'].astype('str')
                pd_sql.to_sql(dcfs_df, 'dcfs', conn, if_exists='append', index=False)
                
                offset_temp+=5000
                # I didn't use time.sleep as this API is unlimited
                # time.sleep(1)
            conn.close()

        else:
            print ("Getting DCFS referral data from local source.")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = '''
                    SELECT *
                    FROM dcfs
                    '''
            try:
                dcfs = pd.read_sql(query, conn)
                conn.close()
                print(dcfs.head())
            # If the table does not exist it will throw an error.
            except Exception as e:
                print('There is an error:', e)
                print('Please enter remote source.')
                exit()
Example #12
def test_upsert():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "upsert_songs.txt"
    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    setup_mock(adapter, "POST", response_data, 200)
    response = client.upsert(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 1
    client.close()
Example #13
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    uri = "{0}{1}{2}".format(PREFIX, DOMAIN, PATH)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(PATH)
    assert response.status_code == 200

    try:
        client.delete("/foobar.json")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
Example #14
def test_upsert_exception():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    path = "/songs.json"
    response_data = "403_response_json.txt"
    set_up_mock(adapter, "POST", response_data, 403, reason="Forbidden")

    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    try:
        response = client.upsert(PATH, data)
    except Exception as e:
        assert isinstance(e, requests.exceptions.HTTPError)
Example #15
def test_upsert_exception():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, session_adapter=mock_adapter)

    response_data = "403_response_json.txt"
    setup_mock(adapter, "POST", response_data, 403, reason="Forbidden")

    data = [{"theme": "Surfing", "artist": "Wavves",
             "title": "King of the Beach", "year": "2010"}]
    try:
        client.upsert(DATASET_IDENTIFIER, data)
    except Exception as e:
        assert isinstance(e, requests.exceptions.HTTPError)
    else:
        raise AssertionError("No exception raised for bad request.")
Example #16
def log_request():
    print('log_request', request.url)
    print (socrata_app_token and socrata_username and socrata_password and socrata_access_log_domain and socrata_access_log_datasetid)
    if socrata_app_token and socrata_username and socrata_password and socrata_access_log_domain and socrata_access_log_datasetid:
        client = Socrata(socrata_access_log_domain, socrata_app_token, username=socrata_username, password=socrata_password)
        
        # fix this, see http://esd.io/blog/flask-apps-heroku-real-ip-spoofing.html
        if not request.headers.getlist("X-Forwarded-For"):
            ip = request.remote_addr
        else:
            ip = request.headers.getlist("X-Forwarded-For")[0]
        # for some reason a space and a * is causing an upsert error so am replacing space with %20
        url = str(request.url).replace(" ", "%20")
        # See Socrata's time format https://support.socrata.com/hc/en-us/articles/202949918-Importing-Data-Types-and-You-
        dtnow = datetime.utcnow().isoformat()
        dtnow = dtnow[:dtnow.index('.')]+'Z' 
        data = [{'datetime': dtnow, 'ip_address': str(ip), 'url': url}]
        print(data)
        print('upsert', client.upsert(socrata_access_log_datasetid, data))
Example #17
File: main.py Project: mscook/PDOIB
def fetch_data():
	# Make a connection	
	conn = r.connect(host="localhost", port=28015, db="test")
	# You need to register the App Token (manually?)
	client = Socrata("data.sunshinecoast.qld.gov.au", "6MbT9NoWolynKM1ooRzrvm7Fs")
	# API Endpoint
	endpoint = "/resource/mn3m-fqri.json"
	off = 0
	while True:
		data = client.get(endpoint, limit=50000,offset=off)
		if len(data) == 0:
			break
		for elem in data:
			try:
				# Only store if we have a date
				cur = elem['d_date_rec']
				r.table("planning").insert(elem).run(conn)
			except KeyError:
				pass
		off = off+50000
Example #18
def test_set_permission():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "empty.txt"
    resource = "/api/views" + PATH
    set_up_mock(adapter, "PUT", response_data, 200, resource=resource)
    
    # Test response
    response = client.set_permission(PATH, "public")
    assert response.status_code == 200
    
    # Test request
    request = adapter.request_history[0]
    qs = request.url.split("?")[-1]
    assert qs == "method=setPermission&value=public.read"
    client.close()
Example #19
def test_replace():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "replace_songs.txt"
    data = [
        {"theme": "Surfing", "artist": "Wavves", "title": "King of the Beach",
         "year": "2010"},
        {"theme": "History", "artist": "Best Friends Forever",
         "title": "Abe Lincoln", "year": "2008"},
    ]
    setup_mock(adapter, "PUT", response_data, 200)
    response = client.replace(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 2
    client.close()
Example #20
def get_resource_data(socrata_resource, since=None, limit=1000):
    client = Socrata(socrata_resource.domain, socrata_resource.token)
    kwargs = {
        'limit': limit,
        'where': socrata_resource.conditions,
    }
    if socrata_resource.unique_key:
        kwargs['order'] = socrata_resource.unique_key

    while True:
        if since:
            kwargs['where'] = "{} and {} > '{}'".format(
                socrata_resource.conditions,
                socrata_resource.unique_key,
                since
            )
        batch = client.get(socrata_resource.endpoint, **kwargs)
        if len(batch) > 0:
            since = batch[-1][socrata_resource.unique_key]
            yield batch
        else:
            return
Example #21
def test_replace_non_data_file():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "successblobres.txt"
    nondatasetfile_path = 'tests/test_data/nondatasetfile.zip'

    setup_replace_non_data_file(adapter, "POST", response_data, 200)

    with open(nondatasetfile_path, 'rb') as fin:
        file = (
            {'file': ("nondatasetfile.zip", fin)}
        )
        response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file)

    assert isinstance(response, dict)
    assert response.get("blobFileSize") == 496
    client.close()
Example #22
def run_script():
    client = Socrata("data.sfgov.org", "wvRAyq5wvCnf9YKGmiuZ7T9y3")
    fetched_data = client.get("/resource/rqzj-sfat.json", select="objectid,latitude,longitude,fooditems,expirationdate")
    data = []

    for i in fetched_data:
        a = Node(
            i.get("objectid", None),
            i.get("latitude", None),
            i.get("longitude", None),
            i.get("fooditems", None),
            i.get("expirationdate", None),
        )
        print(repr(a))
        data.append(a._asdict())

    urlparse.uses_netloc.append("postgres")
    try:
        url = urlparse.urlparse(os.environ["DATABASE_URL"])
        conn = psycopg2.connect(
            database=url.path[1:], user=url.username, password=url.password, host=url.hostname, port=url.port
        )
    except:
        url = urlparse.urlparse("postgresql://localhost/mydb")
        conn = psycopg2.connect(
            database=url.path[1:], user=url.username, password=url.password, host=url.hostname, port=url.port
        )
        print "I am unable to connect to the database"
    cur = conn.cursor()
    try:
        cur.executemany(
            "insert into foodtrucks(objectid,latitude, longitude,fooditems,expirationdate) select %(objectid)s,%(latitude)s,%(longitude)s,%(fooditems)s,%(expirationdate)s where not exists (select 1 from foodtrucks where objectid=%(objectid)s)",
            data,
        )
    except Exception as e:
        print "unable to query postgres =>", e
    conn.commit()
    return render_template("successScript.html")
Example #23
    def pull(self):
        """
        Utilities for spatio temporal analysis
        @author zed.uchicago.edu

        Pulls new entries from datasource
        NOTE: should make flexible but for now use city of Chicago data
        """

        socrata_domain = "data.cityofchicago.org"
        socrata_dataset_identifier = "crimes"
        socrata_token = "ZIgqoPrBu0rsvhRr7WfjyPOzW"

        client = Socrata(socrata_domain, socrata_token)
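The pull() method above stops after creating the client; the following is only a hedged sketch of how the fetch itself might continue, reusing the domain, token, and identifier defined in the snippet (the ordering column and the limit are assumptions, not part of the original):

# Hypothetical continuation of pull(): fetch rows with the client configured above.
from sodapy import Socrata

client = Socrata("data.cityofchicago.org", "ZIgqoPrBu0rsvhRr7WfjyPOzW")
# "date" as an ordering column and the limit of 1000 are assumptions.
results = client.get("crimes", limit=1000, order="date DESC")
client.close()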
Example #24
    def run(self):
        '''
        Queries the data from the 311 API
        '''
        # S3 authentication
        ses = boto3.session.Session(profile_name='luigi_dpa', region_name='us-west-2')
        s3_resource = ses.resource('s3')

        obj = s3_resource.Bucket(self.bucket)
        print(ses)
        # Client authentication:
        client = Socrata("data.cityofnewyork.us",
                        "N2WpW61JnP5RoT5mrYGUaSUg9",
                        username="******",
                        password="******")

        # results are returned as JSON from the API /
        # converted to a Python list using sodapy
        client.timeout = 1000
        results = client.get("erm2-nwe9", limit=100)

        with self.output().open('w') as json_file:
            json.dump(results, json_file)
Example #25
def test_replace_non_data_file():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN,
                     APPTOKEN,
                     username=USERNAME,
                     password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "successblobres.txt"
    nondatasetfile_path = 'tests/test_data/nondatasetfile.zip'

    setup_replace_non_data_file(adapter, "POST", response_data, 200)

    with open(nondatasetfile_path, 'rb') as fin:
        file = ({'file': ("nondatasetfile.zip", fin)})
        response = client.replace_non_data_file(DATASET_IDENTIFIER, {}, file)

    assert isinstance(response, dict)
    assert response.get("blobFileSize") == 496
    client.close()
Example #26
    def execute(self, context):
        # Authenticate Socrata client
        self.log.info('Authenticate Socrata client')
        client = Socrata(self.socrata_domain,
                         self.socrata_token)

        rendered_socrata_query_filters = copy.deepcopy(self.socrata_query_filters)
        if rendered_socrata_query_filters is not None:
            for filter, filter_value in self.socrata_query_filters.items():
                if isinstance(filter_value, str):
                    rendered_socrata_query_filters[filter] = filter_value.format(**context)

        # Get JSON results from API endpoint
        self.log.info('Query API')
        results = client.get(self.socrata_dataset_identifier,
                             **rendered_socrata_query_filters)
        self.log.info('Got {} results'.format(len(results)))

        self.log.info('Write JSON to file')
        rendered_json_output_filepath = self.json_output_filepath.format(**context)
        with open(rendered_json_output_filepath, 'w') as outfile:
            json.dump(results, outfile)
        self.log.info('Write JSON to {}'.format(rendered_json_output_filepath))
Example #27
    def __init__(self,
                 dataset_id,
                 socrata_client=None,
                 socrata_params=None,
                 float_fields=None,
                 logger=None):
        """
        Initialization function of the SocrataDataset class.

        Parameters:
            dataset_id: 4x4 ID of the Socrata draft (e.g. x123-bc12)
            socrata_client: Optional if socrata_params is passed in instead.
                A sodapy.Socrata object that has already been initialized
                with the proper Socrata credentials.
            socrata_params: Optional if socrata_client is passed in instead.
                Dictionary object containing Socrata
                credentials. Must include the following fields: 'username',
                'password', 'app_token', 'domain'.
            float_fields: An array of Socrata field names that should be of
                float types (numbers with decimals).
            logger: Optional parameter. Could pass in a logger object or not pass
                in anything. If a logger object is passed in, information will be
                logged instead of printed. If not, information will be printed.
        """
        self.socrata_params = {}
        self.float_fields = []
        self.dataset_id = dataset_id
        self.client = socrata_client
        if not socrata_client and socrata_params:
            self.client = Socrata(**socrata_params)
        self.socrata_params = socrata_params
        self.col_dtype_dict = self.get_col_dtype_dict()
        self.float_fields = float_fields
        self.print_func = print
        if logger:
            self.print_func = logger.info
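Because the docstring above describes two ways to construct the class, here is a minimal usage sketch; the domain, token, and credentials are placeholders, SocrataDataset is assumed to be the class this __init__ belongs to, and constructing it will call get_col_dtype_dict() against the API:

from sodapy import Socrata

# Placeholder credentials; the docstring requires exactly these four fields.
socrata_params = {
    "domain": "data.example.gov",
    "app_token": "PLACEHOLDER_TOKEN",
    "username": "user@example.com",
    "password": "secret",
}

# Option 1: let SocrataDataset build its own client from the credential dict.
dataset = SocrataDataset("x123-bc12", socrata_params=socrata_params)

# Option 2: pass an already-initialized sodapy.Socrata client.
client = Socrata(socrata_params["domain"], socrata_params["app_token"],
                 username=socrata_params["username"],
                 password=socrata_params["password"])
dataset = SocrataDataset("x123-bc12", socrata_client=client,
                         socrata_params=socrata_params)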
Example #28
def get_data(APP_KEY, page_size, num_pages, output):
	try:
		client = Socrata('data.cityofnewyork.us', APP_KEY)
		es = create_and_update_index('violationparking-index', 'violations')


		if num_pages == '':
			num_row = client.get('nc67-uf89', select='COUNT(*)') #count num of row in dataset

			print("SOMETHING")

			total = int(num_row[0]['COUNT'])
			num_pages = (total / page_size) #get the num of pages 
			return num_pages


		else: 
			for i in range (num_pages):
				data = client.get('nc67-uf89', limit=page_size, offset=i*page_size)

				if output != '':
					with open(output, 'a') as fout:
						for result in data:
							fout.write(json.dumps(result) + '\n')
							
							print("HERE")

							load_ES(result, es)

				else:
					for result in data:
						print(json.dumps(result) + '\n')


	except Exception as e:
		print(f'Something went wrong {e}')
		raise 
Example #29
    def execute(trial=False):

        startTime = datetime.datetime.now()

        # ---[ Connect to Database ]---------------------------------
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('smithnj', 'smithnj')
        repo_name = 'smithnj.ctastats'
        # ---[ Grab Data ]-------------------------------------------
        client = Socrata("data.cityofchicago.org", "xbEYuk5XxkYsIaXl3hn79XIoR")
        if (trial):
            results = client.get(
                "t2rn-p8d7",
                select="station_id,stationame,month_beginning,monthtotal",
                where=
                "month_beginning > '2012-12-01T00:00:00.000' AND month_beginning < '2019-01-01T00:00:00.000'",
                limit=100)
        if (trial == False):
            results = client.get(
                "t2rn-p8d7",
                select="station_id,stationame,month_beginning,monthtotal",
                where=
                "month_beginning > '2012-12-01T00:00:00.000' AND month_beginning < '2019-01-01T00:00:00.000'",
                limit=30000)
        df = pd.DataFrame.from_records(results).to_json(orient="records")
        loaded = json.loads(df)
        # ---[ MongoDB Insertion ]-------------------------------------------
        repo.dropCollection(repo_name)
        repo.createCollection(repo_name)
        print('done')
        repo[repo_name].insert_many(loaded)
        repo[repo_name].metadata({'complete': True})
        # ---[ Finishing Up ]-------------------------------------------
        print(repo[repo_name].metadata())
        repo.logout()
        endTime = datetime.datetime.now()
Example #30
 def send_crab_data(self):
     CONVERSION_RATE = .00000701549
     oocytes = Oocyte.objects.filter(crab=self).filter(chosen_count=10)
     client = Socrata("noaa-fisheries-afsc.data.socrata.com",
                      "q3DhSQxvyWbtq1kLPs5q7jwQp",
                      username="******",
                      password="******")
     data = {
         'area_2': '',
         'area_5': '',
         'calibration_5x': 0.00028,
         'area_4': '',
         'area_7': '',
         'area_10': '',
         'calibration_10x': 0.00056,
         'area_9': '',
         'year': '',
         'sample': '',
         'area_3': '',
         'area_8': '',
         'area_1': '',
         'area_6': ''
     }
     data['area_1'] = oocytes[0].area * CONVERSION_RATE
     data['area_2'] = oocytes[1].area * CONVERSION_RATE
     data['area_3'] = oocytes[2].area * CONVERSION_RATE
     data['area_4'] = oocytes[3].area * CONVERSION_RATE
     data['area_5'] = oocytes[4].area * CONVERSION_RATE
     data['area_6'] = oocytes[5].area * CONVERSION_RATE
     data['area_7'] = oocytes[6].area * CONVERSION_RATE
     data['area_8'] = oocytes[7].area * CONVERSION_RATE
     data['area_9'] = oocytes[8].area * CONVERSION_RATE
     data['area_10'] = oocytes[9].area * CONVERSION_RATE
     data['year'] = datetime.datetime.now().year
     data['sample'] = self.sample_num
     payload = [data]
     client.upsert("km2u-hwjw", payload)
Example #31
def get_crime(start_year, end_year):
    '''
    Get 2013 to 2018 crime data from chicago data portal
    Return:
        pandas dataframe with the columns and dtypes as COL_TYPES
    '''
    crime_type = [
        "HOMICIDE", "CRIM SEXUAL ASSAULT", "ROBBERY", "ASSAULT", "BATTERY",
        "BURGLARY", "ARSON", "MOTOR VEHICLE THEFT", "THEFT"
    ]
    COL_TYPES = {
        'block': str,
        'case_number': str,
        'primary_type': 'category',
        'date': str,
        'latitude': float,
        'longitude': float,
        'year': int
    }
    MAX_ROWS = 100000000  # the total rows of the original data
    CRIME_DATA_ID = "6zsd-86xi"
    cols = [item for item in COL_TYPES.keys()]
    client = Socrata('data.cityofchicago.org',
                     'E0eO5nY1aKuEY1pVrunfqFhDz',
                     username='******',
                     password='******')
    conds = "year >= {} AND year <= {}".format(start_year, end_year)
    res = client.get(CRIME_DATA_ID,
                     select=",".join(cols),
                     where=conds,
                     limit=MAX_ROWS)
    client.close()
    df = pd.DataFrame.from_records(res)
    #df['date'] = pd.to_datetime(df['date'])
    df = df[df.primary_type.isin(crime_type)]
    df = df.astype(COL_TYPES)
    return df
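A short, hypothetical call to the function above (the year range is illustrative; the app token and credentials are hard-coded inside get_crime):

# Hypothetical usage of get_crime() defined above.
crime_df = get_crime(2016, 2017)
print(crime_df['primary_type'].value_counts())
print(crime_df.dtypes)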
Example #32
    def api_to_s3(self, filename, table_name, limit=2000):
        """
        Loads data via api, uploads data file to s3.

        Parameters
        ----------
        filename : str 
            file name with extension
        table_name : str
            name of dataset fetched from api
        limit : int
            number of records to download

        """
        # load api module
        from sodapy import Socrata
        api_client = Socrata(self.url, self.api_token, self.api_user_email,
                             self.api_user_pwd)
        # get records
        logging.info(f'Fetching data from {self.url} API.')
        records = api_client.get(table_name, limit=limit)
        df = pd.DataFrame.from_records(records)

        # write csv
        csv_buf = StringIO()
        df.to_csv(csv_buf, header=True, index=False)
        csv_buf.seek(0)
        content = csv_buf.getvalue()

        # add "_current" to filename
        filename = self.__rename_file(filename)
        # copy "_current" file in bucket to "_previous"
        self.__archive_file(filename)
        # create s3 object
        obj = self.s3_conn.Object(self.bucket_name, filename)
        obj.put(Body=content)
        logging.info(f'{filename} uploaded to s3 bucket.')
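A hedged usage sketch for the method above; `loader` stands in for an instance of the surrounding (unshown) class, assumed to be configured with url, api_token, credentials, an s3_conn resource, and a bucket_name. The filename and dataset id are illustrative only.

# Hypothetical: fetch 5,000 records of a dataset and upload them as a CSV to S3.
loader.api_to_s3("crime_reports.csv", "6zsd-86xi", limit=5000)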
Example #33
def get_scooters(url=None, key=None, save=True):
    '''
    # must input key
    use the Socrata API call to dataset 2kfw-zvte
    return dataframe of individual scooter trips over Chicago 2019 pilot
    '''
    # check to see if the scooter pickle exists
    if os.path.exists('scooter_df.pickle'):
        print('loading pickle!')
        scooter_df = pd.read_pickle('scooter_df.pickle')

    else:
        print('calling API!')
        from sodapy import Socrata  #client

        if key == None:
            key = json.load(open('hidden.json', 'r'))['CHI']

        client = Socrata(key['url'], key['key'])

        # First 2000 results, returned as JSON from API / converted to Python list of
        # dictionaries by sodapy.
        results = client.get("2kfw-zvte", limit=712_000)

        # Convert to pandas DataFrame
        scooter_df = pd.DataFrame(results)

        scooter_df.drop(columns=[
            ':@computed_region_bdys_3d7i', ':@computed_region_vrxf_vc4k'
        ],
                        inplace=True)
        if save:
            # saving, so you don't have to make the request again :praise:
            scooter_df.to_pickle('scooter_df.pickle')
            print('saved to pickle!')

    return scooter_df
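A hypothetical call to get_scooters() above; it assumes either a hidden.json file with a 'CHI' entry or an explicit key dict of the shape shown (the token is a placeholder):

scooters = get_scooters(key={"url": "data.cityofchicago.org",
                             "key": "PLACEHOLDER_APP_TOKEN"})
print(scooters.shape)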
Example #34
def PersonToCorreoDireccionTelefono_mk5f_bdwx(m):
    TRX = MaltegoTransform()
    #m.parseArguments(sys.argv)
    #nombre=sys.argv[1]
    nombre = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("u5mc-hpr6", limit=2000)

        #for key, value in data.items():
        #print key, value
        for i in range(len(r)):
            if (r[i]['nombre'] == nombre):
                celular = r[i]['celular']
                correo_electronico = r[i]['correo_electronico']
                direccion = r[i]['direccion']
                celular2 = r[i]['telefonos']
                barrio = r[i]['municipio']
                break

        ent1 = TRX.addEntity('maltego.EmailAddress', correo_electronico)
        ent2 = TRX.addEntity('maltego.PhoneNumber', celular)
        ent2.addAdditionalFields("phonenumber.countrycode", "Country Code",
                                 True, "57")
        ent3 = TRX.addEntity('maltego.PhoneNumber', celular2)
        ent3.addAdditionalFields("phonenumber.countrycode", "Country Code",
                                 True, "57")
        ent4 = TRX.addEntity('maltego.Location', direccion)
        ent4.addAdditionalFields("country", "Country", True, "Colombia")
        ent4.addAdditionalFields("location.area", "Area", True, barrio)
        ent4.addAdditionalFields("streetaddress", "Street Address", True,
                                 direccion)

    except Exception as e:
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Example #35
def create_vacc_df():
    columns = [
        'date, fips, recip_county, recip_state, series_complete_pop_pct, series_complete_yes, booster_doses, booster_doses_vax_pct'
    ]
    client = Socrata("data.cdc.gov",
                     app_token="SMDNVaBjBRb2aY7ZjRLbnLpZc",
                     username="******",
                     password="******")
    results = client.get("8xkx-amqh", limit=100000000)
    df_vacc_1 = pd.DataFrame.from_records(results)
    df_vacc_1['date'] = df_vacc_1['date'].astype('datetime64')
    df_vacc_1['recip_county'] = df_vacc_1['recip_county'].astype('string')
    df_vacc_1['recip_state'] = df_vacc_1['recip_state'].astype('string')
    df_vacc_1['series_complete_pop_pct'] = df_vacc_1[
        'series_complete_pop_pct'].astype('float')
    df_vacc_1['series_complete_yes'] = df_vacc_1['series_complete_yes'].astype(
        'float')
    df_vacc_1['booster_doses'] = df_vacc_1['booster_doses'].astype('float')
    df_vacc_1['booster_doses_vax_pct'] = df_vacc_1[
        'booster_doses_vax_pct'].astype('float')

    date_min = df_vacc_1['date'].min()
    date_max = df_vacc_1['date'].max()
    date_list = []
    for mon in pd.date_range(date_min, date_max, freq='MS'):
        date_list.append(mon.strftime("%Y-%m-%d"))
    df_vacc_animated = df_vacc_1[df_vacc_1['date'].isin(date_list)]
    df_vacc_static = df_vacc_1[df_vacc_1['date'] == date_max]
    df_vacc_animated = df_vacc_animated.sort_values('date', ascending=True)
    df_vacc_animated['date'] = df_vacc_animated['date'].astype('string')

    with urlopen(
            'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
    ) as response:
        counties = json.load(response)

    return counties, df_vacc_static, df_vacc_animated
Example #36
def TelefonoToCorreoPerson_u5mc_hpr6(m):
    TRX = MaltegoTransform()
    #m.parseArguments(sys.argv)
    #telefono=sys.argv[1]
    telefono = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("u5mc-hpr6", limit=2000)

        #for key, value in data.items():
        #print key, value
        for i in range(len(r)):
            if (r[i]['celular'] == telefono):
                nombre = r[i]['nombre']
                email = r[i]['correo_electr_nico']
                break

        nombre = nombre.split(" ")
        if (len(nombre) == 4):
            firts = nombre[0] + " " + nombre[1]
            last = nombre[2] + " " + nombre[3]
            full = nombre[0] + " " + nombre[1] + " " + nombre[
                2] + " " + nombre[3]
        else:
            firts = nombre[0]
            last = nombre[1] + " " + nombre[2]
            full = nombre[0] + " " + nombre[1] + " " + nombre[2]
        ent = TRX.addEntity('maltego.Person', full)
        ent.addAdditionalFields("person.firtsnames", "Firts Names", True,
                                firts)
        ent.addAdditionalFields("person.lastname", "Surname", True, last)
        ent2 = TRX.addEntity('maltego.EmailAddress', email)

    except Exception as e:
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Example #37
def load_data():
    client = Socrata("data.cincinnati-oh.gov", None)
    crimes = client.get("k59e-2pvf", limit=500000)
    crimes_na = pd.DataFrame.from_records(crimes)
    crimes_df = crimes_na.replace(r'^\s*$', np.nan, regex=True)

    # convert dates to datetime
    crimes_df.date_reported = pd.to_datetime(crimes_df.date_reported,
                                             errors='coerce').dt.tz_localize(
                                                 'US/Eastern',
                                                 ambiguous='NaT',
                                                 nonexistent='NaT')
    crimes_df.date_from = pd.to_datetime(
        crimes_df.date_from, errors='coerce').dt.tz_localize('US/Eastern',
                                                             ambiguous='NaT',
                                                             nonexistent='NaT')
    crimes_df.date_to = pd.to_datetime(
        crimes_df.date_to, errors='coerce').dt.tz_localize('US/Eastern',
                                                           ambiguous='NaT',
                                                           nonexistent='NaT')
    crimes_df.date_of_clearance = pd.to_datetime(
        crimes_df.date_of_clearance,
        errors='coerce').dt.tz_localize('US/Eastern',
                                        ambiguous='NaT',
                                        nonexistent='NaT')

    crimes = crimes_df.rename(columns={
        'longitude_x': 'lon',
        'latitude_x': 'lat'
    })

    crimes['lon'] = crimes['lon'].astype(float)
    crimes['lat'] = crimes['lat'].astype(float)

    crimestats = crimes

    return crimestats
Example #38
    def update_vac_data(self):
        vac_df = pd.DataFrame()
        # 51 US states
        state_str = ', '.join(
            ['"' + x + '"' for x in list(self.state_name_abbr.values())])

        # socrata API
        client = Socrata("data.cdc.gov", None)
        # API query
        dates = client.get("8xkx-amqh", select='distinct date')

        # for date in dates:
        # * for testing purpose only *
        # for date in dates[:1]:
        for date in dates:
            date = date['date']
            visited = self.vac_visited.get_item(Key={'date_visited': date})
            # if not visited
            if 'Item' not in visited.keys():
                self.vac_visited.put_item(Item={'date_visited': date})
                results = client.get(
                    "8xkx-amqh",
                    select=
                    'date, recip_county, recip_state, series_complete_yes, series_complete_pop_pct',
                    where=f'date = "{date}" AND recip_state in ({state_str})',
                    limit=2000000)
                vac_df = vac_df.append(pd.DataFrame.from_records(results))
            else:
                pass
        if vac_df.shape[0] > 0:
            vac_df = self.preprocess(vac_df, 'vac_df')

        # push preprocessed data to DynamoDB
        with self.vac_data_con.batch_writer() as batch:
            for index, row in vac_df.iterrows():
                batch.put_item(json.loads(row.to_json(), parse_float=Decimal))
Example #39
def downloadata(domain, dataset_id, app_token):

    start_time = time.time()

    client = Socrata(domain, app_token)

    # Example authenticated client (needed for non-public datasets):
    # client = Socrata(www.dati.lombardia.it,
    #                  MyAppToken,
    #                  username="*****@*****.**",
    #                  password="******")

    print('Querying opendata emails...')
    results = downloader(client, dataset_id, taglist)
    if results is not None:
        # Convert to pandas DataFrame
        results_df = pd.DataFrame.from_records(results)
        print('converting mail format')
        tmp = results_df.iloc[:, 0]

        # Convert to list
        mail_list = list(tmp[tmp.notna()])

        # Add subscribers to the newsletter
        print('Saving to db')

        db.addiscritti(mail_list)
        print('Done')
    else:
        print('No data written')
    client.close()

    print('----------- Time elapsed : ', round(time.time() - start_time, 6),
          '----------')

    return
Example #40
def PersonToTelefono(m):
    TRX = MaltegoTransform()
    #m.parseArguments(sys.argv)
    #nombre=sys.argv[1]
    nombre = m.Value
    try:
        client = Socrata("www.datos.gov.co", None)
        r = client.get("3ard-sj8g", limit=2000)

        #for key, value in data.items():
        #print key, value
        for i in range(len(r)):
            if (r[i]['nombre_prestador'] == nombre):
                telefono = r[i]['telefono']
                break

        ent = TRX.addEntity('maltego.PhoneNumber', telefono)
        ent.addAdditionalFields("phonenumber.countrycode", "Country Code",
                                True, "57")

    except Exception as e:
        TRX.addUIMessage("Cedula no encontrada en la base de datos")

    TRX.returnOutput()
Example #41
 def connectToSocrata(self):
     clientConfigFile = self.inputdir + self.configItems[
         'socrata_client_config_fname']
     with open(clientConfigFile, 'r') as stream:
         try:
             client_items = yaml.load(stream)
             client = Socrata(client_items['url'],
                              client_items['app_token'],
                              username=client_items['username'],
                              password=base64.b64decode(
                                  client_items['password']))
             return client
         except yaml.YAMLError as exc:
             self._logger.error('Failed to open yaml file', exc_info=True)
     return 0
Example #42
def download_crime_reports(start_year, end_year):
    '''
    Imports crime reports data from the Chicago open data portal using the SODA
    API.

    Inputs:
    start_year (int): the first year to download crime reports from (valid input
        is 2001-2018)
    end_year (int): the last year to download crime reports from (valid input is
        2001-2018)

    Returns: pandas dataframe where each row is a crime report
    '''
    coltypes = {'latitude': float, 'longitude': float, 'year': int}
    client = Socrata('data.cityofchicago.org', APP_TOKEN)
    where_clause = 'year between {} and {}'.format(start_year, end_year)
    max_size = int(6.85 * 10**6)
    results = client.get('6zsd-86xi', where=where_clause, limit=max_size)
    results_df = pd.DataFrame.from_records(results)\
                             .astype(coltypes)

    results_df.date = pd.to_datetime(results_df.date)

    return results_df
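A brief, hypothetical usage of the function above; it assumes APP_TOKEN is defined at module level as the snippet expects:

reports = download_crime_reports(2017, 2018)
print(reports[['year', 'latitude', 'longitude']].head())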
Example #43
def test_upsert():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN,
                     APPTOKEN,
                     username=USERNAME,
                     password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "upsert_songs.txt"
    data = [{
        "theme": "Surfing",
        "artist": "Wavves",
        "title": "King of the Beach",
        "year": "2010"
    }]
    setup_mock(adapter, "POST", response_data, 200)
    response = client.upsert(DATASET_IDENTIFIER, data)

    assert isinstance(response, dict)
    assert response.get("Rows Created") == 1
    client.close()
Example #44
def crawl_website_socrata(website_link,
                          dataset_identifier,
                          crawl_limit,
                          token=None):
    """
    Simple crawler for a specific file
    Code snippet is from https://dev.socrata.com/foundry/data.cityofchicago.org/jaif-n74j
    """
    # Unauthenticated client only works with public data sets. Note 'None'
    # in place of application token, and no username or password:
    client = Socrata(website_link, token)
    # Example authenticated client (needed for non-public datasets):
    # client = Socrata(data.cms.gov,
    #                  MyAppToken,
    #                  username="*****@*****.**",
    #                  password="******")

    # First 2000 results, returned as JSON from API / converted to Python list of
    # dictionaries by sodapy.
    results = client.get(dataset_identifier, limit=crawl_limit)
    # Convert to pandas DataFrame
    results_df = pd.DataFrame.from_records(results)

    return results_df
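A hypothetical call to the crawler above, reusing the Chicago portal and dataset id mentioned in its docstring:

df = crawl_website_socrata("data.cityofchicago.org", "jaif-n74j", crawl_limit=2000)
print(df.head())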
Example #45
 def updateDatabase(self):
     f = open("./text/last_update_deaths.txt").read().strip()
     last_update = datetime.strptime(f, "%Y-%m-%dT%H:%M:%S")
     if datetime.now()-last_update > timedelta(hours=8):
         f = open("./text/last_update_deaths.txt", "w")
         f.write((datetime.now()).strftime("%Y-%m-%dT%H:%M:%S"))
         f.close()
         print("updated")
         client = Socrata(self.dataset_link, None)
         data = client.get(self.dataset_id, limit=self.limit)
         df = pd.DataFrame.from_dict(data)
         # dataset contains extra characters on those counties finished with 'à'
         try:
             df['comarcadescripcio'] = df['comarcadescripcio'].str.replace(
                 "\xa0", "")
             df.to_pickle("./text/deaths_backup.pkl")
             return df
         except KeyError:
             df = pd.read_pickle("./text/deaths_backup.pkl")
             print("Malament")
             return df
     else:
         df = pd.read_pickle("./text/deaths_backup.pkl")
         return df
Example #46
def collect_all():
    # initialize client
    client = Socrata("data.cityofchicago.org", None)

    #get all requests
    #results = [y for x in (client.get("a9u4-3dwb", limit=2000, offset=i*1000, where="complaints IS NOT NULL") for i in range(0,100,2)) for y in x]

    results = []

    for i in tqdm(range(0, 100, 2)):
        try:
            sub_list = client.get("a9u4-3dwb",
                                  limit=2000,
                                  offset=i * 1000,
                                  where="complaints IS NOT NULL")
            if len(sub_list) > 0:
                results.extend(sub_list)
            #returns empty list when offset > database size
            else:
                break
        except:
            print('err')

    # this is based on no rate limit errors;
    # at this scale none have been encountered,
    # so this will suffice for current use

    #save to df and print number of  entries
    results_df = pd.DataFrame.from_records(results)
    print(f'number of complaints found:  {results_df.shape[0]}')

    #save this to our pickle file for analysis
    f = '../pickle/reports.pkl'
    results_df.to_pickle(f)
    print(f'Initial Reports DataFrame has been saved to {f}')
    return results_df
Example #47
        def execute(trail = False):
                startTime = datetime.datetime.now()
                #Setup starts here
                DOMAIN = "data.cityofboston.gov"
                client = dml.pymongo.MongoClient()
                repo = client.repo
                repo.authenticate('ll0406_siboz', 'll0406_siboz')
                client = dml.pymongo.MongoClient()
                repo = client.repo
                repo.authenticate('ll0406_siboz', 'll0406_siboz')

                #Socrata API setup and raw data retrieval
                socrataClient = Socrata(DOMAIN, None)

                """
Example #48
def get_data(app_key, page_size, num_pages, output_file):
    client = Socrata("data.cityofnewyork.us", app_key)
    offset = 0
    res = []
    # If num_pages is not provided, calculate the num_pages so that we can read the entire content.
    if num_pages is None:
        total_records = int(
            client.get("nc67-uf89", select="COUNT(*)")[0]["COUNT"])
        num_pages = total_records // page_size + 1
    # open the JSON output file if one is provided
    if output_file is not None:
        f = open(output_file, 'a')
    # Get records
    for i in range(num_pages):
        one_page = client.get("nc67-uf89", limit=page_size, offset=offset)
        if output_file is None:
            print(one_page)
        else:
            for item in one_page:
                f.write(json.dumps(item) + '\n')
        offset += page_size

    if output_file is not None:
        f.close()
Example #49
def run_script():
    client = Socrata("data.sfgov.org","wvRAyq5wvCnf9YKGmiuZ7T9y3")
    fetched_data = client.get("/resource/rqzj-sfat.json",select="objectid,latitude,longitude,fooditems,expirationdate")
    for row in fetched_data:
        print(str(row))
    print("fetched_data", str(fetched_data))
    data = []

    for i in fetched_data:
        a = Node(i.get('objectid',None),i.get('latitude',None),i.get('longitude',None),i.get('fooditems',None),i.get('expirationdate',None))
        print(repr(a))
        data.append(a._asdict())

    print(data)
    try:
        conn = psycopg2.connect("dbname='mydb'")
    except:
        print "I am unable to connect to the database"
    cur = conn.cursor()
    try:
        cur.executemany("insert into foodtrucks(objectid,latitude, longitude,fooditems,expirationdate) select %(objectid)s,%(latitude)s,%(longitude)s,%(fooditems)s,%(expirationdate)s where not exists (select 1 from foodtrucks where objectid=%(objectid)s)",data)
    except Exception as e:
        print "unable to query postgres =>", e
    conn.commit()
Example #50
def fetch_res_data(zip,
                   max_query_results=20,
                   num_entries_to_search=10000,
                   t_out=10) -> Dict[str, any]:
    nyc_res_dataset_domain = "data.cityofnewyork.us"
    nyc_res_dataset_identifier = "43nn-pn8j"
    nyc_res_dataset_token = (
        None  # works with None but lower number of requests can be made
    )

    client = Socrata(nyc_res_dataset_domain, nyc_res_dataset_token)

    client.timeout = t_out

    try:
        return client.get(
            nyc_res_dataset_identifier,
            select="dba, boro, zipcode, violation_description",
            # q=str(zip), #uncomment if want to query directly on the server side (may lead to timeout)
            order="score DESC",
            limit=num_entries_to_search,
        )
    except requests.exceptions.Timeout:
        raise TimeoutError
Example #51
def test_create():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    response_data = "create_foobar.txt"
    setup_mock(adapter, "POST", response_data, 200, dataset_identifier=None)

    columns = [
        {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"},
        {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"}
    ]
    tags = ["foo", "bar"]
    response = client.create("Foo Bar", description="test dataset",
        columns=columns, tags=tags, row_identifier="bar", display_type = "map")

    request = adapter.request_history[0]
    request_payload = json.loads(request.text) # can't figure out how to use .json

    # Test request payload
    for dataset_key in ["name", "description", "columns", "tags"]:
        assert dataset_key in request_payload

    for column_key in ["fieldName", "name", "dataTypeName"]:
        assert column_key in request_payload["columns"][0]

    assert "displayType" in request_payload
    assert request_payload["displayType"] == "map"

    # Test response
    assert isinstance(response, dict)
    assert len(response.get("id")) == 9
    client.close()
Example #52
    def execute(trial=False):
        startTime = datetime.datetime.now()

        # ---[ Connect to Database ]---------------------------------
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('smithnj', 'smithnj')
        repo_name = 'smithnj.commareas'
        # ---[ Grab Data ]-------------------------------------------
        client = Socrata("data.cityofchicago.org", "xbEYuk5XxkYsIaXl3hn79XIoR")
        results = client.get("74p9-q2aq", limit=500)
        df = pd.DataFrame.from_records(results).to_json(orient="records")
        loaded = json.loads(df)
        # ---[ MongoDB Insertion ]-------------------------------------------
        repo.dropCollection(repo_name)
        repo.createCollection(repo_name)
        print('done')
        repo[repo_name].insert_many(loaded)
        repo[repo_name].metadata({'complete': True})
        # ---[ Finishing Up ]-------------------------------------------
        print(repo[repo_name].metadata())
        repo.logout()
        endTime = datetime.datetime.now()
        return {"start": startTime, "end": endTime}
Example #53
def datavis(request, dataset_id):
    ods = OpenDataSource.objects.get(pk=dataset_id)
    client = Socrata(ods.website, ods.token, ods.user, ods.password)
    dataset = DataSet.objects.get(pk=dataset_id)
    data = client.get(dataset.identifier)
    metadata = client.get_metadata(dataset.identifier)
    client.close()
    template = loader.get_template('datavis/datavis.html')
    data = json.dumps(data, indent=4, sort_keys=True)
    context = {'data': data, 'metadata': metadata, 'dataset': dataset}
    return HttpResponse(template.render(context, request))
Example #54
def test_delete():
    mock_adapter = {}
    mock_adapter["prefix"] = PREFIX
    adapter = requests_mock.Adapter()
    mock_adapter["adapter"] = adapter
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    uri = "{0}{1}/api/views/{2}.json".format(PREFIX, DOMAIN, DATASET_IDENTIFIER)
    adapter.register_uri("DELETE", uri, status_code=200)
    response = client.delete(DATASET_IDENTIFIER)
    assert response.status_code == 200

    try:
        client.delete("foobar")
    except Exception as e:
        assert isinstance(e, requests_mock.exceptions.NoMockAddress)
    finally:
        client.close()
Example #55
import os
import sys

sitepackage = os.path.join(
    os.environ['HOME'], "site", "wwwroot",
    "pyenv", "lib", "python3.4", "site-packages"
)
sys.path.append(sitepackage)

import json
import re
from datetime import datetime, timedelta
from sodapy import Socrata

soda_client = Socrata(
    os.environ["SOCRATA_HOST"],
    os.environ["SOCRATA_APPTOKEN"],
    username=os.environ["SOCRATA_USER"],
    password=os.environ["SOCRATA_PASS"]
)
soda_batch_size = 950

# The location where agencies individual data is stored; e.g. each agency has its own folder
if len(sys.argv) > 1:
    report_folder = sys.argv[1]
else:
    report_folder = os.path.join(
        os.environ['HOME'],
        "site",
        "wwwroot",
        os.environ["ANALYTICS_DATA_PATH"]
    )
Example #56
def test_client():
    client = Socrata(DOMAIN, APPTOKEN)
    assert isinstance(client, Socrata)
    client.close()
Example #57
from sodapy import Socrata
import csv

client = Socrata(site, app_token, username=user, password=passw)


dset = "/resource/xb7i-cvg2.json"
filepath = "grants-trunc.csv"

client.get(dset)
rowlist = []
with open(filepath, "rb") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        rowlist.append(row)

client.replace(dset, rowlist)
Example #58
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "petfinder.settings")
import django
django.setup()
from pets.models import Pet
from datetime import datetime
from django.utils import timezone
import time
import requests
from django.db.models import Max
import googlemaps
from sodapy import Socrata

time_start = time.time() #Timer start for script
#Setup APIs call and initial variables
gmaps = googlemaps.Client(key=g_api_key)
api_setup = Socrata(socrata_api, socrata_app_token, socrata_id, socrata_pw)
max_retry = 3

#Setup Dictionary of found locations and coordinates
location_dict = {}
coord_pets = Pet.objects.filter(loc_lat__isnull=False).order_by('-intake_at')
for x in coord_pets: 
    if x.found_location not in location_dict:
        location_dict[x.found_location] = {"lat": x.loc_lat, "lon": x.loc_lon}
print(coord_pets.count())

#Get record count from AAC
record_count = api_setup.get(intake_endpoint, select = "count('')")
record_count_out = api_setup.get(outcome_endpoint, select = "count('')")
count = record_count[0]['count'].encode('ascii','ignore')
count_out = record_count_out[0]['count'].encode('ascii','ignore')
Example #59
from sodapy import Socrata

# client = Socrata("sandbox.demo.socrata.com", None)
# print client.get("nimj-3ivp", limit=10)

# client = Socrata("data.cms.gov/", None)
# print client.get("97k6-zzx3", limit=10)
# https://resource/97k6-zzx3.json?$limit=5

client = Socrata("data.sfgov.org", "8gffbg1meMZ1e2Z0yOz2OpwZq")
#client.get("cuks-n6tp", limit=1)
assault = client.get("cuks-n6tp", select ="category,time,location", where ="category='ASSAULT'",limit=10)
print "assault data"
print assault
theft = client.get("cuks-n6tp", select ="category,time,location", where ="category='VEHICLE THEFT'",limit=10)
print "vehicle theft data"
print theft
vandalism = client.get("cuks-n6tp", select ="category,time,location", where ="category='VANDALISM'",limit=10)
print "VANDALISM data"
print vandalism
kidnapping = client.get("cuks-n6tp", select ="category,time,location", where ="category='KIDNAPPING'",limit=10)
print "KIDNAPPING data"
print kidnapping
sex = client.get("cuks-n6tp", select ="category,time,location", where ="category='SEX OFFENSES, FORCIBLE'",limit=10)
print "SEX OFFENSES, FORCIBLE data"
print sex
dui = client.get("cuks-n6tp", select ="category,time,location", where ="category='DRIVING UNDER THE INFLUENCE'",limit=10)
print "DUI data"
print dui
client.close()
Example #60
import requests
from sodapy import Socrata
import json
# store api token in creds.txt
with open('creds.json', 'r') as f:
    creds = json.loads(f.read())
client = Socrata("odn.data.socrata.com", creds["token"], username=creds['username'], password=creds['password'])
data = requests.get('https://odn.data.socrata.com/resource/jwbj-xtgt.json?$where=region_name%20IS%20NOT%20NULL%20AND%20location%20IS%20NULL').json()
for row in data:
    if len(row['region_name'].split(',')) > 2:
        location = requests.get('http://nominatim.openstreetmap.org/search/?q=%s&format=json' % (row['region_name'])).json()
    else:
        location = requests.get('http://nominatim.openstreetmap.org/search/?q=%s,usa&format=json' % (row['region_name'])).json()
    if location:
        location = location[0]
        row['location'] = "(%s, %s)" % (location['lat'], location['lon'])
client.upsert("k53q-ytmx", data)