예제 #1
0
def batch_read(query, full_url, max_rows=98360):
    """
    Run a query that is too large for one request as several smaller ones.

    The number of rows a query returns is the product of the number of
    selected values of every variable. The variable with the most selected
    values is cut into consecutive slices, one POST request is sent per
    slice, and the partial tables are stacked into a single dataframe.

    Parameters
    ----------
    query : dict
        Query whose 'query' list holds one item per variable, each with
        ['selection']['values'].
    full_url : str
        URL the batched queries are posted to.
    max_rows : int
        Row limit per request; batches are sized against 95% of it.

    Returns
    -------
    pandas.DataFrame
        For every batch, the default-named table side by side with the
        naming='id' table (whose columns are prefixed with "_"), all batches
        stacked with a fresh index. Empty when the query selects no rows.
    """
    dimensions = [len(q['selection']['values']) for q in query['query']]
    n_rows = functools.reduce(operator.mul, dimensions, 1)
    print("The table has: ", n_rows, "rows in total.")
    if n_rows == 0:
        # A variable without selected values makes the product empty; there
        # is nothing to request and the batch arithmetic would divide by zero.
        return pd.DataFrame()

    # Use 95% of the maximum value to be safe
    n_batches = math.ceil(n_rows / (max_rows * 0.95))
    max_dim = max(dimensions)
    i_max = dimensions.index(max_dim)
    # Round the batch size down so the limit is not exceeded, but never to
    # zero: a zero batch size would make the recalculation below divide by 0.
    batch_size = max(1, max_dim // n_batches)
    # Recalculate the real number of batches, because batch size was rounded
    n_batches = math.ceil(max_dim / batch_size)
    all_values = query['query'][i_max]['selection']['values']

    frames = []
    for b in range(n_batches):
        print("Doing query:", b + 1, "/", n_batches)
        batch_values = all_values[b * batch_size:(b + 1) * batch_size]
        if not batch_values:  # Avoid empty queries
            continue
        query_ = copy.deepcopy(query)
        query_['query'][i_max]['selection']['values'] = batch_values
        data_ = requests.post(full_url, json=query_)
        dataj_ = data_.json(object_pairs_hook=OrderedDict)
        r = pyjstat.from_json_stat(dataj_)[0]
        r_ = pyjstat.from_json_stat(dataj_, naming='id')[0]
        r_.columns = ["_" + c for c in r_.columns]
        frames.append(pd.concat([r, r_], axis=1))

    if not frames:
        return pd.DataFrame()
    # One concat at the end replaces the repeated DataFrame.append calls,
    # which no longer exist in pandas 2.x.
    return pd.concat(frames, ignore_index=True)
예제 #2
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_from_to_json_stat_no_loads(self):
        """ Test pyjstat nested from-to json_stat using list of dicts as input

        The JSON string written by to_json_stat() is decoded and read back
        with from_json_stat(); the first table must keep its dimension columns
        and the set of values found in row 30.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets)
        json_data = json.loads(pyjstat.to_json_stat(results),
                               object_pairs_hook=OrderedDict)
        data_df = pyjstat.from_json_stat(json_data)
        line_thirty = ["unemployment rate", "Belgium", "2009", 7.891892855]
        dimensions = pyjstat.get_dimensions(self.oecd_datasets["oecd"],
                                            "label")
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(data_df), 2)
        self.assertEqual(set(data_df[0].columns.values[:-1]),
                         set(dimensions[1]))
        self.assertEqual(set(data_df[0].iloc[30].values), set(line_thirty))
예제 #3
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_from_to_json_stat_no_loads(self):
     """Test pyjstat nested from-to json_stat w list of dicts as input.

     The JSON string written by to_json_stat() is decoded and read back with
     from_json_stat(); the first table must keep its dimension columns and
     the set of values found in row 30.
     """
     results = pyjstat.from_json_stat(self.oecd_datasets)
     json_data = json.loads(pyjstat.to_json_stat(results),
                            object_pairs_hook=OrderedDict)
     data_df = pyjstat.from_json_stat(json_data)
     line_thirty = ['unemployment rate', 'Belgium', '2009', 7.891892855]
     dimensions = pyjstat.get_dimensions(self.oecd_datasets['oecd'],
                                         'label')
     # assertEqual reports both operands when the comparison fails.
     self.assertEqual(len(data_df), 2)
     self.assertEqual(set(data_df[0].columns.values[:-1]),
                      set(dimensions[1]))
     self.assertEqual(set(data_df[0].iloc[30].values), set(line_thirty))
예제 #4
0
    def test_to_json_stat_value(self):
        """ Test pyjstat to_json_stat() custom value column

        The sample dataset is read and written with value='measure'; the
        first entry stored under "measure" in the output must be 4729.
        """

        results = pyjstat.from_json_stat(self.sample_dataset, value='measure')
        json_data = json.loads(pyjstat.to_json_stat(results, value='measure'),
                               object_pairs_hook=OrderedDict)
        # assertEqual shows the actual value if it is not 4729.
        self.assertEqual(json_data[0]["dataset1"]["measure"][0], 4729)
예제 #5
0
    def test_to_json_stat_types(self):
        """ Test pyjstat to_json_stat() output types

        Category indexes must be exact ints and category labels must be text
        strings, for both the country dimension and the year dimension.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets)
        json_data = json.loads(pyjstat.to_json_stat(results),
                               object_pairs_hook=OrderedDict)
        dimension = json_data[0]["dataset1"]["dimension"]
        countries = dimension["OECD countries,EU15 and total"]["category"]
        years = dimension["2003-2014"]["category"]
        # `unicode` only exists on Python 2; type(u"") is unicode there and
        # str on Python 3, so the check works on both.
        text_type = type(u"")

        self.assertEqual(countries["index"]["Spain"], 28)
        self.assertIs(type(countries["index"]["Spain"]), int)
        self.assertEqual(countries["label"]["France"], "France")
        self.assertIs(type(countries["label"]["France"]), text_type)
        self.assertEqual(years["index"]["2005"], 2)
        self.assertEqual(years["label"]["2005"], "2005")
        self.assertIs(type(years["index"]["2005"]), int)
        self.assertIs(type(years["label"]["2005"]), text_type)
예제 #6
0
    def read_box(self, from_box):
        """
        Takes a widget container as input (where the user has selected
        variables) and returns the values for the selected variables.

        The query is built with self.get_json(from_box) and posted to the URL
        stored in the value of the container's fourth child.

        Returns
        -------
        list or None
            [dataframe, label], where label is read from
            ['dataset']['label'] of the response. When anything fails, a hint
            is printed and None is returned.

        Example
        -------

        df = read_box(box)

        """
        try:
            query = self.get_json(from_box)
            url = from_box.children[3].value
            data = requests.post(url, json=query)
            # Decode the response body once and use it for label and table.
            payload = data.json(object_pairs_hook=OrderedDict)
            label = payload['dataset']['label']
            results = pyjstat.from_json_stat(payload)
            return [results[0], label]
        except Exception:
            # Best effort: any failure is reported as a missing selection.
            # Unlike a bare except, this lets KeyboardInterrupt through.
            print('You must make choices in the box!')
예제 #7
0
def read_premade(premade_id = None, 
            language = 'en', 
            base_url = 'http://data.ssb.no/api/v0/dataset', 
            full_url = None, 
            table_format = 'json'):
    """
    Returns a pandas dataframe of the premade table indicated by the premade 
    table_id or the full_url.

    When full_url is None it is built as
    '{base_url}/{premade_id}.{table_format}?lang={language}'.

    table_format must be 'json' (fetched with requests and converted with
    pyjstat) or 'csv' (read with pandas). For any other value a message is
    printed and None is returned.

    Note: The premade table id may be different from the normal table id.
    """
    if full_url is None:
        full_url = '{base_url}/{premade_id}.{table_format}?lang={language}'.format(
                base_url = base_url,
                premade_id = str(premade_id), 
                language = language,
                table_format = table_format)

    if table_format == 'json':
        data = requests.get(full_url)
        df = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
        df = df[0]
    elif table_format == 'csv':
        df = pd.read_csv(full_url)
    else:
        # The accepted value is 'json'; the old message told users to pass
        # 'json-stat', which this function rejects.
        print("Table_format is incorrectly specified. "
              "It must be 'json' or 'csv'")
        df = None
    return df
def post_query():
    """ A function to do a post query on the SSB API.

    meta_filter() is run once on the result of calc_iterations(); for each
    item it yields, build_query() creates a query that is posted to
    ssb_table.metadata_url. Every successful JSON-stat response is converted
    with pyjstat (naming="id") and its first table is collected. After the
    loop all collected tables are concatenated into one dataframe.

    A response whose status code is not 200 is reported and skipped instead
    of being parsed. The function waits 3 seconds after every request.

    Returns:
    --------
    big_df : pandas.DataFrame
        The concatenated result of all successful queries, with a fresh
        index. Empty when no query succeeded.
    """

    dataframes = []
    meta_data = meta_filter(calc_iterations())

    for variables in meta_data:
        query = build_query(variables)
        data = requests.post(ssb_table.metadata_url, json=query)
        failed = data.status_code != 200
        if failed:
            print("Feil! Status kode:", data.status_code)
        time.sleep(3.0)
        if failed:
            # An error body is not a JSON-stat table; do not try to parse it.
            continue
        results = pyjstat.from_json_stat(
            data.json(object_pairs_hook=OrderedDict), naming="id")
        dataframes.append(results[0])

    if not dataframes:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(dataframes, ignore_index=True)
예제 #9
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_convert_zeroes_not_null(self):
     """Test pyjstat to_json_stat zero conversion.

     The first value of the Sweden dataset must come out of the
     from_json_stat() / to_json_stat() round trip unchanged.
     """
     results = pyjstat.from_json_stat(self.sweden_dataset)
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     # assertEqual reports both values when they differ.
     self.assertEqual(self.sweden_dataset['dataset']['value'][0],
                      json_data['dataset1']['value'][0])
예제 #10
0
    def test_to_json_stat_types(self):
        """ Test pyjstat to_json_stat() output types

        Category indexes must be exact ints and category labels must be text
        strings, for both the country dimension and the year dimension.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets)
        json_data = json.loads(pyjstat.to_json_stat(results),
                               object_pairs_hook=OrderedDict)
        dimension = json_data[0]["dataset1"]["dimension"]
        countries = dimension["OECD countries, EU15 and total"]["category"]
        years = dimension["2003-2014"]["category"]
        # type(u"") is the text type on both Python 2 and Python 3.
        text_type = type(u"")

        self.assertEqual(countries["index"]["Spain"], 28)
        self.assertIs(type(countries["index"]["Spain"]), int)
        self.assertEqual(countries["label"]["France"], "France")
        # Check the label itself: wrapping it in str() first made the old
        # type assertion true for any value.
        self.assertIsInstance(countries["label"]["France"], text_type)
        self.assertEqual(years["index"]["2005"], 2)
        self.assertEqual(years["label"]["2005"], "2005")
        self.assertIs(type(years["index"]["2005"]), int)
        self.assertIsInstance(years["label"]["2005"], text_type)
예제 #11
0
    def test_to_json_stat_value(self):
        """ Test pyjstat to_json_stat() custom value column

        The sample dataset is read and written with value='measure'; the
        first entry stored under "measure" in the output must be 4729.
        """

        results = pyjstat.from_json_stat(self.sample_dataset, value='measure')
        json_data = json.loads(pyjstat.to_json_stat(results, value='measure'),
                               object_pairs_hook=OrderedDict)
        # assertEqual shows the actual value if it is not 4729.
        self.assertEqual(json_data[0]["dataset1"]["measure"][0], 4729)
예제 #12
0
    def test_from_to_json_stat_as_dict(self):
        """ Test pyjstat nested from-to json_stat using dict of dicts as input

        The datasets are written with output='dict', serialized and decoded
        again, and read back; the first table must keep its dimension columns
        and the set of values found in row 30.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets)
        json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                               object_pairs_hook=OrderedDict)
        data_df = pyjstat.from_json_stat(
            json.loads(json.dumps(json_data), object_pairs_hook=OrderedDict))
        line_thirty = ['unemployment rate', 'Belgium', '2009', 7.891892855]
        dimensions = pyjstat.get_dimensions(self.oecd_datasets['oecd'],
                                            'label')
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(data_df), 2)
        self.assertEqual(set(data_df[0].columns.values[:-1]),
                         set(dimensions[1]))
        self.assertEqual(set(data_df[0].iloc[30].values), set(line_thirty))
예제 #13
0
    def test_convert_zeroes_not_null(self):
        """ Test pyjstat to_json_stat zero conversion

        The first value of the Sweden dataset must come out of the
        from_json_stat() / to_json_stat() round trip unchanged.
        """

        results = pyjstat.from_json_stat(self.sweden_dataset)
        json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                               object_pairs_hook=OrderedDict)
        # assertEqual reports both values when they differ.
        self.assertEqual(self.sweden_dataset['dataset']['value'][0],
                         json_data['dataset1']['value'][0])
예제 #14
0
def folkemengde():
    """
    Post a fixed query to table 06913 and return the first resulting table.

    The query selects Region "0", ContentsCode "Folkemengde" and every "Tid"
    value. Two columns are written on the returned frame: 'år' is converted
    to numbers and reduced by one, and 'folkemengde' holds the numeric
    version of 'value'.
    """
    selections = [
        ("Region", "item", ["0"]),
        ("ContentsCode", "item", ["Folkemengde"]),
        ("Tid", "all", ["*"]),
    ]
    payload = {
        "query": [
            {"code": code, "selection": {"filter": kind, "values": values}}
            for code, kind, values in selections
        ],
        "response": {"format": "json-stat"},
    }
    response = requests.post('http://data.ssb.no/api/v0/no/table/06913',
                             json=payload)
    tables = pyjstat.from_json_stat(
        response.json(object_pairs_hook=OrderedDict))
    frame = tables[0]
    years = pd.to_numeric(frame[u'år'])
    frame[u'år'] = years - 1
    frame[u'folkemengde'] = pd.to_numeric(frame[u'value'])
    return frame
예제 #15
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_ons_index_sort_bug_index(self):
     """Test from_json_stat dimension sorting indexes instead of labels.

     With naming='id', the index of category CI_0018938 in dimension
     CL_0000667 must be the same before and after the round trip.
     """
     results = pyjstat.from_json_stat(self.ons_dataset, naming='id')
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     source_dim = self.ons_dataset['A02Level']['dimension']['CL_0000667']
     output_dim = json_data['dataset1']['dimension']['CL_0000667']
     # assertEqual reports both indexes when they differ.
     self.assertEqual(source_dim['category']['index']['CI_0018938'],
                      output_dim['category']['index']['CI_0018938'])
예제 #16
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_uk_dataset(self):
        """ Test pyjstat using a different ONS dataset

        The first table must have 5 columns and 3 rows, and the source values
        stored under keys "0" and "2" must match positions 0 and 2 of the
        output values.
        """

        results = pyjstat.from_json_stat(self.uk_dataset)
        json_data = json.loads(pyjstat.to_json_stat(results, output="dict"),
                               object_pairs_hook=OrderedDict)
        source_values = self.uk_dataset["QS104EW"]["value"]
        output_values = json_data["dataset1"]["value"]
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(results[0].columns), 5)
        self.assertEqual(len(results[0].index), 3)
        self.assertEqual(source_values["0"], output_values[0])
        self.assertEqual(source_values["2"], output_values[2])
예제 #17
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_from_json_stat_with_label(self):
        """ Test pyjstat from_json_stat() using label as parameter

        Two tables are expected; the first one must have the dimension labels
        as its leading columns and the expected set of values in row 30.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets)
        line_thirty = ["unemployment rate", "Belgium", "2009", 7.891892855]
        dimensions = pyjstat.get_dimensions(self.oecd_datasets["oecd"],
                                            "label")
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(results), 2)
        self.assertEqual(set(results[0].columns.values[:-1]),
                         set(dimensions[1]))
        self.assertEqual(set(results[0].iloc[30].values), set(line_thirty))
예제 #18
0
 def test_ons_index_sort_bug(self):
     """ Test pyjstat from_json_stat dimension sorting

     The index of category CI_0018938 in the source dataset must equal the
     index of category '16-17' of the 'Age' dimension in the output.
     """
     results = pyjstat.from_json_stat(self.ons_dataset)
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     source_dim = self.ons_dataset['A02Level']['dimension']['CL_0000667']
     output_dim = json_data['dataset1']['dimension']['Age']
     # assertEqual reports both indexes when they differ.
     self.assertEqual(source_dim['category']['index']['CI_0018938'],
                      output_dim['category']['index']['16-17'])
예제 #19
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_from_json_stat_with_id(self):
        """ Test pyjstat from_json_stat() using id as parameter

        Two tables are expected; the first one must have the dimension ids as
        its leading columns and the expected set of values in row 30.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets, naming="id")
        line_thirty = [u"UNR", u"BE", u"2009", 7.891892855]
        dimensions = pyjstat.get_dimensions(self.oecd_datasets["oecd"], "id")
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(results), 2)
        self.assertEqual(set(results[0].columns.values[:-1]),
                         set(dimensions[1]))
        self.assertEqual(set(results[0].iloc[30].values), set(line_thirty))
예제 #20
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_from_json_stat_with_id(self):
     """Test pyjstat from_json_stat() using id as parameter.

     Two tables are expected; the first one must have the dimension ids as
     its leading columns and the expected set of values in row 30.
     """
     results = pyjstat.from_json_stat(self.oecd_datasets, naming='id')
     line_thirty = [u'UNR', u'BE', u'2009', 7.891892855]
     dimensions = pyjstat.get_dimensions(self.oecd_datasets['oecd'], 'id')
     # assertEqual reports both operands when the comparison fails.
     self.assertEqual(len(results), 2)
     self.assertEqual(set(results[0].columns.values[:-1]),
                      set(dimensions[1]))
     self.assertEqual(set(results[0].iloc[30].values), set(line_thirty))
예제 #21
0
def kjorelengde():
    """
    Post a fixed query to table 07301 and return the first resulting table.

    The query selects Kjoretoytype "15", ContentsCode "Kjorekm" and every
    "Tid" value. Two columns are written on the returned frame: 'år' is
    converted to numbers and 'koyrelengde' holds the numeric version of
    'value'.
    """
    selections = [
        ("Kjoretoytype", "item", ["15"]),
        ("ContentsCode", "item", ["Kjorekm"]),
        ("Tid", "all", ["*"]),
    ]
    payload = {
        "query": [
            {"code": code, "selection": {"filter": kind, "values": values}}
            for code, kind, values in selections
        ],
        "response": {"format": "json-stat"},
    }
    response = requests.post('http://data.ssb.no/api/v0/no/table/07301',
                             json=payload)
    tables = pyjstat.from_json_stat(
        response.json(object_pairs_hook=OrderedDict))
    frame = tables[0]
    frame[u'år'] = pd.to_numeric(frame[u'år'])
    frame[u'koyrelengde'] = pd.to_numeric(frame.value)
    return frame
예제 #22
0
 def test_ons_index_sort_bug(self):
     """ Test pyjstat from_json_stat dimension sorting

     The index of category CI_0018938 in the source dataset must equal the
     index of category '16-17' of the 'Age' dimension in the output.
     """
     results = pyjstat.from_json_stat(self.ons_dataset)
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     source_dim = self.ons_dataset['A02Level']['dimension']['CL_0000667']
     output_dim = json_data['dataset1']['dimension']['Age']
     # assertEqual reports both indexes when they differ.
     self.assertEqual(source_dim['category']['index']['CI_0018938'],
                      output_dim['category']['index']['16-17'])
예제 #23
0
파일: __init__.py 프로젝트: klpn/seregmort
def ndeaths(regvalues, causevalues, agevalues = allages(), 
        sexvalues = ['1', '2'], yearvalues = yearrange()):
    """
    Post a death-count query and return its dimensions and table.

    The query is built by mortreqjson() from the given selections and sent
    to morturl; an HTTP error status raises through raise_for_status().
    The result is a dict with the response's ['dataset']['dimension'] under
    'dimension' and the first naming='id' table under 'frame'.
    """
    response = requests.post(
        morturl,
        json = mortreqjson(regvalues, causevalues, agevalues, sexvalues,
                           yearvalues))
    response.raise_for_status()
    # Decode explicitly as UTF-8 and keep the key order of the response.
    decoded = json.loads(response.content.decode('utf-8'),
                         object_pairs_hook = OrderedDict)
    dimension = decoded['dataset']['dimension']
    frame = pyjstat.from_json_stat(decoded, naming = 'id')[0]
    return {'dimension': dimension, 'frame': frame}
예제 #24
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_from_json_stat_with_label(self):
     """Test pyjstat from_json_stat() using label as parameter.

     Two tables are expected; the first one must have the dimension labels
     as its leading columns and the expected set of values in row 30.
     """
     results = pyjstat.from_json_stat(self.oecd_datasets)
     line_thirty = ['unemployment rate', 'Belgium', '2009', 7.891892855]
     dimensions = pyjstat.get_dimensions(self.oecd_datasets['oecd'],
                                         'label')
     # assertEqual reports both operands when the comparison fails.
     self.assertEqual(len(results), 2)
     self.assertEqual(set(results[0].columns.values[:-1]),
                      set(dimensions[1]))
     self.assertEqual(set(results[0].iloc[30].values), set(line_thirty))
예제 #25
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_to_json_stat(self):
        """ Test pyjstat to_json_stat()

        Checks a dimension label, a dimension size, a dimension id and the
        last value of the output, then checks that duplicated column names
        make to_json_stat() raise ValueError.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets)
        json_data = json.loads(pyjstat.to_json_stat(results),
                               object_pairs_hook=OrderedDict)
        first_dimension = json_data[0]["dataset1"]["dimension"]
        self.assertEqual(first_dimension["indicator"]["label"], "indicator")
        self.assertEqual(first_dimension["size"][1], 36)
        self.assertEqual(json_data[1]["dataset2"]["dimension"]["id"][2],
                         "age group")
        # Compare the last output value with the last value of the table.
        # The previous assertTrue(a, b) used b only as the failure message
        # and therefore never compared the two.
        self.assertEqual(json_data[0]["dataset1"]["value"][-1],
                         results[0]["value"].iloc[-1])
        results[0].columns = ["a", "a", "b", "value"]
        self.assertRaises(ValueError, pyjstat.to_json_stat, results)
예제 #26
0
    def test_from_json_stat_with_id(self):
        """ Test pyjstat from_json_stat() using id as parameter

        Two tables are expected; the first one must have the dimension ids as
        its leading columns and the expected set of values in row 30.
        """

        results = pyjstat.from_json_stat(self.oecd_datasets, naming='id')
        line_thirty = ['UNR', 'BE', 2009, 7.891892855]
        dimensions = pyjstat.get_dimensions(self.oecd_datasets['oecd'], 'id')
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(results), 2)
        self.assertEqual(set(results[0].columns.values[:-1]),
                         set(dimensions[1]))
        self.assertEqual(set(results[0].iloc[30].values), set(line_thirty))
예제 #27
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_class_dataset(self):
        """ Test pyjstat using class dataset from v1.02

        The first table must have 7 columns and 3960 rows, and the values at
        positions 0, 547 and -1 must be the same in source and output.
        """

        results = pyjstat.from_json_stat(self.galicia_dataset)
        json_data = json.loads(pyjstat.to_json_stat(results, output="dict"),
                               object_pairs_hook=OrderedDict)
        source_values = self.galicia_dataset["value"]
        output_values = json_data["dataset1"]["value"]
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(self.galicia_dataset["class"], "dataset")
        self.assertEqual(len(results[0].columns), 7)
        self.assertEqual(len(results[0].index), 3960)
        self.assertEqual(source_values[0], output_values[0])
        self.assertEqual(source_values[547], output_values[547])
        self.assertEqual(source_values[-1], output_values[-1])
예제 #28
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_uk_dataset(self):
     """Test pyjstat using a different ONS dataset.

     The first table must have 5 columns and 3 rows, and the source values
     stored under keys '0' and '2' must match positions 0 and 2 of the
     output values.
     """
     results = pyjstat.from_json_stat(self.uk_dataset)
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     source_values = self.uk_dataset['QS104EW']['value']
     output_values = json_data['dataset1']['value']
     # assertEqual reports both operands when the comparison fails.
     self.assertEqual(len(results[0].columns), 5)
     self.assertEqual(len(results[0].index), 3)
     self.assertEqual(source_values['0'], output_values[0])
     self.assertEqual(source_values['2'], output_values[2])
예제 #29
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_us_labor_dataset(self):
        """ Test pyjstat using a us labor dataset of class dataset

        The first table must have 4 columns and 12880 rows, and the values at
        positions 0, 547 and -1 must be the same in source and output.
        """

        results = pyjstat.from_json_stat(self.uslabor_dataset)
        json_data = json.loads(pyjstat.to_json_stat(results, output="dict"),
                               object_pairs_hook=OrderedDict)
        source_values = self.uslabor_dataset["value"]
        output_values = json_data["dataset1"]["value"]
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(self.uslabor_dataset["class"], "dataset")
        self.assertEqual(len(results[0].columns), 4)
        self.assertEqual(len(results[0].index), 12880)
        self.assertEqual(source_values[0], output_values[0])
        self.assertEqual(source_values[547], output_values[547])
        self.assertEqual(source_values[-1], output_values[-1])
예제 #30
0
def read_query(queries):
    """
    Post every query to a.url and return the results as one dataframe.

    Each response is converted with pyjstat (naming="id") and its first
    table is kept. With more than one query the tables are concatenated
    with a fresh index; otherwise the first collected table is returned.
    """
    dataframes = [
        pyjstat.from_json_stat(
            requests.post(a.url, json=query).json(
                object_pairs_hook=OrderedDict),
            naming="id")[0]
        for query in queries
    ]
    if len(queries) <= 1:
        return dataframes[0]
    return pd.concat(dataframes, ignore_index=True)
예제 #31
0
    def test_uk_dataset(self):
        """ Test pyjstat using a different ONS dataset

        The first table must have 5 columns and 3 rows, and the source values
        stored under keys '0' and '2' must match positions 0 and 2 of the
        output values.
        """

        results = pyjstat.from_json_stat(self.uk_dataset)
        json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                               object_pairs_hook=OrderedDict)
        source_values = self.uk_dataset['QS104EW']['value']
        output_values = json_data['dataset1']['value']
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(len(results[0].columns), 5)
        self.assertEqual(len(results[0].index), 3)
        self.assertEqual(source_values['0'], output_values[0])
        self.assertEqual(source_values['2'], output_values[2])
예제 #32
0
파일: __init__.py 프로젝트: klpn/seregmort
def npop(regvalues, agevalues = allages('pop'),
        sexvalues = ['1', '2'], yearvalues = yearrange()):
    """
    Post a population query and return its dimensions and merged table.

    The query is built by popreqjson() from the given selections and sent to
    popurl; an HTTP error status raises through raise_for_status(). The
    first naming='id' table of the response is merged with ageintmerge() on
    the 'Alder' column. The result is a dict with the response's
    ['dataset']['dimension'] under 'dimension' and the merged table under
    'frame'.
    """
    response = requests.post(
        popurl,
        json = popreqjson(regvalues, agevalues, sexvalues, yearvalues))
    response.raise_for_status()
    # Decode explicitly as UTF-8 and keep the key order of the response.
    decoded = json.loads(response.content.decode('utf-8'),
                         object_pairs_hook = OrderedDict)
    population = pyjstat.from_json_stat(decoded, naming = 'id')[0]
    merged = pd.merge(ageintmerge(), population, on = 'Alder')
    return {'dimension': decoded['dataset']['dimension'], 'frame': merged}
예제 #33
0
    def _get_table(self, url, table_format='json'):

        if table_format == 'json':
            response = requests.get(url)
            df = pyjstat.from_json_stat(
                response.json(object_pairs_hook=OrderedDict))[0]
        elif table_format == 'csv':
            df = pd.read_csv(url)
        else:
            print("""table_format param must be either 'json' or 'csv'""")
            df = None
        return df
예제 #34
0
def read_all(table_id = None, 
             language = 'en',
             base_url = 'http://data.ssb.no/api/v0', 
             full_url = None,
             max_rows = 98360):
    """
    Returns a pandas dataframe with all values for all options 
    for the table specified by table_id

    When full_url is None it is built as
    '{base_url}/{language}/table/{table_id}'. The full query is obtained
    from full_json() and posted in one request; if that fails for any
    reason, the query is split with batch_read(), using max_rows as the
    per-request row limit.

    The result holds the default-named table side by side with the
    naming='id' table, whose columns are prefixed with "_".

    Warning: The table may be large

    Useful if 
        - you know exactly what you are looking for and
        - you do not want to use the notebook/widgets/box to specify the json query)

    Example

    df = read_all(table_id = '10714')

    """
    if full_url is None:
        full_url = '{base_url}/{language}/table/{table_id}'.format(
            base_url = base_url,
            language = language, 
            table_id = table_id)
    print("Requesting: ", full_url)
    query = full_json(full_url = full_url)

    try:
        # A single request can fail, e.g. when the table is larger than the
        # API allows per query; in that case fall back to batches below.
        data = requests.post(full_url, json = query)
        dataj = data.json(object_pairs_hook=OrderedDict)
        r = pyjstat.from_json_stat(dataj)[0]
        r_ = pyjstat.from_json_stat(dataj, naming='id')[0]
        r_.columns = ["_" + c for c in r_.columns]
        results = pd.concat([r, r_], axis=1)
    except Exception:
        # Exception instead of a bare except, so that Ctrl-C and SystemExit
        # stop the program rather than starting the batched download.
        print("Simple query failed: Trying to split the query...")
        results = batch_read(query, full_url, max_rows=max_rows)
    return results
예제 #35
0
파일: __init__.py 프로젝트: klpn/seregmort
def npop(regvalues,
         agevalues=allages('pop'),
         sexvalues=['1', '2'],
         yearvalues=yearrange()):
    """
    Post a population query and return its dimensions and merged table.

    The query is built by popreqjson() from the given selections and sent to
    popurl; an HTTP error status raises through raise_for_status(). The
    first naming='id' table of the response is merged with ageintmerge() on
    the 'Alder' column. The result is a dict with the response's
    ['dataset']['dimension'] under 'dimension' and the merged table under
    'frame'.
    """
    response = requests.post(
        popurl,
        json=popreqjson(regvalues, agevalues, sexvalues, yearvalues))
    response.raise_for_status()
    # Decode explicitly as UTF-8 and keep the key order of the response.
    decoded = json.loads(response.content.decode('utf-8'),
                         object_pairs_hook=OrderedDict)
    population = pyjstat.from_json_stat(decoded, naming='id')[0]
    merged = pd.merge(ageintmerge(), population, on='Alder')
    return {'dimension': decoded['dataset']['dimension'], 'frame': merged}
예제 #36
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_class_dataset(self):
     """Test pyjstat using class dataset from v1.02.

     The first table must have 7 columns and 3960 rows, and the values at
     positions 0, 547 and -1 must be the same in source and output.
     """
     results = pyjstat.from_json_stat(self.galicia_dataset)
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     source_values = self.galicia_dataset['value']
     output_values = json_data['dataset1']['value']
     # assertEqual reports both operands when the comparison fails.
     self.assertEqual(self.galicia_dataset['class'], 'dataset')
     self.assertEqual(len(results[0].columns), 7)
     self.assertEqual(len(results[0].index), 3960)
     self.assertEqual(source_values[0], output_values[0])
     self.assertEqual(source_values[547], output_values[547])
     self.assertEqual(source_values[-1], output_values[-1])
예제 #37
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_us_labor_dataset(self):
     """Test pyjstat using a us labor dataset of class dataset.

     The first table must have 4 columns and 12880 rows, and the values at
     positions 0, 547 and -1 must be the same in source and output.
     """
     results = pyjstat.from_json_stat(self.uslabor_dataset)
     json_data = json.loads(pyjstat.to_json_stat(results, output='dict'),
                            object_pairs_hook=OrderedDict)
     source_values = self.uslabor_dataset['value']
     output_values = json_data['dataset1']['value']
     # assertEqual reports both operands when the comparison fails.
     self.assertEqual(self.uslabor_dataset['class'], 'dataset')
     self.assertEqual(len(results[0].columns), 4)
     self.assertEqual(len(results[0].index), 12880)
     self.assertEqual(source_values[0], output_values[0])
     self.assertEqual(source_values[547], output_values[547])
     self.assertEqual(source_values[-1], output_values[-1])
예제 #38
0
파일: test.py 프로젝트: masego24/pyjstat
 def test_to_json_stat(self):
     """Test pyjstat to_json_stat().

     Checks a dimension label, a dimension size, a dimension id and the
     last value of the output, then checks that duplicated column names
     make to_json_stat() raise ValueError.
     """
     results = pyjstat.from_json_stat(self.oecd_datasets)
     json_data = json.loads(pyjstat.to_json_stat(results),
                            object_pairs_hook=OrderedDict)
     first_dimension = json_data[0]["dataset1"]["dimension"]
     self.assertEqual(first_dimension["indicator"]["label"], "indicator")
     self.assertEqual(first_dimension["size"][1], 36)
     self.assertEqual(json_data[1]["dataset2"]["dimension"]["id"][2],
                      "age group")
     # Compare the last output value with the last value of the table.
     # The previous assertTrue(a, b) used b only as the failure message
     # and therefore never compared the two.
     self.assertEqual(json_data[0]["dataset1"]["value"][-1],
                      results[0]['value'].iloc[-1])
     results[0].columns = ['a', 'a', 'b', 'value']
     self.assertRaises(ValueError, pyjstat.to_json_stat, results)
예제 #39
0
    def get_pandas_df(self, url, params=None, table_format='json'):
        """
        Get Pandas dataframe

        Logs the URL, posts `params` to it and converts the UTF-8 decoded
        JSON response with pyjstat, returning the first resulting table.
        When `params` is None, the JSON-serialized result of
        self._full_json(url) is posted instead.

        `table_format` is accepted but not used by this method.
        """

        self.log(str(url))

        # Identity check: `== None` can be hijacked by a custom __eq__.
        if params is None:
            params = json.dumps(self._full_json(url))

        raw = requests.post(url, params).content
        payload = json.loads(raw.decode('utf-8'))
        return pyjstat.from_json_stat(payload)[0]
예제 #40
0
def sverige_folkemengde():
    """
    Post a fixed query to the SCB BefolkningNy table and return the first
    resulting table.

    The query selects ContentsCode "BE0101N1", Region "00" and every "Tid"
    value. Two columns are added to the returned frame: 'år' holds the
    numeric version of 'year' and 'folkemengde_sverige' the numeric version
    of 'value'.
    """
    selections = [
        ("ContentsCode", "item", ["BE0101N1"]),
        ("Region", "item", ["00"]),
        ("Tid", "all", ["*"]),
    ]
    payload = {
        "query": [
            {"code": code, "selection": {"filter": kind, "values": values}}
            for code, kind, values in selections
        ],
        "response": {"format": "json-stat"},
    }
    response = requests.post(
        'http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy',
        json=payload)
    tables = pyjstat.from_json_stat(
        response.json(object_pairs_hook=OrderedDict))
    frame = tables[0]
    frame[u'år'] = pd.to_numeric(frame[u'year'])
    frame[u'folkemengde_sverige'] = pd.to_numeric(frame[u'value'])
    return frame
예제 #41
0
파일: __init__.py 프로젝트: klpn/seregmort
def ndeaths(regvalues,
            causevalues,
            agevalues=allages(),
            sexvalues=['1', '2'],
            yearvalues=yearrange()):
    """
    Post a death-count query and return its dimensions and table.

    The query is built by mortreqjson() from the given selections and sent
    to morturl; an HTTP error status raises through raise_for_status().
    The result is a dict with the response's ['dataset']['dimension'] under
    'dimension' and the first naming='id' table under 'frame'.
    """
    response = requests.post(
        morturl,
        json=mortreqjson(regvalues, causevalues, agevalues, sexvalues,
                         yearvalues))
    response.raise_for_status()
    # Decode explicitly as UTF-8 and keep the key order of the response.
    decoded = json.loads(response.content.decode('utf-8'),
                         object_pairs_hook=OrderedDict)
    dimension = decoded['dataset']['dimension']
    frame = pyjstat.from_json_stat(decoded, naming='id')[0]
    return {'dimension': dimension, 'frame': frame}
예제 #42
0
def read_box(from_box):
    """
    Return a pandas dataframe for the selections made in a widget container.

    The query is built with get_json(from_box) and posted to the URL stored
    in the value of the container's fourth child; the first table of the
    JSON-stat response is returned.

    Example
    -------

    df = read_box(box)

    """
    response = requests.post(from_box.children[3].value,
                             json = get_json(from_box))
    payload = response.json(object_pairs_hook=OrderedDict)
    return pyjstat.from_json_stat(payload)[0]
예제 #43
0
def read_with_json(table_id = None,
              query = None,
              language = 'en',
              base_url = 'http://data.ssb.no/api/v0',
              full_url = None):
    """
    Returns a pandas dataframe with the values for the table specified by
    table_id (or full_url) and an explicit query given as a dictionary.

    Useful if
        - you know exactly what you are looking for and
        - can specify the json yourself (as a dictionary)
        - you do not want to use the notebook/widgets/box to specify the json query

    Parameters
    ----------
        table_id : id of the table; used to build the url when full_url is None
        query    : the query, sent as the json body of the POST request
        language : language segment of the url built from table_id
        base_url : root of the url built from table_id
        full_url : complete url of the table; when given, table_id, language
                   and base_url are not used

    Raises
    ------
        ValueError if neither table_id nor full_url is specified.

    Hints
    -----
        - use full_json(table_id = '10714', out = 'string') to get a query string and edit it
        - use to_dict(str) to get a dict from an edited json string

    Example
    -------
    json_query = {'response': {'format': 'json-stat'},
        'query': [
        {'selection': {'values': ['0'], 'filter': 'item'}, 'code': 'Region'},
        {'selection': {'values': ['KufjolsIAlt'], 'filter': 'item'}, 'code': 'ContentsCode'},
        {'selection': {'values': ['1999', '2013'], 'filter': 'item'}, 'code': 'Tid'}]}

    df = read_with_json(table_id = '10714', query = json_query)

    """
    if full_url is None:
        # Without a table id the url would end in ".../table/None" and the
        # request could only fail later with an unrelated-looking error.
        if table_id is None:
            raise ValueError("Either table_id or full_url must be specified.")

        full_url = '{base_url}/{language}/table/{table_id}'.format(
            base_url = base_url,
            language = language,
            table_id = table_id)

    data = requests.post(full_url, json = query)
    results = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
    # from_json_stat returns a list of dataframes; only the first is returned.
    return results[0]
예제 #44
0
def read_url(full_url = None,
             table_format = 'json'):
    """
    Returns a pandas dataframe of the premade table found at full_url.

    Parameters
    ----------
        full_url     : url of the table; fetched with requests.get for
                       json-stat data, or handed to pd.read_csv for csv data
        table_format : 'json' (also accepted as 'json-stat') or 'csv'

    Returns
    -------
        The dataframe, or None (after printing a message) when table_format
        is not one of the accepted values.
    """

    # 'json-stat' is accepted as an alias because that is the name the
    # original error message told users to pass.
    if table_format in ('json', 'json-stat'):
        data = requests.get(full_url)
        df = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
        # from_json_stat returns a list of dataframes; keep the first one.
        df = df[0]

    elif table_format == 'csv':
        df = pd.read_csv(full_url)
    else:
        print("""Table_format is incorrectly specified. 
              It must be 'json', 'json-stat' or 'csv'""")
        df = None
    return df
예제 #45
0
def read_all(table_id = None,
             language = 'en',
             base_url = 'http://data.ssb.no/api/v0',
             full_url = None):
    """
    Returns a pandas dataframe with all values for all options
    for the table specified by table_id (or full_url).

    Warning: The table may be large

    Useful if
        - you know exactly what you are looking for and
        - you do not want to use the notebook/widgets/box to specify the json query

    Parameters
    ----------
        table_id : id of the table; used to build the url when full_url is None
        language : language segment of the url built from table_id
        base_url : root of the url built from table_id
        full_url : complete url of the table; when given, table_id, language
                   and base_url are not used

    Raises
    ------
        ValueError if neither table_id nor full_url is specified.

    Example

    df = read_all(table_id = '10714')

    """

    if full_url is None:
        # Without a table id the url would end in ".../table/None" and the
        # requests below could only fail with an unrelated-looking error.
        if table_id is None:
            raise ValueError("Either table_id or full_url must be specified.")

        full_url = '{base_url}/{language}/table/{table_id}'.format(
            base_url = base_url,
            language = language,
            table_id = table_id)

    # full_json supplies the query that is posted back to the same url.
    query = full_json(full_url = full_url)
    data = requests.post(full_url, json = query)
    results = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))

    # maybe this need not be its own function,
    # but an option in read_json? json = 'all'

    # other functions(options include: read_recent to get only the
    # most recent values (defined as x), json = 'recent')

    return results[0]
예제 #46
0
파일: example.py 프로젝트: lla11358/pyjstat
# -*- coding: utf-8 -*-
""" pyjstat example with 0.3.5-like syntax for JSON-stat 1.3."""

from pyjstat import pyjstat
import requests
from collections import OrderedDict
import json


EXAMPLE_URL = 'http://json-stat.org/samples/us-labor-ds.json'

# Download the sample and decode it, keeping the key order of the JSON.
data = requests.get(EXAMPLE_URL)
decoded = data.json(object_pairs_hook=OrderedDict)
results = pyjstat.from_json_stat(decoded)
print(results)

# Round trip: serialise the dataframes back to JSON-stat, parse that text
# and dump it again before printing.
round_tripped = json.loads(pyjstat.to_json_stat(results))
print(json.dumps(round_tripped))
예제 #47
0
파일: test.py 프로젝트: jmvg/pyjstat
    def test_from_json_stat_no_coertion(self):
        """Check from_json_stat with id naming, without coercion."""

        frames = pyjstat.from_json_stat(self.sweden_dataset, naming="id")
        # Row 500 of the "Alder" column of the first dataframe must hold
        # the category id unchanged.
        alder_value = frames[0]["Alder"][500]
        self.assertTrue(alder_value == "35-39")
예제 #48
0
def run_pyjstat(result_list):
    """Decode a response's JSON body and return the first pyjstat dataframe.

    ``result_list`` must offer a ``json`` method accepting
    ``object_pairs_hook``; dimensions are named by id.
    """
    decoded = result_list.json(object_pairs_hook=OrderedDict)
    frames = pyjstat.from_json_stat(decoded, naming="id")
    return frames[0]
예제 #49
0
import requests
import json

from pyjstat import pyjstat
from urllib.request import urlopen
import matplotlib.pyplot as plt
import pandas as pd

# Widen the pandas console output so the printed frames are not truncated.
pd.set_option('display.max_columns', 10)
pd.set_option('display.width',2000)
pd.set_option('display.max_rows', 200)

# Reference, as quoted from the pyjstat documentation:
#
# pyjstat.from_json_stat(datasets, naming='label', value='value')
# Decode JSON-stat formatted data into pandas.DataFrame object.
# Parameters:
#   datasets (OrderedDict, list) - data in JSON-stat format, previously
#       deserialized to a python object by json.load() or json.loads(),
#       for example. Both List and OrderedDict are accepted as inputs.
#   naming (string, optional) - dimension naming. Possible values:
#       'label' or 'id'. Defaults to 'label'.
#   value (string, optional) - name of the value column.
#       Defaults to 'value'.
# Returns: results - list of pandas.DataFrame with imported data.
# Return type: list

# mainstream class numbers, mainstream pupils, average class size.
#  https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/ED114/JSON-stat/1.0/
url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/ED112/JSON-stat/1.0/"

# Read the response inside a with-block so the connection is closed as soon
# as the body has been parsed, instead of being left open.
with urlopen(url) as response:
    raw = json.load(response)
results = pyjstat.from_json_stat(raw)
# print(results)  - list with 1 element

data = results[0]  # the first (here: only) element is a pandas dataframe
print(data.head())

# Total of the "value" column for each "Statistic" category.
summary = data.groupby("Statistic")["value"].sum()
print(summary)
예제 #50
0
# -*- coding: utf-8 -*-
from pyjstat import pyjstat
import urllib2
import json
from collections import OrderedDict
# Fetch the OECD/Canada sample and decode it, keeping the JSON key order.
response = urllib2.urlopen('http://json-stat.org/samples/oecd-canada.json')
data = json.load(response, object_pairs_hook=OrderedDict)

# from_json_stat returns a list of dataframes.
results = pyjstat.from_json_stat(data)
print(results)
예제 #51
0
파일: test_2.py 프로젝트: langphil/sandbox
from pyjstat import pyjstat
from collections import OrderedDict
import urllib2
import json

dataset_url_1 = 'http://www.cso.ie/StatbankServices/StatbankServices.svc/jsonservice/responseinstance/CDD01'

# Download the dataset and decode it, keeping the JSON key order.
population_response = urllib2.urlopen(dataset_url_1)
population_json_data = json.load(population_response,
                                 object_pairs_hook=OrderedDict)

# Dimensions are named by id; the first dataframe is the one used below.
population_results = pyjstat.from_json_stat(population_json_data, naming="id")
population_dataset = population_results[0]

# Keep only the rows whose content code is 'Folketallet11'.
# NOTE(review): confirm that this code exists in the CDD01 dataset.
is_selected_content = population_dataset['ContentsCode'] == 'Folketallet11'
population_data = population_dataset[is_selected_content]

# The result of head() is not stored or printed here.
population_data.head()
예제 #52
0
    def test_from_json_stat_no_coertion(self):
        """ Test pyjstat from_json_stat with id naming without coercion"""

        # Decode the fixture with dimensions named by id.
        results = pyjstat.from_json_stat(self.sweden_dataset, naming='id')
        # Row 500 of the 'Alder' column of the first dataframe must still
        # hold the category id '35-39'.
        self.assertTrue(results[0]['Alder'][500] == '35-39')