Ejemplo n.º 1
0
    def test_parse_list_values(self):
        """Check that parse_values_list only renders the fields named in field_list."""
        # Two input records; zip_code and state are present but not requested below.
        records = [
            {'gsId': 100, 'name': 'Max', 'zip_code': 123, 'state': 'CA', 'gsRating': 3.5},
            {'gsId': 101, 'name': 'Tez', 'zip_code': 123, 'state': 'CA', 'gsRating': 8.5},
        ]
        # Declared type of every field that can appear in a record.
        column_types = {
            'gsId': 'INT',
            'zip_code': 'INT',
            'state': 'TEXT',
            'name': 'TEXT',
            'gsRating': 'FLOAT',
        }
        requested = ['gsId', 'name', 'gsRating']

        rendered = Postgresql().parse_values_list(records, column_types, field_list=requested)

        assert rendered == "(100, 'Max', 3.5),(101, 'Tez', 8.5)"
Ejemplo n.º 2
0
    def test_parse_list_values(self):
        """Verify the string parse_values_list builds for two records and three selected fields."""
        first_school = {
            'gsId': 100,
            'name': 'Max',
            'zip_code': 123,
            'state': 'CA',
            'gsRating': 3.5,
        }
        second_school = {
            'gsId': 101,
            'name': 'Tez',
            'zip_code': 123,
            'state': 'CA',
            'gsRating': 8.5,
        }
        field_types = {
            'gsId': 'INT',
            'zip_code': 'INT',
            'state': 'TEXT',
            'name': 'TEXT',
            'gsRating': 'FLOAT',
        }
        expected = "(100, 'Max', 3.5),(101, 'Tez', 8.5)"

        database = Postgresql()
        actual = database.parse_values_list([first_school, second_school],
                                            field_types,
                                            field_list=['gsId', 'name', 'gsRating'])

        assert actual == expected
Ejemplo n.º 3
0
class GreatSchools:
    """
    This object connects to the GreatSchools.org API, retrieves information about schools and GS ratings,
    and stores schools that are not yet known in a PostgreSQL table.
    See more information on: http://www.greatschools.org/api/docs/main.page
    """

    def __init__(self, key=None):
        """
        Stores the API key and prepares the PostgreSQL table used to persist results.
        Args:
            key: string, the GreatSchools API key. When None, the key is obtained from
                 _get_great_schools_api_key().
        """
        if key is None:
            self.api_key = _get_great_schools_api_key()
        else:
            self.api_key = key
        # myan: initialize postgresql
        # NOTE(review): the connection settings below are hard-coded; consider loading them from configuration.
        datamodel = Datamodel()
        self.table, self.table_config = datamodel.great_schools()
        self.postgres = Postgresql(user_name='postgres',
                                   password='******',
                                   host='localhost',
                                   port='5432',
                                   db='TestProject')
        self.postgres.initialize_table(self.table, recreate=False, **self.table_config)

    def set_api_key(self, key=None):
        """
        Replaces the API key used for subsequent API calls.
        Args:
            key: string, the new GreatSchools API key
        """
        self.api_key = key

    def run(self, **kwargs):
        """
        Fetches nearby schools, keeps the ones whose gsid is not stored in the table yet and pushes them.
        Args:
            **kwargs: forwarded unchanged to _nearby_schools (state, zip_code, radius, limit)

        Returns:
            list, the entries that were handed to _push
        """
        fetched = self._nearby_schools(**kwargs)
        existing_keys = self.postgres.get("select gsid from {table};".format(table=self.table))
        # Build the lookup of stored ids once instead of scanning the stored values for every fetched entry.
        # The column is only accessed when the query returned rows, exactly as before.
        if len(existing_keys) < 1:
            known_ids = set()
        else:
            known_ids = set(existing_keys['gsid'].values)
        # A new list is built rather than deleting from the fetched list while iterating over it.
        results = [entry for entry in fetched if not known_ids or entry['gsid'] not in known_ids]
        self._push(results)
        return results

    def _push(self, data, batch_size=500):
        """
        Inserts the given entries into the table in batches.
        Args:
            data: list, [dict(field_1=value_1, ...), ...] entries to insert
            batch_size: int, maximum number of entries sent per insert statement

        Raises:
            ValueError: if batch_size is 0 (range() rejects a zero step; previously this looped forever)
        """
        fields_list = list(self.table_config['fields_types'].keys())
        fields_to_push = self.postgres.construct_db_field_string(fields_list)
        for start_idx in range(0, len(data), batch_size):
            values_to_insert = self.postgres.parse_values_list(data[start_idx:start_idx + batch_size],
                                                               self.table_config['fields_types'],
                                                               fields_list)
            self.postgres.put(self.table, fields=fields_to_push, values=values_to_insert)

    def _nearby_schools(self, state=None, zip_code=None, radius=5, limit=10):
        """
        Gets a list of schools for a specified physical location (i.e. state + zip_code), within a certain radius
        Args:
            state: value substituted for the `state` query parameter
            zip_code: value substituted for the `zip` query parameter
            radius: value substituted for the `radius` query parameter
            limit: value substituted for the `limit` query parameter

        Returns:
            list, [dict(zip_code=..., state=..., gsid=int, name=string, gsrating=float), dict(...), ...]
            Keys are lower-cased; a field that is missing in the API response is left out of its dict.

        Raises:
            ValueError: if no API key has been set

        Examples:
            gs = GreatSchools(key='Your GS Key')
            results = gs._nearby_schools(state='TX', zip_code=75228, limit=2)
            # [{'zip_code': 75228, 'state': 'TX', 'gsid': 1769, 'name': 'Bryan Adams High School', 'gsrating': 3.0},
            #  {'zip_code': 75228, 'state': 'TX', 'gsid': 7566, 'name': 'White Rock Montessori School'}]
        """
        self._check_key()
        url = "http://api.greatschools.org/schools/nearby?key={key}&state={state}&radius={radius}&zip={zip_code}&limit={limit}".format(
            key=self.api_key,
            state=state,
            zip_code=zip_code,
            radius=radius,
            limit=limit)

        results = self._run(url,
                            key_string='school',
                            result_fields=[(int, 'gsId'), (None, 'name'), (float, 'gsRating')],
                            zip_code=zip_code,
                            state=state)
        return results

    def _run(self, url, key_string="school", result_fields=None, zip_code=None, state=None):
        """
        Generic method to extract data from API calls
        Args:
            url: string, the API call url to retrieve data
            key_string: string, the parent field in the XML file
            result_fields: list, [(func, field), ...] where func can be int, float etc.
            zip_code: value stored under 'zip_code' in every result
            state: value stored under 'state' in every result

        Returns:
            list, [dict(field_1=value_1, field_2=value2, ...), dict(...)]
        """
        nearby = requests.get(url)
        return self._parse_schools(nearby.content,
                                   key_string=key_string,
                                   result_fields=result_fields,
                                   zip_code=zip_code,
                                   state=state)

    @staticmethod
    def _parse_schools(content, key_string="school", result_fields=None, zip_code=None, state=None):
        """
        Converts the XML body of an API response into a list of dicts, one per `key_string` element.
        Args:
            content: string or bytes, the XML document to parse
            key_string: string, tag of the child elements (of the XML root) to extract
            result_fields: list, [(func, field), ...]; func converts the tag text, None keeps the raw text
            zip_code: value stored under 'zip_code' in every result
            state: value stored under 'state' in every result

        Returns:
            list, [dict(zip_code=..., state=..., field_1=value_1, ...), dict(...)] with lower-cased field names
        """
        results = []
        for school in ElementTree.fromstring(content).findall(key_string):
            curr_result = dict(zip_code=zip_code, state=state)
            for (func, field) in result_fields or ():
                node = school.find(field)
                if node is None:
                    # Tag absent for this school: skip only this field and keep reading the others.
                    continue
                if func is None:
                    curr_result[field.lower()] = node.text
                    continue
                try:
                    curr_result[field.lower()] = func(node.text)
                except (TypeError, ValueError):
                    # Empty or unconvertible text: leave the field out instead of dropping the school.
                    continue
            # curr_result always holds zip_code and state, so every school is reported.
            results.append(curr_result)
        return results

    def _check_key(self):
        """
        Ensures an API key is available before calling the API.
        Raises:
            ValueError: if self.api_key is None
        """
        if self.api_key is None:
            raise ValueError("Use .set_api_key() method to set Great School API Keys first.")