def _import_data(self, server, database, max_facilities_to_import=DEFAULT_FACILITIES_TO_IMPORT,
                     max_mdg_to_import=DEFAULT_MDG_TO_IMPORT):
        from mangrove.datastore.database import DatabaseManager
        from mangrove.datastore.entity import Entity, get_entities_by_value
        from mangrove.datastore.datadict import DataDictType, get_datadict_type, create_datadict_type
        from mangrove.utils import GoogleSpreadsheetsClient
        from mangrove.utils.google_spreadsheets import get_string, get_number, get_boolean, get_list
        from mangrove.utils.spreadsheets import CsvReader
        from mangrove.utils.helpers import slugify
        from mangrove.georegistry.api import get_feature_by_id
        import os
        import datetime
        import json
        from pytz import UTC

        print "Loading 'NIMS Data'..."

        print "\tServer: %s" % server
        print "\tDatabase: %s" % database

        dbm = DatabaseManager(server=server, database=database)

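        # Open the source workbook with the Google credentials configured in Django settings.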
        user_spreadsheets = GoogleSpreadsheetsClient(settings.GMAIL_USERNAME, settings.GMAIL_PASSWORD)
        nims_data = user_spreadsheets['NIMS Data Deux']

        load_population = True
        load_other = True
        load_mdg = True
        load_health = True
        load_water = True
        load_education = True

        countries = {}
        states = {}
        locations = {}
        num_cgs = 0
        datadict_types = {}
        geo_id_dict = {}

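        # Register the data-dictionary types that the imported data records will reference.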
        cgs_type = create_datadict_type(
            dbm,
            slug='cgs',
            name='CGS',
            primitive_type='boolean'
        )
        datadict_types['cgs'] = cgs_type.id

        geo_id_type = create_datadict_type(
            dbm,
            slug='geo_id',
            name='Geographic ID',
            primitive_type='string'
        )
        datadict_types['geo_id'] = geo_id_type.id

        name_type = create_datadict_type(
            dbm,
            slug='name',
            name='Name',
            primitive_type='string'
        )
        datadict_types['name'] = name_type.id

        mdg_type = create_datadict_type(
            dbm,
            slug='mdg',
            name='MDG',
            primitive_type='string'
        )
        datadict_types['mdg'] = mdg_type.id

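        # Map country and state names to their GeoRegistry feature ids (the "grid" column).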
        country_geo_id = {}
        for row in nims_data['Nigeria Country ALL']:
            country_geo_id[row['name']] = row['grid']
        state_geo_ids = {}
        for row in nims_data['Nigeria States ALL']:
            state_geo_ids[row['name']] = row['grid']

        num_rows = 0
        print "Importing location entities from 'Nigeria LGAs ALL' worksheet"
        for row in nims_data['Nigeria LGAs ALL']:
            country = get_string('country', row)
            state = get_string('state', row)
            lga = get_string('lga', row)
            cgs = get_boolean('cgs', row)
            geo_id = get_string('geoid', row)
            lga_gr_id = get_string('grid', row)
            location = (country, state, lga)
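            # Create the parent Country and State entities the first time each appears.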
            if country not in countries:
                gr_id = country_geo_id[country]
                feature = get_feature_by_id(gr_id)
#                geometry = feature['geometry']
                centroid = json.loads(feature['properties']['geometry_centroid'])
                e = Entity(dbm,
                           entity_type=["Location", "Country"],
                           location=[country],
                           centroid=centroid,
                           gr_id=gr_id)
                locations[(country,)] = e.save()
                countries[country] = e.id
                data = [(name_type.slug, country, name_type)]
                e.add_data(data, event_time=datetime.datetime(2011, 03, 01, tzinfo=UTC))
                num_rows += 1
                print "[%s]...(%s) -- %s" % (num_rows, country, e.id)
            if state not in states:
                gr_id = state_geo_ids[state]
                feature = get_feature_by_id(gr_id)
#                geometry = feature['geometry']
                centroid = json.loads(feature['properties']['geometry_centroid'])
                e = Entity(dbm,
                           entity_type=["Location", "State"],
                           location=[country, state],
                           centroid=centroid,
                           gr_id=gr_id)
                locations[(country, state)] = e.save()
                states[state] = e.id
                data = [(name_type.slug, state, name_type)]
                e.add_data(data, event_time=datetime.datetime(2011, 03, 01, tzinfo=UTC))
                num_rows += 1
                print "[%s]...(%s, %s) -- %s" % (num_rows, country, state, e.id)
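            # Every row becomes an LGA entity, tagged with its name and geographic id.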
            gr_id = lga_gr_id
            feature = get_feature_by_id(gr_id)
#            geometry = feature['geometry']
            centroid = json.loads(feature['properties']['geometry_centroid'])
            e = Entity(dbm,
                       entity_type=["Location", "LGA"],
                       location=[country, state, lga],
                       centroid=centroid,
                       gr_id=gr_id)
            locations[location] = e.save()
            geo_id_dict[geo_id] = e
            data = [(name_type.slug, lga, name_type)]
            e.add_data(data, event_time=datetime.datetime(2011, 03, 01, tzinfo=UTC))
            data = [(geo_id_type.slug, geo_id, geo_id_type)]
            e.add_data(data, event_time=datetime.datetime(2011, 03, 01, tzinfo=UTC))

            num_rows += 1
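
The loop above pulls typed values out of each 'Nigeria LGAs ALL' row by column name. Below is a minimal, self-contained sketch of that pattern; the real helpers live in mangrove.utils.google_spreadsheets, so these stand-ins and the sample row are assumptions for illustration only.

def get_string(column, row):
    # Stand-in helper: return the named cell stripped of surrounding whitespace.
    value = row.get(column)
    return value.strip() if isinstance(value, basestring) else value

def get_boolean(column, row):
    # Stand-in helper: treat common truthy spellings as True.
    return str(row.get(column, '')).strip().lower() in ('true', 'yes', '1')

sample_row = {                      # hypothetical worksheet row, for illustration only
    'country': 'Nigeria',
    'state': 'Kano',
    'lga': 'Ajingi',
    'cgs': 'TRUE',
    'geoid': '12345',
    'grid': 'GR-0001',
}

print get_string('lga', sample_row), get_boolean('cgs', sample_row)   # Ajingi True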
Example #3
    def _import_data(self,
                     server,
                     database,
                     max_facilities_to_import=DEFAULT_FACILITIES_TO_IMPORT,
                     max_mdg_to_import=DEFAULT_MDG_TO_IMPORT):
        from mangrove.datastore.database import DatabaseManager
        from mangrove.datastore.entity import Entity, get_entities_by_value
        from mangrove.datastore.datadict import DataDictType, get_datadict_type, create_datadict_type
        from mangrove.utils import GoogleSpreadsheetsClient
        from mangrove.utils.google_spreadsheets import get_string, get_number, get_boolean, get_list
        from mangrove.utils.spreadsheets import CsvReader
        from mangrove.utils.helpers import slugify
        from mangrove.georegistry.api import get_feature_by_id
        import os
        import datetime
        import json
        from pytz import UTC

        print "Loading 'NIMS Data'..."

        print "\tServer: %s" % server
        print "\tDatabase: %s" % database

        dbm = DatabaseManager(server=server, database=database)

        user_spreadsheets = GoogleSpreadsheetsClient(settings.GMAIL_USERNAME,
                                                     settings.GMAIL_PASSWORD)
        nims_data = user_spreadsheets['NIMS Data Deux']

        load_population = True
        load_other = True
        load_mdg = True
        load_health = True
        load_water = True
        load_education = True

        countries = {}
        states = {}
        locations = {}
        num_cgs = 0
        datadict_types = {}
        geo_id_dict = {}

        cgs_type = create_datadict_type(dbm,
                                        slug='cgs',
                                        name='CGS',
                                        primitive_type='boolean')
        datadict_types['cgs'] = cgs_type.id

        geo_id_type = create_datadict_type(dbm,
                                           slug='geo_id',
                                           name='Geographic ID',
                                           primitive_type='string')
        datadict_types['geo_id'] = geo_id_type.id

        name_type = create_datadict_type(dbm,
                                         slug='name',
                                         name='Name',
                                         primitive_type='string')
        datadict_types['name'] = name_type.id

        mdg_type = create_datadict_type(dbm,
                                        slug='mdg',
                                        name='MDG',
                                        primitive_type='string')
        datadict_types['mdg'] = mdg_type.id

        country_geo_id = {}
        for row in nims_data['Nigeria Country ALL']:
            country_geo_id[row['name']] = row['grid']
        state_geo_ids = {}
        for row in nims_data['Nigeria States ALL']:
            state_geo_ids[row['name']] = row['grid']

        num_rows = 0
        print "Importing location entities from 'Nigeria LGAs ALL' worksheet"
        for row in nims_data['Nigeria LGAs ALL']:
            country = get_string('country', row)
            state = get_string('state', row)
            lga = get_string('lga', row)
            cgs = get_boolean('cgs', row)
            geo_id = get_string('geoid', row)
            lga_gr_id = get_string('grid', row)
            location = (country, state, lga)
            if country not in countries:
                gr_id = country_geo_id[country]
                feature = get_feature_by_id(gr_id)
                #                geometry = feature['geometry']
                centroid = json.loads(
                    feature['properties']['geometry_centroid'])
                e = Entity(dbm,
                           entity_type=["Location", "Country"],
                           location=[country],
                           centroid=centroid,
                           gr_id=gr_id)
                locations[(country, )] = e.save()
                countries[country] = e.id
                data = [(name_type.slug, country, name_type)]
                e.add_data(data,
                           event_time=datetime.datetime(2011,
                                                        03,
                                                        01,
                                                        tzinfo=UTC))
                num_rows += 1
                print "[%s]...(%s) -- %s" % (num_rows, country, e.id)
Example #4
class Command(BaseCommand):
    help = "Loads the NMIS dataset from the 'NIMS Data' Google Doc."
    args = "<server> <database> | <database>"

    from optparse import make_option
    option_list = BaseCommand.option_list + (
        make_option("--max_facilities_to_import",
                    type="int",
                    dest="max_facilities_to_import"),
        make_option(
            "--max_mdg_to_import", type="int", dest="max_mdg_to_import"),
    )
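
    # Typical invocations (the command name "loadnmisdata" is taken from the
    # CommandError message in handle() below):
    #   python manage.py loadnmisdata [<server>] <database>
    #   python manage.py loadnmisdata --max_facilities_to_import=500 --max_mdg_to_import=50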

    def handle(self, *args, **options):
        from mangrove.utils.types import is_number

        server = settings.MANGROVE_DATABASES['default']['SERVER']
        database = settings.MANGROVE_DATABASES['default']['DATABASE']

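        # Positional arguments override the defaults from settings:
        # "<server> <database>", "<database>", or nothing at all.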
        if len(args) == 2:
            server = args[0]
            database = args[1]
        elif len(args) == 1:
            database = args[0]
        elif len(args) == 0:
            pass
        else:
            raise CommandError(
                'Wrong number of arguments. Run \'python manage.py help loadnmisdata\' for usage.'
            )
        max_facilities_to_import = options.get('max_facilities_to_import',
                                               10000)
        max_mdg_to_import = options.get('max_mdg_to_import', 10000)
        if not is_number(max_facilities_to_import):
            max_facilities_to_import = DEFAULT_FACILITIES_TO_IMPORT
        if not is_number(max_mdg_to_import):
            max_mdg_to_import = DEFAULT_MDG_TO_IMPORT
        self._import_data(server, database, max_facilities_to_import,
                          max_mdg_to_import)

    def _import_data(self,
                     server,
                     database,
                     max_facilities_to_import=DEFAULT_FACILITIES_TO_IMPORT,
                     max_mdg_to_import=DEFAULT_MDG_TO_IMPORT):
        from mangrove.datastore.database import DatabaseManager
        from mangrove.datastore.entity import Entity, get_entities_by_value
        from mangrove.datastore.datadict import DataDictType, get_datadict_type, create_datadict_type
        from mangrove.utils import GoogleSpreadsheetsClient
        from mangrove.utils.google_spreadsheets import get_string, get_number, get_boolean, get_list
        from mangrove.utils.spreadsheets import CsvReader
        from mangrove.utils.helpers import slugify
        from mangrove.georegistry.api import get_feature_by_id
        import os
        import datetime
        import json
        from pytz import UTC

        print "Loading 'NIMS Data'..."

        print "\tServer: %s" % server
        print "\tDatabase: %s" % database

        dbm = DatabaseManager(server=server, database=database)

        user_spreadsheets = GoogleSpreadsheetsClient(settings.GMAIL_USERNAME,
                                                     settings.GMAIL_PASSWORD)
        nims_data = user_spreadsheets['NIMS Data Deux']

        load_population = True
        load_other = True
        load_mdg = True
        load_health = True
        load_water = True
        load_education = True

        countries = {}
        states = {}
        locations = {}
        num_cgs = 0
        datadict_types = {}
        geo_id_dict = {}

        cgs_type = create_datadict_type(dbm,
                                        slug='cgs',
                                        name='CGS',
                                        primitive_type='boolean')
        datadict_types['cgs'] = cgs_type.id

        geo_id_type = create_datadict_type(dbm,
                                           slug='geo_id',
                                           name='Geographic ID',
                                           primitive_type='string')
        datadict_types['geo_id'] = geo_id_type.id

        name_type = create_datadict_type(dbm,
                                         slug='name',
                                         name='Name',
                                         primitive_type='string')
        datadict_types['name'] = name_type.id

        mdg_type = create_datadict_type(dbm,
                                        slug='mdg',
                                        name='MDG',
                                        primitive_type='string')
        datadict_types['mdg'] = mdg_type.id

        country_geo_id = {}
        for row in nims_data['Nigeria Country ALL']:
            country_geo_id[row['name']] = row['grid']
        state_geo_ids = {}
        for row in nims_data['Nigeria States ALL']:
            state_geo_ids[row['name']] = row['grid']

        num_rows = 0
        print "Importing location entities from 'Nigeria LGAs ALL' worksheet"
        for row in nims_data['Nigeria LGAs ALL']:
            country = get_string('country', row)
            state = get_string('state', row)
            lga = get_string('lga', row)
            cgs = get_boolean('cgs', row)
            geo_id = get_string('geoid', row)
            lga_gr_id = get_string('grid', row)
            location = (country, state, lga)
            if country not in countries:
                gr_id = country_geo_id[country]
                feature = get_feature_by_id(gr_id)
                #                geometry = feature['geometry']
                centroid = json.loads(
                    feature['properties']['geometry_centroid'])
                e = Entity(dbm,
                           entity_type=["Location", "Country"],
                           location=[country],
                           centroid=centroid,
                           gr_id=gr_id)
                locations[(country, )] = e.save()
                countries[country] = e.id
                data = [(name_type.slug, country, name_type)]
                e.add_data(data,
                           event_time=datetime.datetime(2011,
                                                        03,
                                                        01,
                                                        tzinfo=UTC))
                num_rows += 1
                print "[%s]...(%s) -- %s" % (num_rows, country, e.id)
            if state not in states:
                gr_id = state_geo_ids[state]
                feature = get_feature_by_id(gr_id)
                #                geometry = feature['geometry']
                centroid = json.loads(
                    feature['properties']['geometry_centroid'])
                e = Entity(dbm,
                           entity_type=["Location", "State"],
                           location=[country, state],
                           centroid=centroid,
                           gr_id=gr_id)
                locations[(country, state)] = e.save()
                states[state] = e.id
                data = [(name_type.slug, state, name_type)]
                e.add_data(data,
                           event_time=datetime.datetime(2011,
                                                        03,
                                                        01,
                                                        tzinfo=UTC))
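
The `countries`/`states` checks above ensure each parent location is created only once, while the `locations` dict caches every saved entity keyed by a tuple of increasing length. Below is a simplified stand-in for that dedup pattern: plain dicts replace mangrove Entity objects, the separate countries/states guards are collapsed into one cache, and the sample rows are hypothetical.

locations = {}                      # (country,), (country, state), (country, state, lga) -> record
rows = [('Nigeria', 'Kano', 'Ajingi'),
        ('Nigeria', 'Kano', 'Albasu'),
        ('Nigeria', 'Lagos', 'Ikeja')]
for country, state, lga in rows:
    for key in [(country,), (country, state), (country, state, lga)]:
        if key not in locations:
            locations[key] = {'entity_type': ['Location'], 'location': list(key)}
print len(locations)                # 6: one country, two states, three LGAs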
Example #5
                e = Entity(dbm,
                           entity_type=["Location", "State"],
                           location=[country, state],
                           centroid=centroid,
                           gr_id=gr_id)
                locations[(country, state)] = e.save()
                states[state] = e.id
                data = [(name_type.slug, state, name_type)]
                e.add_data(data,
                           event_time=datetime.datetime(2011, 03, 01,
                                                         tzinfo=UTC))
                num_rows += 1
                print "[%s]...(%s, %s) -- %s" % (num_rows, country, state,
                                                 e.id)
            gr_id = lga_gr_id
            feature = get_feature_by_id(gr_id)
            #            geometry = feature['geometry']
            centroid = json.loads(feature['properties']['geometry_centroid'])
            e = Entity(dbm,
                       entity_type=["Location", "LGA"],
                       location=[country, state, lga],
                       centroid=centroid,
                       gr_id=gr_id)
            locations[location] = e.save()
            geo_id_dict[geo_id] = e
            data = [(name_type.slug, lga, name_type)]
            e.add_data(data,
                       event_time=datetime.datetime(2011, 03, 01, tzinfo=UTC))
            data = [(geo_id_type.slug, geo_id, geo_id_type)]
            e.add_data(data,
                       event_time=datetime.datetime(2011, 03, 01, tzinfo=UTC))