Example #1
0
 def __init__(self, spreadsheet):
     """Open a database session and load the spreadsheet workbook.

     spreadsheet -- path of the workbook, passed to xlrd.open_workbook

     Exits the process with status 1 when ``openhea.cfg`` (looked up in
     the directory of this module) could not be read.
     """
     # database session
     config_file = os.path.join(os.path.dirname(__file__), 'openhea.cfg')
     self.config = OpenHEAConfig()
     # presumably the list of files that were parsed successfully, as with
     # ConfigParser.read - verify against OpenHEAConfig
     read = self.config.read(config_file)
     if len(read) != 1:
         # sys.exit() with a string writes it to stderr and exits with
         # status 1; it also works unchanged on Python 2 and Python 3,
         # unlike the print statement.
         sys.exit('Need openhea.cfg setup with database parameters')
     cs = self.config.sqlalchemy_connection_string()
     engine = create_engine(cs, echo=True)
     Session = sessionmaker(bind=engine)
     self.session = Session()
     # open workbook
     self.workbook = xlrd.open_workbook(spreadsheet)
Example #2
0
class DataImporter:
    """Read household economy data out of a spreadsheet workbook.

    The constructor opens a SQLAlchemy session (connection string taken
    from ``openhea.cfg``) and loads the workbook with xlrd.  Each ``save*``
    method then processes one sheet of that workbook.
    """

    def __init__(self, spreadsheet):
        """Open a database session and load the spreadsheet workbook.

        spreadsheet -- path of the workbook, passed to xlrd.open_workbook

        Exits the process with status 1 when ``openhea.cfg`` (looked up in
        the directory of this module) could not be read.
        """
        # database session
        config_file = os.path.join(os.path.dirname(__file__), 'openhea.cfg')
        self.config = OpenHEAConfig()
        # presumably the list of files that were parsed successfully, as
        # with ConfigParser.read - verify against OpenHEAConfig
        read = self.config.read(config_file)
        if len(read) != 1:
            # sys.exit() with a string writes it to stderr and exits with
            # status 1; it also works unchanged on Python 2 and Python 3,
            # unlike the print statement.
            sys.exit('Need openhea.cfg setup with database parameters')
        cs = self.config.sqlalchemy_connection_string()
        engine = create_engine(cs, echo=True)
        Session = sessionmaker(bind=engine)
        self.session = Session()
        # open workbook
        self.workbook = xlrd.open_workbook(spreadsheet)

    @staticmethod
    def _cellText(cell):
        """Return a cell as stripped text, or '' when it is not a string.

        Numeric cells (floats) have no ``strip`` method and are treated as
        having no text.
        """
        if hasattr(cell, 'strip'):
            return cell.strip()
        return ''

    @staticmethod
    def _requireSingleTitle(title_row):
        """Raise AssertionError unless exactly one cell of the row is set.

        Raised explicitly (instead of using ``assert``) so the check is
        not stripped when Python runs with -O.
        """
        titles = [cell for cell in title_row if cell != '']
        if len(titles) != 1:
            raise AssertionError(
                'expected exactly one title cell, found %d' % len(titles))

    def saveSiteData(self):
        """Store the site sheet (sheet 0) as a Livelihoodzone record.

        Expecting site_data to look like:
        [
        ['One sample point', '']
        ['Country name', 'Namibia']
        ['LZ name', 'Caprivi Lowland Maize and Cattle Zone']
        ['Name of village or settlement', 'avillage']
        ['Interview date', '40321.0']
        ['Interviewer name', 'James Acidri']
        ['Interview number', '1']
        ['Start of reference/ consumption year', 'March']
        ]

        Note that the date being a number is an Excel thing - see code for how we deal with it

        The session is rolled back and the error re-raised if the commit
        fails.
        """
        site_data = sheetToGridNoBlank(self.workbook.sheet_by_index(0))
        testPrint(site_data)
        DATACOL = 1
        # TODO: row 1 (country name) is not stored yet
        #project = site_data[1][DATACOL]
        livelihoodzone = site_data[2][DATACOL]
        # TODO: not currently in database schema
        #village_name = site_data[3][DATACOL]
        # Excel stores dates as serial numbers; xlrd converts them to a
        # (year, month, day, hour, minute, second) tuple.
        date_tuple = xlrd.xldate_as_tuple(site_data[4][DATACOL],
                self.workbook.datemode)
        datecreated = date(*date_tuple[:3])
        createdby = site_data[5][DATACOL]
        # TODO: not currently in database schema
        #interview_number = site_data[6][DATACOL]
        consumptionyearbegins = site_data[7][DATACOL]
        lz = Livelihoodzone(
                livelihoodzone=livelihoodzone,
                createdby=createdby,
                datecreated=datecreated,
                consumptionyearbegins=consumptionyearbegins)
        self.session.add(lz)
        try:
            self.session.commit()
        except Exception:
            # leave the session usable for the caller, then propagate
            self.session.rollback()
            raise

    def saveExpenditureData(self, expenditure_data=None):
        """Parse the household expenditure sheet (sheet 3).

        expenditure_data -- optional pre-loaded grid (list of rows); when
            omitted the grid is read from sheet 3 of the workbook.  The
            list passed in is not modified.

        Returns a tuple ``(expenditure, standard_of_living)``.  Each is a
        list with one dict per data row, holding 'category', 'type',
        'unit' and, per wealth group name, a ``{'lower': .., 'upper': ..}``
        dict.  Rows before the "Annual expenditure" divider go to the
        first list, rows after it to the second.  Nothing is written to
        the database by this method.

        Raises AssertionError when the title row does not hold exactly one
        value or a Lower/Upper header pair names two different groups.

        Expecting expenditure_data to look like:
        [
        ['', '', '', '', '', u'HOUSEHOLD EXPENDITURE', '', '', '', '', '', '', '', '', ''],
        ['', '', '', u'WG1 Lower', u'WG1 upper', u'WG2 Lower', u'WG2 Upper', u'WG3 Lower', u'WG3 Upper', u'WG4 Lower', u'WG4 Upper', u'WG1', u'WG2', u'WG3', u'WG4'],
        [u'Category', u'Food type', u'Unit', '', '', '', u'No. Units purchased', '', '', '', '', '', u'Price per unit', '', ''],
        [u'Staple food', u'Maize meal', u'Kg', 390.0, 390.0, 208.0, 208.0, 182.0, 182.0, 97.5, 97.5, 4.0, 4.0, 4.0, 4.0],
        [u'Non-staple food', u'Sugar', u'Kg', 11.0, 11.0, 22.0, 22.0, 36.0, 36.0, 18.0, 18.0, 7.5, 11.0, 6.0, 8.5],
        ['', '', '', '', '', '', u'Annual expenditure', '', '', '', '', '', '', '', ''],
        [u'Household items', u'Candles', u'N$', 92.0, 92.0, 95.0, 95.0, 164.0, 164.0, 235.0, 235.0, '', '', '', ''],
        ['', u'Soap/Vaseline', u'N$', 258.0, 258.0, 480.0, 480.0, 326.0, 326.0, 597.0, 597.0, '', '', '', ''],
        ['', u'Kerosine', u'N$', 0.0, 0.0, 0.0, 0.0, 140.0, 140.0, 360.0, 360.0, '', '', '', ''],
        [u'Essential inputs', u'Tools', '', 200.0, 200.0, 200.0, 200.0, 300.0, 300.0, 300.0, 300.0, '', '', '', '']
        ]
        """
        if expenditure_data is None:
            expenditure_data = sheetToGridNoBlank(self.workbook.sheet_by_index(3))
            testPrint(expenditure_data)

        # check the first row is what we expect
        self._requireSingleTitle(expenditure_data[0])

        # Locate each "<name> Lower" / "<name> Upper" pair of adjacent
        # columns.  Walking over pairs means a trailing "Lower" header
        # with no right-hand neighbour is skipped instead of indexing past
        # the end of the row.
        headers = [self._cellText(cell) for cell in expenditure_data[1]]
        wealth_groups = {}
        for index, (lower_header, upper_header) in enumerate(
                zip(headers, headers[1:])):
            if not lower_header.lower().endswith('lower'):
                continue
            if not upper_header.lower().endswith('upper'):
                continue
            wealth_group_name = lower_header.split()[0]
            if wealth_group_name.lower() != upper_header.lower().split()[0]:
                raise AssertionError(
                    'mismatched wealth group headers %r / %r'
                    % (lower_header, upper_header))
            wealth_groups[wealth_group_name] = {
                    'lower_col': index,
                    'upper_col': index + 1,
                    }

        expenditure = []
        standard_of_living = []
        data = expenditure
        category = ''
        # row 2 only holds column captions, so the data starts at row 3
        for row in expenditure_data[3:]:
            non_empty = [cell for cell in row if cell != '']
            # change target list after "Annual Expenditure"; _cellText
            # keeps a lone numeric cell from being treated as a divider
            if (len(non_empty) == 1 and
                    self._cellText(non_empty[0]).lower().startswith('annual')):
                data = standard_of_living
                continue
            # cache category - it is often not repeated, so we will keep using the same
            # value until it changes
            if row[0]:
                category = row[0]
            datadict = {
                    'category': category,
                    'type': row[1],
                    'unit': row[2],
                    }
            for wg, cols in wealth_groups.items():
                datadict[wg] = {
                        'lower': row[cols['lower_col']],
                        'upper': row[cols['upper_col']],
                        }
            data.append(datadict)
        return expenditure, standard_of_living

    def saveWealthGroupAssetsData(self, wgassets_data=None):
        """Parse the wealth group assets sheet (sheet 1).

        wgassets_data -- optional pre-loaded grid (list of rows); when
            omitted the grid is read from sheet 1 of the workbook.

        Returns a list with one dict per asset row (row 10 onwards) with
        the keys 'category', 'type', 'unit' and 'data'.  'data' maps each
        wealth group name found in row 7 to its own
        ``{'upper': .., 'lower': ..}`` dict; bounds without a value stay
        ''.  Nothing is written to the database by this method.

        Raises AssertionError when the title row does not hold exactly one
        value.

        Expecting wgassets_data to look like:
            [['', u'Wealth group characteristics', '', '', '', '', '', '', '', '', '', '', '', ''],
            ['', '', '', u'WG1', u'WG2', u'WG3', u'WG4', u'WG5 etc', '', '', '', '', '', ''],
            ['', u'Wealth group name', '', u'very poor', u'poor ', u'middle ', u'better off', '', '', '', '', '', '', ''],
            ['', u'Percent in wealth group', '', 0.31, 0.39, 0.22, 0.08, '', '', '', '', '', '', ''],
            ['', u'Number of people in household', '', 7.0, 7.0, 6.0, 5.0, '', '', '', '', '', '', ''],
            ['', u'Wealth group rank 1= poorest', '', 1.0, 2.0, 3.0, 4.0, '', '', '', '', '', '', ''],
            [u'ASSETS', '', '', '', '', u'Asset holdings', '', '', '', '', '', '', u'Asset price', ''],
            ['', '', '', u'WG1', u'WG1', u'WG2', u'WG2', u'WG3', u'WG3', u'WG4 ', u'WG4', u'WG5 etc', '', ''],
            ['', '', '', u'Lower', u'Upper', u'Lower', u'Upper', u'Lower', u'Upper', u'Lower', u'Upper', '', u'Lower', u'Upper'],
            [u'Category', u'Asset Type', u'Unit', '', '', '', '', '', '', '', '', '', '', ''],
            [u'Land', u'Upland', u'Acre', 1.25, 2.0, 2.5, 3.0, 20.0, 25.0, 22.5, 30.0, '', '', ''],
            ['', u'Owned Irrigated', u'Acre', 0.5, 1.0, 1.0, 2.0, 1.0, 2.0, '', '', '', '', ''],
            [u'Trees', u'Mango', u'Item', 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, '', '', ''],
            [u'Other tradeable goods', u'Cell phone', u'Item', 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, '', '', ''],
            ['', u'Ox plough', u'Item', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, '', '', ''],
            ['', u'Livestock assets setup missing', '', '', '', '', '', '', '', '', '', '', '', ''],
            [u'Livestock', u'Cattle', u'Item', 0.0, 5.0, 3.0, 6.0, 7.0, 14.0, 35.0, 87.0, '', 300.0, 300.0],
            ['', u'Goats', u'Item', 0.0, 6.0, 7.0, 11.0, 12.0, 23.0, 20.0, 22.0, '', 50.0, 50.0],
            ['', u'?cash?foodstocks', '', '', '', '', '', '', '', '', '', '', '', '']]

        """
        if wgassets_data is None:
            wgassets_data = sheetToGridNoBlank(self.workbook.sheet_by_index(1))

        # Fixed layout of the sheet (see the example above).
        WG_NAME_ROW = 7      # wealth group name above each value column
        BOUND_ROW = 8        # "Lower" / "Upper" above each value column
        FIRST_DATA_ROW = 10  # row 9 holds the column captions
        FIRST_VALUE_COL = 3  # columns 0-2 are category / type / unit

        # check the first row has one cell (the title)
        self._requireSingleTitle(wgassets_data[0])

        # Strip the header cells so that u'WG4 ' and u'WG4' name the same
        # wealth group instead of two different ones.
        group_names = [self._cellText(cell)
                       for cell in wgassets_data[WG_NAME_ROW]]
        bounds = [self._cellText(cell).lower()
                  for cell in wgassets_data[BOUND_ROW]]
        wealth_group_names = set(group_names)
        # discard() rather than remove(): a row without blanks is fine
        wealth_group_names.discard('')

        category = ''
        data = []
        for row in wgassets_data[FIRST_DATA_ROW:]:
            # Build fresh inner dicts for every row.  A shallow copy of a
            # shared template would make all rows point at the same
            # 'upper'/'lower' dicts, so later rows overwrote earlier ones.
            this_wealth_groups = {}
            for name in wealth_group_names:
                this_wealth_groups[name] = {
                    'upper': '',
                    'lower': '',
                }
            # cache category - it is often not repeated, so we will keep using the same
            # value until it changes
            if row[0]:
                category = row[0]
            columns = zip(row[FIRST_VALUE_COL:],
                          group_names[FIRST_VALUE_COL:],
                          bounds[FIRST_VALUE_COL:])
            for val, name, bound in columns:
                # columns without a group name (e.g. asset prices) or
                # without a Lower/Upper label carry no wealth group value
                if val != '' and name and bound:
                    this_wealth_groups[name][bound] = val
            data.append({
                'category': category,
                'type': row[1],
                'unit': row[2],
                'data': this_wealth_groups,
            })
        return data