Example #1
0
 def _process_sheet(self, tabular_data):
     '''Store one sheet as a PesaTable plus one Expenditure per data cell.

     The table title is the first cell of the first row.  Trailing rows
     holding at most two cells are treated as footnotes: their non-blank
     first cells are JSON-encoded onto ``table.footnotes`` in the order
     they are scanned, i.e. bottom-to-top.  From the seventh row onwards,
     every row with a non-empty second cell yields one Expenditure per
     year in 2002-2010, read from the nine cells after the series name.

     :param tabular_data: object whose ``data`` attribute is a list of
         rows, each row being a list of cells.
     '''
     import db
     # Kept bound for the whole call; presumably constructing the
     # repository is what wires up db.Session -- TODO confirm.
     repo = db.Repository('sqlite:///%s' % dbpath)
     years = range(2002, 2011)
     cells = tabular_data.data
     table = db.PesaTable(title=cells[0][0])

     footnotes = []
     for lastrow in reversed(cells):
         if len(lastrow) > 2: # into the data
             break
         if not lastrow:
             # A completely empty row has no first cell to inspect.
             continue
         foot = lastrow[0].strip()
         if foot:
             footnotes.append(foot)
     table.footnotes = simplejson.dumps(footnotes)

     for row in cells[6:]:
         # Rows with fewer than two cells (the short footnote rows found
         # above) have no value column, so indexing row[1] would raise.
         if len(row) < 2 or not row[1]: # subheading or footnote
             continue
         series_name = row[0]
         for (year, cell) in zip(years, row[1:10]):
             db.Expenditure(
                 title=series_name,
                 date=unicode(year),
                 amount=swiss.floatify(cell),
                 pesatable=table,
                 )
     db.Session.flush()
Example #2
0
 def _process_sheet(self, tabular_data):
     """Persist a sheet: one PesaTable and an Expenditure for each value.

     Reads the title from ``data[0][0]``.  Walking up from the last row,
     rows of at most two cells are taken as footnotes until the first
     wider row is met; the non-blank footnote texts are stored as a JSON
     list on the table (in that bottom-up scan order).  Rows from index 6
     on whose second cell is non-empty are data rows: cells 1-9 are paired
     with the years 2002-2010 and each pair becomes an Expenditure.

     :param tabular_data: object exposing ``data``, a list of rows where
         each row is a list of cells.
     """
     import db

     # The instance is not used directly; it is presumably created for its
     # effect on db.Session -- TODO confirm before removing.
     repo = db.Repository("sqlite:///%s" % dbpath)
     years = range(2002, 2011)
     cells = tabular_data.data
     title = cells[0][0]
     table = db.PesaTable(title=title)

     footnotes = []
     for lastrow in reversed(cells):
         if len(lastrow) > 2:  # into the data
             break
         # Guard against zero-length rows, which have no first cell.
         if lastrow:
             foot = lastrow[0].strip()
             if foot:
                 footnotes.append(foot)
     table.footnotes = simplejson.dumps(footnotes)

     for row in cells[6:]:
         # A row needs at least two cells before row[1] can be examined;
         # shorter rows (e.g. one-cell footnotes) are never data rows.
         is_data_row = len(row) > 1 and row[1]
         if not is_data_row:  # a subheading or footnote
             continue
         series_name = row[0]
         for (year, cell) in zip(years, row[1:10]):
             db.Expenditure(
                 title=series_name,
                 date=unicode(year),
                 amount=swiss.floatify(cell),
                 pesatable=table,
             )
     db.Session.flush()
Example #3
0
    def load(self):
        '''Load the CSV fetched from ``url`` into the database.

        After the heading line, every row with a non-blank department
        (second column) becomes one ``db.Area``.  Each value from the
        eleventh column onwards that ``swiss.floatify`` turns into a
        non-zero amount becomes a ``db.Expenditure`` on that area, with
        consecutive columns assigned consecutive years starting at 2003.

        Design notes:

        Looks like LA is very limited and is always associated with a given
        "department" -- so this is really a classifier for the account

        Simplest normalization:
            * years
            * dept FK

        Expenditure
            * subfunc
            * year
            * caporcur
            * region: usual ones ... (ID or Non-ID not needed ...)
            * programme FK

        # does the same programme ever occur within two different departments?
        Programme
            * department

        Department?


        What questions do i want to ask:
            * Basically we want to browse in by facets
            * Region, func, subfunc, ...
        '''
        import db
        first_year = 2003  # year assigned to the first expenditure column
        fp = cache.retrieve(url)
        fileobj = open(fp)
        # try/finally guarantees the file is closed even if a row fails to
        # load; the original left the handle open.
        try:
            reader = csv.reader(fileobj)
            # theoretically we'd have distributions to dept from CG as well ...
            # acc = 'CG' acc = 'LA'

            # dept -> account

            # Tag accounts:
            # subfunc
            # Tags relate to other tags ...

            repo = db.Repository(dburi)
            # skip headings
            reader.next()
            _clean = lambda _str: unicode(_str.strip())
            for count, row in enumerate(reader):
                deptcode = _clean(row[0])
                dept = _clean(row[1])
                # have some blank rows at end
                if not dept:
                    continue
                function = _clean(row[2])
                subfunction = _clean(row[3])
                pog = _clean(row[4])
                poga = _clean(row[5])  # take verbose one
                # pog = row['Programme Object Group']
                caporcur = _clean(row[7])
                region = _clean(row[9])
                exps = row[10:]
                area = db.Area(title=poga,
                               deptcode=deptcode,
                               department=dept,
                               function=function,
                               subfunction=subfunction,
                               pog=pog,
                               cap_or_cur=caporcur,
                               region=region)
                for ii, exp in enumerate(exps):
                    amount = swiss.floatify(exp)
                    if amount:  # do not bother with null or zero amounts
                        area.expenditures.append(
                            db.Expenditure(amount=amount,
                                           year=first_year + ii))
                # Commit in batches (every 5000th row index, including 0)
                # and discard the session before carrying on.
                if count % 5000 == 0:
                    print('Completed: %s' % count)
                    db.Session.commit()
                    db.Session.remove()
        finally:
            fileobj.close()
        db.Session.commit()
Example #4
0
    def load(self):
        """Read the CSV retrieved from ``url`` and store it as Areas.

        The first line is skipped as headings.  Rows whose second column
        (department) is blank are ignored.  Every other row produces a
        ``db.Area`` titled with the sixth column, and one
        ``db.Expenditure`` per non-zero amount found from the eleventh
        column onwards; the n-th such column is given the year 2003 + n.

        Design notes:

        Looks like LA is very limited and is always associated with a given
        "department" -- so this is really a classifier for the account

        Simplest normalization:
            * years
            * dept FK

        Expenditure
            * subfunc
            * year
            * caporcur
            * region: usual ones ... (ID or Non-ID not needed ...)
            * programme FK

        # does the same programme ever occur within two different departments?
        Programme
            * department

        Department?


        What questions do i want to ask:
            * Basically we want to browse in by facets
            * Region, func, subfunc, ...
        """
        import db

        first_year = 2003  # year of the first expenditure column
        fp = cache.retrieve(url)
        csv_file = open(fp)
        # Close the file on every exit path; previously the handle opened
        # for csv.reader was never released.
        try:
            reader = csv.reader(csv_file)
            # theoretically we'd have distributions to dept from CG as well ...
            # acc = 'CG' acc = 'LA'

            # dept -> account

            # Tag accounts:
            # subfunc
            # Tags relate to other tags ...

            repo = db.Repository(dburi)
            # skip headings
            reader.next()
            _clean = lambda _str: unicode(_str.strip())
            for count, row in enumerate(reader):
                dept = _clean(row[1])
                # have some blank rows at end
                if not dept:
                    continue
                # Evaluated as in the original even though the value is not
                # passed on to db.Area.
                subfunction = _clean(row[3])
                pog = _clean(row[5])  # take verbose one
                # pog = row['Programme Object Group']
                caporcur = _clean(row[7])
                region = _clean(row[9])
                exps = row[10:]
                area = db.Area(title=pog, department=dept, cap_or_cur=caporcur, region=region)
                for ii, exp in enumerate(exps):
                    amount = swiss.floatify(exp)
                    if amount:  # do not bother with null or zero amounts
                        area.expenditures.append(
                            db.Expenditure(amount=amount, year=first_year + ii)
                        )
                # Periodic commit whenever the row index is a multiple of
                # 5000 (including the very first row), then drop the session.
                if count % 5000 == 0:
                    print("Completed: %s" % count)
                    db.Session.commit()
                    db.Session.remove()
        finally:
            csv_file.close()
        db.Session.commit()