def create_tree(muni, year):
    """Build the budget tree for one municipality and year.

    The tree skeleton is created with ``Tree.from_dict`` from the first
    document returned by the scheme dataset, its ``muni`` and ``year``
    fields are set through ``update_field``, and every line of the raw
    budget dataset is then inserted under either the expense root or the
    revenue root.

    Args:
        muni: Municipality identifier; passed to ``update_field``,
            ``get_raw_budget`` and every created node.
        year: Budget year; passed to ``update_field``, ``get_raw_budget``
            and every created node.

    Returns:
        The populated ``Tree``, after ``update_amount()`` was called on it.
    """
    schema_dataset = get_scheme()
    try:
        tree = Tree.from_dict(schema_dataset.find_one())
        tree.update_field('muni', muni)
        tree.update_field('year', year)

        # The first two children of the root are the expense and revenue
        # sub-trees, in either order; the `expense` flag tells which is which.
        if tree.children[0].expense:
            expense_root = tree.children[0]
            revenue_root = tree.children[1]
        else:
            expense_root = tree.children[1]
            revenue_root = tree.children[0]

        budget_dataset = get_raw_budget(muni, year)
        try:
            for line in budget_dataset.find({}):
                node = Tree(muni=muni, year=year, **line)
                # A node belongs to the expense side when the second element
                # of its code matches the code of a direct expense child.
                # The children are re-read on every iteration because
                # insert_node may change them (NOTE(review): confirm).
                is_expense = any(
                    child.code == node.code[1]
                    for child in expense_root.children
                )
                if is_expense:
                    expense_root.insert_node(node)
                else:
                    revenue_root.insert_node(node)

            tree.update_amount()
        finally:
            # Release the budget dataset even if building the tree failed.
            budget_dataset.close()
    finally:
        # Release the scheme dataset even if building the tree failed.
        schema_dataset.close()

    return tree
def handle_sheet(self, year, filename):
    """Load one CSV budget sheet into the raw-budget dataset for ``year``.

    Rows before ``self.start_in_row(year)`` are skipped. Every remaining row
    is turned into field objects using ``self.data_fields(year)``; a row
    whose fields are all valid is processed and inserted into the dataset,
    otherwise its invalid fields are reported on stdout and the row is
    dropped.

    If the dataset already holds data and ``self.clean`` is false, nothing
    is imported and a hint to use ``--clean`` is printed instead.

    Args:
        year: Budget year the sheet belongs to.
        filename: Path of the CSV file to read.

    Returns:
        None.
    """
    print('handling file: %s' % (filename, ))
    # NOTE(review): presumably clean=True makes get_raw_budget drop existing
    # data for this muni/year - confirm against its implementation.
    dataset = get_raw_budget(self.MUNI, year, clean=self.clean)
    try:
        if dataset.count() > 0 and not self.clean:
            print("Budget for %s, in year %d already exists. Use --clean to overwrite." % (
                self.MUNI, year))
            return

        # Binary mode is what the Python 2 csv module expects; the `with`
        # block closes the file once the sheet has been read.
        with open(filename, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            # `fields` is indexed by column position and yields a callable
            # that wraps the raw cell in a field object
            # (NOTE(review): looks like a dict {column_index: field_class}).
            fields = self.data_fields(year)
            start_in_row = self.start_in_row(year)

            for line_number, line in enumerate(reader):
                if line_number < start_in_row:
                    continue

                line_fields = [fields[index](line[index]) for index in fields]

                # check validity of line and write valid lines to DB
                invalid_fields = [
                    field for field in line_fields if not field.is_valid()
                ]
                if not invalid_fields:
                    # process fields
                    new_line = {}
                    for field in line_fields:
                        new_line[field.name] = field.process()

                    # insert line data to DB
                    self.print_str(new_line)
                    dataset.insert(new_line)
                else:
                    report = [
                        ':'.join([field.name, field.value, field.error()])
                        for field in invalid_fields
                    ]
                    print('invalid fields in line %d : %s' % (
                        line_number + 1,
                        ', '.join(report),
                    ))
    finally:
        # Always release the dataset, including on the early return above.
        dataset.close()