Code example #1
def ipp_node_to_element(name, node):
    """
    A `node` is a dict or a list produced by `build_tree_from_yaml_clean` or `transform_ipp_tree`.
    """
    if isinstance(node, dict):
        if node.get('TYPE') == u'BAREME':
            bareme_element = etree.Element('BAREME', attrib = dict(
                code = strings.slugify(name, separator = u'_'),
                origin = u'ipp',
                ))
            for slice_name in node.get('SEUIL', {}).keys():
                tranche_element = etree.Element('TRANCHE', attrib = dict(
                    code = strings.slugify(slice_name, separator = u'_'),
                    ))

                seuil_element = etree.Element('SEUIL')
                values, format, type = prepare_xml_values(name, node.get('SEUIL', {}).get(slice_name, []))
                transform_values_to_element_children(values, seuil_element)
                if len(seuil_element) > 0:
                    tranche_element.append(seuil_element)

                taux_element = etree.Element('TAUX')
                values, format, type = prepare_xml_values(name, node.get('TAUX', {}).get(slice_name, []))
                transform_values_to_element_children(values, taux_element)
                if len(taux_element) > 0:
                    tranche_element.append(taux_element)

                if len(tranche_element) > 0:
                    bareme_element.append(tranche_element)
            return bareme_element if len(bareme_element) > 0 else None
        else:
            node_element = etree.Element('NODE', attrib = dict(
                code = strings.slugify(name, separator = u'_'),
                origin = u'ipp',
                ))
            for key, value in node.items():
                child_element = ipp_node_to_element(key, value)
                if child_element is not None:
                    node_element.append(child_element)
            return node_element if len(node_element) > 0 else None
    else:
        assert isinstance(node, list), node
        values, format, type = prepare_xml_values(name, node)
        if not values:
            return None
        code_element = etree.Element('CODE', attrib = dict(
            code = strings.slugify(name, separator = u'_'),
            origin = u'ipp',
            ))
        if format is not None:
            code_element.set('format', format)
        if type is not None:
            code_element.set('type', type)
        transform_values_to_element_children(values, code_element)
        return code_element if len(code_element) > 0 else None
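
A note on the common helper: every example on this page relies on strings.slugify from the Biryani strings module (example #23 passes transform = strings.upper explicitly, so lowercasing is the default). A minimal sketch of the expected behaviour, assuming the biryani1 import path used by OpenFisca-era projects and standard slugify transliteration:

# Hedged illustration, not taken from any of the projects above.
from biryani1 import strings

assert strings.slugify(u'Barème IPP', separator = u'_') == u'bareme_ipp'
assert strings.slugify(u'Legislation 1') == u'legislation-1'

Under these semantics, the BAREME branch of ipp_node_to_element emits a tree shaped like <BAREME code="..." origin="ipp"><TRANCHE code="..."><SEUIL>...</SEUIL><TAUX>...</TAUX></TRANCHE></BAREME>, pruning every element that ends up without children.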
Code example #2
File: ramdb.py Project: Gentux/etalage
def iter_categories_slug(organism_types_only = False, tags_slug = None, term = None):
    intersected_sets = []
    if organism_types_only:
        intersected_sets.append(set(category_slug_by_pivot_code.itervalues()))
    for tag_slug in set(tags_slug or []):
        if tag_slug is not None:
            intersected_sets.append(categories_slug_by_tag_slug.get(tag_slug))
    if term:
        prefixes = strings.slugify(term).split(u'-')
        categories_slug_by_prefix = {}
        for prefix in prefixes:
            if prefix in categories_slug_by_prefix:
                # TODO? Handle categories with several words sharing the same prefix?
                continue
            categories_slug_by_prefix[prefix] = union_set(
                word_categories_slug
                for word, word_categories_slug in categories_slug_by_word.iteritems()
                if word.startswith(prefix)
                ) or set()
        intersected_sets.extend(categories_slug_by_prefix.itervalues())

    categories_slug = intersection_set(intersected_sets)
    if categories_slug is None:
        return category_by_slug.iterkeys()
    return categories_slug
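
The term handling above is a prefix search: each dash-separated prefix of the slugified term selects the union of the category sets of every indexed word starting with that prefix, and the per-prefix unions are then intersected. A self-contained sketch with hypothetical data (union_set and intersection_set are assumed to behave like plain set union and intersection, with None meaning "no filter applied"):

# Self-contained sketch of the prefix search in iter_categories_slug.
# The index content is hypothetical.
categories_slug_by_word = {
    u'creche': set([u'creche-collective', u'creche-familiale']),
    u'cantine': set([u'cantine-scolaire']),
    }

def slugs_matching_term(term):
    intersected_sets = []
    for prefix in term.lower().split(u'-'):  # stand-in for strings.slugify(term).split(u'-')
        matches = set()
        for word, slugs in categories_slug_by_word.items():
            if word.startswith(prefix):
                matches |= slugs
        intersected_sets.append(matches)
    result = None
    for prefix_matches in intersected_sets:
        result = prefix_matches if result is None else result & prefix_matches
    return result  # None means no filter was applied

assert slugs_matching_term(u'crech') == set([u'creche-collective', u'creche-familiale'])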
Code example #3
 def json_to_dated_python(self):
     enum = self.enum
     if enum is None:
         return conv.pipe(
             conv.test_isinstance((basestring, int)),
             conv.anything_to_int,
         )
     # This converter accepts either an item number or an item name.
     index_by_slug = self.index_by_slug
     if index_by_slug is None:
         self.index_by_slug = index_by_slug = dict(
             (strings.slugify(name), index)
             for index, name in sorted(enum._vars.iteritems()))
     return conv.pipe(
         conv.test_isinstance((basestring, int)),
         conv.condition(
             conv.anything_to_int,
             conv.pipe(
                 # Verify that item index belongs to enumeration.
                 conv.anything_to_int,
                 conv.test_in(enum._vars),
             ),
             conv.pipe(
                 # Convert item name to its index.
                 conv.input_to_slug,
                 conv.test_in(index_by_slug),
                 conv.function(lambda slug: index_by_slug[slug]),
             ),
         ),
     )
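
Stripped of the Biryani combinators, the conversion amounts to: try to read the value as an integer index and verify it belongs to the enumeration; otherwise slugify the name and look its index up. A hedged pure-Python equivalent (names and the lowercase stand-in for input_to_slug are illustrative, not the OpenFisca API):

# Illustrative equivalent of json_to_dated_python's condition branch.
def item_to_index(value, valid_indexes, index_by_slug):
    try:
        index = int(value)
    except (TypeError, ValueError):
        # Not a number: treat the value as an item name.
        return index_by_slug[value.strip().lower()]  # crude stand-in for input_to_slug
    if index not in valid_indexes:
        raise ValueError(u'Index {} does not belong to the enumeration'.format(index))
    return index

assert item_to_index(u'2', set([1, 2, 3]), {}) == 2
assert item_to_index(u'Marie', set([1, 2, 3]), {u'marie': 1}) == 1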
Code example #4
File: columns.py Project: ACPU/openfisca-core
 def json_to_dated_python(self):
     enum = self.enum
     if enum is None:
         return conv.pipe(
             conv.test_isinstance((basestring, int)),
             conv.anything_to_int,
             )
     # This converter accepts either an item number or an item name.
     index_by_slug = self.index_by_slug
     if index_by_slug is None:
         self.index_by_slug = index_by_slug = dict(
             (strings.slugify(name), index)
             for index, name in sorted(enum._vars.iteritems())
             )
     return conv.pipe(
         conv.test_isinstance((basestring, int)),
         conv.condition(
             conv.anything_to_int,
             conv.pipe(
                 # Verify that item index belongs to enumeration.
                 conv.anything_to_int,
                 conv.test_in(enum._vars),
                 ),
             conv.pipe(
                 # Convert item name to its index.
                 conv.input_to_slug,
                 conv.test_in(index_by_slug),
                 conv.function(lambda slug: index_by_slug[slug]),
                 ),
             ),
         )
Code example #5
def user_extract(req):
    ctx = contexts.Ctx(req)
    user = model.get_user(ctx, check = True)
    if user.email is None:
        return wsgihelpers.forbidden(ctx)
    legislation = ctx.node
    if legislation.is_owner(ctx) and legislation.is_dated:
        return wsgihelpers.bad_request(ctx, explanation = ctx._(u'This legislation is already dated.'))

    params = req.GET
    inputs = {
        'date': params.get('date'),
        }
    data, errors = conv.struct({
        'date': conv.pipe(
            conv.french_formatted_str_to_datetime,
            conv.default(datetime.datetime.utcnow()),
            ),
        })(inputs, state = ctx)
    if errors is not None:
        return wsgihelpers.bad_request(ctx, explanation = errors)

    new_legislation = None
    new_legislation_title = ctx._(u'{} (copy {})').format(legislation.title, user.email)
    new_legislation_slug = strings.slugify(new_legislation_title)
    existing_legislations_cursor = model.Legislation.find(
        dict(
            slug = new_legislation_slug,
            ),
        as_class = collections.OrderedDict,
        )
    if existing_legislations_cursor.count() > 0:
        for existing_legislation in existing_legislations_cursor:
            if existing_legislation.is_owner(ctx):
                return wsgihelpers.redirect(ctx, location = existing_legislation.get_user_url(ctx))
        if new_legislation is None:
            return wsgihelpers.bad_request(
                ctx,
                explanation = ctx._(u'A legislation with the same name already exists.'),
                )
    else:
        new_legislation = model.Legislation(
            author_id = user._id,
            datetime_begin = legislation.datetime_begin,
            datetime_end = legislation.datetime_end,
            description = ctx._(u'Copy of legislation "{}"').format(legislation.title),
            title = new_legislation_title,
            slug = new_legislation_slug,
            )
        response = requests.post(
            conf['api.urls.legislations'],
            headers = {
                'Content-Type': 'application/json',
                'User-Agent': conf['app_name'],
                },
            data = json.dumps(dict(date = data['date'].isoformat(), legislation = legislation.json)),
            )
        new_legislation.json = response.json(object_pairs_hook = collections.OrderedDict).get('dated_legislation')
        new_legislation.save(safe = True)
    return wsgihelpers.redirect(ctx, location = new_legislation.get_user_url(ctx))
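
Distilled to its control flow, the slug-collision handling above has three branches. A hedged sketch, with illustrative labels standing in for the real wsgihelpers responses:

# Hedged sketch of user_extract's slug-collision branches.
def resolve_copy(existing_legislations, is_owner):
    for legislation in existing_legislations:
        if is_owner(legislation):
            return 'redirect'        # reuse the user's own copy
    if existing_legislations:
        return 'name-already-taken'  # someone else owns the slug
    return 'create'                  # slug is free: create and save the copy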
Code example #6
File: model.py Project: onmytab/openfisca-web-ui
 def compute_words(self):
     self.words = sorted(
         set(
             strings.slugify(u'-'.join(
                 unicode(fragment) for fragment in (
                     self._id,
                     self.email,
                     self.full_name,
                 ) if fragment is not None)).split(u'-'))) or None
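
Traced by hand, this pipeline joins the non-None fragments, slugifies the result, splits it back into words and deduplicates. A hypothetical trace, under the slugify semantics assumed in the note after example #1:

# Hypothetical trace of compute_words for a user document.
# _id = u'abc123', email = u'jane@example.org', full_name = u'Jane Doe'
joined = u'abc123-jane@example.org-Jane Doe'  # u'-'.join of the non-None fragments
slug = u'abc123-jane-example-org-jane-doe'    # assumed slugify output
words = sorted(set(slug.split(u'-')))
assert words == [u'abc123', u'doe', u'example', u'jane', u'org']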
Code example #7
File: model.py Project: onmytab/openfisca-web-ui
 def compute_words(self):
     self.words = sorted(
         set(
             strings.slugify(u'-'.join(
                 unicode(fragment) for fragment in (
                     self._id,
                     self.description,
                     self.title,
                 ) if fragment is not None)).split(u'-'))) or None
Code example #8
 def setUp(self):  # noqa
     super(TestLegislations, self).setUp()
     self.ctx = contexts.Ctx()
     legislation_title = u'Legislation 1'
     self.legislation = model.Legislation(
         description=legislation_title,
         slug=strings.slugify(legislation_title),
         title=legislation_title,
     )
     self.legislation.save(safe=True)
Code example #9
 def compute_words(self):
     self.words = sorted(set(strings.slugify(u'-'.join(
         unicode(fragment)
         for fragment in (
             self._id,
             self.email,
             self.full_name,
             )
         if fragment is not None
         )).split(u'-'))) or None
Code example #10
 def compute_words(self):
     self.words = sorted(set(strings.slugify(u'-'.join(
         unicode(fragment)
         for fragment in (
             self._id,
             self.description,
             self.title,
             )
         if fragment is not None
         )).split(u'-'))) or None
Code example #11
 def setUp(self):  # noqa
     super(TestLegislations, self).setUp()
     self.ctx = contexts.Ctx()
     legislation_title = u'Legislation 1'
     self.legislation = model.Legislation(
         description = legislation_title,
         slug = strings.slugify(legislation_title),
         title = legislation_title,
         )
     self.legislation.save(safe = True)
Code example #12
File: pois.py Project: Gentux/etalage-passim
    def iter_ids(cls, ctx, territory = None, coverages = None, term = None):
        intersected_sets = []

        if territory is not None:
            ancestor_territories_poi_sets = []
            for ancestor_id in territory.ancestors_id:
                ancestor_territories_poi_sets.append(cls.ids_by_territory_id.get(ancestor_id, set()))
            for child_territory_id in ramdb.territories_id_by_ancestor_id.get(territory._id):
                ancestor_territories_poi_sets.append(cls.sim_ids_by_territory_id.get(child_territory_id, set()))
            intersected_sets.append(ramdb.union_set(ancestor_territories_poi_sets))

        for coverage in (coverages or []):
            coverage_slug = strings.slugify(coverage)
            coverage_pois_id = cls.ids_by_coverage.get(coverage_slug)
            if not coverage_pois_id:
                return set()
            intersected_sets.append(coverage_pois_id)

        # We should filter on term *after* having looked for competent organizations. Otherwise, when no organization
        # matching term is found, the nearest organizations will be used even when there are competent organizations
        # (that don't match the term).
        if term and isinstance(term, basestring):
            prefixes = strings.slugify(term).split(u'-')
            pois_id_by_prefix = {}
            for prefix in prefixes:
                if prefix in pois_id_by_prefix:
                    # TODO? Handle pois with several words sharing the same prefix?
                    continue
                pois_id_by_prefix[prefix] = ramdb.union_set(
                    pois_id
                    for word, pois_id in cls.ids_by_word.iteritems()
                    if word.startswith(prefix)
                    ) or set()
            intersected_sets.extend(pois_id_by_prefix.itervalues())

        found_pois_id = ramdb.intersection_set(intersected_sets)
        if found_pois_id is None:
            return cls.indexed_ids
        return found_pois_id
Code example #13
def duplicate(req):
    ctx = contexts.Ctx(req)
    test_case = ctx.node
    user = model.get_user(ctx, check=True)
    new_test_case_title = ctx._(u'Copy of {}').format(test_case.title)
    new_test_case = model.TestCase(
        author_id=user._id,
        description=new_test_case_title,
        title=new_test_case_title,
        slug=strings.slugify(new_test_case_title),
    )
    new_test_case.save(safe=True)
    return wsgihelpers.redirect(ctx, location=user.get_user_url(ctx))
Code example #14
def duplicate(req):
    ctx = contexts.Ctx(req)
    test_case = ctx.node
    user = model.get_user(ctx, check = True)
    new_test_case_title = ctx._(u'Copy of {}').format(test_case.title)
    new_test_case = model.TestCase(
        author_id = user._id,
        description = new_test_case_title,
        title = new_test_case_title,
        slug = strings.slugify(new_test_case_title),
        )
    new_test_case.save(safe = True)
    return wsgihelpers.redirect(ctx, location = user.get_user_url(ctx))
Code example #15
def convert_taxipp_name_tree(value, state = None):
    return conv.condition(
        conv.test_isinstance(dict),
        conv.pipe(
            conv.uniform_mapping(
                conv.test_isinstance(basestring),
                convert_taxipp_name_tree,
                ),
            conv.empty_to_none,
            ),
        conv.pipe(
            conv.test_isinstance(basestring),
            conv.translate({u'nc': None}),
            conv.test(lambda taxipp_name: strings.slugify(taxipp_name, separator = u'_') == taxipp_name.strip(u'_'),
                error = N_(u'Invalid TaxIPP name')),
            ),
        )(value, state = state or conv.default_state)
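
The final test accepts a TaxIPP name only when it is already its own underscore-slug, ignoring leading and trailing underscores (u'nc' having been translated to None first). The predicate, restated on its own:

# Restatement of the test above; slugify is assumed to behave as in
# the note after example #1.
def is_valid_taxipp_name(taxipp_name, slugify):
    return slugify(taxipp_name, separator = u'_') == taxipp_name.strip(u'_')

# is_valid_taxipp_name(u'taux_plein', strings.slugify) -> True
# is_valid_taxipp_name(u'Taux plein', strings.slugify) -> False (its slug is u'taux_plein')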
Code example #16
File: model.py Project: TheoVital/openfisca-web-site
 def convert_element_to_article(self, element, updated):
     title_url = None
     for xpath in (
             './/h1',
             './/h2',
             './/h3',
             './/h4',
             './/h5',
             './/h6',
     ):
         heading_elements = element.xpath(xpath)
         if len(heading_elements) > 0:
             title = lxml.html.tostring(heading_elements[0],
                                        encoding=unicode,
                                        method='text').strip()
             # Remove header from article element.
             header_element = None
             for ancestor_element in iter_element_ancestors(
                     heading_elements[0]):
                 if ancestor_element.tag in ('a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup') \
                         or ancestor_element.tag == 'div' and ancestor_element.get('class') == 'page-header':
                     header_element = ancestor_element
                 if ancestor_element.tag == 'a':
                     url, error = conv.pipe(
                         conv.make_input_to_url(),
                         conv.not_none,
                     )(ancestor_element.get('href'), state=self.ctx)
                     if error is None:
                         title_url = url
             header_element.getparent().remove(header_element)
             break
     else:
         title = None
     return dict(
         element=element,
         hash=element.get('id') or strings.slugify(title),
         id=element.get('id'),
         node=self,
         title=title,
         title_url=title_url,
         updated=get_element_time(element, default=updated),
     )
Code example #17
def edit(req):
    ctx = contexts.Ctx(req)
    user = model.get_user(ctx, check=True)
    params = req.params
    inputs = {
        'title': params.get('title'),
        'description': params.get('description'),
    }
    data, errors = conv.struct({
        'title': conv.cleanup_line,
        'description': conv.cleanup_line,
    })(inputs, state=ctx)
    if errors is not None:
        return wsgihelpers.bad_request(ctx, explanation=errors)
    test_case = ctx.node
    test_case.description = data['description']
    test_case.slug = strings.slugify(data['title'])
    test_case.title = data['title']
    test_case.save(safe=True)
    return wsgihelpers.redirect(ctx, location=user.get_user_url(ctx))
Code example #18
def edit(req):
    ctx = contexts.Ctx(req)
    user = model.get_user(ctx, check = True)
    params = req.params
    inputs = {
        'title': params.get('title'),
        'description': params.get('description'),
        }
    data, errors = conv.struct({
        'title': conv.cleanup_line,
        'description': conv.cleanup_line,
        })(inputs, state = ctx)
    if errors is not None:
        return wsgihelpers.bad_request(ctx, explanation = errors)
    test_case = ctx.node
    test_case.description = data['description']
    test_case.slug = strings.slugify(data['title'])
    test_case.title = data['title']
    test_case.save(safe = True)
    return wsgihelpers.redirect(ctx, location = user.get_user_url(ctx))
Code example #19
File: conv.py Project: Gentux/etalage-passim
def csv_infos_to_csv_bytes(csv_infos_by_schema_name, state = None):
    from . import ramdb
    if csv_infos_by_schema_name is None:
        return None, None
    if state is None:
        state = default_state
    csv_bytes_by_name = {}
    for schema_name, csv_infos in csv_infos_by_schema_name.iteritems():
        csv_file = StringIO()
        writer = csv.writer(csv_file, delimiter = ',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
        writer.writerow([
            (label or u'').encode("utf-8")
            for label in csv_infos['columns_label']
            ])
        for row in csv_infos['rows']:
            writer.writerow([
                unicode(cell).encode('utf-8') if cell is not None else None
                for cell in row
                ])
        csv_filename = '{0}.csv'.format(strings.slugify(ramdb.schema_title_by_name.get(schema_name, schema_name)))
        csv_bytes_by_name[csv_filename] = csv_file.getvalue()
    return csv_bytes_by_name or None, None
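
A hedged usage sketch; the input shape is inferred from the function body and all keys and values are hypothetical:

# Hypothetical call; columns_label and rows shapes inferred from the code.
csv_infos_by_schema_name = {
    u'organisme': dict(
        columns_label = [u'Nom', u'Ville'],
        rows = [[u'Mairie', u'Paris'], [u'Bibliothèque', None]],
        ),
    }
csv_bytes_by_name, error = csv_infos_to_csv_bytes(csv_infos_by_schema_name)
# error is None; csv_bytes_by_name maps u'organisme.csv' (or the slug of the
# schema title, when ramdb.schema_title_by_name knows the schema) to UTF-8 CSV bytes.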
Code example #20
File: model.py Project: dattaz/openfisca-web-site
 def convert_element_to_article(self, element, updated):
     title_url = None
     for xpath in (
             './/h1',
             './/h2',
             './/h3',
             './/h4',
             './/h5',
             './/h6',
             ):
         heading_elements = element.xpath(xpath)
         if len(heading_elements) > 0:
             title = lxml.html.tostring(heading_elements[0], encoding = unicode, method = 'text').strip()
             # Remove header from article element.
             header_element = None
             for ancestor_element in iter_element_ancestors(heading_elements[0]):
                 if ancestor_element.tag in ('a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup') \
                         or ancestor_element.tag == 'div' and ancestor_element.get('class') == 'page-header':
                     header_element = ancestor_element
                 if ancestor_element.tag == 'a':
                     url, error = conv.pipe(
                         conv.make_input_to_url(),
                         conv.not_none,
                         )(ancestor_element.get('href'), state = self.ctx)
                     if error is None:
                         title_url = url
             header_element.getparent().remove(header_element)
             break
     else:
         title = None
     return dict(
         element = element,
         hash = element.get('id') or strings.slugify(title),
         id = element.get('id'),
         node = self,
         title = title,
         title_url = title_url,
         updated = get_element_time(element, default = updated),
         )
Code example #21
def slugify_ipp_translation_key(key):
    return key if key in ("RENAME", "TYPE") else strings.slugify(key, separator=u"_")
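
Hedged behaviour of this helper (the second example assumes the slugify semantics from the note after example #1):

assert slugify_ipp_translation_key(u'TYPE') == u'TYPE'  # structural keys pass through unchanged
# slugify_ipp_translation_key(u'Taux plein') -> u'taux_plein'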
Code example #22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d',
                        '--dir',
                        default='Baremes_IPP_2015',
                        help='path of IPP XLS directory')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help="increase output verbosity")
    args = parser.parse_args()
    # args.dir = path
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.WARNING,
        stream=sys.stdout)

    root_node = dict(
        children=[],
        name="root",
        text=textwrap.dedent(u"""\
            Ce document présente l'ensemble de la législation permettant le calcul des contributions sociales, taxes sur
            les salaires  et cotisations sociales. Il s'agit des barèmes bruts de la législation utilisés dans le
            micro-simulateur de l'IPP, TAXIPP. Les sources législatives (texte de loi, numéro du décret ou arrêté) ainsi
            que la date de publication au Journal Officiel de la République française (JORF) sont systématiquement
            indiquées. La première ligne du fichier (masquée) indique le nom des paramètres dans TAXIPP.

            Citer cette source :
            Barèmes IPP: prélèvements sociaux, Institut des politiques publiques, avril 2014.

            Auteurs :
            Antoine Bozio, Julien Grenet, Malka Guillot, Laura Khoury et Marianne Tenand

            Contacts :
            [email protected]; [email protected]; [email protected]

            Licence :
            Licence ouverte / Open Licence
            """).split(u'\n'),
        title=u"Barème IPP",
        type=u'NODE',
    )

    for bareme in baremes:
        xls_path = os.path.join(args.dir.decode('utf-8'),
                                u"Baremes IPP - {0}.xls".format(bareme))
        if not os.path.exists(xls_path):
            log.warning("Skipping file {} that doesn't exist: {}".format(
                bareme, xls_path))
            continue
        log.info(u'Parsing file {}'.format(bareme))
        book = xlrd.open_workbook(filename=xls_path, formatting_info=True)

        sheet_names = [
            sheet_name for sheet_name in book.sheet_names()
            if not sheet_name.startswith((u'Abréviations', u'Outline'))
            and sheet_name not in forbiden_sheets.get(bareme, [])
        ]
        sheet_title_by_name = {}
        for sheet_name in sheet_names:
            log.info(u'  Parsing sheet {}'.format(sheet_name))
            sheet = book.sheet_by_name(sheet_name)

            # Extract coordinates of merged cells.
            merged_cells_tree = {}
            for row_low, row_high, column_low, column_high in sheet.merged_cells:
                for row_index in range(row_low, row_high):
                    cell_coordinates_by_merged_column_index = merged_cells_tree.setdefault(
                        row_index, {})
                    for column_index in range(column_low, column_high):
                        cell_coordinates_by_merged_column_index[
                            column_index] = (row_low, column_low)

            if sheet_name.startswith(u'Sommaire'):
                # Associate the titles of the sheets to their Excel names.
                for row_index in range(sheet.nrows):
                    linked_sheet_number = transform_xls_cell_to_json(
                        book, sheet, merged_cells_tree, row_index, 2)
                    if isinstance(linked_sheet_number, int):
                        linked_sheet_title = transform_xls_cell_to_str(
                            book, sheet, merged_cells_tree, row_index, 3)
                        if linked_sheet_title is not None:
                            hyperlink = get_hyperlink(sheet, row_index, 3)
                            if hyperlink is not None and hyperlink.type == u'workbook':
                                linked_sheet_name = hyperlink.textmark.split(
                                    u'!', 1)[0].strip(u'"').strip(u"'")
                                sheet_title_by_name[
                                    linked_sheet_name] = linked_sheet_title
                continue

            descriptions_rows = []
            labels_rows = []
            notes_rows = []
            state = 'taxipp_names'
            taxipp_names_row = None
            values_rows = []
            for row_index in range(sheet.nrows):
                columns_count = len(sheet.row_values(row_index))
                if state == 'taxipp_names':
                    taxipp_names_row = [
                        taxipp_name for taxipp_name in (
                            transform_xls_cell_to_str(
                                book, sheet, merged_cells_tree, row_index,
                                column_index)
                            for column_index in range(columns_count))
                    ]
                    state = 'labels'
                    continue
                if state == 'labels':
                    first_cell_value = transform_xls_cell_to_json(
                        book, sheet, merged_cells_tree, row_index, 0)
                    date_or_year, error = conv.pipe(
                        conv.test_isinstance((int, basestring)),
                        cell_to_date,
                        conv.not_none,
                    )(first_cell_value, state=conv.default_state)
                    if error is not None:
                        # First cell of row is not a date => Assume it is a label.
                        labels_rows.append([
                            transform_xls_cell_to_str(book, sheet,
                                                      merged_cells_tree,
                                                      row_index, column_index)
                            for column_index in range(columns_count)
                        ])
                        continue
                    state = 'values'
                if state == 'values':
                    first_cell_value = transform_xls_cell_to_json(
                        book, sheet, merged_cells_tree, row_index, 0)
                    if first_cell_value is None or isinstance(
                            first_cell_value, (int, basestring)):
                        date_or_year, error = cell_to_date(
                            first_cell_value, state=conv.default_state)
                        if error is None:
                            # First cell of row is a valid date or year.
                            values_row = [
                                transform_xls_cell_to_json(
                                    book, sheet, merged_cells_tree, row_index,
                                    column_index)
                                for column_index in range(columns_count)
                            ]
                            if date_or_year is not None:
                                assert date_or_year.year < 2601, 'Invalid date {} in {} at row {}'.format(
                                    date_or_year, sheet_name, row_index + 1)
                                values_rows.append(values_row)
                                continue
                            if all(value in (None, u'')
                                   for value in values_row):
                                # If first cell is empty and all other cells in line are also empty, ignore this line.
                                continue
                            # First cell has no date and other cells in row are not empty => Assume it is a note.
                    state = 'notes'
                if state == 'notes':
                    first_cell_value = transform_xls_cell_to_json(
                        book, sheet, merged_cells_tree, row_index, 0)
                    if isinstance(first_cell_value,
                                  basestring) and first_cell_value.strip(
                                  ).lower() == 'notes':
                        notes_rows.append([
                            transform_xls_cell_to_str(book, sheet,
                                                      merged_cells_tree,
                                                      row_index, column_index)
                            for column_index in range(columns_count)
                        ])
                        continue
                    state = 'description'
                assert state == 'description'
                descriptions_rows.append([
                    transform_xls_cell_to_str(book, sheet, merged_cells_tree,
                                              row_index, column_index)
                    for column_index in range(columns_count)
                ])

            text_lines = []
            for row in notes_rows:
                text_lines.append(u' | '.join(cell for cell in row if cell))
            if text_lines:
                text_lines.append(None)
            for row in descriptions_rows:
                text_lines.append(u' | '.join(cell for cell in row if cell))

            sheet_title = sheet_title_by_name.get(sheet_name)
            if sheet_title is None:
                log.warning(u"Missing title for sheet {} in summary".format(
                    sheet_name))
                continue
            labels = []
            for labels_row in labels_rows:
                for column_index, label in enumerate(labels_row):
                    if not label:
                        continue
                    while column_index >= len(labels):
                        labels.append([])
                    labels_column = labels[column_index]
                    if not labels_column or labels_column[-1] != label:
                        labels_column.append(label)
            labels = [
                tuple(labels_column1)
                if len(labels_column1) > 1 else labels_column1[0]
                for labels_column1 in labels
            ]

            cell_by_label_rows = []
            for value_row in values_rows:
                cell_by_label = collections.OrderedDict(
                    itertools.izip(labels, value_row))
                cell_by_label, errors = values_row_converter(
                    cell_by_label, state=conv.default_state)
                assert errors is None, "Errors in {}:\n{}".format(
                    cell_by_label, errors)
                cell_by_label_rows.append(cell_by_label)

            sheet_node = dict(
                children=[],
                name=strings.slugify(sheet_name, separator=u'_'),
                text=text_lines,
                title=sheet_title,
                type=u'NODE',
            )
            root_node['children'].append(sheet_node)

            for taxipp_name, labels_column in zip(taxipp_names_row, labels):
                if not taxipp_name or taxipp_name in (u'date', ):
                    continue
                variable_node = dict(
                    children=[],
                    name=strings.slugify(taxipp_name, separator=u'_'),
                    title=u' - '.join(labels_column) if isinstance(
                        labels_column, tuple) else labels_column,
                    type=u'CODE',
                )
                sheet_node['children'].append(variable_node)

                for cell_by_label in cell_by_label_rows:
                    amount_and_unit = cell_by_label[labels_column]
                    variable_node['children'].append(
                        dict(
                            law_reference=cell_by_label[
                                u'Références législatives'],
                            notes=cell_by_label[u'Notes'],
                            publication_date=cell_by_label[u"Parution au JO"],
                            start_date=cell_by_label[
                                u"Date d'entrée en vigueur"],
                            type=u'VALUE',
                            unit=amount_and_unit[1] if isinstance(
                                amount_and_unit, tuple) else None,
                            value=amount_and_unit[0] if isinstance(
                                amount_and_unit, tuple) else amount_and_unit,
                        ))

            # dates = [
            #     conv.check(cell_to_date)(
            #         row[1] if bareme == u'Impot Revenu' else row[0],
            #         state = conv.default_state,
            #         )
            #     for row in values_rows
            #     ]
            # for column_index, taxipp_name in enumerate(taxipp_names_row):
            #     if taxipp_name and strings.slugify(taxipp_name) not in (
            #             'date',
            #             'date-ir',
            #             'date-rev',
            #             'note',
            #             'notes',
            #             'ref-leg',
            #             ):
            #         vector = [
            #             transform_cell_value(date, row[column_index])
            #             for date, row in zip(dates, values_rows)
            #             ]
            #         vector = [
            #             cell if not isinstance(cell, basestring) or cell == u'nc' else '-'
            #             for cell in vector
            #             ]
            #         # vector_by_taxipp_name[taxipp_name] = pd.Series(vector, index = dates)
            #         vector_by_taxipp_name[taxipp_name] = vector
            #

    print_node(root_node)

    return 0
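
The per-sheet loop above is a forward-only state machine over the rows: the first row holds TaxIPP names, then label rows accumulate until a first cell parses as a date, then value rows while it does, then note rows, then description rows. A distilled, hedged sketch (the real code also skips blank value rows and tolerates u'nc' cells):

# Distilled sketch of the row-classification state machine in main().
# first_cell_is_date is a stand-in for the cell_to_date converter.
def split_sheet_rows(rows, first_cell_is_date):
    taxipp_names = rows[0] if rows else []
    labels, values, notes, descriptions = [], [], [], []
    state = 'labels'
    for row in rows[1:]:
        if state == 'labels':
            if not first_cell_is_date(row[0]):
                labels.append(row)
                continue
            state = 'values'
        if state == 'values':
            if first_cell_is_date(row[0]):
                values.append(row)
                continue
            state = 'notes'
        if state == 'notes':
            if isinstance(row[0], basestring) and row[0].strip().lower() == u'notes':
                notes.append(row)
                continue
            state = 'description'
        descriptions.append(row)
    return taxipp_names, labels, values, notes, descriptions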
Code example #23
def transform(xls_dir, yaml_raw_dir):
    file_system_encoding = sys.getfilesystemencoding()

    error_by_book_name = collections.OrderedDict()
    warning_by_book_name = collections.OrderedDict()
    for filename_encoded in sorted(os.listdir(xls_dir)):
        if not filename_encoded.endswith('.xls'):
            continue
        filename = filename_encoded.decode(file_system_encoding)
        log.info(u'Parsing file {}'.format(filename))
        book_name = os.path.splitext(filename)[0]
        xls_path_encoded = os.path.join(xls_dir, filename_encoded)
        book = xlrd.open_workbook(filename = xls_path_encoded, formatting_info = True)

        book_yaml_dir_encoded = os.path.join(yaml_raw_dir, strings.slugify(book_name).encode(file_system_encoding))
        if not os.path.exists(book_yaml_dir_encoded):
            os.makedirs(book_yaml_dir_encoded)

        error_by_sheet_name = collections.OrderedDict()
        sheet_english_title_by_name = collections.OrderedDict()
        sheet_title_by_name = collections.OrderedDict()
        warning_by_sheet_name = collections.OrderedDict()
        for sheet_name in book.sheet_names():
            log.info(u'  Parsing sheet {}.'.format(sheet_name))
            sheet = book.sheet_by_name(sheet_name)
            sheet_error = None
            sheet_warning = None

            try:
                # Extract coordinates of merged cells.
                merged_cells_tree = {}
                for row_low, row_high, column_low, column_high in sheet.merged_cells:
                    for row_index in range(row_low, row_high):
                        cell_coordinates_by_merged_column_index = merged_cells_tree.setdefault(
                            row_index, {})
                        for column_index in range(column_low, column_high):
                            cell_coordinates_by_merged_column_index[column_index] = (row_low, column_low)

                if sheet_name.startswith((u'Sommaire', u'Outline')):
                    french = sheet_name.startswith(u'Sommaire')
                    # Associate the titles of the sheets to their Excel names.
                    book_title = transform_xls_cell_to_str(book, sheet, merged_cells_tree, 1, 1)
                    if not book_title:
                        book_title = transform_xls_cell_to_str(book, sheet, merged_cells_tree, 2, 1)
                    book_title = book_title.strip()
                    assert book_title
                    book_description = transform_xls_cell_to_str(book, sheet, merged_cells_tree, 4, 1)
                    if not book_description:
                        book_description = transform_xls_cell_to_str(book, sheet, merged_cells_tree, 5, 1)
                    book_description = book_description.strip()
                    assert book_description

                    for column_index in range(1, 4):
                        current_heading = u'Annexes' if french else u'Annexes'
                        sheet_title_by_slug_by_heading = collections.OrderedDict()
                        for row_index in range(sheet.nrows):
                            heading = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 1)
                            if isinstance(heading, basestring):  # noqa F821
                                heading = heading.strip()
                                if not heading:
                                    continue
                                if heading == book_title or heading == book_description:
                                    continue
                                if number_re.match(heading) is None:
                                    current_heading = heading
                                    continue
                            linked_sheet_number = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index,
                                column_index)
                            if isinstance(linked_sheet_number, int) or (isinstance(linked_sheet_number, basestring) and number_re.match(linked_sheet_number) is not None):  # noqa F821
                                linked_sheet_title = transform_xls_cell_to_str(book, sheet, merged_cells_tree,
                                    row_index, column_index + 1)
                                if linked_sheet_title is not None:
                                    linked_sheet_title = linked_sheet_title.strip()
                                if linked_sheet_title:
                                    hyperlink = get_hyperlink(sheet, row_index, column_index + 1)
                                    if hyperlink is not None and hyperlink.type == u'workbook':
                                        linked_sheet_name = hyperlink.textmark.split(u'!', 1)[0].strip(u'"').strip(u"'")
                                        sheet_title_by_slug = sheet_title_by_slug_by_heading.setdefault(current_heading,
                                            collections.OrderedDict())
                                        sheet_title_by_slug[strings.slugify(linked_sheet_name)] = linked_sheet_title

                                        if french:
                                            sheet_title_by_name[linked_sheet_name] = linked_sheet_title
                                        else:
                                            sheet_english_title_by_name[linked_sheet_name] = linked_sheet_title
                        if sheet_title_by_slug_by_heading:
                            break
                    assert sheet_title_by_slug_by_heading

                    book_notes = []
                    for column_index in range(8, 12):
                        for row_index in range(sheet.nrows):
                            note = transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            if note and note.strip() == book_description:
                                continue
                            if book_notes or note:
                                book_notes.append((note or u'').rstrip())
                                if note:
                                    blank_notes_count = 0
                                elif blank_notes_count >= 1:
                                    break
                                else:
                                    blank_notes_count += 1
                        if book_notes:
                            break
                    while book_notes and not book_notes[-1]:
                        del book_notes[-1]
                    assert book_notes

                    sheet_node = collections.OrderedDict((
                        (u'Titre' if french else u'Title', book_title),
                        (u'Description' if french else u'Description', book_description),
                        (u'Sommaire' if french else u'Table of Content', sheet_title_by_slug_by_heading),
                        (u'Notes' if french else u'Notes', literal_unicode(u'\n'.join(book_notes))),
                        (u'Données initiales' if french else u'Source Data', collections.OrderedDict((
                            (u'Producteur' if french else u'Producer', u'Institut des politiques publiques'),
                            (u'Format', u'XLS'),
                            (u'URL', u'http://www.ipp.eu/outils/baremes-ipp/' if french
                                else u'http://www.ipp.eu/en/tools/ipp-tax-and-benefit-tables/'),
                            ))),
                        (u'Convertisseur' if french else u'Converter', collections.OrderedDict((
                            (u'URL', u'https://git.framasoft.org/french-tax-and-benefit-tables/ipp-tax-and-benefit-tables-converters'),  # noqa
                            ))),
                        (u'Données générées' if french else u'Generated Data', collections.OrderedDict((
                            (u'Format', u'YAML'),
                            (u'URL', u'https://git.framasoft.org/french-tax-and-benefit-tables/ipp-tax-and-benefit-tables-yaml-raw'),  # noqa
                            ))),
                        (u'Licence' if french else u'License',
                            u'Licence ouverte <http://www.etalab.gouv.fr/licence-ouverte-open-licence>' if french
                            else u'Open Licence <http://www.etalab.gouv.fr/licence-ouverte-open-licence>'),
                        ))

                    yaml_file_path_encoded = os.path.join(
                        book_yaml_dir_encoded,
                        (strings.slugify(sheet_name, transform = strings.upper) + u'.yaml').encode(
                            file_system_encoding),
                        )
                elif sheet_name.startswith(u'Abréviation'):
                    log.warning(u'    Ignoring sheet {} of book {}.'.format(sheet_name, book_name))
                    sheet_warning = u'Sheet ignored.'

                    sheet_title = sheet_title_by_name.get(sheet_name, sheet_name)
                    sheet_node = collections.OrderedDict((
                        (u'Titre' if french else u'Title', sheet_title),
                        ))

                    yaml_file_path_encoded = os.path.join(
                        book_yaml_dir_encoded,
                        (strings.slugify(sheet_name, transform = strings.upper) + u'.yaml').encode(
                            file_system_encoding),
                        )
                else:
                    descriptions_rows = []
                    labels_rows = []
                    notes_rows = []
                    state = 'taxipp_names'
                    taxipp_names_row = None
                    values_rows = []
                    for row_index in range(sheet.nrows):
                        columns_count = len(sheet.row_values(row_index))
                        if state == 'taxipp_names':
                            taxipp_names_row = [
                                (taxipp_name or u'').strip()
                                for taxipp_name in (
                                    transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                                    for column_index in range(columns_count)
                                    )
                                ]
                            state = 'labels'
                            if all(
                                    not taxipp_name
                                    for taxipp_name in taxipp_names_row
                                    ):
                                # The first row is empty => This sheet doesn't contain TaxIPP names.
                                continue
                            # When any TaxIPP name is in lowercase, assume that this row is really the TaxIPP names row.
                            if any(
                                    taxipp_name and taxipp_name[0].islower()
                                    for taxipp_name in taxipp_names_row
                                    ):
                                continue
                            else:
                                log.info(u'    Sheet "{}" of XLS file "{}" has no row for TaxIPP names.'.format(
                                    sheet_name, filename))
                                # warning = u'Row not found'
                                # if sheet_warning is None:
                                #     sheet_warning = collections.OrderedDict()
                                # if isinstance(sheet_warning, dict):
                                #     sheet_warning[u'Noms TaxIPP'] = warning
                                # else:
                                #     assert isinstance(sheet_warning, basestring), sheet_warning
                                #     sheet_warning = u'\n\n'.join(
                                #         fragment
                                #         for fragment in (sheet_warning, warning)
                                #         if fragment
                                #         )
                                taxipp_names_row = []
                        if state == 'labels':
                            first_cell_value, error = conv.pipe(cell_to_row_first_cell, conv.not_none)(
                                transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0),
                                state = conv.default_state)
                            if error is not None:
                                # First cell of row is not the first cell of a row of values => Assume it is a label.
                                labels_rows.append([
                                    u' '.join((label or u'').split()).strip()
                                    for label in (
                                        transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index,
                                            column_index)
                                        for column_index in range(columns_count)
                                        )
                                    ])
                                continue
                            state = 'values'
                        if state == 'values':
                            first_cell_value, error = cell_to_row_first_cell(
                                transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0),
                                state = conv.default_state)
                            if error is None:
                                # First cell of row is a valid date or year.
                                values_row = [
                                    value.strip() if isinstance(value, basestring) else value  # noqa F821
                                    for value in (
                                        transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index,
                                            column_index, empty_white_value = u'nc')
                                        for column_index in range(columns_count)
                                        )
                                    ]
                                if isinstance(first_cell_value, datetime.date):
                                    assert first_cell_value.year < 2601, 'Invalid date {} in {} at row {}'.format(
                                        first_cell_value, sheet_name, row_index + 1)
                                    values_rows.append(values_row)
                                    continue
                                if isinstance(first_cell_value, basestring) and aad_re.match(first_cell_value) is not None:  # noqa F821
                                    values_rows.append(values_row)
                                    continue
                                if all(value in (None, u'', u'nc') for value in values_row):
                                    # If first cell is empty and all other cells in line are also empty, ignore this
                                    # line.
                                    continue
                                # First cell has no date and other cells in row are not empty => Assume it is a note.
                            state = 'notes'
                        if state == 'notes':
                            first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                            if isinstance(first_cell_value, basestring) and first_cell_value.strip().lower() == 'notes':  # noqa F821
                                notes_rows.append([
                                    (line or u'').rstrip()
                                    for line in (
                                        transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index,
                                            column_index)
                                        for column_index in range(columns_count)
                                        )
                                    ])
                                continue
                            state = 'description'
                        assert state == 'description'
                        descriptions_rows.append([
                            (line or u'').strip()
                            for line in (
                                transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                                for column_index in range(columns_count)
                                )
                            ])

                    sheet_node = collections.OrderedDict()

                    sheet_title = sheet_title_by_name.get(sheet_name)
                    if sheet_title is not None:
                        sheet_node[u'Titre'] = sheet_title

                    sheet_node[u'Titre court'] = sheet_name

                    labels = []
                    for labels_row in labels_rows:
                        for column_index, label in enumerate(labels_row):
                            if label is None:
                                continue
                            label = label.strip()
                            if not label:
                                continue
                            while column_index >= len(labels):
                                labels.append([])
                            column_labels = labels[column_index]
                            if not column_labels or column_labels[-1] != label:
                                column_labels.append(label)
                    labels = [
                        (tuple(
                            label_stripped
                            for label_stripped in (
                                (label or u'').strip()
                                for label in column_labels1
                                )
                            if label_stripped
                            ) if column_labels1 else None) or (u'Colonne sans titre',)
                        for index, column_labels1 in enumerate(labels, 1)
                        ]
                    assert labels

                    taxipp_name_by_column_labels = collections.OrderedDict()
                    for column_labels, taxipp_name in zip(labels, taxipp_names_row):
                        if not taxipp_name:
                            continue
                        taxipp_name_by_column_label = taxipp_name_by_column_labels
                        for column_label in column_labels[:-1]:
                            taxipp_name_by_column_label = taxipp_name_by_column_label.setdefault(column_label,
                                collections.OrderedDict())
                        taxipp_name_by_column_label[column_labels[-1]] = taxipp_name
                    if taxipp_name_by_column_labels:
                        sheet_node[u'Noms TaxIPP'] = taxipp_name_by_column_labels

                    sheet_values = []
                    for value_row in values_rows:
                        cell_by_column_labels = collections.OrderedDict()
                        for column_labels, cell in zip(labels, value_row):
                            if cell is None or cell == '':
                                continue
                            cell_by_column_label = cell_by_column_labels
                            for column_label in column_labels[:-1]:
                                cell_by_column_label = cell_by_column_label.setdefault(column_label,
                                    collections.OrderedDict())
                            # Merge (amount, unit) couples to a string to simplify YAML.
                            if isinstance(cell, tuple):
                                cell = transform_amount_tuple_to_str(cell)
                            if isinstance(cell, basestring) and u'\n' in cell:  # noqa F821
                                cell = literal_unicode(cell)
                            cell_by_column_label[column_labels[-1]] = cell
                        sheet_values.append(cell_by_column_labels)
                    if sheet_values:
                        sheet_node[u'Valeurs'] = sheet_values

                    notes = u'\n'.join([
                        line.rstrip()
                        for line in u'\n'.join([
                            u' | '.join(
                                cell for cell in row
                                if cell
                                ).rstrip()
                            for row in notes_rows
                            ]).split(u'\n')
                        ]).rstrip()
                    if notes:
                        sheet_node[u'Notes'] = literal_unicode(notes)

                    description = u'\n'.join([
                        line.rstrip()
                        for line in u'\n'.join([
                            u' | '.join(
                                cell for cell in row
                                if cell
                                ).rstrip()
                            for row in descriptions_rows
                            ]).split(u'\n')
                        ]).rstrip()
                    if description:
                        sheet_node[u'Description'] = literal_unicode(description)

                    yaml_file_path_encoded = os.path.join(
                        book_yaml_dir_encoded,
                        (strings.slugify(sheet_name) + u'.yaml').encode(file_system_encoding),
                        )

                if sheet_error:
                    sheet_node[u'ERRORS'] = literal_unicode(sheet_error) \
                        if isinstance(sheet_error, basestring) and u'\n' in sheet_error else sheet_error  # noqa F821
                if sheet_warning:
                    sheet_node[u'WARNINGS'] = literal_unicode(sheet_warning) \
                        if isinstance(sheet_warning, basestring) and u'\n' in sheet_warning else sheet_warning  # noqa F821
                with open(yaml_file_path_encoded, 'w') as yaml_file:
                    yaml.dump(sheet_node, yaml_file, allow_unicode = True, default_flow_style = False, indent = 2,
                        width = 120)
            except:  # noqa E722
                message = u'An exception occurred when parsing sheet "{}" of XLS file "{}".'.format(sheet_name,
                    filename)
                log.exception(u'    {}'.format(message))
                sheet_error = literal_unicode(u'\n\n'.join(
                    fragment
                    for fragment in (
                        unicode(sheet_error) if sheet_error is not None else None,  # noqa F821
                        message,
                        traceback.format_exc().decode('utf-8'),
                        )
                    if fragment
                    ))

            if sheet_error:
                error_by_sheet_name[sheet_name] = sheet_error
            if sheet_warning:
                warning_by_sheet_name[sheet_name] = sheet_warning

        if error_by_sheet_name:
            yaml_file_path_encoded = os.path.join(
                book_yaml_dir_encoded,
                u'ERRORS.yaml'.encode(file_system_encoding),
                )
            with open(yaml_file_path_encoded, 'w') as yaml_file:
                yaml.dump(error_by_sheet_name, yaml_file, allow_unicode = True, default_flow_style = False,
                    indent = 2, width = 120)
            error_by_book_name[book_name] = error_by_sheet_name
        if warning_by_sheet_name:
            yaml_file_path_encoded = os.path.join(
                book_yaml_dir_encoded,
                u'WARNINGS.yaml'.encode(file_system_encoding),
                )
            with open(yaml_file_path_encoded, 'w') as yaml_file:
                yaml.dump(warning_by_sheet_name, yaml_file, allow_unicode = True, default_flow_style = False,
                    indent = 2, width = 120)
            warning_by_book_name[book_name] = warning_by_sheet_name

    if error_by_book_name:
        yaml_file_path_encoded = os.path.join(
            yaml_raw_dir,
            u'ERRORS.yaml'.encode(file_system_encoding),
            )
        with open(yaml_file_path_encoded, 'w') as yaml_file:
            yaml.dump(error_by_book_name, yaml_file, allow_unicode = True, default_flow_style = False, indent = 2,
                width = 120)
    if warning_by_book_name:
        yaml_file_path_encoded = os.path.join(
            yaml_raw_dir,
            u'WARNINGS.yaml'.encode(file_system_encoding),
            )
        with open(yaml_file_path_encoded, 'w') as yaml_file:
            yaml.dump(warning_by_book_name, yaml_file, allow_unicode = True, default_flow_style = False, indent = 2,
                width = 120)
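
A side note on the notes/description flattening used above: cells of each row are joined with ' | ', rows are joined with newlines, and trailing blanks are stripped. A minimal standalone sketch with hypothetical input rows (not from the original source):

notes_rows = [
    [u'Notes', u'', u'voir feuille suivante'],
    [u'', u'mise a jour 2014', u''],
    ]
notes = u'\n'.join(
    line.rstrip()
    for line in u'\n'.join(
        u' | '.join(cell for cell in row if cell).rstrip()
        for row in notes_rows
        ).split(u'\n')
    ).rstrip()
assert notes == u'Notes | voir feuille suivante\nmise a jour 2014'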
Code example #24
def transform_node_to_element(name, node):
    if isinstance(node, dict):
        if node.get('TYPE') == u'BAREME':
            scale_element = etree.Element('BAREME', attrib = dict(
                code = strings.slugify(name, separator = u'_'),
                ))
            for slice_name in node.get('SEUIL', {}).keys():
                slice_element = etree.Element('TRANCHE', attrib = dict(
                    code = strings.slugify(slice_name, separator = u'_'),
                    ))

                threshold_element = etree.Element('SEUIL')
                values, format, type = prepare_xml_values(name, node.get('SEUIL', {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        threshold_element.append(value_element)
                if len(threshold_element) > 0:
                    slice_element.append(threshold_element)

                amount_element = etree.Element('MONTANT')
                values, format, type = prepare_xml_values(name, node.get('MONTANT', {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        amount_element.append(value_element)
                if len(amount_element) > 0:
                    slice_element.append(amount_element)

                rate_element = etree.Element('TAUX')
                values, format, type = prepare_xml_values(name, node.get('TAUX', {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        rate_element.append(value_element)
                if len(rate_element) > 0:
                    slice_element.append(rate_element)

                base_element = etree.Element('ASSIETTE')
                values, format, type = prepare_xml_values(name, node.get('ASSIETTE', {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        base_element.append(value_element)
                if len(base_element) > 0:
                    slice_element.append(base_element)

                if len(slice_element) > 0:
                    scale_element.append(slice_element)
            return scale_element if len(scale_element) > 0 else None
        else:
            node_element = etree.Element('NODE', attrib = dict(
                code = strings.slugify(name, separator = u'_'),
                ))
            for key, value in node.iteritems():
                child_element = transform_node_to_element(key, value)
                if child_element is not None:
                    node_element.append(child_element)
            return node_element if len(node_element) > 0 else None
    else:
        assert isinstance(node, list), node
        values, format, type = prepare_xml_values(name, node)
        if not values:
            return None
        code_element = etree.Element('CODE', attrib = dict(
            code = strings.slugify(name, separator = u'_'),
            ))
        if format is not None:
            code_element.set('format', format)
        if type is not None:
            code_element.set('type', type)
        for value in values:
            value_element = transform_value_to_element(value)
            if value_element is not None:
                code_element.append(value_element)
        return code_element if len(code_element) > 0 else None
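
A minimal usage sketch for transform_node_to_element above. Illustrative only: the tree is hypothetical, and prepare_xml_values / transform_value_to_element are assumed to be the companion helpers from the same converter module, accepting leaf dicts shaped like dict(start=..., value=...):

from lxml import etree

tree = {
    'impot_revenu': {
        'bareme': {
            'TYPE': u'BAREME',
            'SEUIL': {u'tranche_1': [dict(start='2014-01-01', value=0)]},  # hypothetical leaf
            'TAUX': {u'tranche_1': [dict(start='2014-01-01', value=0.14)]},  # hypothetical leaf
            },
        },
    }
root_element = transform_node_to_element(u'root', tree)
if root_element is not None:
    print(etree.tostring(root_element, pretty_print=True))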
Code example #25
def transform(xls_dir, yaml_raw_dir):
    file_system_encoding = sys.getfilesystemencoding()

    error_by_book_name = collections.OrderedDict()
    warning_by_book_name = collections.OrderedDict()
    for filename_encoded in sorted(os.listdir(xls_dir)):
        if not filename_encoded.endswith('.xls'):
            continue
        filename = filename_encoded.decode(file_system_encoding)
        log.info(u'Parsing file {}'.format(filename))
        book_name = os.path.splitext(filename)[0]
        xls_path_encoded = os.path.join(xls_dir, filename_encoded)
        book = xlrd.open_workbook(filename=xls_path_encoded,
                                  formatting_info=True)

        book_yaml_dir_encoded = os.path.join(
            yaml_raw_dir,
            strings.slugify(book_name).encode(file_system_encoding))
        if not os.path.exists(book_yaml_dir_encoded):
            os.makedirs(book_yaml_dir_encoded)

        error_by_sheet_name = collections.OrderedDict()
        sheet_english_title_by_name = collections.OrderedDict()
        sheet_title_by_name = collections.OrderedDict()
        warning_by_sheet_name = collections.OrderedDict()
        for sheet_name in book.sheet_names():
            log.info(u'  Parsing sheet {}.'.format(sheet_name))
            sheet = book.sheet_by_name(sheet_name)
            sheet_error = None
            sheet_warning = None

            try:
                # Extract coordinates of merged cells.
                merged_cells_tree = {}
                for row_low, row_high, column_low, column_high in sheet.merged_cells:
                    for row_index in range(row_low, row_high):
                        cell_coordinates_by_merged_column_index = merged_cells_tree.setdefault(
                            row_index, {})
                        for column_index in range(column_low, column_high):
                            cell_coordinates_by_merged_column_index[
                                column_index] = (row_low, column_low)

                if sheet_name.startswith((u'Sommaire', u'Outline')):
                    french = sheet_name.startswith(u'Sommaire')
                    # Associate the titles of the sheets to their Excel names.
                    book_title = transform_xls_cell_to_str(
                        book, sheet, merged_cells_tree, 1, 1)
                    if not book_title:
                        book_title = transform_xls_cell_to_str(
                            book, sheet, merged_cells_tree, 2, 1)
                    book_title = book_title.strip()
                    assert book_title
                    book_description = transform_xls_cell_to_str(
                        book, sheet, merged_cells_tree, 4, 1)
                    if not book_description:
                        book_description = transform_xls_cell_to_str(
                            book, sheet, merged_cells_tree, 5, 1)
                    book_description = book_description.strip()
                    assert book_description

                    for column_index in range(1, 4):
                        current_heading = u'Annexes'
                        sheet_title_by_slug_by_heading = collections.OrderedDict(
                        )
                        for row_index in range(sheet.nrows):
                            heading = transform_xls_cell_to_json(
                                book, sheet, merged_cells_tree, row_index, 1)
                            if isinstance(heading, basestring):
                                heading = heading.strip()
                                if not heading:
                                    continue
                                if heading == book_title or heading == book_description:
                                    continue
                                if number_re.match(heading) is None:
                                    current_heading = heading
                                    continue
                            linked_sheet_number = transform_xls_cell_to_json(
                                book, sheet, merged_cells_tree, row_index,
                                column_index)
                            if isinstance(linked_sheet_number, int) or (
                                    isinstance(linked_sheet_number, basestring)
                                    and number_re.match(linked_sheet_number)
                                    is not None):
                                linked_sheet_title = transform_xls_cell_to_str(
                                    book, sheet, merged_cells_tree, row_index,
                                    column_index + 1)
                                if linked_sheet_title is not None:
                                    linked_sheet_title = linked_sheet_title.strip(
                                    )
                                if linked_sheet_title:
                                    hyperlink = get_hyperlink(
                                        sheet, row_index, column_index + 1)
                                    if hyperlink is not None and hyperlink.type == u'workbook':
                                        linked_sheet_name = hyperlink.textmark.split(
                                            u'!', 1)[0].strip(u'"').strip(u"'")
                                        sheet_title_by_slug = sheet_title_by_slug_by_heading.setdefault(
                                            current_heading,
                                            collections.OrderedDict())
                                        sheet_title_by_slug[strings.slugify(
                                            linked_sheet_name
                                        )] = linked_sheet_title

                                        if french:
                                            sheet_title_by_name[
                                                linked_sheet_name] = linked_sheet_title
                                        else:
                                            sheet_english_title_by_name[
                                                linked_sheet_name] = linked_sheet_title
                        if sheet_title_by_slug_by_heading:
                            break
                    assert sheet_title_by_slug_by_heading

                    book_notes = []
                    for column_index in range(8, 12):
                        for row_index in range(sheet.nrows):
                            note = transform_xls_cell_to_str(
                                book, sheet, merged_cells_tree, row_index,
                                column_index)
                            if note and note.strip() == book_description:
                                continue
                            if book_notes or note:
                                book_notes.append((note or u'').rstrip())
                                if note:
                                    blank_notes_count = 0
                                elif blank_notes_count >= 1:
                                    break
                                else:
                                    blank_notes_count += 1
                        if book_notes:
                            break
                    while book_notes and not book_notes[-1]:
                        del book_notes[-1]
                    assert book_notes

                    sheet_node = collections.OrderedDict((
                        (u'Titre' if french else u'Title', book_title),
                        (u'Description', book_description),
                        (u'Sommaire' if french else u'Table of Content',
                         sheet_title_by_slug_by_heading),
                        (u'Notes', literal_unicode(u'\n'.join(book_notes))),
                        (u'Données initiales' if french else u'Source Data',
                         collections.OrderedDict((
                             (u'Producteur' if french else u'Producer',
                              u'Institut des politiques publiques'),
                             (u'Format', u'XLS'),
                             (u'URL', u'http://www.ipp.eu/outils/baremes-ipp/'
                              if french else
                              u'http://www.ipp.eu/en/tools/ipp-tax-and-benefit-tables/'
                              ),
                         ))),
                        (
                            u'Convertisseur' if french else u'Converter',
                            collections.OrderedDict((
                                (u'URL',
                                 u'https://git.framasoft.org/french-tax-and-benefit-tables/ipp-tax-and-benefit-tables-converters'
                                 ),  # noqa
                            ))),
                        (
                            u'Données générées'
                            if french else u'Generated Data',
                            collections.OrderedDict((
                                (u'Format', u'YAML'),
                                (u'URL',
                                 u'https://git.framasoft.org/french-tax-and-benefit-tables/ipp-tax-and-benefit-tables-yaml-raw'
                                 ),  # noqa
                            ))),
                        (u'Licence' if french else u'License',
                         u'Licence ouverte <http://www.etalab.gouv.fr/licence-ouverte-open-licence>'
                         if french else
                         u'Open Licence <http://www.etalab.gouv.fr/licence-ouverte-open-licence>'
                         ),
                    ))

                    yaml_file_path_encoded = os.path.join(
                        book_yaml_dir_encoded,
                        (strings.slugify(sheet_name, transform=strings.upper) +
                         u'.yaml').encode(file_system_encoding),
                    )
                elif sheet_name.startswith(u'Abréviation'):
                    log.warning(u'    Ignoring sheet {} of book {}.'.format(
                        sheet_name, book_name))
                    sheet_warning = u'Sheet ignored.'

                    sheet_title = sheet_title_by_name.get(
                        sheet_name, sheet_name)
                    sheet_node = collections.OrderedDict(
                        ((u'Titre' if french else u'Title', sheet_title), ))

                    yaml_file_path_encoded = os.path.join(
                        book_yaml_dir_encoded,
                        (strings.slugify(sheet_name, transform=strings.upper) +
                         u'.yaml').encode(file_system_encoding),
                    )
                else:
                    descriptions_rows = []
                    labels_rows = []
                    notes_rows = []
                    state = 'taxipp_names'
                    taxipp_names_row = None
                    values_rows = []
                    for row_index in range(sheet.nrows):
                        columns_count = len(sheet.row_values(row_index))
                        if state == 'taxipp_names':
                            taxipp_names_row = [
                                (taxipp_name or u'').strip()
                                for taxipp_name in (
                                    transform_xls_cell_to_str(
                                        book, sheet, merged_cells_tree,
                                        row_index, column_index)
                                    for column_index in range(columns_count))
                            ]
                            state = 'labels'
                            if all(not taxipp_name
                                   for taxipp_name in taxipp_names_row):
                                # The first row is empty => This sheet doesn't contain TaxIPP names.
                                continue
                            # When any TaxIPP name is in lowercase, assume that this row is really the TaxIPP names row.
                            if any(taxipp_name and taxipp_name[0].islower()
                                   for taxipp_name in taxipp_names_row):
                                continue
                            else:
                                log.info(
                                    u'    Sheet "{}" of XLS file "{}" has no row for TaxIPP names.'
                                    .format(sheet_name, filename))
                                # warning = u'Row not found'
                                # if sheet_warning is None:
                                #     sheet_warning = collections.OrderedDict()
                                # if isinstance(sheet_warning, dict):
                                #     sheet_warning[u'Noms TaxIPP'] = warning
                                # else:
                                #     assert isinstance(sheet_warning, basestring), sheet_warning
                                #     sheet_warning = u'\n\n'.join(
                                #         fragment
                                #         for fragment in (sheet_warning, warning)
                                #         if fragment
                                #         )
                                taxipp_names_row = []
                        if state == 'labels':
                            first_cell_value, error = conv.pipe(
                                cell_to_row_first_cell,
                                conv.not_none)(transform_xls_cell_to_json(
                                    book, sheet, merged_cells_tree, row_index,
                                    0),
                                               state=conv.default_state)
                            if error is not None:
                                # First cell of row is not a valid first cell of a values row => Assume it is a label.
                                labels_rows.append([
                                    u' '.join((label or u'').split()).strip()
                                    for label in (transform_xls_cell_to_str(
                                        book, sheet, merged_cells_tree,
                                        row_index, column_index)
                                                  for column_index in range(
                                                      columns_count))
                                ])
                                continue
                            state = 'values'
                        if state == 'values':
                            first_cell_value, error = cell_to_row_first_cell(
                                transform_xls_cell_to_json(
                                    book, sheet, merged_cells_tree, row_index,
                                    0),
                                state=conv.default_state)
                            if error is None:
                                # First cell of row is a valid date or year.
                                values_row = [
                                    value.strip() if isinstance(
                                        value, basestring) else value
                                    for value in (transform_xls_cell_to_json(
                                        book,
                                        sheet,
                                        merged_cells_tree,
                                        row_index,
                                        column_index,
                                        empty_white_value=u'nc')
                                                  for column_index in range(
                                                      columns_count))
                                ]
                                if isinstance(first_cell_value, datetime.date):
                                    assert first_cell_value.year < 2601, 'Invalid date {} in {} at row {}'.format(
                                        first_cell_value, sheet_name,
                                        row_index + 1)
                                    values_rows.append(values_row)
                                    continue
                                if isinstance(first_cell_value, basestring) \
                                        and aad_re.match(first_cell_value) is not None:
                                    values_rows.append(values_row)
                                    continue
                                if all(value in (None, u'', u'nc')
                                       for value in values_row):
                                    # If first cell is empty and all other cells in line are also empty, ignore this
                                    # line.
                                    continue
                                # First cell has no date and other cells in row are not empty => Assume it is a note.
                            state = 'notes'
                        if state == 'notes':
                            first_cell_value = transform_xls_cell_to_json(
                                book, sheet, merged_cells_tree, row_index, 0)
                            if isinstance(
                                    first_cell_value, basestring
                            ) and first_cell_value.strip().lower() == 'notes':
                                notes_rows.append([
                                    (line or u'').rstrip()
                                    for line in (transform_xls_cell_to_str(
                                        book, sheet, merged_cells_tree,
                                        row_index, column_index)
                                                 for column_index in range(
                                                     columns_count))
                                ])
                                continue
                            state = 'description'
                        assert state == 'description'
                        descriptions_rows.append([
                            (line or u'').strip() for line in (
                                transform_xls_cell_to_str(
                                    book, sheet, merged_cells_tree, row_index,
                                    column_index)
                                for column_index in range(columns_count))
                        ])

                    sheet_node = collections.OrderedDict()

                    sheet_title = sheet_title_by_name.get(sheet_name)
                    if sheet_title is not None:
                        sheet_node[u'Titre'] = sheet_title

                    sheet_node[u'Titre court'] = sheet_name

                    labels = []
                    for labels_row in labels_rows:
                        for column_index, label in enumerate(labels_row):
                            if label is None:
                                continue
                            label = label.strip()
                            if not label:
                                continue
                            while column_index >= len(labels):
                                labels.append([])
                            column_labels = labels[column_index]
                            if not column_labels or column_labels[-1] != label:
                                column_labels.append(label)
                    labels = [
                        (tuple(
                            label_stripped
                            for label_stripped in ((label or u'').strip()
                                                   for label in column_labels1)
                            if label_stripped) if column_labels1 else None)
                        or (u'Colonne sans titre', )
                        for index, column_labels1 in enumerate(labels, 1)
                    ]
                    assert labels

                    taxipp_name_by_column_labels = collections.OrderedDict()
                    for column_labels, taxipp_name in zip(
                            labels, taxipp_names_row):
                        if not taxipp_name:
                            continue
                        taxipp_name_by_column_label = taxipp_name_by_column_labels
                        for column_label in column_labels[:-1]:
                            taxipp_name_by_column_label = taxipp_name_by_column_label.setdefault(
                                column_label, collections.OrderedDict())
                        taxipp_name_by_column_label[
                            column_labels[-1]] = taxipp_name
                    if taxipp_name_by_column_labels:
                        sheet_node[
                            u'Noms TaxIPP'] = taxipp_name_by_column_labels

                    sheet_values = []
                    for value_row in values_rows:
                        cell_by_column_labels = collections.OrderedDict()
                        for column_labels, cell in zip(labels, value_row):
                            if cell is None or cell == '':
                                continue
                            cell_by_column_label = cell_by_column_labels
                            for column_label in column_labels[:-1]:
                                cell_by_column_label = cell_by_column_label.setdefault(
                                    column_label, collections.OrderedDict())
                            # Merge (amount, unit) couples to a string to simplify YAML.
                            if isinstance(cell, tuple):
                                cell = transform_amount_tuple_to_str(cell)
                            if isinstance(cell, basestring) and u'\n' in cell:
                                cell = literal_unicode(cell)
                            cell_by_column_label[column_labels[-1]] = cell
                        sheet_values.append(cell_by_column_labels)
                    if sheet_values:
                        sheet_node[u'Valeurs'] = sheet_values

                    notes = u'\n'.join([
                        line.rstrip() for line in u'\n'.join([
                            u' | '.join(cell for cell in row if cell).rstrip()
                            for row in notes_rows
                        ]).split(u'\n')
                    ]).rstrip()
                    if notes:
                        sheet_node[u'Notes'] = literal_unicode(notes)

                    description = u'\n'.join([
                        line.rstrip() for line in u'\n'.join([
                            u' | '.join(cell for cell in row if cell).rstrip()
                            for row in descriptions_rows
                        ]).split(u'\n')
                    ]).rstrip()
                    if description:
                        sheet_node[u'Description'] = literal_unicode(
                            description)

                    yaml_file_path_encoded = os.path.join(
                        book_yaml_dir_encoded,
                        (strings.slugify(sheet_name) +
                         u'.yaml').encode(file_system_encoding),
                    )

                if sheet_error:
                    sheet_node[u'ERRORS'] = literal_unicode(sheet_error) \
                        if isinstance(sheet_error, basestring) and u'\n' in sheet_error \
                        else sheet_error
                if sheet_warning:
                    sheet_node[u'WARNINGS'] = literal_unicode(sheet_warning) \
                        if isinstance(sheet_warning, basestring) and u'\n' in sheet_warning \
                        else sheet_warning
                with open(yaml_file_path_encoded, 'w') as yaml_file:
                    yaml.dump(sheet_node,
                              yaml_file,
                              allow_unicode=True,
                              default_flow_style=False,
                              indent=2,
                              width=120)
            except:
                message = u'An exception occurred when parsing sheet "{}" of XLS file "{}".'.format(
                    sheet_name, filename)
                log.exception(u'    {}'.format(message))
                sheet_error = literal_unicode(u'\n\n'.join(
                    fragment for fragment in (
                        unicode(sheet_error
                                ) if sheet_error is not None else None,
                        message,
                        traceback.format_exc().decode('utf-8'),
                    ) if fragment))

            if sheet_error:
                error_by_sheet_name[sheet_name] = sheet_error
            if sheet_warning:
                warning_by_sheet_name[sheet_name] = sheet_warning

        if error_by_sheet_name:
            yaml_file_path_encoded = os.path.join(
                book_yaml_dir_encoded,
                u'ERRORS.yaml'.encode(file_system_encoding),
            )
            with open(yaml_file_path_encoded, 'w') as yaml_file:
                yaml.dump(error_by_sheet_name,
                          yaml_file,
                          allow_unicode=True,
                          default_flow_style=False,
                          indent=2,
                          width=120)
            error_by_book_name[book_name] = error_by_sheet_name
        if warning_by_sheet_name:
            yaml_file_path_encoded = os.path.join(
                book_yaml_dir_encoded,
                u'WARNINGS.yaml'.encode(file_system_encoding),
            )
            with open(yaml_file_path_encoded, 'w') as yaml_file:
                yaml.dump(warning_by_sheet_name,
                          yaml_file,
                          allow_unicode=True,
                          default_flow_style=False,
                          indent=2,
                          width=120)
            warning_by_book_name[book_name] = warning_by_sheet_name

    if error_by_book_name:
        yaml_file_path_encoded = os.path.join(
            yaml_raw_dir,
            u'ERRORS.yaml'.encode(file_system_encoding),
        )
        with open(yaml_file_path_encoded, 'w') as yaml_file:
            yaml.dump(error_by_book_name,
                      yaml_file,
                      allow_unicode=True,
                      default_flow_style=False,
                      indent=2,
                      width=120)
    if warning_by_book_name:
        yaml_file_path_encoded = os.path.join(
            yaml_raw_dir,
            u'WARNINGS.yaml'.encode(file_system_encoding),
        )
        with open(yaml_file_path_encoded, 'w') as yaml_file:
            yaml.dump(warning_by_book_name,
                      yaml_file,
                      allow_unicode=True,
                      default_flow_style=False,
                      indent=2,
                      width=120)
Code example #26
File: auth.py Project: onmytab/openfisca-web-ui
def login(req):
    """Authorization request."""
    ctx = contexts.Ctx(req)

    params = req.POST
    inputs = dict(
        assertion = params.get('assertion'),
        )
    data, errors = conv.struct(
        dict(
            assertion = conv.pipe(
                conv.cleanup_line,
                conv.not_none,
                ),
            ),
        )(inputs, state = ctx)
    if errors is not None:
        return wsgihelpers.bad_request(ctx, explanation = ctx._(u'Login Error: {0}').format(errors))

    response = requests.post('https://verifier.login.persona.org/verify',
        data = dict(
            audience = urls.get_full_url(ctx),
            assertion = data['assertion'],
            ),
        verify = True,
        )
    if not response.ok:
        return wsgihelpers.internal_error(ctx,
            dump = response.text,
            explanation = ctx._(u'Error while verifying authentication assertion'),
            )
    verification_data = json.loads(response.content)
    # Check if the assertion was valid.
    if verification_data['status'] != 'okay':
        return wsgihelpers.internal_error(ctx,
            dump = response.text,
            explanation = ctx._(u'Error while verifying authentication assertion'),
            )

    registered_account = model.Account.find_one(
        dict(
            email = verification_data['email'],
            ),
        as_class = collections.OrderedDict,
        )
    session = ctx.session
    if session is None:
        ctx.session = session = model.Session()
        session.expiration = datetime.datetime.utcnow() + datetime.timedelta(hours = 4)
    if registered_account is None:
        user = session.user
        if user is None:
            user = model.Account()
            user.api_key = uuidhelpers.generate_uuid()
        user.email = verification_data['email']
        user.full_name = verification_data['email']
        user.slug = strings.slugify(user.full_name)
        user.compute_words()
        user.save(safe = True)
        session.user = user
    else:
        session.user = registered_account
    session.anonymous_token = uuidhelpers.generate_uuid()
    session.token = uuidhelpers.generate_uuid()
    session.save(safe = True)

    req.response.set_cookie(conf['cookie'], session.token, httponly = True, secure = req.scheme == 'https')
    return wsgihelpers.no_content(ctx)
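
For readers unfamiliar with the conv pipeline used in this controller: conv.struct builds a converter from a mapping of per-field converters and returns a (data, errors) couple. A small sketch, assuming conv is Biryani's baseconv module (as in openfisca-web-ui):

from biryani import baseconv as conv  # assumption: conv here is Biryani's baseconv

inputs = dict(assertion=u'  some-assertion-token  ')
data, errors = conv.struct(
    dict(
        assertion=conv.pipe(
            conv.cleanup_line,  # strip surrounding whitespace; an empty string becomes None
            conv.not_none,  # reject a missing or empty assertion
            ),
        ),
    )(inputs)
assert errors is None
assert data['assertion'] == u'some-assertion-token'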
Code example #27
def login(req):
    """Authorization request."""
    ctx = contexts.Ctx(req)

    params = req.POST
    inputs = dict(assertion=params.get('assertion'), )
    data, errors = conv.struct(
        dict(assertion=conv.pipe(
            conv.cleanup_line,
            conv.not_none,
        ), ), )(inputs, state=ctx)
    if errors is not None:
        return wsgihelpers.bad_request(
            ctx, explanation=ctx._(u'Login Error: {0}').format(errors))

    response = requests.post(
        'https://verifier.login.persona.org/verify',
        data=dict(
            audience=urls.get_full_url(ctx),
            assertion=data['assertion'],
        ),
        verify=True,
    )
    if not response.ok:
        return wsgihelpers.internal_error(
            ctx,
            dump=response.text,
            explanation=ctx._(
                u'Error while verifying authentication assertion'),
        )
    verification_data = json.loads(response.content)
    # Check if the assertion was valid.
    if verification_data['status'] != 'okay':
        return wsgihelpers.internal_error(
            ctx,
            dump=response.text,
            explanation=ctx._(
                u'Error while verifying authentication assertion'),
        )

    registered_account = model.Account.find_one(
        dict(email=verification_data['email'], ),
        as_class=collections.OrderedDict,
    )
    session = ctx.session
    if session is None:
        ctx.session = session = model.Session()
        session.expiration = datetime.datetime.utcnow() + datetime.timedelta(
            hours=4)
    if registered_account is None:
        user = session.user
        if user is None:
            user = model.Account()
            user.api_key = uuidhelpers.generate_uuid()
        user.email = verification_data['email']
        user.full_name = verification_data['email']
        user.slug = strings.slugify(user.full_name)
        user.compute_words()
        user.save(safe=True)
        session.user = user
    else:
        session.user = registered_account
    session.anonymous_token = uuidhelpers.generate_uuid()
    session.token = uuidhelpers.generate_uuid()
    session.save(safe=True)

    req.response.set_cookie(conf['cookie'],
                            session.token,
                            httponly=True,
                            secure=req.scheme == 'https')
    return wsgihelpers.no_content(ctx)
Code example #28
        'Compensated Own-Price and Cross-Price Elasticities -- Modest and aged more than 60'
    ),
]

cross_price_elasticities = pandas.DataFrame()

for table in tables:
    age = table.pop('age')
    name = table.pop('name')
    revenus = table.pop('revenus')
    df = pandas.read_excel(elasticities_origin_xlsx, **table)
    df.dropna(inplace=True)
    df.set_index('Unnamed: 0', inplace=True)
    df.index.name = 'product'

    if age is None or revenus is None:
        df.name = name
        csv_path_name = os.path.join(elasticities_path, slugify(name) + '.csv')
        df.to_csv(csv_path_name)

    else:
        df['age'] = age
        df['revenus'] = revenus
        cross_price_elasticities = cross_price_elasticities.append(df)

csv_path_name = os.path.join(
    elasticities_path,
    'cross_price_elasticities.csv',
)
cross_price_elasticities.to_csv(csv_path_name)
Code example #29
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--ipp-translations',
        default = os.path.join(param_dir, 'ipp-tax-and-benefit-tables-to-parameters.yaml'),
        help = 'path of YAML file containing the association between IPP fields and OpenFisca parameters')
    parser.add_argument('-o', '--origin', default = os.path.join(param_dir, 'param.xml'),
        help = 'path of XML file containing the original OpenFisca parameters')
    parser.add_argument('-p', '--param-translations',
        default = os.path.join(param_dir, 'param-to-parameters.yaml'),
        help = 'path of YAML file containing the association between param elements and OpenFisca parameters')
    parser.add_argument('-s', '--source-dir', default = 'yaml-clean',
        help = 'path of source directory containing clean IPP YAML files')
    parser.add_argument('-t', '--target', default = os.path.join(package_dir, 'parameters'),
        help = 'path of generated directory of XML files merging IPP fields with OpenFisca parameters')
    parser.add_argument('-v', '--verbose', action = 'store_true', default = False, help = "increase output verbosity")
    args = parser.parse_args()
    logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)

    assert os.path.isdir(args.source_dir), args.source_dir

    file_system_encoding = sys.getfilesystemencoding()

    original_element_tree = etree.parse(args.origin)
    original_root_element = original_element_tree.getroot()

    # Apply translations to original parameters.
    with open(args.param_translations) as param_translations_file:
        param_translations = yaml.load(param_translations_file)
    for old_path, new_path in param_translations.iteritems():
        parent_element = None
        element = original_root_element
        for name in old_path.split('.'):
            for child in element:
                if child.get('code') == name:
                    parent_element = element
                    element = child
                    break
            else:
                assert False, 'Path "{}" not found in "{}"'.format(old_path, args.origin)
        parent_element.remove(element)
        if new_path is not None:
            parent_element = original_root_element
            split_new_path = new_path.split('.')
            for name in split_new_path[:-1]:
                for child in parent_element:
                    if child.get('code') == name:
                        parent_element = child
                        break
                else:
                    parent_element = etree.SubElement(parent_element, 'NODE', attrib = dict(
                        code = name,
                        ))
            name = split_new_path[-1]
            assert all(
                child.get('code') != name
                for child in parent_element
                ), 'Path "{}" already exists in "{}"'.format(new_path, args.origin)
            element.set('code', name)
            parent_element.append(element)

    # Build `tree` from IPP YAML files.
    tree = collections.OrderedDict()
    for source_dir_encoded, directories_name_encoded, filenames_encoded in os.walk(args.source_dir):
        directories_name_encoded.sort()
        for filename_encoded in sorted(filenames_encoded):
            if not filename_encoded.endswith('.yaml'):
                continue
            filename = filename_encoded.decode(file_system_encoding)
            sheet_name = os.path.splitext(filename)[0]
            source_file_path_encoded = os.path.join(source_dir_encoded, filename_encoded)
            relative_file_path_encoded = source_file_path_encoded[len(args.source_dir):].lstrip(os.sep)
            relative_file_path = relative_file_path_encoded.decode(file_system_encoding)
            if sheet_name.isupper():
                continue
            assert sheet_name.islower(), sheet_name
            log.info(u'Loading file {}'.format(relative_file_path))
            with open(source_file_path_encoded) as source_file:
                data = yaml.load(source_file)
            rows = data.get(u"Valeurs")
            if rows is None:
                log.info(u'  Skipping file {} without "Valeurs"'.format(relative_file_path))
                continue
            row_by_start = {}
            for row in rows:
                start = row.get(u"Date d'effet")
                if start is None:
                    for date_name in date_names:
                        start = row.get(date_name)
                        if start is not None:
                            break
                    else:
                        # No date found. Skip row.
                        continue
                elif not isinstance(start, datetime.date):
                    start = start[u"Année Revenus"]
                row_by_start[start] = row
            sorted_row_by_start = sorted(row_by_start.iteritems())

            relative_ipp_paths_by_start = {}
            unsorted_relative_ipp_paths = set()
            for start, row in sorted_row_by_start:
                relative_ipp_paths_by_start[start] = start_relative_ipp_paths = []
                for name, child in row.iteritems():
                    if name in date_names:
                        continue
                    if name in note_names:
                        continue
                    if name in reference_names:
                        continue
                    start_relative_ipp_paths.extend(
                        (name,) + tuple(path)
                        for path, value in iter_ipp_values(child)
                        )
                unsorted_relative_ipp_paths.update(start_relative_ipp_paths)

            def compare_relative_ipp_paths(x, y):
                if x == y:
                    return 0
                for relative_ipp_paths in relative_ipp_paths_by_start.itervalues():
                    try:
                        return cmp(relative_ipp_paths.index(x), relative_ipp_paths.index(y))
                    except ValueError:
                        # Either x or y paths are missing in relative_ipp_paths => Their order can't be compared.
                        continue
                return -1

            sorted_relative_ipp_paths = sorted(unsorted_relative_ipp_paths, cmp = compare_relative_ipp_paths)
            # tax_rate_tree_by_bracket_type = {}

            for start, row in sorted_row_by_start:
                for relative_ipp_path in sorted_relative_ipp_paths:
                    value = row
                    for fragment in relative_ipp_path:
                        value = value.get(fragment)
                        if value is None:
                            break

                    if value in (u'-', u'na', u'nc'):
                        # Value is unknown. Previous value must be propagated.
                        continue
                    ipp_path = [
                        fragment if fragment in ('RENAME', 'TRANCHE', 'TYPE') else strings.slugify(fragment,
                            separator = u'_')
                        for fragment in itertools.chain(
                            relative_file_path.split(os.sep)[:-1],
                            [sheet_name],
                            relative_ipp_path,
                            )
                        ]

                    sub_tree = tree
                    for fragment in ipp_path[:-1]:
                        sub_tree = sub_tree.setdefault(fragment, collections.OrderedDict())
                    fragment = ipp_path[-1]
                    sub_tree = sub_tree.setdefault(fragment, [])
                    if sub_tree:
                        last_leaf = sub_tree[-1]
                        if last_leaf['value'] == value:
                            continue
                        last_leaf['stop'] = start - datetime.timedelta(days = 1)
                    sub_tree.append(dict(
                        start = start,
                        value = value,
                        ))

    ipp_tax_and_benefit_tables_to_parameters.transform_ipp_tree(tree)

    root_element = transform_node_to_element(u'root', tree)
    add_origin_openfisca_attrib(original_root_element)
    merge_elements(root_element, original_root_element)
    # From now on, `original_root_element` is discarded.

    if os.path.exists(args.target):
        for xml_file_path in glob.glob(os.path.join(args.target, '*.xml')):
            os.remove(xml_file_path)
    else:
        os.mkdir(args.target)
    for child_element in root_element[:]:
        root_element.remove(child_element)
        element_tree = etree.ElementTree(child_element)
        sort_elements(child_element)
        reindent(child_element)
        element_tree.write(os.path.join(args.target, '{}.xml'.format(child_element.attrib['code'])), encoding = 'utf-8')
    element_tree = etree.ElementTree(root_element)
    reindent(root_element)
    element_tree.write(os.path.join(args.target, '__root__.xml'), encoding = 'utf-8')

    return 0
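
The heart of the tree building above is the leaf merge: a new (start, value) row closes the previous value the day before its start, and consecutive identical values are collapsed. A standalone sketch of that step, with illustrative dates rather than real source data:

import datetime

leaf = []
for start, value in [
        (datetime.date(2014, 1, 1), 0.14),
        (datetime.date(2015, 1, 1), 0.14),  # unchanged value => merged into previous period
        (datetime.date(2016, 1, 1), 0.145),
        ]:
    if leaf:
        last_leaf = leaf[-1]
        if last_leaf['value'] == value:
            continue
        last_leaf['stop'] = start - datetime.timedelta(days=1)
    leaf.append(dict(start=start, value=value))

assert leaf[0]['stop'] == datetime.date(2015, 12, 31)
assert len(leaf) == 2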
Code example #30
File: pois.py Project: Gentux/etalage
    def iter_ids(cls, ctx, categories_slug = None, competence_territories_id = None, competence_type = None,
            presence_territory = None, term = None):
        intersected_sets = []

        if competence_territories_id is not None:
            competence_territories_sets = []
            if competence_type in (None, 'by_territory'):
                competence_territories_sets.extend(
                    cls.ids_by_competence_territory_id.get(competence_territory_id)
                    for competence_territory_id in competence_territories_id
                    )
            if competence_type in (None, 'by_nature'):
                competence_territories_sets.append(cls.ids_by_competence_territory_id.get(None))
            territory_competent_pois_id = ramdb.union_set(competence_territories_sets)
            if not territory_competent_pois_id:
                return set()
            intersected_sets.append(territory_competent_pois_id)

        if presence_territory is not None:
            territory_present_pois_id = cls.ids_by_presence_territory_id.get(presence_territory._id)
            if not territory_present_pois_id:
                return set()
            intersected_sets.append(territory_present_pois_id)

        if ctx.base_categories_slug is not None:
            base_categories_sets = []
            base_categories_slug = copy(ctx.base_categories_slug or [])
            for category_slug in set(base_categories_slug or []):
                if category_slug is not None:
                    category_pois_id = cls.ids_by_category_slug.get(category_slug)
                    if category_pois_id:
                        base_categories_sets.append(category_pois_id)
            intersected_sets.append(ramdb.union_set(base_categories_sets))

        for category_slug in set(categories_slug or []):
            if category_slug is not None:
                category_pois_id = cls.ids_by_category_slug.get(category_slug)
                if not category_pois_id:
                    return set()
                intersected_sets.append(category_pois_id)

        if conf['index.date.field']:
            current_datetime = datetime.datetime.utcnow()
            ids_by_begin_datetime_set = set()
            for poi_begin_datetime, poi_id in cls.ids_by_begin_datetime:
                if poi_begin_datetime is None or current_datetime >= poi_begin_datetime:
                    ids_by_begin_datetime_set.add(poi_id)
                else:
                    break
            ids_by_end_datetime_set = set()
            for poi_end_datetime, poi_id in cls.ids_by_end_datetime:
                if poi_end_datetime is None or current_datetime <= poi_end_datetime:
                    ids_by_end_datetime_set.add(poi_id)
                else:
                    break
            intersected_sets.append(ramdb.intersection_set([ids_by_begin_datetime_set, ids_by_end_datetime_set]))

        # We should filter on term *after* having looked for competent organizations. Otherwise, when no organization
        # matching term is found, the nearest organizations will be used even when there are competent organizations
        # (that don't match the term).
        if term:
            prefixes = strings.slugify(term).split(u'-')
            pois_id_by_prefix = {}
            for prefix in prefixes:
                if prefix in pois_id_by_prefix:
                    # TODO? Handle pois with several words sharing the same prefix?
                    continue
                pois_id_by_prefix[prefix] = ramdb.union_set(
                    pois_id
                    for word, pois_id in cls.ids_by_word.iteritems()
                    if word.startswith(prefix)
                    ) or set()
            intersected_sets.extend(pois_id_by_prefix.itervalues())

        found_pois_id = ramdb.intersection_set(intersected_sets)
        if found_pois_id is None:
            return cls.indexed_ids
        return found_pois_id
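
The term filter above works prefix by prefix: each slugified prefix selects the union of the id sets whose indexed word starts with it, and the final result is the intersection across prefixes. A self-contained sketch with a toy index (hypothetical data; plain set operations stand in for ramdb.union_set and ramdb.intersection_set):

ids_by_word = {
    u'bibliotheque': set([1, 2]),
    u'mairie': set([2, 3]),
    u'musee': set([4]),
    }

def ids_matching_term(term):
    prefixes = term.lower().split()  # stand-in for strings.slugify(term).split(u'-')
    selected = None
    for prefix in prefixes:
        ids = set()
        for word, word_ids in ids_by_word.items():
            if word.startswith(prefix):
                ids |= word_ids  # union over words sharing this prefix
        selected = ids if selected is None else selected & ids  # intersect across prefixes
    return selected or set()

assert ids_matching_term(u'bib') == set([1, 2])
assert ids_matching_term(u'mairie musee') == set()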
Code example #31
File: pois.py Project: Gentux/etalage-passim
    def is_multimodal_info_service(self):
        for field in self.fields:
            if field.id == 'boolean' and strings.slugify(field.label) == 'service-d-information-multimodale':
                return conv.check(conv.guess_bool(field.value))
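
Slugifying the label makes the comparison above robust to case, accents and punctuation. Assuming strings is Biryani's strings module, as imported by these projects, the following holds:

from biryani import strings  # assumption: the strings module used by these projects

assert strings.slugify(u"Service d'information multimodale") == u'service-d-information-multimodale'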
Code example #32
File: pois.py Project: Gentux/etalage-passim
    def load(cls, poi_bson):
        metadata = poi_bson['metadata']
        last_update = metadata['last-update']
        if poi_bson.get('geo') is None:
            geo = None
        else:
            geo = poi_bson['geo'][0]
            if len(geo) > 2 and geo[2] == 0:
                # Don't use geographical coordinates with a 0 accuracy because their coordinates may be None.
                geo = None
        self = cls(
            _id = poi_bson['_id'],
            geo = geo,
            last_update_datetime = last_update['date'],
            last_update_organization = last_update['organization'],
            name = metadata['title'],
            schema_name = metadata['schema-name'],
            )

        if conf['theme_field'] is None:
            theme_field_id = None
            theme_field_name = None
        else:
            theme_field_id = conf['theme_field']['id']
            theme_field_name = conf['theme_field'].get('name')
        fields_position = {}
        fields = []
        for field_id in metadata['positions']:
            field_position = fields_position.get(field_id, 0)
            fields_position[field_id] = field_position + 1
            field_metadata = metadata[field_id][field_position]
            field_value = poi_bson[field_id][field_position]
            field = Field.load(field_id, field_metadata, field_value)
            if field.id == u'adr' and self.postal_distribution_str is None:
                for sub_field in (field.value or []):
                    if sub_field.id == u'postal-distribution':
                        self.postal_distribution_str = sub_field.value
                    elif sub_field.id == u'street-address':
                        self.street_address = sub_field.value
            elif field.id == u'link' and field.relation == u'parent':
                assert self.parent is None, str(self)
                self.parent_id = field.value

            if field_id == theme_field_id and (
                    theme_field_name is None or theme_field_name == strings.slugify(field.label)):
                if field.id == u'organism-type':
                    organism_type_slug = ramdb.category_slug_by_pivot_code.get(field.value)
                    if organism_type_slug is None:
                        log.warning('Ignoring organism type "{0}" without matching category.'.format(field.value))
                    else:
                        self.theme_slug = organism_type_slug
                else:
                    theme_slug = strings.slugify(field.value)
                    if theme_slug in ramdb.category_by_slug:
                        self.theme_slug = theme_slug
                    else:
                        log.warning('Ignoring theme "{0}" without matching category.'.format(field.value))

            fields.append(field)
        if fields:
            self.fields = fields

        # Temporarily store bson in poi because it is needed by index_pois.
        self.bson = poi_bson

        cls.instance_by_id[self._id] = self
        if self.parent_id is not None:
            cls.ids_by_parent_id.setdefault(self.parent_id, set()).add(self._id)
        return self
Code example #33
File: model.py Project: trojette/openfisca-web-ui
    def __init__(self, **attributes):
        super(TestCase, self).__init__(**attributes)
        if self.title is None:
            self.title = babel.dates.format_datetime(datetime.datetime.utcnow())
            self.slug = strings.slugify(self.title)
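
Illustrative only: when no title is given, the test case receives a timestamp title and a slug derived from it. Assuming babel and Biryani's strings module, as in openfisca-web-ui:

import datetime

import babel.dates
from biryani import strings  # assumption: Biryani's strings module

title = babel.dates.format_datetime(datetime.datetime.utcnow())  # e.g. u'Jan 1, 2014, 12:00:00 PM'
slug = strings.slugify(title)  # e.g. u'jan-1-2014-12-00-00-pm'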
Code example #34
def main(path, date, option = 'all_months', month = 1):
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dir', default = path + date, help = 'path of IPP XLS directory')
    parser.add_argument('-v', '--verbose', action = 'store_true', default = False, help = "increase output verbosity")
    args = parser.parse_args()
    # args.dir = path
    logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)

    forbidden_sheets = {
        u'Impot Revenu': (u'Barème IGR',),
        u'prelevements sociaux': (u'Abréviations', u'ASSIETTE PU', u'AUBRYI', u'AUBRYII'),
        u'Taxation indirecte': (u'TVA par produit',),
        }
    baremes = [
        u'Prestations',
        u'Chomage',
        u'Impot Revenu',
        u'prelevements sociaux',
        u'Taxation indirecte',
        u'Taxation du capital',
        u'Taxes locales',
        u'Marche du travail',
        ]
    for bareme in baremes:
        log.info(u'Parsing file {}'.format(bareme))
        xls_path = os.path.join(args.dir.decode('utf-8'), u"Baremes IPP - {0}.xls".format(bareme))
        # xls_path = os.path.join(path, u"Baremes IPP - {0}.xls".format(bareme))
        book = xlrd.open_workbook(filename = xls_path, formatting_info = True)
        sheet_names = [
            sheet_name
            for sheet_name in book.sheet_names()
            if not sheet_name.startswith((u'Sommaire', u'Outline'))
                and sheet_name not in forbidden_sheets.get(bareme, [])
            ]
        vector_by_taxipp_name = {}
        for sheet_name in sheet_names:
            log.info(u'  Parsing sheet {}'.format(sheet_name))
            sheet = book.sheet_by_name(sheet_name)

            # Extract coordinates of merged cells.
            merged_cells_tree = {}
            for row_low, row_high, column_low, column_high in sheet.merged_cells:
                for row_index in range(row_low, row_high):
                    cell_coordinates_by_merged_column_index = merged_cells_tree.setdefault(
                        row_index, {})
                    for column_index in range(column_low, column_high):
                        cell_coordinates_by_merged_column_index[column_index] = (row_low, column_low)

            descriptions_rows = []
            labels_rows = []
            notes_rows = []
            state = 'taxipp_names'
            taxipp_names_row = None
            values_rows = []
            for row_index in range(sheet.nrows):
                ncols = len(sheet.row_values(row_index))
                if state == 'taxipp_names':
                    taxipp_names_row = [
                        taxipp_name
                        for taxipp_name in (
                            transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            for column_index in range(ncols)
                            )
                        ]
                    state = 'labels'
                    continue
                if state == 'labels':
                    first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                    date_or_year, error = conv.pipe(
                        conv.test_isinstance((int, basestring)),
                        cell_to_date_or_year,
                        conv.not_none,
                        )(first_cell_value, state = conv.default_state)
                    if error is not None:
                        # First cell of row is not a date => Assume it is a label.
                        labels_rows.append([
                            transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            for column_index in range(ncols)
                            ])
                        continue
                    state = 'values'
                if state == 'values':
                    first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                    if first_cell_value is None or isinstance(first_cell_value, (int, basestring)):
                        date_or_year, error = cell_to_date_or_year(first_cell_value, state = conv.default_state)
                        if error is None:
                            # First cell of row is a valid date or year.
                            values_row = [
                                transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, column_index)
                                for column_index in range(ncols)
                                ]
                            if date_or_year is not None:
                                assert date_or_year.year < 2601, 'Invalid date {} in {} at row {}'.format(date_or_year,
                                    sheet_name, row_index + 1)
                                values_rows.append(values_row)
                                continue
                            if all(value in (None, u'') for value in values_row):
                                # If first cell is empty and all other cells in line are also empty, ignore this line.
                                continue
                            # First cell has no date and other cells in row are not empty => Assume it is a note.
                    state = 'notes'
                if state == 'notes':
                    first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                    if isinstance(first_cell_value, basestring) and first_cell_value.strip().lower() == 'notes':
                        notes_rows.append([
                            transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            for column_index in range(ncols)
                            ])
                        continue
                    state = 'description'
                assert state == 'description'
                descriptions_rows.append([
                    transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                    for column_index in range(ncols)
                    ])

            dates = [
                conv.check(cell_to_date_or_year)(
                    row[1] if bareme == u'Impot Revenu' else row[0],
                    state = conv.default_state,
                    ).replace(day = 1)
                for row in values_rows
                ]
            for column_index, taxipp_name in enumerate(taxipp_names_row):
                if taxipp_name and strings.slugify(taxipp_name) not in ('date', 'date-ir', 'date-rev', 'note', 'ref-leg', 'notes'):
                    vector = [
                        transform_cell_value(date, row[column_index])
                        for date, row in zip(dates, values_rows)
                        ]
                    vector = [
                        cell if not isinstance(cell, basestring) or cell == u'nc' else '-'
                        for cell in vector
                        ]
                    vector_by_taxipp_name[taxipp_name] = pd.Series(vector, index = dates)
        monthstime = [
            datetime.datetime(y, m, 1, 0, 0, 0)
            for y in range(1914, 2021)
            for m in range(1, 13)
            ]
        data_frame = pd.DataFrame(index = monthstime)
        for taxipp_name, vector in vector_by_taxipp_name.iteritems():
            data_frame[taxipp_name] = np.nan
            data_frame.loc[vector.index.values, taxipp_name] = vector.values
        data_frame.replace(u'nc', np.nan, inplace=True)
        data_frame.fillna(method = 'pad', inplace = True)
        data_frame.dropna(axis = 0, how = 'all', inplace = True)
        if option == 'mean_by_year':
            data_frame.replace('-', 0, inplace=True)
            data_frame = data_frame.resample('AS', how='mean')
        if option == 'which_month_in_year':
            data_frame = data_frame.iloc[data_frame.index.month == month]
        data_frame.to_csv(args.dir + '/' + bareme + '.csv', encoding = 'utf-8')
        print u"Voilà, la table agrégée de {} est créée !".format(bareme)

    return 0
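
A minimal invocation sketch for this entry point (the path and date values are hypothetical; main() concatenates them to build the default --dir value):

if __name__ == '__main__':
    # Aggregate every bareme of the hypothetical 2015-01 IPP XLS dump to yearly means.
    sys.exit(main('/path/to/baremes/', '2015-01', option = 'mean_by_year'))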
Code example #35
0
File: pois.py Project: Gentux/etalage-passim
    def index(self, indexed_poi_id):
        poi_bson = self.bson
        metadata = poi_bson['metadata']
        for category_slug in (metadata.get('categories-index') or set()):
            self.ids_by_category_slug.setdefault(category_slug, set()).add(indexed_poi_id)

        if conf['index.date.field']:
            for date_range_index, date_range_metadata in enumerate(metadata.get('date-range') or []):
                if date_range_metadata['label'] == conf['index.date.field']:
                    date_range_values = poi_bson['date-range'][date_range_index]
                    date_range_begin = date_range_values.get('date-range-begin', [None])[0]
                    date_range_end = date_range_values.get('date-range-end', [None])[0]

                    if date_range_begin is not None:
                        # Keep ids_by_begin_datetime sorted: find the first later begin datetime.
                        for index, (begin_datetime, poi_id) in enumerate(self.ids_by_begin_datetime):
                            if begin_datetime is not None and begin_datetime > date_range_begin:
                                break
                        else:
                            # No later begin datetime (or empty list): append at the end.
                            index = len(self.ids_by_begin_datetime)
                    else:
                        index = 0
                    self.ids_by_begin_datetime.insert(index, (date_range_begin, indexed_poi_id))
                    if date_range_end is not None:
                        for index, (end_datetime, poi_id) in enumerate(self.ids_by_end_datetime):
                            if end_datetime is not None and end_datetime < date_range_end:
                                break
                        else:
                            # No earlier end datetime (or empty list): append at the end.
                            index = len(self.ids_by_end_datetime)
                    else:
                        index = 0
                    self.ids_by_end_datetime.insert(index, (date_range_end, indexed_poi_id))
            if not metadata.get('date-range'):
                self.ids_by_begin_datetime.append((None, indexed_poi_id))
                self.ids_by_end_datetime.append((None, indexed_poi_id))
        self.ids_by_last_update_datetime.append((self.last_update_datetime, indexed_poi_id))

        for i, territory_metadata in enumerate(metadata.get('territories') or []):
            # Note: Don't fail when territory doesn't exist, because Passim can be configured to ignore some kinds
            # of territories (cf conf['territories_kinds']).
            self.territories_id = set(
                territory_id
                for territory_id in (
                    ramdb.territory_id_by_kind_code.get((territory_kind_code['kind'], territory_kind_code['code']))
                    for territory_kind_code in poi_bson['territories'][i]
                    )
                if territory_id is not None
                )
            for territory_id in self.territories_id:
                self.ids_by_territory_id.setdefault(territory_id, set()).add(indexed_poi_id)
            break
        if not self.territories_id:
            self.ids_by_territory_id.setdefault(None, set()).add(indexed_poi_id)

        poi_territories_id = set(
            territory_id
            for territory_id in (
                ramdb.territory_id_by_kind_code.get((territory_kind_code['kind'], territory_kind_code['code']))
                for territory_kind_code in metadata['territories-index']
                if territory_kind_code['kind'] not in (u'Country', u'InternationalOrganization')
                )
            if territory_id is not None
            ) if metadata.get('territories-index') is not None else None
        for territory_id in (poi_territories_id or set()):
            self.ids_by_presence_territory_id.setdefault(territory_id, set()).add(indexed_poi_id)

        for word in strings.slugify(self.name).split(u'-'):
            self.ids_by_word.setdefault(word, set()).add(indexed_poi_id)
        self.slug_by_id[indexed_poi_id] = strings.slugify(self.name)

        if self.schema_name == 'OffreTransport':
            if not self.territories_id and not self.instance_by_id[indexed_poi_id].territories_id:
                france_id = ramdb.territory_id_by_kind_code[(u'Country', u'FR')]
                self.territories_id = set([france_id])
                self.ids_by_territory_id.setdefault(france_id, set()).add(indexed_poi_id)

            for field in self.fields:
                field_slug = strings.slugify(field.label)
                if field.id == 'checkboxes':
                    if field_slug == 'mode-de-transport' and field.value is not None:
                        for transport_mode in field.value:
                            self.ids_by_transport_mode.setdefault(transport_mode, set()).add(
                                indexed_poi_id)
                if field.id == 'select' and field_slug == 'type-de-transport' and field.value is not None:
                    self.ids_by_transport_type.setdefault(field.value, set()).add(indexed_poi_id)

        if self.schema_name == 'ServiceInfo':
            for field in self.fields:
                if field.id == 'select':
                    if strings.slugify(field.label) == 'niveau' and field.value is not None:
                        coverage_slug = strings.slugify(field.value)
                        self.ids_by_coverage.setdefault(coverage_slug, set()).add(indexed_poi_id)

        if self.is_multimodal_info_service():
            for territory_id in poi_territories_id:
                self.sim_ids_by_territory_id.setdefault(territory_id, set()).add(indexed_poi_id)
        self.ids_by_schema_name.setdefault(self.schema_name, set()).add(indexed_poi_id)
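
A sketch of how the ids_by_word index built above can back a prefix search (the helper name and signature are hypothetical):

def iter_pois_id_matching_word_prefix(poi_class, prefix):
    # Union of the ids of POIs whose name contains a word starting with the prefix.
    pois_id = set()
    for word, ids in poi_class.ids_by_word.iteritems():
        if word.startswith(prefix):
            pois_id |= ids
    return pois_id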
Code example #36
0
def user_extract(req):
    ctx = contexts.Ctx(req)
    user = model.get_user(ctx, check=True)
    if user.email is None:
        return wsgihelpers.forbidden(ctx)
    legislation = ctx.node
    if legislation.is_owner(ctx) and legislation.is_dated:
        return wsgihelpers.bad_request(
            ctx, explanation=ctx._(u'This legislation is already dated.'))

    params = req.GET
    inputs = {
        'date': params.get('date'),
    }
    data, errors = conv.struct({
        'date':
        conv.pipe(
            conv.french_formatted_str_to_datetime,
            conv.default(datetime.datetime.utcnow()),
        ),
    })(inputs, state=ctx)
    if errors is not None:
        return wsgihelpers.bad_request(ctx, explanation=errors)

    new_legislation = None
    new_legislation_title = ctx._(u'{} (copy {})').format(
        legislation.title, user.email)
    new_legislation_slug = strings.slugify(new_legislation_title)
    existing_legislations_cursor = model.Legislation.find(
        dict(slug=new_legislation_slug, ),
        as_class=collections.OrderedDict,
    )
    if existing_legislations_cursor.count() > 0:
        for existing_legislation in existing_legislations_cursor:
            if existing_legislation.is_owner(ctx):
                return wsgihelpers.redirect(
                    ctx, location=existing_legislation.get_user_url(ctx))
        if new_legislation is None:
            return wsgihelpers.bad_request(
                ctx,
                explanation=ctx._(
                    u'A legislation with the same name already exists.'),
            )
    else:
        new_legislation = model.Legislation(
            author_id=user._id,
            datetime_begin=legislation.datetime_begin,
            datetime_end=legislation.datetime_end,
            description=ctx._(u'Copy of legislation "{}"').format(
                legislation.title),
            title=new_legislation_title,
            slug=new_legislation_slug,
        )
        response = requests.post(
            conf['api.urls.legislations'],
            headers={
                'Content-Type': 'application/json',
                'User-Agent': conf['app_name'],
            },
            data=json.dumps(
                dict(date=data['date'].isoformat(),
                     legislation=legislation.json)),
        )
        new_legislation.json = response.json(
            object_pairs_hook=collections.OrderedDict).get('dated_legislation')
        new_legislation.save(safe=True)
    return wsgihelpers.redirect(ctx,
                                location=new_legislation.get_user_url(ctx))
Code example #37
0
        name = 'Compensated Own-Price and Cross-Price Elasticities -- Modest and aged more than 60'
        ),
    ]

cross_price_elasticities = pandas.DataFrame()

for table in tables:
    age = table.pop('age')
    name = table.pop('name')
    revenus = table.pop('revenus')
    df = pandas.read_excel(elasticities_origin_xlsx, **table)
    df.dropna(inplace = True)
    df.set_index('Unnamed: 0', inplace = True)
    df.index.name = 'product'

    if age is None or revenus is None:
        df.name = name
        csv_path_name = os.path.join(elasticities_path, slugify(name) + '.csv')
        df.to_csv(csv_path_name)

    else:
        df['age'] = age
        df['revenus'] = revenus
        cross_price_elasticities = cross_price_elasticities.append(df)

csv_path_name = os.path.join(
    elasticities_path,
    'cross_price_elasticities.csv',
    )
cross_price_elasticities.to_csv(csv_path_name)
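
The exported CSV can be read back with its 'product' index restored; a sketch reusing the csv_path_name computed above:

# Reload the aggregated table, restoring the 'product' index written by to_csv.
reloaded = pandas.read_csv(csv_path_name, index_col = 'product')
assert reloaded.index.name == 'product'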
Code example #38
0
File: ramdb.py Project: Gentux/etalage
def load():
    """Load MongoDB data into RAM-based database."""
    from . import model

    start_time = datetime.datetime.utcnow()
    global last_timestamp
    # Remove a few seconds, for data changes that occur during startup.
    last_timestamp = start_time - datetime.timedelta(seconds = 30)

    categories_slug_by_tag_slug.clear()
    categories_slug_by_word.clear()
    category_by_slug.clear()
    category_slug_by_pivot_code.clear()
    for db in model.dbs:
        for category_bson in db[conf['categories_collection']].find(None, ['code', 'tags_code', 'title']):
            if not strings.slugify(category_bson.get('title')):
                continue
            category = model.Category.load(category_bson)
            category.index()

    for db in model.dbs:
        for organism_type_bson in db[conf['organism_types_collection']].find(None, ['code', 'slug']):
            if organism_type_bson['slug'] not in category_by_slug:
                log.warning(
                    'Ignoring organism type "{0}" without matching category.'.format(organism_type_bson['code'])
                    )
                continue
            category_slug_by_pivot_code[organism_type_bson['code']] = organism_type_bson['slug']

    territories_id_by_ancestor_id.clear()
    territories_id_by_postal_distribution.clear()
    territories_query = dict(
        kind = {'$in': conf['territories_kinds']},
        )
    territory_by_id.clear()
    territory_id_by_kind_code.clear()
    territories_collection = pymongo.Connection()[conf['territories_database']][conf['territories_collection']]
    territories_fields_list = [
        'ancestors_id',
        'code',
        'geo',
        'hinge_type',
        'kind',
        'main_postal_distribution',
        'name'
        ]
    for territory_bson in territories_collection.find(territories_query, territories_fields_list):
        main_postal_distribution = territory_bson.get('main_postal_distribution')
        if main_postal_distribution is None:
            continue
        territory_class = model.Territory.kind_to_class(territory_bson['kind'])
        assert territory_class is not None, 'Invalid territory type name: {0}'.format(territory_bson['kind'])
        territory_id = territory_bson['_id']
        territory = territory_class(
            _id = territory_id,
            ancestors_id = territory_bson['ancestors_id'],
            code = territory_bson['code'],
            geo = territory_bson.get('geo'),
            hinge_type = territory_bson.get('hinge_type'),
            main_postal_distribution = main_postal_distribution,
            name = territory_bson['name'],
            )
        territory_by_id[territory_id] = territory
        for ancestor_id in territory_bson['ancestors_id']:
            territories_id_by_ancestor_id.setdefault(ancestor_id, set()).add(territory_id)
        territory_id_by_kind_code[(territory_bson['kind'], territory_bson['code'])] = territory_id
        territories_id_by_postal_distribution[(
            main_postal_distribution['postal_code'],
            main_postal_distribution['postal_routing'],
            )] = territory_id

    schema_title_by_name.clear()
    for db in model.dbs:
        for schema in db.schemas.find(None, ['name', 'title']):
            schema_title_by_name[schema['name']] = schema['title']

    model.Poi.clear_indexes()
    model.Poi.load_pois()
    model.Poi.index_pois()

#    # Remove unused categories.
#    for category_slug in category_by_slug.keys():
#        if category_slug not in model.Poi.ids_by_category_slug:
#            log.warning('Ignoring category "{0}" not used by any POI.'.format(category_slug))
#            del category_by_slug[category_slug]
#    for category_slug in model.Poi.ids_by_category_slug.keys():
#        if category_slug not in category_by_slug:
#            log.warning('Ignoring category "{0}" not defined in categories collection.'.format(category_slug))
#            del model.Poi.ids_by_category_slug[category_slug]

#    for category_slug in category_by_slug.iterkeys():
#        for word in category_slug.split(u'-'):
#            categories_slug_by_word.setdefault(word, set()).add(category_slug)

    log.info('RAM-based database loaded in {0} seconds'.format(datetime.datetime.utcnow() - start_time))
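
Once load() has returned, the populated mappings are plain dict lookups; a small sketch (the kind and code values are hypothetical):

# Resolve a territory from its (kind, code) couple, then list its descendants.
territory_id = territory_id_by_kind_code.get((u'CommuneOfFrance', u'75056'))
if territory_id is not None:
    territory = territory_by_id[territory_id]
    descendants_id = territories_id_by_ancestor_id.get(territory_id, set())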
Code example #39
0
def build_tree_from_yaml_clean(yaml_dir):
    tree = collections.OrderedDict()
    for yaml_dir_encoded, _, filenames_encoded in os.walk(yaml_dir):
        for filename_encoded in sorted(filenames_encoded):
            if not filename_encoded.endswith('.yaml'):
                continue
            filename = filename_encoded.decode(file_system_encoding)
            sheet_name = os.path.splitext(filename)[0]
            yaml_file_path_encoded = os.path.join(yaml_dir_encoded, filename_encoded)
            relative_file_path_encoded = yaml_file_path_encoded[len(yaml_dir):].lstrip(os.sep)
            relative_file_path = relative_file_path_encoded.decode(file_system_encoding)
            if sheet_name.isupper():
                continue
            assert sheet_name.islower(), sheet_name
            log.info(u'Loading file {}'.format(relative_file_path))
            with open(yaml_file_path_encoded) as yaml_file:
                data = yaml.load(yaml_file)
            rows = data.get(u"Valeurs")
            if rows is None:
                log.info(u'  Skipping file {} without "Valeurs"'.format(relative_file_path))
                continue
            row_by_start = {}
            for row in rows:
                start = row.get(u"Date d'effet")
                if start is None:
                    for date_name in date_names:
                        start = row.get(date_name)
                        if start is not None:
                            break
                    else:
                        # No date found. Skip row.
                        continue
                elif not isinstance(start, datetime.date):
                    start = start[u"Année Revenus"]
                row_by_start[start] = row
            sorted_row_by_start = sorted(row_by_start.items())

            relative_ipp_paths_by_start = {}
            unsorted_relative_ipp_paths = set()
            for start, row in sorted_row_by_start:
                relative_ipp_paths_by_start[start] = start_relative_ipp_paths = []
                for name, child in row.items():
                    if name in date_names:
                        continue
                    if name in note_names:
                        continue
                    if name in reference_names:
                        continue
                    start_relative_ipp_paths.extend(
                        (name,) + tuple(path)
                        for path, value in iter_ipp_values(child)
                        )
                unsorted_relative_ipp_paths.update(start_relative_ipp_paths)

            def compare_relative_ipp_paths(x, y):
                if x == y:
                    return 0
                for relative_ipp_paths in relative_ipp_paths_by_start.itervalues():
                    try:
                        return cmp(relative_ipp_paths.index(x), relative_ipp_paths.index(y))
                    except ValueError:
                        # Either x or y paths are missing in relative_ipp_paths => Their order can't be compared.
                        continue
                return -1

            sorted_relative_ipp_paths = sorted(unsorted_relative_ipp_paths,
                                               cmp=compare_relative_ipp_paths)
            # tax_rate_tree_by_bracket_type = {}

            for start, row in sorted_row_by_start:
                for relative_ipp_path in sorted_relative_ipp_paths:
                    value = row
                    for fragment in relative_ipp_path:
                        value = value.get(fragment)
                        if value is None:
                            break

                    if value in (u'-', u'na', u'nc'):
                        # Value is unknown. Previous value must be propagated.
                        continue
                    ipp_path = [
                        fragment if fragment in ('RENAME', 'TRANCHE', 'TYPE')
                        else strings.slugify(fragment, separator=u'_')
                        for fragment in itertools.chain(
                            relative_file_path.split(os.sep)[:-1],
                            [sheet_name],
                            relative_ipp_path,
                        )
                    ]

                    sub_tree = tree
                    for fragment in ipp_path[:-1]:
                        sub_tree = sub_tree.setdefault(
                            fragment, collections.OrderedDict())
                    fragment = ipp_path[-1]
                    sub_tree = sub_tree.setdefault(fragment, [])
                    if sub_tree:
                        previous_leaf = sub_tree[-1]
                        if previous_leaf['value'] == value:
                            # Merge leaves with the same value.
                            # One day, when we'll support "Références législatives", this behavior may change.
                            continue
                    sub_tree.append(dict(
                        start=start,
                        value=value,
                    ))
    return tree
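
A sketch of chaining this loader with the transform_node_to_element function shown in a later example (the YAML directory path is hypothetical):

tree = build_tree_from_yaml_clean('/path/to/ipp-yaml-clean')
root_element = transform_node_to_element(u'root', tree)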
Code example #40
0
def slugify_ipp_translation_key(key):
    return key if key in ('RENAME', 'TYPE') else strings.slugify(key, separator = u'_')
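
Expected behavior as a doctest-style sketch, assuming strings.slugify lowercases and joins word fragments with the given separator:

>>> slugify_ipp_translation_key('TYPE')
'TYPE'
>>> slugify_ipp_translation_key(u"Taux d'imposition")
u'taux_d_imposition'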
Code example #41
0
File: pois.py Project: Gentux/etalage-passim
    def slug(self):
        return strings.slugify(self.name)
Code example #42
0
def transform_node_to_element(name, node):
    if isinstance(node, dict):
        if node.get("TYPE") == u"BAREME":
            scale_element = etree.Element("BAREME", attrib=dict(code=strings.slugify(name, separator=u"_")))
            for slice_name in node.get("SEUIL", {}).keys():
                slice_element = etree.Element("TRANCHE", attrib=dict(code=strings.slugify(slice_name, separator=u"_")))

                threshold_element = etree.Element("SEUIL")
                values, format, type = prepare_xml_values(name, node.get("SEUIL", {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        threshold_element.append(value_element)
                if len(threshold_element) > 0:
                    slice_element.append(threshold_element)

                amount_element = etree.Element("MONTANT")
                values, format, type = prepare_xml_values(name, node.get("MONTANT", {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        amount_element.append(value_element)
                if len(amount_element) > 0:
                    slice_element.append(amount_element)

                rate_element = etree.Element("TAUX")
                values, format, type = prepare_xml_values(name, node.get("TAUX", {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        rate_element.append(value_element)
                if len(rate_element) > 0:
                    slice_element.append(rate_element)

                base_element = etree.Element("ASSIETTE")
                values, format, type = prepare_xml_values(name, node.get("ASSIETTE", {}).get(slice_name, []))
                for value in values:
                    value_element = transform_value_to_element(value)
                    if value_element is not None:
                        base_element.append(value_element)
                if len(base_element) > 0:
                    slice_element.append(base_element)

                if len(slice_element) > 0:
                    scale_element.append(slice_element)
            return scale_element if len(scale_element) > 0 else None
        else:
            node_element = etree.Element("NODE", attrib=dict(code=strings.slugify(name, separator=u"_")))
            for key, value in node.iteritems():
                child_element = transform_node_to_element(key, value)
                if child_element is not None:
                    node_element.append(child_element)
            return node_element if len(node_element) > 0 else None
    else:
        assert isinstance(node, list), node
        values, format, type = prepare_xml_values(name, node)
        if not values:
            return None
        code_element = etree.Element("CODE", attrib=dict(code=strings.slugify(name, separator=u"_")))
        if format is not None:
            code_element.set("format", format)
        if type is not None:
            code_element.set("type", type)
        for value in values:
            value_element = transform_value_to_element(value)
            if value_element is not None:
                code_element.append(value_element)
        return code_element if len(code_element) > 0 else None
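
To inspect the generated XML, the returned element can be serialized; a sketch assuming etree is lxml.etree (whose tostring accepts pretty_print) and that tree is a node tree such as the one built by build_tree_from_yaml_clean:

root_element = transform_node_to_element(u'root', tree)
if root_element is not None:
    print etree.tostring(root_element, encoding = 'utf-8', pretty_print = True)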
Code example #43
0
File: pois.py Project: Gentux/etalage
    def index(self, indexed_poi_id):
        poi_bson = self.bson
        metadata = poi_bson['metadata']
        for category_slug in (metadata.get('categories-index') or set()):
            self.ids_by_category_slug.setdefault(category_slug, set()).add(indexed_poi_id)

        if conf['index.date.field']:
            for date_range_index, date_range_metadata in enumerate(metadata.get('date-range') or []):
                if date_range_metadata['label'] == conf['index.date.field']:
                    date_range_values = poi_bson['date-range'][date_range_index]
                    date_range_begin = date_range_values.get('date-range-begin', [None])[0]
                    date_range_end = date_range_values.get('date-range-end', [None])[0]

                    if date_range_begin is not None:
                        # Keep ids_by_begin_datetime sorted: find the first later begin datetime.
                        for index, (begin_datetime, poi_id) in enumerate(self.ids_by_begin_datetime):
                            if begin_datetime is not None and begin_datetime > date_range_begin:
                                break
                        else:
                            # No later begin datetime (or empty list): append at the end.
                            index = len(self.ids_by_begin_datetime)
                    else:
                        index = 0
                    self.ids_by_begin_datetime.insert(index, (date_range_begin, indexed_poi_id))
                    if date_range_end is not None:
                        for index, (end_datetime, poi_id) in enumerate(self.ids_by_end_datetime):
                            if end_datetime is not None and end_datetime < date_range_end:
                                break
                        else:
                            # No earlier end datetime (or empty list): append at the end.
                            index = len(self.ids_by_end_datetime)
                    else:
                        index = 0
                    self.ids_by_end_datetime.insert(index, (date_range_end, indexed_poi_id))

            if not metadata.get('date-range'):
                self.ids_by_begin_datetime.append((None, indexed_poi_id))
                self.ids_by_end_datetime.append((None, indexed_poi_id))
        self.ids_by_last_update_datetime.append((self.last_update_datetime, indexed_poi_id))

        for i, territory_metadata in enumerate(metadata.get('territories') or []):
            # Note: Don't fail when territory doesn't exist, because Etalage can be configured to ignore some kinds
            # of territories (cf conf['territories_kinds']).
            self.competence_territories_id = set(
                territory_id
                for territory_id in (
                    ramdb.territory_id_by_kind_code.get((territory_kind_code['kind'], territory_kind_code['code']))
                    for territory_kind_code in poi_bson['territories'][i]
                    )
                if territory_id is not None
                )
            for territory_id in self.competence_territories_id:
                self.ids_by_competence_territory_id.setdefault(territory_id, set()).add(indexed_poi_id)
            break
        if not self.competence_territories_id:
            self.ids_by_competence_territory_id.setdefault(None, set()).add(indexed_poi_id)

        poi_territories_id = set(
            territory_id
            for territory_id in (
                ramdb.territory_id_by_kind_code.get((territory_kind_code['kind'], territory_kind_code['code']))
                for territory_kind_code in metadata['territories-index']
                if territory_kind_code['kind'] not in (u'Country', u'InternationalOrganization')
                )
            if territory_id is not None
            ) if metadata.get('territories-index') is not None else None
        for territory_id in (poi_territories_id or set()):
            self.ids_by_presence_territory_id.setdefault(territory_id, set()).add(indexed_poi_id)

        for word in strings.slugify(self.name).split(u'-'):
            self.ids_by_word.setdefault(word, set()).add(indexed_poi_id)
        self.slug_by_id[indexed_poi_id] = strings.slugify(self.name)
Code example #44
0
File: controllers.py Project: Gentux/etalage-passim
def index_list(req):
    ctx = contexts.Ctx(req)

    params = req.GET
    inputs = init_base(ctx, params)
    inputs.update(model.Poi.extract_search_inputs_from_params(ctx, params))
    inputs.update(dict(
        coverage = params.get('coverage'),
        page = params.get('page'),
        poi_index = params.get('poi_index'),
        sort_key = params.get('sort_key'),
        ))
    mode = u'liste'

    data, errors = conv.inputs_to_pois_list_data(inputs, state = ctx)
    non_territorial_search_data = model.Poi.extract_non_territorial_search_data(ctx, data)
    if errors is not None:
        raise wsgihelpers.bad_request(ctx, explanation = ctx._('Error: {0}').format(errors))

    territory = data['geolocation'] or (data['term'] if not isinstance(data['term'], basestring) else None)
    if non_territorial_search_data.get('term') and not isinstance(non_territorial_search_data['term'], basestring):
        non_territorial_search_data['term'] = None

    pois_id_iter = model.Poi.iter_ids(
        ctx,
        territory = territory,
        coverages = None if data['coverage'] is None else [data['coverage']],
        **non_territorial_search_data)

    if isinstance(data['term'], basestring):
        for poi_id in pois_id_iter:
            poi = model.Poi.instance_by_id[poi_id]
            if data['term'] == poi.slug:
                raise wsgihelpers.redirect(ctx, location = urls.get_url(ctx, 'organismes', poi.slug, poi._id))

    ids_by_territory_id = dict()
    multimodal_info_services_by_id = dict()
    national_territory_id = ramdb.territory_id_by_kind_code[('Country', 'FR')]
    ids_by_niveau = dict()
    transport_types_by_id = dict()
    web_site_by_id = dict()

    for poi in (
            model.Poi.instance_by_id.get(poi_id)
            for poi_id in pois_id_iter
            ):
        if poi is None:
            continue

        for field in poi.generate_all_fields():
            if poi._id in model.Poi.multimodal_info_service_ids:
                multimodal_info_services_by_id[poi._id] = poi
            else:
                if field.id == 'links' and strings.slugify(field.label) == 'offres-de-transport':
                    for transport_offer in [
                            transport_offer
                            for transport_offer in (
                                model.Poi.instance_by_id.get(transport_offer_id)
                                for transport_offer_id in field.value
                                )
                            if transport_offer is not None
                            ]:
                        # Use a distinct name to avoid shadowing the enclosing "field" loop variable.
                        for transport_offer_field in transport_offer.fields:
                            transport_offer_field_slug = strings.slugify(transport_offer_field.label)
                            if transport_offer_field_slug == 'type-de-transport' and transport_offer_field.value is not None:
                                transport_types_by_id.setdefault(poi._id, set()).add(transport_offer_field.value)

                if field.id == 'territories' and strings.slugify(field.label) == 'territoire-couvert':
                    for territory_id in field.value:
                        if isinstance(data['term'], model.Territory) and territory_id in data['term'].ancestors_id:
                            territory = ramdb.territory_by_id[territory_id]
                            if territory.__class__.__name__ != 'UrbanTransportsPerimeterOfFrance':
                                ids_by_territory_id.setdefault(territory_id, set()).add(poi._id)
                                break
                            else:
                                PTU_postal_routing = territory.main_postal_distribution.get('postal_routing')
                                if PTU_postal_routing is not None:
                                    for child_territory_id in ramdb.territories_id_by_ancestor_id.get(territory_id, set()):
                                        child_territory = ramdb.territory_by_id.get(child_territory_id)
                                        if child_territory.__class__.__name__ != 'CommuneOfFrance':
                                            continue
                                        child_territory_postal_routing = child_territory.main_postal_distribution.get(
                                            'postal_routing'
                                            )
                                        if all(map(
                                                lambda word: word in child_territory_postal_routing.split(),
                                                PTU_postal_routing.split(),
                                                )):
                                            ids_by_territory_id.setdefault(child_territory_id, set()).add(poi._id)
                                            break
                    else:
                        ids_by_territory_id.setdefault(national_territory_id, set()).add(poi._id)

                if field.id == 'select' and strings.slugify(field.label) == 'niveau':
                    ids_by_niveau_key = {
                        'local': 'local',
                        'locale': 'local',
                        'national': 'national',
                        'departemental': 'departmental',
                        'regional': 'regional',
                        }.get(strings.slugify(field.value))
                    ids_by_niveau.setdefault(ids_by_niveau_key, set()).add(poi._id)

            if field.id == 'url' and strings.slugify(field.label) == 'site-web-url':
                web_site_by_id[poi._id] = field.value
            elif field.id == 'url' and web_site_by_id.get(poi._id) is None:
                web_site_by_id[poi._id] = field.value

    multimodal_info_services = model.Poi.sort_and_paginate_pois_list(
        ctx,
        None,
        multimodal_info_services_by_id,
        multimodal_info_services = True,
        )

    return templates.render(
        ctx,
        '/list.mako',
        data = data,
        errors = errors,
        ids_by_territory_id = ids_by_territory_id,
        inputs = inputs,
        mode = mode,
        multimodal_info_services = multimodal_info_services,
        ids_by_niveau = ids_by_niveau,
        transport_types_by_id = transport_types_by_id,
        web_site_by_id = web_site_by_id,
        **non_territorial_search_data)
Code example #45
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dir', default = 'Baremes_IPP_2015', help = 'path of IPP XLS directory')
    parser.add_argument('-v', '--verbose', action = 'store_true', default = False, help = "increase output verbosity")
    args = parser.parse_args()
    # args.dir = path
    logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)

    root_node = dict(
        children = [],
        name = "root",
        text = textwrap.dedent(u"""\
            Ce document présente l'ensemble de la législation permettant le calcul des contributions sociales, taxes sur
            les salaires  et cotisations sociales. Il s'agit des barèmes bruts de la législation utilisés dans le
            micro-simulateur de l'IPP, TAXIPP. Les sources législatives (texte de loi, numéro du décret ou arrêté) ainsi
            que la date de publication au Journal Officiel de la République française (JORF) sont systématiquement
            indiquées. La première ligne du fichier (masquée) indique le nom des paramètres dans TAXIPP.

            Citer cette source :
            Barèmes IPP: prélèvements sociaux, Institut des politiques publiques, avril 2014.

            Auteurs :
            Antoine Bozio, Julien Grenet, Malka Guillot, Laura Khoury et Marianne Tenand

            Contacts :
            [email protected]; [email protected]; [email protected]

            Licence :
            Licence ouverte / Open Licence
            """).split(u'\n'),
        title = u"Barème IPP",
        type = u'NODE',
        )

    for bareme in baremes:
        xls_path = os.path.join(args.dir.decode('utf-8'), u"Baremes IPP - {0}.xls".format(bareme))
        if not os.path.exists(xls_path):
            log.warning("Skipping file {} that doesn't exist: {}".format(bareme, xls_path))
            continue
        log.info(u'Parsing file {}'.format(bareme))
        book = xlrd.open_workbook(filename = xls_path, formatting_info = True)

        sheet_names = [
            sheet_name
            for sheet_name in book.sheet_names()
            if not sheet_name.startswith((u'Abréviations', u'Outline')) and sheet_name not in forbiden_sheets.get(
                bareme, [])
            ]
        sheet_title_by_name = {}
        for sheet_name in sheet_names:
            log.info(u'  Parsing sheet {}'.format(sheet_name))
            sheet = book.sheet_by_name(sheet_name)

            # Extract coordinates of merged cells.
            merged_cells_tree = {}
            for row_low, row_high, column_low, column_high in sheet.merged_cells:
                for row_index in range(row_low, row_high):
                    cell_coordinates_by_merged_column_index = merged_cells_tree.setdefault(
                        row_index, {})
                    for column_index in range(column_low, column_high):
                        cell_coordinates_by_merged_column_index[column_index] = (row_low, column_low)

            if sheet_name.startswith(u'Sommaire'):
                # Associate the titles of the sheets to their Excel names.
                for row_index in range(sheet.nrows):
                    linked_sheet_number = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 2)
                    if isinstance(linked_sheet_number, int):
                        linked_sheet_title = transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, 3)
                        if linked_sheet_title is not None:
                            hyperlink = get_hyperlink(sheet, row_index, 3)
                            if hyperlink is not None and hyperlink.type == u'workbook':
                                linked_sheet_name = hyperlink.textmark.split(u'!', 1)[0].strip(u'"').strip(u"'")
                                sheet_title_by_name[linked_sheet_name] = linked_sheet_title
                continue

            descriptions_rows = []
            labels_rows = []
            notes_rows = []
            state = 'taxipp_names'
            taxipp_names_row = None
            values_rows = []
            for row_index in range(sheet.nrows):
                columns_count = len(sheet.row_values(row_index))
                if state == 'taxipp_names':
                    taxipp_names_row = [
                        taxipp_name
                        for taxipp_name in (
                            transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            for column_index in range(columns_count)
                            )
                        ]
                    state = 'labels'
                    continue
                if state == 'labels':
                    first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                    date_or_year, error = conv.pipe(
                        conv.test_isinstance((int, basestring)),
                        cell_to_date,
                        conv.not_none,
                        )(first_cell_value, state = conv.default_state)
                    if error is not None:
                        # First cell of row is not a date => Assume it is a label.
                        labels_rows.append([
                            transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            for column_index in range(columns_count)
                            ])
                        continue
                    state = 'values'
                if state == 'values':
                    first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                    if first_cell_value is None or isinstance(first_cell_value, (int, basestring)):
                        date_or_year, error = cell_to_date(first_cell_value, state = conv.default_state)
                        if error is None:
                            # First cell of row is a valid date or year.
                            values_row = [
                                transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, column_index)
                                for column_index in range(columns_count)
                                ]
                            if date_or_year is not None:
                                assert date_or_year.year < 2601, 'Invalid date {} in {} at row {}'.format(date_or_year,
                                    sheet_name, row_index + 1)
                                values_rows.append(values_row)
                                continue
                            if all(value in (None, u'') for value in values_row):
                                # If first cell is empty and all other cells in line are also empty, ignore this line.
                                continue
                            # First cell has no date and other cells in row are not empty => Assume it is a note.
                    state = 'notes'
                if state == 'notes':
                    first_cell_value = transform_xls_cell_to_json(book, sheet, merged_cells_tree, row_index, 0)
                    if isinstance(first_cell_value, basestring) and first_cell_value.strip().lower() == 'notes':
                        notes_rows.append([
                            transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                            for column_index in range(columns_count)
                            ])
                        continue
                    state = 'description'
                assert state == 'description'
                descriptions_rows.append([
                    transform_xls_cell_to_str(book, sheet, merged_cells_tree, row_index, column_index)
                    for column_index in range(columns_count)
                    ])

            text_lines = []
            for row in notes_rows:
                text_lines.append(u' | '.join(
                    cell for cell in row
                    if cell
                    ))
            if text_lines:
                text_lines.append(None)
            for row in descriptions_rows:
                text_lines.append(u' | '.join(
                    cell for cell in row
                    if cell
                    ))

            sheet_title = sheet_title_by_name.get(sheet_name)
            if sheet_title is None:
                log.warning(u"Missing title for sheet {} in summary".format(sheet_name))
                continue
            labels = []
            for labels_row in labels_rows:
                for column_index, label in enumerate(labels_row):
                    if not label:
                        continue
                    while column_index >= len(labels):
                        labels.append([])
                    labels_column = labels[column_index]
                    if not labels_column or labels_column[-1] != label:
                        labels_column.append(label)
            labels = [
                tuple(labels_column1) if len(labels_column1) > 1 else labels_column1[0]
                for labels_column1 in labels
                ]

            cell_by_label_rows = []
            for value_row in values_rows:
                cell_by_label = collections.OrderedDict(itertools.izip(labels, value_row))
                cell_by_label, errors = values_row_converter(cell_by_label, state = conv.default_state)
                assert errors is None, "Errors in {}:\n{}".format(cell_by_label, errors)
                cell_by_label_rows.append(cell_by_label)

            sheet_node = dict(
                children = [],
                name = strings.slugify(sheet_name, separator = u'_'),
                text = text_lines,
                title = sheet_title,
                type = u'NODE',
                )
            root_node['children'].append(sheet_node)

            for taxipp_name, labels_column in zip(taxipp_names_row, labels):
                if not taxipp_name or taxipp_name in (u'date',):
                    continue
                variable_node = dict(
                    children = [],
                    name = strings.slugify(taxipp_name, separator = u'_'),
                    title = u' - '.join(labels_column) if isinstance(labels_column, tuple) else labels_column,
                    type = u'CODE',
                    )
                sheet_node['children'].append(variable_node)

                for cell_by_label in cell_by_label_rows:
                    amount_and_unit = cell_by_label[labels_column]
                    variable_node['children'].append(dict(
                        law_reference = cell_by_label[u'Références législatives'],
                        notes = cell_by_label[u'Notes'],
                        publication_date = cell_by_label[u"Parution au JO"],
                        start_date = cell_by_label[u"Date d'entrée en vigueur"],
                        type = u'VALUE',
                        unit = amount_and_unit[1] if isinstance(amount_and_unit, tuple) else None,
                        value = amount_and_unit[0] if isinstance(amount_and_unit, tuple) else amount_and_unit,
                        ))

            # dates = [
            #     conv.check(cell_to_date)(
            #         row[1] if bareme == u'Impot Revenu' else row[0],
            #         state = conv.default_state,
            #         )
            #     for row in values_rows
            #     ]
            # for column_index, taxipp_name in enumerate(taxipp_names_row):
            #     if taxipp_name and strings.slugify(taxipp_name) not in (
            #             'date',
            #             'date-ir',
            #             'date-rev',
            #             'note',
            #             'notes',
            #             'ref-leg',
            #             ):
            #         vector = [
            #             transform_cell_value(date, row[column_index])
            #             for date, row in zip(dates, values_rows)
            #             ]
            #         vector = [
            #             cell if not isinstance(cell, basestring) or cell == u'nc' else '-'
            #             for cell in vector
            #             ]
            #         # vector_by_taxipp_name[taxipp_name] = pd.Series(vector, index = dates)
            #         vector_by_taxipp_name[taxipp_name] = vector
            #

    print_node(root_node)

    return 0
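
A minimal runner sketch for this entry point:

if __name__ == '__main__':
    sys.exit(main())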