Ejemplo n.º 1
0
    def schema_json(self, dataset_type):
        """
        Return the public schema of a recombinant dataset type as JSON.

        The schema lists the dataset type's top-level metadata and, for
        every resource definition (chromo), its publicly visible fields,
        their choices, the primary key and, when the chromo defines
        examples, an example record.

        :param dataset_type: name of the recombinant dataset type

        :returns: UTF-8 encoded, indented JSON text; the response
            Content-Type and Content-Disposition headers are set as a
            side effect

        abort(404) is called when get_geno raises RecombinantException.
        """
        try:
            geno = get_geno(dataset_type)
        except RecombinantException:
            abort(404, _('Recombinant dataset_type not found'))

        schema = OrderedDict()
        for k in ['dataset_type', 'title', 'notes']:
            if k in geno:
                schema[k] = geno[k]

        schema['resources'] = []
        for chromo in geno['resources']:
            resource = OrderedDict()
            schema['resources'].append(resource)
            choice_fields = dict(
                (f['datastore_id'], f['choices'])
                for f in recombinant_choice_fields(
                    chromo['resource_name'],
                    all_languages=True))

            for k in ['title', 'resource_name']:
                if k in chromo:
                    resource[k] = chromo[k]

            # fields flagged visible_to_public=False stay out of the schema
            public_fields = [
                field for field in chromo['fields']
                if field.get('visible_to_public', True)]

            resource['fields'] = []
            for field in public_fields:
                fld = OrderedDict()
                resource['fields'].append(fld)
                fld['id'] = field['datastore_id']
                for k in ['label', 'description', 'obligation', 'format_type']:
                    if k in field:
                        fld[k] = field[k]

                if fld['id'] in choice_fields:
                    # keep the choices in the order they were defined
                    fld['choices'] = OrderedDict(
                        (ck, cv) for ck, cv in choice_fields[fld['id']])

            resource['primary_key'] = chromo['datastore_primary_key']

            if 'examples' in chromo:
                ex_record = chromo['examples']['record']
                # only publish example values for fields that appear in
                # the public field list above, in field order
                example = OrderedDict()
                for field in public_fields:
                    if field['datastore_id'] in ex_record:
                        example[field['datastore_id']] = ex_record[
                            field['datastore_id']]
                resource['example_record'] = example

        response.headers['Content-Type'] = 'application/json'
        response.headers['Content-Disposition'] = (
            'inline; filename="{0}.json"'.format(
                dataset_type))
        return json.dumps(schema, indent=2, ensure_ascii=False).encode('utf-8')
Ejemplo n.º 2
0
def excel_data_dictionary(geno):
    """
    return an openpyxl.Workbook object containing the field reference
    from geno, one sheet per language

    One sheet is produced for every locale listed in the
    'ckan.locales_offered' config setting, titled with the upper-cased
    locale code.  Each sheet is filled by _populate_reference_sheet with
    one group of reference rows per field, followed by its choices when
    the field has any.

    NOTE: the request language is switched to each locale in turn and is
    left at the last offered locale when this function returns.
    """
    from pylons import config
    from ckan.lib.i18n import handle_request
    from ckan.common import c, request

    book = openpyxl.Workbook()

    for n, lang in enumerate(config['ckan.locales_offered'].split()):
        # a new workbook comes with one active sheet: use it for the
        # first language and create a fresh sheet for every other one
        sheet = book.active if n == 0 else book.create_sheet()
        if sheet is None:
            sheet = book.create_sheet()

        sheet.title = lang.upper()
        # switch language (FIXME: this is harder than it should be)
        request.environ['CKAN_LANG'] = lang
        handle_request(request, c)
        # looked up after the language switch, once per locale
        choice_fields = dict(
            (f['datastore_id'], f['choices'])
            for chromo in geno['resources']
            for f in recombinant_choice_fields(chromo['resource_name']))

        refs = []
        for chromo in geno['resources']:
            for field in chromo['fields']:
                _append_field_ref_rows(refs, field, link=None)

                if field['datastore_id'] in choice_fields:
                    _append_field_choices_rows(
                        refs,
                        choice_fields[field['datastore_id']],
                        full_text_choices=False)

        _populate_reference_sheet(sheet, geno, refs)

    return book
Ejemplo n.º 3
0
def _populate_excel_sheet(sheet, chromo, org, refs):
    """
    Lay out an openpyxl sheet for the resource definition chromo and org.

    Row 1 receives the organization name and title, row 2 the translated
    field labels and row 3 (hidden) the datastore ids; panes are frozen
    at A4.  Boolean fields and choice fields get list validation on rows
    4-1004 of their column.

    refs - list of rows to add to reference sheet; a label row, one row
        per choice and a blank row are appended in place for every
        choice field

    Nothing is returned.
    """
    validation_cls = openpyxl.worksheet.datavalidation.DataValidation
    bool_validation = validation_cls(
        type="list", formula1='"FALSE,TRUE"', allow_blank=True)
    sheet.add_data_validation(bool_validation)

    sheet.title = chromo['resource_name']

    def put(row, column, value, styles):
        # write one value and style its cell
        cell = sheet.cell(row=row, column=column)
        cell.value = value
        apply_styles(styles, cell)

    organization_style = chromo['excel_organization_style']
    put(1, 1, org['name'], organization_style)
    put(1, 2, org['title'], organization_style)
    apply_styles(organization_style, sheet.row_dimensions[1])

    heading_style = chromo['excel_header_style']

    choices_by_id = {}
    for choice_field in recombinant_choice_fields(chromo['resource_name']):
        choices_by_id[choice_field['datastore_id']] = choice_field['choices']

    for column_num, field in enumerate(chromo['fields'], 1):
        put(2, column_num, _(field['label']), heading_style)
        field_id = field['datastore_id']
        put(3, column_num, field_id, heading_style)
        # jumping through openpyxl hoops:
        letter = openpyxl.cell.get_column_letter(column_num)
        dimension = sheet.column_dimensions[letter]
        dimension.width = field['excel_column_width']
        # FIXME: format only below header
        dimension.number_format = (
            datastore_type[field['datastore_type']].xl_format)
        data_range = '{0}4:{0}1004'.format(letter)

        if field['datastore_type'] == 'boolean':
            bool_validation.ranges.append(data_range)

        if field_id not in choices_by_id:
            continue

        # reference rows: label row, one row per choice, blank separator
        refs.append([_(field['label'])])
        first_ref_row = len(refs) + 2
        refs.extend(
            [None, key, value] for key, value in choices_by_id[field_id])
        last_ref_row = len(refs) + 1
        refs.append([])

        keys_range = 'reference!$B${0}:$B${1}'.format(
            first_ref_row, last_ref_row)
        choice_validation = validation_cls(
            type="list",
            formula1=keys_range,
            allow_blank=True)
        choice_validation.errorTitle = u'Invalid choice'
        error_template = (
            u'Please enter one of the valid keys shown on '
            'sheet "reference" rows {0}-{1}')
        choice_validation.error = error_template.format(
            first_ref_row, last_ref_row)
        sheet.add_data_validation(choice_validation)
        choice_validation.ranges.append(data_range)

        # hilight header if bad values pasted below
        mismatch_template = (
            'COUNTIF({0},"<>"&"")'  # all non-blank cells
            '-SUMPRODUCT(COUNTIF({0},{1}))')
        mismatch_formula = mismatch_template.format(data_range, keys_range)
        sheet.conditional_formatting.add(
            "{0}2".format(letter),
            openpyxl.formatting.FormulaRule(
                [mismatch_formula], stopIfTrue=True, fill=red_fill))

    for heading_row in (2, 3):
        apply_styles(heading_style, sheet.row_dimensions[heading_row])
    sheet.row_dimensions[3].hidden = True

    sheet.freeze_panes = sheet['A4']
Ejemplo n.º 4
0
def _update_records(records, org_detail, conn, resource_name):
    """
    Update records on solr core

    Every record becomes one solr document holding the org names, each
    chromo field value, optional range facets, date parts and the
    English/French text of choice values, plus a catch-all "text" field.

    :param records: record dicts
    :ptype records: sequence of record dicts

    :param org_detail: org structure as returned via local CKAN
    :ptype org_detail: dict with local CKAN org structure

    :param conn: solr connection
    :ptype conn: obj

    :param resource_name: type being updated
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get("datastore_primary_key", [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail["name"]
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # no primary key: fall back to the datastore row _id,
            # namespaced by the resource being updated
            s = hashlib.md5(s + resource_name + "-%d" % r["_id"]).hexdigest()
            f += u"|" + unicode(r["_id"])
        for k in pk:
            # unicode() first so non-string key values hash too
            s = hashlib.md5(s + unicode(r[k]).encode("utf-8")).hexdigest()
            f += u"|" + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f["datastore_id"], dict(f["choices"]))
        for f in recombinant_choice_fields(resource_name, all_languages=True)
    )

    for r in records:
        unique, friendly = unique_id(r)

        solrrec = {
            "id": unique,
            "unique_id": friendly,
            "org_name_code": org_detail["name"],
            # title is "English | French"; without a separator both
            # names are the whole title
            "org_name_en": org_detail["title"].split(" | ", 1)[0],
            "org_name_fr": org_detail["title"].split(" | ", 1)[-1],
        }

        for f in chromo["fields"]:
            key = f["datastore_id"]
            value = r[key]

            facet_range = f.get("solr_float_range_facet")
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # blank (None) or non-numeric value: no range facet
                    pass
                else:
                    # first facet whose upper bound is above the value;
                    # a facet without "less_than" matches everything
                    for i, fac in enumerate(facet_range):
                        if "less_than" not in fac or float_value < fac["less_than"]:
                            solrrec[key + "_range"] = str(i)
                            solrrec[key + "_range_en"] = fac["label"].split(" | ")[0]
                            solrrec[key + "_range_fr"] = fac["label"].split(" | ")[-1]
                            break

            if f.get("datastore_type") == "date":
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get("extract_date_year"):
                        solrrec["date_year"] = value.split("-", 1)[0]
                    if f.get("extract_date_month"):
                        solrrec["date_month"] = value.split("-")[1]
                except ValueError:
                    # keep the raw value when it cannot be converted
                    pass
            solrrec[key] = value

            choices = choice_fields.get(f["datastore_id"])
            if not choices:
                continue

            # "<name>_code" choice fields are indexed as <name>_en/_fr
            if key.endswith("_code"):
                key = key[:-5]
            solrrec[key + "_en"] = recombinant_language_text(choices.get(value, ""), "en")
            solrrec[key + "_fr"] = recombinant_language_text(choices.get(value, ""), "fr")

        solrrec["text"] = u" ".join(unicode(v) for v in solrrec.values())
        out.append(solrrec)

    conn.add_many(out, _commit=True)
Ejemplo n.º 5
0
def _populate_excel_sheet(sheet, geno, chromo, org, refs, resource_num):
    """
    Format openpyxl sheet for the resource definition chromo and org.
    (Version 3)

    sheet - openpyxl worksheet to fill in; retitled to the resource name
    geno - dataset type definition, read here only for the optional
        excel_*_style overrides
    chromo - resource definition supplying the title, fields, optional
        example record and row count/height overrides
    org - organization dict; 'name' is written to the hidden code row
        and 'title' is shown in the header
    refs - list of rows to add to reference sheet, modified
        in place from this function
    resource_num - 1-based index of resource

    returns cranges dict of {datastore_id: reference_key_range}
    """
    sheet.title = chromo['resource_name']

    # every style is the module default overridden by the geno's own keys
    edge_style = dict(DEFAULT_EDGE_STYLE, **geno.get('excel_edge_style', {}))
    required_style = dict(edge_style, **geno.get('excel_required_style', {}))
    header_style = dict(DEFAULT_HEADER_STYLE, **geno.get('excel_header_style', {}))
    cheadings_style = dict(DEFAULT_CHEADING_STYLE, **geno.get('excel_column_heading_style', {}))
    example_style = dict(DEFAULT_EXAMPLE_STYLE, **geno.get('excel_example_style', {}))
    error_style = dict(DEFAULT_ERROR_STYLE, **geno.get('excel_error_style', {}))

    cranges = {}
    data_num_rows = chromo.get('excel_data_num_rows', DEFAULT_DATA_NUM_ROWS)

    # 'examples' is optional in a chromo: without it the example row
    # cells are simply left blank
    example_record = chromo.get('examples', {}).get('record', {})

    # create rows so we can set all heights
    for i in xrange(1, DATA_FIRST_ROW + data_num_rows):
        sheet.cell(row=i, column=1).value = None

    sheet.merge_cells(EXAMPLE_MERGE)
    fill_cell(sheet, EXAMPLE_ROW, 1, _('e.g.'), example_style)

    fill_cell(
        sheet,
        DATA_FIRST_ROW,
        RPAD_COL_NUM,
        u'=IF(r{rnum}!{col}{row},"","▶")'.format(
            rnum=resource_num,
            col=RPAD_COL,
            row=DATA_FIRST_ROW),
        TYPE_HERE_STYLE)

    fill_cell(
        sheet,
        HEADER_ROW,
        DATA_FIRST_COL_NUM,
        recombinant_language_text(chromo['title'])
            + u' \N{em dash} ' + org_title_lang_hack(org['title']),
        header_style)

    sheet.cell(row=CODE_ROW, column=1).value = 'v3'  # template version
    # allow only upload to this org
    sheet.cell(row=CODE_ROW, column=2).value = org['name']

    cheadings_dimensions = sheet.row_dimensions[CHEADINGS_ROW]

    choice_fields = dict(
        (f['datastore_id'], f['choices'])
        for f in recombinant_choice_fields(chromo['resource_name']))

    for col_num, field in template_cols_fields(chromo):
        field_heading = recombinant_language_text(
            field.get('excel_heading', field['label'])).strip()
        # grow the heading row to fit the heading with the most lines
        cheadings_dimensions.height = max(
            cheadings_dimensions.height,
            field_heading.count('\n') * LINE_HEIGHT + CHEADINGS_HEIGHT)

        col_heading_style = cheadings_style
        if 'excel_column_heading_style' in field:
            # use geno column heading style as base, just override keys
            col_heading_style = dict(
                cheadings_style,
                **field['excel_column_heading_style'])
            apply_styles(col_heading_style, sheet.cell(
                row=HEADER_ROW, column=col_num))
            apply_styles(col_heading_style, sheet.cell(
                row=CSTATUS_ROW, column=col_num))

        fill_cell(
            sheet,
            CHEADINGS_ROW,
            col_num,
            field_heading,
            col_heading_style)

        # first reference sheet row for this field, captured before any
        # of its reference rows are appended to refs
        reference_row1 = len(refs) + REF_FIRST_ROW

        # match against db columns
        sheet.cell(row=CODE_ROW, column=col_num).value = field['datastore_id']

        example = example_record.get(field['datastore_id'], '')
        fill_cell(
            sheet,
            EXAMPLE_ROW,
            col_num,
            u','.join(example) if isinstance(example, list)
            else example,
            example_style)

        col_letter = openpyxl.cell.get_column_letter(col_num)

        # jump to first error/required cell in column
        fill_cell(
            sheet,
            CSTATUS_ROW,
            col_num,
            '=IF(e{rnum}!{col}{row}>0,HYPERLINK("#{col}"&e{rnum}!{col}{row},"")'
                ',IF(r{rnum}!{col}{row}>0,HYPERLINK("#{col}"&r{rnum}!{col}{row},""),""))'
                .format(rnum=resource_num, col=col_letter, row=CSTATUS_ROW),
            col_heading_style)

        col = sheet.column_dimensions[col_letter]
        if 'excel_column_width' in field:
            col.width = field['excel_column_width']
        else:
            col.width = max(estimate_width(field_heading), CHEADINGS_MIN_WIDTH)

        validation_range = '{col}{row1}:{col}{rowN}'.format(
            col=col_letter,
            row1=DATA_FIRST_ROW,
            rowN=DATA_FIRST_ROW + data_num_rows - 1)

        xl_format = datastore_type[field['datastore_type']].xl_format
        alignment = openpyxl.styles.Alignment(wrap_text=True)
        protection = openpyxl.styles.Protection(locked=False)
        # data cells: typed number format, wrapped text, unlocked
        for (c,) in sheet[validation_range]:
            c.number_format = xl_format
            c.alignment = alignment
            c.protection = protection
        ex_cell = sheet.cell(row=EXAMPLE_ROW, column=col_num)
        ex_cell.number_format = xl_format
        ex_cell.alignment = alignment

        _append_field_ref_rows(refs, field, "#'{sheet}'!{col}{row}".format(
            sheet=sheet.title, col=col_letter, row=CHEADINGS_ROW))

        if field['datastore_id'] in choice_fields:
            full_text_choices = (
                field['datastore_type'] != '_text' and field.get(
                'excel_full_text_choices', False))
            ref1 = len(refs) + REF_FIRST_ROW
            max_choice_width = _append_field_choices_rows(
                refs,
                choice_fields[field['datastore_id']],
                full_text_choices)
            refN = len(refs) + REF_FIRST_ROW - 2

            if full_text_choices:
                if 'excel_column_width' not in field:
                    col.width = max(col.width, max_choice_width)
                # expand example
                for ck, cv in choice_fields[field['datastore_id']]:
                    if ck == example:
                        ex_cell.value = u"{0}: {1}".format(ck, cv)
                        break

            choice_range = 'reference!${col}${ref1}:${col}${refN}'.format(
                col=REF_KEY_COL, ref1=ref1, refN=refN)
            user_choice_range = field.get('excel_choice_range_formula')
            if user_choice_range:
                # replacement names used by the formula other than the
                # built-in 'range' and 'range_top'; they are filled with
                # the first data cell of the field having that id
                choice_keys = set(
                    key for (_i, key, _i, _i) in string.Formatter().parse(user_choice_range)
                    if key != 'range' and key != 'range_top')
                choice_values = {}
                if choice_keys:
                    choice_values = {
                        f['datastore_id']: "{col}{num}".format(
                            col=openpyxl.cell.get_column_letter(cn),
                            num=DATA_FIRST_ROW)
                        for cn, f in template_cols_fields(chromo)
                        if f['datastore_id'] in choice_keys}
                user_choice_range = user_choice_range.format(
                    range=choice_range,
                    range_top=choice_range.split(':')[0],
                    **choice_values)
            cranges[field['datastore_id']] = choice_range

            choices = [c[0] for c in choice_fields[field['datastore_id']]]
            # no list validation is attached to '_text' fields
            if field['datastore_type'] != '_text':
                v = openpyxl.worksheet.datavalidation.DataValidation(
                    type="list",
                    formula1=user_choice_range or choice_range,
                    allow_blank=True)
                v.errorTitle = u'Invalid choice'
                valid_keys = u', '.join(unicode(c) for c in choices)
                # short key lists go in the message itself, longer ones
                # point at the reference sheet rows instead
                if len(valid_keys) < 40:
                    v.error = (u'Please enter one of the valid keys: '
                        + valid_keys)
                else:
                    v.error = (u'Please enter one of the valid keys shown on '
                        'sheet "reference" rows {0}-{1}'.format(ref1, refN))
                sheet.add_data_validation(v)
                v.ranges.append(validation_range)

        # link the column heading to this field's rows on the reference sheet
        sheet.cell(row=CHEADINGS_ROW, column=col_num).hyperlink = (
            '#reference!{colA}{row1}:{colZ}{rowN}'.format(
                colA=REF_FIELD_NUM_COL,
                row1=reference_row1,
                colZ=REF_VALUE_COL,
                rowN=len(refs) + REF_FIRST_ROW - 2))

    # col_letter is the value left over from the final loop iteration,
    # so this relies on the chromo having at least one template column
    _add_conditional_formatting(
        sheet,
        col_letter,
        resource_num,
        error_style,
        required_style,
        data_num_rows)

    sheet.row_dimensions[HEADER_ROW].height = HEADER_HEIGHT
    sheet.row_dimensions[CODE_ROW].hidden = True
    sheet.row_dimensions[CSTATUS_ROW].height = CSTATUS_HEIGHT
    sheet.row_dimensions[EXAMPLE_ROW].height = chromo.get(
        'excel_example_height', DEFAULT_EXAMPLE_HEIGHT)
    for i in xrange(DATA_FIRST_ROW, DATA_FIRST_ROW + data_num_rows):
        sheet.row_dimensions[i].height = chromo.get(
            'excel_data_height', DEFAULT_DATA_HEIGHT)

        # jump to first error/required cell in row
        sheet.cell(row=i, column=RSTATUS_COL_NUM).value = (
            '=IF(e{rnum}!{col}{row}>0,'
                'HYPERLINK("#"&ADDRESS({row},e{rnum}!{col}{row}),""),'
                'IF(r{rnum}!{col}{row}>0,'
                    'HYPERLINK("#"&ADDRESS({row},r{rnum}!{col}{row}),""),""))'
            .format(rnum=resource_num, col=RSTATUS_COL, row=i))

    sheet.column_dimensions[RSTATUS_COL].width = RSTATUS_WIDTH
    sheet.column_dimensions[RPAD_COL].width = RPAD_WIDTH

    sheet.freeze_panes = sheet[FREEZE_PANES]

    apply_styles(header_style, sheet.row_dimensions[HEADER_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CHEADINGS_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CSTATUS_ROW])
    apply_styles(example_style, sheet.row_dimensions[EXAMPLE_ROW])
    for (c,) in sheet[EDGE_RANGE]:
        apply_styles(edge_style, c)

    # trying to set the active cell (not working yet)
    select = "{col}{row}".format(col=DATA_FIRST_COL, row=DATA_FIRST_ROW)
    sheet.sheet_view.selection[0].activeCell = select
    sheet.sheet_view.selection[0].sqref = select

    return cranges
Ejemplo n.º 6
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Every record becomes one solr document.  Documents are sent without
    committing; a failed send is retried up to nine more times with a
    growing pause before the pysolr.SolrError is re-raised.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    import time
    import pysolr

    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id, partial id"
        s = orghash
        f = org
        p = org
        if not pk:
            # no primary key: fall back to the datastore row _id,
            # namespaced by the resource being updated
            s = hashlib.md5(s + resource_name + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
            p += u'|' + unicode(r['_id'])
        for k in pk:
            # unicode() first so non-string key values hash too
            s = hashlib.md5(s + unicode(r[k]).encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
            # partial id only takes a key value while p has no separator
            if u'|' not in p:
                p += u'|' + unicode(r[k])
        return s, f, p

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly, partial = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            # title is "English | French"; without a separator both
            # names are the whole title
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
            }

        # copy the org extras the chromo asks for into the document
        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for e in org_detail['extras']:
                if e['key'] in org_fields:
                    solrrec[e['key']] = e['value']

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # blank (None) or non-numeric value: no range facet
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if f.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    # keep the raw value when it cannot be converted
                    pass
            elif f.get('extract_date_year'):
                if f.get('datastore_type') == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except (AttributeError, ValueError):
                        # blank (None) value or no leading integer year
                        pass
            if f.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + key] = float(value)
                except (TypeError, ValueError):
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" choice fields are indexed under <name>
                if key.endswith('_code'):
                    key = key[:-5]
                choice = choices.get(value, {})
                _add_choice(solrrec, key, r, choice, f)

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        # static fields are added after 'text' so they stay out of it
        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        # ten attempts in total, pausing 5, 10, ... 45 seconds between them
        for remaining in reversed(range(10)):
            try:
                conn.add(out, commit=False)
                break
            except pysolr.SolrError:
                if not remaining:
                    raise
                print("waiting...")
                time.sleep((10 - remaining) * 5)
                print("retrying...")
    return unmatched
Ejemplo n.º 7
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Every record becomes one solr document holding the org names, each
    chromo field value, optional range facets and sums, date parts and
    the English/French text of choice values, plus a catch-all 'text'
    field.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # no primary key: fall back to the datastore row _id,
            # namespaced by the resource being updated
            s = hashlib.md5(s + resource_name +
                            "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # unicode() first so non-string key values hash too
            s = hashlib.md5(s + unicode(r[k]).encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title_translated']['en'],
            'org_name_fr': org_detail['title_translated']['fr'],
        }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # blank (None) or non-numeric value: no range facet
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    # keep the raw value when it cannot be converted
                    pass
            elif f.get('datastore_type') == 'year':
                if f.get('extract_date_year'):
                    solrrec['date_year'] = value
            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" choice fields are indexed as <name>_en/_fr
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        # static fields are added after 'text' so they stay out of it
        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
Ejemplo n.º 8
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Every record becomes one solr document holding the org names, each
    chromo field value, optional range facets and sums, date parts and
    the English/French text of choice values, plus a catch-all 'text'
    field.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # no primary key: fall back to the datastore row _id,
            # namespaced by the resource being updated
            s = hashlib.md5(s + resource_name + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # unicode() first so non-string key values hash too
            s = hashlib.md5(s + unicode(r[k]).encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            # title is "English | French"; without a separator both
            # names are the whole title
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
            }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # blank (None) or non-numeric value: no range facet
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    # keep the raw value when it cannot be converted
                    pass
            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" choice fields are indexed as <name>_en/_fr
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
Ejemplo n.º 9
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Build solr documents for a batch of records and send them to solr

    :param records: iterable of record dicts for one org + resource
    :param org_detail: org dict from local CKAN; 'name' and 'title' are
        read, plus 'extras' when the chromo lists solr_org_fields
    :param conn: solr connection, used via conn.add(docs, commit=False)
    :param resource_name: recombinant resource name being indexed
    :param unmatched: (previous years, next years) pair of dicts carried
        over from the last call, or a false value on the first call

    :returns: the unmatched pair to hand to the next call for the same
        org + resource_name, or None when no field of the resource has
        solr_compare_previous_year
    """
    chromo = get_chromo(resource_name)
    pk_fields = chromo.get('datastore_primary_key', [])
    if not isinstance(pk_fields, list):
        pk_fields = [pk_fields]

    org_name = org_detail['name']
    org_digest = hashlib.md5(org_name).hexdigest()

    def unique_id(rec):
        "return hash, friendly id, partial id"
        digest = org_digest
        friendly_id = partial_id = org_name
        for pk_name in pk_fields:
            # chain the md5 so every primary key value feeds the final id
            digest = hashlib.md5(
                digest + rec[pk_name].encode('utf-8')).hexdigest()
            part = u'|' + unicode(rec[pk_name])
            friendly_id += part
            # the partial id stops growing once it contains a '|'
            if u'|' not in partial_id:
                partial_id += part
        return digest, friendly_id, partial_id

    docs = []

    # datastore_id -> {choice key: choice value} for every choice field
    choice_fields = {}
    for cf in recombinant_choice_fields(resource_name, all_languages=True):
        choice_id = cf['datastore_id']
        choice_fields[choice_id] = dict(cf['choices'])

    compares_years = any(
        'solr_compare_previous_year' in fld for fld in chromo['fields'])
    if not compares_years:
        unmatched = None
    elif not unmatched:
        # previous years, next years
        unmatched = ({}, {})

    for rec in records:
        doc_id, friendly_id, partial_id = unique_id(rec)
        title_parts = org_detail['title'].split(' | ', 1)

        solrrec = {
            'id': doc_id,
            'unique_id': friendly_id,
            'partial_id': partial_id,
            'org_name_code': org_detail['name'],
            'org_name_en': title_parts[0],
            'org_name_fr': title_parts[-1],
        }

        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for extra in org_detail['extras']:
                extra_key = extra['key']
                if extra_key in org_fields:
                    solrrec[extra_key] = extra['value']

        for fld in chromo['fields']:
            field_id = fld['datastore_id']
            dtype = fld.get('datastore_type')
            value = rec.get(field_id, '')

            facet_range = fld.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    amount = float(
                        value.replace('$', '').replace(',', ''))
                except ValueError:
                    # not a dollar amount: no range facet for this field
                    amount = None
                if amount is not None:
                    solrrec.update(
                        dollar_range_facet(field_id, facet_range, amount))

            for target in list_or_none(fld.get('solr_sum_to_field')) or ():
                sum_to_field(solrrec, target, value)

            if dtype == 'date':
                try:
                    # from here on value is whatever date2zulu produced;
                    # a ValueError leaves the raw value in place
                    value = date2zulu(value)
                    if fld.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if fld.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if fld.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    pass
            elif fld.get('extract_date_year'):
                if dtype == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass

            if fld.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + field_id] = float(value)
                except ValueError:
                    pass

            solrrec[field_id] = value

            choices = choice_fields.get(field_id)
            if not choices:
                continue

            # "foo_code" fields publish their text as foo_en / foo_fr
            if field_id.endswith('_code'):
                base = field_id[:-5]
            else:
                base = field_id

            if dtype != '_text':
                choice = choices.get(value, {})
                _add_choice(solrrec, base, rec, choice, fld)
                continue

            # comma separated multi-value field: keep only known keys
            selected = [v for v in value.split(',') if v in choices]
            solrrec[base + '_en'] = '; '.join(
                recombinant_language_text(choices[v], 'en')
                for v in selected)
            solrrec[base + '_fr'] = '; '.join(
                recombinant_language_text(choices[v], 'fr')
                for v in selected)

        # full text field is built before static / sum facet values are added
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        sum_facet = chromo.get('solr_sum_range_facet')
        if sum_facet:
            sum_key = sum_facet['sum_field']
            total = float(solrrec[sum_key])
            solrrec.update(
                numeric_range_facet(sum_key, sum_facet['facet_values'], total))

        if unmatched:
            match_compare_output(solrrec, docs, unmatched, chromo)
        else:
            docs.append(solrrec)

    if unmatched:
        docs.extend(unmatched[1].values())

    import pysolr
    import time

    # up to 10 attempts, sleeping 5s, 10s, ... 45s between them
    max_attempts = 10
    for attempt in range(1, max_attempts + 1):
        try:
            if docs:
                conn.add(docs, commit=False)
            break
        except pysolr.SolrError:
            if attempt == max_attempts:
                raise
            print("waiting...")
            time.sleep(attempt * 5)
            print("retrying...")
    return unmatched
Ejemplo n.º 10
0
def _populate_excel_sheet(sheet, chromo, org, refs):
    """
    Format openpyxl sheet for the resource definition chromo and org.

    Row 1 receives the organization name and title, row 2 the field
    labels and row 3 (hidden) the field datastore ids. Each column gets
    its width and number format, and boolean and choice fields get data
    validation over rows 4-1004.

    sheet - openpyxl worksheet to format, modified in place
    chromo - resource definition dict; reads 'resource_name', 'fields',
        'excel_organization_style' and 'excel_header_style'
    org - organization dict; reads 'name' and 'title'
    refs - list of rows to add to reference sheet, modified
        in place from this function

    returns field information for reference sheet

    NOTE(review): the statements through freeze_panes contain no return;
    confirm whether the "returns" line above is still accurate.
    """
    # one shared TRUE/FALSE list validator; boolean columns are attached
    # to it in the field loop below
    boolean_validator = openpyxl.worksheet.datavalidation.DataValidation(
        type="list", formula1='"FALSE,TRUE"', allow_blank=True)
    sheet.add_data_validation(boolean_validator)

    sheet.title = chromo['resource_name']

    def fill_cell(row, column, value, styles):
        # set the value of one cell and apply the given styles to it
        c = sheet.cell(row=row, column=column)
        c.value = value
        apply_styles(styles, c)

    # row 1: organization name in column 1, title in column 2
    org_style = chromo['excel_organization_style']
    fill_cell(1, 1, org['name'], org_style)
    fill_cell(1, 2, org['title'], org_style)
    apply_styles(org_style, sheet.row_dimensions[1])

    header_style = chromo['excel_header_style']

    # datastore_id -> choices, for the fields that have a choice list
    choice_fields = dict(
        (f['datastore_id'], f['choices'])
        for f in recombinant_choice_fields(chromo['resource_name']))

    # one column per field, numbered from 1
    for n, field in enumerate(chromo['fields'], 1):
        # row 2: label passed through _(); row 3: datastore id
        fill_cell(2, n, _(field['label']), header_style)
        fill_cell(3, n, field['datastore_id'], header_style)
        # jumping through openpyxl hoops:
        col_letter = openpyxl.cell.get_column_letter(n)
        col = sheet.column_dimensions[col_letter]
        col.width = field['excel_column_width']
        # FIXME: format only below header
        col.number_format = datastore_type[field['datastore_type']].xl_format
        # validation covers rows 4 through 1004 of this column
        validation_range = '{0}4:{0}1004'.format(col_letter)

        if field['datastore_type'] == 'boolean':
            boolean_validator.ranges.append(validation_range)
        if field['datastore_id'] in choice_fields:
            # reference rows for this field: a heading row with the label,
            # one [None, key, value] row per choice, then an empty row
            refs.append([_(field['label'])])
            # ref1/refN are the first and last choice rows used in the
            # reference-sheet formula; they sit one past the position in
            # refs -- presumably the reference sheet has one row above
            # the refs rows, TODO confirm against the code writing refs
            ref1 = len(refs) + 2
            for key, value in choice_fields[field['datastore_id']]:
                refs.append([None, key, value])
            refN = len(refs) + 1
            refs.append([])

            # restrict the column to the keys listed in column B of the
            # "reference" sheet
            choice_range = 'reference!$B${0}:$B${1}'.format(ref1, refN)
            v = openpyxl.worksheet.datavalidation.DataValidation(
                type="list", formula1=choice_range, allow_blank=True)
            v.errorTitle = u'Invalid choice'
            v.error = (u'Please enter one of the valid keys shown on '
                       'sheet "reference" rows {0}-{1}'.format(ref1, refN))
            sheet.add_data_validation(v)
            v.ranges.append(validation_range)

            # hilight header if bad values pasted below
            # the formula is (non-blank cells) - (cells matching a choice),
            # so it is non-zero when some cell holds an unknown value; the
            # rule is attached to this column's row 2 header cell
            sheet.conditional_formatting.add(
                "{0}2".format(col_letter),
                openpyxl.formatting.FormulaRule(
                    [(
                        'COUNTIF({0},"<>"&"")'  # all non-blank cells
                        '-SUMPRODUCT(COUNTIF({0},{1}))'.format(
                            validation_range, choice_range))],
                    stopIfTrue=True,
                    fill=red_fill))

    # style both header rows; row 3 (datastore ids) is hidden
    apply_styles(header_style, sheet.row_dimensions[2])
    apply_styles(header_style, sheet.row_dimensions[3])
    sheet.row_dimensions[3].hidden = True

    # freeze at A4 so rows 1-3 stay in place while scrolling
    sheet.freeze_panes = sheet['A4']