Beispiel #1
0
def _add_choice(solrrec, key, record, choice, field):
    """
    add the english+french values for choice to solrrec

    Always sets ``solrrec[key + '_en']`` and ``solrrec[key + '_fr']`` from
    ``choice``.  When the choice has a ``lookup`` list, or one of its
    ``conditional_lookup`` entries applies to ``record``, the matching
    entries of ``field['choices_lookup']`` are also stored as lists under
    ``'multi_' + key + '_en'`` and ``'multi_' + key + '_fr'``.

    :param solrrec: solr document dict, updated in place
    :param key: base name of the keys written to solrrec
    :param record: source record, read for conditional_lookup columns
    :param choice: choice definition passed to recombinant_language_text
    :param field: field definition; ``field['choices_lookup']`` is only
        read when a lookup applies
    """
    solrrec[key + '_en'] = recombinant_language_text(choice, 'en')
    solrrec[key + '_fr'] = recombinant_language_text(choice, 'fr')

    # lookups used for choices that expand to multiple values
    if 'lookup' in choice:
        lookup = choice['lookup']
    elif 'conditional_lookup' in choice:
        for conditional in choice['conditional_lookup']:
            if 'column' in conditional:
                column = record[conditional['column']]
                if not column < conditional['less_than']:
                    continue
            # first conditional without a column test, or whose
            # less_than test passes, wins
            lookup = conditional['lookup']
            break
        else:
            # no conditional applied to this record: nothing to expand
            # (falling through here used to leave ``lookup`` unbound)
            return
    else:
        return
    solrrec['multi_' + key + '_en'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'en')
        for cl in lookup]
    solrrec['multi_' + key + '_fr'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'fr')
        for cl in lookup]
Beispiel #2
0
def _add_choice(solrrec, key, record, choice, field):
    """
    add the english+french values for choice to solrrec

    Always sets ``solrrec[key + '_en']`` and ``solrrec[key + '_fr']`` from
    ``choice``.  When the choice has a ``lookup`` list, or one of its
    ``conditional_lookup`` entries applies to ``record``, the matching
    entries of ``field['choices_lookup']`` are also stored as lists under
    ``'multi_' + key + '_en'`` and ``'multi_' + key + '_fr'``.

    :param solrrec: solr document dict, updated in place
    :param key: base name of the keys written to solrrec
    :param record: source record, read for conditional_lookup columns
    :param choice: choice definition passed to recombinant_language_text
    :param field: field definition; ``field['choices_lookup']`` is only
        read when a lookup applies
    """
    solrrec[key + '_en'] = recombinant_language_text(choice, 'en')
    solrrec[key + '_fr'] = recombinant_language_text(choice, 'fr')

    # lookups used for choices that expand to multiple values
    if 'lookup' in choice:
        lookup = choice['lookup']
    elif 'conditional_lookup' in choice:
        for conditional in choice['conditional_lookup']:
            if 'column' in conditional:
                column = record[conditional['column']]
                if not column < conditional['less_than']:
                    continue
            # first conditional without a column test, or whose
            # less_than test passes, wins
            lookup = conditional['lookup']
            break
        else:
            # no conditional applied to this record: nothing to expand
            # (falling through here used to leave ``lookup`` unbound)
            return
    else:
        return
    solrrec['multi_' + key + '_en'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'en')
        for cl in lookup
    ]
    solrrec['multi_' + key + '_fr'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'fr')
        for cl in lookup
    ]
def _append_field_ref_rows(refs, field, link):
    """
    Append the reference rows describing one field to refs (in place).

    Rows are (style, cells) tuples, added in this order: a (None, [])
    row, a 'title' row holding the field label (as a (link, label) tuple
    when link is truthy), an 'attr' row for the datastore id, then one
    'attr' row for each of 'description', 'obligation' and 'format_type'
    that is present in field.
    """
    def add_attr(heading, text):
        refs.append(('attr', [heading, text]))

    refs.append((None, []))

    title = recombinant_language_text(field['label'])
    if link:
        title = (link, title)
    refs.append(('title', [title]))

    add_attr(_('ID'), field['datastore_id'])
    if 'description' in field:
        add_attr(
            _('Description'),
            recombinant_language_text(field['description']))
    if 'obligation' in field:
        add_attr(
            _('Obligation'),
            recombinant_language_text(field['obligation']))
    if 'format_type' in field:
        add_attr(
            _('Format'),
            recombinant_language_text(field['format_type']))
Beispiel #4
0
def _update_records(records, org_detail, conn, resource_name):
    """
    Update records on solr core

    Builds one solr document per record (ids, org names, field values,
    float range facets, extracted date parts, en/fr choice labels and a
    combined "text" value) and sends them all with a single
    ``conn.add_many(..., _commit=True)`` call.

    :param records: record dicts
    :ptype records: sequence of record dicts

    :param org_detail: org structure as returned via local CKAN
        ("name" and "title" are read)
    :ptype org_detail: dict with local CKAN org structure

    :param conn: solr connection
    :ptype conn: obj

    :param resource_name: type being updated
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get("datastore_primary_key", [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail["name"]
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # no primary key configured: fall back to the row's _id,
            # namespaced by resource name (this used to reference the
            # undefined name ``recombinant_type``)
            s = hashlib.md5(s + resource_name + "-%d" % r["_id"]).hexdigest()
            f += u"|" + unicode(r["_id"])
        for k in pk:
            # chain the hash through every primary key value
            s = hashlib.md5(s + r[k].encode("utf-8")).hexdigest()
            f += u"|" + unicode(r[k])
        return s, f

    out = []

    # {datastore_id: {choice value: choice label(s)}} for choice fields
    choice_fields = dict(
        (f["datastore_id"], dict(f["choices"]))
        for f in recombinant_choice_fields(resource_name, all_languages=True)
    )

    for r in records:
        unique, friendly = unique_id(r)

        # org title is stored as "english | french"; without the
        # separator both names get the whole title
        title_parts = org_detail["title"].split(" | ", 1)
        solrrec = {
            "id": unique,
            "unique_id": friendly,
            "org_name_code": org_detail["name"],
            "org_name_en": title_parts[0],
            "org_name_fr": title_parts[-1],
        }

        for f in chromo["fields"]:
            key = f["datastore_id"]
            value = r[key]

            facet_range = f.get("solr_float_range_facet")
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric text: leave the range facet unset
                    pass
                else:
                    # first facet with no upper bound, or whose bound
                    # exceeds the value, is the one recorded
                    for i, fac in enumerate(facet_range):
                        if "less_than" not in fac or float_value < fac["less_than"]:
                            label_parts = fac["label"].split(" | ")
                            solrrec[key + "_range"] = str(i)
                            solrrec[key + "_range_en"] = label_parts[0]
                            solrrec[key + "_range_fr"] = label_parts[-1]
                            break

            if f.get("datastore_type") == "date":
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get("extract_date_year"):
                        solrrec["date_year"] = value.split("-", 1)[0]
                    if f.get("extract_date_month"):
                        solrrec["date_month"] = value.split("-")[1]
                except ValueError:
                    # keep the original value when a ValueError is raised
                    pass
            solrrec[key] = value

            choices = choice_fields.get(f["datastore_id"])
            if not choices:
                continue

            # "<name>_code" fields get their labels under "<name>_en/_fr"
            if key.endswith("_code"):
                key = key[:-5]
            solrrec[key + "_en"] = recombinant_language_text(choices.get(value, ""), "en")
            solrrec[key + "_fr"] = recombinant_language_text(choices.get(value, ""), "fr")

        solrrec["text"] = u" ".join(unicode(v) for v in solrrec.values())
        out.append(solrrec)

    conn.add_many(out, _commit=True)
def _populate_reference_sheet(sheet, geno, refs):
    """
    Fill in and style the reference sheet from the collected refs rows.

    sheet - worksheet to write to
    geno - definition dict; read for 'title' and the optional
        'excel_edge_style', 'excel_header_style' and
        'excel_example_style' overrides
    refs - sequence of (style, cells) rows, written one per sheet row
        starting at row REF_FIRST_ROW - 1
    """
    def themed(defaults, override_key):
        # module defaults with any geno-level overrides layered on top
        return dict(defaults, **geno.get(override_key, {}))

    # running number shown beside each 'title' row
    next_field_number = 1

    edge_style = themed(DEFAULT_EDGE_STYLE, 'excel_edge_style')
    header1_style = themed(DEFAULT_HEADER_STYLE, 'excel_header_style')
    header2_style = themed(DEFAULT_REF_HEADER2_STYLE, 'excel_header_style')
    choice_style = themed(DEFAULT_EXAMPLE_STYLE, 'excel_example_style')

    # two header rows: the geno title, then a "Reference" heading
    sheet_title = recombinant_language_text(geno['title'])
    fill_cell(
        sheet, REF_HEADER1_ROW, REF_KEY_COL_NUM, sheet_title, header1_style)
    apply_styles(header1_style, sheet.row_dimensions[REF_HEADER1_ROW])

    fill_cell(
        sheet, REF_HEADER2_ROW, REF_KEY_COL_NUM, _('Reference'), header2_style)
    apply_styles(header2_style, sheet.row_dimensions[REF_HEADER2_ROW])

    for edge_row in sheet[REF_EDGE_RANGE]:
        (edge_cell,) = edge_row
        apply_styles(edge_style, edge_cell)

    sheet.row_dimensions[REF_HEADER1_ROW].height = REF_HEADER1_HEIGHT
    sheet.row_dimensions[REF_HEADER2_ROW].height = REF_HEADER2_HEIGHT

    for row_number, (style, ref_line) in enumerate(refs, REF_FIRST_ROW - 1):
        hyperlink = None
        wrapped = None
        cell_texts = ref_line
        if len(ref_line) == 2:
            # key/value row: wrap the value to the value column width
            wrapped = wrap_text_to_width(ref_line[1], REF_VALUE_WIDTH).strip()
            cell_texts = [ref_line[0], wrapped]
        elif len(ref_line) == 1 and isinstance(ref_line[0], tuple):
            # single (link, text) cell
            hyperlink, linked_text = ref_line[0]
            cell_texts = [linked_text.strip()]

        for col_number, text in enumerate(cell_texts, REF_KEY_COL_NUM):
            target = sheet.cell(row=row_number, column=col_number)
            target.value = text.strip().replace('\n', '\r\n')

        if wrapped is not None:
            # one LINE_HEIGHT per line of wrapped text
            sheet.row_dimensions[row_number].height = LINE_HEIGHT + (
                wrapped.count('\n') * LINE_HEIGHT)

        key_cell = sheet.cell(row=row_number, column=REF_KEY_COL_NUM)
        value_cell = sheet.cell(row=row_number, column=REF_VALUE_COL_NUM)

        if style == 'title':
            sheet.merge_cells(REF_FIELD_NUM_MERGE.format(row=row_number))
            sheet.merge_cells(REF_FIELD_TITLE_MERGE.format(row=row_number))
            fill_cell(
                sheet,
                row_number,
                REF_FIELD_NUM_COL_NUM,
                next_field_number,
                REF_NUMBER_STYLE)
            # fetched again after the merges above
            title_cell = sheet.cell(row=row_number, column=REF_KEY_COL_NUM)
            if hyperlink:
                title_cell.hyperlink = hyperlink
            apply_styles(REF_TITLE_STYLE, title_cell)
            sheet.row_dimensions[row_number].height = REF_FIELD_TITLE_HEIGHT
            next_field_number += 1
        elif style == 'choice':
            pad_cell = sheet.cell(row=row_number, column=REF_KEY_COL_NUM - 1)
            for styled_cell in (pad_cell, key_cell, value_cell):
                apply_styles(choice_style, styled_cell)
        elif style in ('attr', 'choice heading'):
            apply_styles(REF_ATTR_STYLE, key_cell)
            apply_styles(REF_VALUE_STYLE, value_cell)
            if style == 'choice heading':
                sheet.row_dimensions[row_number].height = (
                    REF_CHOICE_HEADING_HEIGHT)

        apply_styles(REF_PAPER_STYLE, sheet.row_dimensions[row_number])

    sheet.column_dimensions[RSTATUS_COL].width = RSTATUS_WIDTH
    sheet.cell(row=1, column=RPAD_COL_NUM).value = None  # make sure rpad col exists
    sheet.column_dimensions[RPAD_COL].width = RPAD_WIDTH
    sheet.column_dimensions[REF_KEY_COL].width = REF_KEY_WIDTH
    sheet.column_dimensions[REF_VALUE_COL].width = REF_VALUE_WIDTH
def _populate_excel_sheet(sheet, geno, chromo, org, refs, resource_num):
    """
    Format openpyxl sheet for the resource definition chromo and org.
    (Version 3)

    sheet - openpyxl worksheet to fill in; its title is set to
        chromo['resource_name']
    geno - definition dict, read here only for the optional
        excel_*_style overrides
    chromo - resource definition; 'resource_name', 'title', 'examples'
        and the optional 'excel_data_num_rows', 'excel_example_height'
        and 'excel_data_height' keys are read
    org - organization dict; 'title' goes into the header and 'name'
        into the hidden code row
    refs - list of rows to add to reference sheet, modified
        in place from this function
    resource_num - 1-based index of resource

    returns cranges dict of {datastore_id: reference_key_range}
    """
    sheet.title = chromo['resource_name']

    # module default styles with geno-level overrides layered on top;
    # the required style starts from the (already merged) edge style
    edge_style = dict(DEFAULT_EDGE_STYLE, **geno.get('excel_edge_style', {}))
    required_style = dict(edge_style, **geno.get('excel_required_style', {}))
    header_style = dict(DEFAULT_HEADER_STYLE, **geno.get('excel_header_style', {}))
    cheadings_style = dict(DEFAULT_CHEADING_STYLE, **geno.get('excel_column_heading_style', {}))
    example_style = dict(DEFAULT_EXAMPLE_STYLE, **geno.get('excel_example_style', {}))
    error_style = dict(DEFAULT_ERROR_STYLE, **geno.get('excel_error_style', {}))

    cranges = {}
    data_num_rows = chromo.get('excel_data_num_rows', DEFAULT_DATA_NUM_ROWS)

    # create rows so we can set all heights
    for i in xrange(1, DATA_FIRST_ROW + data_num_rows):
        sheet.cell(row=i, column=1).value = None

    sheet.merge_cells(EXAMPLE_MERGE)
    fill_cell(sheet, EXAMPLE_ROW, 1, _('e.g.'), example_style)

    # marker in the pad column of the first data row; the formula shows
    # nothing when the same cell on sheet r<resource_num> is truthy
    fill_cell(
        sheet,
        DATA_FIRST_ROW,
        RPAD_COL_NUM,
        u'=IF(r{rnum}!{col}{row},"","▶")'.format(
            rnum=resource_num,
            col=RPAD_COL,
            row=DATA_FIRST_ROW),
        TYPE_HERE_STYLE)

    # header text: resource title, an em dash, then the org title
    fill_cell(
        sheet,
        HEADER_ROW,
        DATA_FIRST_COL_NUM,
        recombinant_language_text(chromo['title'])
            + u' \N{em dash} ' + org_title_lang_hack(org['title']),
        header_style)

    sheet.cell(row=CODE_ROW, column=1).value = 'v3'  # template version
    # allow only upload to this org
    sheet.cell(row=CODE_ROW, column=2).value = org['name']

    cheadings_dimensions = sheet.row_dimensions[CHEADINGS_ROW]

    # {datastore_id: choices} for the fields that have choices
    choice_fields = dict(
        (f['datastore_id'], f['choices'])
        for f in recombinant_choice_fields(chromo['resource_name']))

    for col_num, field in template_cols_fields(chromo):
        field_heading = recombinant_language_text(
            field.get('excel_heading', field['label'])).strip()
        # grow the heading row to fit the heading with the most newlines
        cheadings_dimensions.height = max(
            cheadings_dimensions.height,
            field_heading.count('\n') * LINE_HEIGHT + CHEADINGS_HEIGHT)

        col_heading_style = cheadings_style
        if 'excel_column_heading_style' in field:
            # use geno column heading style as base, just override keys
            col_heading_style = dict(
                cheadings_style,
                **field['excel_column_heading_style'])
            apply_styles(col_heading_style, sheet.cell(
                row=HEADER_ROW, column=col_num))
            apply_styles(col_heading_style, sheet.cell(
                row=CSTATUS_ROW, column=col_num))

        fill_cell(
            sheet,
            CHEADINGS_ROW,
            col_num,
            field_heading,
            col_heading_style)

        # first reference row for this field, captured before any of its
        # rows are appended to refs; used for the heading hyperlink below
        reference_row1 = len(refs) + REF_FIRST_ROW

        # match against db columns
        sheet.cell(row=CODE_ROW, column=col_num).value = field['datastore_id']

        # list examples are shown comma-separated
        example = chromo['examples']['record'].get(field['datastore_id'], '')
        fill_cell(
            sheet,
            EXAMPLE_ROW,
            col_num,
            u','.join(example) if isinstance(example, list)
            else example,
            example_style)

        col_letter = openpyxl.cell.get_column_letter(col_num)

        # jump to first error/required cell in column
        fill_cell(
            sheet,
            CSTATUS_ROW,
            col_num,
            '=IF(e{rnum}!{col}{row}>0,HYPERLINK("#{col}"&e{rnum}!{col}{row},"")'
                ',IF(r{rnum}!{col}{row}>0,HYPERLINK("#{col}"&r{rnum}!{col}{row},""),""))'
                .format(rnum=resource_num, col=col_letter, row=CSTATUS_ROW),
            col_heading_style)

        # explicit width from the field wins; otherwise estimate from the
        # heading, with CHEADINGS_MIN_WIDTH as a floor
        col = sheet.column_dimensions[col_letter]
        if 'excel_column_width' in field:
            col.width = field['excel_column_width']
        else:
            col.width = max(estimate_width(field_heading), CHEADINGS_MIN_WIDTH)

        # all data cells of this column
        validation_range = '{col}{row1}:{col}{rowN}'.format(
            col=col_letter,
            row1=DATA_FIRST_ROW,
            rowN=DATA_FIRST_ROW + data_num_rows - 1)

        # data cells: typed number format, wrapped text, unlocked
        xl_format = datastore_type[field['datastore_type']].xl_format
        alignment = openpyxl.styles.Alignment(wrap_text=True)
        protection = openpyxl.styles.Protection(locked=False)
        for (c,) in sheet[validation_range]:
            c.number_format = xl_format
            c.alignment = alignment
            c.protection = protection
        # the example cell gets the same format/alignment but no
        # protection change
        ex_cell = sheet.cell(row=EXAMPLE_ROW, column=col_num)
        ex_cell.number_format = xl_format
        ex_cell.alignment = alignment

        # reference rows for this field link back to its column heading
        _append_field_ref_rows(refs, field, "#'{sheet}'!{col}{row}".format(
            sheet=sheet.title, col=col_letter, row=CHEADINGS_ROW))

        if field['datastore_id'] in choice_fields:
            # full-text choices are never used for '_text' fields
            full_text_choices = (
                field['datastore_type'] != '_text' and field.get(
                'excel_full_text_choices', False))
            # NOTE(review): ref1 is one row past the next row appended to
            # refs -- presumably _append_field_choices_rows adds a heading
            # row before the choices; confirm against that helper
            ref1 = len(refs) + REF_FIRST_ROW
            max_choice_width = _append_field_choices_rows(
                refs,
                choice_fields[field['datastore_id']],
                full_text_choices)
            # reference-sheet row of the last entry appended so far
            refN = len(refs) + REF_FIRST_ROW - 2

            if full_text_choices:
                if 'excel_column_width' not in field:
                    col.width = max(col.width, max_choice_width)
                # expand example
                for ck, cv in choice_fields[field['datastore_id']]:
                    if ck == example:
                        ex_cell.value = u"{0}: {1}".format(ck, cv)
                        break

            choice_range = 'reference!${col}${ref1}:${col}${refN}'.format(
                col=REF_KEY_COL, ref1=ref1, refN=refN)
            user_choice_range = field.get('excel_choice_range_formula')
            if user_choice_range:
                # replacement field names used by the custom formula,
                # other than the built-in 'range' and 'range_top'
                choice_keys = set(
                    key for (_i, key, _i, _i) in string.Formatter().parse(user_choice_range)
                    if key != 'range' and key != 'range_top')
                choice_values = {}
                if choice_keys:
                    # map each referenced datastore_id to the address of
                    # its first data cell
                    choice_values = {
                        f['datastore_id']: "{col}{num}".format(
                            col=openpyxl.cell.get_column_letter(cn),
                            num=DATA_FIRST_ROW)
                        for cn, f in template_cols_fields(chromo)
                        if f['datastore_id'] in choice_keys}
                user_choice_range = user_choice_range.format(
                    range=choice_range,
                    range_top=choice_range.split(':')[0],
                    **choice_values)
            # the returned range is always the plain reference range, not
            # the custom formula
            cranges[field['datastore_id']] = choice_range

            choices = [c[0] for c in choice_fields[field['datastore_id']]]
            # '_text' fields get no list validation
            if field['datastore_type'] != '_text':
                v = openpyxl.worksheet.datavalidation.DataValidation(
                    type="list",
                    formula1=user_choice_range or choice_range,
                    allow_blank=True)
                v.errorTitle = u'Invalid choice'
                valid_keys = u', '.join(unicode(c) for c in choices)
                # short key lists are spelled out in the error message;
                # longer ones point at the reference sheet rows
                if len(valid_keys) < 40:
                    v.error = (u'Please enter one of the valid keys: '
                        + valid_keys)
                else:
                    v.error = (u'Please enter one of the valid keys shown on '
                        'sheet "reference" rows {0}-{1}'.format(ref1, refN))
                sheet.add_data_validation(v)
                v.ranges.append(validation_range)

        # link the column heading to this field's block of reference rows
        # (including any choice rows appended above)
        sheet.cell(row=CHEADINGS_ROW, column=col_num).hyperlink = (
            '#reference!{colA}{row1}:{colZ}{rowN}'.format(
                colA=REF_FIELD_NUM_COL,
                row1=reference_row1,
                colZ=REF_VALUE_COL,
                rowN=len(refs) + REF_FIRST_ROW - 2))

    # col_letter still holds the letter of the last column from the loop
    # NOTE(review): it is unbound if template_cols_fields yields nothing
    _add_conditional_formatting(
        sheet,
        col_letter,
        resource_num,
        error_style,
        required_style,
        data_num_rows)

    sheet.row_dimensions[HEADER_ROW].height = HEADER_HEIGHT
    sheet.row_dimensions[CODE_ROW].hidden = True
    sheet.row_dimensions[CSTATUS_ROW].height = CSTATUS_HEIGHT
    sheet.row_dimensions[EXAMPLE_ROW].height = chromo.get(
        'excel_example_height', DEFAULT_EXAMPLE_HEIGHT)
    for i in xrange(DATA_FIRST_ROW, DATA_FIRST_ROW + data_num_rows):
        sheet.row_dimensions[i].height = chromo.get(
            'excel_data_height', DEFAULT_DATA_HEIGHT)

        # jump to first error/required cell in row
        sheet.cell(row=i, column=RSTATUS_COL_NUM).value = (
            '=IF(e{rnum}!{col}{row}>0,'
                'HYPERLINK("#"&ADDRESS({row},e{rnum}!{col}{row}),""),'
                'IF(r{rnum}!{col}{row}>0,'
                    'HYPERLINK("#"&ADDRESS({row},r{rnum}!{col}{row}),""),""))'
            .format(rnum=resource_num, col=RSTATUS_COL, row=i))

    sheet.column_dimensions[RSTATUS_COL].width = RSTATUS_WIDTH
    sheet.column_dimensions[RPAD_COL].width = RPAD_WIDTH

    sheet.freeze_panes = sheet[FREEZE_PANES]

    # row-level styles for the header, heading, status and example rows
    apply_styles(header_style, sheet.row_dimensions[HEADER_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CHEADINGS_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CSTATUS_ROW])
    apply_styles(example_style, sheet.row_dimensions[EXAMPLE_ROW])
    for (c,) in sheet[EDGE_RANGE]:
        apply_styles(edge_style, c)

    # trying to set the active cell (not working yet)
    select = "{col}{row}".format(col=DATA_FIRST_COL, row=DATA_FIRST_ROW)
    sheet.sheet_view.selection[0].activeCell = select
    sheet.sheet_view.selection[0].sqref = select

    return cranges
Beispiel #7
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Builds one solr document per record and, when any were produced,
    sends them with ``conn.add_many(..., _commit=True)``.  When a field
    of the resource has ``solr_compare_previous_year``, each document is
    passed through ``match_compare_output`` instead of being appended
    to the output directly.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
        ('name' and 'title_translated' are read)
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field uses solr_compare_previous_year)
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # no primary key configured: fall back to the row's _id,
            # namespaced by resource name (this used to reference the
            # undefined name ``recombinant_type``)
            s = hashlib.md5(s + resource_name +
                            "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash through every primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    # {datastore_id: {choice value: choice label(s)}} for choice fields
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        # both names come from the same 'title_translated' dict (the
        # key was previously misspelled two different ways)
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title_translated']['en'],
            'org_name_fr': org_detail['title_translated']['fr'],
        }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric text: leave the range facet unset
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    # keep the original value when a ValueError is raised
                    pass
            elif f.get('datastore_type') == 'year':
                if f.get('extract_date_year'):
                    solrrec['date_year'] = value
            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get labels under "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        # built before the static fields are merged in, so those are
        # not part of the combined text
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
Beispiel #8
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Builds one solr document per record and, when any were produced,
    sends them with ``conn.add_many(..., _commit=True)``.  When a field
    of the resource has ``solr_compare_previous_year``, each document is
    passed through ``match_compare_output`` instead of being appended
    to the output directly.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
        ('name' and 'title' are read)
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field uses solr_compare_previous_year)
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # no primary key configured: fall back to the row's _id,
            # namespaced by resource name (this used to reference the
            # undefined name ``recombinant_type``)
            s = hashlib.md5(s + resource_name + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash through every primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    # {datastore_id: {choice value: choice label(s)}} for choice fields
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        # org title is stored as "english | french"; without the
        # separator both names get the whole title
        title_parts = org_detail['title'].split(' | ', 1)
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': title_parts[0],
            'org_name_fr': title_parts[-1],
            }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric text: leave the range facet unset
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    # keep the original value when a ValueError is raised
                    pass
            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get labels under "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
Beispiel #9
0
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Builds one solr document per record and sends the batch with
    ``conn.add(out, commit=False)``.  On ``pysolr.SolrError`` the add is
    retried, up to 10 attempts in total, sleeping 5, 10, ... 45 seconds
    between attempts; the error from the last attempt is re-raised.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
        ('name', 'title' and, when the resource defines
        solr_org_fields, 'extras' are read)
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field uses solr_compare_previous_year)
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id, partial id"
        s = orghash
        f = org
        p = org
        for k in pk:
            # chain the hash through every primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
            # partial id only gets the first primary key value: once a
            # '|' is present nothing more is appended
            # NOTE(review): an org name containing '|' would leave p
            # without any key value
            if u'|' not in p:
                p += u'|' + unicode(r[k])
        return s, f, p

    out = []

    # {datastore_id: {choice value: choice definition}} for choice fields
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly, partial = unique_id(r)

        # org title is stored as "english | french"; without the
        # separator both names get the whole title
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
        }

        # copy the org extras named in solr_org_fields onto the document
        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for e in org_detail['extras']:
                if e['key'] in org_fields:
                    solrrec[e['key']] = e['value']

        for f in chromo['fields']:
            key = f['datastore_id']
            # fields missing from the record are treated as empty text
            value = r.get(key, '')

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    # strip currency formatting before parsing
                    # NOTE(review): assumes value is a string here; a
                    # non-string value would raise AttributeError, which
                    # is not caught below
                    float_value = float(
                        value.replace('$', '').replace(',', ''))
                except ValueError:
                    # non-numeric text: leave the range facet unset
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if f.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    # keep the original value when a ValueError is raised
                    pass
            elif f.get('extract_date_year'):
                # non-date fields: 'year' values are used as-is, anything
                # else is parsed as the integer before the first '-'
                if f.get('datastore_type') == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass
            if f.get('extract_double_sortable'):
                # numeric copy of the value under a 'doubl_'-prefixed key
                try:
                    solrrec['doubl_' + key] = float(value)
                except ValueError:
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get labels under "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                if f.get('datastore_type') == '_text':
                    # multi-value field: value is split on commas and the
                    # labels of known choices are joined with '; '
                    solrrec[key + '_en'] = '; '.join(
                        recombinant_language_text(choices[v], 'en')
                        for v in value.split(',') if v in choices)
                    solrrec[key + '_fr'] = '; '.join(
                        recombinant_language_text(choices[v], 'fr')
                        for v in value.split(',') if v in choices)
                else:
                    # unknown values fall back to an empty choice dict
                    choice = choices.get(value, {})
                    _add_choice(solrrec, key, r, choice, f)

        # built before the static fields and sum range facet are added,
        # so those are not part of the combined text
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        ssrf = chromo.get('solr_sum_range_facet')
        if ssrf:
            # NOTE(review): requires solrrec to already hold sum_field --
            # presumably filled in by sum_to_field above; confirm
            key = ssrf['sum_field']
            float_value = float(solrrec[key])
            solrrec.update(
                numeric_range_facet(key, ssrf['facet_values'], float_value))

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    # also send whatever is left in the second unmatched dict ("next
    # years" per the comment where unmatched is created)
    if unmatched:
        out.extend(unmatched[1].values())

    import pysolr
    # a counts down 9..0; a == 0 is the final attempt
    for a in reversed(range(10)):
        try:
            if out:
                conn.add(out, commit=False)
            break
        except pysolr.SolrError:
            if not a:
                raise
            print "waiting..."
            import time
            # back off a little longer after every failure: 5s, 10s, ...
            time.sleep((10 - a) * 5)
            print "retrying..."
    return unmatched