Example #1
 def test_example(self):
     lc = LocalCKAN()
     record = get_chromo('service')['examples']['record']
     lc.action.datastore_upsert(
         resource_id=self.service_id,
         records=[record])
     record = get_chromo('service-std')['examples']['record']
     lc.action.datastore_upsert(
         resource_id=self.service_std_id,
         records=[record])
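Every snippet on this page passes a resource name to get_chromo() and indexes into the returned definition. As rough orientation, here is a minimal sketch of the "chromo" dict shape the examples assume; the keys mirror what the snippets below access, but the field names and values are illustrative only, since the real definitions come from ckanext-recombinant's table definition files.

# Illustrative sketch only -- not a real ckanext-recombinant table definition.
chromo = {
    'dataset_type': 'contracts',                   # owning dataset type
    'datastore_primary_key': ['reference_number'],
    'fields': [                                    # one entry per datastore column
        {'datastore_id': 'reference_number',
         'datastore_type': 'text',
         'label': 'Reference Number'},
    ],
    'examples': {
        'record': {'reference_number': 'C-2019-Q1-00001'},
    },
}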
Example #2
 def test_inter_field_errors(self):
     lc = LocalCKAN()
     record = dict(
         get_chromo('contracts')['examples']['record'],
         contract_date='2022-01-01',
         instrument_type='A',
         buyer_name='Smith',
         economic_object_code='NA',
         trade_agreement=['CA'],
         land_claims=['JN'],
         award_criteria='0',
         solicitation_procedure='TN',
     )
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(resource_id=self.resource_id,
                                    records=[record])
     err = ve.exception.error_dict['records'][0]
     expected = {
         'buyer_name': [
             'This field must be populated with an NA '
             'if an amendment is disclosed under Instrument Type'
         ],
         'economic_object_code': [
             'If N/A, then Instrument Type must be identified '
             'as a standing offer/supply arrangement (SOSA)'
         ],
         'number_of_bids': [
             'This field must be populated with a 1 if the solicitation procedure is '
             'identified as non-competitive (TN) or Advance Contract Award Notice (AC).'
         ],
     }
     assert isinstance(err, dict), err
     for k in set(err) | set(expected):
         assert_equal(err.get(k), expected.get(k), (k, err))
Example #3
    def _load_one_csv_file(self, name):
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        lc = LocalCKAN()

        for org_name, records in csv_data_batch(name, chromo):
            results = lc.action.package_search(q='type:%s organization:%s' %
                                               (dataset_type, org_name),
                                               rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (dataset_type,
                                                              org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            for r in results[0]['resources']:
                if r['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            print '-', org_name, len(records)
            lc.action.datastore_upsert(method=method,
                                       resource_id=r['id'],
                                       records=records)
        return 0
Example #4
def get_datapreview_recombinant(resource_name, res_id):
    from ckanext.recombinant.tables import get_chromo
    chromo = get_chromo(resource_name)
    default_preview_args = {}

    lc = ckanapi.LocalCKAN(username=c.user)
    results = lc.action.datastore_search(
        resource_id=res_id,
        limit=0,
        )

    priority = len(chromo['datastore_primary_key'])
    pk_priority = 0
    fields = []
    for f in chromo['fields']:
        out = {
            'type': f['datastore_type'],
            'id': f['datastore_id'],
            'label': h._(f['label'])}
        if out['id'] in chromo['datastore_primary_key']:
            out['priority'] = pk_priority
            pk_priority += 1
        else:
            out['priority'] = priority
            priority += 1
        fields.append(out)

    return h.snippet('package/wet_datatable.html',
        resource_name=resource_name,
        resource_id=res_id,
        ds_fields=fields)
Example #5
    def preview_table(self, resource_name, owner_org, errors=None):
        lc = ckanapi.LocalCKAN(username=c.user)
        try:
            chromo = get_chromo(resource_name)
        except RecombinantException:
            abort(404, _('Recombinant resource_name not found'))
        try:
            dataset = lc.action.recombinant_show(
                dataset_type=chromo['dataset_type'], owner_org=owner_org)
        except ckanapi.NotFound:
            abort(404, _('Table for this organization not found'))
        org = lc.action.organization_show(id=owner_org)

        for r in dataset['resources']:
            if r['name'] == resource_name:
                break
        else:
            abort(404, _('Resource not found'))

        return render('recombinant/resource_edit.html', extra_vars={
            'dataset': dataset,
            'resource': r,
            'organization': org,
            'errors': errors,
            })
Example #6
 def test_ministers_office(self):
     lc = LocalCKAN()
     record = dict(get_chromo('contracts')['examples']['record'],
                   contract_date='2019-06-21',
                   ministers_office='N')
     lc.action.datastore_upsert(resource_id=self.resource_id,
                                records=[record])
Example #7
 def test_multi_field_errors(self):
     lc = LocalCKAN()
     record = dict(
         get_chromo('contracts')['examples']['record'],
         trade_agreement=['XX', 'NA'],
         land_claims=['JN', 'NA'],
         limited_tendering_reason=['00', '05'],
         trade_agreement_exceptions=['00', '01'],
     )
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(resource_id=self.resource_id,
                                    records=[record])
     err = ve.exception.error_dict['records'][0]
     expected = {
         'trade_agreement': [
             'If the value XX (none) is entered, then no other value '
             'can be entered in this field.'
         ],
         'land_claims': [
             'If the value NA (not applicable) is entered, then no other '
             'value can be entered in this field.'
         ],
         'limited_tendering_reason': [
             'If the value 00 (none) is entered, then no other value can '
             'be entered in this field.'
         ],
         'trade_agreement_exceptions': [
             'If the value 00 (none) is entered, then no other value can '
             'be entered in this field.'
         ],
     }
     assert isinstance(err, dict), err
     for k in set(err) | set(expected):
         assert_equal(err.get(k), expected.get(k), (k, err))
Example #8
 def test_2022_fields(self):
     lc = LocalCKAN()
     record = dict(
         get_chromo('contracts')['examples']['record'],
         contract_date='2022-01-01',
         vendor_postal_code=None,
         buyer_name='',
         trade_agreement='',
         agreement_type_code='Z',
         land_claims=None,
         aboriginal_business='',
     )
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(resource_id=self.resource_id,
                                    records=[record])
     err = ve.exception.error_dict['records'][0]
     expected = {
         'vendor_postal_code': ['This field must not be empty'],
         'buyer_name': ['This field must not be empty'],
         'trade_agreement': ['This field must not be empty'],
         'agreement_type_code': ['Discontinued as of 2022-01-01'],
         'land_claims': ['This field must not be empty'],
         'aboriginal_business': ['This field must not be empty'],
     }
     assert isinstance(err, dict), err
     for k in set(err) | set(expected):
         assert_equal(err.get(k), expected.get(k), (k, err))
Example #9
 def test_empty_string_instead_of_null(self):
     lc = LocalCKAN()
     record = dict(get_chromo('grants')['examples']['record'])
     record['foreign_currency_type'] = ''
     record['foreign_currency_value'] = ''
     lc.action.datastore_upsert(resource_id=self.resource_id,
                                records=[record])
Example #10
def get_datapreview_recombinant(resource_name, res_id):
    from ckanext.recombinant.tables import get_chromo
    chromo = get_chromo(resource_name)
    default_preview_args = {}

    lc = ckanapi.LocalCKAN(username=c.user)
    results = lc.action.datastore_search(
        resource_id=res_id,
        limit=0,
        )

    priority = len(chromo['datastore_primary_key'])
    pk_priority = 0
    fields = []
    for f in chromo['fields']:
        out = {
            'type': f['datastore_type'],
            'id': f['datastore_id'],
            'label': h.recombinant_language_text(f['label'])}
        if out['id'] in chromo['datastore_primary_key']:
            out['priority'] = pk_priority
            pk_priority += 1
        else:
            out['priority'] = priority
            priority += 1
        fields.append(out)

    return h.snippet('package/wet_datatable.html',
        resource_name=resource_name,
        resource_id=res_id,
        ds_fields=fields)
Example #11
    def preview_table(self, resource_name, owner_org, errors=None):
        lc = ckanapi.LocalCKAN(username=c.user)
        try:
            chromo = get_chromo(resource_name)
        except RecombinantException:
            abort(404, _('Recombinant resource_name not found'))
        try:
            dataset = lc.action.recombinant_show(
                dataset_type=chromo['dataset_type'], owner_org=owner_org)
        except ckanapi.NotFound:
            abort(404, _('Table for this organization not found'))
        org = lc.action.organization_show(id=owner_org)

        for r in dataset['resources']:
            if r['name'] == resource_name:
                break
        else:
            abort(404, _('Resource not found'))

        return render('recombinant/resource_edit.html',
                      extra_vars={
                          'dataset': dataset,
                          'resource': r,
                          'organization': org,
                          'errors': errors,
                      })
Example #12
def recombinant_get_chromo(resource_name):
    """
    Get the resource definition (chromo) for the given resource name
    """
    try:
        return get_chromo(resource_name)
    except RecombinantException:
        return
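Because the helper above returns None rather than raising for an unknown resource_name, callers (typically templates) are expected to guard before indexing into the result. A hypothetical caller:

chromo = recombinant_get_chromo('contracts')
fields = chromo['fields'] if chromo else []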
Example #13
def recombinant_primary_key_fields(resource_name):
    try:
        chromo = get_chromo(resource_name)
    except RecombinantException:
        return []
    return [
        f for f in chromo['fields']
        if f['datastore_id'] in chromo['datastore_primary_key']
        ]
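A small hypothetical usage sketch: the returned field definitions are commonly reduced to their datastore column ids, the same ids other snippets on this page use to build primary-key lookups.

pk_fields = recombinant_primary_key_fields('contracts')
pk_ids = [f['datastore_id'] for f in pk_fields]  # [] when the name is unknown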
Example #14
 def test_ministers_office_missing(self):
     lc = LocalCKAN()
     record = dict(get_chromo('contracts')['examples']['record'],
                   contract_date='2019-06-21',
                   ministers_office=None)
     assert_raises(ValidationError,
                   lc.action.datastore_upsert,
                   resource_id=self.resource_id,
                   records=[record])
Example #15
def rebuild(command_name, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]

            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])

            for org_id, records in csv_data_batch(csv_file,
                                                  chromo,
                                                  strict=strict):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records
                ]
                if org_id != prev_org:
                    unmatched = None
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(records, org_detail, conn,
                                            resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc,
                                                     command_name):
                unmatched = _update_records(records, org_detail, conn,
                                            resource_name, unmatched)
                count += len(records)
            print org, count

    print "commit"
    conn.commit()
Example #16
    def _build_templates(self):
        """
        Implement build-templates command
        """
        lc = LocalCKAN()
        output_files = {}
        next_row = {}
        output_counter = {}
        output_path = self.args[2:][-1]
        dataset_types = get_dataset_types(self.command_name)
        table = get_chromo(dataset_types[0])

        def close_write_file(org_id):
            book = output_files[org_id]
            if not book:
                return
            book.save(os.path.join(output_path, org_id + "-" + str(output_counter[org_id]) + ".xls"))
            output_files[org_id] = None

        def out_file(org_id):
            if org_id in output_files:
                next_row[org_id] += 1
                # need to start a new file?
                if next_row[org_id] > SPLIT_XLS_ROWS:
                    close_write_file(org_id)
                else:
                    return output_files[org_id], next_row[org_id]
            try:
                org = lc.action.organization_show(id=org_id, include_data_batch=False)
            except NotFound:
                logging.error("org id", org_id, "not found")
                output_files[org_id] = None
                next_row[org_id] = 0
                return None, None
            book = excel_template(dataset_types[0], org)
            output_files[org_id] = book
            output_counter[org_id] = output_counter.get(org_id, 0) + 1
            next_row[org_id] = len(book.get_sheet(0).get_rows())
            return book, next_row[org_id]

        def add_row(book, row, d):
            sheet = book.get_sheet(0)
            for i, f in enumerate(table["fields"]):
                sheet.write(row, i, d[f["datastore_id"]])

        for f in self.args[1:-1]:
            for d in DictReader(open(f, "rb")):
                book, row = out_file(d["organization"])
                if not book:
                    continue
                add_row(book, row, d)

        for org_id in output_files:
            close_write_file(org_id)
Example #17
 def test_not_going_forward_unpublished(self):
     lc = LocalCKAN()
     record = get_chromo('consultations')['examples']['record']
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(
             resource_id=self.resource_id,
             records=[dict(record, publishable='Y', status='NF')])
     err = ve.exception.error_dict['records'][0]
     expected = {
         u'status': [u'If Status is set to: Not Going Forward, Publish Record must be set to No']
         }
     assert_equal(err, expected)
Example #18
    def type_redirect(self, resource_name):
        orgs = h.organizations_available('read')

        if not orgs:
            abort(404, _('No organizations found'))
        try:
            chromo = get_chromo(resource_name)
        except RecombinantException:
            abort(404, _('Recombinant resource_name not found'))

        return redirect(h.url_for('recombinant_resource',
            resource_name=resource_name, owner_org=orgs[0]['name']))
Example #19
File: pd.py Project: wardi/ckanext-canada
def rebuild(command_name, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]

            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])

            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [dict((k, safe_for_solr(v)) for k, v in
                            row_dict.items()) for row_dict in records]
                if org_id != prev_org:
                    unmatched = None
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count

    print "commit"
    conn.commit()
Example #20
    def _rebuild(self, csv_files=None, solr_url=None, strict=True):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index(solr_url, False)

        conn = solr_connection('ati', solr_url)
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ':'
                firstpart, filename = os.path.split(csv_file)
                assert filename.endswith('.csv')
                resource_name = filename[:-4]

                chromo = get_chromo(resource_name)
                geno = get_geno(chromo['dataset_type'])
                assert geno.get('target_dataset') == TARGET_DATASET

                for org_id, records in csv_data_batch(csv_file,
                                                      chromo,
                                                      strict=strict):
                    records = [
                        dict((k, safe_for_solr(v))
                             for k, v in row_dict.items())
                        for row_dict in records
                    ]
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for resource_name, records in data_batch(
                        org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count

        print "commit"
        conn.commit()
Example #21
    def type_redirect(self, resource_name):
        orgs = h.organizations_available('read')

        if not orgs:
            abort(404, _('No organizations found'))
        try:
            chromo = get_chromo(resource_name)
        except RecombinantException:
            abort(404, _('Recombinant resource_name not found'))

        return redirect(
            h.url_for('recombinant_resource',
                      resource_name=resource_name,
                      owner_org=orgs[0]['name']))
Example #22
 def test_ministers_office_missing(self):
     lc = LocalCKAN()
     record = dict(get_chromo('contracts')['examples']['record'],
                   contract_date='2019-06-21',
                   ministers_office=None)
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(resource_id=self.resource_id,
                                    records=[record])
     err = ve.exception.error_dict['records'][0]
     expected = {
         'ministers_office': ['This field must not be empty'],
     }
     for k in set(err) | set(expected):
         assert_equal(err.get(k), expected.get(k), (k, err))
Example #23
    def _load_one_csv_file(self, name):
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        lc = LocalCKAN()

        for org_name, records in csv_data_batch(name, chromo):
            results = lc.action.package_search(
                q='type:%s organization:%s' % (dataset_type, org_name),
                include_private=True,
                rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (
                    dataset_type, org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            for res in results[0]['resources']:
                if res['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            # convert list values to lists
            list_fields = [f['datastore_id']
                for f in chromo['fields'] if f['datastore_type'] == '_text']
            if list_fields:
                for r in records:
                    for k in list_fields:
                        if not r[k]:
                            r[k] = []
                        else:
                            r[k] = r[k].split(',')

            print '-', org_name, len(records)
            lc.action.datastore_upsert(
                method=method,
                resource_id=res['id'],
                records=records)
        return 0
Example #24
    def test_indicators_depends_on_milestones(self):
        lc = LocalCKAN()
        record = get_chromo('nap')['examples']['record']
        record['commitments'] = 'C01'
        record['milestones'] = 'C01.1'
        record['indicators'] = 'C01.1.1'
        lc.action.datastore_upsert(resource_id=self.resource_id,
                                   records=[record])

        record['commitments'] = 'C01'
        record['milestones'] = 'C01.1'
        record['indicators'] = 'C02.1.1'
        assert_raises(ValidationError,
                      lc.action.datastore_upsert,
                      resource_id=self.resource_id,
                      records=[record])
Example #25
 def test_postal_code(self):
     lc = LocalCKAN()
     record = dict(get_chromo('contracts')['examples']['record'],
                   vendor_postal_code='1A1')
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(resource_id=self.resource_id,
                                    records=[record])
     err = ve.exception.error_dict['records'][0]
     expected = {
         'vendor_postal_code': [
             'This field must contain the first three digits of a postal code '
             'in A1A format or the value "NA"'
         ],
     }
     for k in set(err) | set(expected):
         assert_equal(err.get(k), expected.get(k), (k, err))
Example #26
    def type_redirect(self, resource_name):
        orgs = h.organizations_available('read')

        if not orgs:
            abort(404, _('No organizations found'))
        try:
            chromo = get_chromo(resource_name)
        except RecombinantException:
            abort(404, _('Recombinant resource_name not found'))

        # custom business logic
        if is_sysadmin(c.user):
            return redirect(h.url_for('recombinant_resource',
                                      resource_name=resource_name, owner_org='tbs-sct'))
        return redirect(h.url_for('recombinant_resource',
                                  resource_name=resource_name, owner_org=orgs[0]['name']))
Example #27
    def _rebuild(self, csv_files=None, solr_url=None):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index(solr_url, False)

        conn = solr_connection('ati', solr_url)
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ':'
                firstpart, filename = os.path.split(csv_file)
                assert filename.endswith('.csv')
                resource_name = filename[:-4]

                chromo = get_chromo(resource_name)
                geno = get_geno(chromo['dataset_type'])
                assert geno.get('target_dataset') == TARGET_DATASET

                for org_id, records in csv_data_batch(csv_file, chromo):
                    records = [dict((k, safe_for_solr(v)) for k, v in
                            row_dict.items()) for row_dict in records]
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for resource_name, records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count

        print "commit"
        conn.commit()
Example #28
def csv_data_batch(csv_path, target_dataset):
    """
    Generator of dataset records from csv file

    :param csv_path: file to parse

    :return: a batch of records for at most one organization
    :rtype: dict mapping at most one org-id to
            at most BATCH_SIZE (dict) records
    """
    records = []
    current_owner_org = None

    firstpart, filename = os.path.split(csv_path)
    assert filename.endswith(".csv")

    chromo = get_chromo(filename[:-4])
    geno = get_geno(chromo["dataset_type"])
    assert geno.get("target_dataset") == target_dataset

    with open(csv_path) as f:
        csv_in = DictReader(f)
        cols = csv_in.unicode_fieldnames

        expected = [f["datastore_id"] for f in chromo["fields"]]
        assert cols[:-2] == expected, "column mismatch:\n{0}\n{1}".format(cols[:-2], expected)

        for row_dict in csv_in:
            owner_org = row_dict.pop("owner_org")
            owner_org_title = row_dict.pop("owner_org_title")
            if owner_org != current_owner_org:
                if records:
                    yield (current_owner_org, records)
                records = []
                current_owner_org = owner_org

            row_dict = dict((k, safe_for_solr(v)) for k, v in row_dict.items())
            records.append(row_dict)
            if len(records) >= BATCH_SIZE:
                yield (current_owner_org, records)
                records = []
    if records:
        yield (current_owner_org, records)
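A hedged consumption sketch for the generator above; the file path and the 'ati' target dataset are assumptions, and each yielded batch holds at most BATCH_SIZE records for a single organization.

for org_id, records in csv_data_batch('/tmp/ati.csv', 'ati'):
    print org_id, len(records)  # e.g. hand each batch to the solr updater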
Example #29
 def test_field_length_errors(self):
     lc = LocalCKAN()
     record = dict(
         get_chromo('contracts')['examples']['record'],
         economic_object_code='467782',
         commodity_code='K23HG367BU',
     )
     with assert_raises(ValidationError) as ve:
         lc.action.datastore_upsert(resource_id=self.resource_id,
                                    records=[record])
     err = ve.exception.error_dict['records'][0]
     expected = {
         'economic_object_code':
         ['This field is limited to only 3 or 4 digits.'],
         'commodity_code':
         ['The field is limited to eight alpha-numeric digits or less.'],
     }
     assert isinstance(err, dict), err
     for k in set(err) | set(expected):
         assert_equal(err.get(k), expected.get(k), (k, err))
Example #30
    def _combine_csv(self, target_dir, resource_names):
        if target_dir and not os.path.isdir(target_dir):
            print '"{0}" is not a directory'.format(target_dir)
            return 1

        orgs = self._get_orgs()
        lc = LocalCKAN()
        outf = sys.stdout
        for resource_name in self._expand_resource_names(resource_names):
            if target_dir:
                outf = open(os.path.join(target_dir, resource_name + '.csv'),
                            'wb')
            self._write_one_csv(
                lc,
                self._get_packages(
                    get_dataset_type_for_resource_name(resource_name), orgs),
                get_chromo(resource_name), outf)

            if target_dir:
                outf.close()
Example #31
def csv_data_batch(csv_path, target_dataset):
    """
    Generator of dataset records from csv file

    :param csv_path: file to parse
    """
    records = []
    current_owner_org = None

    firstpart, filename = os.path.split(csv_path)
    assert filename.endswith('.csv')
    resource_name = filename[:-4]

    chromo = get_chromo(resource_name)
    geno = get_geno(chromo['dataset_type'])
    assert geno.get('target_dataset') == target_dataset

    with open(csv_path) as f:
        csv_in = DictReader(f)
        cols = csv_in.unicode_fieldnames

        expected = [f['datastore_id'] for f in chromo['fields']]
        assert cols[:-2] == expected, 'column mismatch:\n{0}\n{1}'.format(
            cols[:-2], expected)

        for row_dict in csv_in:
            owner_org = row_dict.pop('owner_org')
            owner_org_title = row_dict.pop('owner_org_title')
            if owner_org != current_owner_org:
                if records:
                    yield (resource_name, current_owner_org, records)
                records = []
                current_owner_org = owner_org

            row_dict = dict((k, safe_for_solr(v)) for k, v in row_dict.items())
            records.append(row_dict)
            if len(records) >= BATCH_SIZE:
                yield (resource_name, current_owner_org, records)
                records = []
    if records:
        yield (resource_name, current_owner_org, records)
Example #32
    def datatable(self, resource_name, resource_id):
        from ckanext.recombinant.tables import get_chromo
        t = get_chromo(resource_name)
        echo = int(request.params['sEcho'])
        search_text = unicode(request.params['sSearch'])
        offset = int(request.params['iDisplayStart'])
        limit = int(request.params['iDisplayLength'])
        sort_cols = int(request.params['iSortingCols'])
        if sort_cols:
            sort_by_num = int(request.params['iSortCol_0'])
            sort_order = 'desc' if request.params['sSortDir_0'] == 'desc' else 'asc'

        lc = LocalCKAN(username=c.user)

        unfiltered_response = lc.action.datastore_search(
            resource_id=resource_id,
            limit=1,
            )

        cols = [f['datastore_id'] for f in t['fields']]
        sort_str = ''
        if sort_cols:
            sort_str = cols[sort_by_num] + ' ' + sort_order

        response = lc.action.datastore_search(
            q=search_text,
            resource_id=resource_id,
            fields=cols,
            offset=offset,
            limit=limit,
            sort=sort_str)

        return json.dumps({
            'sEcho': echo,
            'iTotalRecords': unfiltered_response.get('total', 0),
            'iTotalDisplayRecords': response.get('total', 0),
            'aaData': [
                [row[colname] for colname in cols]
                for row in response['records']],
            })
Example #33
    def _combine_csv(self, target_dir, resource_names):
        if target_dir and not os.path.isdir(target_dir):
            print '"{0}" is not a directory'.format(target_dir)
            return 1

        orgs = self._get_orgs()
        lc = LocalCKAN()
        outf = sys.stdout
        for resource_name in self._expand_resource_names(resource_names):
            if target_dir:
                outf = open(os.path.join(target_dir,
                    resource_name + '.csv'), 'wb')
            outf.write(codecs.BOM_UTF8)
            self._write_one_csv(
                lc,
                self._get_packages(
                    get_dataset_type_for_resource_name(resource_name), orgs),
                get_chromo(resource_name),
                outf)

            if target_dir:
                outf.close()
Example #34
 def test_service_std_target(self):
     lc = LocalCKAN()
     record = dict(
         get_chromo('service-std')['examples']['record'],
         service_std_target='0.99999')
     lc.action.datastore_upsert(
         resource_id=self.service_std_id,
         records=[record])
     assert_equal(
         lc.action.datastore_search(resource_id=self.service_std_id)
             ['records'][0]['service_std_target'],
         u'0.99999')
     record['service_std_target'] = 0.5
     lc.action.datastore_upsert(
         resource_id=self.service_std_id,
         records=[record])
     assert_equal(
         lc.action.datastore_search(resource_id=self.service_std_id)
             ['records'][0]['service_std_target'],
         u'0.5')
     record['service_std_target'] = None
     lc.action.datastore_upsert(
         resource_id=self.service_std_id,
         records=[record])
     assert_equal(
         lc.action.datastore_search(resource_id=self.service_std_id)
             ['records'][0]['service_std_target'],
         None)
     record['service_std_target'] = -0.01
     assert_raises(ValidationError,
         lc.action.datastore_upsert,
         resource_id=self.service_std_id,
         records=[record])
     record['service_std_target'] = 1.01
     assert_raises(ValidationError,
         lc.action.datastore_upsert,
         resource_id=self.service_std_id,
         records=[record])
Example #35
    def _load_one_csv_file(self, name):
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        lc = LocalCKAN()

        for org_name, records in csv_data_batch(name, chromo):
            results = lc.action.package_search(
                q='type:%s organization:%s' % (dataset_type, org_name),
                rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (
                    dataset_type, org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            for r in results[0]['resources']:
                if r['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            print '-', org_name, len(records)
            lc.action.datastore_upsert(
                method=method,
                resource_id=r['id'],
                records=records)
        return 0
Example #36
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            s = hashlib.md5(s + resource_name + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        shortform = None
        shortform_fr = None
        for e in org_detail['extras']:
            if e['key'] == 'shortform':
                shortform = e['value']
            elif e['key'] == 'shortform_fr':
                shortform_fr = e['value']

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
            }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    pass
            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
Example #37
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id, partial id"
        s = orghash
        f = org
        p = org
        for k in pk:
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
            if u'|' not in p:
                p += u'|' + unicode(r[k])
        return s, f, p

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly, partial = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
        }

        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for e in org_detail['extras']:
                if e['key'] in org_fields:
                    solrrec[e['key']] = e['value']

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r.get(key, '')

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(
                        value.replace('$', '').replace(',', ''))
                except ValueError:
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if f.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    pass
            elif f.get('extract_date_year'):
                if f.get('datastore_type') == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass
            if f.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + key] = float(value)
                except ValueError:
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                if key.endswith('_code'):
                    key = key[:-5]
                if f.get('datastore_type') == '_text':
                    solrrec[key + '_en'] = '; '.join(
                        recombinant_language_text(choices[v], 'en')
                        for v in value.split(',') if v in choices)
                    solrrec[key + '_fr'] = '; '.join(
                        recombinant_language_text(choices[v], 'fr')
                        for v in value.split(',') if v in choices)
                else:
                    choice = choices.get(value, {})
                    _add_choice(solrrec, key, r, choice, f)

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        ssrf = chromo.get('solr_sum_range_facet')
        if ssrf:
            key = ssrf['sum_field']
            float_value = float(solrrec[key])
            solrrec.update(
                numeric_range_facet(key, ssrf['facet_values'], float_value))

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if unmatched:
        out.extend(unmatched[1].values())

    import pysolr
    for a in reversed(range(10)):
        try:
            if out:
                conn.add(out, commit=False)
            break
        except pysolr.SolrError:
            if not a:
                raise
            print "waiting..."
            import time
            time.sleep((10 - a) * 5)
            print "retrying..."
    return unmatched
Example #38
def _process_upload_file(lc, dataset, upload_file, geno):
    """
    Use lc.action.datastore_upsert to load data from upload_file

    raises BadExcelData on errors.
    """
    owner_org = dataset['organization']['name']

    expected_sheet_names = dict(
        (resource['name'], resource['id'])
        for resource in dataset['resources'])

    upload_data = read_excel(upload_file)
    while True:
        try:
            sheet_name, org_name, column_names, rows = next(upload_data)
        except StopIteration:
            return
        except:
            # XXX bare except because this can fail in all sorts of ways
            if asbool(config.get('debug', False)):
                # on debug we want the real error
                raise
            raise BadExcelData(
                _("The server encountered a problem processing the file "
                "uploaded. Please try copying your data into the latest "
                "version of the template and uploading again. If this "
                "problem continues, send your Excel file to "
                "[email protected] so we may investigate."))

        if sheet_name not in expected_sheet_names:
            raise BadExcelData(_('Invalid file for this data type. ' +
                'Sheet must be labeled "{0}", ' +
                'but you supplied a sheet labeled "{1}"').format(
                    '"/"'.join(sorted(expected_sheet_names)),
                    sheet_name))

        if org_name != owner_org:
            raise BadExcelData(_(
                'Invalid sheet for this organization. ' +
                'Sheet must be labeled for {0}, ' +
                'but you supplied a sheet for {1}').format(
                    owner_org, org_name))

        # custom styles or other errors cause columns to be read
        # that actually have no data. strip them here to avoid error below
        while column_names[-1] is None:
            column_names.pop()

        chromo = get_chromo(sheet_name)
        expected_columns = [f['datastore_id'] for f in chromo['fields']]
        if column_names != expected_columns:
            raise BadExcelData(
                _("This template is out of date. "
                "Please try copying your data into the latest "
                "version of the template and uploading again. If this "
                "problem continues, send your Excel file to "
                "[email protected] so we may investigate."))

        records = get_records(rows, chromo['fields'])
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        try:
            lc.action.datastore_upsert(
                method=method,
                resource_id=expected_sheet_names[sheet_name],
                records=records,
                )
        except ValidationError as e:
            # because, where else would you put the error text?
            # XXX improve this in datastore, please
            pgerror = e.error_dict['info']['orig'][0].decode('utf-8')
            # remove some postgres-isms that won't help the user
            # when we render this as an error in the form
            pgerror = re.sub(ur'\nLINE \d+:', u'', pgerror)
            pgerror = re.sub(ur'\n *\^\n$', u'', pgerror)
            raise BadExcelData(
                _(u"Error while importing data: {0}").format(
                    pgerror))
Example #39
 def test_example(self):
     lc = LocalCKAN()
     record = get_chromo('grants')['examples']['record']
     lc.action.datastore_upsert(resource_id=self.resource_id,
                                records=[record])
Example #40
def _process_upload_file(lc, dataset, upload_file, geno, dry_run):
    """
    Use lc.action.datastore_upsert to load data from upload_file

    raises BadExcelData on errors.
    """
    owner_org = dataset['organization']['name']

    expected_sheet_names = dict(
        (resource['name'], resource['id'])
        for resource in dataset['resources'])

    upload_data = read_excel(upload_file)
    total_records = 0
    while True:
        try:
            sheet_name, org_name, column_names, rows = next(upload_data)
        except StopIteration:
            break
        except Exception:
            # unfortunately this can fail in all sorts of ways
            if asbool(config.get('debug', False)):
                # on debug we want the real error
                raise
            raise BadExcelData(
                _("The server encountered a problem processing the file "
                "uploaded. Please try copying your data into the latest "
                "version of the template and uploading again. If this "
                "problem continues, send your Excel file to "
                "[email protected] so we may investigate."))

        if sheet_name not in expected_sheet_names:
            raise BadExcelData(_('Invalid file for this data type. ' +
                'Sheet must be labeled "{0}", ' +
                'but you supplied a sheet labeled "{1}"').format(
                    '"/"'.join(sorted(expected_sheet_names)),
                    sheet_name))

        if org_name != owner_org:
            raise BadExcelData(_(
                'Invalid sheet for this organization. ' +
                'Sheet must be labeled for {0}, ' +
                'but you supplied a sheet for {1}').format(
                    owner_org, org_name))

        # custom styles or other errors cause columns to be read
        # that actually have no data. strip them here to avoid error below
        while column_names and column_names[-1] is None:
            column_names.pop()

        chromo = get_chromo(sheet_name)
        expected_columns = [f['datastore_id'] for f in chromo['fields']
            if f.get('import_template_include', True)]
        if column_names != expected_columns:
            raise BadExcelData(
                _("This template is out of date. "
                "Please try copying your data into the latest "
                "version of the template and uploading again. If this "
                "problem continues, send your Excel file to "
                "[email protected] so we may investigate."))

        pk = chromo.get('datastore_primary_key', [])
        choice_fields = {
            f['datastore_id']:
                'full' if f.get('excel_full_text_choices') else True
            for f in chromo['fields']
            if ('choices' in f or 'choices_file' in f)}

        records = get_records(
            rows,
            [f for f in chromo['fields'] if f.get('import_template_include', True)],
            pk,
            choice_fields)
        method = 'upsert' if pk else 'insert'
        total_records += len(records)
        if not records:
            continue
        try:
            lc.action.datastore_upsert(
                method=method,
                resource_id=expected_sheet_names[sheet_name],
                records=[r[1] for r in records],
                dry_run=dry_run,
                )
        except ValidationError as e:
            if 'info' in e.error_dict:
                # because, where else would you put the error text?
                # XXX improve this in datastore, please
                pgerror = e.error_dict['info']['orig'][0].decode('utf-8')
            else:
                pgerror = e.error_dict['records'][0]
            if isinstance(pgerror, dict):
                pgerror = u'; '.join(
                    k + u': ' + u', '.join(v)
                    for k, v in pgerror.items())
            else:
                # remove some postgres-isms that won't help the user
                # when we render this as an error in the form
                pgerror = re.sub(ur'\nLINE \d+:', u'', pgerror)
                pgerror = re.sub(ur'\n *\^\n$', u'', pgerror)
            if '_records_row' in e.error_dict:
                raise BadExcelData(_(u'Sheet {0} Row {1}:').format(
                    sheet_name, records[e.error_dict['_records_row']][0])
                    + u' ' + pgerror)
            raise BadExcelData(
                _(u"Error while importing data: {0}").format(
                    pgerror))
    if not total_records:
        raise BadExcelData(_("The template uploaded is empty"))
Example #41
def _process_upload_file(lc, dataset, upload_file, geno):
    """
    Use lc.action.datastore_upsert to load data from upload_file

    raises BadExcelData on errors.
    """
    owner_org = dataset['organization']['name']

    expected_sheet_names = dict((resource['name'], resource['id'])
                                for resource in dataset['resources'])

    upload_data = read_excel(upload_file)
    while True:
        try:
            sheet_name, org_name, column_names, rows = next(upload_data)
        except StopIteration:
            return
        except:
            # XXX bare except because this can fail in all sorts of ways
            if asbool(config.get('debug', False)):
                # on debug we want the real error
                raise
            raise BadExcelData(
                _("The server encountered a problem processing the file "
                  "uploaded. Please try copying your data into the latest "
                  "version of the template and uploading again. If this "
                  "problem continues, send your Excel file to "
                  "[email protected] so we may investigate."))

        if sheet_name not in expected_sheet_names:
            raise BadExcelData(
                _('Invalid file for this data type. ' +
                  'Sheet must be labeled "{0}", ' +
                  'but you supplied a sheet labeled "{1}"').format(
                      '"/"'.join(sorted(expected_sheet_names)), sheet_name))

        if org_name != owner_org:
            raise BadExcelData(
                _('Invalid sheet for this organization. ' +
                  'Sheet must be labeled for {0}, ' +
                  'but you supplied a sheet for {1}').format(
                      owner_org, org_name))

        # custom styles or other errors cause columns to be read
        # that actually have no data. strip them here to avoid error below
        while column_names[-1] is None:
            column_names.pop()

        chromo = get_chromo(sheet_name)
        expected_columns = [f['datastore_id'] for f in chromo['fields']]
        if column_names != expected_columns:
            raise BadExcelData(
                _("This template is out of date. "
                  "Please try copying your data into the latest "
                  "version of the template and uploading again. If this "
                  "problem continues, send your Excel file to "
                  "[email protected] so we may investigate."))

        records = get_records(rows, chromo['fields'])
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        try:
            lc.action.datastore_upsert(
                method=method,
                resource_id=expected_sheet_names[sheet_name],
                records=records,
            )
        except ValidationError as e:
            # because, where else would you put the error text?
            # XXX improve this in datastore, please
            pgerror = e.error_dict['info']['orig'][0].decode('utf-8')
            # remove some postgres-isms that won't help the user
            # when we render this as an error in the form
            pgerror = re.sub(ur'\nLINE \d+:', u'', pgerror)
            pgerror = re.sub(ur'\n *\^\n$', u'', pgerror)
            raise BadExcelData(
                _(u"Error while importing data: {0}").format(pgerror))
Example #45
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            s = hashlib.md5(s + resource_name +
                            "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        shortform = org_detail['shortform']
        shortform_fr = org_detail['shortform_fr']

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title_translated']['en'],
            'org_name_fr': org_detail['title_translated']['fr'],
        }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    pass
            elif f.get('datastore_type') == 'year':
                if f.get('extract_date_year'):
                    solrrec['date_year'] = value
            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
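
The unique_id helper above derives a stable Solr document id by chaining md5 over the org hash and each primary-key value. A minimal sketch of that chaining, assuming a single-field primary key and an invented record:

import hashlib

org = 'tbs-sct'                        # invented org name
pk = ['ref_number']                    # assumed single-field primary key
record = {'ref_number': u'C-2022-001'}

s = hashlib.md5(org.encode('utf-8')).hexdigest()
f = org
for k in pk:
    # feed the previous hash plus the next key value back into md5
    s = hashlib.md5((s + record[k]).encode('utf-8')).hexdigest()
    f += u'|' + record[k]
print(s)  # stable hash used as the Solr 'id'
print(f)  # human-friendly 'tbs-sct|C-2022-001' used as 'unique_id'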
Example #46
File: pd.py Project: wardi/ckanext-canada
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id, partial id"
        s = orghash
        f = org
        p = org
        if not pk:
            s = hashlib.md5(s + resource_name + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
            p += u'|' + unicode(r['_id'])
        for k in pk:
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
            if u'|' not in p:
                p += u'|' + unicode(r[k])
        return s, f, p

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly, partial = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
            }

        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for e in org_detail['extras']:
                if e['key'] in org_fields:
                    solrrec[e['key']] = e['value']

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if f.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    pass
            elif f.get('extract_date_year'):
                if f.get('datastore_type') == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass
            if f.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + key] = float(value)
                except ValueError:
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                if key.endswith('_code'):
                    key = key[:-5]
                choice = choices.get(value, {})
                _add_choice(solrrec, key, r, choice, f)

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    import pysolr
    import time
    for a in reversed(range(10)):
        try:
            if out:
                conn.add(out, commit=False)
            break
        except pysolr.SolrError:
            if not a:
                raise
            print "waiting..."
            time.sleep((10-a) * 5)
            print "retrying..."
    return unmatched
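
The indexing call above retries up to ten times with a linearly growing sleep between failures. The same pattern as a standalone helper, with the Solr call abstracted behind a callable (a sketch, not part of the original module):

import time

def add_with_retry(push, attempts=10, base_delay=5):
    # push is any zero-argument callable, e.g.
    # lambda: conn.add(out, commit=False)
    for a in reversed(range(attempts)):
        try:
            return push()
        except Exception:  # the code above catches pysolr.SolrError only
            if not a:
                raise      # attempts exhausted: surface the last error
            time.sleep((attempts - a) * base_delay)  # 5s, 10s, 15s, ...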
Example #47
def _update_records(records, org_detail, conn, resource_name):
    """
    Update records on solr core

    :param records: record dicts
    :type records: sequence of record dicts

    :param org_detail: org structure as returned via local CKAN
    :type org_detail: dict with local CKAN org structure

    :param conn: solr connection
    :type conn: obj

    :param resource_name: type being updated
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get("datastore_primary_key", [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail["name"]
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            s = hashlib.md5(s + resource_name + "-%d" % r["_id"]).hexdigest()
            f += u"|" + unicode(r["_id"])
        for k in pk:
            s = hashlib.md5(s + r[k].encode("utf-8")).hexdigest()
            f += u"|" + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f["datastore_id"], dict(f["choices"])) for f in recombinant_choice_fields(resource_name, all_languages=True)
    )

    for r in records:
        unique, friendly = unique_id(r)

        shortform = None
        shortform_fr = None
        for e in org_detail["extras"]:
            if e["key"] == "shortform":
                shortform = e["value"]
            elif e["key"] == "shortform_fr":
                shortform_fr = e["value"]

        solrrec = {
            "id": unique,
            "unique_id": friendly,
            "org_name_code": org_detail["name"],
            "org_name_en": org_detail["title"].split(" | ", 1)[0],
            "org_name_fr": org_detail["title"].split(" | ", 1)[-1],
        }

        for f in chromo["fields"]:
            key = f["datastore_id"]
            value = r[key]

            facet_range = f.get("solr_float_range_facet")
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    pass
                else:
                    for i, fac in enumerate(facet_range):
                        if "less_than" not in fac or float_value < fac["less_than"]:
                            solrrec[key + "_range"] = str(i)
                            solrrec[key + "_range_en"] = fac["label"].split(" | ")[0]
                            solrrec[key + "_range_fr"] = fac["label"].split(" | ")[-1]
                            break

            if f.get("datastore_type") == "date":
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get("extract_date_year"):
                        solrrec["date_year"] = value.split("-", 1)[0]
                    if f.get("extract_date_month"):
                        solrrec["date_month"] = value.split("-")[1]
                except ValueError:
                    pass
            solrrec[key] = value

            choices = choice_fields.get(f["datastore_id"])
            if not choices:
                continue

            if key.endswith("_code"):
                key = key[:-5]
            solrrec[key + "_en"] = recombinant_language_text(choices.get(value, ""), "en")
            solrrec[key + "_fr"] = recombinant_language_text(choices.get(value, ""), "fr")

        solrrec["text"] = u" ".join(unicode(v) for v in solrrec.values())
        out.append(solrrec)

    conn.add_many(out, _commit=True)
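
The solr_float_range_facet loop above assigns each numeric value to the first bucket whose less_than bound it falls under, with an open-ended final bucket. A sketch with invented buckets (labels hold 'English | French' text, split on ' | ' as in the code above):

facet_range = [  # invented buckets for illustration only
    {'less_than': 10000, 'label': 'Under $10K | Moins de 10 k$'},
    {'less_than': 100000, 'label': '$10K to $100K | 10 k$ a 100 k$'},
    {'label': '$100K and over | 100 k$ et plus'},  # open-ended last bucket
]

def bucket(float_value):
    for i, fac in enumerate(facet_range):
        if 'less_than' not in fac or float_value < fac['less_than']:
            return str(i), fac['label'].split(' | ')[0]

print(bucket(25000.0))    # ('1', '$10K to $100K')
print(bucket(5000000.0))  # ('2', '$100K and over')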
Example #48
 def test_example(self):
     lc = LocalCKAN()
     record = get_chromo('briefingt')['examples']['record']
     lc.action.datastore_upsert(
         resource_id=self.resource_id,
         records=[record])