Ejemplo n.º 1
0
 def ensure_dataset_is_in_couple_table(cls, dataset_record):
     '''Make sure ``dataset_record`` has a row in the couple table.

     If no HarvestCoupledResource row references the dataset record's id,
     a new row is created from the record's single current harvest object
     and committed. The outcome is logged and recorded in
     ``additional_couple_stats`` in every case.

     :param dataset_record: object providing ``id``, ``name`` and
                     ``harvest_objects`` (each with ``current`` and
                     ``harvest_source_reference`` attributes).
     :returns: None. Nothing is added when the number of current harvest
                     objects is not exactly one.
     '''
     from ckan import model
     from ckanext.harvest.model import HarvestCoupledResource

     existing_rows = model.Session.query(HarvestCoupledResource).filter_by(
         dataset_record_package_id=dataset_record.id)
     if existing_rows.count() != 0:
         # Nothing to do - the dataset is already represented.
         additional_couple_stats.add('Already in couple table',
                                     dataset_record.name)
         log.info('Already in couple table: %s', dataset_record.name)
         return

     # Only the harvest objects flagged as current are of interest.
     current_objects = []
     for candidate in dataset_record.harvest_objects:
         if candidate.current:
             current_objects.append(candidate)

     num_current = len(current_objects)
     if num_current != 1:
         # Without exactly one current object the reference is ambiguous
         # (or missing), so report it and leave the table alone.
         log.warning('Wrong num of current harvest_objects (%i)',
                     num_current)
         additional_couple_stats.add(
             'Wrong num of harvest_objects (%i)' % num_current,
             dataset_record.name)
         return

     source_ref = current_objects[0].harvest_source_reference
     new_row = HarvestCoupledResource(
         dataset_record_package_id=dataset_record.id,
         harvest_source_reference=source_ref)
     model.Session.add(new_row)
     model.Session.commit()
     additional_couple_stats.add('Added to couple table',
                                 dataset_record.name)
     log.info('Added to couple table: %s', dataset_record.name)
Ejemplo n.º 2
0
 def _create_coupled_resource(self, service_name, ref, dataset_name):
     '''Add a HarvestCoupledResource to the session (no commit is done).

     :param service_name: name of the service package, or a falsy value
                     for no service.
     :param ref: reference string; it is stored with ``ref_prefix``
                     (defined outside this method) prepended.
     :param dataset_name: name of the dataset package, or a falsy value
                     for no dataset.
     :raises AssertionError: if a name was supplied but
                     ``model.Package.by_name`` returned nothing for it.
     '''
     service_pkg = model.Package.by_name(unicode(service_name or ''))
     dataset_pkg = model.Package.by_name(unicode(dataset_name or ''))
     # A name that was supplied must have resolved to a package.
     assert not service_name or service_pkg
     assert not dataset_name or dataset_pkg
     coupled_resource = HarvestCoupledResource(
         service_record=service_pkg,
         harvest_source_reference=ref_prefix + ref,
         dataset_record=dataset_pkg)
     model.Session.add(coupled_resource)
Ejemplo n.º 3
0
 def add_coupling(cls, service_record, dataset_record,
                  dataset_harvest_object, harvest_source_reference):
     '''Record a coupling between a service record and a dataset record.

     First brings ``dataset_harvest_object.harvest_source_reference`` in
     line with ``harvest_source_reference`` (committing if it changed),
     then adds and commits a HarvestCoupledResource row for the
     (service, dataset, reference) triple unless one already exists.

     :param service_record: object whose ``id`` identifies the service.
     :param dataset_record: object whose ``id`` identifies the dataset.
     :param dataset_harvest_object: harvest object whose
                     ``harvest_source_reference`` is updated if it differs.
     :param harvest_source_reference: the reference to store.
     '''
     from ckan import model
     from ckanext.harvest.model import HarvestCoupledResource

     stored_ref = dataset_harvest_object.harvest_source_reference
     if stored_ref != harvest_source_reference:
         dataset_harvest_object.harvest_source_reference = harvest_source_reference
         model.Session.commit()

     # The same three column values identify the row for both the lookup
     # and the insert, so spell them out once.
     couple_fields = dict(
         service_record_package_id=service_record.id,
         dataset_record_package_id=dataset_record.id,
         harvest_source_reference=harvest_source_reference)
     matching_rows = model.Session.query(HarvestCoupledResource) \
         .filter_by(**couple_fields)
     if matching_rows.count() != 0:
         return
     model.Session.add(HarvestCoupledResource(**couple_fields))
     model.Session.commit()
Ejemplo n.º 4
0
def update_coupled_resources(package, harvest_source_reference):
    '''Update the harvest_coupled_resource table with the details of this
    harvested package's couplings.

    For a package whose 'resource-type' extra is 'service', the couples
    listed in its 'coupled-resource' extra (a JSON string) are reconciled
    with the HarvestCoupledResource rows:

    * a row that already has this service and the couple's ref is kept;
    * otherwise rows with that ref and no service get this service
      filled in;
    * otherwise a new row (service, ref, no dataset) is created;
    * rows that had this service but match none of the listed couples
      have their service cleared.

    Changes are committed as they are made. For any other resource type
    this function does nothing.

    :param package: the Package object containing extra fields with couples
                    to update in the table.
    :param harvest_source_reference: the ref of this package being harvested.
                    It is not read when the package is a service record.
    :returns: None
    '''
    resource_type = package.extras['resource-type']
    if resource_type == 'service':
        # When a service record is harvested, ensure the couples listed
        # in it match the couples in the HarvestCoupledResource objects,
        # ignoring their dataset values (they might be filled in or not).
        pkg_couples_str = package.extras['coupled-resource']
        pkg_couples = json.loads(pkg_couples_str)
        log.info('Service Record %s has %i coupled resources to update',
                 package.name, len(pkg_couples))

        table_couples_matching_service = HarvestCoupledResource.get_by_service_record(package)
        # Snapshot of the rows currently linked to this service; rows are
        # crossed off as they are matched, what is left gets unlinked.
        table_couples_not_matching_pkg = table_couples_matching_service.all()

        for pkg_couple in pkg_couples:
            try:
                ref = extract_harvest_source_reference_from_coupled_resource(pkg_couple)
            except CoupledResourceParseError as e:
                log.warning('Error parsing couple: %s Ignoring couple=%s', e, pkg_couple)
                continue
            # Match both service and ref
            matching_table_couples = table_couples_matching_service.filter_by(harvest_source_reference=ref)
            if matching_table_couples.count() > 0:
                # Test: test_02_reharvest_existing_service
                # Note down the matches so we don't delete them later
                for matching_table_couple in matching_table_couples:
                    log.info('Service couple is already there (%s, %s, %s)',
                             package.name, ref,
                             _package_name(matching_table_couple.dataset_record))
                    # The row may be absent from the snapshot: either it was
                    # crossed off already (the package lists the same ref
                    # twice) or it was linked to the service earlier in
                    # this loop. An unguarded remove() would raise
                    # ValueError in those cases.
                    if matching_table_couple in table_couples_not_matching_pkg:
                        table_couples_not_matching_pkg.remove(matching_table_couple)
                continue
            # Match just ref with blank service
            matching_table_couples = HarvestCoupledResource.get_by_harvest_source_reference(ref)\
                                     .filter_by(service_record=None)
            if matching_table_couples.count() == 0:
                # Test: test_06_harvest_service_not_matching_a_dataset
                # create the row
                obj = HarvestCoupledResource(service_record=package,
                                             harvest_source_reference=ref)
                model.Session.add(obj)
                log.info('Ref is new for this service - adding (%s, %s, None)',
                         package.name, ref)
                model.Session.commit()
            else:
                # Test: test_04_harvest_service_to_match_existing_dataset
                for matching_table_couple in matching_table_couples:
                    # fill in the service value
                    matching_table_couple.service_record = package
                    log.info('Service filled into couple matching ref (%s, %s, %s)',
                             package.name, ref,
                             _package_name(matching_table_couple.dataset_record))
                model.Session.commit()

        # Delete service value for any table_couples not matching the package
        # Test: test_08_reharvest_existing_service_to_delete_and_add_couples
        for table_couple in table_couples_not_matching_pkg:
            # Log the row's own ref. The loop variable `ref` from above
            # belongs to the last couple parsed from the package (and is
            # unbound when no couple was parsed at all).
            log.info('Service couple not matched - deleted service (%s->None, %s, %s)',
                     _package_name(table_couple.service_record),
                     table_couple.harvest_source_reference,
                     _package_name(table_couple.dataset_record))
            table_couple.service_record = None
            model.Session.commit()
        return
Ejemplo n.º 5
0
        # Fragment of a function body (the enclosing ``def`` is not shown).
        # It relinks HarvestCoupledResource rows to ``package`` based on the
        # reference passed in as ``harvest_source_reference``.
        ref = harvest_source_reference
        # The reference must be truthy for the matching below to be useful.
        assert ref
        # Rows that point at this package as their dataset record but carry
        # a different ref: clear their dataset link, committing per row.
        for couple in model.Session.query(HarvestCoupledResource) \
            .filter_by(dataset_record=package) \
            .filter(HarvestCoupledResource.harvest_source_reference!=ref):
            log.info('Ref %s has been replaced for this dataset record with '
                     '%s. Removing link to the dataset record (%s, %s, %s->None)',
                     couple.harvest_source_reference, ref,
                     _package_name(couple.service_record),
                     couple.harvest_source_reference,
                     _package_name(couple.dataset_record))
            couple.dataset_record = None
            model.Session.commit()

        # Couples with this ref
        for couple in HarvestCoupledResource.get_by_harvest_source_reference(ref):
            if couple.dataset_record != package:
                # Test: test_03_harvest_dataset_to_match_existing_service
                # The row has this ref but a different (or no) dataset
                # record: point it at this package and commit.
                log.info('Linking ref to this dataset record (%s, %s, %s->%s)',
                         _package_name(couple.service_record),
                         ref,
                         _package_name(couple.dataset_record),
                         package.name)
                couple.dataset_record = package
                model.Session.commit()
            else:
                # Test: test_01_reharvest_existing_dataset
                # Already linked to this package - only log it.
                log.info('Couple for this dataset and ref already exists (%s, %s, %s)',
                         _package_name(couple.service_record),
                         ref,
                         _package_name(couple.dataset_record))
def update_coupled_resources(package, harvest_source_reference):
    '''Update the harvest_coupled_resource_table with the details of this
    harvested package\'s couplings.

    :param package: the Package object containing extra fields with couples
                    to update in the table.
    :param harvest_source_reference: the ref of this package being harvested.
                    This is not relevant if it is a service record, but
                    essential if it is a dataset.
    '''
    resource_type = package.extras['resource-type']
    if resource_type == 'service':
        # When a service record is harvested, ensure the couples listed
        # in it match the couples in the HarvestCoupledResource objects,
        # ignoring their dataset values (they might be filled in or not).
        pkg_couples_str = package.extras['coupled-resource']
        pkg_couples = json.loads(pkg_couples_str)
        log.info('Service Record %s has %i coupled resources to update',
                 package.name, len(pkg_couples))

        table_couples_matching_service = HarvestCoupledResource.get_by_service_record(
            package)
        table_couples_not_matching_pkg = table_couples_matching_service.all(
        )  # cross them off as we go

        for pkg_couple in pkg_couples:
            try:
                ref = extract_harvest_source_reference_from_coupled_resource(
                    pkg_couple)
            except CoupledResourceParseError, e:
                log.warn('Error parsing couple: %s Ignoring couple=%s', e,
                         pkg_couple)
                continue
            # Match both service and ref
            matching_table_couples = table_couples_matching_service.filter_by(
                harvest_source_reference=ref)
            if matching_table_couples.count() > 0:
                # Test: test_02_reharvest_existing_service
                # Note down the matches so we don't delete them later
                for matching_table_couple in matching_table_couples:
                    log.info(
                        'Service couple is already there (%s, %s, %s)',
                        package.name, ref,
                        _package_name(matching_table_couple.dataset_record))
                    table_couples_not_matching_pkg.remove(
                        matching_table_couple)
                continue
            # Match just ref with blank service
            matching_table_couples = HarvestCoupledResource.get_by_harvest_source_reference(ref)\
                                     .filter_by(service_record=None)
            if matching_table_couples.count() == 0:
                # Test: test_06_harvest_service_not_matching_a_dataset
                # create the row
                obj = HarvestCoupledResource(service_record=package,
                                             harvest_source_reference=ref)
                model.Session.add(obj)
                log.info('Ref is new for this service - adding (%s, %s, None)',
                         package.name, ref)
                model.Session.commit()
            else:
                # Test: test_04_harvest_service_to_match_existing_dataset
                for matching_table_couple in matching_table_couples:
                    # fill in the service value
                    matching_table_couple.service_record = package
                    log.info(
                        'Service filled into couple matching ref (%s, %s, %s)',
                        package.name, ref,
                        _package_name(matching_table_couple.dataset_record))
                model.Session.commit()

        # Delete service value for any table_couples not matching the package
        # Test: test_08_reharvest_existing_service_to_delete_and_add_couples
        for table_couple in table_couples_not_matching_pkg:
            log.info(
                'Service couple not matched - deleted service (%s->None, %s, %s)',
                _package_name(table_couple.service_record), ref,
                _package_name(table_couple.dataset_record))
            table_couple.service_record = None
            model.Session.commit()
        return
        assert ref
        for couple in model.Session.query(HarvestCoupledResource) \
            .filter_by(dataset_record=package) \
            .filter(HarvestCoupledResource.harvest_source_reference!=ref):
            log.info(
                'Ref %s has been replaced for this dataset record with '
                '%s. Removing link to the dataset record (%s, %s, %s->None)',
                couple.harvest_source_reference, ref,
                _package_name(couple.service_record),
                couple.harvest_source_reference,
                _package_name(couple.dataset_record))
            couple.dataset_record = None
            model.Session.commit()

        # Couples with this ref
        for couple in HarvestCoupledResource.get_by_harvest_source_reference(
                ref):
            if couple.dataset_record != package:
                # Test: test_03_harvest_dataset_to_match_existing_service
                log.info('Linking ref to this dataset record (%s, %s, %s->%s)',
                         _package_name(couple.service_record), ref,
                         _package_name(couple.dataset_record), package.name)
                couple.dataset_record = package
                model.Session.commit()
            else:
                # Test: test_01_reharvest_existing_dataset
                log.info(
                    'Couple for this dataset and ref already exists (%s, %s, %s)',
                    _package_name(couple.service_record), ref,
                    _package_name(couple.dataset_record))

        # No couples for this ref