def update_coupled_resources(package, harvest_source_reference):
    '''Update the harvest_coupled_resource_table with the details of this
    harvested package's couplings.

    When ``package.extras['resource-type']`` is ``'service'``, the refs
    parsed from the JSON held in ``package.extras['coupled-resource']`` are
    reconciled with the HarvestCoupledResource rows:

    * a row that already has this service and the ref is kept as it is;
    * a row that has the ref but no service gets this service filled in;
    * if neither kind of row exists, a new (service, ref) row is added;
    * a row for this service whose ref is no longer listed in the package
      has its service_record set to None.

    Every change is committed with ``model.Session.commit()``.

    :param package: the Package object containing extra fields with couples
                    to update in the table.
    :param harvest_source_reference: the ref of this package being
                    harvested. This is not relevant if it is a service
                    record, but essential if it is a dataset.
    '''
    resource_type = package.extras['resource-type']
    if resource_type == 'service':
        # When a service record is harvested, ensure the couples listed
        # in it match the couples in the HarvestCoupledResource objects,
        # ignoring their dataset values (they might be filled in or not).
        pkg_couples_str = package.extras['coupled-resource']
        pkg_couples = json.loads(pkg_couples_str)
        log.info('Service Record %s has %i coupled resources to update',
                 package.name, len(pkg_couples))

        table_couples_matching_service = \
            HarvestCoupledResource.get_by_service_record(package)
        # Snapshot of the rows currently linked to this service; rows are
        # crossed off as they are matched, and whatever is left at the end
        # is unlinked from the service.
        table_couples_not_matching_pkg = table_couples_matching_service.all()

        for pkg_couple in pkg_couples:
            try:
                ref = extract_harvest_source_reference_from_coupled_resource(
                    pkg_couple)
            except CoupledResourceParseError as e:
                log.warn('Error parsing couple: %s Ignoring couple=%s',
                         e, pkg_couple)
                continue

            # Match both service and ref
            matching_table_couples = table_couples_matching_service.filter_by(
                harvest_source_reference=ref)
            if matching_table_couples.count() > 0:
                # Test: test_02_reharvest_existing_service
                # Note down the matches so we don't delete them later
                for matching_table_couple in matching_table_couples:
                    log.info('Service couple is already there (%s, %s, %s)',
                             package.name, ref,
                             _package_name(
                                 matching_table_couple.dataset_record))
                    # The row may already have been crossed off (the ref is
                    # listed twice in the package) or may have been created
                    # earlier in this run, so it is not in the snapshot.
                    # list.remove() would raise ValueError in both cases.
                    if matching_table_couple in table_couples_not_matching_pkg:
                        table_couples_not_matching_pkg.remove(
                            matching_table_couple)
                continue

            # Match just ref with blank service
            matching_table_couples = \
                HarvestCoupledResource.get_by_harvest_source_reference(ref) \
                .filter_by(service_record=None)
            if matching_table_couples.count() == 0:
                # Test: test_06_harvest_service_not_matching_a_dataset
                # create the row
                obj = HarvestCoupledResource(service_record=package,
                                             harvest_source_reference=ref)
                model.Session.add(obj)
                log.info('Ref is new for this service - adding (%s, %s, None)',
                         package.name, ref)
                model.Session.commit()
            else:
                # Test: test_04_harvest_service_to_match_existing_dataset
                for matching_table_couple in matching_table_couples:
                    # fill in the service value
                    matching_table_couple.service_record = package
                    log.info('Service filled into couple matching ref '
                             '(%s, %s, %s)',
                             package.name, ref,
                             _package_name(
                                 matching_table_couple.dataset_record))
                    model.Session.commit()

        # Delete service value for any table_couples not matching the package
        # Test: test_08_reharvest_existing_service_to_delete_and_add_couples
        for table_couple in table_couples_not_matching_pkg:
            # Log the row's own ref. The loop variable ``ref`` from above is
            # unrelated to this row and is unbound when no couple was parsed.
            log.info('Service couple not matched - deleted service '
                     '(%s->None, %s, %s)',
                     _package_name(table_couple.service_record),
                     table_couple.harvest_source_reference,
                     _package_name(table_couple.dataset_record))
            table_couple.service_record = None
            model.Session.commit()
        return
def update_coupled_resources(package, harvest_source_reference):
    '''Update the harvest_coupled_resource_table with the details of this
    harvested package's couplings.

    When ``package.extras['resource-type']`` is ``'service'``, the refs
    parsed from the JSON held in ``package.extras['coupled-resource']`` are
    reconciled with the HarvestCoupledResource rows:

    * a row that already has this service and the ref is kept as it is;
    * a row that has the ref but no service gets this service filled in;
    * if neither kind of row exists, a new (service, ref) row is added;
    * a row for this service whose ref is no longer listed in the package
      has its service_record set to None.

    Every change is committed with ``model.Session.commit()``.

    :param package: the Package object containing extra fields with couples
                    to update in the table.
    :param harvest_source_reference: the ref of this package being
                    harvested. This is not relevant if it is a service
                    record, but essential if it is a dataset.
    '''
    resource_type = package.extras['resource-type']
    if resource_type == 'service':
        # When a service record is harvested, ensure the couples listed
        # in it match the couples in the HarvestCoupledResource objects,
        # ignoring their dataset values (they might be filled in or not).
        pkg_couples_str = package.extras['coupled-resource']
        pkg_couples = json.loads(pkg_couples_str)
        log.info('Service Record %s has %i coupled resources to update',
                 package.name, len(pkg_couples))

        table_couples_matching_service = \
            HarvestCoupledResource.get_by_service_record(package)
        # Snapshot of the rows currently linked to this service; rows are
        # crossed off as they are matched, and whatever is left at the end
        # is unlinked from the service.
        table_couples_not_matching_pkg = table_couples_matching_service.all()

        for pkg_couple in pkg_couples:
            try:
                ref = extract_harvest_source_reference_from_coupled_resource(
                    pkg_couple)
            except CoupledResourceParseError as e:
                log.warn('Error parsing couple: %s Ignoring couple=%s',
                         e, pkg_couple)
                continue

            # Match both service and ref
            matching_table_couples = table_couples_matching_service.filter_by(
                harvest_source_reference=ref)
            if matching_table_couples.count() > 0:
                # Test: test_02_reharvest_existing_service
                # Note down the matches so we don't delete them later
                for matching_table_couple in matching_table_couples:
                    log.info(
                        'Service couple is already there (%s, %s, %s)',
                        package.name, ref,
                        _package_name(matching_table_couple.dataset_record))
                    # The row may already have been crossed off (the ref is
                    # listed twice in the package) or may have been created
                    # earlier in this run, so it is not in the snapshot.
                    # list.remove() would raise ValueError in both cases.
                    if matching_table_couple in table_couples_not_matching_pkg:
                        table_couples_not_matching_pkg.remove(
                            matching_table_couple)
                continue

            # Match just ref with blank service
            matching_table_couples = \
                HarvestCoupledResource.get_by_harvest_source_reference(ref) \
                .filter_by(service_record=None)
            if matching_table_couples.count() == 0:
                # Test: test_06_harvest_service_not_matching_a_dataset
                # create the row
                obj = HarvestCoupledResource(service_record=package,
                                             harvest_source_reference=ref)
                model.Session.add(obj)
                log.info('Ref is new for this service - adding (%s, %s, None)',
                         package.name, ref)
                model.Session.commit()
            else:
                # Test: test_04_harvest_service_to_match_existing_dataset
                for matching_table_couple in matching_table_couples:
                    # fill in the service value
                    matching_table_couple.service_record = package
                    log.info(
                        'Service filled into couple matching ref (%s, %s, %s)',
                        package.name, ref,
                        _package_name(matching_table_couple.dataset_record))
                    model.Session.commit()

        # Delete service value for any table_couples not matching the package
        # Test: test_08_reharvest_existing_service_to_delete_and_add_couples
        for table_couple in table_couples_not_matching_pkg:
            # Log the row's own ref. The loop variable ``ref`` from above is
            # unrelated to this row and is unbound when no couple was parsed.
            log.info(
                'Service couple not matched - deleted service (%s->None, %s, %s)',
                _package_name(table_couple.service_record),
                table_couple.harvest_source_reference,
                _package_name(table_couple.dataset_record))
            table_couple.service_record = None
            model.Session.commit()
        return
ref = harvest_source_reference
assert ref

# Rows that point at this dataset record but carry a different ref: the
# dataset's ref has changed, so those rows are detached from the dataset.
stale_couples = model.Session.query(HarvestCoupledResource)
stale_couples = stale_couples.filter_by(dataset_record=package)
stale_couples = stale_couples.filter(
    HarvestCoupledResource.harvest_source_reference != ref)
for couple in stale_couples:
    log.info('Ref %s has been replaced for this dataset record with '
             '%s. Removing link to the dataset record (%s, %s, %s->None)',
             couple.harvest_source_reference, ref,
             _package_name(couple.service_record),
             couple.harvest_source_reference,
             _package_name(couple.dataset_record))
    couple.dataset_record = None
    model.Session.commit()

# Couples with this ref
couples_with_ref = HarvestCoupledResource.get_by_harvest_source_reference(ref)
for couple in couples_with_ref:
    if couple.dataset_record != package:
        # Test: test_03_harvest_dataset_to_match_existing_service
        service_name = _package_name(couple.service_record)
        previous_dataset_name = _package_name(couple.dataset_record)
        log.info('Linking ref to this dataset record (%s, %s, %s->%s)',
                 service_name, ref, previous_dataset_name, package.name)
        couple.dataset_record = package
        model.Session.commit()
    else:
        # Test: test_01_reharvest_existing_dataset
        service_name = _package_name(couple.service_record)
        dataset_name = _package_name(couple.dataset_record)
        log.info('Couple for this dataset and ref already exists (%s, %s, %s)',
                 service_name, ref, dataset_name)
assert ref

# Rows that point at this dataset record but carry a different ref: the
# dataset's ref has changed, so those rows are detached from the dataset.
stale_couples = model.Session.query(HarvestCoupledResource)
stale_couples = stale_couples.filter_by(dataset_record=package)
stale_couples = stale_couples.filter(
    HarvestCoupledResource.harvest_source_reference != ref)
for couple in stale_couples:
    log.info(
        'Ref %s has been replaced for this dataset record with '
        '%s. Removing link to the dataset record (%s, %s, %s->None)',
        couple.harvest_source_reference, ref,
        _package_name(couple.service_record),
        couple.harvest_source_reference,
        _package_name(couple.dataset_record))
    couple.dataset_record = None
    model.Session.commit()

# Couples with this ref
couples_with_ref = HarvestCoupledResource.get_by_harvest_source_reference(ref)
for couple in couples_with_ref:
    if couple.dataset_record != package:
        # Test: test_03_harvest_dataset_to_match_existing_service
        service_name = _package_name(couple.service_record)
        previous_dataset_name = _package_name(couple.dataset_record)
        log.info('Linking ref to this dataset record (%s, %s, %s->%s)',
                 service_name, ref, previous_dataset_name, package.name)
        couple.dataset_record = package
        model.Session.commit()
    else:
        # Test: test_01_reharvest_existing_dataset
        service_name = _package_name(couple.service_record)
        dataset_name = _package_name(couple.dataset_record)
        log.info(
            'Couple for this dataset and ref already exists (%s, %s, %s)',
            service_name, ref, dataset_name)

# No couples for this ref