def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Ingest one provider item, together with its associated media.

    The item is looked up by guid in the ingest collection
    (``feeding_service.service`` when the feeding service defines it,
    ``'ingest'`` otherwise). A known item is patched; an unknown one gets an
    id and a family id and is inserted. Associations carrying a guid are
    linked to their already-ingested copy, or ingested recursively.

    :param item: item dict to ingest; it is modified in place.
    :param provider: ingest provider dict (its id, ``source`` and
        ``content_expiry`` are read).
    :param feeding_service: feeding service, used to prepare rendition hrefs
        and passed on to ``ingest_cancel``.
    :param rule_set: forwarded to ``apply_rule_set``.
    :param routing_scheme: when truthy, applied to the stored item if this
        call produced a new version.
    :return: ``(True, ids)`` with the ids stored during this call
        (association ids first, the item's own id last), or ``(False, [])``
        if any exception was raised.
    """
    items_ids = []
    try:
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take whichever comes first
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                # NOTE(review): 'mimetype' is read from the renditions mapping, not
                # from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])

            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if guid:
                lookup = {'guid': guid}
                ingested = ingest_service.get_from_mongo(req=None, lookup=lookup)
                if ingested.count() >= 1:
                    # index the result once instead of on every access
                    existing = ingested[0]
                    assoc['_id'] = existing['_id']
                    # add missing renditions; the incoming association may not
                    # carry a 'renditions' mapping at all, so create it on demand
                    stored_renditions = existing.get('renditions') or {}
                    if stored_renditions:
                        if not assoc.get('renditions'):
                            assoc['renditions'] = {}
                        for name, rendition in stored_renditions.items():
                            assoc['renditions'].setdefault(name, rendition)
                else:
                    # there is no such item in the system - ingest it
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        # NOTE(review): ids lists nested association ids before the
                        # association's own id, so ids[0] is the association itself
                        # only when it has no associations of its own - confirm.
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # stored fields first, then the fresh values on top
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                # handled by the outer except, which reports the failure
                raise

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        # the returned error object is not raised; ingest of this item is
        # simply reported as failed to the caller
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Ingest one provider item, together with its associated media.

    The item is looked up by guid in the ingest collection
    (``feeding_service.service`` when the feeding service defines it,
    ``'ingest'`` otherwise). A known item is patched; an unknown one gets an
    id and a family id and is inserted. Associations carrying a guid are
    linked to their already-ingested copy, or ingested recursively.

    :param item: item dict to ingest; it is modified in place.
    :param provider: ingest provider dict (its id, ``source`` and
        ``content_expiry`` are read).
    :param feeding_service: feeding service, used to prepare rendition hrefs
        and passed on to ``ingest_cancel``.
    :param rule_set: forwarded to ``apply_rule_set``.
    :param routing_scheme: when truthy, applied to the stored item if this
        call produced a new version.
    :return: ``(True, ids)`` with the ids stored during this call
        (association ids first, the item's own id last), or ``(False, [])``
        if any exception was raised, including a failure to persist the item.
    """
    items_ids = []
    try:
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take whichever comes first
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                # NOTE(review): 'mimetype' is read from the renditions mapping, not
                # from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])

            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if guid:
                lookup = {'guid': guid}
                ingested = ingest_service.get_from_mongo(req=None, lookup=lookup)
                if ingested.count() >= 1:
                    # index the result once instead of on every access
                    existing = ingested[0]
                    assoc['_id'] = existing['_id']
                    # add missing renditions; the incoming association may not
                    # carry a 'renditions' mapping at all, so create it on demand
                    stored_renditions = existing.get('renditions') or {}
                    if stored_renditions:
                        if not assoc.get('renditions'):
                            assoc['renditions'] = {}
                        for name, rendition in stored_renditions.items():
                            assoc['renditions'].setdefault(name, rendition)
                else:
                    # there is no such item in the system - ingest it
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        # NOTE(review): ids lists nested association ids before the
                        # association's own id, so ids[0] is the association itself
                        # only when it has no associations of its own - confirm.
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # stored fields first, then the fresh values on top
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                # Do not carry on as if the item had been stored: routing would
                # look up a missing document and the caller would be told the
                # ingest succeeded. The outer except turns this into (False, []).
                raise

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        return False, []
    return True, items_ids
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None, expiry=None):
    """Ingest one provider item, together with its associated media.

    The item is looked up by guid in the collection returned by
    ``get_ingest_collection``. A known item is patched; an unknown one gets an
    id and a family id and is inserted. Associations carrying a guid are
    linked to their already-ingested copy (whose expiry is moved to match this
    item) or ingested recursively with this item's expiry; associations
    without a guid but with a ``residRef`` are replaced by ``resolve_ref``.

    :param item: item dict to ingest; it is modified in place.
    :param provider: ingest provider dict (its id, ``source`` and
        ``content_expiry`` are read).
    :param feeding_service: feeding service, used to pick the collection, to
        prepare rendition hrefs and passed on to ``ingest_cancel``.
    :param rule_set: forwarded to ``apply_rule_set``.
    :param routing_scheme: when truthy, applied to the stored item if this
        call produced a new version.
    :param expiry: expiry to store on the item; when falsy it is computed
        from the provider/app configuration.
    :return: ``(True, ids)`` with the ids stored during this call
        (association ids first, the item's own id last), or ``(False, [])``
        if any exception was raised.
    """
    items_ids = []
    try:
        ingest_collection = get_ingest_collection(feeding_service, item)
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        item.setdefault("uri", item[GUID_FIELD])  # keep it as original guid

        if item.get("profile"):
            try:
                item["profile"] = bson.ObjectId(item["profile"])
            except bson.errors.InvalidId:
                # not an ObjectId - look the profile up by its raw value
                pass
            profile = superdesk.get_resource_service("content_types").find_one(
                req=None, _id=item["profile"])
            if not profile:  # unknown profile
                item.pop("profile")

        set_default_state(item, CONTENT_STATE.INGESTED)
        # when fetching associated item set expiry to match parent
        item["expiry"] = (
            get_expiry_date(
                provider.get("content_expiry") or app.config["INGEST_EXPIRY_MINUTES"],
                item.get("versioncreated"))
            if not expiry else expiry
        )

        if "anpa_category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            if not app.config.get("INGEST_SKIP_IPTC_CODES", False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if "anpa_category" not in item:
                derive_category(item, provider)
        elif "anpa_category" in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get("pubstatus", "") == "canceled":
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get("renditions", {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take whichever comes first
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            # if there is media should be processed already
            if baseImageRend and not baseImageRend.get("media"):
                # NOTE(review): 'mimetype' is read from the renditions mapping, not
                # from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(baseImageRend["href"], rend.get("mimetype"))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get("associations", {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)

            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get("guid")
            assoc_name = assoc.get("headline") or assoc.get("slugline") or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    logger.info("assoc ingested before %s", assoc_name)
                    assoc["_id"] = ingested["_id"]

                    # update expiry so assoc will stay as long as the item using it
                    ingest_service.system_update(ingested["_id"], {"expiry": item["expiry"]}, ingested)

                    if is_new_version(assoc, ingested) and assoc.get("renditions"):  # new version
                        logger.info(
                            "new assoc version - re-transfer renditions for %s", assoc_name)
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            # 'name' is a reserved LogRecord attribute: passing it
                            # in ``extra`` makes logging raise KeyError, which would
                            # abort the ingest this handler is meant to keep alive.
                            logger.exception(
                                "failed to update associated item renditions",
                                extra=dict(
                                    guid=guid,
                                    assoc_name=assoc_name,
                                ),
                            )
                    else:
                        logger.info(
                            "same/old version - use already fetched renditions for %s", assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:
                    # there is no such item in the system - ingest it
                    if assoc.get("renditions") and has_system_renditions(assoc):  # all set, just download
                        logger.info(
                            "new association with system renditions - transfer %s", assoc_name)
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            # see above: 'name' must not be used as an ``extra`` key
                            logger.exception(
                                "failed to download renditions",
                                extra=dict(
                                    guid=guid,
                                    assoc_name=assoc_name,
                                ),
                            )

                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set, expiry=item["expiry"])
                    if status:
                        # NOTE(review): ids lists nested association ids before the
                        # association's own id, so ids[0] is the association itself
                        # only when it has no associations of its own - confirm.
                        assoc["_id"] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None, _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get("residRef"):
                # replacing the value of an existing key is safe while iterating
                item["associations"][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # stored fields first, then the fresh values on top
            item.update(old_item)
            item.update(updates)
            items_ids.append(item["_id"])
        else:
            if item.get("ingest_provider_sequence") is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    "Exception while persisting item in %s collection: %s", ingest_collection, e)
                # handled by the outer except, which reports the failure
                raise

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service("routing_schemes").apply_routing_scheme(
                routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        # the returned error object is not raised; ingest of this item is
        # simply reported as failed to the caller
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Ingest one provider item, together with its associated media.

    The item is looked up by guid in the ingest collection
    (``feeding_service.service`` when the feeding service defines it,
    ``'ingest'`` otherwise). A known item is patched; an unknown one gets an
    id and a family id and is inserted. Associations carrying a guid are
    linked to their already-ingested copy or ingested recursively;
    associations without a guid but with a ``residRef`` are replaced by
    ``resolve_ref``.

    :param item: item dict to ingest; it is modified in place.
    :param provider: ingest provider dict (its id, ``source`` and
        ``content_expiry`` are read).
    :param feeding_service: feeding service, used to prepare rendition hrefs
        and passed on to ``ingest_cancel``.
    :param rule_set: forwarded to ``apply_rule_set``.
    :param routing_scheme: when truthy, applied to the stored item if this
        call produced a new version.
    :return: ``(True, ids)`` with the ids stored during this call
        (association ids first, the item's own id last), or ``(False, [])``
        if any exception was raised.
    """
    items_ids = []
    try:
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        item.setdefault('uri', item[GUID_FIELD])  # keep it as original guid

        if item.get('profile'):
            try:
                item['profile'] = bson.ObjectId(item['profile'])
            except bson.errors.InvalidId:
                # not an ObjectId - keep the raw profile value
                pass

        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take whichever comes first
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            # if there is media should be processed already
            if baseImageRend and not baseImageRend.get('media'):
                # NOTE(review): 'mimetype' is read from the renditions mapping, not
                # from the chosen rendition - confirm this is intended.
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)

            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            assoc_name = assoc.get('headline') or assoc.get('slugline') or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    # only claim "ingested before" once the lookup has confirmed it
                    logger.info('assoc ingested before %s', assoc_name)
                    assoc['_id'] = ingested['_id']
                    if is_new_version(assoc, ingested) and assoc.get('renditions'):  # new version
                        logger.info(
                            'new assoc version - re-transfer renditions for %s', assoc_name)
                        transfer_renditions(assoc['renditions'])
                    else:
                        logger.info(
                            'same/old version - use already fetched renditions for %s', assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:
                    # there is no such item in the system - ingest it
                    if assoc.get('renditions') and has_system_renditions(assoc):  # all set, just download
                        logger.info(
                            'new association with system renditions - transfer %s', assoc_name)
                        transfer_renditions(assoc['renditions'])

                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        # NOTE(review): ids lists nested association ids before the
                        # association's own id, so ids[0] is the association itself
                        # only when it has no associations of its own - confirm.
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None, _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get('residRef'):
                # replacing the value of an existing key is safe while iterating
                item['associations'][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            # stored fields first, then the fresh values on top
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                logger.error(
                    'Exception while persisting item in %s collection: %s', ingest_collection, e)
                # handled by the outer except, which reports the failure
                raise

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(
                routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        # the returned error object is not raised; ingest of this item is
        # simply reported as failed to the caller
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids