def callback(item, **kwargs):
    """Flag the item for auto-routing and tag it from the routing rule name.

    ``item['profile']`` is always set to ``'autorouting'``.

    When a ``rule`` keyword argument is given and its ``name`` contains a
    colon, the name is read as ``<service>:<destination>`` once any
    parenthesised upper-case marker (e.g. ``(EN)``) has been removed. The
    service is looked up in the ``cp.DISTRIBUTION`` vocabulary and the
    destination in ``cp.DESTINATIONS``; every match is appended to
    ``item['subject']``, every miss is logged as an error.

    For broadcast items, each entry of ``item['associations']`` is replaced
    with ``None``.

    :param item: item dict, modified in place.
    :param kwargs: may carry ``rule``, a mapping with a ``name`` key.
    :return: the same ``item`` object.
    """
    rule = kwargs.get('rule')
    item['profile'] = 'autorouting'

    if rule and ':' in rule['name']:
        cleaned = re.sub(r'\([A-Z]+\)', '', rule['name'])
        # Split on the first colon only: unpacking a plain split(':') raised
        # ValueError whenever the rule name held more than one colon.
        service, _, destination = cleaned.partition(':')
        mapping = {
            cp.DISTRIBUTION: service.strip(),
            cp.DESTINATIONS: destination.strip(),
        }
        for cv_id, name in mapping.items():
            subject = find_name_item(cv_id, name.lower())
            if not subject:
                logger.error('no item found in vocabulary %s with name %s', cv_id, name)
                continue
            item.setdefault('subject', []).append({
                'name': subject['name'],
                'qcode': subject['qcode'],
                'scheme': cv_id,
            })

    # remove associations for Broadcast content
    if cp.is_broadcast(item) and item.get('associations'):
        item['associations'] = {key: None for key in item['associations']}

    return item
def callback(item, **kwargs):
    """Flag the item for auto-routing and tag it from the routing rule name.

    ``item["profile"]`` is always set to ``"autorouting"``.

    When a ``rule`` keyword argument is given and its ``name`` contains a
    colon, the name is read as ``<service>:<destination>`` once any
    parenthesised upper-case marker (e.g. ``(EN)``) has been removed. The
    service is looked up in the ``cp.DISTRIBUTION`` vocabulary and the
    destination in ``cp.DESTINATIONS``; every match is appended to
    ``item["subject"]``, every miss is logged as an error.

    For broadcast items, each entry of ``item["associations"]`` is replaced
    with ``None`` and a non-empty ``abstract`` is moved into ``body_html``.

    :param item: item dict, modified in place.
    :param kwargs: may carry ``rule``, a mapping with a ``name`` key.
    :return: the same ``item`` object.
    """
    rule = kwargs.get("rule")
    item["profile"] = "autorouting"

    if rule and ":" in rule["name"]:
        cleaned = re.sub(r"\([A-Z]+\)", "", rule["name"])
        # Split on the first colon only: unpacking a plain split(":") raised
        # ValueError whenever the rule name held more than one colon.
        service, _, destination = cleaned.partition(":")
        mapping = {
            cp.DISTRIBUTION: service.strip(),
            cp.DESTINATIONS: destination.strip(),
        }
        for cv_id, name in mapping.items():
            subject = find_name_item(cv_id, name.lower())
            if not subject:
                logger.error("no item found in vocabulary %s with name %s", cv_id, name)
                continue
            item.setdefault("subject", []).append({
                "name": subject["name"],
                "qcode": subject["qcode"],
                "scheme": cv_id,
            })

    # handle APR specific output
    if cp.is_broadcast(item):
        if item.get("associations"):
            item["associations"] = {key: None for key in item["associations"]}
        if item.get("abstract"):
            # the abstract replaces the body; pop() also removes the abstract key
            item["body_html"] = item.pop("abstract")

    return item
def _format_item(self, root, item, pub_seq_num, service, services) -> None:
    """Fill ``root`` with the publish envelope and a ``ContentItem`` for ``item``.

    Elements are appended one after another, so the statement order below
    is also the order of the children in the resulting XML.

    :param root: parent XML element; receives the root-level fields and the
        new ``ContentItem`` child.
    :param item: item dict being formatted.
    :param pub_seq_num: publish sequence number; written as ``PublishID`` and,
        zero-padded to eight digits modulo 10**8, as ``ContentItemID``.
    :param service: when truthy on a non-picture item it is written as
        ``PscCodes`` with ``Services`` set to ``Print``; when truthy it also
        enables the ``Note`` element.
    :param services: strings joined with ``,`` into ``Note`` when ``service``
        is truthy.
    """

    def put(parent, tag, text):
        # Append <tag> under parent and set its text in one step.
        etree.SubElement(parent, tag).text = text

    if is_picture(item):
        xsi_ns = "http://www.w3.org/2001/XMLSchema-instance"
        content = etree.SubElement(
            root,
            "ContentItem",
            {"{%s}type" % xsi_ns: "PhotoContentItem"},
            nsmap={"d2p1": xsi_ns},
        )
    else:
        content = etree.SubElement(root, "ContentItem")

    extra = item.get("extra") or {}

    # root system fields
    for tag, text in (
        ("Reschedule", "false"),
        ("IsRegional", "false"),
        ("CanAutoRoute", "true"),
        ("PublishID", str(pub_seq_num)),
    ):
        put(root, tag, text)
    etree.SubElement(root, "Username")  # left without text
    for tag, text in (
        ("UseLocalsOut", "false"),
        ("UserProfileID", "0"),
        ("PublishOrder", "0"),
        ("NewCycle", "false"),
        ("OnlineResend", "false"),
    ):
        put(root, tag, text)

    # item system fields
    for tag in ("AutoSaveID", "Type", "MediaType", "Status"):
        put(content, tag, "0")

    # services / destination codes
    if is_picture(item):
        put(root, "Services", "Pictures")
        self._format_subject_code(root, item, "PscCodes", cp.DESTINATIONS)
        if root.find("PscCodes") is None:
            # nothing was added by the subject-code step, use the fallback
            put(root, "PscCodes", "Online")
    elif service:
        put(root, "Services", "Print")
        put(root, "PscCodes", service)
    else:
        self._format_subject_code(root, item, "PscCodes", cp.DESTINATIONS)
        self._format_services(root, item)

    is_broadcast = cp.is_broadcast(item)

    # content system fields
    original = self._get_original_item(item)
    wrap = 100000000  # identifiers are kept to eight digits
    seq_id = format(pub_seq_num % wrap, "08d")
    item_id = format(original["unique_id"] % wrap, "08d")

    etree.SubElement(content, "Name")  # left without text
    put(content, "Cachable", "false")
    put(content, "FileName", filename(original))
    put(content, "NewsCompID", item_id)
    put(content, "SystemSlug", slug(original))
    put(content, "ContentItemID", seq_id)
    put(content, "ProfileID", "204")
    put(content, "SysContentType", "0")

    if is_picture(item):
        put(content, "PhotoContentItemID", item_id)

    if extra.get(cp.FILENAME):
        put(content, "OrigTransRef", extra[cp.FILENAME])

    if service:
        put(content, "Note", ",".join(services))

    # timestamps
    firstpublished = item.get("firstpublished") or item["versioncreated"]
    put(root, "PublishDateTime", self._format_datetime(firstpublished))

    try:
        put(
            content,
            "EmbargoTime",
            self._format_datetime(item[SCHEDULE_SETTINGS]["utc_embargo"], local=True),
        )
    except KeyError:
        # no scheduled embargo available, fall back to the plain field
        put(
            content,
            "EmbargoTime",
            self._format_datetime(item.get("embargoed"), local=True),
        )

    put(content, "CreatedDateTime", self._format_datetime(firstpublished))  # SDCP-380
    put(
        content,
        "UpdatedDateTime",
        self._format_datetime(item["versioncreated"], rel=True),
    )

    # obvious
    if is_picture(item):
        content_type = "Photo"
    else:
        content_type = item["type"].capitalize()
    put(content, "ContentType", content_type)

    # SDCP-309
    headline = extra.get(cp.HEADLINE2) or item.get("headline")
    put(content, "Headline", format_maxlength(headline, OUTPUT_LENGTH_LIMIT))
    if not is_picture(item):
        put(
            content,
            "Headline2",
            format_maxlength(item.get("headline"), OUTPUT_LENGTH_LIMIT),
        )

    put(content, "SlugProper", item.get("slugline"))
    put(content, "Credit", self._format_credit(item))
    put(content, "Source", item.get("source"))

    content_html = self._format_content(item, is_broadcast)

    # Keep a handle on DirectoryText: text items overwrite it further down.
    abstract_text = self._format_text(item.get("abstract"))
    directory_text = etree.SubElement(content, "DirectoryText")
    directory_text.text = abstract_text

    put(content, "ContentText", self._format_html(content_html))

    if "fr" in item.get("language", ""):
        language_code = "2"
    else:
        language_code = "1"
    put(content, "Language", language_code)

    # NOTE(review): the word-count elements are taken to belong to this
    # text-only branch -- confirm against the original layout.
    if item["type"] == "text" and content_html:
        plain = get_text(content_html, "html", lf_on_block=False)
        directory_text.text = format_maxlength(plain.replace("\n", " "), 200)
        word_count = str(get_word_count(content_html))
        for tag in ("Length", "WordCount", "BreakWordCount"):
            put(content, tag, word_count)

    if item.get("keywords") and item.get("source") == globenewswire.SOURCE:
        put(content, "Stocks", ",".join(item["keywords"]))

    self._format_category_index(content, item)
    self._format_genre(content, item)
    self._format_urgency(content, item.get("urgency"), item["language"])
    self._format_keyword(
        content,
        item.get("keywords"),
        ", " if item.get("type") == "picture" else ",",
    )
    self._format_dateline(content, item.get("dateline"))
    self._format_writethru(content, item)

    if item.get("byline"):
        put(content, "Byline", item["byline"])

    if is_picture(item):
        self._format_picture_metadata(content, item)
    else:
        put(content, "EditorNote", item.get("ednote"))
        for extra_key, tag in (
            (cp.UPDATE, "UpdateNote"),
            (cp.CORRECTION, "Corrections"),
        ):
            if extra.get(extra_key):
                put(content, tag, extra[extra_key])

    # NOTE(review): associations are taken to be handled for every item
    # type (function level) -- confirm against the original layout.
    if item.get("associations"):
        self._format_associations(content, item)
def _format_item(self, root, item, pub_seq_num, service, services) -> None:
    """Fill ``root`` with the publish envelope and a ``ContentItem`` for ``item``.

    Elements are appended one after another, so the statement order below
    is also the order of the children in the resulting XML.

    :param root: parent XML element; receives the root-level fields and the
        new ``ContentItem`` child.
    :param item: item dict being formatted.
    :param pub_seq_num: publish sequence number; written as ``PublishID`` and,
        zero-padded to eight digits modulo 10**8, as ``ContentItemID``.
    :param service: when truthy on a non-picture item it is written as
        ``PscCodes`` with ``Services`` set to ``Print``; when truthy it also
        enables the ``Note`` element.
    :param services: strings joined with ``,`` into ``Note`` when ``service``
        is truthy.
    """

    def put(parent, tag, text):
        # Append <tag> under parent and set its text in one step.
        etree.SubElement(parent, tag).text = text

    if is_picture(item):
        xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'
        content = etree.SubElement(
            root,
            'ContentItem',
            {'{%s}type' % xsi_ns: 'PhotoContentItem'},
            nsmap={'d2p1': xsi_ns},
        )
    else:
        content = etree.SubElement(root, 'ContentItem')

    extra = item.get('extra') or {}

    # root system fields
    for tag, text in (
        ('Reschedule', 'false'),
        ('IsRegional', 'false'),
        ('CanAutoRoute', 'true'),
        ('PublishID', str(pub_seq_num)),
    ):
        put(root, tag, text)
    etree.SubElement(root, 'Username')  # left without text
    for tag, text in (
        ('UseLocalsOut', 'false'),
        ('UserProfileID', '0'),
        ('PublishOrder', '0'),
        ('NewCycle', 'false'),
        ('OnlineResend', 'false'),
    ):
        put(root, tag, text)

    # item system fields
    for tag in ('AutoSaveID', 'Type', 'MediaType', 'Status'):
        put(content, tag, '0')

    # services / destination codes
    if is_picture(item):
        put(root, 'Services', 'Pictures')
        self._format_subject_code(root, item, 'PscCodes', cp.DESTINATIONS)
        if root.find('PscCodes') is None:
            # nothing was added by the subject-code step, use the fallback
            put(root, 'PscCodes', 'Online')
    elif service:
        put(root, 'Services', 'Print')
        put(root, 'PscCodes', service)
    else:
        self._format_subject_code(root, item, 'PscCodes', cp.DESTINATIONS)
        self._format_services(root, item)

    is_broadcast = cp.is_broadcast(item)

    # content system fields
    original = self._get_original_item(item)
    wrap = 100000000  # identifiers are kept to eight digits
    seq_id = format(pub_seq_num % wrap, '08d')
    item_id = format(original['unique_id'] % wrap, '08d')

    etree.SubElement(content, 'Name')  # left without text
    put(content, 'Cachable', 'false')
    put(content, 'FileName', filename(original))
    put(content, 'NewsCompID', item_id)
    put(content, 'SystemSlug', slug(original))
    put(content, 'ContentItemID', seq_id)
    put(content, 'ProfileID', '204')
    put(content, 'SysContentType', '0')

    if is_picture(item):
        put(content, 'PhotoContentItemID', item_id)

    if extra.get(cp.FILENAME):
        put(content, 'OrigTransRef', extra[cp.FILENAME])

    if service:
        put(content, 'Note', ','.join(services))

    # timestamps
    firstpublished = item.get('firstpublished') or item['versioncreated']
    put(root, 'PublishDateTime', self._format_datetime(firstpublished))

    try:
        put(
            content,
            'EmbargoTime',
            self._format_datetime(item[SCHEDULE_SETTINGS]['utc_embargo'], local=True),
        )
    except KeyError:
        # no scheduled embargo available, fall back to the plain field
        put(
            content,
            'EmbargoTime',
            self._format_datetime(item.get('embargoed'), local=True),
        )

    put(content, 'CreatedDateTime', self._format_datetime(firstpublished))  # SDCP-380
    put(
        content,
        'UpdatedDateTime',
        self._format_datetime(item['versioncreated'], rel=True),
    )

    # obvious
    if is_picture(item):
        content_type = 'Photo'
    else:
        content_type = item['type'].capitalize()
    put(content, 'ContentType', content_type)

    # SDCP-309
    headline = extra.get(cp.HEADLINE2) or item.get('headline')
    put(content, 'Headline', format_maxlength(headline, OUTPUT_LENGTH_LIMIT))
    if not is_picture(item):
        put(
            content,
            'Headline2',
            format_maxlength(item.get('headline'), OUTPUT_LENGTH_LIMIT),
        )

    put(content, 'SlugProper', item.get('slugline'))
    put(content, 'Credit', self._format_credit(item))
    put(content, 'Source', item.get('source'))

    content_html = self._format_content(item, is_broadcast)

    # Keep a handle on DirectoryText: text items overwrite it further down.
    abstract_text = self._format_text(item.get('abstract'))
    directory_text = etree.SubElement(content, 'DirectoryText')
    directory_text.text = abstract_text

    put(content, 'ContentText', self._format_html(content_html))

    if 'fr' in item.get('language', ''):
        language_code = '2'
    else:
        language_code = '1'
    put(content, 'Language', language_code)

    # NOTE(review): the word-count elements are taken to belong to this
    # text-only branch -- confirm against the original layout.
    if item['type'] == 'text' and content_html:
        plain = get_text(content_html, 'html', lf_on_block=False)
        directory_text.text = format_maxlength(plain.replace('\n', ' '), 200)
        word_count = str(get_word_count(content_html))
        for tag in ('Length', 'WordCount', 'BreakWordCount'):
            put(content, tag, word_count)

    if item.get('keywords') and item.get('source') == globenewswire.SOURCE:
        put(content, 'Stocks', ','.join(item['keywords']))

    self._format_category_index(content, item)
    self._format_genre(content, item)
    self._format_urgency(content, item.get('urgency'), item['language'])
    self._format_keyword(
        content,
        item.get('keywords'),
        ', ' if item.get('type') == 'picture' else ',',
    )
    self._format_dateline(content, item.get('dateline'))
    self._format_writethru(content, item)

    if item.get('byline'):
        put(content, 'Byline', item['byline'])

    if is_picture(item):
        self._format_picture_metadata(content, item)
    else:
        put(content, 'EditorNote', item.get('ednote'))
        for extra_key, tag in (
            (cp.UPDATE, 'UpdateNote'),
            (cp.CORRECTION, 'Corrections'),
        ):
            if extra.get(extra_key):
                put(content, tag, extra[extra_key])

    # NOTE(review): associations are taken to be handled for every item
    # type (function level) -- confirm against the original layout.
    if item.get('associations'):
        self._format_associations(content, item)