Ejemplo n.º 1
0
def callback(item, **kwargs):
    """Tag an auto-routed item with its routing rule's service and destination.

    The item ``profile`` is always set to ``autorouting``. When a ``rule``
    keyword argument is given and its ``name`` contains a colon, the name is
    read as ``<service>:<destination>`` after removing parenthesised runs of
    upper-case letters (e.g. ``(EN)``). Each part is looked up by its
    lower-cased name with ``find_name_item`` in the ``cp.DISTRIBUTION`` and
    ``cp.DESTINATIONS`` vocabularies respectively; a match is appended to
    ``item['subject']`` and a miss is logged as an error. In that same case,
    items for which ``cp.is_broadcast`` is true get every association set to
    ``None``.

    :param item: item dict, modified in place.
    :param kwargs: may carry ``rule``, a dict with a ``name`` key.
    :return: the same ``item`` object.
    """
    item['profile'] = 'autorouting'
    rule = kwargs.get('rule')
    if not rule or ':' not in rule['name']:
        return item

    cleaned_name = re.sub(r'\([A-Z]+\)', '', rule['name'])
    # Split on the first colon only: a name with more than one colon used to
    # raise ValueError when unpacking; now the remainder stays in destination
    # and an unknown value is reported through the error log below.
    service, destination = cleaned_name.split(':', 1)
    mapping = {
        cp.DISTRIBUTION: service.strip(),
        cp.DESTINATIONS: destination.strip(),
    }

    for cv_id, name in mapping.items():
        subject = find_name_item(cv_id, name.lower())
        if not subject:
            logger.error('no item found in vocabulary %s with name %s',
                         cv_id, name)
            continue
        item.setdefault('subject', []).append({
            'name': subject['name'],
            'qcode': subject['qcode'],
            'scheme': cv_id,
        })

    # remove associations for Broadcast content (keys are kept, values
    # become None)
    if cp.is_broadcast(item) and item.get('associations'):
        item['associations'] = {key: None for key in item['associations']}

    return item
Ejemplo n.º 2
0
def callback(item, **kwargs):
    """Tag an auto-routed item with its routing rule's service and destination.

    The item ``profile`` is always set to ``autorouting``. When a ``rule``
    keyword argument is given and its ``name`` contains a colon, the name is
    read as ``<service>:<destination>`` after removing parenthesised runs of
    upper-case letters (e.g. ``(EN)``). Each part is looked up by its
    lower-cased name with ``find_name_item`` in the ``cp.DISTRIBUTION`` and
    ``cp.DESTINATIONS`` vocabularies respectively; a match is appended to
    ``item["subject"]`` and a miss is logged as an error. In that same case,
    items for which ``cp.is_broadcast`` is true get every association set to
    ``None`` and a non-empty ``abstract`` moved into ``body_html``.

    :param item: item dict, modified in place.
    :param kwargs: may carry ``rule``, a dict with a ``name`` key.
    :return: the same ``item`` object.
    """
    item["profile"] = "autorouting"
    rule = kwargs.get("rule")
    if not rule or ":" not in rule["name"]:
        return item

    cleaned_name = re.sub(r"\([A-Z]+\)", "", rule["name"])
    # Split on the first colon only: a name with more than one colon used to
    # raise ValueError when unpacking; now the remainder stays in destination
    # and an unknown value is reported through the error log below.
    service, destination = cleaned_name.split(":", 1)
    mapping = {
        cp.DISTRIBUTION: service.strip(),
        cp.DESTINATIONS: destination.strip(),
    }

    for cv_id, name in mapping.items():
        subject = find_name_item(cv_id, name.lower())
        if not subject:
            logger.error("no item found in vocabulary %s with name %s",
                         cv_id, name)
            continue
        item.setdefault("subject", []).append({
            "name": subject["name"],
            "qcode": subject["qcode"],
            "scheme": cv_id,
        })

    # handle APR specific output (applied to items cp.is_broadcast accepts)
    if cp.is_broadcast(item):
        if item.get("associations"):
            # keep the association keys but blank out their values
            item["associations"] = {
                key: None
                for key in item["associations"]
            }
        if item.get("abstract"):
            # the abstract replaces the body and is removed from the item
            item["body_html"] = item.pop("abstract")

    return item
Ejemplo n.º 3
0
    def _format_item(self, root, item, pub_seq_num, service, services) -> None:
        """Fill ``root`` with the XML elements describing ``item``.

        A ``ContentItem`` child is created under ``root`` (with an xsi
        ``type`` attribute of ``PhotoContentItem`` for pictures). Publish-level
        fields are appended to ``root`` and item-level fields to the
        ``ContentItem``. Elements are appended in statement order, so the
        order of the statements below is the order of the output.

        :param root: parent XML element, modified in place.
        :param item: item dict to format; ``versioncreated``, ``type`` and
            ``language`` are read with ``item[...]`` and must be present.
        :param pub_seq_num: integer written as-is to ``PublishID`` and, modulo
            10**8 and zero-padded to 8 digits, to ``ContentItemID``.
        :param service: when truthy and the item is not a picture, written to
            ``PscCodes`` with ``Services`` fixed to ``Print``; when truthy it
            also triggers the ``Note`` element.
        :param services: strings joined with ``,`` into ``Note``; only read
            when ``service`` is truthy.
        """
        if is_picture(item):
            D2P1 = "http://www.w3.org/2001/XMLSchema-instance"
            content = etree.SubElement(
                root,
                "ContentItem",
                {"{%s}type" % D2P1: "PhotoContentItem"},
                nsmap={
                    "d2p1": D2P1,
                },
            )
        else:
            content = etree.SubElement(root, "ContentItem")
        # a missing or None "extra" becomes an empty dict so .get() is safe
        extra = item.get("extra") or {}

        # root system fields (fixed values, except PublishID)
        etree.SubElement(root, "Reschedule").text = "false"
        etree.SubElement(root, "IsRegional").text = "false"
        etree.SubElement(root, "CanAutoRoute").text = "true"
        etree.SubElement(root, "PublishID").text = str(pub_seq_num)
        etree.SubElement(root, "Username")  # emitted empty
        etree.SubElement(root, "UseLocalsOut").text = "false"
        etree.SubElement(root, "UserProfileID").text = "0"
        etree.SubElement(root, "PublishOrder").text = "0"
        etree.SubElement(root, "NewCycle").text = "false"
        etree.SubElement(root, "OnlineResend").text = "false"

        # item system fields (fixed values)
        etree.SubElement(content, "AutoSaveID").text = "0"
        etree.SubElement(content, "Type").text = "0"
        etree.SubElement(content, "MediaType").text = "0"
        etree.SubElement(content, "Status").text = "0"

        # Services / PscCodes: pictures first, then an explicit service,
        # otherwise both are derived from the item itself
        if is_picture(item):
            etree.SubElement(root, "Services").text = "Pictures"
            self._format_subject_code(root, item, "PscCodes", cp.DESTINATIONS)
            # default to "Online" when no PscCodes element exists under root
            if root.find("PscCodes") is None:
                etree.SubElement(root, "PscCodes").text = "Online"
        elif service:
            etree.SubElement(root, "Services").text = "Print"
            etree.SubElement(root, "PscCodes").text = service
        else:
            self._format_subject_code(root, item, "PscCodes", cp.DESTINATIONS)
            self._format_services(root, item)

        is_broadcast = cp.is_broadcast(item)

        # content system fields; file name, slug and ids come from the item
        # returned by _get_original_item, not from ``item`` directly
        orig = self._get_original_item(item)
        # both ids are zero-padded to 8 digits; the modulo keeps them there
        seq_id = "{:08d}".format(pub_seq_num % 100000000)
        item_id = "{:08d}".format(orig["unique_id"] % 100000000)
        etree.SubElement(content, "Name")  # emitted empty
        etree.SubElement(content, "Cachable").text = "false"
        etree.SubElement(content, "FileName").text = filename(orig)
        etree.SubElement(content, "NewsCompID").text = item_id
        etree.SubElement(content, "SystemSlug").text = slug(orig)
        etree.SubElement(content, "ContentItemID").text = seq_id
        etree.SubElement(content, "ProfileID").text = "204"
        etree.SubElement(content, "SysContentType").text = "0"

        if is_picture(item):
            etree.SubElement(content, "PhotoContentItemID").text = item_id

        if extra.get(cp.FILENAME):
            etree.SubElement(content, "OrigTransRef").text = extra[cp.FILENAME]

        if service:
            etree.SubElement(content, "Note").text = ",".join(services)

        # timestamps; firstpublished falls back to versioncreated
        firstpublished = item.get("firstpublished") or item["versioncreated"]
        etree.SubElement(root, "PublishDateTime").text = self._format_datetime(
            firstpublished
        )
        # Prefer the UTC embargo from the schedule settings. The right-hand
        # side is evaluated before the assignment target, so a KeyError is
        # raised before any EmbargoTime element is created and the fallback
        # below adds the only one.
        try:
            etree.SubElement(content, "EmbargoTime").text = self._format_datetime(
                item[SCHEDULE_SETTINGS]["utc_embargo"],
                local=True,
            )
        except KeyError:
            # fall back to the item's "embargoed" value, which may be None
            etree.SubElement(content, "EmbargoTime").text = self._format_datetime(
                item.get("embargoed"), local=True
            )
        etree.SubElement(content, "CreatedDateTime").text = self._format_datetime(
            firstpublished
        )  # SDCP-380
        etree.SubElement(content, "UpdatedDateTime").text = self._format_datetime(
            item["versioncreated"], rel=True
        )

        # content type: "Photo" for pictures, otherwise the capitalised type
        etree.SubElement(content, "ContentType").text = (
            "Photo" if is_picture(item) else item["type"].capitalize()
        )

        # SDCP-309: Headline prefers the extra HEADLINE2 value over the item
        # headline; non-picture items also get the item headline as Headline2
        etree.SubElement(content, "Headline").text = format_maxlength(
            extra.get(cp.HEADLINE2) or item.get("headline"), OUTPUT_LENGTH_LIMIT
        )
        if not is_picture(item):
            etree.SubElement(content, "Headline2").text = format_maxlength(
                item.get("headline"), OUTPUT_LENGTH_LIMIT
            )

        etree.SubElement(content, "SlugProper").text = item.get("slugline")
        etree.SubElement(content, "Credit").text = self._format_credit(item)
        etree.SubElement(content, "Source").text = item.get("source")

        content_html = self._format_content(item, is_broadcast)
        # DirectoryText starts as the formatted abstract; text items with
        # content overwrite it further down
        etree.SubElement(content, "DirectoryText").text = self._format_text(
            item.get("abstract")
        )
        etree.SubElement(content, "ContentText").text = self._format_html(content_html)
        # "2" when the language contains "fr", otherwise "1"
        etree.SubElement(content, "Language").text = (
            "2" if "fr" in item.get("language", "") else "1"
        )

        if item["type"] == "text" and content_html:
            # replace DirectoryText with the plain text of the content
            # (newlines turned into spaces), limited to 200 by format_maxlength
            content.find("DirectoryText").text = format_maxlength(
                get_text(content_html, "html", lf_on_block=False).replace("\n", " "),
                200,
            )
            # the same word count is written to all three elements
            word_count = str(get_word_count(content_html))
            etree.SubElement(content, "Length").text = word_count
            etree.SubElement(content, "WordCount").text = word_count
            etree.SubElement(content, "BreakWordCount").text = word_count

        # keywords are emitted as Stocks only for the globenewswire source
        if item.get("keywords") and item.get("source") == globenewswire.SOURCE:
            etree.SubElement(content, "Stocks").text = ",".join(item["keywords"])

        self._format_category_index(content, item)
        self._format_genre(content, item)
        self._format_urgency(content, item.get("urgency"), item["language"])
        # keyword separator is ", " for pictures and "," otherwise
        self._format_keyword(
            content,
            item.get("keywords"),
            ", " if item.get("type") == "picture" else ",",
        )
        self._format_dateline(content, item.get("dateline"))
        self._format_writethru(content, item)

        if item.get("byline"):
            etree.SubElement(content, "Byline").text = item["byline"]

        if is_picture(item):
            self._format_picture_metadata(content, item)
        else:
            # editorial notes only apply to non-picture items
            etree.SubElement(content, "EditorNote").text = item.get("ednote")
            if extra.get(cp.UPDATE):
                etree.SubElement(content, "UpdateNote").text = extra[cp.UPDATE]
            if extra.get(cp.CORRECTION):
                etree.SubElement(content, "Corrections").text = extra[cp.CORRECTION]

        if item.get("associations"):
            self._format_associations(content, item)
Ejemplo n.º 4
0
    def _format_item(self, root, item, pub_seq_num, service, services) -> None:
        """Fill ``root`` with the XML elements describing ``item``.

        A ``ContentItem`` child is created under ``root`` (with an xsi
        ``type`` attribute of ``PhotoContentItem`` for pictures). Publish-level
        fields are appended to ``root`` and item-level fields to the
        ``ContentItem``. Elements are appended in statement order, so the
        order of the statements below is the order of the output.

        :param root: parent XML element, modified in place.
        :param item: item dict to format; ``versioncreated``, ``type`` and
            ``language`` are read with ``item[...]`` and must be present.
        :param pub_seq_num: integer written as-is to ``PublishID`` and, modulo
            10**8 and zero-padded to 8 digits, to ``ContentItemID``.
        :param service: when truthy and the item is not a picture, written to
            ``PscCodes`` with ``Services`` fixed to ``Print``; when truthy it
            also triggers the ``Note`` element.
        :param services: strings joined with ``,`` into ``Note``; only read
            when ``service`` is truthy.
        """
        if is_picture(item):
            D2P1 = 'http://www.w3.org/2001/XMLSchema-instance'
            content = etree.SubElement(root,
                                       'ContentItem',
                                       {'{%s}type' % D2P1: 'PhotoContentItem'},
                                       nsmap={
                                           'd2p1': D2P1,
                                       })
        else:
            content = etree.SubElement(root, 'ContentItem')
        # a missing or None "extra" becomes an empty dict so .get() is safe
        extra = item.get('extra') or {}

        # root system fields (fixed values, except PublishID)
        etree.SubElement(root, 'Reschedule').text = 'false'
        etree.SubElement(root, 'IsRegional').text = 'false'
        etree.SubElement(root, 'CanAutoRoute').text = 'true'
        etree.SubElement(root, 'PublishID').text = str(pub_seq_num)
        etree.SubElement(root, 'Username')  # emitted empty
        etree.SubElement(root, 'UseLocalsOut').text = 'false'
        etree.SubElement(root, 'UserProfileID').text = '0'
        etree.SubElement(root, 'PublishOrder').text = '0'
        etree.SubElement(root, 'NewCycle').text = 'false'
        etree.SubElement(root, 'OnlineResend').text = 'false'

        # item system fields (fixed values)
        etree.SubElement(content, 'AutoSaveID').text = '0'
        etree.SubElement(content, 'Type').text = '0'
        etree.SubElement(content, 'MediaType').text = '0'
        etree.SubElement(content, 'Status').text = '0'

        # Services / PscCodes: pictures first, then an explicit service,
        # otherwise both are derived from the item itself
        if is_picture(item):
            etree.SubElement(root, 'Services').text = 'Pictures'
            self._format_subject_code(root, item, 'PscCodes', cp.DESTINATIONS)
            # default to "Online" when no PscCodes element exists under root
            if root.find('PscCodes') is None:
                etree.SubElement(root, 'PscCodes').text = 'Online'
        elif service:
            etree.SubElement(root, 'Services').text = 'Print'
            etree.SubElement(root, 'PscCodes').text = service
        else:
            self._format_subject_code(root, item, 'PscCodes', cp.DESTINATIONS)
            self._format_services(root, item)

        is_broadcast = cp.is_broadcast(item)

        # content system fields; file name, slug and ids come from the item
        # returned by _get_original_item, not from ``item`` directly
        orig = self._get_original_item(item)
        # both ids are zero-padded to 8 digits; the modulo keeps them there
        seq_id = '{:08d}'.format(pub_seq_num % 100000000)
        item_id = '{:08d}'.format(orig['unique_id'] % 100000000)
        etree.SubElement(content, 'Name')  # emitted empty
        etree.SubElement(content, 'Cachable').text = 'false'
        etree.SubElement(content, 'FileName').text = filename(orig)
        etree.SubElement(content, 'NewsCompID').text = item_id
        etree.SubElement(content, 'SystemSlug').text = slug(orig)
        etree.SubElement(content, 'ContentItemID').text = seq_id
        etree.SubElement(content, 'ProfileID').text = '204'
        etree.SubElement(content, 'SysContentType').text = '0'

        if is_picture(item):
            etree.SubElement(content, 'PhotoContentItemID').text = item_id

        if extra.get(cp.FILENAME):
            etree.SubElement(content, 'OrigTransRef').text = extra[cp.FILENAME]

        if service:
            etree.SubElement(content, 'Note').text = ','.join(services)

        # timestamps; firstpublished falls back to versioncreated
        firstpublished = item.get('firstpublished') or item['versioncreated']
        etree.SubElement(
            root,
            'PublishDateTime').text = self._format_datetime(firstpublished)
        # Prefer the UTC embargo from the schedule settings. The right-hand
        # side is evaluated before the assignment target, so a KeyError is
        # raised before any EmbargoTime element is created and the fallback
        # below adds the only one.
        try:
            etree.SubElement(content,
                             'EmbargoTime').text = self._format_datetime(
                                 item[SCHEDULE_SETTINGS]['utc_embargo'],
                                 local=True,
                             )
        except KeyError:
            # fall back to the item's "embargoed" value, which may be None
            etree.SubElement(content,
                             'EmbargoTime').text = self._format_datetime(
                                 item.get('embargoed'), local=True)
        etree.SubElement(content,
                         'CreatedDateTime').text = self._format_datetime(
                             firstpublished)  # SDCP-380
        etree.SubElement(content,
                         'UpdatedDateTime').text = self._format_datetime(
                             item['versioncreated'], rel=True)

        # content type: "Photo" for pictures, otherwise the capitalised type
        etree.SubElement(content, 'ContentType').text = 'Photo' if is_picture(
            item) else item['type'].capitalize()

        # SDCP-309: Headline prefers the extra HEADLINE2 value over the item
        # headline; non-picture items also get the item headline as Headline2
        etree.SubElement(content, 'Headline').text = format_maxlength(
            extra.get(cp.HEADLINE2) or item.get('headline'),
            OUTPUT_LENGTH_LIMIT)
        if not is_picture(item):
            etree.SubElement(content, 'Headline2').text = format_maxlength(
                item.get('headline'), OUTPUT_LENGTH_LIMIT)

        etree.SubElement(content, 'SlugProper').text = item.get('slugline')
        etree.SubElement(content, 'Credit').text = self._format_credit(item)
        etree.SubElement(content, 'Source').text = item.get('source')

        content_html = self._format_content(item, is_broadcast)
        # DirectoryText starts as the formatted abstract; text items with
        # content overwrite it further down
        etree.SubElement(content, 'DirectoryText').text = self._format_text(
            item.get('abstract'))
        etree.SubElement(content,
                         'ContentText').text = self._format_html(content_html)
        # "2" when the language contains "fr", otherwise "1"
        etree.SubElement(
            content,
            'Language').text = '2' if 'fr' in item.get('language', '') else '1'

        if item['type'] == 'text' and content_html:
            # replace DirectoryText with the plain text of the content
            # (newlines turned into spaces), limited to 200 by format_maxlength
            content.find('DirectoryText').text = format_maxlength(
                get_text(content_html, 'html',
                         lf_on_block=False).replace('\n', ' '), 200)
            # the same word count is written to all three elements
            word_count = str(get_word_count(content_html))
            etree.SubElement(content, 'Length').text = word_count
            etree.SubElement(content, 'WordCount').text = word_count
            etree.SubElement(content, 'BreakWordCount').text = word_count

        # keywords are emitted as Stocks only for the globenewswire source
        if item.get('keywords') and item.get('source') == globenewswire.SOURCE:
            etree.SubElement(content,
                             'Stocks').text = ','.join(item['keywords'])

        self._format_category_index(content, item)
        self._format_genre(content, item)
        self._format_urgency(content, item.get('urgency'), item['language'])
        # keyword separator is ", " for pictures and "," otherwise
        self._format_keyword(content, item.get('keywords'),
                             ', ' if item.get('type') == 'picture' else ',')
        self._format_dateline(content, item.get('dateline'))
        self._format_writethru(content, item)

        if item.get('byline'):
            etree.SubElement(content, 'Byline').text = item['byline']

        if is_picture(item):
            self._format_picture_metadata(content, item)
        else:
            # editorial notes only apply to non-picture items
            etree.SubElement(content, 'EditorNote').text = item.get('ednote')
            if extra.get(cp.UPDATE):
                etree.SubElement(content, 'UpdateNote').text = extra[cp.UPDATE]
            if extra.get(cp.CORRECTION):
                etree.SubElement(content,
                                 'Corrections').text = extra[cp.CORRECTION]

        if item.get('associations'):
            self._format_associations(content, item)