コード例 #1
0
ファイル: eve_backend.py プロジェクト: hlmnrmr/superdesk-core
    def delete(self, endpoint_name, lookup):
        """Delete the documents matching a mongo query from search and from mongo.

        The matching documents are looked up in mongo first. When the endpoint has
        a search backend, every document is removed from search before it is
        removed from mongo; a document whose search removal fails is not removed
        from mongo.

        :param endpoint_name: Name of the endpoint
        :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
        :returns: None, the outcome is only reported through the logger
        """
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)
        docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
        ids = [doc[config.ID_FIELD] for doc in docs]
        removed_ids = ids
        logger.info("total documents to be removed {}".format(len(ids)))
        if search_backend and ids:
            removed_ids = []
            # first remove it from search backend, so it won't show up. when this is done - remove it from mongo
            for _id in ids:
                try:
                    self.remove_from_search(endpoint_name, _id)
                except NotFoundError:
                    # already gone from search, still has to be removed from mongo
                    logger.warning('item missing from elastic _id=%s', _id)
                except Exception:
                    logger.exception('item can not be removed from elastic _id=%s', _id)
                    continue
                removed_ids.append(_id)
        backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
        # report what was really removed, ids skipped above are not part of it
        logger.info("Removed {} documents from {}.".format(len(removed_ids), endpoint_name))
        if not ids:
            logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
コード例 #2
0
def remove_media_files(doc):
    """Remove the media files and attachments of the given doc.

    Renditions are taken from the doc itself or, when it has none, from its
    associations. A media file is deleted only when ``media_references`` holds
    no published entry for it. Failures for a single media file are logged and
    the remaining files are still processed.

    :param dict doc: document for which the media are being deleted
    :return: None
    """
    logger.info('Removing Media Files...')
    rendition_sets = []

    if doc.get('renditions'):
        rendition_sets = [doc.get('renditions')]

    if not rendition_sets:
        rendition_sets = [assoc.get('renditions') for assoc in (doc.get(ASSOCIATIONS) or {}).values()
                          if assoc and assoc.get('renditions')]

    for renditions in rendition_sets:
        for rendition in renditions.values():
            if not rendition or not rendition.get('media'):
                # no media stored for this rendition; str(None) must not be looked up or deleted
                continue

            media = str(rendition['media'])
            try:
                # kept in its own name so the list being iterated is not shadowed
                published_refs = get_resource_service('media_references').get(req=None, lookup={
                    'media_id': media, 'published': True
                })

                if published_refs.count() == 0:
                    logger.info('Deleting media:{}'.format(media))
                    app.media.delete(media)
            except Exception:
                logger.exception('Failed to remove Media Id: {} from item: {}'.format(media, doc.get(config.ID_FIELD)))

    # ``attachments`` may be present but set to None
    for attachment in doc.get('attachments') or []:
        lookup = {'_id': attachment['attachment']}
        get_resource_service('attachments').delete_action(lookup)
コード例 #3
0
ファイル: zczc_bob.py プロジェクト: akintolga/superdesk-aap
    def post_process_item(self, item, provider):
        """Tidy up a parsed item: paragraph markup, place, genre and sign off.

        The body is wrapped into ``<p>`` paragraphs, the place (when present) is
        prefixed to the headline and replaced by the matching entries of the
        ``locators`` vocabulary, the genre is set to the active
        ``Broadcast Script`` entries of the ``genre`` vocabulary, the
        ``<p>AAP RTV</p>`` attribution is stripped and the sign off is set to
        ``RTV``. Any error is logged and the item is returned as far as it got.

        :param item: item dict, modified in place
        :param provider: not used by this method
        :return: the item
        """
        try:
            item['body_html'] = '<p>{}</p>'.format(
                re.sub('<p>   ', '<p>', item.get('body_html', '').replace('\n\n', '\n').replace('\n', '</p><p>')))
            if self.ITEM_PLACE in item:
                if item[self.ITEM_PLACE]:
                    item['headline'] = '{}: {}'.format(item[self.ITEM_PLACE], item.get(self.ITEM_HEADLINE, ''))
                locator_map = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='locators')
                # an empty/None place must not abort the rest of the processing
                place_qcode = (item.get(self.ITEM_PLACE) or '').upper()
                place = [x for x in locator_map.get('items', []) if x['qcode'] == place_qcode]
                # the comprehension is never None, so test for matches to keep the pop branch reachable
                if place:
                    item[self.ITEM_PLACE] = place
                else:
                    item.pop(self.ITEM_PLACE)
            genre_map = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='genre')
            item['genre'] = [x for x in genre_map.get('items', []) if
                             x['qcode'] == 'Broadcast Script' and x['is_active']]

            # Remove the attribution
            item['body_html'] = item.get('body_html', '').replace('<p>AAP RTV</p>', '')
            item['sign_off'] = 'RTV'
        except Exception as ex:
            logger.exception(ex)

        return item
コード例 #4
0
    def _update(self, provider, update):
        """Yield the ingest items fetched for every article of every channel.

        Articles are requested for the window between the provider's
        ``last_updated`` and now; the window start is never older than
        ``INGEST_EXPIRY_MINUTES`` before now. Default ``url`` and ``auth_url``
        values are stored in the provider config when they are missing.

        :param provider: ingest provider dict; its ``config`` is created/filled in place
        :param update: not used by this method
        :return: generator yielding the non-empty results of ``fetch_ingest``
        """
        updated = utcnow()

        last_updated = provider.get('last_updated')
        ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
        oldest_allowed = updated - datetime.timedelta(minutes=ttl_minutes)
        if not last_updated or last_updated < oldest_allowed:
            last_updated = oldest_allowed

        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        provider_config.setdefault('url', 'http://rmb.reuters.com/rmd/rest/xml')
        provider_config.setdefault('auth_url', 'https://commerce.reuters.com/rmd/rest/xml/login')
        self.URL = provider_config.get('url')

        for channel in self._get_channels():
            for article_id in self._get_article_ids(channel, last_updated, updated):
                try:
                    items = self.fetch_ingest(article_id)
                # if there was an exception processing the one of the bunch log it and continue
                except Exception as ex:
                    logger.warning('Reuters item {} has not been retrieved'.format(article_id))
                    logger.exception(ex)
                else:
                    if items:
                        yield items
コード例 #5
0
ファイル: zczc_pmf.py プロジェクト: akintolga/superdesk-aap
    def post_process_item(self, item, provider):
        """Derive take key, headline, subject, category and genre from the slugline.

        Four kinds of items are told apart by their slugline: racing items
        (``Grey``/``Trot``/``Gallop``), ``Betting`` items, ``AFL`` items and
        everything else.

        :param item: item dict, modified in place
        :param provider: not used by this method
        :return: the item, or None when processing raised (the error is logged)
        """
        def subject(qcode):
            # subject entry for a qcode of the subject_codes table
            return [{'qcode': qcode, 'name': subject_codes[qcode]}]

        try:
            slugline = item.get(self.ITEM_SLUGLINE, '')
            # is it a horse or dog racing item
            if 'Grey' in slugline or 'Trot' in slugline or 'Gallop' in slugline:
                headline = item.get(self.ITEM_HEADLINE, '')
                # Don't look for the date in the TAB Dividends
                if 'TAB DIVS' not in headline:
                    try:
                        raceday = datetime.strptime(headline, '%d/%m/%Y')
                        item[self.ITEM_TAKE_KEY] = 'Fields ' + raceday.strftime('%A')
                    except (ValueError, TypeError):
                        # the headline is not a dd/mm/yyyy date
                        item[self.ITEM_TAKE_KEY] = 'Fields'
                    take_key = item[self.ITEM_TAKE_KEY]
                    # it's the dogs
                    if 'Grey' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + 'hound ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    if 'Trot' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + ' ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15030003')
                else:
                    # Dividends
                    item[self.ITEM_TAKE_KEY] = re.sub(' Monday$| Tuesday$| Wednesday$| Thursday$| Friday$',
                                                      '', headline)
                    item[self.ITEM_HEADLINE] = '{} {}'.format(slugline, headline)
                    if 'Greyhound' in slugline:
                        item[self.ITEM_SLUGLINE] = slugline.replace('Greyhound', 'Greys')
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    if 'Trot' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030003')
                    if 'Gallop' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030001')
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
                self._set_results_genre(item)
            elif ' Betting' in slugline:
                try:
                    raceday = datetime.strptime(item.get(self.ITEM_HEADLINE, ''), '%d/%m/%Y')
                    item[self.ITEM_TAKE_KEY] = raceday.strftime('%A')
                except (ValueError, TypeError):
                    # no date in the headline, keep whatever take key the item already has
                    pass
                item[self.ITEM_SLUGLINE] = slugline.replace(' Betting', ' Market')
                # NOTE(review): raises KeyError (logged below, None returned) when the item has
                # no take key at this point - confirm that dropping such items is intended
                item[self.ITEM_HEADLINE] = '{} {}'.format(item[self.ITEM_SLUGLINE], item[self.ITEM_TAKE_KEY])
                item[self.ITEM_SUBJECT] = subject('15030001')
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
            elif 'AFL' in slugline:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 't'}]
                item[self.ITEM_SUBJECT] = subject('15084000')
                self._set_results_genre(item)
            else:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'f'}]
                item[self.ITEM_SUBJECT] = subject('04000000')
            return item

        except Exception as ex:
            logger.exception(ex)
コード例 #6
0
ファイル: validate.py プロジェクト: sjunaid/superdesk-core
    def _validate(self, doc, **kwargs):
        """Validate ``doc['validate']`` with the first validator found for the doc.

        :param doc: dict holding the item under ``validate`` and, optionally, the action under ``act``
        :param kwargs: when a ``headline`` keyword is passed, every message is prefixed
            with the item headline (or its ``_id`` when there is no headline)
        :return: list of error messages, empty when nothing failed or no validator was found
        """
        use_headline = 'headline' in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            # only the first validator is applied: the loop body always returns
            validation_schema = self._get_validator_schema(validator)
            self._sanitize_fields(doc['validate'], validator)
            self._set_default_subject_scheme(doc['validate'])
            self._process_media(doc['validate'], validation_schema)
            self._process_sms(doc['validate'], validation_schema)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc['validate'], validation_schema)
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s" for ' % str(e))
            error_list = v.errors
            response = []
            for key in error_list:
                error = error_list[key]
                messages = []
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get('act') == 'correct' and key == 'dateline':
                    continue
                elif doc.get('act') == 'kill' and doc['validate'].get('profile') and \
                        key in ('headline', 'abstract', 'body_html'):
                    continue
                elif key == 'extra':
                    for field in error:
                        display_name = self._get_vocabulary_display_name(field)
                        if 'required' in error[field]:
                            messages.append(REQUIRED_ERROR.format(display_name))
                        else:
                            messages.append('{} {}'.format(display_name, error[field]))
                elif error == 'required field' or isinstance(error, (dict, list)):
                    # nested errors are reported as a missing field as well
                    messages.append(REQUIRED_ERROR.format(key.upper()))
                elif error == 'min length is 1' or 'null value not allowed' in error:
                    messages.append(REQUIRED_ERROR.format(key.upper()))
                elif 'min length is' in error:
                    messages.append('{} is too short'.format(key.upper()))
                elif 'max length is' in error:
                    messages.append('{} is too long'.format(key.upper()))
                else:
                    messages.append('{} {}'.format(key.upper(), error))

                for message in messages:
                    if use_headline:
                        headline = '{}: {}'.format(doc['validate'].get('headline',
                                                                       doc['validate'].get('_id')), message)
                        response.append(headline)
                    else:
                        response.append(message)
            return response

        # reached only when there was no validator at all
        logger.warning('validator was not found for {}'.format(doc.get('act')))
        return []
コード例 #7
0
ファイル: news_bites.py プロジェクト: akintolga/superdesk-aap
 def can_parse(self, file_path):
     """Tell whether the first line of the file starts with the start of message marker.

     :param file_path: path of the file to inspect
     :return: True when ``START_OF_MESSAGE`` matches at the start of the first line and
         the matched text equals ``START_OF_MESSAGE``; False otherwise, also when the
         file is empty or can not be read (read errors are logged)
     """
     try:
         with open(file_path, 'r', encoding='windows-1252') as f:
             # the whole file is read, so a file that can not be decoded is rejected too
             lines = f.readlines()
         if not lines:
             return False
         m = re.match(self.START_OF_MESSAGE, lines[0])
         # a missing match is a regular "no", not an error worth a traceback
         return m is not None and m.group(0) == self.START_OF_MESSAGE
     except Exception as ex:
         logger.exception(ex)
         return False
コード例 #8
0
ファイル: zczc.py プロジェクト: mdhaman/superdesk-aap
 def can_parse(self, file_path):
     """Report whether any line of the file contains the start of message marker.

     :param file_path: path of the file to inspect, read as latin-1 text
     :return: True when a line contains ``START_OF_MESSAGE``, False when none does
         or when reading the file fails (the failure is logged)
     """
     try:
         with open(file_path, 'r', encoding='latin-1') as source:
             return any(self.START_OF_MESSAGE in row for row in source)
     except Exception as error:
         logger.exception(error)
         return False
コード例 #9
0
    def remove_expired(self, provider):
        """Remove the expired data of a provider while holding the ``ingest:gc`` lock.

        Nothing is done when the lock can not be obtained. The lock is released
        whether or not the clean up succeeded.

        :param provider: provider passed on to ``remove_expired_data``
        :raises ProviderError: expired content error wrapping any failure of the clean up
        """
        gc_lock = 'ingest:gc'

        acquired = lock(gc_lock, expire=300)
        if not acquired:
            return

        try:
            remove_expired_data(provider)
            push_notification('ingest:cleaned')
        except Exception as error:
            logger.exception(error)
            raise ProviderError.expiredContentError(error, provider)
        finally:
            unlock(gc_lock)
コード例 #10
0
    def validate_and_run_elastic_query(self, elastic_query, index):
        """
        Run the elastic_query against the resource service named by index.

        :param elastic_query: JSON format inline with ElasticSearch syntax
        :param index: Name of the ElasticSearch index
        :return: whatever the resource service returns for the query
        :raise SuperdeskApiError: bad request error when running the query fails for any reason
        """

        request = self.init_request(elastic_query)
        try:
            service = get_resource_service(index)
            result = service.get(req=request, lookup={})
        except Exception as error:
            logger.exception(error)
            raise SuperdeskApiError.badRequestError('Fail to validate the filter against %s.' % index)
        return result
コード例 #11
0
    def post_process_item(self, item, provider):
        """Work out headline, slugline and take key from the first lines of the body.

        Category and subject are set first, then the first three body lines
        decide how headline and slugline are obtained. Finally slugline and
        headline are cut to the maximum lengths of the publish validator for
        text items, when such a validator exists.

        :param item: item dict, modified in place
        :param provider: not used by this method
        :return: the item, or None when processing raised (the error is logged)
        """
        try:
            # Pagemasters sourced content is Greyhound or Trot related, maybe AFL otherwise financial
            # It is from the Racing system
            item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]
            item[self.ITEM_SUBJECT] = [{'qcode': '15030001', 'name': subject_codes['15030001']}]
            body_lines = item['body_html'].split('\n')
            # the third line is read first, a body shorter than three lines fails right here
            third, second, first = body_lines[2], body_lines[1], body_lines[0]

            if ':SPORT -' in third:
                item[self.ITEM_HEADLINE] = third[9:]
                if ':POTTED :' in second:
                    item[self.ITEM_SLUGLINE] = second[9:]
            elif 'RACING : ' in second:
                item[self.ITEM_HEADLINE] = second[8:]
                item[self.ITEM_SLUGLINE] = second[8:]
            elif ':POTTED :' in second:
                item[self.ITEM_HEADLINE] = second[9:]
                item[self.ITEM_SLUGLINE] = second[9:]
            elif ':PREMIERSHIP' in second or ' WEIGHTS ' in second:
                self._scan_lines(item, body_lines)
            elif 'YY ' in first:
                item[self.ITEM_HEADLINE] = second
                item[self.ITEM_SLUGLINE] = second
                comment_at = second.find(' Comment ')
                if comment_at != -1:
                    # slugline ends with "Comment", the rest of the line is the take key
                    item[self.ITEM_SLUGLINE] = second[:comment_at + 8]
                    item[self.ITEM_TAKE_KEY] = second[comment_at + 9:]
            else:
                self._scan_lines(item, body_lines)

            # Truncate the slugline and headline to the lengths defined on the validators if required
            validators = superdesk.get_resource_service('validators').get(
                req=None, lookup={'act': ITEM_PUBLISH, 'type': CONTENT_TYPE.TEXT})
            if validators.count():
                slugline_limit = validators[0]['schema']['slugline']['maxlength']
                headline_limit = validators[0]['schema']['headline']['maxlength']
                if self.ITEM_SLUGLINE in item and len(item[self.ITEM_SLUGLINE]) > slugline_limit:
                    full_slugline = item[self.ITEM_SLUGLINE]
                    # the overflow of the slugline is dumped in the take key
                    item[self.ITEM_TAKE_KEY] = full_slugline[slugline_limit:]
                    item[self.ITEM_SLUGLINE] = full_slugline[:slugline_limit]
                if self.ITEM_HEADLINE in item and len(item[self.ITEM_HEADLINE]) > headline_limit:
                    item[self.ITEM_HEADLINE] = item[self.ITEM_HEADLINE][:headline_limit]

            return item

        except Exception as ex:
            logger.exception(ex)
コード例 #12
0
ファイル: validate.py プロジェクト: superdesk/superdesk-core
    def _validate(self, doc, **kwargs):
        """Validate ``doc["validate"]`` with the first validator found for the doc.

        :param doc: dict holding the item under ``validate`` and, optionally, the action under ``act``
        :param kwargs: when a ``headline`` keyword is passed, every message is prefixed
            with the item headline (or its ``_id`` when there is no headline)
        :return: list of error messages, empty when nothing failed or no validator was found
        """
        use_headline = "headline" in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            # only the first validator is applied: the loop body always returns
            self._sanitize_fields(doc["validate"], validator)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc["validate"], self._get_validator_schema(validator))
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s" for ' % str(e))
            error_list = v.errors
            response = []
            for key in error_list:
                error = error_list[key]
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get("act") == "correct" and key == "dateline":
                    continue
                elif (
                    doc.get("act") == "kill"
                    and doc["validate"].get("profile")
                    and key in ("headline", "abstract", "body_html")
                ):
                    continue
                elif error == "required field" or isinstance(error, (dict, list)):
                    # nested errors are reported as a missing field as well
                    message = "{} is a required field".format(key.upper())
                elif error == "min length is 1":
                    message = "{} is a required field".format(key.upper())
                elif "min length is" in error:
                    message = "{} is too short".format(key.upper())
                elif "max length is" in error:
                    message = "{} is too long".format(key.upper())
                else:
                    message = "{} {}".format(key.upper(), error)

                if use_headline:
                    response.append(
                        "{}: {}".format(doc["validate"].get("headline", doc["validate"].get("_id")), message)
                    )
                else:
                    response.append(message)
            return response

        # reached only when there was no validator at all
        logger.warning("validator was not found for {}".format(doc.get("act")))
        return []
コード例 #13
0
ファイル: eve_backend.py プロジェクト: hlmnrmr/superdesk-core
    def _change_request(self, endpoint_name, id, updates, original):
        """Apply updates to a document in the backend and mirror the result to search.

        :param endpoint_name: Name of the endpoint
        :param id: identifier of the document to update
        :param updates: changes to apply
        :param original: document as it was before the updates
        :return: the updates that were passed in
        :raises SuperdeskApiError: not found error when the backend reports a changed
            original and no longer has the document
        """
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            backend.update(endpoint_name, id, updates, original)
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None, _id=id):
                # item is in elastic, not in mongo - not good
                # %-style placeholders are filled by the logger, not by str.format
                logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
                self.remove_from_search(endpoint_name, id)
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done - ok
                logger.exception('Item : {} not updated in collection {}. '
                                 'Updates are : {}'.format(id, endpoint_name, updates))
                return updates

        if search_backend:
            # push the stored document to search, not just the updates
            doc = backend.find_one(endpoint_name, req=None, _id=id)
            search_backend.update(endpoint_name, id, doc)

        return updates
コード例 #14
0
    def _validate(self, doc, fields=False, **kwargs):
        """Validate ``doc["validate"]`` with the first validator found for the doc.

        After the schema validation the ``item_validate`` signal is sent with a
        copy of the item taken before any processing, the messages collected so
        far and the raw validator errors.

        :param doc: dict holding the item under ``validate`` and, optionally, the action under ``act``
        :param fields: when true, the raw validator errors are returned next to the messages
        :param kwargs: when a ``headline`` keyword is passed, every message is prefixed
            with the item headline (or its ``_id`` when there is no headline)
        :return: list of error messages, or a ``(messages, errors)`` tuple when ``fields`` is true;
            both are empty when no validator was found
        """
        item = deepcopy(doc["validate"])  # make a copy for signal before validation processing
        use_headline = "headline" in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            # only the first validator is applied: the loop body always returns
            validation_schema = self._get_validator_schema(validator)
            self._sanitize_fields(doc["validate"], validator)
            self._set_default_subject_scheme(doc["validate"])
            self._process_media(doc["validate"], validation_schema)
            self._process_sms(doc["validate"], validation_schema)
            self._process_media_metadata(doc["validate"], validation_schema)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc["validate"], validation_schema)
            except TypeError as ex:
                logger.exception('Invalid validator schema value "%s" for ' % str(ex))
            error_list = v.errors
            response = []
            for e in error_list:
                error = error_list[e]
                messages = []
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get("act") == "correct" and e == "dateline":
                    continue
                elif (doc.get("act") == "kill"
                      and doc["validate"].get("profile")
                      and e in ("headline", "abstract", "body_html")):
                    continue
                elif e == "extra":
                    for field in error:
                        display_name = self._get_vocabulary_display_name(field)
                        if "required" in error[field]:
                            messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(display_name))
                        else:
                            error_field = self.get_error_field_name(display_name)
                            messages.append("{} {}".format(error_field, error[field]))
                elif "required field" in error or isinstance(error, (dict, list)):
                    # nested errors are reported as a missing field as well
                    display_name = self._get_vocabulary_display_name(e)
                    error_field = self.get_error_field_name(display_name)
                    messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(error_field.upper()))
                elif error == "min length is 1" or "null value not allowed" in error:
                    messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(e.upper()))
                elif "min length is" in error:
                    error_field = self.get_error_field_name(e)
                    messages.append(ERROR_MESSAGES[TOO_SHORT].format(error_field.upper()))
                elif "max length is" in error:
                    error_field = self.get_error_field_name(e)
                    messages.append(ERROR_MESSAGES[TOO_LONG].format(error_field.upper()))
                else:
                    error_field = self.get_error_field_name(e)
                    # use the configured message for this error when there is one
                    messages.append("{} {}".format(error_field.upper(), ERROR_MESSAGES.get(error) or error))

                for message in messages:
                    if use_headline:
                        headline = "{}: {}".format(
                            doc["validate"].get("headline", doc["validate"].get("_id")),
                            message)
                        response.append(headline)
                    else:
                        response.append(message)

            # let custom code do additional validation
            item_validate.send(self, item=item, response=response, error_fields=v.errors)

            if fields:
                return response, v.errors
            return response

        # reached only when there was no validator at all
        logger.warning("validator was not found for {}".format(doc.get("act")))
        if fields:
            return [], {}
        return []
コード例 #15
0
    def post_process_item(self, item, provider):
        """Work out routing keywords, headline, slugline, take key and body of an item.

        The first body line may name destinations that are added to the item
        keywords; the first three body lines decide how headline and slugline
        are obtained and how many leading lines are dropped from the body.

        :param item: item dict, modified in place
        :param provider: not used by this method
        :return: the item, or None when processing raised (the error is logged)
        """
        try:
            skip = 1
            # Pagemasters sourced content is Greyhound or Trot related, maybe AFL otherwise financial
            # It is from the Racing system
            item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
            item[self.ITEM_SUBJECT] = [{'qcode': '15030001', 'name': subject_codes['15030001']}]
            lines = item['body_html'].split('\n')

            # If the content is to be routed/auto published
            routing = lines[0].upper()
            if 'YY ' in routing or 'HH ' in routing:
                for dest in self.destinations:
                    if (' ' + dest.upper()) in routing:
                        if item.get('keywords'):
                            item['keywords'].append(dest)
                        else:
                            item['keywords'] = [dest]

            # the third line is read first, a body shorter than three lines fails right here
            third, second, first = lines[2], lines[1], lines[0]

            if ':SPORT -' in third:
                item[self.ITEM_HEADLINE] = third[9:]
                if ':POTTED :' in second:
                    item[self.ITEM_SLUGLINE] = second[9:]
                skip = 3
            elif 'RACING : ' in second:
                item[self.ITEM_HEADLINE] = second[8:]
                item[self.ITEM_SLUGLINE] = second[8:]
                skip = 2
            elif ':POTTED :' in second:
                item[self.ITEM_HEADLINE] = second[9:]
                item[self.ITEM_SLUGLINE] = second[9:]
                skip = 2
            elif ':Premierships' in second:
                title = second[1:]
                item[self.ITEM_HEADLINE] = title
                # the overflow of the slugline is dumped in the take key
                item[self.ITEM_TAKE_KEY] = title[21:]
                item[self.ITEM_SLUGLINE] = title[:21]
                skip = 2
            elif ' WEIGHTS ' in second:
                self._scan_lines(item, lines)
            elif 'YY ' in first or 'HH ' in first:
                item[self.ITEM_HEADLINE] = second
                item[self.ITEM_SLUGLINE] = second
                if ' Comment ' in second:
                    # need to split the line on the word Comment
                    comment_at = second.find('Comment')
                    before_comment = second[:comment_at]
                    item[self.ITEM_SLUGLINE] = before_comment + 'Comment'
                    item[self.ITEM_TAKE_KEY] = second[comment_at + 8:]
                    item[self.ITEM_HEADLINE] = before_comment + 'Gallop Comment ' + item[self.ITEM_TAKE_KEY]
                    skip = 2
            else:
                self._scan_lines(item, lines)

            item['body_html'] = '<pre>' + '\n'.join(lines[skip:])

            # if the concatenation of the slugline and take key contain the phrase 'Brief Form' change the category to
            # h; a body containing 'NZ/AUST FIELDS' is another exception
            slug_and_take_key = item.get(self.ITEM_SLUGLINE, '') + item.get(self.ITEM_TAKE_KEY, '')
            if 'brief form' in slug_and_take_key.lower() or 'NZ/AUST FIELDS' in item.get('body_html', ''):
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]

            # if the item has been marked as convert to HTML then we need to use the racing reformat macro
            # to convert it.
            if 'HH ' in lines[0]:
                racing_reformat_macro(item)

            return item

        except Exception as ex:
            logger.exception(ex)
コード例 #16
0
    def post_process_item(self, item, provider):
        """Set headline, take key, subject, category and genre from the slugline.

        The slugline selects one of four branches:

        * ``Grey``, ``Trot`` or ``Gallop``: category ``r``. A headline without
          ``TAB DIVS`` gets the take key ``Fields`` (plus the weekday when the
          headline is a ``dd/mm/yyyy`` date); a headline with ``TAB DIVS`` is
          handled as dividends.
        * `` Betting``: the slugline is renamed to `` Market``, category ``r``.
        * ``AFL``: category ``t``.
        * anything else: category ``f``.

        Afterwards the slugline is cut to the ``maxlength`` found in the first
        ``auto_publish`` text validator, if there is one.

        :param dict item: item being post processed, modified in place
        :param provider: not used by this method
        :return: the modified item, or ``None`` if an unexpected error occurred
            (the error is logged, not raised)
        """
        def subject(qcode):
            # single-entry subject list for the given subject qcode
            return [{'qcode': qcode, 'name': subject_codes[qcode]}]

        try:
            slugline = item.get(self.ITEM_SLUGLINE, '')
            # is it a horse or dog racing item
            if 'Grey' in slugline or 'Trot' in slugline or 'Gallop' in slugline:
                headline = item.get(self.ITEM_HEADLINE, '')
                # Don't look for the date in the TAB Dividends
                if 'TAB DIVS' not in headline:
                    try:
                        raceday = datetime.strptime(headline, '%d/%m/%Y')
                        take_key = 'Fields ' + raceday.strftime('%A')
                    except Exception:
                        # headline is not a usable date: best effort, plain take key.
                        # ``Exception`` (not a bare except) so that KeyboardInterrupt
                        # and SystemExit are no longer swallowed here.
                        take_key = 'Fields'
                    item[self.ITEM_TAKE_KEY] = take_key
                    # it's the dogs
                    if 'Grey' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + 'hound ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    # a Trot slugline overrides the greyhound values set above
                    if 'Trot' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + ' ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15030003')
                    self._set_results_genre(item, self.racing_qcode)
                else:
                    # Dividends: the take key is the headline without a trailing weekday.
                    # NOTE(review): Saturday and Sunday are not stripped - confirm this is intended.
                    item[self.ITEM_TAKE_KEY] = re.sub(
                        ' Monday$| Tuesday$| Wednesday$| Thursday$| Friday$', '', headline)
                    item[self.ITEM_HEADLINE] = '{} {}'.format(slugline, headline)
                    if 'Greyhound' in slugline:
                        slugline = slugline.replace('Greyhound', 'Greys')
                        item[self.ITEM_SLUGLINE] = slugline
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    if 'Trot' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030003')
                    if 'Gallop' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030001')
                    self._set_results_genre(item, self.sport_results_qcode)

                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
            elif ' Betting' in slugline:
                try:
                    raceday = datetime.strptime(item.get(self.ITEM_HEADLINE, ''), '%d/%m/%Y')
                    item[self.ITEM_TAKE_KEY] = raceday.strftime('%A')
                except Exception:
                    # headline is not a date: keep whatever take key the item already has
                    pass
                slugline = slugline.replace(' Betting', ' Market')
                item[self.ITEM_SLUGLINE] = slugline
                # The take key may be absent when the date could not be parsed; do not
                # let a KeyError abort the whole post processing in that case.
                if self.ITEM_TAKE_KEY in item:
                    item[self.ITEM_HEADLINE] = '{} {}'.format(slugline, item[self.ITEM_TAKE_KEY])
                else:
                    item[self.ITEM_HEADLINE] = slugline
                item[self.ITEM_SUBJECT] = subject('15030001')
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
                self._set_results_genre(item, self.racing_qcode)
            elif 'AFL' in slugline:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 't'}]
                item[self.ITEM_SUBJECT] = subject('15084000')
                self._set_results_genre(item, self.sport_results_qcode)
            else:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'f'}]
                item[self.ITEM_SUBJECT] = subject('04000000')
                self._set_results_genre(item, self.finance_qcode)

            # truncate the slugline to the length defined in the validation schema
            lookup = {'act': 'auto_publish', 'type': CONTENT_TYPE.TEXT}
            validators = get_resource_service('validators').get(req=None, lookup=lookup)
            if validators.count():
                max_slugline_len = validators[0]['schema']['slugline']['maxlength']
                if 'slugline' in item and len(item['slugline']) > max_slugline_len:
                    item['slugline'] = item['slugline'][:max_slugline_len]

            return item

        except Exception as ex:
            # Errors are logged only; the method then implicitly returns None.
            logger.exception(ex)
コード例 #17
0
def handle_exception(exc):
    """Log exception to logger.

    :param exc: value passed to ``logger.exception`` as the log message
    """
    # NOTE(review): ``Logger.exception`` attaches the traceback of the exception
    # currently being handled, so this is presumably meant to be called from
    # inside an ``except`` block - confirm against the callers.
    logger.exception(exc)
コード例 #18
0
    def post_process_item(self, item, provider):
        """Set headline, take key, subject, category and genre from the slugline.

        The slugline selects one of four branches:

        * ``Grey``, ``Trot`` or ``Gallop``: category ``r`` and the results
          genre. A headline without ``TAB DIVS`` gets the take key ``Fields``
          (plus the weekday when the headline is a ``dd/mm/yyyy`` date); a
          headline with ``TAB DIVS`` is handled as dividends.
        * `` Betting``: the slugline is renamed to `` Market``, category ``r``.
        * ``AFL``: category ``t`` and the results genre.
        * anything else: category ``f``.

        Afterwards the slugline is cut to the ``maxlength`` found in the first
        ``auto_publish`` text validator, if there is one.

        :param dict item: item being post processed, modified in place
        :param provider: not used by this method
        :return: the modified item, or ``None`` if an unexpected error occurred
            (the error is logged, not raised)
        """
        def subject(qcode):
            # single-entry subject list for the given subject qcode
            return [{'qcode': qcode, 'name': subject_codes[qcode]}]

        try:
            slugline = item.get(self.ITEM_SLUGLINE, '')
            # is it a horse or dog racing item
            if 'Grey' in slugline or 'Trot' in slugline or 'Gallop' in slugline:
                headline = item.get(self.ITEM_HEADLINE, '')
                # Don't look for the date in the TAB Dividends
                if 'TAB DIVS' not in headline:
                    try:
                        raceday = datetime.strptime(headline, '%d/%m/%Y')
                        take_key = 'Fields ' + raceday.strftime('%A')
                    except Exception:
                        # headline is not a usable date: best effort, plain take key.
                        # ``Exception`` (not a bare except) so that KeyboardInterrupt
                        # and SystemExit are no longer swallowed here.
                        take_key = 'Fields'
                    item[self.ITEM_TAKE_KEY] = take_key
                    # it's the dogs
                    if 'Grey' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + 'hound ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    # a Trot slugline overrides the greyhound values set above
                    if 'Trot' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + ' ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15030003')
                else:
                    # Dividends: the take key is the headline without a trailing weekday.
                    # NOTE(review): Saturday and Sunday are not stripped - confirm this is intended.
                    item[self.ITEM_TAKE_KEY] = re.sub(
                        ' Monday$| Tuesday$| Wednesday$| Thursday$| Friday$', '', headline)
                    item[self.ITEM_HEADLINE] = '{} {}'.format(slugline, headline)
                    if 'Greyhound' in slugline:
                        slugline = slugline.replace('Greyhound', 'Greys')
                        item[self.ITEM_SLUGLINE] = slugline
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    if 'Trot' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030003')
                    if 'Gallop' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030001')
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
                self._set_results_genre(item)
            elif ' Betting' in slugline:
                try:
                    raceday = datetime.strptime(item.get(self.ITEM_HEADLINE, ''), '%d/%m/%Y')
                    item[self.ITEM_TAKE_KEY] = raceday.strftime('%A')
                except Exception:
                    # headline is not a date: keep whatever take key the item already has
                    pass
                slugline = slugline.replace(' Betting', ' Market')
                item[self.ITEM_SLUGLINE] = slugline
                # The take key may be absent when the date could not be parsed; do not
                # let a KeyError abort the whole post processing in that case.
                if self.ITEM_TAKE_KEY in item:
                    item[self.ITEM_HEADLINE] = '{} {}'.format(slugline, item[self.ITEM_TAKE_KEY])
                else:
                    item[self.ITEM_HEADLINE] = slugline
                item[self.ITEM_SUBJECT] = subject('15030001')
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
            elif 'AFL' in slugline:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 't'}]
                item[self.ITEM_SUBJECT] = subject('15084000')
                self._set_results_genre(item)
            else:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'f'}]
                item[self.ITEM_SUBJECT] = subject('04000000')

            # truncate the slugline to the length defined in the validation schema
            lookup = {'act': 'auto_publish', 'type': CONTENT_TYPE.TEXT}
            validators = get_resource_service('validators').get(req=None, lookup=lookup)
            if validators.count():
                max_slugline_len = validators[0]['schema']['slugline']['maxlength']
                if 'slugline' in item and len(item['slugline']) > max_slugline_len:
                    item['slugline'] = item['slugline'][:max_slugline_len]

            return item

        except Exception as ex:
            # Errors are logged only; the method then implicitly returns None.
            logger.exception(ex)
コード例 #19
0
    def _validate(self, doc, **kwargs):
        """Validate ``doc['validate']`` and return a list of readable error messages.

        Only the first validator yielded by ``_get_validators`` is applied: the
        method returns at the end of the first loop iteration.

        :param dict doc: validation request. ``doc['validate']`` is the item to
            check and ``doc['act']`` the action it is validated for.
        :param kwargs: if a ``headline`` keyword is present every message is
            prefixed with the item's headline, or its ``_id`` when it has none.
        :return: list of error messages; empty if there are no errors or no
            validator was found
        """
        use_headline = 'headline' in kwargs
        act = doc.get('act')
        for validator in self._get_validators(doc):
            validation_schema = self._get_validator_schema(validator)
            item = doc['validate']
            self._sanitize_fields(item, validator)
            self._set_default_subject_scheme(item)
            self._process_media(item, validation_schema)
            self._process_sms(item, validation_schema)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(item, validation_schema)
            except TypeError as e:
                # a broken schema is logged; whatever errors were collected are still reported
                logger.exception('Invalid validator schema value "%s" for ' % str(e))
            error_list = v.errors
            response = []
            for field in error_list:
                error = error_list[field]
                messages = []
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if act == 'correct' and field == 'dateline':
                    continue
                elif act == 'kill' and item.get('profile', None) and \
                        field in ('headline', 'abstract', 'body_html'):
                    continue
                elif field == 'extra':
                    # errors of custom fields are reported with the vocabulary display name
                    for extra_field in error:
                        display_name = self._get_vocabulary_display_name(extra_field)
                        if 'required' in error[extra_field]:
                            messages.append(REQUIRED_ERROR.format(display_name))
                        else:
                            messages.append('{} {}'.format(display_name, error[extra_field]))
                elif error == 'required field' or isinstance(error, (dict, list)):
                    messages.append(REQUIRED_ERROR.format(field.upper()))
                elif 'min length is 1' == error or 'null value not allowed' in error:
                    messages.append(REQUIRED_ERROR.format(field.upper()))
                elif 'min length is' in error:
                    messages.append('{} is too short'.format(field.upper()))
                elif 'max length is' in error:
                    messages.append('{} is too long'.format(field.upper()))
                else:
                    messages.append('{} {}'.format(field.upper(), error))

                if use_headline:
                    title = item.get('headline', item.get('_id'))
                    response.extend('{}: {}'.format(title, message) for message in messages)
                else:
                    response.extend(messages)
            return response

        # Only reached when there was no validator to iterate over.
        logger.warning('validator was not found for {}'.format(act))
        return []
コード例 #20
0
    def process_timelines(self, items, failed_ids):
        """Generate statistics from each item's timeline and save them.

        ``gen_stats_from_timeline`` is run for every item first. Items whose
        ``item['item']`` has no id are collected and created with a single
        ``post``; the others are patched one by one. Finally, for every
        rewrite with a positive ``time_to_first_publish``, the original's
        ``time_to_next_update_publish`` is set to the number of seconds between
        the original's and the rewrite's ``firstpublished``.

        :param dict items: maps an item id to a dict with ``item`` and ``updates`` entries
        :param list failed_ids: extended in place with the ids of the items that failed
        """
        statistics_service = get_resource_service('archive_statistics')
        items_to_create = []
        rewrites = []

        for item_id, item in items.items():
            try:
                self.gen_stats_from_timeline(item)
            except Exception:
                logger.exception('Failed to generate stats for item {}'.format(item_id))
                failed_ids.append(item_id)
                continue

            updates = item['updates']
            if updates.get('rewrite_of') and (updates.get('time_to_first_publish') or 0) > 0:
                rewrites.append(item_id)

            if not item['item'].get(config.ID_FIELD):
                # no stats entry yet: queue it so all new entries are created in one call
                updates[config.ID_FIELD] = item_id
                updates['stats_type'] = 'archive'
                items_to_create.append(updates)
            else:
                try:
                    statistics_service.patch(item_id, updates)
                except Exception:
                    logger.exception('Failed to update stats for item {}. updates={}'.format(item_id, updates))
                    failed_ids.append(item_id)

        if items_to_create:
            try:
                statistics_service.post(items_to_create)
            except Exception:
                item_ids = [item.get(config.ID_FIELD) for item in items_to_create]
                # ids are converted with str() so that a non-string id cannot raise
                # a TypeError from inside this handler
                logger.exception('Failed to create stat entries for items {}'.format(
                    ', '.join(str(item_id) for item_id in item_ids)))
                # report the ids that could not be created (this used to extend the
                # list with itself, duplicating the already recorded failures)
                failed_ids.extend(item_ids)

        for item_id in rewrites:
            updates = items[item_id]['updates']

            updated_at = updates.get('firstpublished')
            if not updated_at:
                logger.warning('Failed {}, updated_at not defined'.format(item_id))
                continue

            original_id = updates.get('rewrite_of')
            if not original_id:
                logger.warning('Failed {}, original_id not defined'.format(item_id))
                continue

            original = statistics_service.find_one(req=None, _id=original_id)
            if not original:
                logger.warning('Failed {}, original not found'.format(item_id))
                continue

            published_at = original.get('firstpublished')
            if not published_at:
                logger.warning('Failed {}, published_at not defined'.format(original_id))
                continue

            statistics_service.patch(
                original_id,
                {'time_to_next_update_publish': (updated_at - published_at).total_seconds()}
            )
コード例 #21
0
ファイル: zczc_racing.py プロジェクト: mdhaman/superdesk-aap
    def post_process_item(self, item, provider):
        """Fill headline, slugline, take key, keywords, category and genre from the body.

        The first lines of ``item['body_html']`` are inspected, the ones that were
        recognised as header lines are dropped and the remainder is stored back
        prefixed with ``<pre>``.

        :param dict item: item being post processed, modified in place
        :param provider: not used by this method
        :return: the modified item, or ``None`` if an error occurred (the error is
            logged, not raised)
        """
        try:
            # number of leading body lines that will be dropped from the body
            lines_to_remove = 1
            # every item starts as category r with subject 15030001
            item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
            item[self.ITEM_SUBJECT] = [{'qcode': '15030001', 'name': subject_codes['15030001']}]
            lines = item['body_html'].split('\n')
            first_line = lines[0]

            # A YY/HH marker line: the tokens after the first one are added as keywords
            first_upper = first_line.upper()
            if 'YY ' in first_upper or 'HH ' in first_upper:
                for dest in first_line.split(' ')[1:]:
                    keywords = item.get('keywords')
                    if keywords:
                        keywords.append(dest)
                    else:
                        item['keywords'] = [dest]

            # the third line is looked at first, so shorter bodies end up in the handler below
            third_line = lines[2]
            second_line = lines[1]

            if ':SPORT -' in third_line:
                item[self.ITEM_HEADLINE] = third_line[9:]
                if ':POTTED :' in second_line:
                    item[self.ITEM_SLUGLINE] = second_line[9:]
                lines_to_remove = 3
            elif 'RACING : ' in second_line:
                item[self.ITEM_HEADLINE] = second_line[8:]
                item[self.ITEM_SLUGLINE] = second_line[8:]
                lines_to_remove = 2
            elif ':POTTED :' in second_line:
                item[self.ITEM_HEADLINE] = second_line[9:]
                item[self.ITEM_SLUGLINE] = second_line[9:]
                lines_to_remove = 2
            elif ':Premierships' in second_line:
                title = second_line[1:]
                item[self.ITEM_HEADLINE] = title
                # the slugline keeps the first 21 characters, the overflow goes to the take key
                item[self.ITEM_TAKE_KEY] = title[21:]
                item[self.ITEM_SLUGLINE] = title[:21]
                lines_to_remove = 2
            elif ' WEIGHTS ' in second_line:
                self._scan_lines(item, lines)
            elif 'YY ' in first_line or 'HH ' in first_line:
                item[self.ITEM_HEADLINE] = second_line
                item[self.ITEM_SLUGLINE] = second_line
                if ' Comment ' in second_line:
                    # split the line on the word Comment
                    comment_at = second_line.find('Comment')
                    before_comment = second_line[:comment_at]
                    take_key = second_line[comment_at + 8:]
                    item[self.ITEM_SLUGLINE] = before_comment + 'Comment'
                    item[self.ITEM_TAKE_KEY] = take_key
                    item[self.ITEM_HEADLINE] = before_comment + 'Gallop Comment ' + take_key
                    lines_to_remove = 2
            else:
                self._scan_lines(item, lines)

            item['body_html'] = '<pre>' + '\n'.join(lines[lines_to_remove:])

            # slugline + take key mentioning 'Brief Form' moves the item to category h
            combined = item.get(self.ITEM_SLUGLINE, '') + item.get(self.ITEM_TAKE_KEY, '')
            if 'brief form' in combined.lower():
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]
            # and so does this phrase anywhere in the body
            if 'NZ/AUST FIELDS' in item.get('body_html', ''):
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]

            # items marked with HH are converted by the racing reformat macro
            if 'HH ' in lines[0]:
                racing_reformat_macro(item)

            genre_map = get_resource_service('vocabularies').find_one(req=None, _id='genre')
            if genre_map:
                item['genre'] = [
                    genre for genre in genre_map.get('items', [])
                    if genre['qcode'] == 'Racing Data' and genre['is_active']
                ]
            return item

        except Exception as ex:
            logger.exception(ex)
コード例 #22
0
    def gen_stats_from_timeline(self, item):
        """Rebuild ``item['updates']`` from the timeline stored in its stats.

        The timeline entries are processed in ``operation_created`` /
        ``history_id`` order. Entries that are not skipped form the new
        timeline; along the way ``firstpublished``, ``firstcreated``,
        ``par_count`` and ``time_to_first_publish`` are set on the updates and
        every entry is handed to ``desk_transitions`` and
        ``featuremedia_updates``. Nothing is done when there is no timeline.

        :param dict item: item whose ``updates`` (created if missing) are modified in place
        :raises Exception: re-raised after logging when the timeline cannot be sorted
        """
        updates = item.setdefault('updates', {})
        stats = updates.setdefault('stats', {})

        timeline = stats.get(STAT_TYPE.TIMELINE) or []
        if len(timeline) < 1:
            return

        new_timeline = []
        desk_transitions.init(stats)
        featuremedia_updates.init(stats)

        def _sort_key(history_entry):
            return history_entry['operation_created'], history_entry['history_id']

        try:
            entries = sorted(stats[STAT_TYPE.TIMELINE], key=_sort_key)
        except Exception as sort_error:
            logger.exception('Failed to sort timeline {}'.format(stats[STAT_TYPE.TIMELINE]))
            raise sort_error

        # An original_item_id on the first history entry marks this item as a
        # duplicate of another item
        updates['_duplicate'] = entries[0].get('original_item_id')

        # The paragraph count starts at 0 and is updated while walking the timeline
        updates['par_count'] = 0

        for entry in entries:
            entry.setdefault('update', {})
            self.set_metadata_updates(item, entry)
            self.set_timeline_entry_task_details(entry, updates)

            if self.skip_timeline_entry(entry, updates):
                continue

            # Grab the update attribute before the entry is stored in the timeline
            update = entry.get('update') or {}
            self._store_update_fields(entry)

            # Update the paragraph count from this history entry
            self.update_par_count_from_timeline_entry(entry, updates, update)

            new_timeline.append(entry)

            # Work on a copy from here on, so that later changes do not modify
            # the entry that was just added to the timeline
            entry_copy = deepcopy(entry)

            operation = entry_copy.get('operation')
            operation_created = entry_copy.get('operation_created')

            if operation == OPERATION.PUBLISH:
                updates['_published'] = True
                if not updates.get('firstpublished'):
                    updates['firstpublished'] = operation_created
            elif operation in (OPERATION.CREATE, OPERATION.FETCH) and not updates.get('firstcreated'):
                updates['firstcreated'] = operation_created

            desk_transitions.process(entry_copy, new_timeline, updates, update, stats)
            featuremedia_updates.process(entry_copy, new_timeline, updates, update, stats)

        desk_transitions.complete(stats, updates)
        featuremedia_updates.complete(stats, updates)

        first_published = updates.get('firstpublished')
        first_created = updates.get('firstcreated')
        if first_published and first_created:
            updates['time_to_first_publish'] = (first_published - first_created).total_seconds()

        # Drop the temporary marker from every entry before storing the timeline
        for timeline_entry in new_timeline:
            timeline_entry.pop('_processed', None)
        stats[STAT_TYPE.TIMELINE] = list(new_timeline)

        # Keys starting with an underscore are temporary and not kept in the updates
        for key in tuple(updates):
            if key.startswith('_'):
                del updates[key]
コード例 #23
0
    def post_process_item(self, item, provider):
        """Set headline, take key, subject, category and genre from the slugline.

        The slugline selects one of three branches:

        * ``Grey``, ``Trot`` or ``Gallop``: category ``r`` and the results
          genre. A headline without ``TAB DIVS`` gets the take key ``Fields``
          (plus the weekday when the headline is a ``dd/mm/yyyy`` date); a
          headline with ``TAB DIVS`` is handled as dividends.
        * ``AFL``: category ``t`` and the results genre.
        * anything else: category ``f``.

        :param dict item: item being post processed, modified in place
        :param provider: not used by this method
        :return: the modified item, or ``None`` if an unexpected error occurred
            (the error is logged, not raised)
        """
        def subject(qcode):
            # single-entry subject list for the given subject qcode
            return [{'qcode': qcode, 'name': subject_codes[qcode]}]

        try:
            slugline = item.get(self.ITEM_SLUGLINE, '')
            # is it a horse or dog racing item
            if 'Grey' in slugline or 'Trot' in slugline or 'Gallop' in slugline:
                headline = item.get(self.ITEM_HEADLINE, '')
                # Don't look for the date in the TAB Dividends
                if 'TAB DIVS' not in headline:
                    try:
                        raceday = datetime.strptime(headline, '%d/%m/%Y')
                        take_key = 'Fields ' + raceday.strftime('%A')
                    except Exception:
                        # headline is not a usable date: best effort, plain take key.
                        # ``Exception`` (not a bare except) so that KeyboardInterrupt
                        # and SystemExit are no longer swallowed here.
                        take_key = 'Fields'
                    item[self.ITEM_TAKE_KEY] = take_key
                    # it's the dogs
                    if 'Grey' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + 'hound ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    # a Trot slugline overrides the greyhound values set above
                    if 'Trot' in slugline:
                        item[self.ITEM_HEADLINE] = slugline + ' ' + take_key
                        item[self.ITEM_SUBJECT] = subject('15030003')
                else:
                    # Dividends: the take key is the headline without a trailing weekday.
                    # NOTE(review): Saturday and Sunday are not stripped - confirm this is intended.
                    item[self.ITEM_TAKE_KEY] = re.sub(
                        ' Monday$| Tuesday$| Wednesday$| Thursday$| Friday$', '', headline)
                    item[self.ITEM_HEADLINE] = '{} {}'.format(slugline, headline)
                    if 'Greyhound' in slugline:
                        slugline = slugline.replace('Greyhound', 'Greys')
                        item[self.ITEM_SLUGLINE] = slugline
                        item[self.ITEM_SUBJECT] = subject('15082000')
                    if 'Trot' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030003')
                    if 'Gallop' in slugline:
                        item[self.ITEM_SUBJECT] = subject('15030001')
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'r'}]
                self._set_results_genre(item)
            elif 'AFL' in slugline:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 't'}]
                item[self.ITEM_SUBJECT] = subject('15084000')
                self._set_results_genre(item)
            else:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'f'}]
                item[self.ITEM_SUBJECT] = subject('04000000')
            return item

        except Exception as ex:
            # Errors are logged only; the method then implicitly returns None.
            logger.exception(ex)