def get_subjects(tree):
    """Find all the subject tags in the passed tree and return the parsed subjects.

    Every ``head/tobject/tobject.subject`` element is inspected. For each
    attribute named in ``subject_fields`` that is set on the element, a
    subject is emitted whose qcode is derived from the element's
    ``tobject.subject.refnum``: the first two characters followed by
    ``000000`` for ``SUBJECT_TYPE``, the first five characters followed by
    ``000`` for ``SUBJECT_MATTER``, and the refnum itself for any other
    field. If the refnum itself was not emitted that way, it is added with
    the name found in ``subject_codes``. Only qcodes known to
    ``subject_codes`` are emitted, so all entries have both the name and
    qcode populated.

    Elements without a refnum are skipped, and each qcode appears at most
    once in the result.

    :param tree: element (tree) to search for subject tags
    :return: a list of subject dictionaries with ``name`` and ``qcode`` keys
    """
    subjects = []
    emitted_qcodes = set()  # qcodes already present in ``subjects``

    for elem in tree.findall('head/tobject/tobject.subject'):
        qcode = elem.get('tobject.subject.refnum')
        if not qcode:
            # Without a refnum there is nothing to derive a qcode from;
            # slicing ``None`` below would raise TypeError.
            continue

        for field in subject_fields:
            name = elem.get(field)
            if not name:
                continue
            if field == SUBJECT_TYPE:
                field_qcode = qcode[:2] + '000000'
            elif field == SUBJECT_MATTER:
                field_qcode = qcode[:5] + '000'
            else:
                field_qcode = qcode
            # Several elements usually share the same type/matter parent,
            # so only the first occurrence of a qcode is kept.
            if field_qcode not in emitted_qcodes and subject_codes.get(field_qcode):
                subjects.append({'name': name, 'qcode': field_qcode})
                emitted_qcodes.add(field_qcode)

        # Fall back to the canonical name when no subject field produced
        # an entry for the refnum itself.
        if qcode not in emitted_qcodes and subject_codes.get(qcode):
            subjects.append({'name': subject_codes[qcode], 'qcode': qcode})
            emitted_qcodes.add(qcode)

    return subjects
def get_subjects(self, tree):
    """Find all the subject tags in the passed tree and return the parsed subjects.

    Every ``head/tobject/tobject.subject`` element is inspected once per
    distinct ``tobject.subject.refnum``. For each attribute named in
    ``subject_fields`` that is set on the element, a subject is emitted
    whose qcode is derived from the refnum: the first two characters
    followed by ``000000`` for ``SUBJECT_TYPE``, the first five characters
    followed by ``000`` for ``SUBJECT_MATTER``, and the refnum itself for
    any other field. If the refnum itself was not emitted that way, it is
    added with the name found in ``subject_codes``. Only qcodes known to
    ``subject_codes`` are emitted, so all entries have both the name and
    qcode populated.

    Elements without a refnum are skipped, and each qcode appears at most
    once in the result.

    :param tree: element (tree) to search for subject tags
    :return: a list of subject dictionaries with ``name`` and ``qcode`` keys
    """
    subjects = []
    seen_refnums = set()  # refnums already processed, to avoid duplicates
    emitted_qcodes = set()  # qcodes already present in ``subjects``

    for elem in tree.findall("head/tobject/tobject.subject"):
        qcode = elem.get("tobject.subject.refnum")
        # A missing refnum cannot be sliced (TypeError) and a repeated one
        # was already handled, so both are ignored.
        if not qcode or qcode in seen_refnums:
            continue
        seen_refnums.add(qcode)

        for field in subject_fields:
            name = elem.get(field)
            if not name:
                continue
            if field == SUBJECT_TYPE:
                field_qcode = qcode[:2] + "000000"
            elif field == SUBJECT_MATTER:
                field_qcode = qcode[:5] + "000"
            else:
                field_qcode = qcode
            if field_qcode not in emitted_qcodes and subject_codes.get(field_qcode):
                subjects.append({"name": name, "qcode": field_qcode})
                emitted_qcodes.add(field_qcode)

        # if the subject_fields are not specified.
        if qcode not in emitted_qcodes and subject_codes.get(qcode):
            subjects.append({"name": subject_codes[qcode], "qcode": qcode})
            emitted_qcodes.add(qcode)

    return subjects
def format_subjects(self, subjects):
    """Map the ingested Subject Codes to their names as per IPTC Specification.

    Each entry's ``FormalName`` is used as the qcode. Entries without a
    ``FormalName`` and qcodes that were already emitted are skipped. The
    name is looked up in ``subject_codes`` and defaults to an empty string.

    :param subjects: iterable of dicts carrying a ``FormalName`` key
    :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
    """
    result = []
    for entry in subjects:
        code = entry.get('FormalName')
        if not code:
            continue
        # keep only the first occurrence of every qcode
        if any(done['qcode'] == code for done in result):
            continue
        result.append({'qcode': code, 'name': subject_codes.get(code, '')})
    return result
def _set_default_item(self, sport_id, comp_id, match_id):
    """Construct an event item populated with the common values.

    The ``occur_status`` is a copy of the matching ``self.eocstat_map``
    entry with ``is_active`` removed, so the shared vocabulary entry is
    neither mutated nor aliased by the returned item.

    :param sport_id: key into ``self.sport_map`` used to resolve the subject
        qcode (its ``iptc`` value); also part of the guid
    :param comp_id: competition id, part of the guid; ids starting with
        ``dom`` get ANPA category ``t``, all others ``s``
    :param match_id: match id, part of the guid
    :return: the new item dictionary
    :raises IndexError: if ``self.eocstat_map`` has no active
        ``eocstat:eos5`` entry
    """
    item = dict()
    item[ITEM_TYPE] = CONTENT_TYPE.EVENT
    # '/' in any of the ids is replaced so the guid stays a flat urn
    item[GUID_FIELD] = 'urn:aapsportsfixtures:{}:{}:{}:{}'.format(
        sport_id, comp_id, self.season, match_id).replace('/', '-')
    item['anpa_category'] = [{'qcode': 't'}] if comp_id.startswith('dom') else [{'qcode': 's'}]

    # Resolve the subject code once; it is both the qcode and the lookup key.
    iptc_code = self.sport_map.get(sport_id, {}).get('iptc', '')
    item['subject'] = [{'qcode': iptc_code, 'name': subject_codes.get(iptc_code, '')}]

    occur_status = [
        x for x in self.eocstat_map.get('items', [])
        if x['qcode'] == 'eocstat:eos5' and x.get('is_active', True)
    ][0]
    # Copy instead of popping in place: the entry belongs to
    # ``self.eocstat_map`` and would otherwise be modified for, and shared
    # between, every item built by this method.
    item['occur_status'] = {k: v for k, v in occur_status.items() if k != 'is_active'}

    item['versioncreated'] = utcnow()
    item['state'] = CONTENT_STATE.INGESTED
    item['pubstatus'] = None

    calendars = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='event_calendars')
    # NOTE(review): find_one presumably returns None when the vocabulary is
    # missing; treat that (and calendars without a qcode) as "no match".
    calendar_items = (calendars or {}).get('items', [])
    item['calendars'] = [
        c for c in calendar_items
        if (c.get('qcode') or '').lower() in ('sport', 'sportgeneral')
    ]
    return item
def format_subjects(self, subjects):
    """Map the ingested Subject Codes to their corresponding names as per IPTC Specification.

    The ``FormalName`` of every subject becomes the qcode of the output
    entry; its name is taken from ``subject_codes`` (empty string when
    unknown). Subjects without a ``FormalName`` and repeated qcodes are
    left out.

    :param subjects: list of dicts where each dict gives the category the article is mapped to.
    :type subjects: list
    :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
    :rtype list
    """
    formatted = []
    seen_qcodes = []  # qcodes already added to ``formatted``, in order

    for subject in subjects:
        qcode = subject.get('FormalName')
        if qcode and qcode not in seen_qcodes:
            seen_qcodes.append(qcode)
            formatted.append({
                'qcode': qcode,
                'name': subject_codes.get(qcode, ''),
            })

    return formatted
def format_subjects(self, subjects):
    """Map the ingested Subject Codes to their corresponding names as per IPTC Specification.

    A subject is kept only when its ``FormalName`` equals the ``qcode`` of
    an item flagged ``is_active`` in the ``iptc_subject_codes`` CV returned
    by ``self._get_cv``. Each qcode is emitted once, with its name taken
    from ``subject_codes`` (empty string when unknown) and the scheme set
    to ``iptc_subject_codes``.

    :param subjects: list of dicts where each dict gives the category the article is mapped to.
    :type subjects: list
    :returns [{"qcode": "01001000", "name": "archaeology", "scheme": "iptc_subject_codes"}]
    :rtype list
    """
    formatted_subjects = []
    iptcsc_cv = self._get_cv('iptc_subject_codes')
    if not subjects:
        return formatted_subjects

    # Build the lookup once instead of rescanning every CV item for every
    # subject.
    active_qcodes = {item.get('qcode') for item in iptcsc_cv['items'] if item.get('is_active')}
    seen = set()  # qcodes already emitted

    for subject in subjects:
        formal_name = subject.get('FormalName')
        #: check formal_name, filter duplicates and subjects missing from the CV
        if not formal_name or formal_name in seen or formal_name not in active_qcodes:
            continue
        seen.add(formal_name)
        formatted_subjects.append({
            'qcode': formal_name,
            'name': subject_codes.get(formal_name, ''),
            'scheme': 'iptc_subject_codes'
        })

    return formatted_subjects
def _set_default_item(self, title, _id, thumbprint, country):
    """Construct an event item populated with the common values.

    The ``occur_status`` is a copy of the matching ``self.eocstat_map``
    entry with ``is_active`` removed, so the shared vocabulary entry is
    neither mutated nor aliased by the returned item.

    :param title: value searched for the keys of ``self.sheet_map``; the
        first key contained in it determines the subject. When no key
        matches, the item has no ``subject`` entry.
    :param _id: identifier, part of the guid
    :param thumbprint: thumbprint, part of the guid
    :param country: country name; ``australia``/``aus`` (case-insensitive)
        yields the ``Domestic Sport`` category, anything else
        ``Overseas Sport``
    :return: the new item dictionary
    :raises IndexError: if ``self.eocstat_map`` has no active
        ``eocstat:eos5`` entry
    """
    item = dict()
    item[ITEM_TYPE] = CONTENT_TYPE.EVENT
    item[GUID_FIELD] = 'urn:aapsportssheet:{}:{}'.format(_id, thumbprint)

    if country.lower() in ['australia', 'aus']:
        item['anpa_category'] = [{'qcode': 't', 'subject': '15000000', 'name': 'Domestic Sport'}]
    else:
        item['anpa_category'] = [{'qcode': 's', 'subject': '15000000', 'name': 'Overseas Sport'}]

    # The first sheet_map key found in the title wins.
    for k, v in self.sheet_map.items():
        if k in title:
            item['subject'] = [{'qcode': v, 'name': subject_codes.get(v, ''), 'parent': '15000000'}]
            break

    occur_status = [
        x for x in self.eocstat_map.get('items', [])
        if x['qcode'] == 'eocstat:eos5' and x.get('is_active', True)
    ][0]
    # Copy instead of popping in place: the entry belongs to
    # ``self.eocstat_map`` and would otherwise be modified for, and shared
    # between, every item built by this method.
    item['occur_status'] = {key: value for key, value in occur_status.items() if key != 'is_active'}

    item['versioncreated'] = utcnow()
    item['state'] = CONTENT_STATE.SCHEDULED
    item['pubstatus'] = 'usable'

    calendars = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='event_calendars')
    # NOTE(review): find_one presumably returns None when the vocabulary is
    # missing; treat that (and calendars without a qcode) as "no match".
    calendar_items = (calendars or {}).get('items', [])
    item['calendars'] = [
        c for c in calendar_items
        if (c.get('qcode') or '').lower() in ('sport', 'sportgeneral')
    ]
    return item
def get_subjects(self, tree):
    """Find all the subject tags in the passed tree and return the parsed subjects.

    Every ``head/tobject/tobject.subject`` element is inspected once per
    distinct ``tobject.subject.refnum``. For each attribute named in
    ``subject_fields`` that is set on the element, a subject is emitted
    whose qcode is derived from the refnum: the first two characters
    followed by ``000000`` for ``SUBJECT_TYPE``, the first five characters
    followed by ``000`` for ``SUBJECT_MATTER``, and the refnum itself for
    any other field. If the refnum itself was not emitted that way, it is
    added with the name found in ``subject_codes``. Only qcodes known to
    ``subject_codes`` are emitted, so all entries have both the name and
    qcode populated.

    Elements without a refnum are skipped, and each qcode appears at most
    once in the result.

    :param tree: element (tree) to search for subject tags
    :return: a list of subject dictionaries with ``name`` and ``qcode`` keys
    """
    subjects = []
    processed_refnums = set()  # refnums already handled, to avoid duplicates
    listed_qcodes = set()  # qcodes already present in ``subjects``

    for elem in tree.findall('head/tobject/tobject.subject'):
        qcode = elem.get('tobject.subject.refnum')
        # A missing refnum cannot be sliced (TypeError) and a repeated one
        # was already handled, so both are ignored.
        if not qcode or qcode in processed_refnums:
            continue
        processed_refnums.add(qcode)

        for field in subject_fields:
            name = elem.get(field)
            if not name:
                continue
            if field == SUBJECT_TYPE:
                field_qcode = qcode[:2] + '000000'
            elif field == SUBJECT_MATTER:
                field_qcode = qcode[:5] + '000'
            else:
                field_qcode = qcode
            if field_qcode not in listed_qcodes and subject_codes.get(field_qcode):
                subjects.append({'name': name, 'qcode': field_qcode})
                listed_qcodes.add(field_qcode)

        # if the subject_fields are not specified.
        if qcode not in listed_qcodes and subject_codes.get(qcode):
            subjects.append({'name': subject_codes[qcode], 'qcode': qcode})
            listed_qcodes.add(qcode)

    return subjects
def format_subjects(self, subjects):
    """Map the ingested Subject Codes to their names as per IPTC Specification.

    The ``FormalName`` of each subject is used as qcode; the name comes
    from ``subject_codes`` and is an empty string when the code is unknown.
    Subjects lacking a ``FormalName`` and already emitted qcodes are
    ignored.

    :param subjects: iterable of dicts carrying a ``FormalName`` key
    :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
    """
    output = []
    for subject in subjects:
        qcode = subject.get('FormalName')
        if not qcode:
            continue
        for existing in output:
            if existing['qcode'] == qcode:
                # already formatted, nothing to add
                break
        else:
            output.append({'qcode': qcode, 'name': subject_codes.get(qcode, '')})
    return output
def _set_default_item(self, sport_id, comp_id, match_id):
    """Construct an event item populated with the common values.

    The ``occur_status`` is a copy of the matching ``self.eocstat_map``
    entry with ``is_active`` removed, so the shared vocabulary entry is
    neither mutated nor aliased by the returned item.

    :param sport_id: key into ``self.sport_map`` used to resolve the subject
        qcode (its ``iptc`` value); also part of the guid
    :param comp_id: competition id, part of the guid; ids starting with
        ``dom`` get ANPA category ``t``, all others ``s``
    :param match_id: match id, part of the guid
    :return: the new item dictionary
    :raises IndexError: if ``self.eocstat_map`` has no active
        ``eocstat:eos5`` entry
    """
    item = dict()
    item[ITEM_TYPE] = CONTENT_TYPE.EVENT
    # '/' in any of the ids is replaced so the guid stays a flat urn
    item[GUID_FIELD] = 'urn:aapsportsfixtures:{}:{}:{}:{}'.format(
        sport_id, comp_id, self.season, match_id).replace('/', '-')
    item['anpa_category'] = [{'qcode': 't'}] if comp_id.startswith('dom') else [{'qcode': 's'}]

    # Resolve the subject code once; it is both the qcode and the lookup key.
    iptc_code = self.sport_map.get(sport_id, {}).get('iptc', '')
    item['subject'] = [{'qcode': iptc_code, 'name': subject_codes.get(iptc_code, '')}]

    occur_status = [
        x for x in self.eocstat_map.get('items', [])
        if x['qcode'] == 'eocstat:eos5' and x.get('is_active', True)
    ][0]
    # Copy instead of popping in place: the entry belongs to
    # ``self.eocstat_map`` and would otherwise be modified for, and shared
    # between, every item built by this method.
    item['occur_status'] = {k: v for k, v in occur_status.items() if k != 'is_active'}

    item['versioncreated'] = utcnow()
    item['state'] = CONTENT_STATE.INGESTED
    item['pubstatus'] = None

    calendars = superdesk.get_resource_service('vocabularies').find_one(
        req=None, _id='event_calendars')
    # NOTE(review): find_one presumably returns None when the vocabulary is
    # missing; treat that (and calendars without a qcode) as "no match".
    calendar_items = (calendars or {}).get('items', [])
    item['calendars'] = [
        c for c in calendar_items
        if (c.get('qcode') or '').lower() == 'sport'
    ]
    return item
def get_subjects(self, tree):
    """Collect the IPTC subjects referenced by the passed tree.

    Only ``head/tobject/tobject.subject`` elements whose
    ``tobject.subject.ipr`` attribute equals ``IPTC`` are considered. Each
    distinct ``tobject.subject.refnum`` known to ``subject_codes`` yields
    one entry, so all entries have both the name and qcode populated.

    :param tree: element (tree) to search for subject tags
    :return: a list of subject dictionaries
    """
    found = []
    handled = []  # refnums seen so far; repeated ones are ignored

    for node in tree.findall('head/tobject/tobject.subject[@tobject.subject.ipr="IPTC"]'):
        refnum = node.get('tobject.subject.refnum')
        if refnum in handled:
            continue
        handled.append(refnum)

        already_listed = any(entry['qcode'] == refnum for entry in found)
        if already_listed or not subject_codes.get(refnum):
            continue
        found.append({'name': subject_codes[refnum], 'qcode': refnum})

    return found
def format_subjects(self, subjects):
    """Map the ingested Subject Codes to their corresponding names as per IPTC Specification.

    Every subject contributes its ``FormalName`` as qcode together with the
    name registered for it in ``subject_codes`` (empty string when there is
    none). Subjects without a ``FormalName`` are dropped and each qcode is
    listed only once, in order of first appearance.

    :param subjects: list of dicts where each dict gives the category the article is mapped to.
    :type subjects: list
    :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
    :rtype list
    """
    collected = []

    def already_listed(code):
        return code in (entry['qcode'] for entry in collected)

    for subject in subjects:
        code = subject.get('FormalName')
        if not code or already_listed(code):
            continue
        collected.append({'qcode': code, 'name': subject_codes.get(code, '')})

    return collected