def parse_item(self, image_path):
    """Create a picture item from the image file at ``image_path``.

    The file is stored through ``app.media``, its file metadata is attached
    to the item, the IPTC metadata is handed to ``self.parse_meta`` and the
    renditions are generated without custom crops.

    :param image_path: path of the image file to read
    :return: the item dict, including its ``renditions``
    """
    basename = os.path.basename(image_path)
    mime_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)

    # The item keeps the mimetype guessed from the file name; the value
    # detected by process_file_from_stream below is only used for storage
    # and rendition generation.
    item = {
        'guid': guid,
        'uri': guid,
        config.VERSION: 1,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': mime_type,
        'versioncreated': utcnow(),
    }

    with open(image_path, 'rb') as stream:
        _, mime_type, file_meta = process_file_from_stream(
            stream, content_type=mime_type)

        # Each consumer below reads the stream, so rewind before every one.
        stream.seek(0)
        media_id = app.media.put(
            stream,
            filename=basename,
            content_type=mime_type,
            metadata=file_meta,
        )
        filemeta.set_filemeta(item, file_meta)

        stream.seek(0)
        iptc = get_meta_iptc(stream)

        stream.seek(0)
        self.parse_meta(item, iptc)

        spec = get_renditions_spec(no_custom_crops=True)
        item['renditions'] = generate_renditions(
            stream, media_id, [media_id], 'image', mime_type, spec,
            url_for_media)

    return item
def parse_item(self, image_path):
    """Create a picture item from the image file at ``image_path``.

    The file is stored through ``app.media``, its file metadata is attached
    to the item, renditions are generated without custom crops, and the
    IPTC metadata is mapped onto the item using ``IPTC_MAPPING``.

    ``firstcreated`` is set only when both the IPTC date created and time
    created values are present.

    :param image_path: path of the image file to read
    :return: the item dict, including its ``renditions``
    """
    filename = os.path.basename(image_path)
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    item = {
        'guid': guid,
        config.VERSION: 1,
        config.ID_FIELD: guid,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': content_type,
        # NOTE(review): this is a naive local timestamp - confirm whether a
        # timezone-aware UTC value is expected here.
        'versioncreated': datetime.now(),
    }

    with open(image_path, 'rb') as f:
        _, content_type, file_metadata = process_file_from_stream(
            f, content_type=content_type)
        # Each consumer below reads the stream, so rewind before every one.
        f.seek(0)
        file_id = app.media.put(f, filename=filename,
                                content_type=content_type,
                                metadata=file_metadata)
        filemeta.set_filemeta(item, file_metadata)
        f.seek(0)
        metadata = get_meta_iptc(f)
        f.seek(0)
        rendition_spec = get_renditions_spec(no_custom_crops=True)
        renditions = generate_renditions(f, file_id, [file_id], 'image',
                                         content_type, rendition_spec,
                                         url_for_media)
        item['renditions'] = renditions

    try:
        date_created = metadata[TAG.DATE_CREATED]
        time_created = metadata[TAG.TIME_CREATED]
    except KeyError:
        pass
    else:
        # we format proper ISO 8601 date so we can parse it with dateutil
        datetime_created = '{}-{}-{}T{}:{}:{}'.format(
            date_created[0:4], date_created[4:6], date_created[6:8],
            time_created[0:2], time_created[2:4], time_created[4:6])
        # The UTC offset (sign + HHMM) is appended only when present: a
        # plain "HHMMSS" value used to raise IndexError on time_created[6].
        if len(time_created) > 6:
            datetime_created += '{}{}:{}'.format(
                time_created[6], time_created[7:9], time_created[9:])
        item['firstcreated'] = dateutil.parser.parse(datetime_created)

    # now we map IPTC metadata to superdesk metadata
    for source_key, dest_key in IPTC_MAPPING.items():
        try:
            item[dest_key] = metadata[source_key]
        except KeyError:
            continue

    return item
def _parse_exif(self, data, item):
    """Copy selected IPTC fields of the preview rendition into ``item["extra"]``.

    The preview image is downloaded from
    ``data["item"]["renditions"]["preview"]["href"]``; when that path is
    missing from ``data`` the method returns without touching ``item``.

    :param data: payload holding the item and its renditions
    :param item: item dict updated in place
    """
    try:
        response = sess.get(
            data["item"]["renditions"]["preview"]["href"], timeout=10
        )
    except KeyError:
        return

    iptc = get_meta_iptc(io.BytesIO(response.content))

    # "extra" is created lazily, only once there is a value to store.
    caption_writer = iptc.get("Writer/Editor")
    if caption_writer:
        item.setdefault("extra", {})[cp.CAPTION_WRITER] = caption_writer

    headline = iptc.get("Headline")
    if headline:
        item.setdefault("extra", {})[cp.HEADLINE2] = headline

    keywords = iptc.get("Keywords")
    if keywords:
        # Re-join the ";"-separated keywords with ", ".
        parts = keywords.split(";")
        item.setdefault("extra", {})[cp.XMP_KEYWORDS] = ", ".join(parts)
def _parse_exif(self, data, item):
    """Store selected IPTC values of the preview image under ``item['extra']``.

    The image is fetched from the preview rendition's ``href`` found in
    ``data``. If ``data`` has no such rendition path, nothing is done.

    :param data: payload holding the item and its renditions
    :param item: item dict updated in place
    """
    try:
        res = sess.get(data['item']['renditions']['preview']['href'],
                       timeout=10)
    except KeyError:
        return

    meta = get_meta_iptc(io.BytesIO(res.content))

    # 'extra' is only created once a non-empty value is found.
    writer = meta.get('Writer/Editor')
    if writer:
        item.setdefault('extra', {})[cp.CAPTION_WRITER] = writer

    second_headline = meta.get('Headline')
    if second_headline:
        item.setdefault('extra', {})[cp.HEADLINE2] = second_headline

    raw_keywords = meta.get('Keywords')
    if raw_keywords:
        # Turn the ';'-separated keyword list into a ', '-separated one.
        keyword_list = raw_keywords.split(';')
        item.setdefault('extra', {})[cp.XMP_KEYWORDS] = ', '.join(keyword_list)
def parse_item(self, image_path):
    """Build a picture item for the image stored at ``image_path``.

    Steps: store the binary via ``app.media``, attach the file metadata,
    generate the renditions (no custom crops), derive ``firstcreated`` from
    the IPTC date/time created values when both exist, then copy every
    IPTC field listed in ``IPTC_MAPPING`` that the image provides.

    :param image_path: path of the image file to read
    :return: the item dict, including its ``renditions``
    """
    filename = os.path.basename(image_path)
    content_type = mimetypes.guess_type(image_path)[0]
    guid = utils.generate_guid(type=GUID_TAG)
    item = {
        'guid': guid,
        config.VERSION: 1,
        config.ID_FIELD: guid,
        ITEM_TYPE: CONTENT_TYPE.PICTURE,
        'mimetype': content_type,
        # NOTE(review): naive local time - confirm whether UTC is expected.
        'versioncreated': datetime.now(),
    }

    with open(image_path, 'rb') as f:
        _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
        # The stream is consumed by every step, hence the repeated rewinds.
        f.seek(0)
        file_id = app.media.put(f, filename=filename, content_type=content_type, metadata=file_metadata)
        filemeta.set_filemeta(item, file_metadata)
        f.seek(0)
        metadata = get_meta_iptc(f)
        f.seek(0)
        rendition_spec = get_renditions_spec(no_custom_crops=True)
        renditions = generate_renditions(f, file_id, [file_id], 'image', content_type, rendition_spec,
                                         url_for_media)
        item['renditions'] = renditions

    try:
        date_created, time_created = metadata[TAG.DATE_CREATED], metadata[TAG.TIME_CREATED]
    except KeyError:
        pass
    else:
        # we format proper ISO 8601 date so we can parse it with dateutil
        iso_parts = ['{}-{}-{}'.format(date_created[0:4], date_created[4:6], date_created[6:8]),
                     'T{}:{}:{}'.format(time_created[0:2], time_created[2:4], time_created[4:6])]
        # Only add the UTC offset when the value carries one; indexing
        # time_created[6] on a bare "HHMMSS" value raised IndexError before.
        if len(time_created) > 6:
            iso_parts.append('{}{}:{}'.format(time_created[6], time_created[7:9], time_created[9:]))
        item['firstcreated'] = dateutil.parser.parse(''.join(iso_parts))

    # now we map IPTC metadata to superdesk metadata
    for source_key, dest_key in IPTC_MAPPING.items():
        try:
            item[dest_key] = metadata[source_key]
        except KeyError:
            continue

    return item
def _parse_binary(item):
    """Enrich ``item`` in place with IPTC and XMP metadata of its original rendition.

    The binary is loaded with ``app.media.get`` using
    ``item['renditions']['original']['media']``. When no IPTC metadata is
    found the item is left untouched and XMP is not read at all.

    :param item: item dict; must contain the original rendition's media id
    :return: None
    """
    binary = app.media.get(item['renditions']['original']['media'])
    iptc = get_meta_iptc(binary)
    if not iptc:
        return

    item.setdefault('extra', {})

    if iptc.get('By-line'):
        item['byline'] = iptc['By-line']

    if iptc.get('Category'):
        append_matching_subject(item, cp.PHOTO_CATEGORIES, iptc['Category'])

    if iptc.get('Credit'):
        # The short "AP" credit is expanded to the full agency name.
        item['creditline'] = 'THE ASSOCIATED PRESS' if iptc['Credit'] == 'AP' else iptc['Credit']

    if iptc.get('Source'):
        item['original_source'] = IPTC_SOURCE_MAPPING.get(iptc['Source'], iptc['Source'])
        item['extra'][cp.ARCHIVE_SOURCE] = item['original_source']

    # The country is an IPTC field, so it has to be looked up in ``iptc``.
    # The previous check read it from ``item``, so an image with a country
    # but no city never got a dateline.
    country = iptc.get('Country/Primary Location Name')
    if iptc.get('City') or country:
        item['dateline'] = {
            'located': {
                'city': iptc.get('City'),
                'country': COUNTRY_MAPPING.get(country, country) if country else None,
            }
        }

    if iptc.get('By-line Title'):
        item['extra'][cp.PHOTOGRAPHER_CODE] = iptc['By-line Title']

    if iptc.get('Writer/Editor'):
        item['extra'][cp.CAPTION_WRITER] = iptc['Writer/Editor']

    if iptc.get('Copyright Notice'):
        item['copyrightnotice'] = iptc['Copyright Notice']

    if iptc.get('Caption/Abstract'):
        item['description_text'] = iptc['Caption/Abstract']

    if iptc.get('Special Instructions'):
        item['ednote'] = iptc['Special Instructions']

    if iptc.get(ORIGINAL_TRANSMISSION_REF):
        # A value of exactly cp.SLUG_LEN characters is stored as the
        # original id, anything else as the file name.
        if len(iptc[ORIGINAL_TRANSMISSION_REF]) == cp.SLUG_LEN:
            item['extra'][cp.ORIG_ID] = iptc[ORIGINAL_TRANSMISSION_REF]
        else:
            item['extra'][cp.FILENAME] = iptc[ORIGINAL_TRANSMISSION_REF]

    # Checked last, so a matching fixture id overrides the id set above.
    if iptc.get(FIXTURE_ID) and len(iptc[FIXTURE_ID]) == cp.SLUG_LEN:
        item['extra'][cp.ORIG_ID] = iptc[FIXTURE_ID]

    # get_meta_iptc has read the stream; rewind before parsing XMP.
    binary.seek(0)
    xmp = parse_xmp(binary)
    if not xmp:
        return

    if xmp.get('http://ns.adobe.com/photoshop/1.0/'):
        for key, val, _ in xmp['http://ns.adobe.com/photoshop/1.0/']:
            if key == 'photoshop:Urgency':
                item['urgency'] = int(val)
            elif key == 'photoshop:DateCreated':
                item['firstcreated'] = _parse_xmp_datetime(val)

    if xmp.get('http://purl.org/dc/elements/1.1/'):
        for key, val, _ in xmp['http://purl.org/dc/elements/1.1/']:
            # Both spellings of the rights property feed the same field.
            if key in ('dc:rights', 'dc:rights[1]') and val:
                item['extra'][cp.INFOSOURCE] = val
def _parse_binary(item):
    """Fill ``item`` in place from the IPTC and XMP metadata of its original rendition.

    The binary is fetched with ``app.media.get`` using
    ``item["renditions"]["original"]["media"]``. If no IPTC metadata is
    found, the function returns early: the item is not modified and XMP is
    not parsed.

    :param item: item dict; must contain the original rendition's media id
    :return: None
    """
    binary = app.media.get(item["renditions"]["original"]["media"])
    iptc = get_meta_iptc(binary)
    if not iptc:
        return

    item.setdefault("extra", {})

    if iptc.get("By-line"):
        item["byline"] = iptc["By-line"]

    if iptc.get("Category"):
        append_matching_subject(item, cp.PHOTO_CATEGORIES, iptc["Category"])

    if iptc.get("Credit"):
        # The short "AP" credit is expanded to the full agency name.
        item["creditline"] = (
            "THE ASSOCIATED PRESS" if iptc["Credit"] == "AP" else iptc["Credit"]
        )

    if iptc.get("Source"):
        item["original_source"] = IPTC_SOURCE_MAPPING.get(
            iptc["Source"], iptc["Source"]
        )
        item["extra"][cp.ARCHIVE_SOURCE] = item["original_source"]

    # Both location fields come from the IPTC data. The country used to be
    # tested on ``item``, where that IPTC field name is not a key, so a
    # country-only location never produced a dateline.
    city = iptc.get("City")
    country = iptc.get("Country/Primary Location Name")
    if city or country:
        item["dateline"] = {
            "located": {
                "city": city,
                "country": COUNTRY_MAPPING.get(country, country) if country else None,
            }
        }

    if iptc.get("By-line Title"):
        item["extra"][cp.PHOTOGRAPHER_CODE] = iptc["By-line Title"]

    if iptc.get("Writer/Editor"):
        item["extra"][cp.CAPTION_WRITER] = iptc["Writer/Editor"]

    if iptc.get("Copyright Notice"):
        item["copyrightnotice"] = iptc["Copyright Notice"]

    if iptc.get("Caption/Abstract"):
        item["description_text"] = iptc["Caption/Abstract"]

    if iptc.get("Special Instructions"):
        item["ednote"] = iptc["Special Instructions"]

    if iptc.get(ORIGINAL_TRANSMISSION_REF):
        # A value of exactly cp.SLUG_LEN characters is stored as the
        # original id, anything else as the file name.
        if len(iptc[ORIGINAL_TRANSMISSION_REF]) == cp.SLUG_LEN:
            item["extra"][cp.ORIG_ID] = iptc[ORIGINAL_TRANSMISSION_REF]
        else:
            item["extra"][cp.FILENAME] = iptc[ORIGINAL_TRANSMISSION_REF]

    # Checked last, so a matching fixture id overrides the id set above.
    if iptc.get(FIXTURE_ID) and len(iptc[FIXTURE_ID]) == cp.SLUG_LEN:
        item["extra"][cp.ORIG_ID] = iptc[FIXTURE_ID]

    # get_meta_iptc has read the stream; rewind before parsing XMP.
    binary.seek(0)
    xmp = parse_xmp(binary)
    if not xmp:
        return

    if xmp.get("http://ns.adobe.com/photoshop/1.0/"):
        for key, val, _ in xmp["http://ns.adobe.com/photoshop/1.0/"]:
            if key == "photoshop:Urgency":
                item["urgency"] = int(val)
            elif key == "photoshop:DateCreated":
                item["firstcreated"] = _parse_xmp_datetime(val)

    if xmp.get("http://purl.org/dc/elements/1.1/"):
        for key, val, _ in xmp["http://purl.org/dc/elements/1.1/"]:
            # Both spellings of the rights property feed the same field.
            if key in ("dc:rights", "dc:rights[1]") and val:
                item["extra"][cp.INFOSOURCE] = val