Exemple #1
0
    def parse_item(self, image_path):
        """Build a picture item from the image file at ``image_path``.

        The file is stored through ``app.media``, its file metadata is applied
        with ``filemeta.set_filemeta``, the IPTC metadata is handed to
        ``self.parse_meta`` and renditions are generated for the stored file.

        :param image_path: path of the image file to ingest
        :return: the populated item dict
        """
        filename = os.path.basename(image_path)
        guessed_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)

        # The item keeps the mimetype guessed from the file name; the type
        # reported by process_file_from_stream is only used for storage and
        # rendition generation below.
        item = {}
        item['guid'] = guid
        item['uri'] = guid
        item[config.VERSION] = 1
        item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
        item['mimetype'] = guessed_type
        item['versioncreated'] = utcnow()

        with open(image_path, 'rb') as stream:
            _, stored_type, file_meta = process_file_from_stream(
                stream, content_type=guessed_type)

            # every consumer below reads the stream from its start
            stream.seek(0)
            media_id = app.media.put(
                stream,
                filename=filename,
                content_type=stored_type,
                metadata=file_meta,
            )
            filemeta.set_filemeta(item, file_meta)

            stream.seek(0)
            iptc = get_meta_iptc(stream)

            stream.seek(0)
            self.parse_meta(item, iptc)

            spec = get_renditions_spec(no_custom_crops=True)
            item['renditions'] = generate_renditions(
                stream, media_id, [media_id], 'image',
                stored_type, spec, url_for_media)

        return item
    def parse_item(self, image_path):
        """Build a picture item from an image file and its IPTC metadata.

        The file is stored through ``app.media`` and renditions are generated
        for it. ``firstcreated`` is derived from the IPTC date/time tags when
        both are present, and every tag listed in ``IPTC_MAPPING`` that exists
        in the image metadata is copied onto the item.

        :param image_path: path of the image file to ingest
        :return: the populated item dict
        """
        filename = os.path.basename(image_path)
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        item = {
            'guid': guid,
            config.VERSION: 1,
            config.ID_FIELD: guid,
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'mimetype': content_type,
            # NOTE(review): this is a naive local timestamp - confirm whether
            # a timezone-aware UTC value is expected here.
            'versioncreated': datetime.now(),
        }
        with open(image_path, 'rb') as f:
            _, content_type, file_metadata = process_file_from_stream(
                f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f,
                                    filename=filename,
                                    content_type=content_type,
                                    metadata=file_metadata)
            filemeta.set_filemeta(item, file_metadata)
            f.seek(0)
            # Fall back to an empty mapping so that an image without readable
            # IPTC data simply skips the metadata steps below.
            metadata = get_meta_iptc(f) or {}
            f.seek(0)
            rendition_spec = get_renditions_spec(no_custom_crops=True)
            renditions = generate_renditions(f, file_id, [file_id], 'image',
                                             content_type, rendition_spec,
                                             url_for_media)
            item['renditions'] = renditions

        try:
            date_created = metadata[TAG.DATE_CREATED]
            time_created = metadata[TAG.TIME_CREATED]
        except KeyError:
            # without both tags there is no creation date to set
            pass
        else:
            # we format proper ISO 8601 date so we can parse it with dateutil
            datetime_created = '{}-{}-{}T{}:{}:{}'.format(
                date_created[0:4], date_created[4:6], date_created[6:8],
                time_created[0:2], time_created[2:4], time_created[4:6])
            # The UTC offset (sign, hours, minutes) follows the six time
            # digits. Append it only when present instead of failing with
            # IndexError on a time value that carries no offset.
            if len(time_created) > 6:
                datetime_created += '{}{}:{}'.format(
                    time_created[6], time_created[7:9], time_created[9:])
            item['firstcreated'] = dateutil.parser.parse(datetime_created)

        # now we map IPTC metadata to superdesk metadata
        for source_key, dest_key in IPTC_MAPPING.items():
            if source_key in metadata:
                item[dest_key] = metadata[source_key]
        return item
Exemple #3
0
 def _parse_exif(self, data, item):
     """Copy selected IPTC fields of the preview rendition into ``item["extra"]``.

     Downloads ``data["item"]["renditions"]["preview"]["href"]`` and reads the
     IPTC metadata of the response body. Returns without touching ``item``
     when that href is missing from ``data`` or when no metadata was read.

     :param data: dict expected to hold ``item -> renditions -> preview -> href``
     :param item: item dict, updated in place
     """
     try:
         res = sess.get(data["item"]["renditions"]["preview"]["href"], timeout=10)
     except KeyError:
         # no preview rendition to inspect
         return
     metadata = get_meta_iptc(io.BytesIO(res.content))
     if not metadata:
         # Nothing to map; also protects the lookups below if the reader
         # returned None instead of a dict.
         return

     writer = metadata.get("Writer/Editor")
     if writer:
         item.setdefault("extra", {})[cp.CAPTION_WRITER] = writer

     headline = metadata.get("Headline")
     if headline:
         item.setdefault("extra", {})[cp.HEADLINE2] = headline

     keywords = metadata.get("Keywords")
     if keywords:
         # keywords arrive ";"-separated and are stored ", "-separated
         item.setdefault("extra", {})[cp.XMP_KEYWORDS] = ", ".join(
             keywords.split(";")
         )
Exemple #4
0
 def _parse_exif(self, data, item):
     """Copy selected IPTC fields of the preview rendition into ``item['extra']``.

     Downloads ``data['item']['renditions']['preview']['href']`` and reads the
     IPTC metadata of the response body. Returns without touching ``item``
     when that href is missing from ``data`` or when no metadata was read.

     :param data: dict expected to hold ``item -> renditions -> preview -> href``
     :param item: item dict, updated in place
     """
     try:
         res = sess.get(data['item']['renditions']['preview']['href'],
                        timeout=10)
     except KeyError:
         # no preview rendition to inspect
         return
     metadata = get_meta_iptc(io.BytesIO(res.content))
     if not metadata:
         # Nothing to map; also protects the lookups below if the reader
         # returned None instead of a dict.
         return

     writer = metadata.get('Writer/Editor')
     if writer:
         item.setdefault('extra', {})[cp.CAPTION_WRITER] = writer

     headline = metadata.get('Headline')
     if headline:
         item.setdefault('extra', {})[cp.HEADLINE2] = headline

     keywords = metadata.get('Keywords')
     if keywords:
         # keywords arrive ';'-separated and are stored ', '-separated
         item.setdefault('extra', {})[cp.XMP_KEYWORDS] = ', '.join(
             keywords.split(';'))
    def parse_item(self, image_path):
        """Create a picture item for ``image_path`` enriched with IPTC metadata.

        Stores the file through ``app.media``, generates its renditions, sets
        ``firstcreated`` from the IPTC date/time tags when both exist, and
        copies every ``IPTC_MAPPING`` tag found in the image onto the item.

        :param image_path: path of the image file to ingest
        :return: the populated item dict
        """
        filename = os.path.basename(image_path)
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        item = {'guid': guid,
                config.VERSION: 1,
                config.ID_FIELD: guid,
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                'mimetype': content_type,
                # NOTE(review): naive local timestamp - confirm whether a
                # timezone-aware UTC value is expected here.
                'versioncreated': datetime.now()
                }
        with open(image_path, 'rb') as f:
            _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f, filename=filename, content_type=content_type, metadata=file_metadata)
            filemeta.set_filemeta(item, file_metadata)
            f.seek(0)
            # An image without readable IPTC data yields an empty mapping, so
            # the metadata steps below are skipped rather than failing.
            metadata = get_meta_iptc(f) or {}
            f.seek(0)
            rendition_spec = get_renditions_spec(no_custom_crops=True)
            renditions = generate_renditions(f, file_id, [file_id], 'image',
                                             content_type, rendition_spec, url_for_media)
            item['renditions'] = renditions

        try:
            date_created, time_created = metadata[TAG.DATE_CREATED], metadata[TAG.TIME_CREATED]
        except KeyError:
            # without both tags there is no creation date to set
            pass
        else:
            # we format proper ISO 8601 date so we can parse it with dateutil
            datetime_created = '{}-{}-{}T{}:{}:{}'.format(date_created[0:4],
                                                          date_created[4:6],
                                                          date_created[6:8],
                                                          time_created[0:2],
                                                          time_created[2:4],
                                                          time_created[4:6])
            # The UTC offset (sign, hours, minutes) comes after the six time
            # digits; add it only if the value actually carries one, so a
            # short time value no longer raises IndexError.
            if len(time_created) > 6:
                datetime_created += '{}{}:{}'.format(time_created[6],
                                                     time_created[7:9],
                                                     time_created[9:])
            item['firstcreated'] = dateutil.parser.parse(datetime_created)

        # now we map IPTC metadata to superdesk metadata
        for source_key, dest_key in IPTC_MAPPING.items():
            if source_key in metadata:
                item[dest_key] = metadata[source_key]
        return item
Exemple #6
0
def _parse_binary(item):
    """Enrich ``item`` in place with IPTC and XMP metadata of its original binary.

    The binary is fetched via ``app.media.get`` using
    ``item['renditions']['original']['media']``. Nothing is changed when no
    IPTC data is read; the XMP step is skipped when no XMP data is read.

    :param item: item dict carrying an ``original`` rendition; updated in place
    :return: None
    """
    binary = app.media.get(item['renditions']['original']['media'])
    iptc = get_meta_iptc(binary)
    if not iptc:
        return

    extra = item.setdefault('extra', {})

    if iptc.get('By-line'):
        item['byline'] = iptc['By-line']

    if iptc.get('Category'):
        append_matching_subject(item, cp.PHOTO_CATEGORIES, iptc['Category'])

    if iptc.get('Credit'):
        # the short "AP" credit is spelled out
        item['creditline'] = 'THE ASSOCIATED PRESS' if iptc['Credit'] == 'AP' else iptc['Credit']

    if iptc.get('Source'):
        item['original_source'] = IPTC_SOURCE_MAPPING.get(iptc['Source'], iptc['Source'])
        extra[cp.ARCHIVE_SOURCE] = item['original_source']

    # Both location parts come from the IPTC data. The country used to be
    # looked up on ``item``, which never holds that IPTC key, so an image with
    # a country but no city got no dateline.
    city = iptc.get('City')
    country = iptc.get('Country/Primary Location Name')
    if city or country:
        item['dateline'] = {
            'located': {
                'city': city,
                'country': COUNTRY_MAPPING.get(country, country) if country else None,
            }
        }

    if iptc.get('By-line Title'):
        extra[cp.PHOTOGRAPHER_CODE] = iptc['By-line Title']

    if iptc.get('Writer/Editor'):
        extra[cp.CAPTION_WRITER] = iptc['Writer/Editor']

    if iptc.get('Copyright Notice'):
        item['copyrightnotice'] = iptc['Copyright Notice']

    if iptc.get('Caption/Abstract'):
        item['description_text'] = iptc['Caption/Abstract']

    if iptc.get('Special Instructions'):
        item['ednote'] = iptc['Special Instructions']

    if iptc.get(ORIGINAL_TRANSMISSION_REF):
        # a slug-length reference is stored as the original id, anything
        # else as the file name
        if len(iptc[ORIGINAL_TRANSMISSION_REF]) == cp.SLUG_LEN:
            extra[cp.ORIG_ID] = iptc[ORIGINAL_TRANSMISSION_REF]
        else:
            extra[cp.FILENAME] = iptc[ORIGINAL_TRANSMISSION_REF]

    # assigned last, so a slug-length fixture id overrides the id set above
    if iptc.get(FIXTURE_ID) and len(iptc[FIXTURE_ID]) == cp.SLUG_LEN:
        extra[cp.ORIG_ID] = iptc[FIXTURE_ID]

    # the IPTC reader consumed the stream; rewind before parsing XMP
    binary.seek(0)
    xmp = parse_xmp(binary)
    if not xmp:
        return

    if xmp.get('http://ns.adobe.com/photoshop/1.0/'):
        for key, val, _ in xmp['http://ns.adobe.com/photoshop/1.0/']:
            if key == 'photoshop:Urgency':
                item['urgency'] = int(val)
            elif key == 'photoshop:DateCreated':
                item['firstcreated'] = _parse_xmp_datetime(val)

    if xmp.get('http://purl.org/dc/elements/1.1/'):
        for key, val, _ in xmp['http://purl.org/dc/elements/1.1/']:
            # both spellings of the rights entry feed the same field
            if key in ('dc:rights', 'dc:rights[1]') and val:
                extra[cp.INFOSOURCE] = val
def _parse_binary(item):
    """Enrich ``item`` in place with IPTC and XMP metadata of its original binary.

    The binary is fetched via ``app.media.get`` using
    ``item["renditions"]["original"]["media"]``. Nothing is changed when no
    IPTC data is read; the XMP step is skipped when no XMP data is read.

    :param item: item dict carrying an ``original`` rendition; updated in place
    :return: None
    """
    binary = app.media.get(item["renditions"]["original"]["media"])
    iptc = get_meta_iptc(binary)
    if not iptc:
        return

    extra = item.setdefault("extra", {})

    if iptc.get("By-line"):
        item["byline"] = iptc["By-line"]

    if iptc.get("Category"):
        append_matching_subject(item, cp.PHOTO_CATEGORIES, iptc["Category"])

    if iptc.get("Credit"):
        # the short "AP" credit is spelled out
        item["creditline"] = ("THE ASSOCIATED PRESS"
                              if iptc["Credit"] == "AP" else iptc["Credit"])

    if iptc.get("Source"):
        item["original_source"] = IPTC_SOURCE_MAPPING.get(
            iptc["Source"], iptc["Source"])
        extra[cp.ARCHIVE_SOURCE] = item["original_source"]

    # Both location parts come from the IPTC data. The country used to be
    # looked up on ``item``, which never holds that IPTC key, so an image with
    # a country but no city got no dateline.
    city = iptc.get("City")
    country = iptc.get("Country/Primary Location Name")
    if city or country:
        item["dateline"] = {
            "located": {
                "city": city,
                "country":
                COUNTRY_MAPPING.get(country, country) if country else None,
            }
        }

    if iptc.get("By-line Title"):
        extra[cp.PHOTOGRAPHER_CODE] = iptc["By-line Title"]

    if iptc.get("Writer/Editor"):
        extra[cp.CAPTION_WRITER] = iptc["Writer/Editor"]

    if iptc.get("Copyright Notice"):
        item["copyrightnotice"] = iptc["Copyright Notice"]

    if iptc.get("Caption/Abstract"):
        item["description_text"] = iptc["Caption/Abstract"]

    if iptc.get("Special Instructions"):
        item["ednote"] = iptc["Special Instructions"]

    if iptc.get(ORIGINAL_TRANSMISSION_REF):
        # a slug-length reference is stored as the original id, anything
        # else as the file name
        if len(iptc[ORIGINAL_TRANSMISSION_REF]) == cp.SLUG_LEN:
            extra[cp.ORIG_ID] = iptc[ORIGINAL_TRANSMISSION_REF]
        else:
            extra[cp.FILENAME] = iptc[ORIGINAL_TRANSMISSION_REF]

    # assigned last, so a slug-length fixture id overrides the id set above
    if iptc.get(FIXTURE_ID) and len(iptc[FIXTURE_ID]) == cp.SLUG_LEN:
        extra[cp.ORIG_ID] = iptc[FIXTURE_ID]

    # the IPTC reader consumed the stream; rewind before parsing XMP
    binary.seek(0)
    xmp = parse_xmp(binary)
    if not xmp:
        return

    if xmp.get("http://ns.adobe.com/photoshop/1.0/"):
        for key, val, _ in xmp["http://ns.adobe.com/photoshop/1.0/"]:
            if key == "photoshop:Urgency":
                item["urgency"] = int(val)
            elif key == "photoshop:DateCreated":
                item["firstcreated"] = _parse_xmp_datetime(val)

    if xmp.get("http://purl.org/dc/elements/1.1/"):
        for key, val, _ in xmp["http://purl.org/dc/elements/1.1/"]:
            # both spellings of the rights entry feed the same field
            if key in ("dc:rights", "dc:rights[1]") and val:
                extra[cp.INFOSOURCE] = val