def import_rendition(guid, rendition_name, href, extract_metadata, trigger_events):
    """Replace the binary behind one rendition of an archived item.

    Looks the item up in the ``archive`` app by ``guid``, downloads ``href``,
    stores the download through ``app.media`` and points the rendition's
    ``href`` and ``media`` fields at the stored file.

    :param guid: guid of the archive item to update
    :param rendition_name: key that must already exist in the item's renditions
    :param href: URL the new content is downloaded from
    :param extract_metadata: when truthy, run ``process_file`` on the download
        and store the result alongside the file
    :param trigger_events: passed straight through to the archive ``update`` call
    :return: whatever the archive ``update`` call returns
    :raises superdesk.SuperdeskError: if no item matches ``guid`` or the
        rendition name is not present on the item
    """
    archive_app = superdesk.apps["archive"]
    item = archive_app.find_one(req=None, guid=guid)
    if not item:
        raise superdesk.SuperdeskError(
            payload="No document found in the media archive with this ID: %s" % guid
        )

    if rendition_name not in item["renditions"]:
        raise superdesk.SuperdeskError(payload="Invalid rendition name %s" % rendition_name)

    content, filename, content_type = download_file_from_url(href)

    metadata = None
    if extract_metadata:
        # The two-name unpacking requires exactly one "/" in the content type.
        file_type, _subtype = content_type.split("/")
        metadata = process_file(content, file_type)

    file_guid = app.media.put(content, filename, content_type, metadata)

    # Dotted keys: only these two fields of the rendition are sent as updates.
    prefix = "renditions." + rendition_name
    updates = {
        prefix + ".href": url_for_media(file_guid),
        prefix + ".media": file_guid,
    }
    return archive_app.update(id=guid, updates=updates, trigger_events=trigger_events)
Example #2
0
def import_rendition(guid, rendition_name, href, extract_metadata,
                     trigger_events):
    """Point an existing rendition of an archive item at freshly downloaded content.

    :param guid: guid used to find the item in the ``archive`` app
    :param rendition_name: rendition to update; must be a key of the item's
        ``renditions``
    :param href: location the content is downloaded from
    :param extract_metadata: if truthy, ``process_file`` is run on the content
        and its result is stored with the file
    :param trigger_events: forwarded unchanged to the archive ``update`` call
    :return: the result of the archive ``update`` call
    :raises superdesk.SuperdeskError: when the item cannot be found or the
        rendition name is unknown for that item
    """
    service = superdesk.apps['archive']
    doc = service.find_one(req=None, guid=guid)
    if not doc:
        raise superdesk.SuperdeskError(
            payload='No document found in the media archive with this ID: %s' % guid)
    if rendition_name not in doc['renditions']:
        raise superdesk.SuperdeskError(
            payload='Invalid rendition name %s' % rendition_name)

    content, filename, content_type = download_file_from_url(href)
    if extract_metadata:
        # Only the part before the "/" is handed to process_file.
        file_type, _ = content_type.split('/')
        metadata = process_file(content, file_type)
    else:
        metadata = None
    file_guid = app.media.put(content, filename, content_type, metadata)

    # Partial update: touch just the href and media of the named rendition.
    base_key = 'renditions.' + rendition_name
    updates = {}
    updates[base_key + '.href'] = url_for_media(file_guid)
    updates[base_key + '.media'] = file_guid
    return service.update(id=guid,
                          updates=updates,
                          trigger_events=trigger_events)
def import_rendition(guid, rendition_name, href, extract_metadata):
    """Download ``href`` and attach it to one rendition of an archive item.

    The item is fetched from the ``ARCHIVE`` resource service by ``guid``; the
    downloaded file is stored through ``app.media`` and the rendition's
    ``href`` and ``media`` fields are patched to reference it.

    :param guid: guid of the archive item
    :param rendition_name: rendition key that must already exist on the item
    :param href: URL of the content to download
    :param extract_metadata: when truthy, store the output of ``process_file``
        together with the file
    :return: the result of the service ``patch`` call
    :raises superdesk.SuperdeskError: if the item is not found or the
        rendition name is not one of the item's renditions
    """
    item = superdesk.get_resource_service(ARCHIVE).find_one(req=None, guid=guid)
    if not item:
        raise superdesk.SuperdeskError(
            payload='No document found in the media archive with this ID: %s' % guid)

    if rendition_name not in item['renditions']:
        raise superdesk.SuperdeskError(payload='Invalid rendition name %s' % rendition_name)

    content, filename, content_type = download_file_from_url(href)

    metadata = None
    if extract_metadata:
        # The two-name unpacking requires exactly one "/" in the content type.
        file_type, _subtype = content_type.split('/')
        metadata = process_file(content, file_type)

    file_guid = app.media.put(content, filename, content_type, metadata)

    # Only the two dotted rendition fields are part of the patch.
    key_prefix = 'renditions.' + rendition_name
    updates = {
        key_prefix + '.href': url_for_media(file_guid),
        key_prefix + '.media': file_guid,
    }
    return superdesk.get_resource_service(ARCHIVE).patch(guid, updates=updates)
Example #4
0
 def download_file(self, doc):
     """Fetch the file referenced by the document's ``URL`` entry.

     :param doc: mapping that may carry a ``URL`` entry
     :return: ``None`` when ``URL`` is missing or empty; otherwise the result
         of ``download_file_from_encoded_str`` for values starting with
         ``data`` and of ``download_file_from_url`` for everything else
     """
     url = doc.get('URL')
     if not url:
         return None
     # Values starting with "data" go to the encoded-string downloader.
     fetch = download_file_from_encoded_str if url.startswith('data') else download_file_from_url
     return fetch(url)
Example #5
0
 def download_file(self, doc):
     """Download whatever the document's ``URL`` entry points to.

     :param doc: mapping read with ``doc.get('URL')``
     :return: the downloader's result, or ``None`` if there is no usable URL
     """
     source = doc.get('URL')
     if source:
         # A "data" prefix selects the encoded-string downloader.
         if source.startswith('data'):
             return download_file_from_encoded_str(source)
         return download_file_from_url(source)
     return None
Example #6
0
 def test_download_file_from_url_relative(self):
     """A relative URL should be fetched from the app's ``SERVER_NAME`` host."""
     flask_app = flask.Flask(__name__)
     flask_app.config["SERVER_NAME"] = "localhost"
     with flask_app.app_context(), requests_mock.mock() as mocker:
         # Only the absolute localhost URL is registered with the mock.
         mocker.get("http://localhost/test/foo.jpg", body=io.BytesIO(b"data"))
         result = download_file_from_url("/test/foo.jpg")
     stream = result[0]
     self.assertEqual(b"data", stream.read())
 def test_download_file_from_url_relative(self):
     """Check that a path-only URL is downloaded via the configured server name."""
     payload = b'data'
     test_app = flask.Flask(__name__)
     test_app.config['SERVER_NAME'] = 'localhost'
     with test_app.app_context():
         with requests_mock.mock() as http:
             # The mock answers only for the absolute localhost address.
             http.get('http://localhost/test/foo.jpg', body=io.BytesIO(payload))
             downloaded = download_file_from_url('/test/foo.jpg')
     content = downloaded[0]
     self.assertEqual(b'data', content.read())
 def test_download_file_from_url_relative(self):
     """Relative hrefs are expected to be served from ``SERVER_NAME``."""
     server = flask.Flask(__name__)
     server.config['SERVER_NAME'] = 'localhost'
     response_body = io.BytesIO(b'data')
     with server.app_context(), requests_mock.mock() as requests_stub:
         # Register the absolute URL the relative path should map to.
         requests_stub.get('http://localhost/test/foo.jpg', body=response_body)
         out = download_file_from_url('/test/foo.jpg')
     first = out[0]
     self.assertEqual(b'data', first.read())
def import_rendition(guid, rendition_name, href, trigger_events):
    """Download ``href`` and set it as the href of one rendition of an archive item.

    :param guid: guid used to look the item up in the ``archive`` app
    :param rendition_name: rendition to update; must be a key of the item's
        ``renditions``
    :param href: URL handed to ``download_file_from_url``
    :param trigger_events: forwarded unchanged to the archive ``update`` call
    :return: the result of the archive ``update`` call
    :raises superdesk.SuperdeskError: if the item is missing or the rendition
        name is not present on it
    """
    archive_app = superdesk.apps['archive']
    item = archive_app.find_one(req=None, guid=guid)
    if not item:
        raise superdesk.SuperdeskError(
            payload='No document found in the media archive with this ID: %s' % guid)

    if rendition_name not in item['renditions']:
        raise superdesk.SuperdeskError(payload='Invalid rendition name %s' % rendition_name)

    # The download result is passed as-is to url_for_media.
    file_guid = download_file_from_url(href)

    # Partial update: only the rendition's href is changed.
    updates = {'renditions.' + rendition_name + '.href': url_for_media(file_guid)}
    return archive_app.update(id=guid, updates=updates, trigger_events=trigger_events)
Example #10
0
def update_renditions(item, href):
    """Download the original at ``href`` and fill in the item's renditions.

    On success ``item`` gains ``renditions``, ``mimetype`` and ``filemeta``.
    If anything fails, every media file recorded in ``inserted`` is deleted
    and the exception is re-raised.

    :param item: mapping that is updated in place
    :param href: URL of the original file
    """
    inserted = []
    try:
        content, filename, content_type = download_file_from_url(href)
        # The two-name unpacking requires exactly one "/" in the content type.
        file_type, _subtype = content_type.split('/')
        metadata = process_file(content, file_type)

        original_id = app.media.put(content, filename, content_type, metadata)
        inserted.append(original_id)

        picture_spec = app.config.get('RENDITIONS', {}).get('picture', {})
        # `inserted` is handed over as well; presumably generate_renditions
        # records the files it stores there -- confirm against its definition.
        item['renditions'] = generate_renditions(
            content, original_id, inserted, file_type,
            content_type, picture_spec, url_for_media)
        item['mimetype'] = content_type
        item['filemeta'] = metadata
    except Exception:
        # Roll back stored files before propagating the error.
        for stored_id in inserted:
            app.media.delete(stored_id)
        raise
Example #11
0
def update_renditions(item, href, old_item):
    """Populate the renditions of an item, reusing already stored ones if possible.

    When ``old_item`` has a truthy ``renditions.original.media`` value, its
    renditions, mimetype and file metadata are copied onto ``item`` and nothing
    is downloaded; this avoids fetching the same image again and leaving
    orphaned media entries. Otherwise the original is downloaded from ``href``
    and renditions are generated from it. On any error the media files recorded
    in ``inserted`` are deleted and the exception is re-raised after logging.

    :param item: parsed item from source; updated in place
    :param href: reference to original
    :param old_item: the item that we have already ingested, if it exists
    :return: ``None``; the results are written onto ``item``
    """
    inserted = []
    try:
        # Keep an existing set of renditions instead of downloading again.
        if old_item:
            previous = old_item.get('renditions', {})
            if previous.get('original', {}).get('media', {}):
                item['renditions'] = old_item['renditions']
                for field in ('mimetype', 'filemeta', 'filemeta_json'):
                    item[field] = old_item.get(field)
                return

        content, filename, content_type = download_file_from_url(href)
        # The two-name unpacking requires exactly one "/" in the content type.
        file_type, _subtype = content_type.split('/')
        metadata = process_file(content, file_type)

        original_id = app.media.put(content, filename, content_type, metadata)
        inserted.append(original_id)

        spec = get_renditions_spec()
        item['renditions'] = generate_renditions(
            content, original_id, inserted, file_type, content_type, spec,
            url_for_media)
        item['mimetype'] = content_type
        set_filemeta(item, metadata)
    except Exception as e:
        logger.exception(e)
        # Roll back stored files before propagating the error.
        for stored_id in inserted:
            app.media.delete(stored_id)
        raise
Example #12
0
def update_renditions(item, href):
    """Fetch the original from ``href`` and store generated renditions on ``item``.

    Sets ``renditions``, ``mimetype`` and ``filemeta`` on ``item``. Should any
    step raise, the media files listed in ``inserted`` are deleted and the
    exception propagates.

    :param item: mapping updated in place
    :param href: URL the original is downloaded from
    """
    inserted = []
    try:
        content, filename, content_type = download_file_from_url(href)
        # Only the part before the "/" is used as the file type.
        file_type, _ = content_type.split('/')
        metadata = process_file(content, file_type)
        media_id = app.media.put(content, filename, content_type, metadata)
        inserted.append(media_id)

        renditions_config = app.config.get('RENDITIONS', {})
        spec = renditions_config.get('picture', {})
        generated = generate_renditions(content, media_id, inserted, file_type,
                                        content_type, spec, url_for_media)
        item['renditions'] = generated
        item['mimetype'] = content_type
        item['filemeta'] = metadata
    except Exception:
        # Delete recorded media files, then let the error through.
        for media_file in inserted:
            app.media.delete(media_file)
        raise
def update_renditions(item, href, old_item):
    """Update renditions for an item, reusing previously stored ones if present.

    If the old_item has renditions uploaded in to media then the old rendition details are
    assigned to the item, this avoids repeatedly downloading the same image and leaving the media entries orphaned.
    If there is no old_item the original is downloaded and renditions are
    generated. On any error the media files recorded in ``inserted`` are
    deleted and the exception is re-raised.

    :param item: parsed item from source; updated in place
    :param href: reference to original
    :param old_item: the item that we have already ingested, if it exists
    :return: ``None``; the renditions are written onto ``item``
    """
    inserted = []
    try:
        # If there is an existing set of renditions we keep those
        if old_item:
            media = old_item.get('renditions', {}).get('original', {}).get('media', {})
            if media:
                item['renditions'] = old_item['renditions']
                item['mimetype'] = old_item.get('mimetype')
                item['filemeta'] = old_item.get('filemeta')
                # Pass the guid as a logging argument so the message is only
                # formatted when the record is actually emitted.
                logger.info("Reuters image not updated for GUID:%s", item[GUID_FIELD])
                return

        content, filename, content_type = download_file_from_url(href)
        # Only the major type is needed; the subtype is discarded.
        file_type, _ = content_type.split('/')

        metadata = process_file(content, file_type)
        file_guid = app.media.put(content, filename, content_type, metadata)
        inserted.append(file_guid)

        rendition_spec = app.config.get('RENDITIONS', {}).get('picture', {})
        renditions = generate_renditions(content, file_guid, inserted, file_type,
                                         content_type, rendition_spec, url_for_media)
        item['renditions'] = renditions
        item['mimetype'] = content_type
        item['filemeta'] = metadata
    except Exception:
        # Roll back stored files before propagating the error.
        for file_id in inserted:
            app.media.delete(file_id)
        raise
 def fetch_rendition(self, rendition):
     """Download the rendition's ``href`` and return the first item of the result.

     :param rendition: mapping read with ``rendition.get('href')``
     :return: the first of the three values returned by ``download_file_from_url``
     """
     href = rendition.get('href')
     content, _filename, _content_type = download_file_from_url(href)
     return content
 def fetch_rendition(self, rendition):
     """Return the downloaded content for the rendition's ``href``.

     ``download_file_from_url`` yields three values; only the first is returned.
     """
     downloaded = download_file_from_url(rendition.get('href'))
     body, _name, _mime = downloaded
     return body
Example #16
0
 def download_file(self, doc):
     """Download the file at the document's ``URL`` entry.

     :param doc: mapping read with ``doc.get('URL')``
     :return: the result of ``download_file_from_url``, or ``None`` when the
         ``URL`` entry is missing or empty
     """
     target = doc.get('URL')
     return download_file_from_url(target) if target else None