コード例 #1
0
ファイル: loader.py プロジェクト: ultimatecoder/open-ledger
def _insert_image(iterator, reader, chunk_size, skip_existence_check=False):
    """Bulk-create `Image` records from chunks of Open Images rows.

    Args:
        iterator: Callable invoked as ``iterator(chunk_size, reader)``; it must
            yield chunks, each an iterable of mapping-like rows.
        reader: Row source handed through to ``iterator`` unchanged.
        chunk_size: Chunk size handed through to ``iterator`` unchanged.
        skip_existence_check: When true, every row is inserted without first
            querying the database. When false, rows whose ``ImageID`` is
            already stored as a ``foreign_identifier`` are skipped.

    Each row is read for the keys ``ImageID``, ``OriginalURL``,
    ``Thumbnail300KURL``, ``OriginalLandingURL``, ``AuthorProfileURL``,
    ``Author``, ``Title`` and ``OriginalSize``.
    """
    for chunk in iterator(chunk_size, reader):
        images = []
        for row in chunk:
            # Only hit the database when the caller asked for the check, and
            # skip (rather than re-insert) rows that are already present.
            if not skip_existence_check and models.Image.objects.filter(
                    foreign_identifier=row['ImageID']).exists():
                continue

            image = models.Image()
            image.identifier = signals.create_identifier(
                row['OriginalURL'])
            image.foreign_identifier = row['ImageID']
            image.url = row['OriginalURL']
            image.thumbnail = row['Thumbnail300KURL']
            image.foreign_landing_url = row['OriginalLandingURL']
            image.license = 'by'
            image.provider = 'flickr'
            image.source = 'openimages'
            image.license_version = '2.0'
            image.creator_url = row['AuthorProfileURL']
            image.creator = row['Author']
            image.title = row['Title']
            image.filesize = row['OriginalSize']
            images.append(image)

        # One bulk insert per chunk; nothing to do for an all-skipped chunk.
        if images:
            models.Image.objects.bulk_create(images)
            log.debug("*** Committing set of %d images", len(images))
コード例 #2
0
 def test_tag_image(self):
     """A saved image and a saved tag can be linked through `ImageTags`."""
     picture = models.Image(url='http://example.com', license="CC0")
     label = models.Tag(name='tagname', foreign_identifier='tagid')
     # Both sides must be persisted before the join row can reference them
     for record in (label, picture):
         record.save()
     link = models.ImageTags(image=picture, tag=label)
     link.save()
コード例 #3
0
 def test_image_model(self):
     """Saving an Image with only a URL and a license should add one row."""
     assert models.Image.objects.count() == 0
     record = models.Image()
     for field, value in (('url', 'http://example.com'), ('license', 'CC0')):
         setattr(record, field, value)
     record.save()
     assert models.Image.objects.count() == 1
コード例 #4
0
    def test_tags_list_image(self):
        """`Image.tags_list` should come back from the database as a sequence of values"""
        stored = models.Image(url='http://example.com', license="CC0")
        stored.tags_list = ['a', 'b']
        stored.save()

        # Reload the row and check the field still behaves like a list
        fetched = models.Image.objects.all().first()
        assert 2 == len(fetched.tags_list)
        assert "a" == fetched.tags_list[0]
コード例 #5
0
    def test_list(self):
        """A saved List should accept a saved Image in its `images` relation"""
        picture = models.Image(url='http://example.com', license="CC0")
        collection = models.List(title='test')
        # Persist the list first, then the image, before relating them
        collection.save()
        picture.save()

        collection.images.add(picture)

        lists = models.List.objects
        assert 1 == lists.count()
        assert 1 == lists.first().images.count()
        assert picture == lists.first().images.first()
コード例 #6
0
def serialize(result):
    """For a given Met result, map that to our database.

    The first entry of ``result['ImageInfo']`` flagged ``PrimaryDisplay``
    supplies the image and thumbnail URLs. The creator is assembled from the
    ``Tombstone`` entries whose ``Name`` is in ``CREATOR_LABELS``.

    Returns:
        The ``Image`` that was built, or ``None`` when no primary image URL
        was found. The image is returned even when saving it raised
        ``IntegrityError``; that error is logged as a warning.
    """
    thumbnail = None
    url = None
    for info in result['ImageInfo']:
        if info['PrimaryDisplay']:
            # Use this one
            thumbnail = ENDPOINT_BASE_IMAGE_URL + info['Thumbnail']
            url = ENDPOINT_BASE_IMAGE_URL + info['LargeWebsite']
            break
    if not url:
        log.warning("Did not get an image URL for %s", result)
        return None

    image = models.Image(url=url)
    image.provider = PROVIDER_NAME
    image.source = SOURCE_NAME

    # Creator might be spread over a few tombstone fields
    creator_names = []
    for entry in result['Tombstone']:
        if entry['Name'] in CREATOR_LABELS:
            # A fresh parser per value so output from one field never leaks
            # into the next
            parser = CreatorParser()
            parser.feed(entry['Value'])
            creator_names.append(" ".join(parser.out))
    if creator_names:
        image.creator = ", ".join(creator_names)

    image.thumbnail = thumbnail
    image.license = "cc0"
    image.license_version = '1.0'
    image.foreign_identifier = result['CollectionObject']['CRDID']
    image.foreign_landing_url = FOREIGN_LANDING_BASE_URL + str(
        image.foreign_identifier)
    image.title = result['CollectionObject']['Title']
    image.identifier = signals.create_identifier(image.url)
    image.last_synced_with_source = timezone.now()
    try:
        image.save()
    except IntegrityError as e:
        # Best effort: report the constraint failure and carry on
        log.warning(e)
    else:
        log.info("Adding image %s-%s (%s) identifier %s", image.title,
                 image.creator, image.foreign_identifier, image.identifier)
    return image
コード例 #7
0
def serialize(result):
    """Build an unsaved `Image` from one Europeana result.

    Returns None when the result has no 'edmIsShownBy' entry or when its id
    is longer than 79 characters. Any English concept labels present are
    stored on the image's `tags_list`, and a `Tag` row is fetched or created
    for each of them.
    """
    if 'edmIsShownBy' not in result:
        return None

    # Some Europeana identifiers are longer than we support (>80 chars!).
    # Those records are skipped for now: truncating them or running an
    # expensive db migration is not wanted at this point.
    if len(result['id']) > 79:
        return None

    url = result['edmIsShownBy'][0]
    thumbnail_base = 'https://www.europeana.eu/api/v2/thumbnail-by-url.json?size=w200&type=IMAGE&'

    image = models.Image(url=url)
    image.thumbnail = thumbnail_base + urllib.parse.urlencode({'uri': url})
    image.source = SOURCE_NAME
    image.provider = SOURCE_NAME
    if 'dcCreator' in result:
        image.creator = result['dcCreator'][0]
    else:
        image.creator = None
    license_name, license_version = licenses.url_to_license(
        result['rights'][0])
    image.license = license_name
    image.license_version = license_version
    image.foreign_landing_url = result['guid']
    image.foreign_identifier = result['id']
    image.title = result['title'][0]
    image.identifier = signals.create_identifier(image.url)
    image.last_synced_with_source = timezone.now()

    # Tags, if available: every English concept label becomes one tag
    tag_names = []
    if 'edmConceptPrefLabelLangAware' in result:
        labels_by_language = result['edmConceptPrefLabelLangAware']
        if 'en' in labels_by_language:
            for label in labels_by_language['en']:
                models.Tag.objects.get_or_create(name=label.lower(),
                                                 source=SOURCE_NAME)
                tag_names.append(label)
    image.tags_list = tag_names
    return image
コード例 #8
0
def serialize(result):
    """For a given Rijks result, map that to our database.

    Builds an ``Image`` from ``result['webImage']``, ``result['links']``,
    ``result['principalOrFirstMaker']`` and ``result['longTitle']``.

    Returns:
        The populated ``Image``; this function does not save it.
    """
    url = result['webImage']['url']

    image = models.Image(url=url)
    image.provider = PROVIDER_NAME
    image.source = SOURCE_NAME
    image.creator = result['principalOrFirstMaker']

    # Thumbnails from Rijks are dynamic: a URL ending in '=s0' is resized by
    # swapping that suffix for '=s<THUMBNAIL_WIDTH>'. A URL with any other
    # shape gets no derived thumbnail and the field keeps its model default,
    # instead of failing on an unbound local.
    if url.endswith('=s0'):
        image.thumbnail = url[:-3] + '=s' + str(THUMBNAIL_WIDTH)

    image.license = "cc0"
    image.license_version = '1.0'
    image.foreign_landing_url = result['links']['web']
    image.foreign_identifier = result['webImage']['guid']
    image.width = result['webImage']['width']
    image.height = result['webImage']['height']
    image.title = result['longTitle']
    image.identifier = signals.create_identifier(image.url)
    image.last_synced_with_source = timezone.now()
    return image
コード例 #9
0
def import_from_file(from_file):
    """Import from an NDJSON file.

    Each non-blank line is parsed as one JSON record. A record becomes an
    ``Image`` when its ``resourceType`` contains 'still image' and it has at
    least one entry in ``captures``. ``Tag`` objects for every subject name
    seen are bulk-created before returning.

    Args:
        from_file: Path of the newline-delimited JSON file to read.

    Returns:
        A list of the ``Image`` objects built; this function does not save
        them.
    """
    results = []
    tags = {}
    # The context manager guarantees the handle is closed, even if a line
    # fails to parse.
    with open(from_file) as ndjson:
        for line in ndjson:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at end of file
                continue
            result = json.loads(line)
            captures = result.get('captures')
            if 'still image' not in result['resourceType'] or not captures:
                continue

            # Swap the last three characters of the capture URL for the size
            # selector we want.
            url = captures[0][:-3] + 't=w'  # 760 jpg, but it's the largest we're guaranteed to find
            thumbnail = url[:-3] + 't=r'  # 300px thumbnail
            image = models.Image(url=url)
            image.provider = PROVIDER_NAME
            image.source = SOURCE_NAME
            contributors = result.get('contributor')
            if contributors and 'contributorName' in contributors[0]:
                image.creator = contributors[0]['contributorName']
            image.thumbnail = thumbnail
            image.license = "cc0"
            image.license_version = '1.0'
            image.foreign_landing_url = result['digitalCollectionsURL']
            image.foreign_identifier = result['UUID']
            image.title = result['title']
            image.identifier = signals.create_identifier(image.url)
            image.last_synced_with_source = timezone.now()

            # 'subjectName' may be missing or null; treat that as "no tags"
            tag_names = [topic['text']
                         for topic in result.get('subjectName') or []]
            for tag in tag_names:
                # Keyed by name so each distinct tag is created only once
                tags[tag] = models.Tag(name=tag, source='nypl')
            image.tags_list = tag_names
            results.append(image)
    # Create the tags objects
    log.debug("Bulk creating %d new tags", len(tags))
    models.Tag.objects.bulk_create(tags.values())
    return results