Beispiel #1
0
 def create_record(self, id, media, storage=None):
     """Create a record linked to ``self.collection``, optionally with media.

     ``id`` is stored as both the identifier and the title field value, and
     the new record is appended to ``self.records``.  When ``media`` is
     truthy, a media entry with url ``'<media>.txt'`` is created on
     ``storage`` (falling back to ``self.storage``).

     Returns the newly created record.
     """
     # NOTE(review): the record name is the literal string 'id', not the
     # ``id`` argument -- confirm this is intentional.
     new_record = Record.objects.create(name='id')
     CollectionItem.objects.create(
         collection=self.collection, record=new_record)
     for field_name in ('identifier', 'title'):
         FieldValue.objects.create(
             record=new_record, field=standardfield(field_name), value=id)
     self.records.append(new_record)
     if not media:
         return new_record
     target_storage = storage or self.storage
     new_record.media_set.create(
         storage=target_storage, url='%s.txt' % media)
     return new_record
Beispiel #2
0
 def create_record(self, id, media, storage=None):
     """Build a record in ``self.collection`` and attach media if requested.

     The identifier and title field values are both set to ``id``.  The
     record is added to ``self.records``.  If ``media`` is truthy, a media
     object with url ``'<media>.txt'`` is created using ``storage`` or,
     when that is falsy, ``self.storage``.

     Returns the created record.
     """
     # NOTE(review): name is the literal 'id' rather than the ``id``
     # argument -- confirm this is intended.
     created = Record.objects.create(name='id')
     CollectionItem.objects.create(
         collection=self.collection, record=created)
     for standard_name in ('identifier', 'title'):
         FieldValue.objects.create(
             record=created, field=standardfield(standard_name), value=id)
     self.records.append(created)
     if media:
         media_url = '%s.txt' % media
         created.media_set.create(
             storage=storage or self.storage, url=media_url)
     return created
Beispiel #3
0
 def _annotation_filter(self):
     """Return keyword arguments selecting this item's description value.

     The mapping combines the presentation owner, this object's id as
     context id, the PresentationItem content type, the standard
     'description' field and this item's record.
     """
     # presumably passed as **kwargs to a FieldValue query/create call --
     # verify against callers
     return {
         'owner': self.presentation.owner,
         'context_id': self.id,
         'context_type': ContentType.objects.get_for_model(PresentationItem),
         'field': standardfield('description'),
         'record': self.record,
     }
Beispiel #4
0
    def _preload_work_to_images(self, record_ids):
        """Map record ids to ids of records that point at them via IsPartOf.

        Collects the identifier (and equivalent) values plus the
        dc.relation/IsPartOf values of the given records, then finds every
        dc.relation/IsPartOf field value equal to one of them.

        Returns a dict ``{record_id: [referencing_record_id, ...]}``; a
        record never lists itself.
        """
        is_part_of = dict(
            field__standard__prefix='dc',
            field__name='relation',
            refinement='IsPartOf',
        )
        identifier_fields = standardfield('identifier', equiv=True)
        matcher = Q(field__in=identifier_fields) | Q(**is_part_of)

        identifiers = FieldValue.objects.filter(
            matcher, record__in=record_ids,
        ).values_list('value', 'record__id')

        # index_value is matched on the first 32 characters of each value;
        # presumably a truncated indexed copy of ``value`` -- TODO confirm
        images = FieldValue.objects.filter(
            value__in=(pair[0] for pair in identifiers),
            index_value__in=(pair[0][:32] for pair in identifiers),
            **is_part_of
        ).values_list('record__id', 'value')

        # value -> ids of the requested records carrying that value
        owners_by_value = {}
        for value, owner_id in identifiers:
            owners_by_value.setdefault(value, []).append(owner_id)

        work_to_images = {}
        for image_record_id, target in images:
            for owner_id in owners_by_value.get(target, []):
                if owner_id == image_record_id:
                    continue
                work_to_images.setdefault(owner_id, []).append(
                    image_record_id)

        return work_to_images
Beispiel #5
0
def find_records_without_media(request):
    """View: pick a collection and storage, then list records lacking media.

    Only collections and storages the user may manage are offered; a 404 is
    raised when either list is empty.  On a valid POST the selected pair is
    passed to ``analyze_records`` and the identifier values of the returned
    records are collected, ordered by value.
    """
    storage_choices = get_list_or_404(
        filter_by_access(request.user, Storage, manage=True)
        .order_by('title')
        .values_list('id', 'title'))
    manageable_collections = get_list_or_404(
        filter_by_access(request.user, Collection, manage=True))
    collection_choices = [
        (c.id, c.title)
        for c in sorted(manageable_collections, key=lambda c: c.title)
    ]

    class SelectionForm(forms.Form):
        collection = forms.ChoiceField(choices=collection_choices)
        storage = forms.ChoiceField(choices=storage_choices)

    records = identifiers = []
    analyzed = False

    if request.method != 'POST':
        form = SelectionForm(request.GET)
    else:
        form = SelectionForm(request.POST)
        if form.is_valid():
            selected = form.cleaned_data
            # Re-check manage access on the submitted ids
            collection = get_object_or_404(filter_by_access(
                request.user,
                Collection.objects.filter(id=selected['collection']),
                manage=True))
            storage = get_object_or_404(filter_by_access(
                request.user,
                Storage.objects.filter(id=selected['storage']),
                manage=True))

            records = analyze_records(collection, storage)
            analyzed = True

            identifier_fields = standardfield('identifier', equiv=True)
            identifiers = FieldValue.objects.filter(
                field__in=identifier_fields,
                record__in=records,
            ).order_by('value').values_list('value', flat=True)

    context = {
        'form': form,
        'identifiers': identifiers,
        'records': records,
        'analyzed': analyzed,
    }
    return render_to_response('storage_find_records_without_media.html',
                              context,
                              context_instance=RequestContext(request))
Beispiel #6
0
def find_records_without_media(request):
    """Render the "records without media" form and, on POST, its results.

    The form offers the storages and collections the user can manage (404
    if either set is empty).  When a valid selection is posted, the records
    returned by ``analyze_records`` and their identifier values (sorted by
    value) are added to the template context.
    """
    user = request.user
    storage_options = get_list_or_404(
        filter_by_access(user, Storage, manage=True)
        .order_by('title').values_list('id', 'title'))
    collections = get_list_or_404(
        filter_by_access(user, Collection, manage=True))
    collection_options = [
        (c.id, c.title) for c in sorted(collections, key=lambda c: c.title)]

    class SelectionForm(forms.Form):
        collection = forms.ChoiceField(choices=collection_options)
        storage = forms.ChoiceField(choices=storage_options)

    records = identifiers = []
    analyzed = False

    is_post = request.method == 'POST'
    form = SelectionForm(request.POST if is_post else request.GET)

    if is_post and form.is_valid():
        # Look the selected objects up again through the access filter
        collection_qs = Collection.objects.filter(
            id=form.cleaned_data['collection'])
        collection = get_object_or_404(
            filter_by_access(request.user, collection_qs, manage=True))
        storage_qs = Storage.objects.filter(id=form.cleaned_data['storage'])
        storage = get_object_or_404(
            filter_by_access(request.user, storage_qs, manage=True))

        records = analyze_records(collection, storage)
        analyzed = True

        identifiers = FieldValue.objects.filter(
            field__in=standardfield('identifier', equiv=True),
            record__in=records,
        ).order_by('value').values_list('value', flat=True)

    return render_to_response(
        'storage_find_records_without_media.html',
        {
            'form': form,
            'identifiers': identifiers,
            'records': records,
            'analyzed': analyzed,
        },
        context_instance=RequestContext(request))
Beispiel #7
0
 def create_record(self, id):
     """Create a record in ``self.collection`` whose identifier is ``id``.

     The record is also appended to ``self.records`` and returned.
     """
     # NOTE(review): name is the literal 'id', not the ``id`` argument --
     # confirm this is intentional.
     new_record = Record.objects.create(name='id')
     CollectionItem.objects.create(
         collection=self.collection,
         record=new_record,
     )
     identifier_field = standardfield('identifier')
     FieldValue.objects.create(
         record=new_record,
         field=identifier_field,
         value=id,
     )
     self.records.append(new_record)
     return new_record
Beispiel #8
0
 def create_record(self, id):
     """Add a new record with identifier ``id`` to ``self.collection``.

     Returns the record after appending it to ``self.records``.
     """
     # NOTE(review): the record name is the literal string 'id' rather
     # than the ``id`` argument -- confirm this is intended.
     created = Record.objects.create(name='id')
     CollectionItem.objects.create(
         collection=self.collection, record=created)
     FieldValue.objects.create(
         record=created,
         field=standardfield('identifier'),
         value=id)
     self.records.append(created)
     return created
Beispiel #9
0
 def _annotation_filter(self):
     """Return the lookup arguments for this item's description value.

     Keys: ``owner`` (the presentation owner), ``context_id`` (this
     object's id), ``context_type`` (content type of PresentationItem),
     ``field`` (standard 'description' field) and ``record``.
     """
     item_type = ContentType.objects.get_for_model(PresentationItem)
     lookup = {
         'owner': self.presentation.owner,
         'context_id': self.id,
         'context_type': item_type,
         'field': standardfield('description'),
         'record': self.record,
     }
     return lookup
Beispiel #10
0
    def create_record(self, remote_id):
        """Create a local record from a record on the shared remote server.

        Fetches the remote record's JSON from the 'api-record' URL on the
        server named by ``self.shared.url``, creates a ``Record`` with one
        ``FieldValue`` per metadata entry, adds it to the collection from
        ``self.get_collection()`` and runs a 'shared_download_media' job
        for the record's image.

        remote_id: id of the record on the remote server.
        Returns the newly created record.
        """
        collection = self.get_collection()

        url = urlparse(self.shared.url)
        server = '://'.join([url.scheme, url.netloc])

        url = server + reverse('api-record',
                               kwargs={
                                   'id': remote_id,
                                   'name': '_'
                               })

        response = _fetch_url(url, self.shared.username, self.shared.password)
        data = json.loads(response.read())

        title = data['record']['title']
        image_url = data['record']['image']
        # An image URL without a scheme is relative to the remote server
        if '://' not in image_url:
            image_url = server + image_url

        record = Record.objects.create(name=title,
                                       source=url,
                                       manager=self.get_source_id())

        # Fallback field for metadata without a usable 'dc' mapping
        unmapped_field, _ = Field.objects.get_or_create(
            name='shared-data', defaults={
                'label': 'Metadata',
            })

        for index, metadata in enumerate(data['record']['metadata']):
            try:
                field = (standardfield(metadata['dc'])
                         if metadata.get('dc') else unmapped_field)
            except Field.DoesNotExist:
                field = unmapped_field
            FieldValue.objects.create(
                record=record,
                field=field,
                order=metadata.get('order', index),
                value=metadata['value'],
                label=metadata['label'],
            )

        CollectionItem.objects.create(collection=collection, record=record)

        # create job to download actual media file; use the same json
        # module that parsed the response instead of mixing in simplejson
        job = JobInfo.objects.create(func='shared_download_media',
                                     arg=json.dumps(
                                         dict(shared_id=self.shared.id,
                                              record=record.id,
                                              url=image_url)))
        job.run()

        return record
Beispiel #11
0
    def create_record(self, remote_id):
        """Import the remote record ``remote_id`` as a local record.

        Downloads the record's JSON from the shared server's 'api-record'
        URL, stores its metadata as field values, links the record to the
        collection from ``self.get_collection()`` and runs a
        'shared_download_media' job for its image.

        Returns the created record.
        """
        collection = self.get_collection()

        parsed = urlparse(self.shared.url)
        server = '://'.join([parsed.scheme, parsed.netloc])
        record_path = reverse(
            'api-record', kwargs={'id': remote_id, 'name': '_'})
        url = server + record_path

        response = _fetch_url(url, self.shared.username, self.shared.password)
        remote = json.loads(response.read())['record']

        title = remote['title']
        image_url = remote['image']
        # Scheme-less image URLs are relative to the remote server
        if '://' not in image_url:
            image_url = server + image_url

        record = Record.objects.create(
            name=title, source=url, manager=self.get_source_id())

        # Fallback field for metadata that has no usable 'dc' mapping
        unmapped_field, _ = Field.objects.get_or_create(
            name='shared-data', defaults={'label': 'Metadata'})

        for position, entry in enumerate(remote['metadata']):
            field = unmapped_field
            dc_name = entry.get('dc')
            if dc_name:
                try:
                    field = standardfield(dc_name)
                except Field.DoesNotExist:
                    pass
            FieldValue.objects.create(
                record=record,
                field=field,
                order=entry.get('order', position),
                value=entry['value'],
                label=entry['label'],
            )

        CollectionItem.objects.create(collection=collection, record=record)

        # create job to download actual media file
        job_arg = json.dumps(dict(shared_id=self.shared.id,
                                  record=record.id, url=image_url))
        job = JobInfo.objects.create(
            func='shared_download_media', arg=job_arg)
        job.run()

        return record
Beispiel #12
0
 def add_field(f, v, o):
     """Create field values on the enclosing ``record`` for ``v``.

     f: standard field name, resolved with ``standardfield``.
     v: a single value or a (possibly nested) list of values; falsy
        values are skipped.
     o: order assigned to every field value created.

     Creation is best effort: errors raised while creating a value are
     ignored.
     """
     if isinstance(v, list):
         for w in v:
             add_field(f, w, o)
     elif v:
         # TODO: neaten?
         try:
             FieldValue.objects.create(record=record,
                                       field=standardfield(f),
                                       order=o,
                                       value=v)
         except Exception:
             # Deliberately best effort, but do not swallow
             # KeyboardInterrupt/SystemExit as a bare except would.
             pass
Beispiel #13
0
def match_up_media(storage, collection):
    """Pair files reported by ``analyze_media`` with records by identifier.

    A file is matched when exactly one record in ``collection`` with no
    owner has an identifier (or equivalent field) value equal to the file
    name, with or without its extension.

    Returns a list of ``(record, file)`` tuples.
    """
    # Only the second element of analyze_media's result is used here
    _, candidate_files = analyze_media(storage)
    identifier_fields = standardfield('identifier', equiv=True)
    matches = []
    for path in candidate_files:
        filename = os.path.basename(path)
        stem = os.path.splitext(filename)[0]
        # Accept either the bare name or the name including its extension
        found = Record.by_fieldvalue(
            identifier_fields, (stem, filename),
        ).filter(collection=collection, owner=None)
        if len(found) != 1:
            continue
        matches.append((found[0], path))
    return matches
Beispiel #14
0
    def create_record(self, url):
        """Create a record from the page at ``url`` and queue its download.

        Parses the page's labelled table cells (date, title, description,
        id, credit) into field values, adds the record to the collection
        from ``self.get_collection()`` and runs a 'nasa_download_media'
        job for the media link with the largest pixel dimensions.

        Returns the created record.
        """
        collection = self.get_collection()

        soup = BeautifulSoup(urllib2.urlopen(url))

        def value_cell(label):
            # The <td> following the cell that holds the given label text
            return soup.find(text=label).parent.findNextSibling('td')

        def pixel_count(entry):
            # Link text like "640 x 480" -> area; 0 when no size is found
            match = re.search(r'(?P<width>\d+) x (?P<height>\d+)', entry[1])
            if not match:
                return 0
            return int(match.group('width')) * int(match.group('height'))

        # get metadata
        date = value_cell('Date:&nbsp;').next
        title = value_cell('Title:&nbsp;').next
        description = value_cell('Description:&nbsp;').next
        identifier = value_cell('ID:&nbsp;').next
        credit_url = value_cell('Credit:&nbsp;').findNext('a')['href']
        credit_title = value_cell('Credit:&nbsp;').findNext('a').next

        record = Record.objects.create(
            name=title, source=url, manager='nasaimageexchange')

        # Position in this sequence doubles as the field value order
        field_values = (
            ('title', title),
            ('description', description),
            ('date', date),
            ('identifier', identifier),
            ('contributor', credit_title),
            ('contributor', credit_url),
        )
        for order, (field_name, value) in enumerate(field_values):
            FieldValue.objects.create(record=record,
                                      field=standardfield(field_name),
                                      order=order,
                                      value=value)

        CollectionItem.objects.create(collection=collection, record=record)

        # media links and dimensions, largest first
        links = value_cell('Format:&nbsp;').findAll('a')
        media = [(a['href'], a.next) for a in links]
        media.sort(key=pixel_count, reverse=True)

        # create job to download actual media file
        job_arg = simplejson.dumps(dict(record=record.id, url=media[0][0]))
        job = JobInfo.objects.create(func='nasa_download_media', arg=job_arg)
        job.run()

        return record
Beispiel #15
0
 def add_field(f, v, o):
     """Add ``v`` to the enclosing ``record`` as value(s) of field ``f``.

     f: standard field name, looked up via ``standardfield``.
     v: one value or a (possibly nested) list of values; falsy values
        are ignored.
     o: order stored on each created field value.

     Failures while creating a value are ignored (best effort).
     """
     if isinstance(v, list):
         for w in v:
             add_field(f, w, o)
     elif v:
         # TODO: neaten?
         try:
             FieldValue.objects.create(
                 record=record,
                 field=standardfield(f),
                 order=o,
                 value=v)
         except Exception:
             # Best effort on purpose; unlike a bare except this lets
             # KeyboardInterrupt/SystemExit propagate.
             pass
Beispiel #16
0
def redirect_to_video(request, id):
    """Show the media player for the online-video record identified by ``id``.

    Looks the record up by its identifier (or an equivalent field) within
    the 'online-video-collection' collection, raises ``Http404`` when no
    record matches, logs an 'ovc-redirect' activity and renders the viewer
    shell with the OVC templates.
    """
    identifier = standardfield('identifier')
    lookup_fields = [identifier] + list(identifier.get_equivalent_fields())
    matches = Record.by_fieldvalue(lookup_fields, id).filter(
        collection__name='online-video-collection')
    if not matches:
        raise Http404()

    # The first match is used if several records share the identifier
    record = matches[0]
    Activity.objects.create(
        event='ovc-redirect',
        request=request,
        content_object=record,
        data=dict(id=id),
    )
    request.master_template = 'ovc_master.html'

    return viewer_shell(
        request, 'mediaplayer', record.id, template='ovc_player.html')
Beispiel #17
0
def main(request, year='1994'):
    """Render the Furious Flower page for ``year`` with its records in order.

    Records of the 'furious-flower-<year>' collection are sorted by the
    integer value of their 'relation' field; records without such a value
    follow in no particular order.
    """
    collection = Collection.objects.get(name='furious-flower-%s' % year)
    relation_field = standardfield('relation')
    order = FieldValue.objects.filter(
        field=relation_field,
        record__collection=collection,
    ).values_list('record__id', 'value')
    # Tuple-unpacking lambda parameters are Python 2 only; index instead
    order = sorted(order, key=lambda pair: int(pair[1]))
    records = dict((r.id, r) for r in collection.records.all())
    sorted_records = []
    for record_id, _ in order:
        # pop() so a record with several relation values is listed once
        if record_id in records:
            sorted_records.append(records.pop(record_id))
    sorted_records.extend(records.values())

    return render_to_response('furiousflower-main.html',
                              {'records': sorted_records,
                               'year': year,
                               },
                              context_instance=RequestContext(request))
Beispiel #18
0
def main(request, year='1994'):
    """Render the Furious Flower main page for the given ``year``.

    The records of collection 'furious-flower-<year>' are listed in the
    order given by the integer value of their 'relation' field, followed
    by any records that have no such value.
    """
    collection = Collection.objects.get(name='furious-flower-%s' % year)
    relation_field = standardfield('relation')
    order = FieldValue.objects.filter(
        field=relation_field,
        record__collection=collection,
    ).values_list('record__id', 'value')
    # lambda (r, o): ... is a syntax error on Python 3; index the pair
    order = sorted(order, key=lambda pair: int(pair[1]))
    records = dict((r.id, r) for r in collection.records.all())
    sorted_records = []
    for record_id, _ in order:
        # dict.has_key() was removed in Python 3; ``in`` works on both
        if record_id in records:
            sorted_records.append(records.pop(record_id))
    sorted_records.extend(records.values())

    return render_to_response('furiousflower-main.html',
                              {'records': sorted_records,
                               'year': year,
                               },
                              context_instance=RequestContext(request))
Beispiel #19
0
def _dummy_record(title, url):
    """Create a 'dummy'-managed record and run its media download job.

    The record gets ``title`` as its name and title field value and
    ``url`` as its source, is added to the collection returned by
    ``_get_collection()``, and a 'dummy_download_media' job is created and
    run for it.  Progress markers are printed along the way.

    Returns the created record.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3
    print('** _dummy_record (%s, %s)' % (title, url))
    record = Record.objects.create(name=title, source=url, manager='dummy')
    print('** _dummy_record.foo')
    FieldValue.objects.create(record=record,
                              field=standardfield('title'),
                              order=0,
                              value=title)
    collection = _get_collection()
    print('** _dummy_record.bar')
    CollectionItem.objects.create(collection=collection, record=record)
    print('** _dummy_record.baz')
    job = JobInfo.objects.create(func='dummy_download_media',
                                 arg=simplejson.dumps({
                                     'record': record.id,
                                     'url': url
                                 }))
    print('** _dummy_record.qux')
    job.run()
    return record
    def handle(self, *args, **kwargs):
        """Set record names to the slug of their identifier values.

        For every field value of the identifier field or an equivalent
        field, the owning record's name is replaced by
        ``slugify(value)`` when it differs, saving with
        ``force_update_name=True``.  Prints the number of saves performed.
        """
        updated = 0

        id_fields = standardfield('identifier', equiv=True)
        titles = FieldValue.objects.select_related('record').filter(
            field__in=id_fields)
        pb = ProgressBar(titles.count())

        for count, title in enumerate(titles):
            name = slugify(title.value)
            if name != title.record.name:
                title.record.name = name
                title.record.save(force_update_name=True)
                updated += 1

            pb.update(count)

        pb.done()

        # Function-call form works on both Python 2 and 3
        print("Updated %d record objects" % updated)
Beispiel #21
0
def _dummy_record(title, url):
    """Create a record managed by 'dummy' and run its download job.

    Stores ``title`` as the record name and title field value and ``url``
    as the source, links the record to the collection returned by
    ``_get_collection()``, then creates and runs a 'dummy_download_media'
    job.  Progress markers are printed between the steps.

    Returns the created record.
    """
    # print(...) with one argument is valid on both Python 2 and 3
    print('** _dummy_record (%s, %s)' % (title, url))
    record = Record.objects.create(name=title,
                                   source=url,
                                   manager='dummy')
    print('** _dummy_record.foo')
    FieldValue.objects.create(record=record,
                              field=standardfield('title'),
                              order=0,
                              value=title)
    collection = _get_collection()
    print('** _dummy_record.bar')
    CollectionItem.objects.create(collection=collection, record=record)
    print('** _dummy_record.baz')
    job = JobInfo.objects.create(
        func='dummy_download_media',
        arg=simplejson.dumps({
            'record': record.id,
            'url': url
        }))
    print('** _dummy_record.qux')
    job.run()
    return record
    def handle(self, *args, **kwargs):
        """Rename records after the slug of their identifier values.

        Walks every field value of the identifier field (or an equivalent
        field); whenever ``slugify(value)`` differs from the record's
        name, the name is replaced and saved with
        ``force_update_name=True``.  Reports the number of saves at the
        end.
        """
        identifier_fields = standardfield('identifier', equiv=True)
        values = FieldValue.objects.select_related('record').filter(
            field__in=identifier_fields)
        progress = ProgressBar(values.count())

        updated = 0
        for position, field_value in enumerate(values):
            record = field_value.record
            slug = slugify(field_value.value)
            if record.name != slug:
                record.name = slug
                record.save(force_update_name=True)
                updated += 1
            progress.update(position)

        progress.done()

        print("Updated %d record objects" % updated)
Beispiel #23
0
    def _preload_image_to_works(self, record_ids):
        """Map record ids to the ids of the records they are part of.

        Reads the dc.relation/IsPartOf values of the given records and
        resolves each value to a record whose identifier (or equivalent
        field) has that value.

        Returns a dict ``{record_id: [work_record_id, ...]}``.
        """
        relations = FieldValue.objects.filter(
            record__in=record_ids,
            field__standard__prefix='dc',
            field__name='relation',
            refinement='IsPartOf',
        ).values_list('record__id', 'value')

        # index_value is matched on the first 32 characters of each value;
        # presumably a truncated indexed copy of ``value`` -- TODO confirm
        identifier_rows = FieldValue.objects.filter(
            field__in=standardfield('identifier', equiv=True),
            value__in=(rel[1] for rel in relations),
            index_value__in=(rel[1][:32] for rel in relations),
        ).values_list('value', 'record__id')
        # If several records share a value, the last row wins
        work_id_by_value = dict(identifier_rows)

        image_to_works = {}
        for image_id, work_value in relations:
            matched_id = work_id_by_value.get(work_value)
            if not matched_id:
                continue
            image_to_works.setdefault(image_id, []).append(matched_id)

        return image_to_works
Beispiel #24
0
    def _preload_work_to_images(self, record_ids):
        """Find, for the given records, the records that are part of them.

        Gathers the identifier (and equivalent) values and the
        dc.relation/IsPartOf values of ``record_ids``, then looks up all
        dc.relation/IsPartOf field values equal to any of them.

        Returns a dict ``{record_id: [referencing_record_id, ...]}`` that
        never maps a record to itself.
        """
        part_of_lookup = {
            'field__standard__prefix': 'dc',
            'field__name': 'relation',
            'refinement': 'IsPartOf',
        }
        own_identifier = Q(field__in=standardfield('identifier', equiv=True))
        own_relation = Q(**part_of_lookup)

        identifiers = FieldValue.objects.filter(
            own_identifier | own_relation,
            record__in=record_ids,
        ).values_list('value', 'record__id')

        # index_value is compared against the first 32 characters of each
        # value; presumably a truncated indexed column -- TODO confirm
        images = FieldValue.objects.filter(
            value__in=(row[0] for row in identifiers),
            index_value__in=(row[0][:32] for row in identifiers),
            **part_of_lookup
        ).values_list('record__id', 'value')

        # value -> ids of the requested records that carry it
        records_by_value = {}
        for value, rec_id in identifiers:
            records_by_value.setdefault(value, []).append(rec_id)

        work_to_images = {}
        for referencing_id, value in images:
            targets = [
                rec_id for rec_id in records_by_value.get(value, [])
                if rec_id != referencing_id
            ]
            for rec_id in targets:
                work_to_images.setdefault(rec_id, []).append(referencing_id)

        return work_to_images
Beispiel #25
0
    def _preload_image_to_works(self, record_ids):
        """Resolve the IsPartOf relations of the given records to record ids.

        Each dc.relation/IsPartOf value of a record in ``record_ids`` is
        matched against identifier (or equivalent) field values; a match
        contributes the identified record's id.

        Returns a dict ``{record_id: [work_record_id, ...]}``.
        """
        work_relation = FieldValue.objects.filter(
            record__in=record_ids,
            field__standard__prefix='dc',
            field__name='relation',
            refinement='IsPartOf',
        ).values_list('record__id', 'value')

        identifier_fields = standardfield('identifier', equiv=True)
        # index_value is compared with the first 32 characters of each
        # value; presumably a truncated indexed column -- TODO confirm
        works = dict(
            FieldValue.objects.filter(
                field__in=identifier_fields,
                value__in=(pair[1] for pair in work_relation),
                index_value__in=(pair[1][:32] for pair in work_relation),
            ).values_list('value', 'record__id')
        )

        image_to_works = {}
        for source_id, target_value in work_relation:
            target_id = works.get(target_value)
            if target_id:
                image_to_works.setdefault(source_id, []).append(target_id)

        return image_to_works
Beispiel #26
0
    def create_record(self, url):
        """Build a record from the page at ``url`` and start its download.

        Reads date, title, description, id and credit from the page's
        labelled table cells, stores them as field values on a new record
        in the collection from ``self.get_collection()``, and runs a
        'nasa_download_media' job for the largest listed media link.

        Returns the created record.
        """
        collection = self.get_collection()

        page = BeautifulSoup(urllib2.urlopen(url))

        def cell_after(label):
            # The <td> that follows the cell containing the label text
            return page.find(text=label).parent.findNextSibling("td")

        def area(entry):
            # Link text such as "640 x 480" -> 307200; 0 without a size
            found = re.search(r"(?P<width>\d+) x (?P<height>\d+)", entry[1])
            if found is None:
                return 0
            return int(found.group("width")) * int(found.group("height"))

        # get metadata
        date = cell_after("Date:&nbsp;").next
        title = cell_after("Title:&nbsp;").next
        description = cell_after("Description:&nbsp;").next
        identifier = cell_after("ID:&nbsp;").next
        credit_url = cell_after("Credit:&nbsp;").findNext("a")["href"]
        credit_title = cell_after("Credit:&nbsp;").findNext("a").next

        record = Record.objects.create(
            name=title, source=url, manager="nasaimageexchange")

        # List position is used as the field value order
        ordered_values = [
            ("title", title),
            ("description", description),
            ("date", date),
            ("identifier", identifier),
            ("contributor", credit_title),
            ("contributor", credit_url),
        ]
        for order, (name, value) in enumerate(ordered_values):
            FieldValue.objects.create(
                record=record, field=standardfield(name),
                order=order, value=value)

        CollectionItem.objects.create(collection=collection, record=record)

        # media links and dimensions, largest first
        anchors = cell_after("Format:&nbsp;").findAll("a")
        media = [(a["href"], a.next) for a in anchors]
        media.sort(key=area, reverse=True)

        # create job to download actual media file
        payload = json.dumps(dict(record=record.id, url=media[0][0]))
        job = JobInfo.objects.create(func="nasa_download_media", arg=payload)
        job.run()

        return record
Beispiel #27
0
 def setUp(self):
     """Create the 'facet-fields' field set with title and creator fields."""
     self.fieldset = FieldSet.objects.create(title='facet-fields')
     for field_name in ('title', 'creator'):
         FieldSetField.objects.create(
             fieldset=self.fieldset,
             field=standardfield(field_name),
         )
Beispiel #28
0
    def create_record(self, remote_id):
        """Create a record for the Flickr photo ``remote_id``.

        Calls ``flickr.photos.getInfo`` and ``flickr.photos.getSizes``,
        stores the available photo properties as field values on a new
        'flickr'-managed record in the collection from
        ``self.get_collection()``, and runs a 'flickr_download_media' job
        for the last size listed.

        Returns the created record.
        """
        collection = self.get_collection()

        results = self.flickr.flickr_call(method='flickr.photos.getInfo',
                                          api_key=settings.FLICKR_KEY,
                                          photo_id=remote_id,
                                          format='xmlnode')

        def get_property(exp):
            # Missing nodes/attributes yield None instead of an error
            try:
                return exp(results.photo[0])
            except (KeyError, AttributeError):
                return None

        username = get_property(lambda r: r.owner[0]['username'])
        realname = get_property(lambda r: r.owner[0]['realname'])

        title = get_property(lambda r: r.title[0].text) or 'Untitled'
        description = get_property(lambda r: r.description[0].text)
        date = get_property(lambda r: r.dates[0]['taken'])
        url = get_property(lambda r: r.urls[0].url[0].text)

        tag_nodes = get_property(lambda r: r.tags[0].tag)
        tags = [node.text for node in tag_nodes] if tag_nodes else []

        info = self.flickr.flickr_call(method='flickr.photos.getSizes',
                                       api_key=settings.FLICKR_KEY,
                                       photo_id=remote_id,
                                       format='xmlnode')

        # presumably the last size entry is the largest -- TODO confirm
        image_url = info.sizes[0].size[-1]['source']

        record = Record.objects.create(name=title,
                                       source=url,
                                       manager='flickr')

        # (field name, order, value, always create?) in creation order;
        # optional entries are skipped when their value is falsy
        entries = [
            ('title', 0, title, True),
            ('description', 1, description, False),
            ('date', 2, date, False),
            ('identifier', 3, remote_id, True),
            ('contributor', 4, username, False),
            ('contributor', 5, realname, False),
        ]
        entries.extend(('subject', 6, tag, True) for tag in tags)
        entries.append(('source', 7, url, False))

        for field_name, order, value, always in entries:
            if not (always or value):
                continue
            FieldValue.objects.create(record=record,
                                      field=standardfield(field_name),
                                      order=order,
                                      value=value)

        CollectionItem.objects.create(collection=collection, record=record)

        # create job to download actual media file
        job_arg = simplejson.dumps(dict(record=record.id, url=image_url))
        job = JobInfo.objects.create(func='flickr_download_media',
                                     arg=job_arg)
        job.run()

        return record
Beispiel #29
0
def import_files(request):
    """Show the file import form and process a single uploaded file.

    GET renders an empty form.  A valid POST looks up the record whose
    identifier matches the uploaded file's name (with or without extension)
    in the chosen collection and then, depending on the form's check boxes,
    attaches the file, replaces existing media, creates a new record, or
    skips the file.  The outcome is returned as JSON when the request was
    posted with ``swfupload=true``; otherwise it is stored as a user message
    and the browser is redirected.  An invalid POST that is not a swfupload
    request re-renders the page with the bound form.
    """
    available_storage = get_list_or_404(
        filter_by_access(request.user, Storage, write=True).order_by('title'))
    available_collections = get_list_or_404(
        filter_by_access(request.user, Collection))
    writable_collection_ids = list(
        filter_by_access(request.user, Collection, write=True)
        .values_list('id', flat=True))

    storage_choices = [make_storage_select_choice(s, request.user)
                       for s in available_storage]
    # Writable collections are marked with a leading '*' in the drop-down.
    collection_choices = [
        (c.id, '%s%s' % ('*' if c.id in writable_collection_ids else '',
                         c.title))
        for c in sorted(available_collections, key=lambda c: c.title)
    ]

    class UploadFileForm(forms.Form):
        collection = forms.ChoiceField(choices=collection_choices)
        storage = forms.ChoiceField(choices=storage_choices)
        file = forms.FileField()
        create_records = forms.BooleanField(required=False)
        replace_files = forms.BooleanField(
            required=False, label='Replace files of same type')
        multiple_files = forms.BooleanField(
            required=False, label='Allow multiple files of same type')
        personal_records = forms.BooleanField(required=False)

        def clean(self):
            cleaned_data = self.cleaned_data
            if any(self.errors):
                return cleaned_data
            # Records that are not personal may only go into a collection
            # the user can write to.
            if not cleaned_data['personal_records'] and \
                    int(cleaned_data['collection']) not in \
                    writable_collection_ids:
                self._errors['collection'] = ErrorList(
                    ["Can only add personal records to selected collection"])
                del cleaned_data['collection']
            return cleaned_data

    def swfupload_response(context):
        # Render the result snippet and wrap it in the JSON envelope that is
        # returned for swfupload requests.
        html = render_to_string('storage_import_file_response.html',
                                context,
                                context_instance=RequestContext(request))
        return HttpResponse(
            content=simplejson.dumps(dict(status='ok', html=html)),
            mimetype='application/json')

    if request.method == 'POST':

        via_swfupload = request.POST.get('swfupload') == 'true'
        form = UploadFileForm(request.POST, request.FILES)

        if form.is_valid():

            create_records = form.cleaned_data['create_records']
            replace_files = form.cleaned_data['replace_files']
            multiple_files = form.cleaned_data['multiple_files']
            personal_records = form.cleaned_data['personal_records']

            # Write access to the collection is only demanded when the
            # records are not personal ones.
            collection = get_object_or_404(filter_by_access(
                request.user,
                Collection.objects.filter(id=form.cleaned_data['collection']),
                write=True if not personal_records else None))
            # Only the part of the storage choice before the first comma is
            # the storage id.
            storage = get_object_or_404(filter_by_access(
                request.user,
                Storage.objects.filter(
                    id=form.cleaned_data['storage'].split(',')[0]),
                write=True))
            upload = request.FILES['file']
            record = None

            limit = storage.get_upload_limit(request.user)
            if limit > 0 and upload.size > limit * 1024:
                result = "The uploaded file is too large (%d>%d)." % (
                    upload.size, limit * 1024)
            else:

                mimetype = mimetypes.guess_type(upload.name)[0] or \
                    upload.content_type

                owner = request.user if personal_records else None
                identifier = os.path.splitext(upload.name)[0]

                titlefield = standardfield('title')
                idfield = standardfield('identifier')

                # Match identifiers that are either full file name (with
                # extension) or just base name match
                records = find_record_by_identifier(
                    (identifier, upload.name,), collection,
                    owner=owner, ignore_suffix=multiple_files)
                result = "File skipped."

                if len(records) == 1:
                    # Matching record found
                    record = records[0]
                    media = record.media_set.filter(storage=storage,
                                                    mimetype=mimetype)
                    media_same_id = media.filter(name=identifier)
                    if len(media) == 0 or \
                            (len(media_same_id) == 0 and multiple_files):
                        # No media yet
                        media = Media.objects.create(record=record,
                                                     name=identifier,
                                                     storage=storage,
                                                     mimetype=mimetype)
                        media.save_file(upload.name, upload)
                        result = "File added (Identifier '%s')." % identifier
                    elif len(media_same_id) > 0 and multiple_files:
                        # Replace existing media with same name and mimetype
                        media = media_same_id[0]
                        media.delete_file()
                        media.save_file(upload.name, upload)
                        result = \
                            "File replaced (Identifier '%s')." % identifier
                    elif replace_files:
                        # Replace existing media with same mimetype
                        media = media[0]
                        media.delete_file()
                        media.save_file(upload.name, upload)
                        result = \
                            "File replaced (Identifier '%s')." % identifier
                    else:
                        result = "File skipped, media files already attached."
                elif len(records) == 0:
                    # No matching record found
                    if create_records:
                        # Create a record
                        record = Record.objects.create(name=identifier,
                                                       owner=owner)
                        CollectionItem.objects.create(collection=collection,
                                                      record=record)
                        FieldValue.objects.create(record=record,
                                                  field=idfield,
                                                  value=identifier,
                                                  order=0)
                        FieldValue.objects.create(record=record,
                                                  field=titlefield,
                                                  value=identifier,
                                                  order=1)
                        media = Media.objects.create(record=record,
                                                     name=identifier,
                                                     storage=storage,
                                                     mimetype=mimetype)
                        media.save_file(upload.name, upload)
                        result = "File added to new record " \
                            "(Identifier '%s')." % identifier
                    else:
                        result = "File skipped, no matching record found " \
                            "(Identifier '%s')." % identifier
                else:
                    # Multiple matching records found
                    result = "File skipped, multiple matching records " \
                        "found (Identifier '%s')." % identifier

            if via_swfupload:
                return swfupload_response({'result': result,
                                           'record': record})

            request.user.message_set.create(message=result)
            # NOTE(review): 'next' is taken from the query string without
            # validation, so this redirect can point to another site.
            next_url = request.GET.get('next', request.get_full_path())
            return HttpResponseRedirect(next_url)

        elif via_swfupload:
            # invalid form submission
            return swfupload_response({'result': form.errors})

    else:
        form = UploadFileForm()

    return render_to_response('storage_import_files.html',
                              {'upload_form': form},
                              context_instance=RequestContext(request))
Beispiel #30
0
    def create_record(self, remote_id):
        """Create a local record for the Flickr photo ``remote_id``.

        The photo's info and its list of sizes are requested through
        ``self.flickr.flickr_call``.  The metadata found there is stored as
        field values on a new record, the record is added to the collection
        returned by ``self.get_collection()``, and ``flickr_download_media``
        is queued with the source URL of the last listed size.

        Returns the newly created record.
        """
        collection = self.get_collection()

        photo_info = self.flickr.flickr_call(method='flickr.photos.getInfo',
                                             api_key=settings.FLICKR_KEY,
                                             photo_id=remote_id,
                                             format='xmlnode')

        def read(getter):
            # A node or attribute that is not present yields None.
            try:
                return getter(photo_info.photo[0])
            except (KeyError, AttributeError):
                return None

        username = read(lambda photo: photo.owner[0]['username'])
        realname = read(lambda photo: photo.owner[0]['realname'])

        title = read(lambda photo: photo.title[0].text) or 'Untitled'
        description = read(lambda photo: photo.description[0].text)
        date = read(lambda photo: photo.dates[0]['taken'])
        url = read(lambda photo: photo.urls[0].url[0].text)

        tag_nodes = read(lambda photo: photo.tags[0].tag)
        tags = [node.text for node in tag_nodes] if tag_nodes else []

        size_info = self.flickr.flickr_call(method='flickr.photos.getSizes',
                                            api_key=settings.FLICKR_KEY,
                                            photo_id=remote_id,
                                            format='xmlnode')

        image_url = size_info.sizes[0].size[-1]['source']

        record = Record.objects.create(name=title,
                                       source=url,
                                       manager='flickr')

        # (order, standard field name, value, store even when value is empty)
        planned_values = [
            (0, 'title', title, True),
            (1, 'description', description, False),
            (2, 'date', date, False),
            (3, 'identifier', remote_id, True),
            (4, 'contributor', username, False),
            (5, 'contributor', realname, False),
        ]
        # Every tag becomes its own 'subject' value; they all share order 6.
        planned_values.extend((6, 'subject', tag, True) for tag in tags)
        planned_values.append((7, 'source', url, False))

        for order, field_name, value, unconditional in planned_values:
            if unconditional or value:
                FieldValue.objects.create(record=record,
                                          field=standardfield(field_name),
                                          order=order,
                                          value=value)

        CollectionItem.objects.create(collection=collection, record=record)

        # create job to download actual media file
        from .tasks import flickr_download_media
        flickr_download_media.delay(record.id, image_url)

        return record
Beispiel #31
0
def dido_import():
    """
    Import and convert a set of JPEG or TIFF images into DIDO.
    """
    thumb_dir = os.path.join(SCRATCH_DIR, MDID_COLLECTION_ID)

    assert all([
        os.path.exists(ARCHIVE_DIR),
        os.path.exists(COLLECTION_DIR),
        os.path.exists(INCOMING_DIR),
    ]), """Must specify location of directories in the INCOMING_DIR,
        ARCHIVE_DIR and COLLECTION_DIR config variables"""

    assert all([
        os.path.exists(SCRATCH_DIR),
        os.path.exists(thumb_dir),
    ]), """Must specify the location of the thumb directory in the SCRATCH_DIR
        mdid3 config variable, and the collection id in the MDID_COLLECTION_ID
        config variable"""

    if not os.listdir(INCOMING_DIR):
        # No files to import - nothing to do!
        print 'No files to import, exiting'
        sys.exit(0)

    try:
        # Set up the database access objects
        storage = Storage.objects.get(id=2)
        identifier_field = standardfield('identifier')
        image_number_field = Field.objects.get(label='Image Number')
    except Exception:
        # Error accessing the database
        exception = traceback.format_exc()
        send_report(exception=exception)
        sys.exit(1)

    try:
        # Set up metadata variables
        meta = {}
        exception = None
        with lock_file('dido.lock'):
            for file_name in os.listdir(INCOMING_DIR):
                print "Attempting to import file %s..." % file_name
                file_id, _ = os.path.splitext(file_name)

                file_type = extract_mimetype(
                    os.path.join(INCOMING_DIR, file_name),
                )
                if file_type not in VALID_MIMETYPES:
                    print 'Invalid filetype, skipping %s' % file_name
                    continue

                # Generate absolute file paths for the original and new files
                try:
                    print "Generating paths for %s..." % file_name
                    paths = generate_paths(file_name, file_id)
                except OSError:
                    exception = traceback.format_exc()
                    raise

                # Create the new images and save them, making sure not to
                # re-compress if the image is already a jpeg
                quality = 70 #if file_type == 'image/tiff' else 100
                try:
                    print "Attempting to convert %s. File type: %s" % (file_name, file_type)
                    convert_image(
                        paths['orig_file'],
                        paths['converted_file'],
                        quality,
                    )
                except IOError:
                    exception = traceback.format_exc()
                    raise

                # Move the original file to the archive directory
                try:
                    if not os.path.exists(paths['archive_dir']):
                        os.makedirs(paths['archive_dir'])
                    print "Moving the original file %s to the archive..." % file_name
                    shutil.move(paths['orig_file'], paths['archive_file'])
                except (IOError, OSError):
                    exception = traceback.format_exc()
                    raise

                assert os.path.isfile(paths['archive_file']), \
                    '%s does not exist' % paths['archive_file']
                assert os.path.isfile(paths['converted_file']), \
                    '%s does not exist' % paths['converted_file']

                # Create the database records for this file
                try:
                    print "Attempting to see if this record already exists in db..."
                    field_value = FieldValue.objects.get(
                        field=image_number_field,
                        value=file_id,
                        index_value=file_id[:32],
                    )
                except FieldValue.DoesNotExist:
                    # Record doesn't exist, which means we need to create
                    # the initial record in the database.
                    print "No record found. Adding %s to database..." % file_name
                    duplicate = False
                    add_record_to_database(
                        file_id=file_id,
                        field=identifier_field,
                        storage=storage,
                        image_number=image_number_field,
                    )
                else:
                    duplicate = True
                    record = field_value.record
                    print "Record already exists in database - replacing file"
                    delete_thumbs(record=record, thumb_dir=thumb_dir)

                # Add metadata to the meta dict
                meta[file_id + '.jpg'] = {
                    'path': paths['converted_file'],
                    'duplicate': duplicate,
                }

                print
                print

    except EnvironmentError:
        exception = traceback.format_exc()
        raise
    except Exception as e:
        print e.message
        raise
    finally:
        send_report(meta, exception)
        sys.exit(1 if exception else 0)
Beispiel #32
0
def import_files(request):
    """Render the file import page and handle the upload of one file.

    A GET request shows an empty form.  A valid POST matches the uploaded
    file against the records of the selected collection by identifier (file
    name with or without extension) and adds the file, replaces existing
    media, creates a new record or skips the file according to the submitted
    options.  The result text is returned as JSON when ``response_type`` is
    ``json``; otherwise it is queued as a message and the client is
    redirected.  An invalid POST re-renders the page with the bound form.
    """
    available_storage = get_list_or_404(
        filter_by_access(request.user, Storage, write=True).order_by('title'))
    available_collections = get_list_or_404(
        filter_by_access(request.user, Collection))
    writable_collection_ids = list(
        filter_by_access(request.user, Collection, write=True)
        .values_list('id', flat=True))

    storage_choices = []
    for candidate in available_storage:
        storage_choices.append(
            make_storage_select_choice(candidate, request.user))

    def collection_label(coll):
        # Collections the user may write to carry a leading '*'.
        marker = '*' if coll.id in writable_collection_ids else ''
        return '%s%s' % (marker, coll.title)

    collection_choices = [(coll.id, collection_label(coll))
                          for coll in available_collections]

    class UploadFileForm(forms.Form):
        collection = forms.ChoiceField(choices=collection_choices)
        storage = forms.ChoiceField(choices=storage_choices)
        file = forms.FileField()
        create_records = forms.BooleanField(required=False)
        replace_files = forms.BooleanField(
            required=False, label='Replace files of same type')
        multiple_files = forms.BooleanField(
            required=False, label='Allow multiple files of same type')
        personal_records = forms.BooleanField(required=False)
        response_type = forms.CharField(
            required=False, widget=forms.HiddenInput)

        def clean(self):
            data = self.cleaned_data
            if any(self.errors):
                return data
            if data['personal_records']:
                return data
            if int(data['collection']) in writable_collection_ids:
                return data
            # Not a personal record and the collection is not writable.
            self._errors['collection'] = ErrorList(
                ["Can only add personal records to selected collection"])
            del data['collection']
            return data

    if request.method != 'POST':
        form = UploadFileForm()
    else:
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            options = form.cleaned_data
            create_records = options['create_records']
            replace_files = options['replace_files']
            multiple_files = options['multiple_files']
            personal_records = options['personal_records']

            collection_query = Collection.objects.filter(
                id=options['collection'])
            collection = get_object_or_404(filter_by_access(
                request.user,
                collection_query,
                write=True if not personal_records else None))

            storage_id = options['storage'].split(',')[0]
            storage = get_object_or_404(filter_by_access(
                request.user,
                Storage.objects.filter(id=storage_id),
                write=True))

            uploaded = request.FILES['file']
            record = None

            limit = storage.get_upload_limit(request.user)
            if limit > 0 and uploaded.size > limit * 1024:
                result = "The uploaded file is too large (%d>%d)." % (
                    uploaded.size, limit * 1024)
            else:
                guessed_type = mimetypes.guess_type(uploaded.name)[0]
                mimetype = guessed_type or uploaded.content_type

                if personal_records:
                    owner = request.user
                else:
                    owner = None
                identifier = os.path.splitext(uploaded.name)[0]

                titlefield = standardfield('title')
                idfield = standardfield('identifier')

                def add_media(target):
                    # Attach the upload to `target` as a new media object.
                    fresh = Media.objects.create(record=target,
                                                 name=identifier,
                                                 storage=storage,
                                                 mimetype=mimetype)
                    fresh.save_file(uploaded.name, uploaded)

                def overwrite(existing):
                    # Swap the stored file of an existing media object.
                    existing.delete_file()
                    existing.save_file(uploaded.name, uploaded)

                # Candidates are records whose identifier equals either the
                # bare name or the full file name of the upload.
                matches = find_record_by_identifier(
                    (identifier, uploaded.name),
                    collection,
                    owner=owner,
                    ignore_suffix=multiple_files)
                result = "File skipped."

                match_count = len(matches)
                if match_count > 1:
                    # Ambiguous: more than one record matches
                    result = "File skipped, multiple matching records " \
                        "found (Identifier '%s')." % identifier
                elif match_count == 0:
                    if not create_records:
                        result = "File skipped, no matching record found " \
                            "(Identifier '%s')." % identifier
                    else:
                        # Set up a new record carrying identifier and title
                        record = Record.objects.create(name=identifier,
                                                       owner=owner)
                        CollectionItem.objects.create(collection=collection,
                                                      record=record)
                        for position, field in enumerate(
                                (idfield, titlefield)):
                            FieldValue.objects.create(record=record,
                                                      field=field,
                                                      value=identifier,
                                                      order=position)
                        add_media(record)
                        result = "File added to new record " \
                            "(Identifier '%s')." % identifier
                else:
                    # Exactly one record matches
                    record = matches[0]
                    same_type = record.media_set.filter(storage=storage,
                                                        mimetype=mimetype)
                    same_name = same_type.filter(name=identifier)
                    if len(same_type) == 0 or \
                            (len(same_name) == 0 and multiple_files):
                        # Nothing to clash with, simply add the file
                        add_media(record)
                        result = "File added (Identifier '%s')." % identifier
                    elif len(same_name) > 0 and multiple_files:
                        # Same name and mimetype already present
                        overwrite(same_name[0])
                        result = \
                            "File replaced (Identifier '%s')." % identifier
                    elif replace_files:
                        # Same mimetype already present
                        overwrite(same_type[0])
                        result = \
                            "File replaced (Identifier '%s')." % identifier
                    else:
                        result = "File skipped, media files already attached."

            if options['response_type'] == 'json':
                payload = simplejson.dumps(dict(status='ok', message=result))
                return HttpResponse(content=payload,
                                    content_type='application/json')

            messages.add_message(request, messages.INFO, message=result)
            redirect_to = request.GET.get('next', request.get_full_path())
            return HttpResponseRedirect(redirect_to)

    return render_to_response('storage_import_files.html',
                              {'upload_form': form},
                              context_instance=RequestContext(request))
Beispiel #33
0
def main(request):
    """Render the browsing page for the 'the-breeze' collection.

    The ``v`` query parameter selects a volume and ``r`` a record within
    that volume.  A missing, malformed or unknown value falls back to the
    first entry of the sorted volume list / issue list.  The template
    receives the volumes, the ``(issue, date, record id)`` tuples of the
    selected volume and a PDF viewer for the selected record.
    """
    collection = Collection.objects.get(name='the-breeze')

    coverage = standardfield('coverage')
    date = standardfield('date')

    volumes = sorted(map(int, FieldValue.objects.filter(
        record__collection=collection,
        field=coverage,
        label='Volume',
        ).values_list('value', flat=True).distinct()))

    try:
        volume = int(request.GET.get('v'))
    except (ValueError, TypeError):
        volume = None
    if volume not in volumes:
        # NOTE(review): raises IndexError when the collection has no
        # 'Volume' values at all.
        volume = volumes[0]

    record_ids = FieldValue.objects.filter(
        record__collection=collection,
        field=coverage,
        label='Volume',
        value=str(volume),
        ).values_list('record', flat=True)

    issues = sorted(FieldValue.objects.filter(
        record__in=record_ids,
        field=coverage,
        label='Issue',
        ).values_list('record', 'value'))

    dates = sorted(FieldValue.objects.filter(
        record__in=record_ids,
        field=date,
        ).values_list('record', 'value'))

    # Pair each issue with the date of the *same record*.  Pairing the two
    # sorted lists by position would shift every following pair as soon as
    # one record lacks a date or an issue value.
    date_by_record = dict(dates)
    combined = sorted(
        (int(issue_value), date_by_record[record], record)
        for record, issue_value in issues
        if record in date_by_record)

    try:
        record_id = int(request.GET.get('r'))
    except (ValueError, TypeError):
        record_id = None
    if record_id not in [r for i, d, r in combined]:
        # NOTE(review): raises IndexError when the volume has no issue
        # with a date.
        record_id = combined[0][2]

    # record_id is guaranteed to be in `combined` at this point, so the
    # loop always assigns `issue`.
    for i, d, r in combined:
        if record_id == r:
            issue = i
            break

    viewer = pdfviewer(None, request, record_id)

    return render_to_response('thebreeze.html',
                              {'collection': collection,
                               'breezelogo': 'breeze_logo_%s.png' %
                                    random.choice('00 01 02 03 04'.split()),
                               'volume': volume,
                               'volumes': volumes,
                               'issue': issue,
                               'issues': combined,
                               'record': record_id,
                               'viewer': viewer,
                               'embedcode': viewer.embed_code(request, None) if viewer else None,
                               },
                              context_instance=RequestContext(request))
        def presentation_import(pres_ids, rc):

            print pres_ids

            for pres_id in pres_ids:

                pres_url = 'http://mdid3.temple.edu/api/presentation/' + str(pres_id) + '/'
                print 'fetching %s' % pres_url

                theShow = requests.get(pres_url, cookies=rc)
                #print theShow.json()

                jp = simplejson.loads(theShow.content)

                concat_description = jp['description']
                presentation = Presentation.objects.create(title=jp['title'],
                                                           owner=target_user,
                                                           description=concat_description)


                # jp['content'] contains every slide
                for order, slide in enumerate(jp['content']):
                    #print order, slide
                    rec_exists = False
                    rec_id = None

                    print 'using storage %s' % store.base

                    for metadata in slide['metadata']:

                        #print 'metadata for slide %s, %s' % (slide['name'], str(metadata))
                        #print metadata

                        if metadata['label'] == 'ID':
                            print 'metadata for slide %s, %s' % (slide['name'], str(metadata))
                            rec_id = metadata['value']
                            print '%s is an ID field' % rec_id
                            #print metadata['value']
                            if Record.by_fieldvalue(fid, rec_id):
                                rec_exists = True
                                print '%s already exists' % rec_id
                            break

                    # when finished checking for ID either add existing record to pres
                    # or create record and then add it

                    if rec_exists:
                        # note that record is the first record in the list that is returned byfieldvalue
                        # which should be checked for accuracy in multiple tests if there's any chance that
                        # there could be multiple records
                        print 'Check the following list list of records for multiple values:'
                        print Record.by_fieldvalue(fid, rec_id)
                        record = Record.by_fieldvalue(fid, rec_id)[0]
                        presentation.items.create(order=order, record=record)
                        presentation.save()
                        print 'adding %s to presentation at position %s' % (rec_id, order)

                    else:
                        print 'creating record for %s' % rec_id
                        print 'metadata:'
                        print slide['metadata']

                        #record = Record.objects.create(name=rec_id, owner=target_user)
                        record = Record.objects.create(owner=target_user)
                        record.save()

                        for metadata in slide['metadata']:
                            try:
                                target = Field.objects.get(label=metadata['label'], standard__prefix='aae')
                                record.fieldvalue_set.create(field=target,
                                                             value=metadata['value'],
                                                             label=metadata['label'], )
                            except Exception as e:
                                print e
                                try:
                                    target = Field.objects.filter(label=metadata['label'])
                                    record.fieldvalue_set.create(field=target[0],
                                                                 value=metadata['value'],
                                                                 label=metadata['label'], )
                                    print "Ok, went with %s the first field I could find to go with!" % target[0]
                                except Exception as e_two:
                                    print e_two
                                    print "ok, giving up!"
                                    continue
                                continue

                        try:
                            title = slide['title']
                        except:
                            title = 'Untitled'

                        FieldValue.objects.create(record=record,
                                                  field=standardfield('title'),
                                                  order=0,
                                                  value=title)

                        col_i = CollectionItem.objects.create(collection=collection, record=record)

                        print 'collection item created: %s' % col_i

                        ## file biz

                        # media_req.content contains the image
                        media_url = mdid_base_url + slide['image']
                        print 'media_url: %s' % media_url
                        media_req = requests.get(media_url, cookies=rc)
                        mimetype = media_req.headers['content-type']

                        file = StringIO(media_req.content)

                        if guess_extension(mimetype) == '.jpeg':
                            filename = record.name + '.jpg'
                            extension = 'JPEG'
                        else:
                            filename = os.path.join(record.name + guess_extension(mimetype))
                            extension = os.path.splitext(mimetype)[0]
                        print 'extension %s' % extension

                        file_path = os.path.join(store.base, filename)

                        print 'saving media file for %s to %s' % (record.name, file_path)

                        media = Media.objects.create(record=record,
                                                     #name=os.path.splitext(file.name)[0],
                                                     name=record.name,
                                                     storage=store,
                                                     mimetype=mimetype)
                        media.save_file(filename, file)

                        presentation.items.create(order=order, record=record)
                        presentation.save()
Beispiel #35
0
def upload(request):
    """Handle the upload form for a new PDF issue in 'the-breeze'.

    ``request.user`` must have write access to both the collection and the
    storage named 'the-breeze'; ``check_access`` is called with
    ``fail_if_denied=True`` for each of them.

    On a POST with a valid ``UploadForm`` this creates a record in the
    collection, attaches title, volume, issue, date, pages and a hidden
    identifier field value (orders 1 through 6), stores the uploaded ``pdf``
    file as the record's media and redirects to ``thebreeze-main``.

    In every other case (GET, or a POST that fails validation) the upload
    template is rendered with the form and a randomly chosen logo file name.
    """
    collection = Collection.objects.get(name='the-breeze')
    storage = Storage.objects.get(name='the-breeze')

    for target in (collection, storage):
        check_access(request.user, target, write=True, fail_if_denied=True)

    # Resolved up front, for every request method, in this fixed order.
    fcoverage, fdate, ftitle, fdescription, fidentifier = [
        standardfield(field_name)
        for field_name in (
            'coverage', 'date', 'title', 'description', 'identifier')
    ]

    if request.method != 'POST':
        form = UploadForm()
    else:
        form = UploadForm(request.POST, request.FILES)
        if form.is_valid():
            cleaned = form.cleaned_data
            volume = str(cleaned['volume'])
            issue = str(cleaned['issue'])
            date = str(cleaned['date'])
            pages = str(cleaned['pages'])
            publication = cleaned['publication']

            title = '%s %s Volume %s Issue %s' % (
                publication, date, volume, issue)

            record = Record.objects.create()
            CollectionItem.objects.create(record=record, collection=collection)

            # One entry per field value, listed in display order; the
            # position in this list (starting at 1) becomes the ``order``.
            field_value_specs = [
                dict(field=ftitle, label=None, value=title),
                dict(field=fcoverage, label='Volume', value=volume),
                dict(field=fcoverage, label='Issue', value=issue),
                dict(field=fdate, label=None, value=date),
                dict(field=fdescription, label='Pages', value=pages),
                dict(field=fidentifier, label=None, value=title, hidden=True),
            ]
            for position, spec in enumerate(field_value_specs, 1):
                record.fieldvalue_set.create(order=position, **spec)

            import re
            # File name is the lower-cased title with every run of
            # characters outside a-z / 0-9 collapsed into a single dash.
            slug = re.sub(r'[^a-z0-9]+', '-', title.lower())
            filename = slug + '.pdf'

            media = Media.objects.create(
                record=record,
                storage=storage,
                mimetype='application/pdf',
            )
            media.save_file(filename, request.FILES['pdf'])

            return HttpResponseRedirect(reverse('thebreeze-main'))

    logo = 'breeze_logo_%s.png' % random.choice('00 01 02 03 04'.split())
    template_context = {'breezelogo': logo, 'form': form}
    return render_to_response('thebreeze-upload.html',
                              template_context,
                              context_instance=RequestContext(request))
Beispiel #36
0
 def setUp(self):
     """Create the 'facet-fields' field set with the title and creator fields."""
     self.fieldset = FieldSet.objects.create(title='facet-fields')
     for field_name in ('title', 'creator'):
         FieldSetField.objects.create(
             fieldset=self.fieldset, field=standardfield(field_name))
Beispiel #37
0
    def create_record(self, url):
        """Build a record from the page at ``url`` and start its media download.

        The page is fetched with ``urllib2`` and parsed with BeautifulSoup.
        Date, title, description, ID and credit (link text and link target)
        are read from the table cell that follows each matching label text.
        A ``Record`` managed by 'nasaimageexchange' is created with those
        values as field values (orders 0 through 5) and added to the
        collection returned by ``self.get_collection()``.

        The links in the 'Format' cell are ranked by the pixel area parsed
        from their text; a 'nasa_download_media' job is created for the
        first link of the ranking and run immediately.

        Returns the newly created record.
        """
        collection = self.get_collection()

        soup = BeautifulSoup(urllib2.urlopen(url))

        def value_cell(label):
            # The value sits in the <td> following the label's parent element.
            return soup.find(text=label).parent.findNextSibling('td')

        def pixel_area(link):
            # link is a (href, text) pair; text without "W x H" ranks as 0.
            match = re.search(r'(?P<width>\d+) x (?P<height>\d+)', link[1])
            if not match:
                return 0
            return int(match.group('width')) * int(match.group('height'))

        # get metadata
        date = value_cell('Date:&nbsp;').next
        title = value_cell('Title:&nbsp;').next
        description = value_cell('Description:&nbsp;').next
        identifier = value_cell('ID:&nbsp;').next
        credit_link = value_cell('Credit:&nbsp;').findNext('a')
        credit_url = credit_link['href']
        credit_title = credit_link.next

        record = Record.objects.create(
            name=title, source=url, manager='nasaimageexchange')

        # (standard field name, value) pairs; list position is the order.
        metadata = (
            ('title', title),
            ('description', description),
            ('date', date),
            ('identifier', identifier),
            ('contributor', credit_title),
            ('contributor', credit_url),
        )
        for position, (field_name, value) in enumerate(metadata):
            FieldValue.objects.create(record=record,
                                      field=standardfield(field_name),
                                      order=position,
                                      value=value)

        CollectionItem.objects.create(collection=collection, record=record)

        # media links and dimensions, largest pixel area first
        format_links = value_cell('Format:&nbsp;').findAll('a')
        media = [(anchor['href'], anchor.next) for anchor in format_links]
        media.sort(key=pixel_area, reverse=True)

        # create job to download actual media file
        job_arguments = dict(record=record.id, url=media[0][0])
        job = JobInfo.objects.create(func='nasa_download_media',
                                     arg=simplejson.dumps(job_arguments))
        job.run()

        return record