Ejemplo n.º 1
0
def find_record_by_identifier(identifiers, collection, owner=None,
                              ignore_suffix=False, suffix_regex=r'[-_]\d+$'):
    """Look up records in *collection* by identifier field value.

    Accepts a single identifier or a list/tuple of them.  When
    ``ignore_suffix`` is set, suffix-stripped variants of each identifier
    (per ``suffix_regex``, e.g. trailing "-01" or "_2") are searched too.
    """
    field_ids = standardfield_ids('identifier', equiv=True)
    if isinstance(identifiers, (list, tuple)):
        candidates = list(identifiers)
    else:
        candidates = [identifiers]
    if ignore_suffix:
        stripped = [re.sub(suffix_regex, '', value) for value in candidates]
        candidates.extend(stripped)
    return Record.by_fieldvalue(field_ids, candidates).filter(
        collection=collection, owner=owner)
Ejemplo n.º 2
0
def find_record_by_identifier(identifiers, collection, owner=None,
        ignore_suffix=False, suffix_regex=r'[-_]\d+$'):
    """Find records in *collection* matching the given identifier(s).

    First tries the identifiers verbatim; if nothing matches and
    ``ignore_suffix`` is set, retries with suffixes matching
    ``suffix_regex`` stripped from each identifier.
    """
    field_ids = standardfield_ids('identifier', equiv=True)
    matches = Record.by_fieldvalue(field_ids, identifiers).filter(
        collection=collection, owner=owner)
    # `not matches` is evaluated first, matching the original lazy-queryset
    # behavior; only retry when the verbatim lookup came up empty.
    if not matches and ignore_suffix:
        if not isinstance(identifiers, (list, tuple)):
            identifiers = [identifiers]
        stripped = (re.sub(suffix_regex, '', value) for value in identifiers)
        matches = Record.by_fieldvalue(field_ids, stripped).filter(
            collection=collection, owner=owner)
    return matches
Ejemplo n.º 3
0
def find_record_by_identifier(
        identifiers, collection, owner=None,
        ignore_suffix=False, suffix_regex=r'[-_]\d+$'):
    """Return distinct records in *collection* matching the identifier(s).

    When ``ignore_suffix`` is set, suffix-stripped variants (per
    ``suffix_regex``) of every identifier are included in the search.
    """
    field_ids = standardfield_ids('identifier', equiv=True)
    candidates = (
        list(identifiers) if isinstance(identifiers, (list, tuple))
        else [identifiers])
    if ignore_suffix:
        # The comprehension is fully built before extending, so only the
        # original values get stripped.
        candidates += [re.sub(suffix_regex, '', value) for value in candidates]
    queryset = Record.by_fieldvalue(field_ids, candidates)
    queryset = queryset.filter(collection=collection, owner=owner).distinct()
    return queryset
    def handle(self, from_collection, to_collections, commit, *args,
               **options):
        """Remap presentation items from one collection's records to
        records in other collections that share an identifier value.

        Changes are saved only when ``commit`` is truthy; otherwise this
        is a dry run that reports what would have been remapped.
        """

        if not from_collection or not to_collections:
            print("Error: Must specify --from and --to arguments")
            return

        print("Mapping presentation items from collection %s to " \
            "collection(s) %s" % (from_collection, to_collections))

        # Ids of the 'identifier' field and all equivalent standard fields.
        idfields = standardfield_ids('identifier', equiv=True)

        print("Fetching identifiers")

        # Source side: map each record to all of its (unowned, context-free,
        # non-hidden) identifier values.
        query = FieldValue.objects.filter(
            field__in=idfields,
            record__collectionitem__collection=from_collection,
            owner=None,
            context_type=None,
            hidden=False).values_list('value', 'record')

        record_to_id = dict()
        for identifier, record in query:
            record_to_id.setdefault(record, []).append(identifier)

        print("Fetching target records")

        # Target side: inverse lookup, identifier value -> candidate records.
        query = FieldValue.objects.filter(
            field__in=idfields,
            record__collectionitem__collection__in=to_collections,
            owner=None,
            context_type=None,
            hidden=False).values_list('value', 'record')

        id_to_record = dict()

        for identifier, record in query:
            id_to_record.setdefault(identifier, []).append(record)

        print("Mapping presentation items")
        remapped = 0
        errors = []

        items = PresentationItem.objects.filter(
            record__collectionitem__collection=from_collection)
        pb = ProgressBar(len(items))

        for count, item in enumerate(items):
            identifiers = record_to_id.get(item.record_id)
            if identifiers:
                for identifier in identifiers:
                    new_records = id_to_record.get(identifier)
                    if new_records:
                        # Remap only on an unambiguous single match; stop at
                        # the first identifier that yields one.
                        if len(new_records) == 1:
                            remapped += 1
                            if commit:
                                item.record_id = new_records[0]
                                item.save()
                            break
                        else:
                            errors.append(
                                "Multiple matching records with identifier "
                                "'%s' found in collection %s: %s" %
                                (identifier, to_collections,
                                 sorted(new_records)))
                    else:
                        errors.append(
                            "No record with identifier '%s' found in "
                            "collection %s" % (identifier, to_collections))
            else:
                errors.append("No identifier found for record %s" %
                              item.record_id)
            pb.update(count)

        pb.done()

        # Deduplicate and sort the error messages for stable reporting.
        errors = sorted(set(errors))

        if commit:
            print("Remapped %s items" % remapped)
        else:
            print("Would have remapped %s items - rerun with --commit" % \
                remapped)
        if errors:
            print("%s unique errors occurred:" % len(errors))
            print('\n'.join(errors))
Ejemplo n.º 5
0
    def handle(self, collection, from_date, until_date, file_format, file_name,
               include_events, exclude_events, list_events, *args, **options):
        """Export accumulated activity for a collection as CSV (Python 2).

        Accumulates activity data for the selected events and date range,
        then iterates the matching entries, caching each record's first
        identifier and title.  NOTE(review): this excerpt appears truncated
        -- identifier/title are cached per record but no data row is
        written in the visible code; confirm against the full source.
        """

        # All distinct event names present in the activity log.
        events = list(
            Activity.objects.distinct().order_by('event').values_list(
                'event', flat=True))

        # --include replaces the event list; --exclude removes entries.
        if include_events:
            events = include_events
        for event in exclude_events:
            if event in events:
                events.remove(event)

        if list_events:
            print "Events (not all may apply to date range or collection):"
            print '\n'.join(events)
            return

        if not from_date:
            print "Please specify a start date"
            return
        if not collection:
            print "Please specify a collection"
            return

        # Resolve the collection by name first, then fall back to its id.
        try:
            collection = Collection.objects.get(name=collection)
        except Collection.DoesNotExist:
            try:
                collection = Collection.objects.get(id=collection)
            except (Collection.DoesNotExist, ValueError):
                print "Cannot find specified collection: %s" % collection
                return

        # Progress callback; writes to stderr so it does not pollute the
        # CSV when output goes to stdout.
        def accumulation_status(date, event, step, numsteps):
            print >> sys.stderr, "Accumulating data for event %s on %s... (%d/%d)" % (
                event, date, step + 1, numsteps)

        assure_accumulation(from_date,
                            until_date,
                            events,
                            callback=accumulation_status)

        activity = AccumulatedActivity.objects.filter(object_id__isnull=False,
                                                      date__gte=from_date)
        if until_date:
            activity = activity.filter(date__lt=until_date)

        # Unowned records of the collection and their media (with storage
        # names), keyed by media id for quick lookup in the loop below.
        record_ids = CollectionItem.objects.filter(
            collection=collection, record__owner__isnull=True).values('record')
        media = Media.objects.filter(
            record__in=record_ids).select_related('storage')
        media_dict = dict((id, (record, name))
                          for id, record, name in media.values_list(
                              'id', 'record', 'storage__name'))

        record_type = ContentType.objects.get_for_model(Record)
        media_type = ContentType.objects.get_for_model(Media)

        # Keep only activity attached to these records or their media.
        activity = activity.filter(
            (Q(content_type=record_type, object_id__in=record_ids)
             | Q(content_type=media_type, object_id__in=media.values('id'))))

        # Cache of record id -> (identifier, title), filled lazily below.
        records = dict()
        identifier_field = standardfield_ids('identifier', equiv=True)
        title_field = standardfield_ids('title', equiv=True)

        if file_name:
            output = open(file_name, 'wb')
        else:
            output = sys.stdout

        writer = csv.writer(output, dialect='excel')
        writer.writerow((
            'Date',
            '',
            'Record',
            'Title',
            'Media',
            'Storage',
            'Event',
            'Count',
        ))

        for entry in activity.select_related('content_type').order_by(
                'date', 'event'):
            # Media entries resolve to their owning record plus storage
            # name; record entries carry no media/storage.
            if entry.content_type == media_type:
                record_id, storage = media_dict[entry.object_id]
                media_id = entry.object_id
            else:
                record_id = entry.object_id
                media_id = None
                storage = None

            if records.has_key(record_id):
                identifier, title = records[record_id]
            else:
                # First identifier value by order; None on any failure.
                try:
                    identifier = FieldValue.objects.filter(
                        record=record_id,
                        field__in=identifier_field,
                    ).order_by('order')[0].value
                    identifier = identifier.encode('utf-8')
                except Exception, e:
                    print >> sys.stderr, e
                    identifier = None
                # First title value by order; None on any failure.
                try:
                    title = FieldValue.objects.filter(
                        record=record_id,
                        field__in=title_field,
                    ).order_by('order')[0].value
                    title = title.encode('utf-8')
                except Exception, e:
                    print >> sys.stderr, e
                    title = None

                records[record_id] = (identifier, title)
Ejemplo n.º 6
0
 def title_from_fieldvalues(self, fieldvalues):
     """Return the value of the first title field in *fieldvalues*, or None."""
     title_ids = standardfield_ids('title', equiv=True)
     matches = (fv.value for fv in fieldvalues if fv.field_id in title_ids)
     return next(matches, None)
Ejemplo n.º 7
0
 def title(self):
     """Return this record's first title field value, or None if absent."""
     title_ids = standardfield_ids('title', equiv=True)
     values = self.get_fieldvalues(q=Q(field__in=title_ids))
     return values[0].value if values else None
Ejemplo n.º 8
0
    def handle(self, *args, **kwargs):
        """Rebuild dc:relation/IsPartOf values linking images to works.

        Reads a CSV mapping (ImageFileName -> fk_WorkID), deletes all
        existing IsPartOf relations in the given collections, then creates
        a relation on each non-primary item pointing at the first
        (primary) identifier listed for its work.  Python 2 code.
        """

        mapping_file = kwargs.get('mapping_file')
        collections = map(int, kwargs.get('collections') or list())

        if not mapping_file or not collections:
            print "--collection and --mapping are required parameters"
            return

        # work id -> image identifiers in CSV order; the first entry per
        # work is treated as the primary below.
        works = dict()

        with open(mapping_file, 'rU') as mappings:
            reader = csv.DictReader(mappings)
            for row in reader:
                identifier = row['ImageFileName']
                work = row['fk_WorkID']
                works.setdefault(work, []).append(identifier)

        # Clean out old relations
        FieldValue.objects.filter(
            record__collection__in=collections,
            field__standard__prefix='dc',
            field__name='relation',
            refinement='IsPartOf',
        ).delete()

        related_field = Field.objects.get(
            standard__prefix='dc',
            name='relation',
        )

        id_fields = standardfield_ids('identifier', equiv=True)

        print "Caching record identifiers"
        # identifier value -> record id for every record in the target
        # collections (later values overwrite duplicates).
        identifiers = dict()
        values = FieldValue.objects.select_related('record').filter(
            record__collection__in=collections, field__in=id_fields)
        for fv in values:
            identifiers[fv.value] = fv.record.id

        pb = ProgressBar(len(works))

        # Insert new relations
        for count, work in enumerate(works.itervalues()):
            primary = work[0]
            items = work[1:]
            for item in items:
                # Try the identifier as-is, then without a ".jpg" suffix.
                options = [item]
                if item.lower().endswith('.jpg'):
                    options.append(item[:-4])
                record = None
                for option in options:
                    record = identifiers.get(option)
                    if record:
                        break
                else:
                    # for/else: no option matched a record; skip this item.
                    continue
                FieldValue.objects.create(record=Record.objects.get(id=record),
                                          field=related_field,
                                          refinement='IsPartOf',
                                          value=primary)

            pb.update(count)

        pb.done()
Ejemplo n.º 9
0
    def handle(self, *args, **kwargs):
        """Rebuild work-relation metadata from an Identifier/Work/Primary CSV.

        For each record identifier found in the target collections, adds a
        hidden dc:relation/IsPartOf value naming its work, and keeps a
        hidden 'primary-work-record' system field value only on records
        flagged as their work's primary.  Python 2 code.
        """

        system_field = get_system_field()

        collections = map(int, kwargs.get('collections') or list())
        mapping_file = kwargs.get('mapping_file')

        if not collections:
            print "--collection is a required parameter"
            return

        if not mapping_file:
            print "--mapping is a required parameter"
            return

        # identifier -> (work id, 'True'/'False' primary flag) from the CSV.
        mappings = dict()
        with open(mapping_file, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                mappings[row['Identifier']] = (row['Work'], row['Primary'])

        related_field = Field.objects.get(
            standard__prefix='dc',
            name='relation',
        )

        existing_works = FieldValue.objects.filter(
            record__collection__in=collections,
            field=related_field,
            refinement='IsPartOf',
        )

        # Clean out old relations
        print "Deleting old works info"
        existing_works.delete()

        id_fields = standardfield_ids('identifier', equiv=True)

        print "Fetching records"
        identifiers = FieldValue.objects.select_related('record').filter(
            record__collection__in=collections,
            field__in=id_fields,
        )

        pb = ProgressBar(identifiers.count())

        # Insert new relations
        for count, identifier in enumerate(identifiers):

            work, isprimary = mappings.get(identifier.value, (None, False))
            # The CSV stores the flag as text; anything but 'True' is False.
            isprimary = isprimary == 'True'
            if not work:
                print "Warning: no entry found for identifier '%s'" % \
                      identifier.value
                continue

            FieldValue.objects.create(record=identifier.record,
                                      field=related_field,
                                      refinement='IsPartOf',
                                      value=work,
                                      hidden=True)

            # Reconcile the 'primary-work-record' marker with the flag:
            # delete stale markers, create a missing one.
            fv = list(
                FieldValue.objects.filter(record=identifier.record,
                                          field=system_field,
                                          label='primary-work-record'))
            if len(fv) > 0:
                if not isprimary:
                    for f in fv:
                        f.delete()
            elif isprimary:
                FieldValue.objects.create(
                    record=identifier.record,
                    field=system_field,
                    label='primary-work-record',
                    value=work,
                    hidden=True,
                )

            pb.update(count)

        pb.done()
Ejemplo n.º 10
0
 def title_from_fieldvalues(self, fieldvalues):
     """Scan *fieldvalues* for a title field and return its value, else None."""
     wanted = standardfield_ids('title', equiv=True)
     result = None
     for candidate in fieldvalues:
         if candidate.field_id in wanted:
             result = candidate.value
             break
     return result
Ejemplo n.º 11
0
    def handle(self, collection, from_date, until_date, file_format, file_name,
               include_events, exclude_events, list_events, *args, **options):
        """Export accumulated activity for a collection as CSV (Python 2).

        Accumulates activity data for the selected events and date range,
        then iterates the matching entries, caching each record's first
        identifier and title.  NOTE(review): this excerpt appears truncated
        -- identifier/title are cached per record but no data row is
        written in the visible code; confirm against the full source.
        """

        # All distinct event names present in the activity log.
        events = list(Activity.objects.distinct()
                      .order_by('event').values_list('event', flat=True))

        # --include replaces the event list; --exclude removes entries.
        if include_events:
            events = include_events
        for event in exclude_events:
            if event in events:
                events.remove(event)

        if list_events:
            print "Events (not all may apply to date range or collection):"
            print '\n'.join(events)
            return

        if not from_date:
            print "Please specify a start date"
            return
        if not collection:
            print "Please specify a collection"
            return

        # Resolve the collection by name first, then fall back to its id.
        try:
            collection = Collection.objects.get(name=collection)
        except Collection.DoesNotExist:
            try:
                collection = Collection.objects.get(id=collection)
            except (Collection.DoesNotExist, ValueError):
                print "Cannot find specified collection: %s" % collection
                return

        # Progress callback; writes to stderr so it does not pollute the
        # CSV when output goes to stdout.
        def accumulation_status(date, event, step, numsteps):
            print >> sys.stderr, "Accumulating data for event %s on %s... (%d/%d)" % (
                event, date, step + 1, numsteps)
        assure_accumulation(from_date, until_date, events, callback=accumulation_status)

        activity = AccumulatedActivity.objects.filter(object_id__isnull=False, date__gte=from_date)
        if until_date:
            activity = activity.filter(date__lt=until_date)

        # Unowned records of the collection and their media (with storage
        # names), keyed by media id for quick lookup in the loop below.
        record_ids = CollectionItem.objects.filter(collection=collection, record__owner__isnull=True).values('record')
        media = Media.objects.filter(record__in=record_ids).select_related('storage')
        media_dict = dict((id, (record, name))
                           for id, record, name in media.values_list('id', 'record', 'storage__name'))

        record_type = ContentType.objects.get_for_model(Record)
        media_type = ContentType.objects.get_for_model(Media)

        # Keep only activity attached to these records or their media.
        activity = activity.filter(
            (Q(content_type=record_type, object_id__in=record_ids) |
             Q(content_type=media_type, object_id__in=media.values('id')))
            )

        # Cache of record id -> (identifier, title), filled lazily below.
        records = dict()
        identifier_field = standardfield_ids('identifier', equiv=True)
        title_field = standardfield_ids('title', equiv=True)

        if file_name:
            output = open(file_name, 'wb')
        else:
            output = sys.stdout

        writer = csv.writer(output, dialect='excel')
        writer.writerow((
            'Date',
            '',
            'Record',
            'Title',
            'Media',
            'Storage',
            'Event',
            'Count',
        ))

        for entry in activity.select_related('content_type').order_by('date', 'event'):
            # Media entries resolve to their owning record plus storage
            # name; record entries carry no media/storage.
            if entry.content_type == media_type:
                record_id, storage = media_dict[entry.object_id]
                media_id = entry.object_id
            else:
                record_id = entry.object_id
                media_id = None
                storage = None

            if records.has_key(record_id):
                identifier, title = records[record_id]
            else:
                # First identifier value by order; None on any failure.
                try:
                    identifier = FieldValue.objects.filter(record=record_id,
                                                           field__in=identifier_field,
                                                           ).order_by('order')[0].value
                    identifier = identifier.encode('utf-8')
                except Exception, e:
                    print >> sys.stderr, e
                    identifier = None
                # First title value by order; None on any failure.
                try:
                    title = FieldValue.objects.filter(record=record_id,
                                                           field__in=title_field,
                                                           ).order_by('order')[0].value
                    title = title.encode('utf-8')
                except Exception, e:
                    print >> sys.stderr, e
                    title = None

                records[record_id] = (identifier, title)
Ejemplo n.º 12
0
    def handle(self, *args, **kwargs):
        """Recreate hidden work-relation field values from a CSV mapping.

        The CSV supplies (Identifier, Work, Primary) rows.  Every matching
        record in the target collections gets a hidden dc:relation/IsPartOf
        value naming its work; a hidden 'primary-work-record' system field
        value is kept only on records whose Primary flag is 'True'.
        Python 2 code.
        """

        system_field = get_system_field()

        collections = map(int, kwargs.get('collections') or list())
        mapping_file = kwargs.get('mapping_file')

        if not collections:
            print "--collection is a required parameter"
            return

        if not mapping_file:
            print "--mapping is a required parameter"
            return

        # identifier -> (work id, 'True'/'False' primary flag) from the CSV.
        mappings = dict()
        with open(mapping_file, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                mappings[row['Identifier']] = (row['Work'], row['Primary'])

        related_field = Field.objects.get(
            standard__prefix='dc',
            name='relation',
        )

        existing_works = FieldValue.objects.filter(
            record__collection__in=collections,
            field=related_field,
            refinement='IsPartOf',
        )

        # Clean out old relations
        print "Deleting old works info"
        existing_works.delete()

        id_fields = standardfield_ids('identifier', equiv=True)

        print "Fetching records"
        identifiers = FieldValue.objects.select_related('record').filter(
            record__collection__in=collections,
            field__in=id_fields,
        )

        pb = ProgressBar(identifiers.count())

        # Insert new relations
        for count, identifier in enumerate(identifiers):

            work, isprimary = mappings.get(identifier.value, (None, False))
            # The CSV stores the flag as text; anything but 'True' is False.
            isprimary = isprimary == 'True'
            if not work:
                print "Warning: no entry found for identifier '%s'" % \
                      identifier.value
                continue

            FieldValue.objects.create(
                record=identifier.record,
                field=related_field,
                refinement='IsPartOf',
                value=work,
                hidden=True
            )

            # Reconcile the 'primary-work-record' marker with the flag:
            # delete stale markers, create a missing one.
            fv = list(FieldValue.objects.filter(
                record=identifier.record,
                field=system_field,
                label='primary-work-record'
            ))
            if len(fv) > 0:
                if not isprimary:
                    for f in fv:
                        f.delete()
            elif isprimary:
                FieldValue.objects.create(
                    record=identifier.record,
                    field=system_field,
                    label='primary-work-record',
                    value=work,
                    hidden=True,
                )

            pb.update(count)

        pb.done()