Exemple #1
0
    def __init__(self):
        """Make sure the system field exists, then signal the middleware is unused."""
        # Fetch the system field up front so that it is not created several
        # times later on by code racing to create it.
        get_system_field()

        # This only has to happen once, so bow out right after.
        raise MiddlewareNotUsed()
Exemple #2
0
    def __init__(self):
        """Initialize the system field once and then raise MiddlewareNotUsed."""
        # Calling this early avoids a race condition in which the system
        # field would otherwise get created multiple times.
        get_system_field()

        # Nothing else to do here: a single run is enough.
        raise MiddlewareNotUsed()
Exemple #3
0
    def testKeepSystemFieldValues(self):
        """Updating a record through an import keeps its system field value."""
        dc_identifier = Field.objects.get(
            name='identifier', standard__prefix='dc')
        dc_title = Field.objects.get(name='title', standard__prefix='dc')
        system_field = get_system_field()

        # Existing record with an identifier, a title and a system value
        existing = Record.objects.create(name='s001')
        CollectionItem.objects.create(
            record=existing, collection=self.collection)
        for field, value in ((dc_identifier, 'S001'),
                             (dc_title, 'Title'),
                             (system_field, 'Keep this')):
            existing.fieldvalue_set.create(field=field, value=value)

        # One new record (S002) and one update of the existing one (S001)
        spreadsheet = StringIO(
            "Identifier,Title\nS002,NewTitle2\nS001,NewTitle1")
        importer = SpreadsheetImport(spreadsheet, [self.collection])
        importer.name_field = 'Identifier'
        importer.run(update=True)

        self.assertEqual(1, importer.added)
        self.assertEqual(1, importer.updated)
        self.assertEqual(0, importer.added_skipped)
        self.assertEqual(0, importer.updated_skipped)

        def values_of(record_name, field):
            record = self.collection.records.get(name=record_name)
            return record.fieldvalue_set.filter(field=field)

        titles_s001 = values_of('s001', dc_title)
        titles_s002 = values_of('s002', dc_title)
        system_s001 = values_of('s001', system_field)

        self.assertEqual('NewTitle1', titles_s001[0].value)
        self.assertEqual('NewTitle2', titles_s002[0].value)
        self.assertEqual('Keep this', system_s001[0].value)
Exemple #4
0
    def testKeepSystemFieldValues(self):
        """An update import must not remove the record's system field value."""
        no_signals()
        dc_identifier = Field.objects.get(name='identifier', standard__prefix='dc')
        dc_title = Field.objects.get(name='title', standard__prefix='dc')
        system_field = get_system_field()

        # Record that already exists before the import runs
        existing = Record.objects.create(name='s001')
        CollectionItem.objects.create(record=existing, collection=self.collection)
        initial_values = (
            (dc_identifier, 'S001'),
            (dc_title, 'Title'),
            (system_field, 'Keep this'),
        )
        for field, value in initial_values:
            existing.fieldvalue_set.create(field=field, value=value)

        # S002 is new, S001 updates the record created above
        spreadsheet = StringIO("Identifier,Title\nS002,NewTitle2\nS001,NewTitle1")
        importer = SpreadsheetImport(spreadsheet, [self.collection])
        importer.name_field = 'Identifier'
        importer.run(update=True)

        self.assertEqual(1, importer.added)
        self.assertEqual(1, importer.updated)
        self.assertEqual(0, importer.added_skipped)
        self.assertEqual(0, importer.updated_skipped)

        def values_of(record_name, field):
            record = self.collection.records.get(name=record_name)
            return record.fieldvalue_set.filter(field=field)

        titles_s001 = values_of('s001', dc_title)
        titles_s002 = values_of('s002', dc_title)
        system_s001 = values_of('s001', system_field)

        self.assertEqual('NewTitle1', titles_s001[0].value)
        self.assertEqual('NewTitle2', titles_s002[0].value)
        self.assertEqual('Keep this', system_s001[0].value)
    def run(self, update=True, add=True, test=False, update_names=False, target_collections=None, skip_rows=0):
        """Import the rows supplied by the reader as records.

        Consecutive rows that share an identifier, or that carry no identifier
        of their own, are merged into one logical row.  Every logical row is
        then looked up by identifier among the non-owned identifier field
        values of ``self.collections`` and is either added as a new record,
        used to update the single matching record, or skipped.  Each outcome
        increments the matching counter (``added``, ``updated``,
        ``added_skipped``, ``updated_skipped``, ``owner_skipped``,
        ``no_id_skipped``, ``duplicate_in_file_skipped``,
        ``duplicate_in_collection_skipped``) and invokes the matching
        ``on_*`` callbacks.

        Arguments:
        update -- update the matching record; if false the row is counted
            as ``updated_skipped``
        add -- create a record when nothing matches; if false the row is
            counted as ``added_skipped``
        test -- dry run: counters and callbacks still fire, but no record,
            field value or collection item is created or changed here
        update_names -- when updating, also overwrite the record name with
            the value of the ``name_field`` column
        target_collections -- collections imported records are attached to;
            ``self.collections`` is used when this is empty or None
        skip_rows -- number of leading rows to discard from the reader

        Raises SpreadsheetImport.NoIdentifierException if no column is mapped
        to an identifier field.
        """
        if not self.analyzed:
            self.analyze(preview_rows=1)

        identifier_field = self.get_identifier_field()
        if not identifier_field:
            raise SpreadsheetImport.NoIdentifierException("No column is mapped to an identifier field")

        system_field = get_system_field()

        def record_name(row):
            # The name column may be absent or hold an empty value; both
            # yield None instead of failing on the [0] subscript
            if not self.name_field:
                return None
            return (row.get(self.name_field) or [None])[0]

        def attach_to_collections(record):
            for collection in target_collections or self.collections:
                CollectionItem.objects.get_or_create(record=record, collection=collection)

        def apply_values(record, row, is_new=False):
            if not is_new:
                # Replace all non-owned values but keep system field values
                record.fieldvalue_set.filter(~Q(field=system_field), owner=None).delete()
            for field, values in row.items():
                target = self.mapping.get(field)
                if target and values:
                    for order, value in enumerate(values):
                        record.fieldvalue_set.create(
                            field=target,
                            value=value,
                            label=self.labels.get(field),
                            order=self.order.get(field, order),
                            hidden=self.hidden.get(field, False),
                        )

        def skip_without_id():
            self.no_id_skipped += 1
            for func in self.on_no_id_skipped:
                func(None)

        reader = self._get_reader()

        self.added = self.added_skipped = 0
        self.updated = self.updated_skipped = 0
        self.duplicate_in_file_skipped = self.duplicate_in_collection_skipped = 0
        self.no_id_skipped = self.owner_skipped = 0
        self.processed_ids = dict()

        def process_row(row):
            ids = row[identifier_field]
            ids_key = "\n".join(ids)
            if ids_key in self.processed_ids:
                self.duplicate_in_file_skipped += 1
                for func in self.on_duplicate_in_file_skipped:
                    func(ids)
                return
            self.processed_ids[ids_key] = None
            fvs = FieldValue.objects.select_related("record").filter(
                record__collection__in=self.collections, owner=None, field__in=self._identifier_ids, value__in=ids
            )
            if not fvs:
                if add:
                    # create new record
                    if not test:
                        record = Record.objects.create(owner=self.owner, name=record_name(row))
                        apply_values(record, row, is_new=True)
                        attach_to_collections(record)
                    self.added += 1
                    for func in self.on_added:
                        func(ids)
                else:
                    # adding new records is disabled
                    self.added_skipped += 1
                    for func in self.on_added_skipped:
                        func(ids)
            elif len(fvs) == 1:
                if fvs[0].record.owner == self.owner:
                    if update:
                        # update existing record (including records just created in previous row)
                        if not test:
                            record = fvs[0].record
                            apply_values(record, row)
                            if update_names:
                                record.name = record_name(row)
                                record.save(force_update_name=True)
                            attach_to_collections(record)
                        self.updated += 1
                        for func in self.on_updated:
                            func(ids)
                    else:
                        # updating records is disabled
                        self.updated_skipped += 1
                        for func in self.on_updated_skipped:
                            func(ids)
                else:
                    self.owner_skipped += 1
                    for func in self.on_owner_skipped:
                        func(ids)
            else:
                # duplicate id found
                self.duplicate_in_collection_skipped += 1
                for func in self.on_duplicate_in_collection_skipped:
                    func(ids)

        for _ in range(skip_rows):
            # Skipping past the end of the data leaves nothing to import
            # rather than leaking StopIteration to the caller
            next(reader, None)

        last_row = None
        for row in reader:
            row = self._split_values(row)
            if not last_row:
                last_row = row
                continue

            # compare IDs of current and last rows
            last_id = last_row.get(identifier_field)
            if not last_id:
                last_row = row
                skip_without_id()
                continue

            current_id = row.get(identifier_field)

            if not current_id or (last_id == current_id):
                # combine current and last rows; a column may be missing or
                # hold an empty value in the last row, so start from a list
                for key, values in row.items():
                    merged = last_row.get(key) or []
                    for value in values or []:
                        if value not in merged:
                            merged.append(value)
                    last_row[key] = merged
                for func in self.on_continuation:
                    func(last_id)
            else:
                process_row(last_row)
                last_row = row

        if last_row:
            # The pending row gets the same identifier check as every row
            # inside the loop before it is processed
            if last_row.get(identifier_field):
                process_row(last_row)
            else:
                skip_without_id()
    def run(self,
            update=True,
            add=True,
            test=False,
            update_names=False,
            target_collections=None,
            skip_rows=0):
        """Import the rows supplied by the reader as records.

        Consecutive rows that share an identifier, or that carry no identifier
        of their own, are merged into one logical row.  Every logical row is
        then looked up by identifier among the non-owned identifier field
        values of ``self.collections`` and is either added as a new record,
        used to update the single matching record, or skipped.  Each outcome
        increments the matching counter (``added``, ``updated``,
        ``added_skipped``, ``updated_skipped``, ``owner_skipped``,
        ``no_id_skipped``, ``duplicate_in_file_skipped``,
        ``duplicate_in_collection_skipped``) and invokes the matching
        ``on_*`` callbacks.

        Arguments:
        update -- update the matching record; if false the row is counted
            as ``updated_skipped``
        add -- create a record when nothing matches; if false the row is
            counted as ``added_skipped``
        test -- dry run: counters and callbacks still fire, but no record,
            field value or collection item is created or changed here
        update_names -- when updating, also overwrite the record name with
            the value of the ``name_field`` column
        target_collections -- collections imported records are attached to;
            ``self.collections`` is used when this is empty or None
        skip_rows -- number of leading rows to discard from the reader

        Raises SpreadsheetImport.NoIdentifierException if no column is mapped
        to an identifier field.
        """
        if not self.analyzed:
            self.analyze(preview_rows=1)

        identifier_field = self.get_identifier_field()
        if not identifier_field:
            raise SpreadsheetImport.NoIdentifierException(
                'No column is mapped to an identifier field')

        system_field = get_system_field()

        def record_name(row):
            # The name column may be absent or hold an empty value; both
            # yield None instead of failing on the [0] subscript
            if not self.name_field:
                return None
            return (row.get(self.name_field) or [None])[0]

        def attach_to_collections(record):
            for collection in target_collections or self.collections:
                CollectionItem.objects.get_or_create(
                    record=record, collection=collection)

        def apply_values(record, row, is_new=False):
            if not is_new:
                # Replace all non-owned values but keep system field values
                record.fieldvalue_set.filter(~Q(field=system_field),
                                             owner=None).delete()
            for field, values in row.items():
                target = self.mapping.get(field)
                if target and values:
                    for order, value in enumerate(values):
                        record.fieldvalue_set.create(
                            field=target,
                            value=value,
                            label=self.labels.get(field),
                            order=self.order.get(field, order),
                            hidden=self.hidden.get(field, False))

        def skip_without_id():
            self.no_id_skipped += 1
            for func in self.on_no_id_skipped:
                func(None)

        reader = self._get_reader()

        self.added = self.added_skipped = 0
        self.updated = self.updated_skipped = 0
        self.duplicate_in_file_skipped = 0
        self.duplicate_in_collection_skipped = 0
        self.no_id_skipped = self.owner_skipped = 0
        self.processed_ids = dict()

        def process_row(row):
            ids = row[identifier_field]
            ids_key = '\n'.join(ids)
            if ids_key in self.processed_ids:
                self.duplicate_in_file_skipped += 1
                for func in self.on_duplicate_in_file_skipped:
                    func(ids)
                return
            self.processed_ids[ids_key] = None
            fvs = FieldValue.objects.select_related('record').filter(
                record__collection__in=self.collections,
                owner=None,
                field__in=self._identifier_ids,
                value__in=ids)
            if not fvs:
                if add:
                    # create new record
                    if not test:
                        record = Record.objects.create(
                            owner=self.owner,
                            name=record_name(row))
                        apply_values(record, row, is_new=True)
                        attach_to_collections(record)
                    self.added += 1
                    for func in self.on_added:
                        func(ids)
                else:
                    # adding new records is disabled
                    self.added_skipped += 1
                    for func in self.on_added_skipped:
                        func(ids)
            elif len(fvs) == 1:
                if fvs[0].record.owner == self.owner:
                    if update:
                        # update existing record
                        # (including records just created in previous row)
                        if not test:
                            record = fvs[0].record
                            apply_values(record, row)
                            if update_names:
                                record.name = record_name(row)
                                record.save(force_update_name=True)
                            attach_to_collections(record)
                        self.updated += 1
                        for func in self.on_updated:
                            func(ids)
                    else:
                        # updating records is disabled
                        self.updated_skipped += 1
                        for func in self.on_updated_skipped:
                            func(ids)
                else:
                    self.owner_skipped += 1
                    for func in self.on_owner_skipped:
                        func(ids)
            else:
                # duplicate id found
                self.duplicate_in_collection_skipped += 1
                for func in self.on_duplicate_in_collection_skipped:
                    func(ids)

        for _ in range(skip_rows):
            # Skipping past the end of the data leaves nothing to import
            # rather than leaking StopIteration to the caller
            next(reader, None)

        last_row = None
        for row in reader:
            row = self._split_values(row)
            if not last_row:
                last_row = row
                continue

            # compare IDs of current and last rows
            last_id = last_row.get(identifier_field)
            if not last_id:
                last_row = row
                skip_without_id()
                continue

            current_id = row.get(identifier_field)

            if not current_id or (last_id == current_id):
                # combine current and last rows; a column may be missing or
                # hold an empty value in the last row, so start from a list
                for key, values in row.items():
                    merged = last_row.get(key) or []
                    for value in (values or []):
                        if value not in merged:
                            merged.append(value)
                    last_row[key] = merged
                for func in self.on_continuation:
                    func(last_id)
            else:
                process_row(last_row)
                last_row = row

        if last_row:
            # The pending row gets the same identifier check as every row
            # inside the loop before it is processed
            if last_row.get(identifier_field):
                process_row(last_row)
            else:
                skip_without_id()
    def run(self, update=True, add=True, test=False, target_collections=None,
            skip_rows=0):
        """Import the rows supplied by the reader as records.

        Consecutive rows that share an identifier, or that carry no identifier
        of their own, are merged into one logical row.  Every logical row is
        then looked up by identifier among the non-owned identifier field
        values of ``self.collections`` and is either added as a new record,
        used to update the single matching record, or skipped.  Each outcome
        increments the matching counter (``added``, ``updated``,
        ``added_skipped``, ``updated_skipped``, ``owner_skipped``,
        ``no_id_skipped``, ``duplicate_in_file_skipped``,
        ``duplicate_in_collection_skipped``) and invokes the matching
        ``on_*`` callbacks.

        Record indexing is delayed on every row read and resumed once the
        loop ends, whether it completes or raises.

        Arguments:
        update -- update the matching record; if false the row is counted
            as ``updated_skipped``
        add -- create a record when nothing matches; if false the row is
            counted as ``added_skipped``
        test -- dry run: counters and callbacks still fire, but no record,
            field value or collection item is created or changed here
        target_collections -- collections imported records are attached to;
            ``self.collections`` is used when this is empty or None
        skip_rows -- number of leading rows to discard from the reader

        Raises SpreadsheetImport.NoIdentifierException if no column is mapped
        to an identifier field.
        """
        if not self.analyzed:
            self.analyze(preview_rows=1)

        identifier_field = self.get_identifier_field()
        if not identifier_field:
            raise SpreadsheetImport.NoIdentifierException(
                'No column is mapped to an identifier field')

        system_field = get_system_field()

        def attach_to_collections(record):
            for collection in target_collections or self.collections:
                CollectionItem.objects.get_or_create(
                    record=record,
                    collection=collection)

        def apply_values(record, row, is_new=False):
            if not is_new:
                # Replace all non-owned values but keep system field values
                record.fieldvalue_set.filter(~Q(field=system_field),
                                             owner=None).delete()
            for field, values in row.items():
                target = self.mapping.get(field)
                if target and values:
                    for order, value in enumerate(values):
                        record.fieldvalue_set.create(
                            field=target,
                            value=value,
                            label=self.labels.get(field),
                            order=self.order.get(field, order),
                            hidden=self.hidden.get(field, False),
                            refinement=self.refinements.get(field))

        def skip_without_id():
            self.no_id_skipped += 1
            for func in self.on_no_id_skipped:
                func(None)

        reader = self._get_reader()

        self.added = self.added_skipped = 0
        self.updated = self.updated_skipped = 0
        self.duplicate_in_file_skipped = 0
        self.duplicate_in_collection_skipped = 0
        self.no_id_skipped = self.owner_skipped = 0
        self.processed_ids = dict()

        def process_row(row):
            ids = row[identifier_field]
            ids_key = '\n'.join(ids)
            if ids_key in self.processed_ids:
                self.duplicate_in_file_skipped += 1
                for func in self.on_duplicate_in_file_skipped:
                    func(ids)
                return
            self.processed_ids[ids_key] = None
            # index_value is matched on the first 32 characters of each
            # identifier; the prefixes are built as a list so the lookup
            # value can be iterated more than once
            fvs = FieldValue.objects.select_related('record').filter(
                record__collection__in=self.collections,
                owner=None,
                field__in=self._identifier_ids,
                index_value__in=[x[:32] for x in ids],
                value__in=ids)
            if not fvs:
                if add:
                    # create new record
                    if not test:
                        record = Record.objects.create(owner=self.owner)
                        apply_values(record, row, is_new=True)
                        attach_to_collections(record)
                    self.added += 1
                    for func in self.on_added:
                        func(ids)
                else:
                    # adding new records is disabled
                    self.added_skipped += 1
                    for func in self.on_added_skipped:
                        func(ids)
            elif len(fvs) == 1:
                if fvs[0].record.owner == self.owner:
                    if update:
                        # update existing record
                        # (including records just created in previous row)
                        if not test:
                            record = fvs[0].record
                            apply_values(record, row)
                            attach_to_collections(record)
                        self.updated += 1
                        for func in self.on_updated:
                            func(ids)
                    else:
                        # updating records is disabled
                        self.updated_skipped += 1
                        for func in self.on_updated_skipped:
                            func(ids)
                else:
                    self.owner_skipped += 1
                    for func in self.on_owner_skipped:
                        func(ids)
            else:
                # duplicate id found
                self.duplicate_in_collection_skipped += 1
                for func in self.on_duplicate_in_collection_skipped:
                    func(ids)

        for _ in range(skip_rows):
            # Skipping past the end of the data leaves nothing to import
            # rather than leaking StopIteration to the caller
            next(reader, None)

        last_row = None
        try:

            for row in reader:

                # On every row, delay record indexing for a little longer
                delay_record_indexing()
                # NOTE(review): presumably clears Django's per-connection
                # query log so it does not grow with every row -- confirm
                reset_queries()

                row = self._split_values(row)
                if not last_row:
                    last_row = row
                    continue

                # compare IDs of current and last rows
                last_id = last_row.get(identifier_field)
                if not last_id:
                    last_row = row
                    skip_without_id()
                    continue

                current_id = row.get(identifier_field)

                if not current_id or (last_id == current_id):
                    # combine current and last rows
                    for key, values in row.items():
                        merged = last_row.get(key) or []
                        for value in (values or []):
                            if value not in merged:
                                merged.append(value)
                        last_row[key] = merged
                    for func in self.on_continuation:
                        func(last_id)
                else:
                    process_row(last_row)
                    last_row = row

            if last_row:
                # The pending row gets the same identifier check as every
                # row inside the loop before it is processed
                if last_row.get(identifier_field):
                    process_row(last_row)
                else:
                    skip_without_id()
        finally:
            resume_record_indexing()