Example #1
0
    def get_default_pid(self):
        '''Generate the default pid for a DigitalObject in the Keep.

        When the PID manager client is available, a new ARK is minted
        there, recorded in the object metadata (as MODS identifiers if
        the object has a ``mods`` attribute, otherwise in the Dublin
        Core identifier list), and the noid portion of the ARK is
        combined with the object's default pidspace to form the pid
        that is returned.  When no PID manager client is available, the
        parent class implementation is used instead.'''

        # no pid manager available: fall back to default pid behavior
        if pidman is None:
            return super(DigitalObject, self).get_default_pid()

        # pidman wants a target for the new pid; build it by reversing
        # the named view with a placeholder pid made of the pid token
        placeholder_pid = '%s:%s' % (self.default_pidspace, self.PID_TOKEN)
        view_path = reverse(self.NEW_OBJECT_VIEW,
                            kwargs={'pid': placeholder_pid})
        # reverse() encodes the PID_TOKEN and the colon, so unquote the
        # path before absolutizing it to include scheme & server
        target_url = absolutize_url(urllib.unquote(view_path))

        # a pid name is not required, but helpful for managing pids
        ark_name = self.label
        # request a new ark in the configured pidman domain; the value
        # returned is the full, resolvable ark
        full_ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target_url,
                                     name=ark_name)

        # split the ark into its parts: name authority number and
        # nice opaque identifier
        ark_parts = parse_ark(full_ark)
        naan = ark_parts['naan']
        noid = ark_parts['noid']

        if hasattr(self, 'mods'):
            # mods available: store both the short-form ark and full uri
            short_ark = mods.Identifier(type='ark',
                                        text='ark:/%s/%s' % (naan, noid))
            uri_ark = mods.Identifier(type='uri', text=full_ark)
            self.mods.content.identifiers.extend([short_ark, uri_ark])
        else:
            # no mods: add the full uri ark to dc:identifier instead
            self.dc.content.identifier_list.append(full_ark)

        # the noid becomes the local part of the pid
        return '%s:%s' % (self.default_pidspace, noid)
Example #2
0
    def get_default_pid(self):
        '''Return a pid built from an unused PID manager ARK, if any.

        Unused pids are looked up in the PID manager (by searching for
        the ``UNUSED_PID_URL`` target) and cached on
        ``self._unused_pid_result``; each call pops one entry from that
        cache.  The re-used ARK is renamed to the object label, its
        target is pointed at the new object url and activated, and the
        ARK is stored in the object metadata.  When no unused pids are
        left, the parent class implementation is used instead.

        :returns: pid string in the form ``<default_pidspace>:<noid>``
        '''
        # The client is needed both for the search and for the updates
        # below, so create it unconditionally.  (Creating it only when the
        # cache was empty left the name unbound on later calls that were
        # served from the cache.)
        pidman = DjangoPidmanRestClient()

        if not self._unused_pid_result:
            result = pidman.search_pids(target=UNUSED_PID_URL)
            # if any were found, use results
            if result and result['results_count']:
                self._unused_pid_result = result['results']

        # if we have any unused pids, pop one off and use it
        if self._unused_pid_result:
            pid_info = self._unused_pid_result.pop()
            ark = pid_info['targets'][0]['access_uri']
            parsed_ark = parse_ark(ark)
            naan = parsed_ark['naan']  # name authority number
            noid = parsed_ark['noid']  # nice opaque identifier

            # use noid as basis for new pid
            pid = '%s:%s' % (self.default_pidspace, noid)
            # calculate target to new object
            target = reverse(self.NEW_OBJECT_VIEW, kwargs={'pid': pid})
            # reverse() returns a full path - absolutize so we get scheme & server also
            target = absolutize_url(target)
            # update pid ark label from object
            pidman.update_ark(noid, name=self.label)
            # update default ark target for new object url
            pidman.update_ark_target(noid, target_uri=target, active=True)

            # if we have a mods datastream, store the ARK as mods:identifier
            if hasattr(self, 'mods'):
                # store full uri and short-form ark
                self.mods.content.identifiers.extend([
                    mods.Identifier(type='ark', text='ark:/%s/%s' % (naan, noid)),
                    mods.Identifier(type='uri', text=ark)
                    ])

            # always add full uri ARK to dc:identifier
            self.dc.content.identifier_list.append(ark)

            # use the noid to construct a pid in the configured pidspace
            return pid

        else:
            # if we run out of pids re-use, fall back to default behavior
            return super(PidReuseDigitalObject, self).get_default_pid()
Example #3
0
    def update_progress(self, results, results_count):
        """Report the objects in Pidman and summarize in a CSV.

        Pages through all pids in the ``PIDMAN_RUSHDIE_DOMAIN`` domain and,
        for each one, checks whether a matching ``emory:<noid>`` object
        exists in Fedora.  Unless this is a dry run, pids without a Fedora
        object are renamed/retargeted to the configured "unused" values and
        pids with a Fedora object get their label and target uri synced to
        Fedora / the Keep url.  One row per pid is written to
        ``self.summary_log`` and a progress bar is advanced.

            :param results: results from pidman; only its
                ``max_results_per_page`` value is read here
            :param results_count: total count of objects founds within a collection
        """

        # update progress on the screen
        sys.stdout.write("%i objects in total.\n" % results_count)
        sys.stdout.flush()

        # initialize a progress bar following the Readux example
        pbar = ProgressBar(
            widgets=[Percentage(), ' (',
                     Counter(), ')',
                     Bar(), ETA()],
            maxval=results_count).start()

        # number of pages needed, plus one so it can serve as the exclusive
        # upper bound of the range below (pages are requested starting at 1)
        max_results_per_page = results["max_results_per_page"]
        pages = int(math.ceil(results_count / float(max_results_per_page))) + 1
        current_count = 0

        # iterate through all results fetched from pidman
        # NOTE: every page, including the first, is requested again here;
        # the entries in ``results`` itself are not used
        for page in range(1, pages):
            page_results = self.pidman.search_pids(
                domain_uri=settings.PIDMAN_RUSHDIE_DOMAIN, page=page)
            for page_result in page_results["results"]:
                # reset all per-pid report fields so that a failure part way
                # through still yields a row with whatever was collected
                pm_object_pid, pm_object_noid, pm_label, updated_pm_label, pm_target_uri = (
                    None, ) * 5
                in_fedora, fedora_object, fedora_label, fedora_create_time_stamp = (
                    None, ) * 4
                supposed_label, supposed_target_uri, status_label = (
                    None, ) * 3
                exception_label = "no-exception"

                # status label starts with the run mode; update outcomes
                # are appended to it below
                if not self.is_dry_run:
                    status_label = "actual-run"
                else:
                    status_label = "dry-run"

                try:
                    pm_object_pid = "emory:" + page_result["pid"]
                    pm_object_noid = page_result["pid"]
                    pm_label = page_result["name"]
                    pm_target_uri = page_result["targets"][0]["target_uri"]
                    fedora_object = self.repo.get_object(pm_object_pid)
                    in_fedora = True if fedora_object.exists else False

                    # fedora object doesn't exist:
                    # - mark item as PIDMAN_RUSHDIE_UNUSED_URI
                    # - use generic target URI PIDMAN_RUSHDIE_UNUSED_URI
                    # - set status_label as "unused-pid-identified"
                    if not fedora_object.exists:
                        if not self.is_dry_run:
                            pid_response = self.pidman.update_pid(
                                type="ark",
                                noid=pm_object_noid,
                                name=settings.PIDMAN_RUSHDIE_UNUSED)
                            target_response = self.pidman.update_target(
                                type="ark",
                                noid=pm_object_noid,
                                target_uri=settings.PIDMAN_RUSHDIE_UNUSED_URI)
                            # both responses must echo the requested values
                            # for the update to count as successful
                            if pid_response[
                                    "name"] == settings.PIDMAN_RUSHDIE_UNUSED and target_response[
                                        "target_uri"] == settings.PIDMAN_RUSHDIE_UNUSED_URI:
                                status_label += ", unused-pid-updated"
                            else:
                                status_label += ", unused-pid-update-failed"

                        # supposed label and target_uri
                        supposed_label = settings.PIDMAN_RUSHDIE_UNUSED
                        supposed_target_uri = settings.PIDMAN_RUSHDIE_UNUSED_URI

                    # fedora object exists
                    # - update label to that in Fedora
                    # - update target_uri to that in Fedora
                    else:
                        if not self.is_dry_run:
                            # label update
                            # NOTE: fedora_label is only populated on an
                            # actual run, so the fedora label column stays
                            # None in dry-run rows
                            fedora_label = fedora_object.label
                            if pm_label != fedora_label and fedora_label is not None:
                                response = self.pidman.update_pid(
                                    type="ark",
                                    noid=pm_object_noid,
                                    name=fedora_label)
                                if response["name"] == fedora_label:
                                    status_label += ", label-updated"
                                else:
                                    status_label += ", label-update-failed"

                            # target_uri update
                            # create the target_uri using the logic that is used in creating objects from TheKeep
                            keep_target = reverse(
                                fedora_object.NEW_OBJECT_VIEW,
                                kwargs={'pid': fedora_object.pid})
                            keep_target = urllib.unquote(keep_target)
                            keep_target_uri = absolutize_url(keep_target)
                            if pm_target_uri != keep_target_uri:
                                response = self.pidman.update_target(
                                    type="ark",
                                    noid=pm_object_noid,
                                    target_uri=keep_target_uri)
                                if keep_target_uri == response["target_uri"]:
                                    status_label += ", target_uri-updated"
                                else:
                                    status_label += ", target_uri-update-failed"

                        # supposed label and target_uri
                        # (computed in both dry-run and actual-run mode, so
                        # the Keep url is built a second time on actual runs)
                        keep_target = reverse(
                            fedora_object.NEW_OBJECT_VIEW,
                            kwargs={'pid': fedora_object.pid})
                        keep_target = urllib.unquote(keep_target)
                        supposed_label = fedora_object.label
                        supposed_target_uri = absolutize_url(keep_target)

                    # NOTE(review): this runs for objects that do not exist
                    # in Fedora as well - confirm `created` is usable there
                    fedora_create_time_stamp = fedora_object.created.strftime(
                        "%Y-%m-%d %H:%M:%S")
                except Exception as e:
                    # any failure is recorded in the report row rather than
                    # aborting the run; fields set before the failure are kept
                    exception_label = "Exception: %s" % str(e)

                # one CSV row per pid, written whether or not an exception
                # occurred
                self.summary_log.writerow((time.strftime("%Y-%m-%d %H:%M:%S", \
                    time.localtime()), \
                    status_label, \
                    pm_object_pid, \
                    pm_label, \
                    pm_target_uri, \
                    supposed_label, \
                    supposed_target_uri, \
                    str(in_fedora), \
                    fedora_label, \
                    fedora_create_time_stamp, \
                    exception_label))

                current_count += 1

                # update progress
                pbar.update(current_count)
Example #4
0
    def get_new_pid(self, obj):
        '''Return the pid to use for ``obj``, re-using an unused rushdie
        pid from the PID manager when one is found.

        When the search for unused pids returns results, the first one is
        renamed to the object label, retargeted to the object's Keep url
        and activated, and the ARK is added to the metadata of ``obj``.
        Otherwise the object's own ``get_default_pid()`` is used.
        '''
        # TODO: first, make sure object label is set appropriately before
        # minting new pid or updating an existing one

        # look for rushdie pids that are marked unused and can be re-assigned
        search_result = pidman.search_pids(
            domain_uri=settings.PIDMAN_RUSHDIE_DOMAIN,
            target=settings.PIDMAN_RUSHDIE_UNUSED_URI)

        unused_count = search_result.get('results_count', 0)
        logger.debug('Found %d unused rushdie pids' % unused_count)

        if not unused_count:
            # TEST this: can we use default get next pid for arrangement
            # objects (including email)?
            return obj.get_default_pid()

        # unused pids were found: take the first one
        candidate = search_result['results'][0]
        noid = candidate['pid']

        plural_suffix = 's' if unused_count != 1 else ''
        print('Found %d unused rushdie pid%s, using %s' %
              (unused_count, plural_suffix, noid))

        # make the ark name match the current object
        pidman.update_ark(noid=noid, name=obj.label)

        # build the keep url for this object, using the same logic
        # in keep.common.fedora for minting new pids
        new_pid = ':'.join([obj.default_pidspace, noid])
        keep_path = reverse(obj.NEW_OBJECT_VIEW, kwargs={'pid': new_pid})
        # reverse() encodes the colon, so unquote the url (it shouldn't
        # contain anything else that needs escaping), then absolutize it
        # to include the configured keep domain
        keep_url = absolutize_url(urllib.unquote(keep_path))
        # point the existing pid at the new Keep url and ensure it is active
        pidman.update_ark_target(noid=noid, target_uri=keep_url, active=True)

        ark_uri = candidate['targets'][0]['access_uri']
        # name authority number, needed for the short form of the ark
        naan = parse_ark(ark_uri)['naan']
        short_ark = 'ark:/%s/%s' % (naan, noid)

        # NOTE: adding to the old object metadata is semi useless,
        # since the old object will not be saved in the migration,
        # but it provides convenient access to ark and ark_uri

        # store the ark in the object metadata
        # (this logic duplicated from base get_default_pid method)
        if hasattr(obj, 'mods'):
            # mods available: store short-form ark and full uri
            ark_identifiers = [
                mods.Identifier(type='ark', text=short_ark),
                mods.Identifier(type='uri', text=ark_uri),
            ]
            obj.mods.content.identifiers.extend(ark_identifiers)
        else:
            # otherwise, add full uri ARK to dc:identifier
            obj.dc.content.identifier_list.append(ark_uri)

        return new_pid
Example #5
0
 def test_domain_with_scheme(self):
     # a site domain that already carries a scheme should yield exactly
     # that domain followed by the path
     site = self.site
     site.domain = 'http://example.com'
     site.save()
     absolute = absolutize_url('/foo/')
     self.assertEqual('http://example.com/foo/', absolute)
Example #6
0
 def test_domain_with_scheme(self):
     # a site domain that already carries a scheme should yield exactly
     # that domain followed by the path
     site = self.site
     site.domain = 'http://example.com'
     site.save()
     absolute = absolutize_url('/foo/')
     self.assertEqual('http://example.com/foo/', absolute)
Example #7
0
    def get_new_pid(self, obj):
        '''Return the pid to use for ``obj``, re-using an unused rushdie
        pid from the PID manager when one is found.

        When the search for unused pids returns results, the first one is
        renamed to the object label, retargeted to the object's Keep url
        and activated, and the ARK is added to the metadata of ``obj``.
        Otherwise the object's own ``get_default_pid()`` is used.
        '''
        # TODO: first, make sure object label is set appropriately before
        # minting new pid or updating an existing one

        # look for rushdie pids that are marked unused and can be re-assigned
        search_result = pidman.search_pids(
            domain_uri=settings.PIDMAN_RUSHDIE_DOMAIN,
            target=settings.PIDMAN_RUSHDIE_UNUSED_URI)

        unused_count = search_result.get('results_count', 0)
        logger.debug('Found %d unused rushdie pids' % unused_count)

        if not unused_count:
            # TEST this: can we use default get next pid for arrangement
            # objects (including email)?
            return obj.get_default_pid()

        # unused pids were found: take the first one
        candidate = search_result['results'][0]
        noid = candidate['pid']

        plural_suffix = 's' if unused_count != 1 else ''
        print('Found %d unused rushdie pid%s, using %s' %
              (unused_count, plural_suffix, noid))

        # make the ark name match the current object
        pidman.update_ark(noid=noid, name=obj.label)

        # build the keep url for this object, using the same logic
        # in keep.common.fedora for minting new pids
        new_pid = ':'.join([obj.default_pidspace, noid])
        keep_path = reverse(obj.NEW_OBJECT_VIEW, kwargs={'pid': new_pid})
        # reverse() encodes the colon, so unquote the url (it shouldn't
        # contain anything else that needs escaping), then absolutize it
        # to include the configured keep domain
        keep_url = absolutize_url(urllib.unquote(keep_path))
        # point the existing pid at the new Keep url and ensure it is active
        pidman.update_ark_target(noid=noid, target_uri=keep_url, active=True)

        ark_uri = candidate['targets'][0]['access_uri']
        # name authority number, needed for the short form of the ark
        naan = parse_ark(ark_uri)['naan']
        short_ark = 'ark:/%s/%s' % (naan, noid)

        # NOTE: adding to the old object metadata is semi useless,
        # since the old object will not be saved in the migration,
        # but it provides convenient access to ark and ark_uri

        # store the ark in the object metadata
        # (this logic duplicated from base get_default_pid method)
        if hasattr(obj, 'mods'):
            # mods available: store short-form ark and full uri
            ark_identifiers = [
                mods.Identifier(type='ark', text=short_ark),
                mods.Identifier(type='uri', text=ark_uri),
            ]
            obj.mods.content.identifiers.extend(ark_identifiers)
        else:
            # otherwise, add full uri ARK to dc:identifier
            obj.dc.content.identifier_list.append(ark_uri)

        return new_pid
Example #8
0
    def update_progress(self, object_class, content_model_name, total_count):
        """Update the objects in Pidman and report progress back to the user.

        For every object that has the first content model of
        ``object_class``, the Pidman label and target uri are compared with
        the Fedora object label and the Keep url for the object.  Unless
        this is a dry run, a Pidman label that differs from the Fedora
        label is updated.  One row per object is written to
        ``self.summary_log``; objects that could not be checked also get an
        individual log file under ``self.error_path``.

            :param object_class: the class of a object collection
            :param content_model_name: a human readable name for the content model/objects
            :param total_count: total count of objects founds within a collection
            :type object_class: class
            :type content_model_name: str
            :type total_count: number
        """

        # initialize counters and a status label (needs change vs. does not need change)
        change_count = 0
        nochange_count = 0
        status_label = ""

        # update progress on the screen
        sys.stdout.write("Starting %s task. %i objects in total.\n" %
                         (content_model_name, total_count))
        sys.stdout.flush()

        # bind a handler for interrupt signal
        signal.signal(signal.SIGINT, self.interrupt_handler)

        # initialize a progress bar following the Readux example
        pbar = ProgressBar(
            widgets=[Percentage(), ' (', Counter(), ')', Bar(), ETA()],
            maxval=total_count).start()

        # use generator to process each object
        object_uris = self.repo.risearch.get_subjects(
            modelns.hasModel, object_class.CONTENT_MODELS[0])
        for object_uri in object_uris:
            # reset all per-object report fields so that the CSV row only
            # contains values collected for this object
            digital_object_label, pidman_digital_object = None, None
            pidman_label, updated_pidman_label = None, None
            pidman_target_uri, keep_target_uri = None, None
            status_label, mismatch = None, None
            exception_string = ""
            hasException = False

            digital_object = self.repo.get_object(object_uri, object_class)
            digital_object_pid = digital_object.pid

            try:
                pidman_digital_object = self.pidman.search_pids(
                    domain_uri=settings.PIDMAN_DOMAIN, pid=digital_object.noid)
            except Exception as e:
                hasException = True
                exception_string += "Object %s is not found in Pidman. \
                    Error message: %s \n" % (digital_object_pid, str(e))

            if not hasException:
                # each lookup has its own handler so that one failure does
                # not hide the others in the error log
                try:
                    pidman_label = pidman_digital_object["results"][0].get(
                        "name", None)
                except KeyError:
                    hasException = True
                    exception_string += "Pidman object %s doesn't exist or doesn't have a valid label." % digital_object_pid
                except Exception as e:
                    hasException = True
                    exception_string += "Pidman object %s is not accessible. Error message: %s" % (
                        digital_object_pid, str(e))

                try:
                    digital_object_label = digital_object.label
                except AttributeError:
                    hasException = True
                    exception_string += "Fedora object %s doesn't have a label attribute." % digital_object_pid
                except Exception as e:
                    hasException = True
                    exception_string += "Fedora object %s is not accessible. \
                        Error message: %s \n" % (digital_object_pid, str(e))

                try:
                    pidman_target_uri = pidman_digital_object["results"][0][
                        "targets"][0].get("target_uri", None)
                except KeyError:
                    hasException = True
                    exception_string += "Pidman object %s doesn't exist or doesn't have a valid target_uri." % digital_object_pid
                except Exception as e:
                    hasException = True
                    exception_string += "Pidman object %s is not accessible. Error message: %s" % (
                        digital_object_pid, str(e))

                try:
                    # create the target_uri using the logic that is used in creating objects from TheKeep
                    keep_target = reverse(digital_object.NEW_OBJECT_VIEW,
                                          kwargs={'pid': digital_object.pid})
                    keep_target = urllib.unquote(keep_target)
                    keep_target_uri = absolutize_url(keep_target)
                except Exception as e:
                    hasException = True
                    # the template needs one placeholder per value; with a
                    # single %s the handler itself raised a TypeError
                    exception_string += "Keep target_uri creation failed for %s. Error message: %s" % (
                        digital_object_pid, str(e))

                try:
                    if hasException:
                        mismatch = ""
                    else:
                        uri_differs = pidman_target_uri != keep_target_uri
                        label_differs = pidman_label != digital_object_label
                        if uri_differs and label_differs:
                            mismatch = "label&uri"
                        elif uri_differs:
                            mismatch = "uri"
                        elif label_differs:
                            mismatch = "label"
                        else:
                            mismatch = "No"
                except Exception as e:
                    hasException = True
                    exception_string += "Either Pidman or Keep target_uri doesn't exist for %s. Error message: %s" % (
                        digital_object_pid, str(e))

            if not hasException:
                # execute irreversible update when the dry run flag is not set
                # be cautious
                if not self.is_dry_run:
                    try:
                        if pidman_label != digital_object_label and digital_object_label is not None:
                            response = self.pidman.update_pid(
                                type="ark",
                                noid=digital_object.noid,
                                name=digital_object_label)
                            updated_pidman_label = response["name"]
                    except Exception as e:
                        # a failed update is reported in the CSV row; the
                        # object is still counted as needing a change below
                        hasException = True
                        exception_string += "Pidman object %s is not updated. Error message: %s" % (
                            digital_object_pid, str(e))

                # when the names are not the same
                if pidman_label != digital_object_label:
                    change_count += 1
                    if updated_pidman_label == digital_object_label:
                        status_label = "changed"
                    else:
                        status_label = "change-needed"

                # when the names are the same
                else:
                    nochange_count += 1
                    status_label = "ok"

            else:
                # log the failure in a file; the context manager closes the
                # file even if the write fails
                error_file_path = "%s/%s.log" % (self.error_path,
                                                 digital_object.noid)
                with open(error_file_path, 'w+') as error_log:
                    error_log.write(
                        '[TIME]: %s, [CONTENT_MODEL]: %s, [PID]: %s\n %s \n' %
                        (time.strftime("%Y%m%d %H:%M:%S", time.localtime()),
                         content_model_name,
                         digital_object.noid,
                         exception_string))
                status_label = "error"

            # write to CSV
            self.summary_log.writerow((
                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                status_label,
                content_model_name,
                digital_object_pid,
                digital_object_label,
                pidman_label,
                pidman_target_uri,
                keep_target_uri,
                mismatch,
                exception_string))

            # update progress
            pbar.update(change_count + nochange_count)

            # stop early when the interrupt handler has flagged an interrupt
            if self.interrupted:
                break

        # update finish when all tasks are completed
        if not self.interrupted:
            pbar.finish()

        # write statistics; everything not counted as changed/unchanged is
        # reported as failed
        self.stdout.write("Total objects: %i \n" % total_count)
        self.stdout.write("No change: %i | Change required: %i | Failed (see logs): %i\n" \
            % (nochange_count, change_count, (total_count - nochange_count - change_count)))
Example #9
0
    def update_progress(self, object_class, content_model_name, total_count):
        """Update the objects in Pidman and report progress back to the user.

        For every object that has the first content model of
        ``object_class``, the Pidman label and target uri are compared with
        the Fedora object label and the Keep url for the object.  Unless
        this is a dry run, a Pidman label that differs from the Fedora
        label is updated.  One row per object is written to
        ``self.summary_log``; objects that could not be checked also get an
        individual log file under ``self.error_path``.

            :param object_class: the class of a object collection
            :param content_model_name: a human readable name for the content model/objects
            :param total_count: total count of objects founds within a collection
            :type object_class: class
            :type content_model_name: str
            :type total_count: number
        """

        # initialize counters and a status label (needs change vs. does not need change)
        change_count = 0
        nochange_count = 0
        status_label = ""

        # update progress on the screen
        sys.stdout.write("Starting %s task. %i objects in total.\n" %
                         (content_model_name, total_count))
        sys.stdout.flush()

        # bind a handler for interrupt signal
        signal.signal(signal.SIGINT, self.interrupt_handler)

        # initialize a progress bar following the Readux example
        pbar = ProgressBar(
            widgets=[Percentage(), ' (', Counter(), ')', Bar(), ETA()],
            maxval=total_count).start()

        # use generator to process each object
        object_uris = self.repo.risearch.get_subjects(
            modelns.hasModel, object_class.CONTENT_MODELS[0])
        for object_uri in object_uris:
            # reset all per-object report fields so that the CSV row only
            # contains values collected for this object
            digital_object_label, pidman_digital_object = None, None
            pidman_label, updated_pidman_label = None, None
            pidman_target_uri, keep_target_uri = None, None
            status_label, mismatch = None, None
            exception_string = ""
            hasException = False

            digital_object = self.repo.get_object(object_uri, object_class)
            digital_object_pid = digital_object.pid

            try:
                pidman_digital_object = self.pidman.search_pids(
                    domain_uri=settings.PIDMAN_DOMAIN, pid=digital_object.noid)
            except Exception as e:
                hasException = True
                exception_string += "Object %s is not found in Pidman. \
                    Error message: %s \n" % (digital_object_pid, str(e))

            if not hasException:
                # each lookup has its own handler so that one failure does
                # not hide the others in the error log
                try:
                    pidman_label = pidman_digital_object["results"][0].get(
                        "name", None)
                except KeyError:
                    hasException = True
                    exception_string += "Pidman object %s doesn't exist or doesn't have a valid label." % digital_object_pid
                except Exception as e:
                    hasException = True
                    exception_string += "Pidman object %s is not accessible. Error message: %s" % (
                        digital_object_pid, str(e))

                try:
                    digital_object_label = digital_object.label
                except AttributeError:
                    hasException = True
                    exception_string += "Fedora object %s doesn't have a label attribute." % digital_object_pid
                except Exception as e:
                    hasException = True
                    exception_string += "Fedora object %s is not accessible. \
                        Error message: %s \n" % (digital_object_pid, str(e))

                try:
                    pidman_target_uri = pidman_digital_object["results"][0][
                        "targets"][0].get("target_uri", None)
                except KeyError:
                    hasException = True
                    exception_string += "Pidman object %s doesn't exist or doesn't have a valid target_uri." % digital_object_pid
                except Exception as e:
                    hasException = True
                    exception_string += "Pidman object %s is not accessible. Error message: %s" % (
                        digital_object_pid, str(e))

                try:
                    # create the target_uri using the logic that is used in creating objects from TheKeep
                    keep_target = reverse(digital_object.NEW_OBJECT_VIEW,
                                          kwargs={'pid': digital_object.pid})
                    keep_target = urllib.unquote(keep_target)
                    keep_target_uri = absolutize_url(keep_target)
                except Exception as e:
                    hasException = True
                    # the template needs one placeholder per value; with a
                    # single %s the handler itself raised a TypeError
                    exception_string += "Keep target_uri creation failed for %s. Error message: %s" % (
                        digital_object_pid, str(e))

                try:
                    if hasException:
                        mismatch = ""
                    else:
                        uri_differs = pidman_target_uri != keep_target_uri
                        label_differs = pidman_label != digital_object_label
                        if uri_differs and label_differs:
                            mismatch = "label&uri"
                        elif uri_differs:
                            mismatch = "uri"
                        elif label_differs:
                            mismatch = "label"
                        else:
                            mismatch = "No"
                except Exception as e:
                    hasException = True
                    exception_string += "Either Pidman or Keep target_uri doesn't exist for %s. Error message: %s" % (
                        digital_object_pid, str(e))

            if not hasException:
                # execute irreversible update when the dry run flag is not set
                # be cautious
                if not self.is_dry_run:
                    try:
                        if pidman_label != digital_object_label and digital_object_label is not None:
                            response = self.pidman.update_pid(
                                type="ark",
                                noid=digital_object.noid,
                                name=digital_object_label)
                            updated_pidman_label = response["name"]
                    except Exception as e:
                        # a failed update is reported in the CSV row; the
                        # object is still counted as needing a change below
                        hasException = True
                        exception_string += "Pidman object %s is not updated. Error message: %s" % (
                            digital_object_pid, str(e))

                # when the names are not the same
                if pidman_label != digital_object_label:
                    change_count += 1
                    if updated_pidman_label == digital_object_label:
                        status_label = "changed"
                    else:
                        status_label = "change-needed"

                # when the names are the same
                else:
                    nochange_count += 1
                    status_label = "ok"

            else:
                # log the failure in a file; the context manager closes the
                # file even if the write fails
                error_file_path = "%s/%s.log" % (self.error_path,
                                                 digital_object.noid)
                with open(error_file_path, 'w+') as error_log:
                    error_log.write(
                        '[TIME]: %s, [CONTENT_MODEL]: %s, [PID]: %s\n %s \n' %
                        (time.strftime("%Y%m%d %H:%M:%S", time.localtime()),
                         content_model_name,
                         digital_object.noid,
                         exception_string))
                status_label = "error"

            # write to CSV
            self.summary_log.writerow((
                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                status_label,
                content_model_name,
                digital_object_pid,
                digital_object_label,
                pidman_label,
                pidman_target_uri,
                keep_target_uri,
                mismatch,
                exception_string))

            # update progress
            pbar.update(change_count + nochange_count)

            # stop early when the interrupt handler has flagged an interrupt
            if self.interrupted:
                break

        # update finish when all tasks are completed
        if not self.interrupted:
            pbar.finish()

        # write statistics; everything not counted as changed/unchanged is
        # reported as failed
        self.stdout.write("Total objects: %i \n" % total_count)
        self.stdout.write("No change: %i | Change required: %i | Failed (see logs): %i\n" \
            % (nochange_count, change_count, (total_count - nochange_count - change_count)))