Ejemplo n.º 1
0
    def test_ht_marc(self):
        """Check the HT MARC record that ``Utils.create_ht_marc`` builds.

        For barcode ``010000666241`` the serialized record must contain
        exactly one OCLC marker, exactly one 999 field carrying the barcode
        in subfield ``i``, no bare ``(Aleph`` reference, and the
        ``(GEU)Aleph000116142`` reference in the last 035 field.
        """
        barcode = '010000666241'
        rec = Utils.create_ht_marc(barcode)
        # Serialize once and reuse it for both whole-record searches.
        serialized = rec.serialize()

        # Only one field for the OCLC number.
        oclc_hits = re.findall('OCoLC|ocm|ocn', serialized)
        self.assertEqual(len(oclc_hits), 1)

        # Only one 999 field for HT ...
        self.assertEqual(len(rec.tag_999), 1)
        # ... and that one needs the barcode in subfield code "i".
        self.assertIn(
            '<subfield code="i">%s</subfield>' % barcode,
            rec.tag_999[0].serialize()
        )

        # The Aleph reference must change from `(Aleph)..` to `(GEU)Aleph...`,
        # so no bare `(Aleph` may remain. Raw string: `\(` is a regex escape,
        # not a Python string escape.
        self.assertEqual(len(re.findall(r'\(Aleph', serialized)), 0)
        # Make sure the new reference was added correctly. Checking the last
        # 035 field also verifies that it was put in the right spot.
        self.assertIn('(GEU)Aleph000116142', rec.field_035[-1].serialize())
Ejemplo n.º 2
0
    def load(self, *args, **kwargs):
        """Scan the KDIP directory and create missing KDip objects.

        Class method to scan data directory specified in the ``localsettings``
        **KDIP_DIR** and create new KDIP objects in the database.

        With a positional argument, ``args[0]`` is treated as a KDip to
        reprocess: it is re-fetched by primary key, its previous validation
        errors are deleted, it is validated again and
        ``Utils.create_ht_marc`` is called for it.

        Without positional arguments, ``kdip_dir`` (a name defined outside
        this method) is walked with ``os.walk``:

        * directories whose name matches an existing ``KDip.kdip_id`` get
          their stored ``path`` updated when it differs from where they were
          found;
        * unknown directories whose name starts with a digit, and whose full
          path is not in the exclude list, are created as new KDips and
          validated.

        Paths containing ``settings.SKIP_DIR`` are ignored; when that setting
        is absent nothing is skipped.

        Keyword arguments read:
            kdip_enumcron -- if truthy, stored as the note of each newly
                created KDip and passed to ``Utils.update_999a``.
            kdip_pid -- if truthy, stored as the pid of each newly created
                KDip.
        """

        # The only thing that should be sending any args is when the kdip is
        # set to reprocess; the kdip object will be the first (and only) arg.
        if args:
            reproc_kdip = args[0]
            # We need to make sure that we are sending the right type of
            # object. Just sending `args[0]` had issues, most notably with
            # the Mets validation, so re-fetch it from the database.
            kdip = KDip.objects.get(pk=reproc_kdip.id)
            # Clear out previous validation errors.
            errors = kdip.validationerror_set.all()
            errors.delete()
            kdip.validate()
            # NOTE(review): the other call in this method passes a 12-char
            # barcode string, this one passes the KDip object -- confirm that
            # create_ht_marc accepts both.
            Utils.create_ht_marc(kdip)

        else:
            # Maps new KDip directory name -> parent directory it was found in.
            kdip_list = {}
            exclude = ['%s/HT' % kdip_dir, '%s/out_of_scope' % kdip_dir, '%s/test' % kdip_dir]

            # Loop-invariant, so look it up once. None means the setting is
            # absent and no path should be skipped.
            skip = getattr(settings, 'SKIP_DIR', None)

            for path, subdirs, files in os.walk(kdip_dir):
                for subdir in subdirs:
                    # Candidate KDip directories have names starting with a digit.
                    kdip = re.search(r"^[0-9]", subdir)
                    full_path = os.path.join(path, subdir)

                    # Ignore anything under the configured skip directory.
                    # The explicit None check avoids the TypeError that
                    # `None not in path` would raise.
                    if skip is not None and skip in path:
                        continue

                    # Only process new KDips, or known ones that have moved.
                    try:
                        processed_KDip = KDip.objects.get(kdip_id=subdir)
                    except KDip.DoesNotExist:
                        if kdip and full_path not in exclude:
                            kdip_list[subdir] = path
                    else:
                        # Check to see if the KDip has moved and update the
                        # path. Compare the stored path, not the model
                        # instance, so unchanged KDips are not re-saved on
                        # every scan.
                        if processed_KDip.path != path:
                            processed_KDip.path = path
                            processed_KDip.save()

            # Empty list to gather errant KDips
            bad_kdips = []

            # Create the KDip if it does not exist.
            for k in kdip_list:
                try:
                    # Look up the bib record (first 12 chars of the directory
                    # name are used as the lookup key) for the note field.
                    bib_rec = Utils.create_ht_marc(k[:12])
                    # Find the OCLC in the MARCXML.
                    # First an empty list to put all the 035 tags in.
                    oclc_tags = []
                    for oclc_tag in bib_rec.tag_035a:
                        oclc_search = re.search('<.*>(.*?)</.*>', oclc_tag.serialize())
                        # Make a readable list of 035$a tags. A tag that does
                        # not match raises here and lands in the handler below.
                        oclc_tags.append(oclc_search.group(1))
                    # The oclc field can have a few patterns. We want the
                    # first match; no match raises StopIteration, which is
                    # handled below like any other failure.
                    # NOTE(review): `and` binds tighter than `or`, so the
                    # alma_number exclusion only applies to the "ocn" case.
                    # The parentheses make that existing grouping explicit;
                    # confirm whether it was meant to cover all three patterns.
                    oclc = next(
                        oclc_val for oclc_val in oclc_tags
                        if "(OCoLC)" in oclc_val
                        or "ocm" in oclc_val
                        or ("ocn" in oclc_val
                            and bib_rec.alma_number not in oclc_val)
                    )
                    # Remove all non-numeric characters
                    oclc = re.sub("[^0-9]", "", oclc)

                    # Set the note field to 'EnumCron not found' if the 999a
                    # field is empty or missing.
                    note = bib_rec.note(k[:12]) or 'EnumCron not found'

                    defaults = {
                        'create_date': datetime.fromtimestamp(os.path.getctime('%s/%s' % (kdip_list[k], k))),
                        'note': note,
                        'path': kdip_list[k],
                        'oclc': oclc
                    }

                    kdip, created = self.objects.get_or_create(kdip_id=k, defaults=defaults)
                    if created:
                        logger.info("Created KDip %s" % kdip.kdip_id)

                        # NOTE(review): note/pid are assigned without an
                        # explicit save() here; presumably validate()
                        # persists them -- confirm.
                        if kwargs.get('kdip_enumcron'):
                            kdip.note = kwargs.get('kdip_enumcron')
                            Utils.update_999a(kdip.path, kdip.kdip_id, kwargs.get('kdip_enumcron'))

                        if kwargs.get('kdip_pid'):
                            kdip.pid = kwargs.get('kdip_pid')

                        kdip.validate()

                        # If the KDip had errors, add it to the list so an email alert can be sent.
                        if kdip.status == 'invalid':
                            bad_kdips.append(kdip.kdip_id)

                    # KDips that already existed are not re-validated here.

                except Exception:
                    # Best effort: one bad directory must not stop the scan,
                    # but KeyboardInterrupt/SystemExit are no longer swallowed.
                    bad_kdips.append(k)
                    logger.error("Error creating KDip %s : %s" % (k, sys.exc_info()[0]))

            # One errant KDip id per line.
            bad_kdip_list = '\n'.join(map(str, bad_kdips))