Example #1
0
class Command(LoggingCommand):
    """Load holdings records once all title records have been loaded."""

    help = "Load a holdings records after title records are all loaded"  # NOQA: A003
    args = '<location of holdings directory>'

    # Default to "<bib dir>/holdings" when a bib directory is configured,
    # otherwise None (caller may still pass an explicit location).
    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        default_location = bib_in_settings + '/holdings'
    else:
        default_location = None

    def handle(self, holdings_source=default_location, *args, **options):
        """Refresh material types, then load holdings from *holdings_source*.

        Logs an error and returns early when no usable holdings folder is
        available (source unset or path does not exist).
        """
        # Guard against holdings_source being None (no bib dir configured):
        # os.path.exists(None) would raise TypeError instead of reaching the
        # helpful error message below.
        if not holdings_source or not os.path.exists(holdings_source):
            LOGGER.error("There is no valid holdings source folder defined.")
            set_holdings = [
                'To load holdings - Add a folder called "holdings"',
                'to the bib directory that is set in settings',
                'or pass the location of holdings as an arguement to the loader.',
            ]
            LOGGER.error(' '.join(set_holdings))
            return

        # First we want to make sure that our material types are up to date.
        # Plain loop for the side effect — a list comprehension built a
        # throwaway list of None values.
        for material_type in models.MaterialType.objects.all():
            material_type.delete()
        management.call_command('loaddata', 'material_types.json')

        holding_loader = HoldingLoader()
        holding_loader.main(holdings_source)
Example #2
0
    def handle(self, *args, **options):
        """Sync titles: optionally pull OCLC updates, load title files,
        run pre-deletion checks, load holdings, and overlay place links.
        """
        start = datetime.now()

        _logger.info("Starting title sync process.")
        # only load titles if the BIB_STORAGE is there, not always the case
        # for folks in the opensource world
        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            worldcat_dir = bib_in_settings + '/worldcat_titles/'

            # Pull from OCLC only when requested AND an API key is configured.
            pull_titles = bool(options['pull_title_updates']
                               and hasattr(settings, "WORLDCAT_KEY"))
            if pull_titles:
                call_command('pull_titles')

            _logger.info("Starting load of OCLC titles.")
            bulk_dir = worldcat_dir + 'bulk'
            if os.path.isdir(bulk_dir):
                call_command('load_titles', bulk_dir, skip_index=True)

            tnu = self.find_titles_not_updated()

            # Only update by individual lccn if there are records that need updating.
            if pull_titles and len(tnu):
                _logger.info(
                    "Pulling titles from OCLC by individual lccn & oclc num.")
                self.pull_lccn_updates(tnu)

            _logger.info("Loading titles from second title pull.")
            lccn_dir = worldcat_dir + 'lccn'
            if os.path.isdir(lccn_dir):
                call_command('load_titles', lccn_dir, skip_index=True)

            tnu = self.find_titles_not_updated(limited=False)
            _logger.info("Running pre-deletion checks for these titles.")

        if bib_in_settings:
            if len(tnu):
                # Delete titles haven't been update &  issues attached.
                # NOTE(review): this loop only *warns* about titles with
                # issues; nothing is ever deleted here — confirm intent.
                for title in tnu:
                    issues = title.issues.all()

                    error = "DELETION ERROR: Title %s has " % title
                    error_end = "It will not be deleted."

                    if issues:
                        _logger.warning(error + 'issues.' + error_end)
                        continue

            # Load holdings for all remaining titles.
            call_command('load_holdings')

        # overlay place info harvested from dbpedia onto the places table
        try:
            self.load_place_links()
        except Exception as e:  # py3-compatible (was py2 "except X, e" syntax)
            _logger.exception(e)
Example #3
0
    def handle(self, **options):
        """Bootstrap an empty chronam install: load reference fixtures,
        load any original title files from the bib dir, then run a full
        title sync. Refuses to run against a non-empty database or index.
        """
        if not (models.Title.objects.all().count() == 0
                and models.Holding.objects.all().count() == 0
                and models.Essay.objects.all().count() == 0
                and models.Batch.objects.all().count() == 0
                and models.Issue.objects.all().count() == 0
                and models.Page.objects.all().count() == 0
                and index.page_count() == 0 and index.title_count() == 0):
            # warning() — warn() is a deprecated alias in the logging module.
            _logger.warning("Database or index not empty as expected.")
            return

        start = datetime.now()
        management.call_command('loaddata', 'languages.json')
        management.call_command('loaddata', 'institutions.json')
        management.call_command('loaddata', 'ethnicities.json')
        management.call_command('loaddata', 'labor_presses.json')
        management.call_command('loaddata', 'countries.json')

        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            # look in BIB_STORAGE for original titles to load
            for filename in os.listdir(bib_in_settings):
                if filename.startswith('titles-') and filename.endswith(
                        '.xml'):
                    filepath = os.path.join(bib_in_settings, filename)
                    management.call_command('load_titles',
                                            filepath,
                                            skip_index=True)

        management.call_command(
            'title_sync',
            skip_essays=options['skip_essays'],
            pull_title_updates=options['pull_title_updates'])

        end = datetime.now()
        total_time = end - start
        _logger.info('start time: %s' % start)
        _logger.info('end time: %s' % end)
        _logger.info('total time: %s' % total_time)
        _logger.info("chronam_sync done.")
Example #4
0
    def handle(self, **options):
        """Bootstrap an empty chronam install: load reference fixtures,
        load any original title files from the bib dir, then run a full
        title sync. Refuses to run against a non-empty database or index.
        """
        if not (models.Title.objects.all().count() == 0 and
                models.Holding.objects.all().count() == 0 and
                models.Essay.objects.all().count() == 0 and
                models.Batch.objects.all().count() == 0 and
                models.Issue.objects.all().count() == 0 and
                models.Page.objects.all().count() == 0 and
                index.page_count() == 0 and
                index.title_count() == 0):
            # warning() — warn() is a deprecated alias in the logging module.
            _logger.warning("Database or index not empty as expected.")
            return

        start = datetime.now()
        management.call_command('loaddata', 'languages.json')
        management.call_command('loaddata', 'institutions.json')
        management.call_command('loaddata', 'ethnicities.json')
        management.call_command('loaddata', 'labor_presses.json')
        management.call_command('loaddata', 'countries.json')

        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            # look in BIB_STORAGE for original titles to load
            for filename in os.listdir(bib_in_settings):
                if filename.startswith('titles-') and filename.endswith('.xml'):
                    filepath = os.path.join(bib_in_settings, filename)
                    management.call_command('load_titles', filepath, skip_index=True)

        management.call_command('title_sync',
                                skip_essays=options['skip_essays'],
                                pull_title_updates=options['pull_title_updates'])

        end = datetime.now()
        total_time = end - start
        _logger.info('start time: %s' % start)
        _logger.info('end time: %s' % end)
        _logger.info('total time: %s' % total_time)
        _logger.info("chronam_sync done.")
Example #5
0
    def handle(self, *args, **options):
        """Sync titles: optionally pull OCLC updates, load title files,
        refresh essays, delete stale titles with no attached content,
        load holdings, overlay place links, and re-index titles.
        """
        start = datetime.now()

        LOGGER.info("Starting title sync process.")
        # only load titles if the BIB_STORAGE is there, not always the case
        # for folks in the opensource world
        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            worldcat_dir = bib_in_settings + '/worldcat_titles/'

            # Pull from OCLC only when requested AND an API key is configured.
            pull_titles = bool(options['pull_title_updates']
                               and hasattr(settings, "WORLDCAT_KEY"))
            if pull_titles:
                call_command('pull_titles')

            LOGGER.info("Starting load of OCLC titles.")
            bulk_dir = worldcat_dir + 'bulk'
            if os.path.isdir(bulk_dir):
                call_command('load_titles', bulk_dir, skip_index=True)

            tnu = self.find_titles_not_updated()

            # Only update by individual lccn if there are records that need updating.
            if pull_titles and len(tnu):
                LOGGER.info(
                    "Pulling titles from OCLC by individual lccn & oclc num.")
                self.pull_lccn_updates(tnu)

            LOGGER.info("Loading titles from second title pull.")
            lccn_dir = worldcat_dir + 'lccn'
            if os.path.isdir(lccn_dir):
                call_command('load_titles', lccn_dir, skip_index=True)

            tnu = self.find_titles_not_updated(limited=False)
            LOGGER.info("Running pre-deletion checks for these titles.")

        # Make sure that our essays are up to date
        if not options['skip_essays']:
            load_essays(settings.ESSAYS_FEED)

        if bib_in_settings:
            if len(tnu):
                # Delete titles haven't been update & do not have essays or issues attached.
                for title in tnu:
                    essays = title.essays.all()
                    issues = title.issues.all()

                    error = "DELETION ERROR: Title %s has " % title
                    error_end = "It will not be deleted."

                    # Delete only when the title has NEITHER essays NOR
                    # issues. With "or" a title holding essays (or issues)
                    # would be deleted, contradicting the comment above and
                    # leaving the "elif issues" warning unreachable.
                    if not essays and not issues:
                        delete_txt = (title.name, title.lccn, title.oclc)
                        LOGGER.info('TITLE DELETED: %s, lccn: %s, oclc: %s' %
                                    delete_txt)
                        title.delete()
                    elif essays:
                        LOGGER.warning(error + 'essays.' + error_end)
                        continue
                    elif issues:
                        LOGGER.warning(error + 'issues.' + error_end)
                        continue

            # Load holdings for all remaining titles.
            call_command('load_holdings')

        # overlay place info harvested from dbpedia onto the places table
        try:
            self.load_place_links()
        except Exception as e:
            LOGGER.exception(e)

        index.index_titles()

        # Time of full process run
        end = datetime.now()
        total_time = end - start
        LOGGER.info('start time: %s' % start)
        LOGGER.info('end time: %s' % end)
        LOGGER.info('total time: %s' % total_time)
        LOGGER.info("title_sync done.")
Example #6
0
    def handle(self, *args, **options):
        """Sync titles: optionally pull OCLC updates, load title files,
        refresh essays, delete stale titles with no attached content,
        load holdings, and overlay place links.
        """
        start = datetime.now()

        _logger.info("Starting title sync process.")
        # only load titles if the BIB_STORAGE is there, not always the case
        # for folks in the opensource world
        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            worldcat_dir = bib_in_settings + '/worldcat_titles/'

            # Pull from OCLC only when requested AND an API key is configured.
            pull_titles = bool(options['pull_title_updates'] and hasattr(settings, "WORLDCAT_KEY"))
            if pull_titles:
                call_command('pull_titles')

            _logger.info("Starting load of OCLC titles.")
            bulk_dir = worldcat_dir + 'bulk'
            if os.path.isdir(bulk_dir):
                call_command('load_titles', bulk_dir, skip_index=True)

            tnu = self.find_titles_not_updated()

            # Only update by individual lccn if there are records that need updating.
            if pull_titles and len(tnu):
                _logger.info("Pulling titles from OCLC by individual lccn & oclc num.")
                self.pull_lccn_updates(tnu)

            _logger.info("Loading titles from second title pull.")
            lccn_dir = worldcat_dir + 'lccn'
            if os.path.isdir(lccn_dir):
                call_command('load_titles', lccn_dir, skip_index=True)

            tnu = self.find_titles_not_updated(limited=False)
            _logger.info("Running pre-deletion checks for these titles.")

        # Make sure that our essays are up to date
        if not options['skip_essays']:
            load_essays(settings.ESSAYS_FEED)

        if bib_in_settings:
            if len(tnu):
                # Delete titles haven't been update & do not have essays or issues attached.
                for title in tnu:
                    essays = title.essays.all()
                    issues = title.issues.all()

                    error = "DELETION ERROR: Title %s has " % title
                    error_end = "It will not be deleted."

                    # Delete only when the title has NEITHER essays NOR
                    # issues. With "or" a title holding essays (or issues)
                    # would be deleted, contradicting the comment above and
                    # leaving the "elif issues" warning unreachable.
                    if not essays and not issues:
                        delete_txt = (title.name, title.lccn, title.oclc)
                        _logger.info('TITLE DELETED: %s, lccn: %s, oclc: %s' % delete_txt)
                        title.delete()
                    elif essays:
                        _logger.warning(error + 'essays.' + error_end)
                        continue
                    elif issues:
                        _logger.warning(error + 'issues.' + error_end)
                        continue

            # Load holdings for all remaining titles.
            call_command('load_holdings')

        # overlay place info harvested from dbpedia onto the places table
        try:
            self.load_place_links()
        except Exception as e:  # py3-compatible (was py2 "except X, e" syntax)
            _logger.exception(e)