import os
import logging

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from openoni.core.management.commands import configure_logging
from openoni.core import tasks

# One log file per process: the pid is embedded in the log file name.
configure_logging('queue_process_coordinates.config',
                  'queue_process_coordinates_%s.log' % os.getpid())

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Queue the word coordinates of a single batch for processing."""

    option_list = BaseCommand.option_list + (
    )
    help = "queue the word coordinates of a batch to be processed"
    args = '<batch name>'

    def handle(self, batch_name, *args, **options):
        """Hand ``batch_name`` to ``tasks.process_coordinates.delay``.

        Exactly one positional argument (the batch name) is accepted.

        Raises:
            CommandError: if extra positional arguments are supplied, or if
                handing the batch to the task fails for any reason (the
                underlying exception is logged with its traceback first).
        """
        if args:
            raise CommandError(
                'Usage is queue_process_coordinates %s' % self.args)
        try:
            tasks.process_coordinates.delay(batch_name)
        except Exception as e:
            # Top-level boundary of the command: log the traceback, then
            # surface a short message that points at this command's own log.
            LOGGER.exception(e)
            raise CommandError(
                "unable to process coordinates. check the "
                "queue_process_coordinates log for clues")
import logging

from django.core.cache import cache
from django.core.management.base import BaseCommand, CommandError

from openoni.core.management.commands import configure_logging

configure_logging('', 'purge_django_cache.log')

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Remove the batch-dependent entries from the Django cache."""

    help = "Purge the django cache after ingest/purge of a batch"

    def handle(self, *args, **options):
        """Delete the ``newspaper_info`` and ``titles_states`` cache keys.

        Raises:
            CommandError: if any cache operation fails (the underlying
                exception is logged with its traceback first).
        """
        try:
            # delete the total pages count
            LOGGER.info('removing newspaper_info from cache')
            cache.delete('newspaper_info')

            # delete the advanced search title list
            LOGGER.info('removing titles_states from cache')
            cache.delete('titles_states')
        except Exception as e:
            # Top-level boundary of the command: log the traceback, then
            # surface a short message that points at this command's own log.
            LOGGER.exception(e)
            raise CommandError(
                "unable to purge the cache. check the "
                "purge_django_cache log for clues")
# diff_batches management command (this snippet is cut off inside handle()).
# Configures per-process logging (diff_batches_<pid>.log) and declares a
# --skip-process-ocr flag that stores False in options['process_ocr'] (default True).
# handle() takes one positional argument, the batch list filename, and raises
# CommandError when extra positional arguments are given; it then starts an empty
# `batches` set. What happens after that is not visible here.
# NOTE(review): the usage message says "diff_batch" while the log/config names use
# "diff_batches" -- confirm which one is the real command name.
import os import logging from optparse import make_option from django.core.management.base import BaseCommand from django.core.management.base import CommandError from openoni.core import models from openoni.core.management.commands import configure_logging configure_logging('diff_batches_logging.config', 'diff_batches_%s.log' % os.getpid()) _logger = logging.getLogger(__name__) class Command(BaseCommand): option_list = BaseCommand.option_list + ( make_option('--skip-process-ocr', action='store_false', dest='process_ocr', default=True, help='Do not generate ocr, and index'), ) help = "Diff batches by name from a batch list file" args = '<batch_list_filename>' def handle(self, batch_list_filename, *args, **options): if len(args)!=0: raise CommandError('Usage is diff_batch %s' % self.args) batches = set()
# load_titles management command (this snippet ends after option_list; handle() is
# not visible). Logging is configured to write to load_titles.log.
# The command's help/args describe a single argument: the location of a marcxml file.
# It declares a --skip-index flag (store_true, options['skip_index'], default False);
# per its help text the flag is meant for callers such as 'openoni_sync'.
import logging from datetime import datetime import os from optparse import make_option from django.core.management.base import BaseCommand from openoni.core import title_loader from openoni.core.index import index_titles from openoni.core.models import Title from openoni.core.management.commands import configure_logging configure_logging('load_titles_logging.config', 'load_titles.log') _logger = logging.getLogger(__name__) class Command(BaseCommand): help = "Load a marcxml file of title records" args = '<location of marcxml>' option_list = BaseCommand.option_list + ( make_option('--skip-index', action='store_true', dest='skip_index', default=False, help="\ Skip the index process. Use this if you call this from \ another process such as 'openoni_sync'. If you call this \ directly, you don't want to use this flag. \ "), )
# purge_batch management command (this snippet ends at the handle() signature; its
# body is not visible). Configures per-process logging (purge_batch_<pid>.log).
# Declares a --no-optimize flag that stores False in options['optimize'] (default
# True); per its help text it disables the Solr/MySQL optimize step after a purge.
# handle() accepts an optional batch_location positional argument (default None).
import os import logging from optparse import make_option from django.conf import settings from django.db import connection from django.core.management.base import BaseCommand, CommandError from solr import SolrConnection from openoni.core.batch_loader import BatchLoader, BatchLoaderException from openoni.core.management.commands import configure_logging configure_logging('purge_batches_logging.config', 'purge_batch_%s.log' % os.getpid()) log = logging.getLogger(__name__) class Command(BaseCommand): option_list = BaseCommand.option_list + ( make_option('--no-optimize', action='store_false', dest='optimize', default=True, help='Do not optimize Solr and MySQL after purge'), ) help = "Purge a batch" args = '<batch_location>' def handle(self, batch_location=None, *args, **options):
# load_batches management command (this snippet ends at the handle() signature; its
# body is not visible). Configures per-process logging (load_batches_<pid>.log).
# Declares two store_false flags: --skip-process-ocr and --skip-process-coordinates.
# NOTE(review): both flags write to dest='process_ocr', so passing
# --skip-process-coordinates also clears options['process_ocr'] and no separate
# coordinates option is ever set. This looks like a copy-paste slip (perhaps
# dest='process_coordinates' was intended) -- confirm against the handle() body.
import os import logging from optparse import make_option from django.core.management.base import BaseCommand from django.core.management.base import CommandError from openoni.core import batch_loader from openoni.core.management.commands import configure_logging configure_logging('load_batches_logging.config', 'load_batches_%s.log' % os.getpid()) _logger = logging.getLogger(__name__) class Command(BaseCommand): option_list = BaseCommand.option_list + ( make_option('--skip-process-ocr', action='store_false', dest='process_ocr', default=True, help='Do not generate ocr, and index'), make_option('--skip-process-coordinates', action='store_false', dest='process_ocr', default=True, help='Do not write out word coordinates'), ) help = "Load batches by name from a batch list file" args = '<batch_list_filename>' def handle(self, batch_list_filename, *args, **options):
# Title-purging management command (this snippet ends after option_list; handle()
# is not visible). Logging is configured to write to openoni_purge_etitles.log.
# The class docstring describes the intent: purge electronic-only title records
# while leaving records that have issues attached.
# Declares a -p/--pretend flag (store_true, options['pretend']); the docstring says
# it is for previewing the records that would be purged.
import logging from cStringIO import StringIO from optparse import make_option from django.core.management.base import BaseCommand import pymarc from openoni.core.management.commands import configure_logging from openoni.core import index from openoni.core.models import Title configure_logging("openoni_purge_titles.config", "openoni_purge_etitles.log") _log = logging.getLogger(__name__) class Command(BaseCommand): """ Management command for purging title records which have an 856 field containing a link to Chronicling America, and which appear to be records for an electronic only version of a title 245 $h == [electronic resource]. The script is careful not to purge any records that have issues attached to them. See https://rdc.lctl.gov/trac/ndnp/ticket/375 for context. If you want to see the records that will be purged use the --pretend option. """ option_list = BaseCommand.option_list + ( make_option('-p', '--pretend', dest='pretend', action='store_true'), )
# Place-linking management command (this snippet is cut off inside the loop in handle()).
# Logging is configured to write to openoni_link_places.log. simplejson is preferred
# and the stdlib json module is the fallback when simplejson is not installed.
# Three rdflib namespaces are defined at module level: geo (WGS84 positions), owl,
# and dbpedia (DBpedia ontology).
# handle() iterates over Place rows whose dbpedia field is null and skips any place
# that lacks a city or a state; the remainder of the loop body is not visible.
import logging import urllib2 from django.core.management.base import BaseCommand from django.db import reset_queries from rdflib import Namespace, ConjunctiveGraph, URIRef try: import simplejson as json except ImportError: import json from openoni.core import models from openoni.core.management.commands import configure_logging configure_logging("openoni_link_places.config", "openoni_link_places.log") _logger = logging.getLogger(__name__) geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#') owl = Namespace('http://www.w3.org/2002/07/owl#') dbpedia = Namespace('http://dbpedia.org/ontology/') class Command(BaseCommand): def handle(self, **options): _logger.debug("linking places") for place in models.Place.objects.filter(dbpedia__isnull=True): if not place.city or not place.state: continue # formulate a dbpedia place uri
# process_coordinates management command (this snippet is cut off inside handle()).
# Configures per-process logging (process_coordinates_<pid>.log).
# handle() requires exactly one positional argument (the batch list filename) and
# raises CommandError otherwise. It creates a batch_loader.BatchLoader, opens the
# list file, and for each line strips it to a batch name, logs it, and splits the
# name on "_"; the branch taken when there are exactly 4 parts is not visible.
# NOTE(review): the file is opened with the Python 2 builtin file() and no close is
# visible in this snippet -- confirm it is closed in the part that is cut off.
import os import logging from django.core.management.base import BaseCommand from django.core.management.base import CommandError from openoni.core import batch_loader from openoni.core.management.commands import configure_logging configure_logging("process_coordinates_logging.config", "process_coordinates_%s.log" % os.getpid()) _logger = logging.getLogger(__name__) class Command(BaseCommand): option_list = BaseCommand.option_list + () help = "Process word coordinates for a batch by name from a batch list file" args = "<batch_list_filename>" def handle(self, batch_list_filename, *args, **options): if len(args) != 0: raise CommandError("Usage is process_coordinates %s" % self.args) loader = batch_loader.BatchLoader() batch_list = file(batch_list_filename) _logger.info("batch_list_filename: %s" % batch_list_filename) for line in batch_list: batch_name = line.strip() _logger.info("batch_name: %s" % batch_name) parts = batch_name.split("_") if len(parts) == 4:
# Institution-loading management command (this snippet is cut off inside the loop in handle()).
# Configures per-process logging (load_institutions_<pid>.log). The module-level
# string documents the expected CSV header and gives one sample row.
# handle() opens the CSV as UTF-8 and reads it through unicode_csv_reader (not
# defined in the visible part of this snippet). Rows whose column 20 ("cname" in the
# documented header) is not 'United States' are skipped; for the rest a new
# Institution gets code = upper-cased column 7 ("lowercode") and name = column 1
# ("orgName"). The remaining field assignments are not visible.
# NOTE(review): the logging config name is spelled "load_intitutions_logging.config"
# (missing an "s") -- confirm it matches the actual config file name.
import os import csv import codecs from django.core.management.base import BaseCommand from openoni.core.management.commands import configure_logging from openoni.core.models import Institution configure_logging("load_intitutions_logging.config", "load_institutions_%s.log" % os.getpid()) """ Simple command to load institution data obtained from the MySQL database running in the MARC Standards office. "oid","orgName","altname1","altname2","altname3","altname4","orgCode","lowercode","isilCode","obsoleteOrgCode","createDate","modifiedDate","address1","address2","address3","city","stateID","zip","countryID","ID","cname","prefix","searchable" 22035,"3Com Corporation Technical Library","","","","","CStcTCC","cstctcc","US-CStcTCC","","1995-10-19 00:00:00","1995-10-19 00:00:00","5400 Bayfront Plaza","","","Santa Clara",5,"95052",210,210,"United States","US","yes" """ class Command(BaseCommand): help = 'loads institution csv data into Institution table' args = '<institution_csv_file>' def handle(self, csv_file, *args, **options): for row in unicode_csv_reader(codecs.open(csv_file, encoding='utf-8')): if row[20] != 'United States': continue i = Institution() i.code = row[7].upper() i.name = row[1]
import logging

from django.core.management.base import BaseCommand

from openoni.core.management.commands import configure_logging
from openoni.core.index import index_titles, index_pages

configure_logging("index_logging.config", "index.log")

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Run the title indexer followed by the page indexer."""

    help = ("index all titles and pages ; "
            "you may (or may not) want to zap_index before")

    def handle(self, **options):
        """Call index_titles() then index_pages(), logging around each."""
        # Titles are indexed before pages; each step is bracketed by a
        # start message and a finish message.
        steps = (
            ("titles", index_titles),
            ("pages", index_pages),
        )
        for label, run_indexer in steps:
            _logger.info("indexing %s" % label)
            run_indexer()
            _logger.info("finished indexing %s" % label)
# load_holdings management command (this snippet is cut off inside a list literal in handle()).
# Logging is configured to write to load_holdings.log.
# At class-definition time validate_bib_dir() is called; when it returns a truthy
# value, default_location becomes that value + '/holdings', otherwise None. That
# default is bound as the default for handle()'s holdings_source parameter.
# handle() logs an error and starts building a list of setup instructions when
# holdings_source does not exist on disk; the rest is not visible.
# NOTE(review): when default_location is None and no argument is supplied,
# os.path.exists(None) is reached -- confirm this cannot raise TypeError here.
import logging import os from django.core import management from django.core.management.base import BaseCommand from openoni.core import models from openoni.core.holding_loader import HoldingLoader from openoni.core.management.commands import configure_logging from openoni.core.utils.utils import validate_bib_dir configure_logging('load_holdings_logging.config', 'load_holdings.log') _logger = logging.getLogger(__name__) class Command(BaseCommand): help = "Load a holdings records after title records are all loaded" args = '<location of holdings directory>' bib_in_settings = validate_bib_dir() if bib_in_settings: default_location = bib_in_settings + '/holdings' else: default_location = None def handle(self, holdings_source=default_location, *args, **options): if not os.path.exists(holdings_source): _logger.error("There is no valid holdings source folder defined.") set_holdings = ['To load holdings - Add a folder called "holdings"', 'to the bib directory that is set in settings',
# release management command (this snippet is cut off at the first branch of handle()).
# Logging is configured to write to release.log. Per its help text the command
# updates the release datetime on batches, trying in order: bag-info.txt, a file
# supplied on the command line, the current public feed, the current server datetime.
# Declares a --reset flag (store_true, options['reset'], default False); handle()
# checks that flag first, but the body of the branch is not visible.
# NOTE(review): `logging.getLogger` is called but no `import logging` appears in the
# visible imports -- confirm it is imported, otherwise the module fails with
# NameError on import.
import csv from optparse import make_option from time import mktime from datetime import datetime import feedparser from django.core.management.base import BaseCommand from django.conf import settings from openoni.core.management.commands import configure_logging from openoni.core.rdf import rdf_uri from openoni.core import models as m configure_logging("release.config", "release.log") _logger = logging.getLogger(__name__) class Command(BaseCommand): help = "Updates (Resets if --reset option is used) release datetime on batches from one of following sources (in order of preference) 1. bag-info.txt, if found in the batch source 2. If path to a file is provided with the command, datetime is extracted from the file 3. current public feed 4. current server datetime" reset = make_option('--reset', action = 'store_true', dest = 'reset', default = False, help = 'reset release times to nothing before setting them again') option_list = BaseCommand.option_list + (reset, ) def handle(self, *args, **options): if options['reset']:
import logging

from django.core.management.base import BaseCommand

from openoni.core.management.commands import configure_logging
from openoni.core.index import index_pages

configure_logging("index_pages_logging.config", "index_pages.log")

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Run the page indexer."""

    def handle(self, **options):
        """Call index_pages(), logging when it starts and when it finishes.

        The start/finish messages mirror the ones emitted by the sibling
        index and index_titles commands, so the configured log file records
        when a run began and ended.
        """
        _logger.info("indexing pages")
        index_pages()
        _logger.info("finished indexing pages")
import logging

from django.core.management.base import BaseCommand

from openoni.core.management.commands import configure_logging
from openoni.core.index import index_titles

configure_logging("index_titles_logging.config", "index_titles.log")

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Run the title indexer."""

    def handle(self, **options):
        """Call index_titles(), logging a message before and after."""
        started, finished = "indexing titles", "finished indexing titles"

        _logger.info(started)
        index_titles()
        _logger.info(finished)
# openoni_sync management command (this snippet ends after the class attributes;
# handle() is not visible). Logging is configured to write to openoni_sync.log.
# Declares two store_true flags, both defaulting to False:
#   --verbose             -> options['verbose'] (its help text is empty)
#   --pull-title-updates  -> options['pull_title_updates']
# The command's own `help` and `args` strings are empty.
import os import logging from datetime import datetime from optparse import make_option from django.core import management from django.core.management.base import BaseCommand from openoni.core import models from openoni.core import index from openoni.core.management.commands import configure_logging from openoni.core.utils.utils import validate_bib_dir configure_logging("openoni_sync_logging.config", "openoni_sync.log") _logger = logging.getLogger(__name__) class Command(BaseCommand): verbose = make_option("--verbose", action="store_true", dest="verbose", default=False, help="") pull_title_updates = make_option( "--pull-title-updates", action="store_true", dest="pull_title_updates", default=False, help="Pull down a new set of titles.", ) option_list = BaseCommand.option_list + (verbose, pull_title_updates) help = "" args = ""
# queue_load_batch management command (this snippet is cut off at the `try:` in handle()).
# Configures per-process logging (queue_load_batch_<pid>.log).
# Declares a --skip-coordinates flag that stores False in
# options['process_coordinates'] (default True).
# handle() requires exactly one positional argument (the batch name) and raises
# CommandError when extra positional arguments are given; the body of the try
# block is not visible.
import os import logging from optparse import make_option from django.core.management.base import BaseCommand from django.core.management.base import CommandError from openoni.core.management.commands import configure_logging from openoni.core import tasks configure_logging('queue_load_batch_logging.config', 'queue_load_batch_%s.log' % os.getpid()) LOGGER = logging.getLogger(__name__) class Command(BaseCommand): option_list = BaseCommand.option_list + ( make_option('--skip-coordinates', action='store_false', dest='process_coordinates', default=True, help="Do not generate word coordinates"), ) help = "queue a batch to be loaded" args = '<batch name>' def handle(self, batch_name, *args, **options): if len(args)!=0: raise CommandError('Usage is queue_load_batch %s' % self.args) try: