예제 #1
0
import logging
import os

from django.core import management
from django.core.management.base import BaseCommand

from core import models
from core.holding_loader import HoldingLoader
from core.management.commands import configure_logging
from core.utils.utils import validate_bib_dir

configure_logging('load_holdings_logging.config', 'load_holdings.log')
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Load a holdings records after title records are all loaded"
    args = '<location of holdings directory>'

    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        default_location = bib_in_settings + '/holdings'
    else:
        default_location = None

    def handle(self, holdings_source=default_location, *args, **options):

        if not os.path.exists(holdings_source):
            _logger.error("There is no valid holdings source folder defined.")
            set_holdings = [
                'To load holdings - Add a folder called "holdings"',
예제 #2
0
import os
import logging

from optparse import make_option

from django.conf import settings
from django.db import connection
from django.core.management.base import BaseCommand, CommandError

from solr import SolrConnection

from core.batch_loader import BatchLoader, BatchLoaderException
from core.management.commands import configure_logging

configure_logging('purge_batches_logging.config',
                  'purge_batch_%s.log' % os.getpid())

log = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Purge a batch"

    def add_arguments(self, parser):
        # Positional arguments
        parser.add_argument('batch_name',
                            help='Batch name from "batches" command')

        # Options
        parser.add_argument(
            '--optimize', action='store_true', default=False, dest='optimize',
예제 #3
0
import logging
import urllib2

from django.core.management.base import BaseCommand
from django.db import reset_queries
from rdflib import Namespace, ConjunctiveGraph, URIRef
try:
    import simplejson as json
except ImportError:
    import json

from core import models
from core.management.commands import configure_logging

configure_logging("openoni_link_places.config", "openoni_link_places.log")
_logger = logging.getLogger(__name__)

geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
owl = Namespace('http://www.w3.org/2002/07/owl#')
dbpedia = Namespace('http://dbpedia.org/ontology/')


class Command(BaseCommand):

    def handle(self, **options):
        _logger.debug("linking places")
        for place in models.Place.objects.filter(dbpedia__isnull=True):
            if not place.city or not place.state:
                continue

            # formulate a dbpedia place uri
예제 #4
0
import os
import logging

from optparse import make_option

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.batch_loader import BatchLoader, BatchLoaderException
from core.management.commands import configure_logging

configure_logging('load_batch_logging.config',
                  'load_batch_%s.log' % os.getpid())

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    help = """
    This command loads the metadata and pages associated with a batch into a
    database and search index. It may take up to several hours to complete,
    depending on the batch size and machine.
    """

    def add_arguments(self, parser):
        # Positional arguments
        parser.add_argument('batch_path', help='Path to batch files')

        # Options
        parser.add_argument('--skip-coordinates',
                            action='store_true',
예제 #5
0
from django.core.management.base import BaseCommand
    
from core.management.commands import configure_logging
from core.solr_index import index_pages

configure_logging("index_pages_logging.config", "index_pages.log")

class Command(BaseCommand):
    """Management command that delegates to ``core.solr_index.index_pages``."""

    def handle(self, **options):
        """Call ``index_pages()``; the received options are not used."""
        index_pages()
예제 #6
0
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand

try:
    import simplejson as json
except ImportError:
    import json

from openoni import core
from core import solr_index
from core.management.commands import configure_logging
from core.models import Place, Title
from core.utils.utils import validate_bib_dir

configure_logging("title_sync_logging.config", "title_sync.log")
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    # Optional flag; stores True in options['pull_title_updates'] when given.
    # NOTE(review): `make_option` (optparse) is not imported in the visible
    # part of this module -- confirm it is brought into scope elsewhere.
    pull_title_updates = make_option('--pull-title-updates',
                                     action='store_true',
                                     dest='pull_title_updates',
                                     default=False,
                                     help='Pull down a new set of titles.')

    # The trailing comma is required: without it the parentheses are plain
    # grouping and "tuple + Option" raises TypeError at class-creation time.
    option_list = BaseCommand.option_list + (pull_title_updates,)

    help = 'Runs title pull and title load for a complete title refresh.'
    args = ''
예제 #7
0
import csv

from optparse import make_option
from time import mktime
from datetime import datetime

import feedparser

from django.core.management.base import BaseCommand
from django.conf import settings

from core.management.commands import configure_logging
from core.rdf import rdf_uri
from core import models as m

configure_logging("release.config", "release.log")

_logger = logging.getLogger(__name__)

class Command(BaseCommand):
    help = "Updates (Resets if --reset option is used) release datetime on batches from one of following sources (in order of preference) 1. bag-info.txt, if found in the batch source 2. If path to a file is provided with the command, datetime is extracted from the file 3. current public feed 4. current server datetime"

    reset = make_option('--reset',
        action = 'store_true',
        dest = 'reset',
        default = False,
        help = 'reset release times to nothing before setting them again')
    option_list = BaseCommand.option_list + (reset, )

    def handle(self, *args, **options):
        if options['reset']:
예제 #8
0
import logging

from django.core.cache import cache
from django.core.management.base import BaseCommand, CommandError

from core.management.commands import configure_logging

configure_logging('', 'purge_django_cache.log' )

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Delete the ``newspaper_info`` and ``titles_states`` cache entries."""

    help = "Purge the django cache after ingest/purge of a batch"

    def handle(self, *args, **options):
        """Remove both cache keys, logging each step.

        Raises:
            CommandError: if either deletion fails. The underlying exception
                is logged with its traceback before this is raised.
        """
        try:
            # delete the total pages count
            LOGGER.info('removing newspaper_info from cache')
            cache.delete('newspaper_info')

            # delete the advanced search title list
            LOGGER.info('removing titles_states from cache')
            cache.delete('titles_states')

        # "except X as e" is valid on Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        except Exception as e:
            LOGGER.exception(e)
            # Point at the log file this module actually configures
            # (purge_django_cache.log).
            raise CommandError("unable to purge the cache. check the purge_django_cache log for clues")
예제 #9
0
from django.core.management.base import BaseCommand

from core.management.commands import configure_logging
from core.solr_index import index_pages

configure_logging("index_pages_logging.config", "index_pages.log")


class Command(BaseCommand):
    """Management command that delegates to ``core.solr_index.index_pages``."""

    def handle(self, **options):
        """Call ``index_pages()``; the received options are not used."""
        index_pages()
예제 #10
0
import logging

from django.core.management.base import BaseCommand

from core.management.commands import configure_logging
from core.solr_index import index_titles, index_pages

configure_logging("index_logging.config", "index.log")

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = """
    Rebuilds the entire title and page index data in Solr, including the
    page OCR data.  It shouldn't be necessary most of the time, but it can be
    useful to run if Solr data becomes corrupt (though this is a very rare
    occurrence), or in cases the Solr index must be deleted, e.g., if you upgrade
    to a new major version of Solr.

    *If Solr corruption is suspected, you should run the `zap_index` command
    prior to reindexing.*

    This command can take a while to run, because every single page has OCR data
    which Solr has to index in order to facilitate full-text searching.  Plan for
    60 to 90 minutes per 100,000 pages in your collection.
    """

    def handle(self, **options):

        _logger.info("indexing titles")
예제 #11
0
import os
import logging
from optparse import make_option

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core import models
from core.management.commands import configure_logging
    
configure_logging('diff_batches_logging.config', 
                  'diff_batches_%s.log' % os.getpid())

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    option_list = BaseCommand.option_list + (
        make_option('--skip-process-ocr', 
                    action='store_false', 
                    dest='process_ocr', default=True,
                    help='Do not generate ocr, and index'),
    )
    help = "Diff batches by name from a batch list file"
    args = '<batch_list_filename>'

    def handle(self, batch_list_filename, *args, **options):
        if len(args)!=0:
            raise CommandError('Usage is diff_batch %s' % self.args)

        batches = set()
예제 #12
0
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand

try:
    import simplejson as json
except ImportError:
    import json

from openoni import core
from core import solr_index
from core.management.commands import configure_logging
from core.models import Place, Title
from core.utils.utils import validate_bib_dir

configure_logging("title_sync_logging.config", "title_sync.log")
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    # Optional flag; stores True in options['pull_title_updates'] when given.
    # NOTE(review): `make_option` (optparse) is not imported in the visible
    # part of this module -- confirm it is brought into scope elsewhere.
    pull_title_updates = make_option('--pull-title-updates',
                                     action='store_true',
                                     dest='pull_title_updates',
                                     default=False,
                                     help='Pull down a new set of titles.')

    # The trailing comma is required: without it the parentheses are plain
    # grouping and "tuple + Option" raises TypeError at class-creation time.
    option_list = BaseCommand.option_list + (pull_title_updates,)

    help = 'Runs title pull and title load for a complete title refresh.'
    args = ''
예제 #13
0
import logging
import os

from django.core import management
from django.core.management.base import BaseCommand

from core import models
from core.holding_loader import HoldingLoader
from core.management.commands import configure_logging
from core.utils.utils import validate_bib_dir

configure_logging('load_holdings_logging.config', 'load_holdings.log')
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Load a holdings records after title records are all loaded"
    args = '<location of holdings directory>'

    bib_in_settings = validate_bib_dir()
    if bib_in_settings:
        default_location = bib_in_settings + '/holdings'
    else:
        default_location = None

    def handle(self, holdings_source=default_location, *args, **options):
        
        if not os.path.exists(holdings_source): 
            _logger.error("There is no valid holdings source folder defined.")
            set_holdings = ['To load holdings - Add a folder called "holdings"',
            'to the bib directory that is set in settings',
예제 #14
0
import os
import logging
import datetime

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.load_copyright_map import loadCopyrightMap
from core.management.commands import configure_logging

configure_logging("load_copyright_map_logging.config", "load_copyright_map.log")

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Feed an input file to ``loadCopyrightMap``."""

    help = "Add records to lccn-date-copyright maps table from input file."

    def add_arguments(self, parser):
        """Register the positional ``filepath`` argument."""
        parser.add_argument('filepath', help="Path to input file")

    def handle(self, *args, **options):
        """Pass ``options['filepath']`` to ``loadCopyrightMap``.

        Raises:
            CommandError: if ``loadCopyrightMap`` raises. The underlying
                exception is logged with its traceback before this is raised.
        """
        filepath = options['filepath']
        try:
            loadCopyrightMap(filepath)
        except Exception as e:
            LOGGER.exception(e)
            # Point at the log file this module actually configures
            # (load_copyright_map.log).
            raise CommandError("unable to load copyright maps. check the load_copyright_map log for clues")

예제 #15
0
import logging

from datetime import datetime
from optparse import make_option

from django.core.management.base import BaseCommand
from django.conf import settings

from core import title_pull

from core.management.commands import configure_logging
    
configure_logging('pull_titles_logging.config', 'pull_titles.log')
_logger = logging.getLogger(__name__)

class Command(BaseCommand):
    help = "Retrieve a fresh pull of titles from OCLC. \
            #TODO: add a list of example commands."
    args = ''
    #TODO: Remove default from lccn
    option_list = BaseCommand.option_list + (
        make_option('-l', '--lccn',
        action='store',
        dest='lccn',
        default=None,
        help="Pass a specific lccn to pull down updates from Worldcat."),

        make_option('-o', '--oclc',
        action='store',
        dest='oclc',
        default=None,
예제 #16
0
import logging

from cStringIO import StringIO
from optparse import make_option

from django.core.management.base import BaseCommand
import pymarc

from core.management.commands import configure_logging
from core import solr_index
from core.models import Title

configure_logging("openoni_purge_titles.config", "openoni_purge_etitles.log")
_log = logging.getLogger(__name__)

class Command(BaseCommand):
    """
    Management command for purging title records which have an 856 field 
    containing a link to Chronicling America, and which appear to be records 
    for an electronic only version of a title 245 $h == [electronic resource].

    The script is careful not to purge any records that have issues attached 
    to them.

    If you want to see the records that will be purged use the --pretend
    option.
    """

    option_list = BaseCommand.option_list + (
        make_option('-p', '--pretend', dest='pretend', action='store_true'),
    )
예제 #17
0
import os
import csv
import codecs

from django.core.management.base import BaseCommand

from core.management.commands import configure_logging
from core.models import Institution

configure_logging("load_intitutions_logging.config",
                  "load_institutions_%s.log" % os.getpid())
"""
Simple command to load institution data obtained from the MySQL database 
running in the MARC Standards office.

"oid","orgName","altname1","altname2","altname3","altname4","orgCode","lowercode","isilCode","obsoleteOrgCode","createDate","modifiedDate","address1","address2","address3","city","stateID","zip","countryID","ID","cname","prefix","searchable"
22035,"3Com Corporation Technical Library","","","","","CStcTCC","cstctcc","US-CStcTCC","","1995-10-19 00:00:00","1995-10-19 00:00:00","5400 Bayfront Plaza","","","Santa Clara",5,"95052",210,210,"United States","US","yes"
"""


class Command(BaseCommand):
    help = 'loads institution csv data into Institution table'
    args = '<institution_csv_file>'

    def handle(self, csv_file, *args, **options):
        for row in unicode_csv_reader(codecs.open(csv_file, encoding='utf-8')):
            if row[20] != 'United States':
                continue
            i = Institution()
            i.code = row[7].upper()
            i.name = row[1]
예제 #18
0
import os
import logging
from optparse import make_option

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core import batch_loader
from core.management.commands import configure_logging

configure_logging("load_batches_logging.config", "load_batches_%s.log" % os.getpid())

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    option_list = BaseCommand.option_list + (
        make_option(
            "--skip-process-ocr",
            action="store_false",
            dest="process_ocr",
            default=True,
            help="Do not generate ocr, and index",
        ),
        make_option(
            "--skip-process-coordinates",
            action="store_false",
            dest="process_ocr",
            default=True,
            help="Do not write out word coordinates",
        ),
예제 #19
0
import os
import logging

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.management.commands import configure_logging
from core import tasks

configure_logging('queue_process_coordinates.config',
                  'queue_process_coordinates_%s.log' % os.getpid())

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Enqueue ``tasks.process_coordinates`` for a single batch name."""

    option_list = BaseCommand.option_list + (
    )
    help = "queue the word coordinates of a batch to be processed"
    args = '<batch name>'

    def handle(self, batch_name, *args, **options):
        """Queue the task for *batch_name* via its ``.delay`` method.

        Raises:
            CommandError: if extra positional arguments are supplied, or if
                queueing fails (the underlying exception is logged first).
        """
        # Exactly one positional argument (the batch name) is accepted.
        if args:
            raise CommandError('Usage is queue_process_coordinates %s' % self.args)
        try:
            tasks.process_coordinates.delay(batch_name)
        # "except X as e" is valid on Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        except Exception as e:
            LOGGER.exception(e)
            # Point at the log file this module actually configures
            # (queue_process_coordinates_<pid>.log).
            raise CommandError("unable to process coordinates. check the queue_process_coordinates log for clues")
예제 #20
0
import os
import logging

from optparse import make_option

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.batch_loader import BatchLoader, BatchLoaderException
from core.management.commands import configure_logging
    
configure_logging('load_batch_logging.config', 
                  'load_batch_%s.log' % os.getpid())

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    option_list = BaseCommand.option_list + (
        make_option('--skip-process-ocr', 
                    action='store_false', 
                    dest='process_ocr', default=True,
                    help='Do not generate ocr, and index'),
        make_option('--skip-coordinates', 
                    action='store_false', 
                    dest='process_coordinates', default=True,
                    help='Do not out word coordinates'),
    )
    help = "Load a batch"
    args = '<batch name>'
예제 #21
0
import logging

from datetime import datetime
import os
from optparse import make_option

from django.core.management.base import BaseCommand

from core import title_loader
from core.solr_index import index_titles
from core.models import Title
from core.management.commands import configure_logging

configure_logging('load_titles_logging.config', 'load_titles.log')
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Load a marcxml file of title records"
    args = '<location of marcxml>'
    option_list = BaseCommand.option_list + (make_option('--skip-index',
                                                         action='store_true',
                                                         dest='skip_index',
                                                         default=False,
                                                         help="\
                Skip the index process. Use this if you call this from \
                another process such as 'openoni_sync'. If you call this \
                directly, you don't want to use this flag. \
            "), )

    def __init__(self):
예제 #22
0
import os
import logging

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.management.commands import configure_logging
from core import tasks

configure_logging('queue_purge_batch_logging.config',
                  'queue_purge_batch_%s.log' % os.getpid())

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Enqueue ``tasks.purge_batch`` for a single batch name."""

    option_list = BaseCommand.option_list + ()
    help = "queue a batch to be purged"
    args = '<batch name>'

    def handle(self, batch_name, *args, **options):
        """Queue the purge task for *batch_name* via its ``.delay`` method.

        Raises:
            CommandError: if extra positional arguments are supplied, or if
                queueing fails (the underlying exception is logged first).
        """
        # Exactly one positional argument (the batch name) is accepted.
        if args:
            raise CommandError('Usage is queue_purge_batch %s' % self.args)
        try:
            tasks.purge_batch.delay(batch_name)
        # "except X as e" is valid on Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        except Exception as e:
            LOGGER.exception(e)
            raise CommandError(
                "unable to queue purge batch. check the queue_purge_batch log for clues"
            )
예제 #23
0
import logging
import os

from django.core.cache import cache
from django.core.management.base import BaseCommand, CommandError

from core.management.commands import configure_logging


configure_logging('delete_cache_logging.config',
                  'delete_cache_%s.log' % os.getpid())
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Delete the ``newspaper_info`` and ``titles_states`` cache entries."""

    help = "Delete newspaper info and title state cache"

    def handle(self, *args, **options):
        """Remove both cache keys, logging each step.

        Raises:
            CommandError: if any positional argument is supplied, or if a
                deletion fails (the underlying exception is logged first).
        """
        # This command takes no positional arguments.
        if args:
            raise CommandError('Usage is `manage.py delete_cache`')

        try:
            logger.info("Deleting newspaper_info cache...")
            cache.delete('newspaper_info')

            logger.info("Deleting titles_states cache...")
            cache.delete('titles_states')
        # "except X as e" is valid on Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        except Exception as e:
            logger.exception(e)
            raise CommandError("Unable to delete newspaper info and title state cache")
예제 #24
0
import json
import logging
import urllib

from django.core.management.base import BaseCommand
from django.db import reset_queries

from core import models
from core.management.commands import configure_logging

configure_logging("openoni_map_places.config", "openoni_map_places.log")
_logger = logging.getLogger("map_places")
geonames_url="http://api.geonames.org/searchJSON"

class Command(BaseCommand):
    def add_arguments(self, parser):
        """Register the positional ``username`` and ``state`` arguments."""
        parser.add_argument("username", action="store",
            help="Username for the GeoNames API; set to 'demo' if you don't plan to pull any GeoNames data")
        # No trailing comma after this call: it would wrap the result in a
        # throwaway one-element tuple.
        parser.add_argument('state', action='store',
            help='State code (e.g., OR, NE, etc) for restricting the search results')

    def handle(self, *args, **options):
        _logger.debug("Finding places in Geonames")

        # Gather up all the places' latitude and longitude data
        output_data = {}
        for place in models.Place.objects.all():
            if not place.city:
                _logger.error("A place with no city exists in your database (%s)!  " +
                    "This is probably A Bad Thing (tm)." % place.name)
                continue
예제 #25
0
import os
import logging

from optparse import make_option

from django.conf import settings
from django.db import connection
from django.core.management.base import BaseCommand, CommandError

from solr import SolrConnection

from core.batch_loader import BatchLoader, BatchLoaderException
from core.management.commands import configure_logging
    
configure_logging('purge_batches_logging.config', 
                  'purge_batch_%s.log' % os.getpid())

log = logging.getLogger(__name__)


class Command(BaseCommand):
    option_list = BaseCommand.option_list + (
        make_option('--no-optimize', 
                    action='store_false', 
                    dest='optimize', default=True,
                    help='Do not optimize Solr and MySQL after purge'),
    )
    help = "Purge a batch"
    args = '<batch_location>'

    def handle(self, batch_location=None, *args, **options):
예제 #26
0
import os
import logging
from optparse import make_option

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core import models
from core.management.commands import configure_logging

configure_logging('diff_batches_logging.config',
                  'diff_batches_%s.log' % os.getpid())

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Diff batches by name from a batch list file"

    def add_arguments(self, parser):
        """Register the positional ``batch_list_filename`` argument on *parser*."""
        # Positional arguments
        parser.add_argument('batch_list_filename')

    def handle(self, batch_list_filename, *args, **options):
        if len(args) != 0:
            raise CommandError('Usage is diff_batch %s' % self.args)

        batches = set()
        batch_list = file(batch_list_filename)
        _logger.info("batch_list_filename: %s" % batch_list_filename)
        for line in batch_list:
예제 #27
0
import logging

from django.core.management.base import BaseCommand
    
from core.management.commands import configure_logging
from core.solr_index import index_titles

configure_logging("index_titles_logging.config", "index_titles.log")

_logger = logging.getLogger(__name__)

class Command(BaseCommand):
    """Management command that delegates to ``core.solr_index.index_titles``."""

    def handle(self, **options):
        """Call ``index_titles()`` with an info log line before and after.

        The received options are not used.
        """
        _logger.info("indexing titles")
        index_titles()
        _logger.info("finished indexing titles")

예제 #28
0
import logging
import urllib2

from django.core.management.base import BaseCommand
from django.db import reset_queries
from rdflib import Namespace, ConjunctiveGraph, URIRef
try:
    import simplejson as json
except ImportError:
    import json

from core import models
from core.management.commands import configure_logging

configure_logging("openoni_link_places.config", "openoni_link_places.log")
_logger = logging.getLogger(__name__)

geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
owl = Namespace('http://www.w3.org/2002/07/owl#')
dbpedia = Namespace('http://dbpedia.org/ontology/')


class Command(BaseCommand):
    def handle(self, **options):
        _logger.debug("linking places")
        for place in models.Place.objects.filter(dbpedia__isnull=True):
            if not place.city or not place.state:
                continue

            # formulate a dbpedia place uri
            path = urllib2.quote('%s,_%s' %
예제 #29
0
import os
import csv 
import codecs

from django.core.management.base import BaseCommand

from core.management.commands import configure_logging
from core.models import Institution

configure_logging("load_intitutions_logging.config", 
                  "load_institutions_%s.log" % os.getpid())

"""
Simple command to load institution data obtained from the MySQL database 
running in the MARC Standards office.

"oid","orgName","altname1","altname2","altname3","altname4","orgCode","lowercode","isilCode","obsoleteOrgCode","createDate","modifiedDate","address1","address2","address3","city","stateID","zip","countryID","ID","cname","prefix","searchable"
22035,"3Com Corporation Technical Library","","","","","CStcTCC","cstctcc","US-CStcTCC","","1995-10-19 00:00:00","1995-10-19 00:00:00","5400 Bayfront Plaza","","","Santa Clara",5,"95052",210,210,"United States","US","yes"
"""

class Command(BaseCommand):
    help = 'loads institution csv data into Institution table'
    args = '<institution_csv_file>'

    def handle(self, csv_file, *args, **options):
        for row in unicode_csv_reader(codecs.open(csv_file, encoding='utf-8')): 
            if row[20] != 'United States':
                continue
            i = Institution()
            i.code = row[7].upper()
            i.name = row[1]
예제 #30
0
import logging

from datetime import datetime
from optparse import make_option

from django.core.management.base import BaseCommand
from django.conf import settings

from core import title_pull

from core.management.commands import configure_logging

configure_logging('pull_titles_logging.config', 'pull_titles.log')
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Retrieve a fresh pull of titles from OCLC. \
            #TODO: add a list of example commands."

    args = ''
    #TODO: Remove default from lccn
    option_list = BaseCommand.option_list + (
        make_option(
            '-l',
            '--lccn',
            action='store',
            dest='lccn',
            default=None,
            help="Pass a specific lccn to pull down updates from Worldcat."),
        make_option('-o',
예제 #31
0
import os
import logging

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core import batch_loader
from core.management.commands import configure_logging

configure_logging('process_coordinates_logging.config',
                  'process_coordinates_%s.log' % os.getpid())

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    option_list = BaseCommand.option_list + (
    )
    help = "Process word coordinates for a batch by name from a batch list file"
    args = '<batch_list_filename>'

    def handle(self, batch_list_filename, *args, **options):
        if len(args)!=0:
            raise CommandError('Usage is process_coordinates %s' % self.args)

        loader = batch_loader.BatchLoader()
        batch_list = file(batch_list_filename)
        _logger.info("batch_list_filename: %s" % batch_list_filename)
        for line in batch_list:
            batch_name = line.strip()
            _logger.info("batch_name: %s" % batch_name)
예제 #32
0
import os
import logging

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.management.commands import configure_logging
from core import tasks
    
configure_logging('queue_purge_batch_logging.config', 
                  'queue_purge_batch_%s.log' % os.getpid())

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    """Enqueue ``tasks.purge_batch`` for a single batch name."""

    option_list = BaseCommand.option_list + (
    )
    help = "queue a batch to be purged"
    args = '<batch name>'

    def handle(self, batch_name, *args, **options):
        """Queue the purge task for *batch_name* via its ``.delay`` method.

        Raises:
            CommandError: if extra positional arguments are supplied, or if
                queueing fails (the underlying exception is logged first).
        """
        # Exactly one positional argument (the batch name) is accepted.
        if args:
            raise CommandError('Usage is queue_purge_batch %s' % self.args)
        try:
            tasks.purge_batch.delay(batch_name)
        # "except X as e" is valid on Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        except Exception as e:
            LOGGER.exception(e)
            raise CommandError("unable to queue purge batch. check the queue_purge_batch log for clues")

예제 #33
0
import logging

from datetime import datetime
import os
from optparse import make_option

from django.core.management.base import BaseCommand

from core import title_loader
from core.solr_index import index_titles
from core.models import Title
from core.management.commands import configure_logging

configure_logging('load_titles_logging.config', 'load_titles.log')
_logger = logging.getLogger(__name__)

class Command(BaseCommand):
    help = "Load a marcxml file of title records"
    args = '<location of marcxml>'
    option_list = BaseCommand.option_list + (
        make_option('--skip-index',
        action='store_true',
        dest='skip_index',
        default=False,
        help="\
                Skip the index process. Use this if you call this from \
                another process such as 'openoni_sync'. If you call this \
                directly, you don't want to use this flag. \
            "),
    )
예제 #34
0
import glob
import logging
import os
import requests
from urllib import parse

from django.core.management.base import BaseCommand
from django.conf import settings
from core.management.commands import configure_logging
from solr import SolrConnection

configure_logging('setup_index_logging.config', 'setup_index.log')

_logger = logging.getLogger(__name__)
fixture_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../../fixtures'))
schema_url = settings.SOLR_BASE_URL + '/api/cores/openoni/schema'

# Copy fields are defined here because we have to manually check for dupes; for
# some reason Solr doesn't do this for us, and will in fact allow dozens of the
# same copy-field definition.  The structure should be obvious, and is the
# exact format Solr's API takes.
copy_fields = [{
    'source': 'place_of_publication',
    'dest': 'place_of_publication_facet'
}, {
    'source': 'subject',
    'dest': 'subject_facet'
}, {
    'source': 'title',
    'dest': 'title_facet'
예제 #35
0
import logging

from django.core.management.base import BaseCommand

from core.management.commands import configure_logging
from core.solr_index import index_titles

configure_logging("index_titles_logging.config", "index_titles.log")

_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that delegates to ``core.solr_index.index_titles``."""

    def handle(self, **options):
        """Call ``index_titles()`` with an info log line before and after.

        The received options are not used.
        """
        _logger.info("indexing titles")
        index_titles()
        _logger.info("finished indexing titles")
예제 #36
0
import os
import logging
import datetime

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from core.load_copyright_map import loadCopyrightMap
from core.management.commands import configure_logging

configure_logging("load_copyright_map_logging.config", "load_copyright_map.log")

LOGGER = logging.getLogger(__name__)


class Command(BaseCommand):
    help = """
    Defines rules for which titles should use a given rights statement for certain
    date ranges.  Rights *must* first be loaded via the load_copyright command.

    Rules are composed of four-field tab-separated-values files, where each line
    indicates a single rule.  The fields, in order, are LCCN, start date, end date,
    and rights URI.  Start and end dates must be formatted as `YYYY-MM-DD`, e.g.,
    `2001-09-08` means September 8th, 2001.

    Please note that loading the same file multiple times will result in duplicated
    data, and manual SQL may be needed to clean dupes from `core_lccndatecopyright`.
    """

    def add_arguments(self, parser):
        parser.add_argument('filepath', help="Path to input file")
예제 #37
0
import logging
import os

from django.conf import settings
from django.core.management.base import BaseCommand

from core.management.commands import configure_logging

from core.models import Batch, OcrDump

configure_logging("dump_ocr_logging.config", "dump_ocr.log")
_logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "looks for batches that need to have ocr dump files created"

    def handle(self, *args, **options):
        if not os.path.isdir(settings.OCR_DUMP_STORAGE):
            os.makedirs(settings.OCR_DUMP_STORAGE)

        for batch in Batch.objects.filter(ocr_dump__isnull=True):
            _logger.info("starting to dump ocr for %s", batch)
            try:
                if batch.ocr_dump:
                    _logger.info("Ocr is already generated for %s", batch)
                    continue
            except OcrDump.DoesNotExist:
                pass

            dump = OcrDump.new_from_batch(batch)