Ejemplo n.º 1
0
    def handle(self, batch_list_filename, *args, **options):
        """Run coordinate processing for one batch or a list of batches.

        ``batch_list_filename`` is either the path of a text file holding one
        batch name per line or, when no such file exists, a single batch name.

        The ``verbosity`` option controls logging: any value above 0 sets the
        root and ``chronam`` loggers (and their handlers) to INFO, and any
        value above 1 sets them to DEBUG.

        Raises:
            CommandError: if extra positional arguments are supplied.
        """
        if args:
            raise CommandError("Usage is process_coordinates %s" % self.args)

        verbosity = options["verbosity"]

        if verbosity > 0:
            log_level = logging.DEBUG if verbosity > 1 else logging.INFO
            for logger in (logging.getLogger(), logging.getLogger("chronam")):
                logger.setLevel(log_level)
                # Handlers filter independently of their logger, so lower
                # their threshold as well.
                for handler in logger.handlers:
                    handler.setLevel(log_level)

        loader = batch_loader.BatchLoader()

        # isfile (not exists): a batch name that happens to be an existing
        # directory must be treated as a batch name, not opened as a list.
        if os.path.isfile(batch_list_filename):
            with open(batch_list_filename) as f:
                stripped = (line.strip() for line in f)
                # Drop blank lines so no empty batch name reaches the loader.
                batches = [name for name in stripped if name]
        else:
            batches = [batch_list_filename]

        LOGGER.info("Processing %d batches from %s", len(batches),
                    batch_list_filename)

        for batch_name in batches:
            LOGGER.info("batch_name: %s", batch_name)
            loader.process_coordinates(batch_name)
Ejemplo n.º 2
0
    def handle(self, batch_list_filename, *args, **options):
        """Load every batch named in a list file, reporting progress to Slack.

        ``batch_list_filename`` is a text file with one batch name per line;
        blank lines are ignored. A batch whose name already exists in
        ``Batch`` is recorded as skipped. Every other batch is loaded with
        ``strict=False``, and a failure of one batch is reported without
        stopping the remaining ones.

        Options read: ``process_ocr`` and ``process_coordinates`` (both passed
        to ``BatchLoader``).

        Raises:
            CommandError: if extra positional arguments are supplied.
        """
        def slack(message):
            # ``sc`` is assigned further down in ``handle``; the closure
            # resolves it when the helper is called.
            sc.api_call("chat.postMessage", channel="#ghnp", text=message)

        def log(message):
            # Strip the backticks used in the Slack messages before logging.
            _logger.info(message.replace('`', ''))

        def update(message):
            slack(message)
            log(message)

        if args:
            raise CommandError('Usage is load_batch %s' % self.args)

        # NOTE(review): ``start`` and the three result lists are only written
        # in this method; nothing here reads them back yet.
        added_batches = []
        failed_batches = []
        skipped_batches = []
        processed = 0
        start = datetime.now()

        sc = SlackClient(settings.SLACK_KEY)

        loader = batch_loader.BatchLoader(
            process_ocr=options['process_ocr'],
            process_coordinates=options['process_coordinates'])

        # Collect the non-blank batch names from the list file.
        with open(batch_list_filename) as f:
            batch_names = [line.strip() for line in f if line.strip()]
        count = len(batch_names)

        update('Loading `%s` Batches from file: `%s`' %
               (count, batch_list_filename))
        for processed, batch_name in enumerate(batch_names, 1):
            batch_start = datetime.now()
            try:
                if Batch.objects.filter(name=batch_name).exists():
                    skipped_batches.append(batch_name)
                    continue
                update('Loading batch `%s` of `%s`: `%s`' %
                       (processed, count, batch_name))
                loader.load_batch(batch_name, strict=False)
                added_batches.append(batch_name)
                update('`%s` loaded in `%s`.' %
                       (batch_name, datetime.now() - batch_start))
            except Exception as e:
                # Deliberately broad: one bad batch must not abort the run.
                update('`%s` failed to load. Error: `%s`.' %
                       (batch_name, str(e)))
                failed_batches.append(batch_name)
Ejemplo n.º 3
0
    def handle(self, batch_list_filename, *args, **options):
        """Load every batch named in ``batch_list_filename``.

        The file holds one batch name per line; blank lines are ignored. The
        ``process_ocr`` and ``process_coordinates`` options are copied onto
        the loader's ``PROCESS_OCR`` and ``PROCESS_COORDINATES`` attributes,
        and each batch is loaded with ``strict=False``. Progress is written to
        ``self.stdout``.

        Raises:
            CommandError: if extra positional arguments are supplied.
        """
        if args:
            raise CommandError('Usage is load_batch %s' % self.args)

        loader = batch_loader.BatchLoader()
        loader.PROCESS_OCR = options['process_ocr']
        loader.PROCESS_COORDINATES = options['process_coordinates']

        # The context manager closes the list file even if a load raises.
        with open(batch_list_filename) as batch_list:
            self.stdout.write("batch_list_filename: %s" % batch_list_filename)
            for line in batch_list:
                batch_name = line.strip()
                if not batch_name:
                    # A blank line is not a batch name; do not hand it on.
                    continue
                self.stdout.write("batch_name: %s" % batch_name)
                loader.load_batch(batch_name, strict=False)
Ejemplo n.º 4
0
    def handle(self, batch_list_filename, *args, **options):
        """Process coordinates for each well-formed batch name in a list file.

        Every line of ``batch_list_filename`` is stripped and treated as a
        batch name. A name that splits into exactly four ``_``-separated parts
        is passed to ``BatchLoader.process_coordinates``; any other name is
        logged as a warning and skipped.

        Raises:
            CommandError: if extra positional arguments are supplied.
        """
        if args:
            raise CommandError('Usage is process_coordinates %s' % self.args)

        loader = batch_loader.BatchLoader()
        # ``open`` in a ``with`` block replaces the Python-2-only ``file()``
        # builtin and guarantees the handle is closed.
        with open(batch_list_filename) as batch_list:
            _logger.info("batch_list_filename: %s", batch_list_filename)
            for line in batch_list:
                batch_name = line.strip()
                _logger.info("batch_name: %s", batch_name)
                if len(batch_name.split("_")) == 4:
                    loader.process_coordinates(batch_name)
                else:
                    _logger.warning("invalid batch name '%s'", batch_name)
0
    def handle(self, batch_list_filename, *args, **options):
        """Load each well-formed batch named in ``batch_list_filename``.

        Every line of the file is stripped and treated as a batch name. A name
        is accepted when it splits into exactly four ``_``-separated parts and
        the first part is ``batch``; accepted names are loaded with
        ``strict=False``. Any other name is logged as a warning and skipped.

        Options read: ``process_ocr`` and ``process_coordinates`` (both passed
        to ``BatchLoader``).

        Raises:
            CommandError: if extra positional arguments are supplied.
        """
        if args:
            raise CommandError('Usage is load_batch %s' % self.args)

        loader = batch_loader.BatchLoader(
            process_ocr=options['process_ocr'],
            process_coordinates=options['process_coordinates'])
        # ``open`` in a ``with`` block replaces the Python-2-only ``file()``
        # builtin and guarantees the handle is closed.
        with open(batch_list_filename) as batch_list:
            _logger.info("batch_list_filename: %s", batch_list_filename)
            for line in batch_list:
                batch_name = line.strip()
                _logger.info("batch_name: %s", batch_name)
                parts = batch_name.split("_")
                if len(parts) == 4 and parts[0] == "batch":
                    loader.load_batch(batch_name, strict=False)
                else:
                    _logger.warning("invalid batch name '%s'", batch_name)
Ejemplo n.º 6
0
    def handle(self, *args, **options):
        """Run the DLG batch load: sync, load titles, purge, then load batches.

        Each step is announced in the ``#ghnp`` Slack channel:

        1. List the sub-directories of the new and replacement batch
           locations. New-batch folders whose name contains ``MARC`` are
           handled as title folders instead of batches.
        2. rsync both locations into the batch drop directory.
        3. Copy every ``.xml`` file found in the title folders into the
           non-LCCN directory, load it with ``title_loader`` and re-index
           titles.
        4. Purge the previous version of each replacement batch and optimize
           the Solr index.
        5. Load the replacement batches, then the new batches, from ``drop/``.
        """
        new_batches_location = '/opt/chronam/data/chronamftp/new_batches/'
        replacement_batches_location = '/opt/chronam/data/chronamftp/replacement_batches/'
        nonlccn_location = '/opt/chronam/data/nonlccn/'
        batch_drop = '/opt/chronam/data/dlg_batches/drop/'

        def get_immediate_subdirectories(a_dir):
            return [
                name for name in os.listdir(a_dir)
                if os.path.isdir(os.path.join(a_dir, name))
            ]

        def slack(message):
            # ``sc`` is assigned below; the closure resolves it at call time.
            sc.api_call("chat.postMessage", channel="#ghnp", text=message)

        def rsync_to_drop(source_location):
            # The source is a ``*`` glob, so the command has to run through a
            # shell. It is built only from the constants above, so no
            # untrusted input reaches the shell.
            command = 'rsync -rav --progress %s* %s' % (source_location,
                                                        batch_drop)
            status = subprocess.call(command, shell=True)
            if status != 0:
                # Report instead of aborting: the later steps still run.
                slack('RSync from `%s` exited with status `%s`.' %
                      (source_location, status))

        start = datetime.now()

        sc = SlackClient(settings.SLACK_KEY)

        loader = BatchLoader()

        # GET LIST OF BATCHES TO LOAD
        candidate_folders = get_immediate_subdirectories(new_batches_location)
        replacement_batches = get_immediate_subdirectories(
            replacement_batches_location)

        # CHECK new_batches FOR finalMARC FOLDERS
        # Partition into two fresh lists; removing from a list while
        # iterating over it skips the entry after each removal.
        new_title_folders = [f for f in candidate_folders if 'MARC' in f]
        new_batches = [f for f in candidate_folders if 'MARC' not in f]

        # ISSUE STARTING NOTIFICATIONS
        slack(
            'Starting DLG Batch Load Process! Found `%s` new batches and `%s` replacement batches available to load.'
            % (len(new_batches), len(replacement_batches)))

        # RUN KEVIN'S RSYNC COMMANDS, WAIT
        slack('RSync of batches is starting')
        start_time = time.time()
        slack('Copying new batches')
        rsync_to_drop(new_batches_location)
        slack('Copying replacement batches')
        rsync_to_drop(replacement_batches_location)
        duration = time.time() - start_time
        slack('RSync of new and replacement batches completed in %s seconds' %
              duration)

        # LOAD NEW TITLES IF PRESENT
        if new_title_folders:
            slack('Also found `%s` title MARC files to process.' %
                  len(new_title_folders))
            for nt in new_title_folders:
                title_folder = os.path.join(new_batches_location, nt)
                for nt_f in os.listdir(title_folder):
                    if not nt_f.endswith('.xml'):
                        continue
                    marc_file = os.path.join(nonlccn_location, nt_f)
                    copyfile(os.path.join(title_folder, nt_f), marc_file)
                    title_load_results = title_loader.load(marc_file)
                    # NOTE(review): indexes 1/2/3 are read as created /
                    # updated / error results; confirm against
                    # title_loader.load.
                    if title_load_results[1]:
                        slack('New title created from `%s`.' % nt_f)
                    if title_load_results[2]:
                        slack('Title updated from `%s`.' % nt_f)
                    if title_load_results[3]:
                        slack('Error on title load from `%s`' % nt_f)
            index_titles(start)
            slack('Finished loading titles.')

        # PURGE REPLACEMENT BATCHES
        if replacement_batches:
            slack('Purging batches destined for replacement.')
            for r_b in replacement_batches:
                # Name of the previous version (ver02 -> ver01, ...,
                # ver08 -> ver07). The ascending order keeps one replacement
                # from feeding the next.
                batch_to_purge = r_b.replace('ver02', 'ver01')\
                    .replace('ver03', 'ver02')\
                    .replace('ver04', 'ver03')\
                    .replace('ver05', 'ver04')\
                    .replace('ver06', 'ver05')\
                    .replace('ver07', 'ver06')\
                    .replace('ver08', 'ver07')
                slack('Purging `%s`.' % batch_to_purge)
                loader.purge_batch(batch_to_purge)
            start_time = time.time()
            solr = SolrConnection(settings.SOLR)
            solr.optimize()
            # Parenthesised: ``%`` binds tighter than ``-``.
            slack('Index optimize complete in `%s` seconds.' %
                  (time.time() - start_time))

        # LOAD ALL BATCHES
        # start with replacement batches
        final_loader = batch_loader.BatchLoader(process_ocr=True,
                                                process_coordinates=True)
        if replacement_batches:
            replace_start = time.time()
            for replacement in replacement_batches:
                final_loader.load_batch('drop/%s' % replacement, strict=False)
                slack('Loaded replacement batch `%s`.' % replacement)
            slack('All replacement batches loaded in `%s` seconds.' %
                  (time.time() - replace_start))
        # load new batches
        if new_batches:
            new_start = time.time()
            for new in new_batches:
                final_loader.load_batch('drop/%s' % new, strict=False)
                slack('Loaded new batch `%s`.' % new)
            slack('All new batches loaded in `%s` seconds.' %
                  (time.time() - new_start))

        slack('Batch loading job complete!')