def handle(self, *args, **options):
    """Process one ``mods.xml`` file per day over a date range.

    Starting at ``options['start_date']`` (truncated to midnight) and stopping
    before ``options['end_date']``, fetch the day's ``mods.xml`` object from
    the ``options['source_bucket']`` S3 bucket and extract CREC records from
    its body. Records that are not skippable are either logged (when
    ``options['to_stdout']`` is truthy) or saved via ``to_es_doc().save()``
    followed by ``upload_speaker_word_counts``.

    A failure to fetch or process one day is logged and does not stop the
    remaining days from being processed.
    """
    s3 = boto3.resource(
        's3',
        aws_access_key_id=settings.AWS_ACCESS_KEY,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    to_stdout = options['to_stdout']
    dt = options['start_date'].replace(hour=0, minute=0, second=0, microsecond=0)
    if not to_stdout:
        # A connection is only opened when documents will actually be saved.
        connections.create_connection(
            hosts=[options['es_url']], **settings.ES_CONNECTION_PARAMS
        )
        CRECDoc.init()
    while dt < options['end_date']:
        logger.info('Processing files for {0}.'.format(dt))
        try:
            response = s3.Object(
                options['source_bucket'], crec_s3_key('mods.xml', dt)
            ).get()
        except botocore.exceptions.ClientError as e:
            # Only a missing key means "no file for this day"; report any
            # other S3 failure (permissions, throttling, ...) with its code
            # instead of mislabelling it, but keep going either way.
            error_code = e.response.get('Error', {}).get('Code')
            if error_code in ('NoSuchKey', '404'):
                logger.info('Could not find mods file for {0}.'.format(dt))
            else:
                logger.warning(
                    'Could not fetch mods file for {0} (S3 error code: {1}).'.format(
                        dt, error_code
                    )
                )
            response = None
        if response is not None and response.get('Body'):
            try:
                crecs = extract_crecs_from_mods(response['Body'])
                logger.info('Found {0} new records.'.format(len(crecs)))
                if to_stdout:
                    logger.info('Using stdout:')
                for crec in crecs:
                    if crec.is_skippable():
                        continue
                    if to_stdout:
                        logger.info(crec.to_es_doc())
                    else:
                        es_doc = crec.to_es_doc()
                        es_doc.save()
                        upload_speaker_word_counts(crec)
            except Exception:
                # Per-day boundary: log the traceback and move on to the next day.
                logger.exception(
                    'Error processing data for {0}.'.format(dt.strftime('%Y-%m-%d'))
                )
        dt += timedelta(days=1)
def setUp(self):
    """Save twenty ``CRECDoc`` fixtures and create a test client.

    The fixtures are titled ``"0"`` through ``"19"``, share the same content,
    and have ``date_issued`` values cycling over 2017-01-01 .. 2017-01-05.
    """
    self.es_conn = connections.get_connection()
    self.test_crecs = [
        CRECDoc(
            title=str(n),
            content='foo bar baz Foo',
            date_issued=datetime(2017, 1, n % 5 + 1),
        )
        for n in range(20)
    ]
    self.index = Index(settings.ES_CW_INDEX)
    CRECDoc.init()
    # NOTE(review): refresh=True presumably makes each document visible to
    # searches right away -- confirm against the document library's save().
    for doc in self.test_crecs:
        doc.save(refresh=True)
    self.client = Client()
def setUp(self):
    """Prepare the fixtures used by each test.

    Builds twenty ``CRECDoc`` objects (titles ``"0"``..``"19"``, identical
    content, issue dates on the first five days of January 2017), saves each
    one with ``refresh=True``, and stores a ``Client`` on ``self.client``.
    """
    self.es_conn = connections.get_connection()

    shared_content = 'foo bar baz Foo'
    self.test_crecs = [
        CRECDoc(
            title=str(idx),
            content=shared_content,
            date_issued=datetime(2017, 1, idx % 5 + 1),
        )
        for idx in range(20)
    ]

    self.index = Index(settings.ES_CW_INDEX)
    CRECDoc.init()
    for crec_doc in self.test_crecs:
        crec_doc.save(refresh=True)

    self.client = Client()
def handle(self, *args, **options):
    """Process one ``mods.xml`` file per day over a date range.

    Starting at ``options['start_date']`` (truncated to midnight) and stopping
    before ``options['end_date']``, fetch the day's ``mods.xml`` object from
    the ``options['source_bucket']`` S3 bucket and extract CREC records from
    its body. Records that are not skippable are either logged (when
    ``options['to_stdout']`` is truthy) or saved via ``to_es_doc().save()``
    followed by ``upload_speaker_word_counts``.

    A failure to fetch or process one day is logged and does not stop the
    remaining days from being processed.
    """
    s3 = boto3.resource(
        's3',
        aws_access_key_id=settings.AWS_ACCESS_KEY,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    to_stdout = options['to_stdout']
    dt = options['start_date'].replace(hour=0, minute=0, second=0, microsecond=0)
    if not to_stdout:
        # A connection is only opened when documents will actually be saved.
        connections.create_connection(
            hosts=[options['es_url']], **settings.ES_CONNECTION_PARAMS
        )
        CRECDoc.init()
    while dt < options['end_date']:
        logger.info('Processing files for {0}.'.format(dt))
        try:
            response = s3.Object(
                options['source_bucket'], crec_s3_key('mods.xml', dt)
            ).get()
        except botocore.exceptions.ClientError as e:
            # Only a missing key means "no file for this day"; report any
            # other S3 failure (permissions, throttling, ...) with its code
            # instead of mislabelling it, but keep going either way.
            error_code = e.response.get('Error', {}).get('Code')
            if error_code in ('NoSuchKey', '404'):
                logger.info('Could not find mods file for {0}.'.format(dt))
            else:
                logger.warning(
                    'Could not fetch mods file for {0} (S3 error code: {1}).'.format(
                        dt, error_code
                    )
                )
            response = None
        if response is not None and response.get('Body'):
            try:
                crecs = extract_crecs_from_mods(response['Body'])
                logger.info('Found {0} new records.'.format(len(crecs)))
                if to_stdout:
                    logger.info('Using stdout:')
                for crec in crecs:
                    if crec.is_skippable():
                        continue
                    if to_stdout:
                        logger.info(crec.to_es_doc())
                    else:
                        es_doc = crec.to_es_doc()
                        es_doc.save()
                        upload_speaker_word_counts(crec)
            except Exception:
                # Per-day boundary: log the traceback and move on to the next day.
                logger.exception(
                    'Error processing data for {0}.'.format(dt.strftime('%Y-%m-%d'))
                )
        dt += timedelta(days=1)