def handle(self, *args, **options):
     """Fetch one ``mods.xml`` per day from S3 and process the CREC records in it.

     Iterates day by day from ``options['start_date']`` (truncated to
     midnight) up to, but not including, ``options['end_date']``.

     Options read:
         start_date, end_date: bounds of the day range.
         source_bucket: S3 bucket holding the mods files.
         to_stdout: when truthy, each document is written through the module
             logger instead of being saved, and no Elasticsearch connection
             is created.
         es_url: Elasticsearch host, used only when ``to_stdout`` is falsy.

     ``upload_speaker_word_counts`` is called for every non-skippable record
     in both modes. A failure while processing one day is logged and the loop
     moves on to the next day.
     """
     s3 = boto3.resource(
         's3',
         aws_access_key_id=settings.AWS_ACCESS_KEY,
         aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
     )
     to_stdout = options['to_stdout']
     dt = options['start_date'].replace(hour=0, minute=0, second=0, microsecond=0)
     if not to_stdout:
         connections.create_connection(hosts=[options['es_url']], **settings.ES_CONNECTION_PARAMS)
         CRECDoc.init()
     while dt < options['end_date']:
         logger.info('Processing files for {0}.'.format(dt))
         try:
             response = s3.Object(
                 options['source_bucket'],
                 crec_s3_key('mods.xml', dt)
             ).get()
         except botocore.exceptions.ClientError as e:
             # Only a missing key means "no file for this day". Anything else
             # (permissions, throttling, wrong bucket, ...) is reported with
             # its details instead of being mislabelled as "not found".
             # NOTE(review): S3 can answer AccessDenied for a missing key when
             # the credentials lack s3:ListBucket -- confirm the bucket policy.
             error_code = e.response.get('Error', {}).get('Code')
             if error_code in ('NoSuchKey', '404'):
                 logger.info('Could not find mods file for {0}.'.format(dt))
             else:
                 logger.warning('Error retrieving mods file for {0}: {1}'.format(dt, e))
             response = None
         if response is not None and response.get('Body'):
             try:
                 crecs = extract_crecs_from_mods(response['Body'])
                 logger.info('Found {0} new records.'.format(len(crecs)))
                 if to_stdout:
                     logger.info('Using stdout:')
                 for crec in crecs:
                     if not crec.is_skippable():
                         if to_stdout:
                             logger.info(crec.to_es_doc())
                         else:
                             es_doc = crec.to_es_doc()
                             es_doc.save()
                         upload_speaker_word_counts(crec)
             except Exception:
                 # Best effort: logger.exception records the traceback, then
                 # the loop continues with the following day.
                 logger.exception('Error processing data for {0}.'.format(dt.strftime('%Y-%m-%d')))
         dt += timedelta(days=1)
Exemple #2
0
 def setUp(self):
     """Create and save 20 ``CRECDoc`` fixtures, then build a test client.

     The fixtures have titles ``'0'`` through ``'19'``, identical content,
     and ``date_issued`` values cycling over January 1-5, 2017 (four
     documents per date).
     """
     self.es_conn = connections.get_connection()
     self.test_crecs = [
         CRECDoc(
             title=str(i),
             content='foo bar baz Foo',
             # i % 5 + 1 yields day-of-month 1..5, repeating.
             date_issued=datetime(2017, 1, i % 5 + 1),
         )
         for i in range(20)
     ]
     self.index = Index(settings.ES_CW_INDEX)
     CRECDoc.init()
     for doc in self.test_crecs:
         doc.save(refresh=True)
     self.client = Client()
Exemple #3
0
 def setUp(self):
     """Prepare test fixtures: 20 saved ``CRECDoc`` objects and a ``Client``.

     Each document gets its loop index as title, the same content string,
     and a ``date_issued`` between January 1 and January 5, 2017.
     """
     self.es_conn = connections.get_connection()

     fixtures = []
     self.test_crecs = fixtures
     for n in range(20):
         # Day-of-month cycles 1, 2, 3, 4, 5, 1, 2, ...
         issued = datetime(2017, 1, n % 5 + 1)
         fixtures.append(
             CRECDoc(title=str(n), content='foo bar baz Foo', date_issued=issued)
         )

     self.index = Index(settings.ES_CW_INDEX)
     CRECDoc.init()
     for fixture in fixtures:
         fixture.save(refresh=True)

     self.client = Client()
Exemple #4
0
 def handle(self, *args, **options):
     """Process the daily ``mods.xml`` files stored in S3 for a range of days.

     The range starts at ``options['start_date']`` (reset to midnight) and
     ends before ``options['end_date']``, advancing one day at a time. For
     each day the mods file is fetched from ``options['source_bucket']``,
     records are extracted from it, and every record that is not skippable
     is either logged (``options['to_stdout']`` truthy) or converted to an
     ES document and saved. ``upload_speaker_word_counts`` runs for each such
     record in either mode.

     An Elasticsearch connection to ``options['es_url']`` is created only
     when ``to_stdout`` is falsy. Errors while processing a day are logged
     and do not stop the loop.
     """
     s3 = boto3.resource(
         's3',
         aws_access_key_id=settings.AWS_ACCESS_KEY,
         aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)
     dt = options['start_date'].replace(hour=0,
                                        minute=0,
                                        second=0,
                                        microsecond=0)
     to_stdout = options['to_stdout']
     if not to_stdout:
         connections.create_connection(hosts=[options['es_url']],
                                       **settings.ES_CONNECTION_PARAMS)
         CRECDoc.init()
     while dt < options['end_date']:
         logger.info('Processing files for {0}.'.format(dt))
         response = None
         try:
             response = s3.Object(options['source_bucket'],
                                  crec_s3_key('mods.xml', dt)).get()
         except botocore.exceptions.ClientError as e:
             # Distinguish "no file for this day" from real failures
             # (permissions, throttling, wrong bucket, ...), which used to be
             # reported as a missing file with the error detail discarded.
             # NOTE(review): S3 can answer AccessDenied for a missing key when
             # the credentials lack s3:ListBucket -- confirm the bucket policy.
             error_code = e.response.get('Error', {}).get('Code')
             if error_code in ('NoSuchKey', '404'):
                 logger.info('Could not find mods file for {0}.'.format(dt))
             else:
                 logger.warning(
                     'Error retrieving mods file for {0}: {1}'.format(dt, e))
         if response is not None and response.get('Body'):
             try:
                 crecs = extract_crecs_from_mods(response['Body'])
                 logger.info('Found {0} new records.'.format(len(crecs)))
                 if to_stdout:
                     logger.info('Using stdout:')
                 for crec in crecs:
                     if not crec.is_skippable():
                         if to_stdout:
                             logger.info(crec.to_es_doc())
                         else:
                             es_doc = crec.to_es_doc()
                             es_doc.save()
                         upload_speaker_word_counts(crec)
             except Exception:
                 # Best effort: record the traceback and carry on with the
                 # next day.
                 logger.exception('Error processing data for {0}.'.format(
                     dt.strftime('%Y-%m-%d')))
         dt += timedelta(days=1)