def parse_items(self):
    """Parse each docket path from get_docket_list(), one after another.

    Reads three things from the instance:
      - self.options['start_item']: 1-based position of the first docket to
        parse; everything before it is skipped.
      - self.options['max_items']: stop once the running counter reaches
        this value; -1 disables the limit.
      - self.debug: passed through to the parser's save() and
        make_documents() calls.

    Note that skipped dockets increment the same counter that is compared
    against max_items, so the limit applies to the position in the list
    rather than to the number of dockets actually parsed.
    NOTE(review): confirm that is the intended meaning of max_items.
    """
    # The second value returned by get_docket_list() is not used here.
    docket_paths, _file_count = get_docket_list()

    seen = 0
    for path in docket_paths:
        # Fast-forward until we reach the requested starting position.
        if seen < self.options['start_item'] - 1:
            seen += 1
            continue

        logger.info("Parsing docket: %s" % path)
        parser = PacerXMLParser(path)
        docket = parser.save(self.debug)
        if docket is not None:
            # Only build documents when save() produced a docket.
            parser.make_documents(docket, self.debug)

        seen += 1

        limit = self.options['max_items']
        if limit != -1 and seen >= limit:
            # Single-argument parenthesised print emits the same text
            # whether or not print_function is in effect.
            print("\n\nCompleted %s items. Aborting early." % limit)
            break
def parse_recap_docket(self, filename, debug=False):
    """Parse the docket file stored at MEDIA_ROOT/recap/<filename>.

    :param filename: Name of the docket file inside the 'recap' directory
        under settings.MEDIA_ROOT.
    :param debug: Forwarded to the parser's save() and make_documents().
    :return: The value returned by make_documents(), or an empty list when
        the parser raises IOError for the path or save() returns None.
    :raises: Whatever self.retry() produces when make_documents() fails
        with IntegrityError or DocketEntry.MultipleObjectsReturned.
    """
    docket_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)

    try:
        parser = PacerXMLParser(docket_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % docket_path)
        return []

    docket = parser.save(debug=debug)
    if docket is None:
        # Nothing was saved, so there are no documents to create.
        return []

    try:
        return parser.make_documents(docket, debug=debug)
    except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
        # Hand the failure to self.retry with a countdown of 20 * 60.
        # NOTE(review): presumably a Celery task retry with the countdown
        # in seconds (i.e. 20 minutes) -- confirm against the decorator.
        raise self.retry(exc=exc, countdown=20 * 60)