def import_work(self, works_page, publishers_page):
    """Import the work identified by ``self._work_code``.

    Builds a ``Work`` from the work code and from the heading PDF found in
    ``self._work_path``, stores that PDF as a ``Document`` tagged 'work',
    loads the order of impressions into ``self._order_of_impressions``,
    adds the work as a child of ``works_page`` and then delegates to
    ``self._import_impressions``.

    When no ``*.heading*.pdf`` file is found in the work directory, an
    error is logged and the work is skipped.

    Args:
        works_page: Parent page; the new work is attached to it with
            ``add_child``.
        publishers_page: Passed through unchanged to
            ``self._import_impressions``.

    Returns:
        None.

    Raises:
        ValueError: If the work code does not contain 'Opus' and is not
            listed in ``settings.ALL_WORKS_WITHOUT_OPUS``.
    """
    work = Work()
    work.code = self._work_code

    if 'Opus' in work.code:
        work.has_opus = True
        try:
            opus_n = int(work.code.split()[1].strip())
        except (IndexError, ValueError):
            # The opus number is either missing (no second token) or not
            # numeric; both cases use the same fallback value.
            # NOTE(review): 66 is presumably the first posthumous opus
            # number -- confirm.
            opus_n = 66
        work.is_posthumous = (
            opus_n >= settings.POSTHUMOUS_WORKS_WITH_OPUS)
        work.sort_order = opus_n
    else:
        work.has_opus = False
        work.is_posthumous = (
            work.code in settings.POSTHUMOUS_WORKS_WITHOUT_OPUS)
        # NOTE(review): the + 74 offset presumably sorts works without an
        # opus number after every opus-numbered work -- confirm.
        work.sort_order = (
            settings.ALL_WORKS_WITHOUT_OPUS.index(work.code) + 74)

    self.logger.debug('Work sort order: {}'.format(work.sort_order))

    # Heading filename.
    try:
        heading_filename = glob.glob(
            os.path.join(self._work_path, '*.heading*.pdf'))[0]
    except IndexError:
        self.logger.error(
            'No heading file found; skipping work {0}'.format(work.code))
        return

    work.heading = self._import_heading(heading_filename)
    work.title = work.heading.split(' ')[0].strip()
    work.slug = safe_slugify(work.title, Work)

    # Create a Work PDF Document.
    document = Document(title=work.title)
    with open(heading_filename, 'rb') as fh:
        # The file has to be saved while the handle is still open.
        pdf_file = File(fh)
        document.file.save(os.path.basename(heading_filename), pdf_file)
    document.tags.add('work')
    work.pdf = document

    # The order of impressions is read back later, when each impression's
    # sort order is resolved.
    self._order_of_impressions = self._import_order_of_impressions()
    self.logger.debug(self._order_of_impressions)

    works_page.add_child(instance=work)
    self._import_impressions(work, publishers_page)
def import_work(self, works_page, publishers_page):
    """Import the work identified by ``self._work_code``.

    Builds a ``Work`` from the work code and from the heading PDF found in
    ``self._work_path``, stores that PDF as a ``Document`` tagged 'work',
    loads the order of impressions into ``self._order_of_impressions``,
    adds the work as a child of ``works_page`` and then delegates to
    ``self._import_impressions``.

    When no ``*.heading*.pdf`` file is found in the work directory, an
    error is logged and the work is skipped.

    Args:
        works_page: Parent page; the new work is attached to it with
            ``add_child``.
        publishers_page: Passed through unchanged to
            ``self._import_impressions``.

    Returns:
        None.

    Raises:
        ValueError: If the work code does not contain 'Opus' and is not
            listed in ``settings.ALL_WORKS_WITHOUT_OPUS``.
    """
    work = Work()
    work.code = self._work_code

    if 'Opus' in work.code:
        work.has_opus = True
        try:
            opus_n = int(work.code.split()[1].strip())
        except (IndexError, ValueError):
            # The opus number is either missing (no second token) or not
            # numeric; both cases use the same fallback value.
            # NOTE(review): 66 is presumably the first posthumous opus
            # number -- confirm.
            opus_n = 66
        work.is_posthumous = (
            opus_n >= settings.POSTHUMOUS_WORKS_WITH_OPUS)
        work.sort_order = opus_n
    else:
        work.has_opus = False
        work.is_posthumous = (
            work.code in settings.POSTHUMOUS_WORKS_WITHOUT_OPUS)
        # NOTE(review): the + 74 offset presumably sorts works without an
        # opus number after every opus-numbered work -- confirm.
        work.sort_order = (
            settings.ALL_WORKS_WITHOUT_OPUS.index(work.code) + 74)

    self.logger.debug('Work sort order: {}'.format(work.sort_order))

    # Heading filename.
    try:
        heading_filename = glob.glob(
            os.path.join(self._work_path, '*.heading*.pdf'))[0]
    except IndexError:
        self.logger.error(
            'No heading file found; skipping work {0}'.format(work.code))
        return

    work.heading = self._import_heading(heading_filename)
    work.title = work.heading.split(' ')[0].strip()
    work.slug = safe_slugify(work.title, Work)

    # Create a Work PDF Document.
    document = Document(title=work.title)
    with open(heading_filename, 'rb') as fh:
        # The file has to be saved while the handle is still open.
        pdf_file = File(fh)
        document.file.save(os.path.basename(heading_filename), pdf_file)
    document.tags.add('work')
    work.pdf = document

    # The order of impressions is read back later, when each impression's
    # sort order is resolved.
    self._order_of_impressions = self._import_order_of_impressions()
    self.logger.debug(self._order_of_impressions)

    works_page.add_child(instance=work)
    self._import_impressions(work, publishers_page)
def _import_impression(self, work, publishers_page, f_path):
    """Import one impression from the PDF at ``f_path`` under ``work``.

    Parses the PDF with ``PDFParser``. When the parser yields no
    impression code, nothing is imported. Otherwise the PDF is stored as
    a ``Document`` tagged 'impression', an ``Impression`` is populated
    from the parser, its copies are imported, its publisher is looked up
    by title (and created under ``publishers_page`` when missing), and
    the impression is added as a child of ``work``.

    Args:
        work: Parent work; the impression is attached to it with
            ``add_child``.
        publishers_page: Parent page for any newly created publisher.
        f_path: Path of the impression PDF file.

    Returns:
        None.
    """
    # creates a new PDFParser to get the impression
    self.logger.debug('Parsing {}'.format(f_path))
    parser = PDFParser(f_path)
    code = parser.get_impression_code()

    if not code:
        # Without an impression code there is nothing to import.
        return

    self.logger.debug('Impression: ' + code)

    # Create an Impression PDF Document.
    document = Document(title=code)
    with open(f_path, 'rb') as fh:
        # The file has to be saved while the handle is still open.
        pdf_file = File(fh)
        document.file.save(os.path.basename(f_path), pdf_file)
    document.tags.add('impression')

    # creates a new impression
    impression = Impression()
    impression.title = code
    impression.impression_title = parser.get_title()
    impression.content = parser.get_text_content()
    impression.pdf = document

    try:
        # The lookup uses the lower-cased code.
        sort_order = self._order_of_impressions.index(code.lower())
    except ValueError:
        # Only "not in the list" is expected here; any other error is a
        # real problem and must propagate.
        self.logger.error(
            u'{0} missing from order of impressions, which consists of: {1}'
            .format(code, ', '.join(self._order_of_impressions)))
        # Fallback sort order for impressions missing from the list.
        sort_order = 999

    impression.sort_order = sort_order
    impression.slug = safe_slugify(impression.title, Impression)
    impression.comments = parser.get_comments()

    self._import_copies(impression, parser, code)

    # The publisher code is the last '-'-separated part of the title.
    publisher_code = impression.title.split('-')[-1]
    publisher = Publisher.objects.filter(title=publisher_code).first()

    if not publisher:
        publisher = Publisher(title=publisher_code)
        publisher.slug = slugify(publisher_code)
        publishers_page.add_child(instance=publisher)

    impression.publisher = publisher

    work.add_child(instance=impression)
def _import_impression(self, work, publishers_page, f_path):
    """Import one impression from the PDF at ``f_path`` under ``work``.

    Parses the PDF with ``PDFParser``. When the parser yields no
    impression code, nothing is imported. Otherwise the PDF is stored as
    a ``Document`` tagged 'impression', an ``Impression`` is populated
    from the parser, its copies are imported, its publisher is looked up
    by title (and created under ``publishers_page`` when missing), and
    the impression is added as a child of ``work``.

    Args:
        work: Parent work; the impression is attached to it with
            ``add_child``.
        publishers_page: Parent page for any newly created publisher.
        f_path: Path of the impression PDF file.

    Returns:
        None.
    """
    # creates a new PDFParser to get the impression
    self.logger.debug('Parsing {}'.format(f_path))
    parser = PDFParser(f_path)
    code = parser.get_impression_code()

    if not code:
        # Without an impression code there is nothing to import.
        return

    self.logger.debug('Impression: ' + code)

    # Create an Impression PDF Document.
    document = Document(title=code)
    with open(f_path, 'rb') as fh:
        # The file has to be saved while the handle is still open.
        pdf_file = File(fh)
        document.file.save(os.path.basename(f_path), pdf_file)
    document.tags.add('impression')

    # creates a new impression
    impression = Impression()
    impression.title = code
    impression.impression_title = parser.get_title()
    impression.content = parser.get_text_content()
    impression.pdf = document

    try:
        # The lookup uses the lower-cased code.
        sort_order = self._order_of_impressions.index(code.lower())
    except ValueError:
        # Only "not in the list" is expected here; any other error is a
        # real problem and must propagate.
        self.logger.error(
            u'{0} missing from order of impressions, which consists of: {1}'
            .format(code, ', '.join(self._order_of_impressions)))
        # Fallback sort order for impressions missing from the list.
        sort_order = 999

    impression.sort_order = sort_order
    impression.slug = safe_slugify(impression.title, Impression)
    impression.comments = parser.get_comments()

    self._import_copies(impression, parser, code)

    # The publisher code is the last '-'-separated part of the title.
    publisher_code = impression.title.split('-')[-1]
    publisher = Publisher.objects.filter(title=publisher_code).first()

    if not publisher:
        publisher = Publisher(title=publisher_code)
        publisher.slug = slugify(publisher_code)
        publishers_page.add_child(instance=publisher)

    impression.publisher = publisher

    work.add_child(instance=impression)