コード例 #1
0
 def dividePdf(self):
     """
     Split the volume PDF into one PDF file per article.

     Iterates over every article list stored in ``self.article_data``
     (the keys are not used) and, for each article, calls
     ``tools.cutPdf`` to extract the article's page range from
     ``self.pdf_path`` into a file inside ``self.pdf_output_dir``.
     The output file name comes from ``tools.getArticleName``, built
     from the hash of the article title and the page range.

     Returns:
         None.

     Raises:
         IOError: on the first article for which ``tools.cutPdf``
             returns a falsy value; remaining articles are not processed.
     """
     failure_template = ("PDF division failed. Input file: {0}, "
                         "start page: {1}, end page: {2}")
     for article_list in self.article_data.values():
         for entry in article_list:
             title_hash = tools.getHashName(entry["TitleDocMain"])
             first_page = entry["start_page"]
             last_page = entry["end_page"]
             # legr: TODO : should probably subtract 1 from end_page
             target_path = os.path.join(
                 self.pdf_output_dir,
                 tools.getArticleName(title_hash, first_page, last_page),
             )
             self.debug_message("creating file {0}".format(target_path))
             cut_ok = tools.cutPdf(self.pdf_path, target_path,
                                   first_page, last_page)
             if cut_ok:
                 continue
             # Bail out immediately: a failed cut means the export is unusable.
             raise IOError(
                 failure_template.format(self.pdf_path, first_page, last_page)
             )
コード例 #2
0
 def createArticleXML(self, doc, article, date_published):
     """
     Build the OJS ``<article>`` element for a single article dict.

     Args:
         doc: document object used as the element factory (only
             ``createElement`` is called on it directly here).
         article: mapping describing the article. Reads the keys
             ``DocLanguage``, ``TitleDocMain``, ``start_page`` and
             ``end_page``; ``Author`` and ``Subject`` are optional.
         date_published: value written into the ``date_published`` tag.

     Returns:
         The populated ``article`` element, with children appended in
         this order: title, pages, date_published, one element per
         author, indexing/subject (if any), and the galley (pdf link).
     """
     # --- article stub: locale/language attributes and title -------------
     article_node = doc.createElement('article')
     language = article['DocLanguage']
     article_node.setAttribute('locale', tools.convertLangToLocale(language))
     article_node.setAttribute('language', language)
     article_node.appendChild(
         self.createXMLTextTag(doc, 'title', article['TitleDocMain'])
     )

     # --- DBC-id ------------------------------------------------------------
     # Intentionally not emitted: adding an <id type="dbcMarcxID"> tag
     # caused OJS to panic, so that code was disabled.

     # --- page range --------------------------------------------------------
     first_page = article['start_page']
     last_page = article['end_page']
     # legr: the offset is applied so the real start and end page are
     # forwarded to OJS.
     offset = self.get_calculated_offset()
     if offset != 0:
         print("Information: Calculated offset value: {0}".format(offset))
     else:
         print("Warning: offset = 0. Maybe all pages are uncounted?")
     shifted_range = "{0}-{1}".format(first_page + offset, last_page + offset)
     article_node.appendChild(self.createXMLTextTag(doc, 'pages', shifted_range))

     # --- publication date --------------------------------------------------
     article_node.appendChild(
         self.createXMLTextTag(doc, 'date_published', date_published)
     )

     # --- authors -----------------------------------------------------------
     # No author tag is added when the key is absent (e.g. Front Matter).
     # 'Author' is a list of zero, one or multiple authors.
     if 'Author' in article:
         for person in article['Author']:
             article_node.appendChild(self.createAuthorXML(doc, person))

     # --- subjects ----------------------------------------------------------
     # see http://pkp.sfu.ca/wiki/index.php/Importing_and_Exporting_Data#Creating_the_XML_Import_File
     # 'Subject' is either a list (joined with ';') or a single value
     # that is used as-is.
     if 'Subject' in article:
         indexing_node = doc.createElement('indexing')
         raw_subject = article['Subject']
         if isinstance(raw_subject, list):
             subject_text = ';'.join(raw_subject)
         else:
             subject_text = raw_subject
         subject_node = self.createXMLTextTag(doc, 'subject', subject_text)
         # The subject locale is always derived from 'da', not from the
         # article's own language.
         subject_node.setAttribute('locale', tools.convertLangToLocale('da'))
         indexing_node.appendChild(subject_node)
         article_node.appendChild(indexing_node)

     # --- pdf link (galley) -------------------------------------------------
     # The file name is built from the un-offset page numbers, the same
     # inputs dividePdf passes to tools.getArticleName.
     title_hash = tools.getHashName(article['TitleDocMain'])
     galley_file = tools.getArticleName(title_hash, first_page, last_page)
     article_node.appendChild(self.createGalleyXML(doc, galley_file))

     return article_node