Example #1
0
    def getVariables(self):
        """
        Gather the settings this step needs from the config file and the
        command line, and derive the path of the OJS XML file.

        Attributes set on self: ojs_server, ojs_server_user, ojs_app_user,
        tool_path, issn_missing, ojs_journal_path and xml_path. In addition
        volume_title is set, but only when no ISSN was supplied.

        The journal path is resolved in one of two ways:
          * ISSN on the command line: it is looked up with
            ojs.getJournalPath(ojs_server, issn).
          * No ISSN (legacy processes): it comes from the
            "ojs_journal_path" setting, or, when that setting is "system",
            it is parsed from the issue's main title in the METS file.

        A lookup that fails inside one of the helpers propagates to the
        caller; only the missing-ISSN AttributeError is handled here.
        """
        # Plain config values are stored under the same name as their key.
        for key in ("ojs_server", "ojs_server_user", "ojs_app_user", "tool_path"):
            setattr(self, key, self.getConfigItem(key))

        # Temporary: new processes should always carry an ISSN, so this
        # check belongs with the essential arguments eventually.
        # Start out assuming the ISSN is present on the command line.
        self.issn_missing = False
        try:
            issn = self.command_line.issn
        except AttributeError as e:
            self.debug_message("Warning, missing attribute. Details: {0}".format(e))
            # No ISSN on the command line, fall back to the legacy lookup.
            self.issn_missing = True

        if self.issn_missing:
            # Legacy lookup: take the journal path from the process setting,
            # or derive it from the issue title when the setting is "system".
            mets_file = os.path.join(
                self.command_line.process_path,
                self.getConfigItem("metadata_goobi_file", None, "process_files"))
            issue_data = mets_tools.getIssueData(mets_file)
            self.ojs_journal_path = self.getSetting("ojs_journal_path", default="system")
            if self.ojs_journal_path != "system":
                self.volume_title = self.ojs_journal_path
            else:
                self.volume_title = tools.parseTitle(issue_data["TitleDocMain"])
            journal_dir_name = self.volume_title
        else:
            # Current lookup: resolve the journal path from the ISSN.
            self.ojs_journal_path = ojs.getJournalPath(self.ojs_server, issn)
            self.debug_message("Journal path is %s" % self.ojs_journal_path)
            journal_dir_name = self.ojs_journal_path

        # The "upload_dir" config value is a template with two positional
        # slots: the journal directory name and the process title. The XML
        # file ends up as
        # <upload_dir>/<journal_name>/<process_name>/<process_name>.xml
        upload_template = self.getConfigItem("upload_dir")
        process_title = self.command_line.process_title
        upload_dir = upload_template.format(journal_dir_name, process_title)

        self.xml_path = os.path.join(upload_dir, "{0}.xml".format(process_title))
        self.debug_message("XML path is %s" % self.xml_path)
    def createXML(self):
        '''
        Build the OJS XML for the issue and write it to
        <ojs_metadata_dir>/<process_title>.xml.

        Issue data and article data (front matter, articles, back matter)
        are read from self.mets_file. As a side effect self.ojs_dir is set
        to <ojs_root>/<journal path>/<process_title>.

        Reads from self: mets_file, ojs_journal_path, ojs_root,
        ojs_metadata_dir and command_line.process_title.

        Raises ValueError when the issue's PublicationYear is not a
        four-digit number starting with 17, 18, 19 or 20.
        '''
        data = minidom.parse(self.mets_file)
        issue_data = mets_tools.getIssueData(self.mets_file)
        article_data = mets_tools.getArticleData(data, ['FrontMatter', 'Articles', 'BackMatter'])
        # This is the dir where files will be uploaded to. "system" means
        # the journal path is derived from the issue's main title.
        if self.ojs_journal_path == 'system':
            journal_title_path = tools.parseTitle(issue_data['TitleDocMain'])
            # TODO: write this one back as a property?
            #self.goobi_com.addProperty(self.process_id, 'ojs_journal_path', journal_title_path, overwrite=True)
        else:
            journal_title_path = self.ojs_journal_path
        self.ojs_dir = os.path.join(self.ojs_root, journal_title_path, self.command_line.process_title)
        #=======================================================================
        # Get and validate PublicationYear
        # I.e. it is exactly four digits and starts with 17, 18, 19 or 20
        #=======================================================================
        # User-facing (Danish) error template: {0} is the offending value,
        # {1} the specific reason it was rejected.
        err = ('Publiceringsåret ("{0}") for hæftet skal være et firecifret tal '
               'begyndende med enten 17, 18, 19 eller 20, f.eks. 1814, 1914 '
               'eller 2014.  {1}. Åben metadata-editor og ret metadata for '
               'hæftet og afslut opgaven.')
        pub_year = issue_data['PublicationYear'].strip()  # Drop surrounding whitespace.
        if not pub_year.isdigit():
            raise ValueError(err.format(pub_year, 'Det indtastede i feltet årstal er ikke et korrekt tal'))
        if len(pub_year) != 4:
            raise ValueError(err.format(pub_year, 'Tallet er ikke præcis fire cifre langt'))
        # Floor division gives the leading two digits of the four-digit year.
        if int(pub_year) // 100 not in (17, 18, 19, 20):
            raise ValueError(err.format(pub_year, 'Tallet starter ikke med 17, 18, 19 eller 20'))
        # Only the year is known, so the issue is dated January 1st.
        date_published = "{0}-01-01".format(pub_year)
        #=======================================================================
        # Create base xml for issue
        #=======================================================================
        impl = minidom.getDOMImplementation()
        doc = impl.createDocument(None, "issue", None)
        doc = self.createHeadMaterial(doc, issue_data)
        # Add one section element per non-empty group of articles, in the
        # order front matter, articles, back matter.
        section_builders = (
            ('FrontMatter', self.createFrontSectionXML),
            ('Articles', self.createArticleSectionXML),
            ('BackMatter', self.createBackSectionXML),
        )
        for section_name, build_section in section_builders:
            section_articles = article_data[section_name]
            if not section_articles:
                continue
            section = build_section(doc, issue_data)
            section = self.createArticlesForSection(section_articles,
                                                    section,
                                                    doc,
                                                    date_published)
            doc.documentElement.appendChild(section)

        # Save the xml content to the correct file. The with-block makes
        # sure the file is flushed and closed even if the write fails.
        # NOTE(review): the file is opened in text mode with the platform's
        # default encoding; confirm that this handles non-ASCII metadata as
        # intended (an explicit UTF-8 encoding may be needed).
        output_name = os.path.join(self.ojs_metadata_dir, self.command_line.process_title + '.xml')
        with open(output_name, 'w') as output:
            output.write(doc.toxml())
    def getVariables(self):
        '''
        Gather the paths this step needs from the command line and the
        config file, and make sure the destination folders exist.

        Attributes set on self: ojs_metadata_dir, pdf_input_dir and
        ojs_dest_dir. self.issn is set when an ISSN is on the command
        line; self.ojs_journal_path is set only when it is not.

        The journal folder below the OJS mount is named after:
          * the journal path returned by ojs.getJournalPath when an ISSN
            is available. That call reads self.ojs_server, which this
            method does not assign, so it must already be set.
          * otherwise the "ojs_journal_path" setting, or the parsed main
            title of the issue when that setting is "system".

        The journal folder and the process folder inside it are created
        if needed with change_owner=1000.
        '''

        # Temporary: new processes should always carry an ISSN, so this
        # check belongs with the essential arguments eventually.
        # Start out assuming the ISSN is present on the command line.
        use_legacy_lookup = False
        try:
            self.issn = self.command_line.issn
        except AttributeError as e:
            self.debug_message("Warning, missing attribute. Details: {0}".format(e))
            # No ISSN on the command line, fall back to the legacy lookup.
            use_legacy_lookup = True

        process_path = self.command_line.process_path

        # Temporary, until all new processes use an ISSN: the METS file is
        # only needed to derive the journal name from the issue title.
        if use_legacy_lookup:
            mets_file = os.path.join(
                process_path,
                self.getConfigItem('metadata_goobi_file', None, 'process_files'))

        ojs_mount = self.getConfigItem('ojs_mount')

        # Both folders below are configured relative to the process dir.
        metadata_subdir = self.getConfigItem('metadata_ojs_path',
                                             section=self.folder_structure_section)
        self.ojs_metadata_dir = os.path.join(process_path, metadata_subdir)

        pdf_subdir = self.getConfigItem('doc_pdf_path',
                                        section=self.folder_structure_section)
        self.pdf_input_dir = os.path.join(process_path, pdf_subdir)

        # Temporary condition, until all new processes use an ISSN.
        if use_legacy_lookup:
            issue_data = mets_tools.getIssueData(mets_file)
            # "system" means: derive the journal path from the issue title.
            self.ojs_journal_path = self.getSetting('ojs_journal_path', default='system')
            if self.ojs_journal_path != 'system':
                journal_dir_name = self.ojs_journal_path
            else:
                journal_dir_name = tools.parseTitle(issue_data['TitleDocMain'])
                # TODO: write this one back as a property?
                # self.goobi_com.addProperty(self.process_id, 'ojs_journal_path', journal_dir_name, overwrite=True)
        else:
            # The process has an ISSN, so look the journal path up with it.
            issn_value = self.command_line.issn
            journal_dir_name = ojs.getJournalPath(self.ojs_server, issn_value)
        ojs_journal_folder = os.path.join(ojs_mount, journal_dir_name)

        # Create the journal folder and, inside it, the folder for this
        # process; owner is set to gid 1000 => ojs-group.
        tools.find_or_create_dir(ojs_journal_folder, change_owner=1000)
        self.ojs_dest_dir = os.path.join(ojs_journal_folder,
                                         self.command_line.process_title)
        tools.find_or_create_dir(self.ojs_dest_dir, change_owner=1000)

        tools.ensureDirsExist(self.ojs_metadata_dir,
                              self.pdf_input_dir,
                              self.ojs_dest_dir)

        # Temporary condition: in the future the ISSN is always available.
        # The resolved paths are only logged for ISSN-based processes.
        if not use_legacy_lookup:
            resolved_paths = (
                ("metadata_dir is %s", self.ojs_metadata_dir),
                ("pdf_input_dir is %s", self.pdf_input_dir),
                ("dest_dir is %s", self.ojs_dest_dir),
            )
            for message, path in resolved_paths:
                self.debug_message(message % path)