def processMetadata(self, doc, unique_id):
    """
    Process an individual metadata instance

    Returns a tuple containing an XML string and a validation
    warning (or None, if no warning was generated).

    `doc` is the document passed to `self.validator`. `unique_id` is used
    to label the log and status messages and may be None, in which case a
    placeholder label is substituted.
    """
    from cStringIO import StringIO
    import libxml2
    from medin import log

    # Stays None unless validation fails below.
    warning = None

    # Substitute a readable placeholder so the messages below still make
    # sense for documents without an identifier.
    if unique_id is None:
        unique_id = "no identifier present"

    # Validation is optional: it only happens when a validator is set.
    if self.validator:
        from medin.validate import ValidationError
        log("Validating document (%s)" % unique_id)
        try:
            self.validator(doc)
        # NOTE(review): `except X, e` is Python 2-only syntax.
        except ValidationError, e:
            # Invalid document: the first exception argument is used as the
            # reason, both in the status text and in a ValidationWarning.
            msg = "This document (%s) is NOT valid MEDIN metadata: %s" % (unique_id, e.args[0])
            status = "Validation status: " + msg
            warning = ValidationWarning(msg)
        else:
            # The validator returned without raising.
            status = "Validation status: This document (%s) is valid MEDIN metadata" % unique_id
        # Log the outcome, whether valid or invalid.
        log(status)
def die(msg):
    """
    Report a fatal message and terminate the program.

    The message is written to standard error followed by a newline, passed
    to `log` when that name is available, and the process then exits with
    status 1.
    """
    line = "%s\n" % msg
    sys.stderr.write(line)

    # `log` does not exist when the medin module could not be imported; the
    # resulting NameError is ignored on purpose so that we still exit below.
    try:
        log(msg)
    except NameError:
        pass

    sys.exit(1)
def iter_directory(directory, recurse=False):
    """
    Yield a parsed libxml2 document for every ``.xml`` file in a directory.

    directory -- path of the directory to scan
    recurse   -- when true, descend into sub-directories at every depth

    Entries are visited in the order returned by ``listdir``. Directories
    are never parsed as documents, even if their name ends in ``.xml``.
    """
    for entry in listdir(directory):
        filename = os.path.join(directory, entry)

        if os.path.isdir(filename):
            if recurse:
                # Pass the flag on; without it the descent would stop one
                # level below the starting directory.
                for doc in iter_directory(filename, recurse):
                    yield doc
            # A directory is not a document, whatever it is called.
            continue

        ext = os.path.splitext(entry)[1]
        if ext == '.xml':
            log('Reading file %s' % filename)
            yield libxml2.parseFile(filename)
def update(self, wsdl=None):
    """
    Refresh the stored organisations from EDMO.

    The organisations are obtained from an `Organisations` source, added to
    `self.session` and committed. The `wsdl` argument is accepted but is
    not used by this method.
    """
    from medin import log

    # fetch the current list from the remote source
    log('Retrieving organisations from EDMO')
    fetched = Organisations().getOrganisations()

    # hand everything to the session and commit in one go
    log('Updating existing organisations')
    self.session.add_all(fetched)
    self.session.commit()
def metadata_generator():
    """
    Yield a parsed document for each entry in `inputs`.

    An entry of '-' is read from standard input. A regular file or a
    symbolic link is parsed directly. A directory is expanded through
    `iter_directory` using `options.recurse`. Anything else is treated as
    a URL and parsed the same way as a file.
    """
    for source in inputs:
        if source == '-':
            log('Reading standard input')
            yield libxml2.readFd(stdin.fileno(), 'stdin', 'utf-8', 0)
            continue

        # files (and links) are checked before directories
        if os.path.isfile(source) or os.path.islink(source):
            log('Reading file %s' % source)
            yield libxml2.parseFile(source)
            continue

        if os.path.isdir(source):
            log('Reading directory %s' % source)
            for document in iter_directory(source, options.recurse):
                yield document
            continue

        # nothing on disk matched, so it must be a URL
        log('Reading URL %s' % source)
        yield libxml2.parseFile(source)
try: self.validator(doc) except ValidationError, e: msg = "This document (%s) is NOT valid MEDIN metadata: %s" % (unique_id, e.args[0]) status = "Validation status: " + msg warning = ValidationWarning(msg) else: status = "Validation status: This document (%s) is valid MEDIN metadata" % unique_id log(status) else: status = ( "Validation status: This document (%s) has NOT been validated as conforming to the MEDIN Metadata Standard" % unique_id ) log(status) # save the validation status in the document itself comment = doc.newDocComment(status) doc.getRootElement().addPrevSibling(comment) # output the document f = StringIO() buf = libxml2.createOutputBuffer(f, "utf-8") doc.saveFormatFileTo(buf, "utf-8", True) doc.freeDoc() # clean up return (f.getvalue().strip(), warning) def _createFlush(self, xml, doc, unique_id): """ Return a function that outputs a metadata entry