def process(self, lang):
    """Walk every top-level entry of the ``<lang>.xml`` resource file.

    The file ``'<lang>.xml'`` is parsed relative to the current working
    directory.  When its root tag is ``resources``, each direct child is
    handed to ``self.walk`` together with a one-element tuple holding the
    child's ``name`` attribute and the language code.  Any other root tag
    is ignored.

    ``lang`` must be exactly two characters long (checked with ``assert``).
    """
    assert len(lang) == 2, 'Language name must be two letters long'
    filename = '%s.xml' % lang
    resources = ElementTree(file=filename).getroot()
    # Guard clause: only <resources> documents are processed.
    if resources.tag != 'resources':
        return
    for entry in resources:
        path = (entry.get('name'),)
        self.walk(entry, path, lang)
def process(self, lang):
    """Walk every top-level entry of this directory's ``<lang>.xml`` file.

    The file is looked up as ``<self._dirname>/<lang>.xml``.  When the
    parsed root element is tagged ``resources``, every direct child is
    passed to ``self.walk`` along with a one-element tuple containing the
    child's ``name`` attribute and the language code.  Documents with any
    other root tag are left untouched.

    ``lang`` must be exactly two characters long (checked with ``assert``).
    """
    assert len(lang) == 2, "Language name must be two letters long"
    xml_path = os.path.join(self._dirname, "%s.xml" % lang)
    top = ElementTree(file=xml_path).getroot()
    if top.tag != "resources":
        return
    for node in top:
        self.walk(node, (node.get("name"),), lang)
def process(self, lang):
    """Feed the children of ``<lang>.xml``'s ``resources`` root to ``walk``.

    ``'<lang>.xml'`` is opened relative to the current working directory.
    For a root element tagged ``resources`` each direct child is passed to
    ``self.walk(child, (name,), lang)`` where ``name`` is the child's
    ``name`` attribute.  Nothing happens for any other root tag.

    ``lang`` must be exactly two characters long (checked with ``assert``).
    """
    assert len(lang) == 2, 'Language name must be two letters long'
    tree = ElementTree(file='%s.xml' % lang)
    root_element = tree.getroot()
    is_resource_file = root_element.tag == 'resources'
    if is_resource_file:
        for resource in root_element:
            key = (resource.get('name'), )
            self.walk(resource, key, lang)
def mark_changed_publications(modified_since):
    """
    Flags every publication Symplectic reports as modified since a date.

    Asks the Symplectic API for the publications modified since the given
    date, stores the XML response in a local file, parses it, deletes the
    local file, and passes each child element of the response root to
    ``_flag_publication_as_needing_refetch``.

    ``modified_since`` is a date string in the form ``yyyy-mm-dd``.
    Returns None.
    """
    # date needs to be in form of yyyy-mm-dd
    # the string "T00:00:00Z" is appended (UTC-0 timezone); in the url
    # each ":" becomes %3A

    # symplectic api url and local file path
    url = "".join([
        SYMPLECTIC_API_URL,
        'search-publications?modified-since-when=',
        modified_since,
        'T00%3A00%3A00Z'
        ])

    tmp_filename = "".join([
        SYMPLECTIC_LOCAL_XML_FOLDER,
        SYMPLECTIC_LOCAL_PUBSMODIFIED_FOLDER,
        modified_since,
        '.xml'
        ])

    # get xml document from symplectic api and store on hd
    (tmp_filename, _http_headers) = urllib.urlretrieve(url, tmp_filename)

    # parse xml file; the local copy is deleted even when parsing fails so
    # that a malformed response does not leave a stale file behind
    try:
        search_publications_etree = ElementTree(file=tmp_filename)
    finally:
        os.remove(tmp_filename)

    # publication lite elements are held in a subtree BUT the subtree is
    # the root element
    search_publications_subtree = search_publications_etree.getroot()

    # check if any publication elements in subtree
    if search_publications_subtree is None or \
            len(search_publications_subtree) < 1:
        return

    # for each publication element in subtree
    # (list(element) is the replacement for the deprecated getchildren())
    for search_publication_element in list(search_publications_subtree):
        _flag_publication_as_needing_refetch(search_publication_element)
def GetElementsFromXML(self, filename):
    """Extracts a dictionary of elements from the gcc_xml file.

    Parses ``filename`` and returns a dict mapping the ``id`` attribute of
    each direct child of the root to the tuple ``(element, None)``.
    Children without an ``id`` attribute (or with an empty one) are
    skipped.

    Raises InvalidXMLError when parsing raises ExpatError or when the root
    element is not tagged ``GCC_XML``.
    """
    tree = ElementTree()
    try:
        tree.parse(filename)
    except ExpatError:
        # NOTE(review): the stdlib xml.etree parser reports malformed input
        # as ParseError; confirm which ElementTree implementation this file
        # imports so that this handler is actually reached.
        raise InvalidXMLError('Not a XML file: %s' % filename)
    root = tree.getroot()
    if root.tag != 'GCC_XML':
        raise InvalidXMLError('Not a valid GCC_XML file')
    # build a dictionary of id -> element, None
    elements = {}
    for element in root:
        element_id = element.get('id')
        if element_id:
            elements[element_id] = element, None
    return elements
def GetElementsFromXML(self, filename):
    """Extracts a dictionary of elements from the gcc_xml file.

    The file is parsed and every direct child of the root that carries a
    non-empty ``id`` attribute is stored in the result as
    ``id -> (element, None)``.

    Raises InvalidXMLError when parsing raises ExpatError or when the root
    tag is not ``GCC_XML``.
    """
    tree = ElementTree()
    try:
        tree.parse(filename)
    except ExpatError:
        # NOTE(review): the stdlib xml.etree parser raises ParseError for
        # malformed documents; confirm the ElementTree imported by this
        # file really raises ExpatError.
        raise InvalidXMLError('Not a XML file: %s' % filename)
    root = tree.getroot()
    if root.tag != 'GCC_XML':
        raise InvalidXMLError('Not a valid GCC_XML file')
    # build a dictionary of id -> element, None
    elements = {}
    for element in root:
        key = element.get('id')
        if not key:
            continue
        elements[key] = element, None
    return elements
def body(self, xml):
    """ Body importer

    Accepts either an XML string or an already parsed element.  The root
    element must be tagged ``object`` and carry a ``name`` attribute that
    matches ``self.context.__name__`` when the context has one; otherwise
    AttributeError is raised.  Each ``property`` child is assigned to
    ``self.attribute`` and each ``object`` child to ``self.child``.  An
    ObjectModifiedEvent is then notified for the context, and the
    transaction is committed when the context provides INews.
    """
    elem = xml
    if isinstance(xml, (str, unicode)):
        # Raw markup: build the tree first and work on its root.
        builder = XMLTreeBuilder()
        builder.feed(xml)
        elem = ElementTree(builder.close()).getroot()

    if elem.tag != 'object':
        raise AttributeError('Invalid xml root element %s' % elem.tag)

    name = elem.get('name')
    if not name:
        raise AttributeError('No name provided for object')

    has_name = hasattr(self.context, '__name__')
    if has_name and (name != self.context.__name__):
        message = ('XML root object name %s '
                   'should match context name %s')
        raise AttributeError(message % (name, self.context.__name__))

    for node in list(elem):
        tag = node.tag
        if tag == 'property':
            self.attribute = node
        elif tag == 'object':
            self.child = node

    event.notify(ObjectModifiedEvent(self.context))

    if INews.providedBy(self.context):
        context_name = getattr(self.context, '__name__', '(no name)')
        logger.info('Commit transaction import for %s' % context_name)
        transaction.commit()
# Upgrade step for the category files of a project directory.
# NOTE(review): persistence_path, cat_path and comp_path are defined before
# this fragment and are not visible here.
prop_path = os.path.join(persistence_path, "Property")
# Abort unless all four expected project directories exist.
# NOTE(review): the error path exits with status 0 - confirm this is intended.
if not os.path.exists(persistence_path) or \
   not os.path.exists(cat_path) or \
   not os.path.exists(comp_path) or \
   not os.path.exists(prop_path):
    print "INVALID PROJECT DIRECTORY"
    exit(0)

et = ElementTree()

# transform categories
print "Transforming Categories..."
for cat_fname in os.listdir(cat_path):
    fpath = os.path.join(cat_path, cat_fname)
    et.parse(fpath)
    version = et.getroot().get("version")
    if not version:
        # No version attribute on the root: name and description are read
        # from the text of the <name> and <description> child elements and
        # turned into attributes of a new version 1.1 <category> root.
        print "\tTransforming %s..." % cat_fname
        root = Element("category", {"version": "1.1",
                                    "name": et.find("name").text.strip(),
                                    "description": et.find("description").text.strip()})
        et = ElementTree(root)
        # NOTE(review): the stdlib ElementTree.write() has no ``indent``
        # keyword; presumably a project-specific ElementTree - confirm.
        et.write(fpath, indent=True)
    elif version == "1.0":
        # Version 1.0: name and description are already root attributes and
        # are copied onto a new version 1.1 <category> root.
        print "\tTransforming %s..." % cat_fname
        root = Element("category", {"version": "1.1",
                                    "name": et.getroot().get("name"),
                                    "description": et.getroot().get("description")})
        # NOTE(review): no et.write() is visible for this branch in this
        # fragment - confirm the file is written back in the code that follows.
        et = ElementTree(root)
prop_path = os.path.join(persistence_path, "Property") if not os.path.exists(persistence_path) or \ not os.path.exists(cat_path) or \ not os.path.exists(comp_path) or \ not os.path.exists(prop_path): print "INVALID PROJECT DIRECTORY" exit(0) et = ElementTree() #transform categories print "Transforming Categories..." for cat_fname in os.listdir(cat_path): fpath = os.path.join(cat_path, cat_fname) et.parse(fpath) version = et.getroot().get("version") if not version: print "\tTransforming %s..." % cat_fname root = Element( "category", { "version": "1.1", "name": et.find("name").text.strip(), "description": et.find("description").text.strip() }) et = ElementTree(root) et.write(fpath, indent=True) elif version == "1.0": print "\tTransforming %s..." % cat_fname root = Element( "category", { "version": "1.1",
import xml import re os.chdir('votes') cwdfiles=os.listdir(os.getcwd()) votesfiles=filter(lambda s:re.match('votes',s), cwdfiles) topelement=Element('top') i=1 for vf in votesfiles: print vf try: votetree=ElementTree(file=vf) voteroot=votetree.getroot() date=voteroot.get('date') m=re.match('(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})',date) if not m: print "internal error in date format" sys.exit() mgd=m.groupdict() mgd.update({'date':date}) acts=votetree.findall('//royal_assent/act') if len(acts)>0: assent=Element('assent',mgd) for j in range(len(acts)): assent.insert(j,acts[j]) topelement.insert(i,assent) i=i+1 except xml.parsers.expat.ExpatError, errorinst:
def process(directory, option, file_out, use_file_out, xml_file, group,
            verbose, recurse, progress):
    """Add or erase package entries in an XML file and output the result.

    The package names come from ``get_names_from_dir(directory, recurse,
    pb, progress)``.

    * ``option == comps_opt.ERASE``: every ``packagereq`` element found
      three levels below the root whose text is one of those names is
      removed from its parent.
    * ``option == comps_opt.ADD``: a new ``<group>`` element, whose id and
      name are both ``group``, is appended to the root.  It holds one
      mandatory ``packagereq`` per name.
    * anything else: ``die`` is called.

    The resulting tree is written to ``file_out`` when ``use_file_out`` is
    true, otherwise it is dumped with ``dump``.  ``verbose`` enables
    status messages on stdout and ``progress`` enables progress-bar
    updates on stderr.
    """
    if verbose:
        print("Inside process...")

    # Terminal width for the progress bar; fall back to 60 when the shell
    # reports nothing usable.
    col = commands.getoutput("echo \"$COLUMNS\"")
    try:
        columns = int(col)
    except ValueError:
        columns = 60

    pb = progress_bar.pb("Progress: ", "-", columns, sys.stderr)
    tree = ElementTree(file=xml_file)
    elem = tree.getroot()

    if verbose:
        print("Getting rpm_names")
    rpm_names = get_names_from_dir(directory, recurse, pb, progress)
    if verbose:
        print("Processing names")

    if option == comps_opt.ERASE:
        # Handle the ERASE operations
        for subelem in elem:
            for subsub in subelem:
                # Iterate over a snapshot: removing a child from the element
                # while iterating the element itself skips the sibling that
                # follows each removed child.
                children = list(subsub)
                total = len(children)
                for count, subsubsub in enumerate(children, 1):
                    if progress:
                        pb.progress(float(count) / total)
                    if subsubsub.tag == 'packagereq' and \
                            subsubsub.text in rpm_names:
                        subsub.remove(subsubsub)
                        if verbose:
                            print("Found %s, removing" % subsubsub.text)
    elif option == comps_opt.ADD:
        # Handle the ADD operations
        from xml.sax.saxutils import escape

        # Escape interpolated text so that "&", "<" or ">" in a name cannot
        # produce malformed XML.
        group_text = escape("%s" % group)
        parts = [
            "<group>\n",
            "<id>%s</id>\n" % group_text,
            "<name>%s</name>\n" % group_text,
            "<packagelist>\n",
        ]
        total = len(rpm_names)
        for count, name in enumerate(rpm_names, 1):
            if progress:
                pb.progress(float(count) / total)
            parts.append(
                "<packagereq type=\"mandatory\">%s</packagereq>\n"
                % escape("%s" % name))
        parts.append("</packagelist>\n")
        parts.append("</group>\n")
        elem.append(fromstring("".join(parts)))
    else:
        die("Some unknown error has occured. Neither 'ADD' nor 'ERASE' was specified, somehow")

    if progress:
        pb.clear()
    if verbose:
        print("Ending, outputing XML")

    if use_file_out:
        # ``tree`` already is an ElementTree; wrapping it in another
        # ElementTree would make its root a non-element.
        tree.write(file_out)
    else:
        dump(tree)
def update_publication(publication_object):
    """
    Refreshes one publication from the Symplectic API, based upon its guid.

    Downloads the publication's XML into a local file, parses it and
    deletes the local file.  When the root element's ``id`` matches the
    publication's guid, the attributes ``new_id``, ``is_deleted``,
    ``type``, ``created_when`` and ``last_modified_when`` are copied onto
    ``publication_object``, ``needs_refetch`` is cleared and the object is
    saved.  Each child of the ``bibliographic-records`` subtree is then
    passed to ``_create_biblio_object``.

    Returns None.  Nothing is done for a falsy ``publication_object`` or
    an empty guid, and nothing is updated when the returned ``id`` does
    not match the guid.
    """
    # checking
    if not(publication_object) or (publication_object.guid == ''):
        return

    # symplectic api url and local file path
    url = SYMPLECTIC_API_URL + 'publications/' + publication_object.guid

    tmp_filename = "".join([
        SYMPLECTIC_LOCAL_XML_FOLDER,
        SYMPLECTIC_LOCAL_PUBS_FOLDER,
        str(publication_object.guid),
        '.xml'
        ])

    # get xml document from symplectic api and store on hd
    (tmp_filename, _http_headers) = urllib.urlretrieve(url, tmp_filename)

    # parse xml file; the local copy is deleted even when parsing fails so
    # that a malformed response does not leave a stale file behind
    try:
        pub_etree = ElementTree(file=tmp_filename)
    finally:
        os.remove(tmp_filename)

    # ++++++PUBLICATION++++++
    # publication element
    pub_element = pub_etree.getroot()

    # no loading-of/blank publication object required as updating the one
    # passed in

    # check returned xml element is valid and for correct publication
    if pub_element is None:
        return
    if publication_object.guid != pub_element.get('id', ''):
        return

    # publication attributes (pub_element is known not to be None here)
    publication_object.new_id = pub_element.get('new-id')
    publication_object.is_deleted = (
        pub_element.get('is-deleted', 'false') == 'true'
    )

    for attr_name in ("type", "created-when", "last-modified-when"):
        setattr(
            publication_object,
            attr_name.replace("-", "_"),
            pub_element.get(attr_name, "")
        )

    # just fetched latest version from symplectic
    publication_object.needs_refetch = False

    # save updated publication object
    publication_object.save()

    # ++++++BIBLIOGRAPHIC-RECORD++++++
    # bibliographic-record elements are held in a subtree
    biblio_subtree = pub_etree.find(
        SYMPLECTIC_NAMESPACE + 'bibliographic-records'
    )

    # check if any bibliographic-record elements in subtree
    if biblio_subtree is None or len(biblio_subtree) < 1:
        return

    # for each bibliographic-record element in subtree
    # (list(element) is the replacement for the deprecated getchildren())
    for biblio_element in list(biblio_subtree):
        _create_biblio_object(biblio_element, publication_object)
def _get_users(researcher_object): """ Asks Symplectic API for User info about specified researcher Specify which researcher using proprietary-id Receives XML File as response Parses XML File to find symplectic ID for each User """ # symplectic api url and local file path url = "".join([ SYMPLECTIC_API_URL, 'search-users?', '&include-deleted=true', '&authority=', AUTHENTICATING_AUTHORITY, '&proprietary-id=', str(researcher_object.person_id) ]) #'&username='******'.xml' ]) #get xml document from symplectic api and store on hd try: (tmp_filename, http_headers) = urllib.urlretrieve(url, tmp_filename) except urllib2.URLError: raise ESymplecticGetFileError(""" Could not HTTP GET the XML file of User GUID from Symplectic API """) #parse xml file users_etree = ElementTree(file=tmp_filename) usersresponse_element = users_etree.getroot() #delete local file from hd #try: os.remove(tmp_filename) #except: #pass #check if any user elements in tree if usersresponse_element is None: return "" # for each retrieved user element in tree (should only be 1) for user_element in usersresponse_element.getchildren(): # pull out of xml what symplectic says this researcher's proprietary # id and symplectic-id are proprietary_id = user_element.attrib.get("proprietary-id") id = user_element.attrib.get("id") # if arkestra and symplectic agree this is the same person if str(researcher_object.person_id) == proprietary_id: # researcher_object.symplectic_int_id = id # int_id version researcher_object.symplectic_id = id # guid version researcher_object.save() # force return after 1 (should only be 1 person per xml file # anyway) return id else: raise ESymplecticExtractUserGUIDError(""" ID returned by Symplectic API not for correct Arkestra User (Proprietary ID doesnt match """)
import sys import xml import re os.chdir('votes') cwdfiles = os.listdir(os.getcwd()) votesfiles = filter(lambda s: re.match('votes', s), cwdfiles) topelement = Element('top') i = 1 for vf in votesfiles: print vf try: votetree = ElementTree(file=vf) voteroot = votetree.getroot() date = voteroot.get('date') m = re.match('(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', date) if not m: print "internal error in date format" sys.exit() mgd = m.groupdict() mgd.update({'date': date}) acts = votetree.findall('//royal_assent/act') if len(acts) > 0: assent = Element('assent', mgd) for j in range(len(acts)): assent.insert(j, acts[j]) topelement.insert(i, assent) i = i + 1 except xml.parsers.expat.ExpatError, errorinst: