def _load_previous_graph(rdf_path, archive_path):
    """Return the previously generated graph for this file, or None.

    The N-Triples file at *rdf_path* is preferred; *archive_path* is used
    as a fallback. None is returned when neither file exists.
    """
    if os.path.exists(rdf_path):
        print("Comparing graphs")
        source = rdf_path
    elif os.path.exists(archive_path):
        print("Comparing with archived graph")
        source = archive_path
    else:
        return None
    previous = Graph()
    previous.parse(source, format='nt')
    return previous


def _write_changeset(statements, reason, out_path, removal):
    """Record every triple of *statements* in a changeset and save it.

    *removal* selects whether the triples are recorded as removals (True)
    or additions (False). The changeset graph is serialized to *out_path*
    as N-Triples.
    """
    cs = BatchChangeSet()
    cs.setCreatorName('IATI Update Scripts')
    cs.setChangeReason(reason)
    record = cs.remove if removal else cs.add
    for s, p, o in statements.triples((None, None, None)):
        record(s, p, o)
    cs.getGraph().serialize(out_path, format='nt')


def process_file(file):
    """Convert one XML file to N-Triples, emitting changesets against any
    previous output.

    The file is passed through ``clean_file``, parsed, run through
    ``transform`` and the result is loaded as RDF/XML. If an earlier
    ``.nt`` output exists next to the input (or under ``archive/``), the
    old and new graphs are diffed and two changeset files are written
    before the ``.nt`` file is overwritten:

    * ``<name>.nt_csremove`` - statements only present in the old graph
    * ``<name>.nt_csadd``    - statements only present in the new graph

    These can be used for generating changesets when uploading to a
    datastore.

    Any exception raised while parsing, diffing or serializing is reported
    on stdout and swallowed, so a caller looping over many files carries
    on with the next one. Nothing is returned.
    """
    root, filename = os.path.split(file)
    rdf_file = os.path.splitext(filename)[0] + '.nt'
    # os.path.join keeps the output beside the input even when `file` has
    # no directory component (plain concatenation produced "/name.nt").
    rdf_path = os.path.join(root, rdf_file)
    archive_path = os.path.join(root, 'archive', rdf_file)

    clean_file(file)
    # Single-argument print(...) and `except ... as` behave the same on
    # Python 2.6+ and Python 3.
    try:
        xml = etree.parse(file)
        rdf = transform(xml)
        g = Graph()
        g.parse(StringInputSource(rdf), "xml")

        # If the graph already exists, generate the diffs before it is
        # overwritten below.
        previous = _load_previous_graph(rdf_path, archive_path)
        if previous is not None:
            # graph_diff returns (in both, only in first, only in second).
            _both, old, new = graph_diff(previous, g)
            # To also keep a plain (non-reified) copy of the statements,
            # serialize `old` / `new` directly, e.g. to rdf_path + '_old'.
            if len(old):
                _write_changeset(
                    old,
                    'Statements to remove from ' + file,
                    rdf_path + '_csremove',
                    removal=True,
                )
            if len(new):
                # New statements must be recorded as additions; recording
                # them as removals made the "_csadd" changeset delete them.
                _write_changeset(
                    new,
                    'Statements to add from ' + file,
                    rdf_path + '_csadd',
                    removal=False,
                )

        g.serialize(rdf_path, format='nt')
    except Exception as e:
        # Deliberate best-effort: report the failure and move on.
        print("Error processing file " + file)
        print(e)
try: og.parse(rdf_path,format='nt') except Exception, e: print "Failed reading archived online data" print e print "Running graph diff - new data against archived data" both, old, new = graph_diff(og,ng) if(len(old)): cs = BatchChangeSet() cs.setCreatorName('R4D Update Scripts') cs.setChangeReason('Statements to remove from'+dirList[0]) for (s,p,o) in old.triples((None, None, None)): cs.remove(s,p,o) print "Saving triples for removal to changeset" cs.getGraph().serialize(rdf_path+'_csremove',format='nt') if(len(new)): cs = BatchChangeSet() cs.setCreatorName('IATI Update Scripts') cs.setChangeReason('Statements to add from '+dirList[0]) for (s,p,o) in new.triples((None, None, None)): cs.remove(s,p,o) print "Saving new triples to changeset" cs.getGraph().serialize(rdf_path+'_csadd',format='nt') archive(exec_path+"/"+dirList[0],True) ng.serialize(rdf_path,format='nt') except Exception, e: print "Could not read RDFXML file: " print e