Ejemplo n.º 1
0
 def __init__(self, xml):
     '''Sets up the codelist wrapper around its XML.

     Parameters
     @xml: The XML of the codelist. The 'name' and 'date-last-modified'
           keys are looked up on it through AttributeHelper.attribute_key.'''

     lookup = AttributeHelper.attribute_key

     self.xml = xml

     # Identifier and modification date are both taken from the XML itself.
     self.id = lookup(xml, 'name')
     self.last_updated = lookup(xml, 'date-last-modified')
Ejemplo n.º 2
0
 def get_organisation_defaults(self):
     '''Returns the defaults of the organisation.

     Returns
     @defaults: A dictionary containing the defaults of the organisation
                under the keys 'id', 'language' and 'currency'.'''

     # 'language' is looked up under the namespaced xml:lang key and
     # 'currency' under 'default-currency', both on the organisation XML.
     return {
         'id': self.id,
         'language': AttributeHelper.attribute_key(
             self.xml, '{http://www.w3.org/XML/1998/namespace}lang'),
         'currency': AttributeHelper.attribute_key(
             self.xml, 'default-currency'),
     }
Ejemplo n.º 3
0
 def get_activity_defaults(self):
     '''Collects the default values of the activity.

     Returns
     @defaults: A dictionary with the keys 'id', 'language', 'currency',
                'finance_type', 'flow_type', 'aid_type', 'tied_status',
                'hierarchy' and 'linked_data_uri'.'''

     lookup = AttributeHelper.attribute_key

     defaults = {'id': self.id}
     defaults['language'] = lookup(
         self.xml, '{http://www.w3.org/XML/1998/namespace}lang')
     defaults['currency'] = lookup(self.xml, 'default-currency')

     # Each of these defaults is resolved through get_default_type using
     # the name of the corresponding default-* element.
     type_elements = (('finance_type', 'default-finance-type'),
                      ('flow_type', 'default-flow-type'),
                      ('aid_type', 'default-aid-type'),
                      ('tied_status', 'default-tied-status'))

     for key, element in type_elements:
         defaults[key] = self.get_default_type(element)

     defaults['hierarchy'] = self.hierarchy
     defaults['linked_data_uri'] = self.linked_data_uri

     return defaults
Ejemplo n.º 4
0
 def __init__(self, xml):
     '''Sets up the organisation wrapper around its XML.

     Parameters
     @xml: The XML of the organisation.
           NOTE(review): presumably an ElementTree element - confirm
           against the caller.'''

     self.xml = xml

     # get_id() is not part of this block; it is called only after
     # self.xml has been stored.
     self.id = self.get_id()
     self.last_updated = AttributeHelper.attribute_key(
         xml, 'last-updated-datetime')
Ejemplo n.º 5
0
    def get_codelist_defaults(self):
        '''Collects the default values of the codelist.

        Return
        @defaults: A dictionary with the keys 'id' and 'language'.'''

        defaults = {'id': self.id}

        # The language is looked up under the namespaced xml:lang key.
        defaults['language'] = AttributeHelper.attribute_key(
            self.xml, '{http://www.w3.org/XML/1998/namespace}lang')

        return defaults
Ejemplo n.º 6
0
 def __init__(self, xml, version, linked_data_default):
     '''Sets up the activity wrapper around its XML.

     Parameters
     @xml: The XML of an activity.
     @version: The version of the activities, handed to
               determine_version.
     @linked_data_default: The Linked Data default URI, handed to
                           determine_linked_data_uri together with the
                           activity ID.'''

     lookup = AttributeHelper.attribute_key

     self.xml = xml

     # get_id() is not part of this block; it is called only after
     # self.xml has been stored.
     self.id = self.get_id()
     self.last_updated = lookup(xml, 'last-updated-datetime')

     self.version = self.determine_version(version)
     self.linked_data_uri = self.determine_linked_data_uri(
         linked_data_default, self.id)

     self.hierarchy = lookup(xml, 'hierarchy')

     # Starts out empty; nothing in this block adds to it.
     self.failed = []
Ejemplo n.º 7
0
 def get_default_type(self, type):
     '''Returns the code of a default-type element of the activity.

     Parameters
     @type: The name (path) of the element that should be retrieved,
            e.g. 'default-finance-type'.

     Returns
     @default_type: What AttributeHelper.attribute_key yields for the
                    'code' key of that element, or None if the activity
                    XML has no such element.'''

     default_type = self.xml.find(type)

     # Identity test on purpose: None is a singleton, and a plain truth
     # test would not be equivalent for element objects.
     if default_type is None:
         return None

     return AttributeHelper.attribute_key(default_type, 'code')
Ejemplo n.º 8
0
 def determine_version(self, version):
     '''Determines the version of this activity.

     Parameters
     @version: The version of the iati-activities attribute, used as
               the fallback.

     Returns
     @version: The value found under the 'version' key of the activity
               XML, or the given fallback version when that lookup
               yields None.'''

     activity_version = AttributeHelper.attribute_key(self.xml, 'version')

     # Only None counts as "not specified"; any other value found on the
     # activity takes precedence over the fallback.
     if activity_version is not None:
         return activity_version

     return version
Ejemplo n.º 9
0
 def determine_linked_data_uri(self, linked_data_default, id):
     '''Determines the Linked Data URI of this activity.

     Parameters
     @linked_data_default: The Linked Data default URI, used as a prefix
                           when the activity has no URI of its own.
     @id: The ID of the activity.

     Returns
     @linked_data_uri: The value found under the 'linked-data-uri' key of
                       the activity XML; otherwise the default URI with
                       the ID appended; None if neither is available.'''

     linked_data_uri = AttributeHelper.attribute_key(self.xml, 'linked-data-uri')

     if linked_data_uri is not None:
         return linked_data_uri

     # Fall back to the default prefix followed by the activity ID.
     if linked_data_default is not None:
         return str(linked_data_default) + str(id)

     return None
Ejemplo n.º 10
0
def main():
    '''Converts Activity XMLs to Turtle files and stores these to local folder.

    Every *.xml file in the XML folder is parsed and each 'iati-activity'
    element in it is converted and written to its own Turtle file, inside a
    folder named after the document. Afterwards a provenance graph for the
    document is built, using the JSON file that sits next to the XML file,
    and written to the same folder. Documents that cannot be parsed are
    reported and skipped. Activities whose conversion raises a TypeError are
    reported and skipped. The failed elements reported by the converter are
    printed once at the end.'''

    # Settings
    xml_folder = "/media/Acer/School/IATI-data/xml/activities/"
    turtle_folder = "/media/Acer/School/IATI-data/activity/"
    Iati = Namespace("http://purl.org/collections/iati/")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1
    activity_count = 1

    failed_elements = []

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        activity_ids = []

        doc_name = document.rsplit('/', 1)[1]
        doc_id = str(doc_name)[:-4]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        # Parse the XML file; an unparsable document is skipped entirely
        # and does not advance the document counter.
        try:
            xml = ET.parse(document)
        except ET.ParseError:
            print("Could not parse file " + document)
            continue

        # Get the version
        root = xml.getroot()
        version = AttributeHelper.attribute_key(root, 'version')
        linked_data_default = AttributeHelper.attribute_key(root, 'linked-data-default')

        # Reset per document so the provenance never picks up the value of
        # a previous document when no activity could be converted.
        # NOTE(review): assumes ConvertProvenance accepts None here - confirm.
        last_updated = None

        # Convert each activity in XML file to RDFLib Graph
        for activity in xml.findall('iati-activity'):

            try:
                converter = IatiConverter.ConvertActivity(activity, version, linked_data_default)
                # NOTE(review): 'version' is overwritten with the value
                # returned for this activity and is then used as the default
                # for the next one, as in the original code - confirm that
                # this is intended.
                graph, activity_id, last_updated, version, fails = converter.convert(Iati)
            except TypeError as e:
                print("Error in " + document + ":" + str(e))
                # Skip this activity instead of falling through with the
                # results of the previous one (or with unbound names).
                activity_count += 1
                continue

            if fails is not None:
                for fail in fails:
                    if fail not in failed_elements:
                        failed_elements.append(fail)

            if graph is not None and activity_id is not None:
                # Slashes in the identifier are escaped to get a valid file name.
                file_id = str(activity_id.replace('/', '%2F'))

                print("Processing: Activity %s (# %s) in document %s (# %s)" % (file_id,
                                                                                str(activity_count),
                                                                                str(doc_name),
                                                                                str(document_count)))

                # Write activity to Turtle and store in local folder
                graph_turtle = graph.serialize(format='turtle')

                with open(doc_folder + file_id + '.ttl', 'w') as turtle_file:
                    turtle_file.write(graph_turtle)

                activity_ids.append(activity_id)

            else:
                print("WARNING: Activity (# %s) in %s (# %s) has no identifier specified" % (str(activity_count),
                                                                                             str(doc_name),
                                                                                             str(document_count)))

            activity_count += 1

        document_count += 1

        # Add provenance from corresponding JSON file
        json_document = document[:-4] + '.json'

        try:
            with open(json_document, 'r') as open_json_doc:
                json_parsed = json.load(open_json_doc)
        except (IOError, ValueError):
            # Missing/unreadable file or invalid JSON: continue without it.
            print("Could not parse file " + json_document)
            json_parsed = None

        provenance_converter = IatiConverter.ConvertProvenance('activity', json_parsed, provenance,
                                                               doc_id, last_updated, version, activity_ids)
        provenance = provenance_converter.convert(Iati)

        # Write provenance graph to Turtle and store in local folder
        provenance_turtle = provenance.serialize(format='turtle')

        with open(doc_folder + 'provenance-' + doc_id + '.ttl', 'w') as turtle_file:
            turtle_file.write(provenance_turtle)

    print("Failed:")

    for fail in failed_elements:
        print(fail)

    print("Done!")
Ejemplo n.º 11
0
def main():
    '''Converts Activity XMLs to Turtle files and stores these to local folder.

    Every *.xml file in the XML folder is parsed and each 'iati-activity'
    element in it is converted and written to its own Turtle file, inside a
    folder named after the document. Afterwards a provenance graph for the
    document is built, using the JSON file that sits next to the XML file,
    and written to the same folder. Documents that cannot be parsed are
    reported and skipped. Activities whose conversion raises a TypeError are
    reported and skipped. The failed elements reported by the converter are
    printed once at the end.'''

    # Settings
    xml_folder = "/media/Acer/School/IATI-data/xml/activities/"
    turtle_folder = "/media/Acer/School/IATI-data/activity/"
    Iati = Namespace("http://purl.org/collections/iati/")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1
    activity_count = 1

    failed_elements = []

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        activity_ids = []

        doc_name = document.rsplit('/', 1)[1]
        doc_id = str(doc_name)[:-4]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        # Parse the XML file; an unparsable document is skipped entirely
        # and does not advance the document counter.
        try:
            xml = ET.parse(document)
        except ET.ParseError:
            print("Could not parse file " + document)
            continue

        # Get the version
        root = xml.getroot()
        version = AttributeHelper.attribute_key(root, 'version')
        linked_data_default = AttributeHelper.attribute_key(
            root, 'linked-data-default')

        # Reset per document so the provenance never picks up the value of
        # a previous document when no activity could be converted.
        # NOTE(review): assumes ConvertProvenance accepts None here - confirm.
        last_updated = None

        # Convert each activity in XML file to RDFLib Graph
        for activity in xml.findall('iati-activity'):

            try:
                converter = IatiConverter.ConvertActivity(
                    activity, version, linked_data_default)
                # NOTE(review): 'version' is overwritten with the value
                # returned for this activity and is then used as the default
                # for the next one, as in the original code - confirm that
                # this is intended.
                graph, activity_id, last_updated, version, fails = converter.convert(
                    Iati)
            except TypeError as e:
                print("Error in " + document + ":" + str(e))
                # Skip this activity instead of falling through with the
                # results of the previous one (or with unbound names).
                activity_count += 1
                continue

            if fails is not None:
                for fail in fails:
                    if fail not in failed_elements:
                        failed_elements.append(fail)

            if graph is not None and activity_id is not None:
                # Slashes in the identifier are escaped to get a valid file name.
                file_id = str(activity_id.replace('/', '%2F'))

                print("Processing: Activity %s (# %s) in document %s (# %s)" % (
                    file_id, str(activity_count), str(doc_name),
                    str(document_count)))

                # Write activity to Turtle and store in local folder
                graph_turtle = graph.serialize(format='turtle')

                with open(doc_folder + file_id + '.ttl', 'w') as turtle_file:
                    turtle_file.write(graph_turtle)

                activity_ids.append(activity_id)

            else:
                print("WARNING: Activity (# %s) in %s (# %s) has no identifier specified" % (
                    str(activity_count), str(doc_name), str(document_count)))

            activity_count += 1

        document_count += 1

        # Add provenance from corresponding JSON file
        json_document = document[:-4] + '.json'

        try:
            with open(json_document, 'r') as open_json_doc:
                json_parsed = json.load(open_json_doc)
        except (IOError, ValueError):
            # Missing/unreadable file or invalid JSON: continue without it.
            print("Could not parse file " + json_document)
            json_parsed = None

        provenance_converter = IatiConverter.ConvertProvenance(
            'activity', json_parsed, provenance, doc_id, last_updated,
            version, activity_ids)
        provenance = provenance_converter.convert(Iati)

        # Write provenance graph to Turtle and store in local folder
        provenance_turtle = provenance.serialize(format='turtle')

        with open(doc_folder + 'provenance-' + doc_id + '.ttl',
                  'w') as turtle_file:
            turtle_file.write(provenance_turtle)

    print("Failed:")

    for fail in failed_elements:
        print(fail)

    print("Done!")
def main():
    '''Converts Organisation XMLs to Turtle files and stores these to local folder.

    Every *.xml file in the XML folder is parsed. A document whose root is
    'iati-organisations' or 'organisations' has each of its
    'iati-organisation' and 'organisation' children converted. A document
    whose root is itself 'iati-organisation' or 'organisation' has that
    root converted. Each converted organisation is written to its own
    Turtle file in a folder named after the document. Afterwards a
    provenance graph for the document is built, using the JSON file that
    sits next to the XML file, and written to the same folder. Documents
    that cannot be parsed are reported and skipped.'''

    # Settings
    xml_folder = "/home/iati/xml/organisations/"
    turtle_folder = "/home/iati/organisation/"
    Iati = Namespace("http://purl.org/collections/iati/")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1
    organisation_count = 1

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        organisation_ids = []

        doc_id = str(document.rsplit('/', 1)[1])[:-4]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        # Parse the XML file; an unparsable document is skipped entirely
        # and does not advance the document counter.
        try:
            xml = ET.parse(document)
        except ET.ParseError:
            print("Could not parse file " + document)
            continue

        root = xml.getroot()
        version = AttributeHelper.attribute_key(root, 'version')

        # Work out which elements have to be converted. The three formerly
        # copy-pasted conversion loops are driven from this single list.
        if root.tag in ('iati-organisations', 'organisations'):
            organisations = (list(xml.findall('iati-organisation')) +
                             list(xml.findall('organisation')))
            # NOTE(review): the original code wrote no '.ttl.graph' file
            # for organisations inside a container element; that is kept.
            write_graph_file = False
        elif root.tag in ('iati-organisation', 'organisation'):
            organisations = [root]
            write_graph_file = True
        else:
            organisations = []
            write_graph_file = False

        # Reset per document so the provenance never picks up the value of
        # a previous document when no organisation could be converted.
        # NOTE(review): assumes ConvertProvenance accepts None here - confirm.
        last_updated = None

        # Convert each organisation to RDFLib Graph
        for organisation in organisations:

            try:
                converter = IatiConverter.ConvertOrganisation(organisation)
                graph, organisation_id, last_updated = converter.convert(Iati)
            except TypeError as e:
                print("Error in " + document + ":" + str(e))
                # Do not fall through with the previous organisation's results.
                graph, organisation_id = None, None

            print("Progress: Organisation #" + str(organisation_count) + " in document #" + str(document_count))

            if graph is not None and organisation_id is not None:
                # Slashes in the identifier are escaped to get a valid file name.
                file_base = doc_folder + str(organisation_id.replace('/', '%2F'))

                # Write organisation to Turtle and store in local folder
                graph_turtle = graph.serialize(format='turtle')

                with open(file_base + '.ttl', 'w') as turtle_file:
                    turtle_file.write(graph_turtle)

                if write_graph_file:
                    # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
                    with open(file_base + '.ttl.graph', 'w') as graph_file:
                        graph_file.write(str(Iati) + 'graph/organisation/' + str(organisation_id))

                # Only organisations that were actually written are listed in
                # the provenance, matching how the activity conversion
                # collects its identifiers.
                organisation_ids.append(organisation_id)

            organisation_count += 1

        document_count += 1

        # Add provenance from corresponding JSON file
        json_document = document[:-4] + '.json'

        try:
            with open(json_document, 'r') as open_json_doc:
                json_parsed = json.load(open_json_doc)
        except (IOError, ValueError):
            # Missing/unreadable file or invalid JSON: continue without it.
            print("Could not parse file " + json_document)
            json_parsed = None

        provenance_converter = IatiConverter.ConvertProvenance('organisation', json_parsed, provenance,
                                                               doc_id, last_updated, version, organisation_ids)
        provenance = provenance_converter.convert(Iati)

        # Write provenance graph to Turtle and store in local folder
        provenance_turtle = provenance.serialize(format='turtle')

        with open(doc_folder + 'provenance-' + doc_id + '.ttl', 'w') as turtle_file:
            turtle_file.write(provenance_turtle)

        # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
        with open(doc_folder + 'provenance-' + doc_id + '.ttl.graph', 'w') as graph_file:
            graph_file.write(str(Iati) + 'graph/provenance/')

    print("Done!")
def main():
    '''Converts Codelist XMLs to Turtle files and stores these to local folder.

    Every *.xml file in the XML folder is parsed and its root is converted
    as one codelist. The resulting graph is written to a Turtle file in a
    folder named after the document, next to a '.ttl.graph' file that names
    the target graph. A provenance graph for the codelist is then built and
    written to the same folder in the same way. Documents that cannot be
    parsed or converted are reported and skipped.'''

    # Settings
    xml_folder = "/home/iati/xml/codelists/"
    turtle_folder = "/home/iati/codelist/"
    Iati = Namespace("http://purl.org/collections/iati/")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        doc_id = str(document.rsplit('/', 1)[1])[:-4]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        # Parse the XML file; report and skip unparsable documents, as the
        # activity and organisation conversions do.
        try:
            xml = ET.parse(document)
        except ET.ParseError:
            print("Could not parse file " + document)
            continue

        root = xml.getroot()

        version = AttributeHelper.attribute_key(root, 'version')

        try:
            # Convert each codelist in XML file to RDFLib Graph
            converter = IatiConverter.ConvertCodelist(root)
            graph, codelist_id, last_updated = converter.convert(Iati)
        except TypeError as e:
            print("Error in " + document + ":" + str(e))
            graph, codelist_id = None, None

        print("Progress: Document #" + str(document_count))

        document_count += 1

        # Without a graph and an identifier there is nothing to write. The
        # provenance would otherwise be built from an identifier left over
        # from the previous document (or from the builtin 'id').
        if graph is None or codelist_id is None:
            continue

        # Slashes in the identifier are escaped to get a valid file name.
        file_id = codelist_id.replace('/', '%2F')
        codelist_uri = URIRef(Iati + 'graph/codelist/' + str(codelist_id))
        source_xml = str('http://datadev.aidinfolabs.org/data/codelist/' + str(codelist_id) + '.xml')

        # Write codelist to Turtle and store in local folder
        graph_turtle = graph.serialize(format='turtle')

        with open(doc_folder + file_id + '.ttl', 'w') as turtle_file:
            turtle_file.write(graph_turtle)

        # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
        with open(doc_folder + file_id + '.ttl.graph', 'w') as graph_file:
            graph_file.write(str(Iati) + 'graph/codelist/' + str(codelist_id))

        # Add provenance of last-updated, version and source document
        provenance.add((codelist_uri,
                        URIRef(Iati + 'last-updated'),
                        Literal(last_updated)))

        provenance.add((codelist_uri,
                        URIRef(Iati + 'version'),
                        Literal(version)))

        provenance.add((codelist_uri,
                        URIRef(Iati + 'source-document-id'),
                        Literal(str(codelist_id))))

        provenance.add((codelist_uri,
                        URIRef(Iati + 'source-document-download-url'),
                        URIRef(source_xml)))

        # Add prov model
        start_time = datetime.datetime.now()
        entities = [str(codelist_id)]
        script = "conversion%20scripts/CodelistsToTurtle.py"

        provenance = AddProvenance.addProv(Iati, provenance, 'codelist', doc_id, start_time, source_xml, entities, script)

        # Write provenance graph to Turtle and store in local folder.
        # The escaped identifier is used so that an identifier containing a
        # slash cannot point the file into a non-existent sub-folder.
        provenance_turtle = provenance.serialize(format='turtle')

        with open(doc_folder + 'provenance-' + str(file_id) + '.ttl', 'w') as turtle_file:
            turtle_file.write(provenance_turtle)

        # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
        with open(doc_folder + 'provenance-' + str(file_id) + '.ttl.graph', 'w') as graph_file:
            graph_file.write(str(Iati) + 'graph/provenance/')

    print("Done!")
Ejemplo n.º 14
0
def main():
    '''Converts Organisation XMLs to Turtle files and stores these to local folder.

    Every *.xml file in the XML folder is parsed. A document whose root is
    'iati-organisations' or 'organisations' has each of its
    'iati-organisation' and 'organisation' children converted. A document
    whose root is itself 'iati-organisation' or 'organisation' has that
    root converted. Each converted organisation is written to its own
    Turtle file in a folder named after the document. Afterwards a
    provenance graph for the document is built, using the JSON file that
    sits next to the XML file, and written to the same folder. Documents
    that cannot be parsed are reported and skipped.'''

    # Settings
    xml_folder = "/home/iati/xml/organisations/"
    turtle_folder = "/home/iati/organisation/"
    Iati = Namespace("http://purl.org/collections/iati/")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1
    organisation_count = 1

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        organisation_ids = []

        doc_id = str(document.rsplit('/', 1)[1])[:-4]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        # Parse the XML file; an unparsable document is skipped entirely
        # and does not advance the document counter.
        try:
            xml = ET.parse(document)
        except ET.ParseError:
            print("Could not parse file " + document)
            continue

        root = xml.getroot()
        version = AttributeHelper.attribute_key(root, 'version')

        # Work out which elements have to be converted. The three formerly
        # copy-pasted conversion loops are driven from this single list.
        if root.tag in ('iati-organisations', 'organisations'):
            organisations = (list(xml.findall('iati-organisation')) +
                             list(xml.findall('organisation')))
            # NOTE(review): the original code wrote no '.ttl.graph' file
            # for organisations inside a container element; that is kept.
            write_graph_file = False
        elif root.tag in ('iati-organisation', 'organisation'):
            organisations = [root]
            write_graph_file = True
        else:
            organisations = []
            write_graph_file = False

        # Reset per document so the provenance never picks up the value of
        # a previous document when no organisation could be converted.
        # NOTE(review): assumes ConvertProvenance accepts None here - confirm.
        last_updated = None

        # Convert each organisation to RDFLib Graph
        for organisation in organisations:

            try:
                converter = IatiConverter.ConvertOrganisation(organisation)
                graph, organisation_id, last_updated = converter.convert(Iati)
            except TypeError as e:
                print("Error in " + document + ":" + str(e))
                # Do not fall through with the previous organisation's results.
                graph, organisation_id = None, None

            print("Progress: Organisation #" + str(
                organisation_count) + " in document #" + str(
                    document_count))

            if graph is not None and organisation_id is not None:
                # Slashes in the identifier are escaped to get a valid file name.
                file_base = doc_folder + str(
                    organisation_id.replace('/', '%2F'))

                # Write organisation to Turtle and store in local folder
                graph_turtle = graph.serialize(format='turtle')

                with open(file_base + '.ttl', 'w') as turtle_file:
                    turtle_file.write(graph_turtle)

                if write_graph_file:
                    # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
                    with open(file_base + '.ttl.graph', 'w') as graph_file:
                        graph_file.write(
                            str(Iati) + 'graph/organisation/' +
                            str(organisation_id))

                # Only organisations that were actually written are listed in
                # the provenance, matching how the activity conversion
                # collects its identifiers.
                organisation_ids.append(organisation_id)

            organisation_count += 1

        document_count += 1

        # Add provenance from corresponding JSON file
        json_document = document[:-4] + '.json'

        try:
            with open(json_document, 'r') as open_json_doc:
                json_parsed = json.load(open_json_doc)
        except (IOError, ValueError):
            # Missing/unreadable file or invalid JSON: continue without it.
            print("Could not parse file " + json_document)
            json_parsed = None

        provenance_converter = IatiConverter.ConvertProvenance(
            'organisation', json_parsed, provenance, doc_id, last_updated,
            version, organisation_ids)
        provenance = provenance_converter.convert(Iati)

        # Write provenance graph to Turtle and store in local folder
        provenance_turtle = provenance.serialize(format='turtle')

        with open(doc_folder + 'provenance-' + doc_id + '.ttl',
                  'w') as turtle_file:
            turtle_file.write(provenance_turtle)

        # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
        with open(doc_folder + 'provenance-' + doc_id + '.ttl.graph',
                  'w') as graph_file:
            graph_file.write(str(Iati) + 'graph/provenance/')

    print("Done!")