Beispiel #1
0
def genseo98():
    """
    Generate the SEO 1998 ontology (seo98.owl).

    Builds a graph holding the ontology header, the SEO class hierarchy
    and one node per code.  Subdivision (2 digit) and group (4 digit)
    titles are read from ``anzsrc_data/seo98.csv``; the 6 digit objective
    codes come from the first two columns of
    ``anzsrc_data/seo98-seo08.csv``.  A summary and any consistency
    warnings are printed to stdout.

    NOTE(review): ``division`` and ``code2division`` are not assigned in
    this function; presumably they are module-level tables - confirm.

    Returns the populated graph.
    """
    g = Graph()
    g.add((SEO98, RDF.type, OWL.Ontology))
    g.add((SEO98, RDF.type, SKOS.ConceptScheme))
    g.add((SEO98, RDFS.label, Literal(u'SEO 1998 Ontology')))
    g.add((SEO98, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about SEO 1998 codes.')))
    g.add((SEO98, DC.title,
           Literal(
               u"Australian Standard Research "
               u"Classification (ASRC): "
               u"Socio-Economic Objective Classification",
               lang=u"en")))
    g.add(
        (SEO98, DC.description,
         Literal(
             u"The SEO Classification allows R&D "
             u"data to be classified according to the researcher's perceived "
             u"purpose. The purpose categories take account of processes, "
             u"products, health, education and other social and environmental "
             u"aspects of particular interest.",
             lang=u"en")))
    ontoannot(g, SEO98)
    ontoversion(g, SEO98)

    # (class, superclass, label, comment) for every class of the hierarchy
    class_specs = (
        (SEO98.SEO, SKOS.Concept,
         u'SEO 1998 Code', u'Superclass for SEO 1998 codes'),
        (SEO98.SEODivision, SEO98.SEO,
         u'SEO 1998 Division Code', u'Class for SEO 1998 Division codes'),
        (SEO98.SEO2, SEO98.SEO,
         u'SEO 1998 2 digit Code', u'Class for SEO 1998 2 digit codes'),
        (SEO98.SEO4, SEO98.SEO,
         u'SEO 1998 4 digit Code', u'Class for SEO 1998 4 digit codes'),
        (SEO98.SEO6, SEO98.SEO,
         u'SEO 1998 6 digit Code', u'Class for SEO 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_specs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files
    subdivision = {}  # collect subdivision codes
    group = {}  # collect group codes

    # 'with' closes the file again once all rows have been consumed
    with open('anzsrc_data/seo98.csv') as csvfile:
        reader = csv.reader(csvfile)
        # the first two rows are skipped (presumably a file header)
        next(reader)
        next(reader)
        for code, title in reader:
            if len(code) == 2:
                subdivision[int(code)] = title
            elif len(code) == 4:
                group[int(code)] = title

    objective = {}  # collect SEO-6 codes in here

    # check if all data exists and collect objective codes
    with open('anzsrc_data/seo98-seo08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # the first five rows are skipped
            next(reader)
        # only the two SEO 98 columns are used; the SEO 08 ones are ignored
        for s98, n98, _s08, _n08 in reader:
            if not s98:
                continue
            subdivcode = int(s98[:2])
            if subdivcode not in subdivision:
                print("WARNING subdivision %d does not exist" % subdivcode)

            grcode = int(s98[:4])
            if grcode not in group:
                print("WARNING group %d does not exist" % grcode)

            obcode = int(s98)
            if obcode not in objective:
                objective[obcode] = n98
            elif objective[obcode] != n98:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, n98))

    # print summary and build instances
    # (single-argument print(...) yields the same output whether print is
    # a statement or a function)
    print('SEO 98')
    # from seo98.pdf: 5 divisions, 18 subdivisions, 107 groups and 594 classes
    print('Divisions (5): %d' % len(division))
    print('Subdivisions (18):  %d' % len(subdivision))
    print('Groups (107): %d' % len(group))
    print('Objective (594): %d' % len(objective))

    for code, title in division.items():
        createNode(g, SEO98, SEO98.SEODivision, code, title, None)

    for code, title in subdivision.items():
        createNode(g, SEO98, SEO98.SEO2, u'%02d' % code, title,
                   code2division[code])

    # for groups and objectives the last argument is the zero padded code
    # with its last two digits removed
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO98, SEO98.SEO4, padded, title, padded[:2])

    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO98, SEO98.SEO6, padded, title, padded[:4])

    return g
Beispiel #2
0
def genseo98():
    """
    Generate the SEO 1998 ontology (seo98.owl).

    The returned graph contains the ontology header, the SEO class
    hierarchy and one node per code.  Titles of 2 digit subdivisions and
    4 digit groups are taken from ``anzsrc_data/seo98.csv``; 6 digit
    objective codes are taken from the first two columns of
    ``anzsrc_data/seo98-seo08.csv``.  Consistency warnings and a summary
    are printed to stdout while the data is processed.

    NOTE(review): ``division`` and ``code2division`` are not assigned in
    this function; presumably they are module-level tables - confirm.

    Returns the populated graph.
    """
    g = Graph()
    g.add((SEO98, RDF.type, OWL.Ontology))
    g.add((SEO98, RDF.type, SKOS.ConceptScheme))
    g.add((SEO98, RDFS.label, Literal(u'SEO 1998 Ontology')))
    g.add((SEO98, RDFS.comment, Literal(u'An ontology that provides classes '
                                        u'codes and hierarchical information '
                                        u'about SEO 1998 codes.')))
    g.add((SEO98, DC.title, Literal(u"Australian Standard Research "
                                    u"Classification (ASRC): "
                                    u"Socio-Economic Objective Classification",
                                    lang=u"en")))
    g.add((SEO98, DC.description, Literal(
        u"The SEO Classification allows R&D "
        u"data to be classified according to the researcher's perceived "
        u"purpose. The purpose categories take account of processes, "
        u"products, health, education and other social and environmental "
        u"aspects of particular interest.", lang=u"en")))
    ontoannot(g, SEO98)
    ontoversion(g, SEO98)

    # every class gets the same four triples: type, superclass, label
    # and comment
    hierarchy = (
        (SEO98.SEO, SKOS.Concept,
         u'SEO 1998 Code', u'Superclass for SEO 1998 codes'),
        (SEO98.SEODivision, SEO98.SEO,
         u'SEO 1998 Division Code', u'Class for SEO 1998 Division codes'),
        (SEO98.SEO2, SEO98.SEO,
         u'SEO 1998 2 digit Code', u'Class for SEO 1998 2 digit codes'),
        (SEO98.SEO4, SEO98.SEO,
         u'SEO 1998 4 digit Code', u'Class for SEO 1998 4 digit codes'),
        (SEO98.SEO6, SEO98.SEO,
         u'SEO 1998 6 digit Code', u'Class for SEO 1998 6 digit codes'),
    )
    for cls, parent, label, comment in hierarchy:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files
    subdivision = {}  # collect subdivision codes
    group = {}  # collect group codes

    # the context manager makes sure the file handle is released
    with open('anzsrc_data/seo98.csv') as csvfile:
        rows = csv.reader(csvfile)
        # skip the first two rows (presumably a file header)
        next(rows)
        next(rows)
        for code, title in rows:
            if len(code) == 2:
                subdivision[int(code)] = title
            elif len(code) == 4:
                group[int(code)] = title

    objective = {}  # collect SEO-6 codes in here

    # check if all data exists and collect objective codes
    with open('anzsrc_data/seo98-seo08.csv') as csvfile:
        rows = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(rows)
        # the SEO 08 columns of the mapping are not needed here
        for s98, n98, _s08, _n08 in rows:
            if not s98:
                continue
            subdivcode = int(s98[:2])
            if subdivcode not in subdivision:
                print("WARNING subdivision %d does not exist" % subdivcode)

            grcode = int(s98[:4])
            if grcode not in group:
                print("WARNING group %d does not exist" % grcode)

            obcode = int(s98)
            if obcode not in objective:
                objective[obcode] = n98
            elif objective[obcode] != n98:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, n98))

    # print summary and build instances
    # (each print takes one pre-formatted string, so the output is the same
    # with the print statement and the print function)
    print('SEO 98')
    # from seo98.pdf: 5 divisions, 18 subdivisions, 107 groups and 594 classes
    print('Divisions (5): %d' % len(division))
    print('Subdivisions (18):  %d' % len(subdivision))
    print('Groups (107): %d' % len(group))
    print('Objective (594): %d' % len(objective))

    for code, title in division.items():
        createNode(g, SEO98, SEO98.SEODivision, code, title, None)

    for code, title in subdivision.items():
        createNode(g, SEO98, SEO98.SEO2, u'%02d' % code, title,
                   code2division[code])

    # the last createNode argument is the padded code minus its last two
    # digits
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO98, SEO98.SEO4, padded, title, padded[:2])

    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO98, SEO98.SEO6, padded, title, padded[:4])

    return g
Beispiel #3
0
def genfor08():
    """
    Generate the FOR 2008 ontology (for08.owl).

    for:FOR ... top level class       broad narrow
    for:FOR2 ... subclass of for:FOR   -      FOR4
    for:FOR4 ... subclass of for:FOR   FOR2   FOR6
    for:FOR6 ... subclass of for:FOR   FOR4    -

    ``FOR08.code`` is declared as a datatype property with domain
    ``FOR08.FOR`` and range ``xsd:string``.

    Division, group and field titles are read from
    ``anzsrc_data/for08.csv``; the first column of
    ``anzsrc_data/for08-rfcd.csv`` is then checked against the collected
    field codes.  Warnings and a summary are printed to stdout.

    NOTE(review): the broader/narrower links of the table above are
    presumably created by ``createNode`` from its last argument - confirm.

    Returns the populated graph.
    """
    g = Graph()
    g.add((FOR08, RDF.type, OWL.Ontology))
    g.add((FOR08, RDF.type, SKOS.ConceptScheme))
    g.add((FOR08, RDFS.label, Literal(u"FOR 2008 Ontology")))
    g.add(
        (
            FOR08,
            RDFS.comment,
            Literal(
                u"An ontology that provides classes "
                u"codes and hierarchical information "
                u"about FOR 2008 codes."
            ),
        )
    )
    g.add(
        (
            FOR08,
            DC.title,
            Literal(
                u"Australian and New Zealand Standard "
                u"Research Classification (ANZSRC): "
                u"Fields of Research.",
                lang=u"en",
            ),
        )
    )
    g.add(
        (
            FOR08,
            DC.description,
            Literal(
                u"The ANZSRC FOR allows R&D activity"
                u" to be categorised according to the methodology used in the R&D,"
                u" rather than the activity of the unit performing the R&D or the "
                u"purpose of the R&D."
                u"\n"
                u"The categories in the classification include major fields and "
                u"related sub-fields of research and emerging areas of study "
                u"investigated by businesses, universities, tertiary institutions,"
                u" national research institutions and other organisations."
                u"\n"
                u"This classification allows the categorisation of fields of "
                u"research activity within Australia and New Zealand.",
                lang=u"en",
            ),
        )
    )
    ontoannot(g, FOR08)
    ontoversion(g, FOR08)

    # (class, superclass, label, comment) for every class of the hierarchy
    class_specs = (
        (FOR08.FOR, SKOS.Concept, u"FOR 2008 Code", u"Superclass for FOR 2008 codes"),
        (FOR08.FOR2, FOR08.FOR, u"FOR 2008 2 digit Code", u"Class for FOR 2008 2 digit codes"),
        (FOR08.FOR4, FOR08.FOR, u"FOR 2008 4 digit Code", u"Class for FOR 2008 4 digit codes"),
        (FOR08.FOR6, FOR08.FOR, u"FOR 2008 6 digit Code", u"Class for FOR 2008 6 digit codes"),
    )
    for cls, parent, label, comment in class_specs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # create property definition to hold code
    g.add((FOR08.code, RDF.type, OWL.DatatypeProperty))
    g.add((FOR08.code, RDFS.domain, FOR08.FOR))
    g.add((FOR08.code, RDFS.range, XSD.string))

    division = {}
    group = {}
    field = {}

    # 'with' closes the file again once all rows have been consumed
    with open("anzsrc_data/for08.csv") as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # skip the first row (presumably the header)
        for div, gr, fi, code in reader:
            # every row carries the titles of all three levels, so a title
            # is stored on first sight and compared on later rows
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = div
            elif division[divcode] != div:
                print("WARNING division %d %s differs from %d %s" % (divcode, division[divcode], divcode, div))

            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = gr
            elif group[grcode] != gr:
                print("WARNING group %d %s differs from %d %s" % (grcode, group[grcode], grcode, gr))

            ficode = int(code)
            if ficode not in field:
                field[ficode] = fi
            elif field[ficode] != fi:
                print("WARNING field %d %s differs from %d %s" % (ficode, field[ficode], ficode, fi))

    # check against mapping:
    with open("anzsrc_data/for08-rfcd.csv") as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(reader)
        for row in reader:
            # rows whose first cell is not an integer are ignored
            try:
                mapped = int(row[0])
            except ValueError:
                continue
            if mapped not in field:
                print("WARNING: missing code  %s" % (row,))

    # single-argument print(...) yields the same output whether print is a
    # statement or a function
    print("FOR")
    print("Divisions (22): %d" % len(division))
    print("Groups (157): %d" % len(group))
    print("Field (1238): %d" % len(field))

    for code, title in division.items():
        createNode(g, FOR08, FOR08.FOR2, "%02d" % code, title, None)

    # the last createNode argument is the padded code minus its last two
    # digits
    for code, title in group.items():
        padded = "%04d" % code
        createNode(g, FOR08, FOR08.FOR4, padded, title, padded[:2])

    for code, title in field.items():
        padded = "%06d" % code
        createNode(g, FOR08, FOR08.FOR6, padded, title, padded[:4])

    return g
Beispiel #4
0
def genrfcd():
    '''
    Generate the RFCD 1998 ontology.

    RFCD  ... top level class, subclass of skos:Concept
    RFCD2 ... subclass of RFCD (2 digit division codes)
    RFCD4 ... subclass of RFCD (4 digit discipline codes)
    RFCD6 ... subclass of RFCD (6 digit subject codes)

    Division and discipline titles are read from
    ``anzsrc_data/rfcd.csv``; subject codes come from the first two
    columns of ``anzsrc_data/rfcd-for08.csv``.  Warnings and a summary
    are printed to stdout.

    Division nodes are written directly (both types, ``rdfs:label`` and
    ``ANZSRC.code``); discipline and subject nodes are created with
    ``createNode``.

    NOTE(review): skos:broader / skos:narrower links are presumably added
    by ``createNode`` from its last argument - confirm.

    Returns the populated graph.
    '''
    # generate class definitions
    g = Graph()
    g.add((RFCD, RDF.type, OWL.Ontology))
    g.add((RFCD, RDF.type, SKOS.ConceptScheme))
    g.add((RFCD, RDFS.label, Literal(u'RFCD 1998 Ontology')))
    g.add((RFCD, RDFS.comment, Literal(u'An ontology that provides classes '
                                      u'codes and hierarchical information '
                                      u'about RFCD 1998 codes.')))
    g.add((RFCD, DC.title, Literal(u"Australian Standard Research "
                                   u"Classification (ASRC): "
                                   u"Research Fields, Courses and Disciplines "
                                   u"Classification", lang=u"en")))
    g.add((RFCD, DC.description, Literal(
        u"This classification allows both R&D"
        u" activity and other activity within the higher education sector "
        u"to be categorised."
        u"\n"
        u"The categories in the classification include recognised academic"
        u" disciplines and related major sub-fields taught at universities"
        u" or tertiary institutions, major fields of research investigated"
        u" by national research institutions and organisations, and "
        u"emerging areas of study.", lang="en")))
    ontoannot(g, RFCD)
    ontoversion(g, RFCD)

    # every class gets the same four triples: type, superclass, label
    # and comment
    hierarchy = (
        (RFCD.RFCD, SKOS.Concept,
         u'RFCD 1998 Code', u'Superclass for RFCD 1998 codes'),
        (RFCD.RFCD2, RFCD.RFCD,
         u'RFCD 1998 2 digit Code', u'Class for RFCD 1998 2 digit codes'),
        (RFCD.RFCD4, RFCD.RFCD,
         u'RFCD 1998 4 digit Code', u'Class for RFCD 1998 4 digit codes'),
        (RFCD.RFCD6, RFCD.RFCD,
         u'RFCD 1998 6 digit Code', u'Class for RFCD 1998 6 digit codes'),
    )
    for cls, parent, label, comment in hierarchy:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files
    division = {}  # collect divisions here
    discipline = {}  # collect disciplines here

    # the context manager makes sure the file handle is released
    with open('anzsrc_data/rfcd.csv') as csvfile:
        rows = csv.reader(csvfile)
        next(rows)
        next(rows)  # skip file header
        for code, title in rows:
            if len(code) == 2:
                division[int(code)] = title
            elif len(code) == 4:
                discipline[int(code)] = title

    subject = {}  # collect RFCD-6 codes in here

    # check if all data exists and collect subject codes
    with open('anzsrc_data/rfcd-for08.csv') as csvfile:
        rows = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(rows)
        # the FOR 08 columns of the mapping are not needed here
        for f98, n98, _f08, _n08 in rows:
            if not f98:
                continue
            divcode = int(f98[:2])
            if divcode not in division:
                print("WARNING division %d does not exist" % divcode)

            disccode = int(f98[:4])
            if disccode not in discipline:
                print("WARNING discipline %d does not exist" % disccode)

            subjcode = int(f98)
            if subjcode not in subject:
                subject[subjcode] = n98
            elif subject[subjcode] != n98:
                print("WARNING subject %d %s differs from %d %s" % (
                    subjcode, subject[subjcode], subjcode, n98))

    # All data read, print out a summary and start creating instances
    # (each print takes one pre-formatted string, so the output is the same
    # with the print statement and the print function)
    print('RFCD 98')
    # from 12970_98.pdf: 24 divisions, 139 disciplines, 898 subjects
    print('Divisions (24): %d' % len(division))
    print('Disciplines (139): %d' % len(discipline))
    print('Subjects (898): %d' % len(subject))

    def createDivision(code, name):
        # add one division node directly to the enclosing graph ``g``
        rfcdcode = RFCD.term(code)
        g.add((rfcdcode, RDF.type, RFCD.RFCD2))
        g.add((rfcdcode, RDF.type, OWL.Thing))
        g.add((rfcdcode, RDFS.label, Literal(unicode(name))))
        g.add((rfcdcode, ANZSRC.code, Literal(code)))

    for code, title in division.items():
        createDivision(u'%02d' % code, title)

    # the last createNode argument is the padded code minus its last two
    # digits
    for code, title in discipline.items():
        padded = u'%04d' % code
        createNode(g, RFCD, RFCD.RFCD4, padded, title, padded[:2])

    for code, title in subject.items():
        padded = u'%06d' % code
        createNode(g, RFCD, RFCD.RFCD6, padded, title, padded[:4])

    return g
Beispiel #5
0
def genrfcd():
    '''
    Generate the RFCD 1998 ontology.

    RFCD  ... top level class, subclass of skos:Concept
    RFCD2 ... subclass of RFCD (2 digit division codes)
    RFCD4 ... subclass of RFCD (4 digit discipline codes)
    RFCD6 ... subclass of RFCD (6 digit subject codes)

    Titles of divisions and disciplines come from
    ``anzsrc_data/rfcd.csv``; subject codes and titles come from the
    first two columns of ``anzsrc_data/rfcd-for08.csv``.  Consistency
    warnings and a summary are printed to stdout.

    Division nodes are added by the local ``createDivision`` helper
    (both types, ``rdfs:label`` and ``ANZSRC.code``); discipline and
    subject nodes are created with ``createNode``.

    NOTE(review): skos:broader / skos:narrower links are presumably added
    by ``createNode`` from its last argument - confirm.

    Returns the populated graph.
    '''
    # generate class definitions
    g = Graph()
    g.add((RFCD, RDF.type, OWL.Ontology))
    g.add((RFCD, RDF.type, SKOS.ConceptScheme))
    g.add((RFCD, RDFS.label, Literal(u'RFCD 1998 Ontology')))
    g.add((RFCD, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about RFCD 1998 codes.')))
    g.add((RFCD, DC.title,
           Literal(
               u"Australian Standard Research "
               u"Classification (ASRC): "
               u"Research Fields, Courses and Disciplines "
               u"Classification",
               lang=u"en")))
    g.add((
        RFCD, DC.description,
        Literal(
            u"This classification allows both R&D"
            u" activity and other activity within the higher education sector "
            u"to be categorised."
            u"\n"
            u"The categories in the classification include recognised academic"
            u" disciplines and related major sub-fields taught at universities"
            u" or tertiary institutions, major fields of research investigated"
            u" by national research institutions and organisations, and "
            u"emerging areas of study.",
            lang="en")))
    ontoannot(g, RFCD)
    ontoversion(g, RFCD)

    # (class, superclass, label, comment) for every class of the hierarchy
    class_specs = (
        (RFCD.RFCD, SKOS.Concept,
         u'RFCD 1998 Code', u'Superclass for RFCD 1998 codes'),
        (RFCD.RFCD2, RFCD.RFCD,
         u'RFCD 1998 2 digit Code', u'Class for RFCD 1998 2 digit codes'),
        (RFCD.RFCD4, RFCD.RFCD,
         u'RFCD 1998 4 digit Code', u'Class for RFCD 1998 4 digit codes'),
        (RFCD.RFCD6, RFCD.RFCD,
         u'RFCD 1998 6 digit Code', u'Class for RFCD 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_specs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files
    division = {}  # collect divisions here
    discipline = {}  # collect disciplines here

    # 'with' closes the file again once all rows have been consumed
    with open('anzsrc_data/rfcd.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)
        next(reader)  # skip file header
        for code, title in reader:
            if len(code) == 2:
                division[int(code)] = title
            elif len(code) == 4:
                discipline[int(code)] = title

    subject = {}  # collect RFCD-6 codes in here

    # check if all data exists and collect subject codes
    with open('anzsrc_data/rfcd-for08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # the first five rows are skipped
            next(reader)
        # only the two RFCD columns are used; the FOR 08 ones are ignored
        for f98, n98, _f08, _n08 in reader:
            if not f98:
                continue
            divcode = int(f98[:2])
            if divcode not in division:
                print("WARNING division %d does not exist" % divcode)

            disccode = int(f98[:4])
            if disccode not in discipline:
                print("WARNING discipline %d does not exist" % disccode)

            subjcode = int(f98)
            if subjcode not in subject:
                subject[subjcode] = n98
            elif subject[subjcode] != n98:
                print("WARNING subject %d %s differs from %d %s" % (
                    subjcode, subject[subjcode], subjcode, n98))

    # All data read, print out a summary and start creating instances
    # (single-argument print(...) yields the same output whether print is
    # a statement or a function)
    print('RFCD 98')
    # from 12970_98.pdf: 24 divisions, 139 disciplines, 898 subjects
    print('Divisions (24): %d' % len(division))
    print('Disciplines (139): %d' % len(discipline))
    print('Subjects (898): %d' % len(subject))

    def createDivision(code, name):
        # add one division node directly to the enclosing graph ``g``
        rfcdcode = RFCD.term(code)
        g.add((rfcdcode, RDF.type, RFCD.RFCD2))
        g.add((rfcdcode, RDF.type, OWL.Thing))
        g.add((rfcdcode, RDFS.label, Literal(unicode(name))))
        g.add((rfcdcode, ANZSRC.code, Literal(code)))

    for code, title in division.items():
        createDivision(u'%02d' % code, title)

    # for disciplines and subjects the last argument is the zero padded
    # code with its last two digits removed
    for code, title in discipline.items():
        padded = u'%04d' % code
        createNode(g, RFCD, RFCD.RFCD4, padded, title, padded[:2])

    for code, title in subject.items():
        padded = u'%06d' % code
        createNode(g, RFCD, RFCD.RFCD6, padded, title, padded[:4])

    return g
Beispiel #6
0
def genfor08():
    '''
    Generate the FOR 2008 ontology (for08.owl).

    for:FOR ... top level class       broad narrow
    for:FOR2 ... subclass of for:FOR   -      FOR4
    for:FOR4 ... subclass of for:FOR   FOR2   FOR6
    for:FOR6 ... subclass of for:FOR   FOR4    -

    ``FOR08.code`` is declared as a datatype property with domain
    ``FOR08.FOR`` and range ``xsd:string``.

    Titles for divisions, groups and fields are collected from
    ``anzsrc_data/for08.csv``.  Afterwards the first column of
    ``anzsrc_data/for08-rfcd.csv`` is checked against the collected
    field codes.  Warnings and a summary are printed to stdout.

    NOTE(review): the broader/narrower links of the table above are
    presumably created by ``createNode`` from its last argument - confirm.

    Returns the populated graph.
    '''
    g = Graph()
    g.add((FOR08, RDF.type, OWL.Ontology))
    g.add((FOR08, RDF.type, SKOS.ConceptScheme))
    g.add((FOR08, RDFS.label, Literal(u'FOR 2008 Ontology')))
    g.add((FOR08, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about FOR 2008 codes.')))
    g.add((FOR08, DC.title,
           Literal(
               u"Australian and New Zealand Standard "
               u"Research Classification (ANZSRC): "
               u"Fields of Research.",
               lang=u"en")))
    g.add((
        FOR08, DC.description,
        Literal(
            u"The ANZSRC FOR allows R&D activity"
            u" to be categorised according to the methodology used in the R&D,"
            u" rather than the activity of the unit performing the R&D or the "
            u"purpose of the R&D."
            u"\n"
            u"The categories in the classification include major fields and "
            u"related sub-fields of research and emerging areas of study "
            u"investigated by businesses, universities, tertiary institutions,"
            u" national research institutions and other organisations."
            u"\n"
            u"This classification allows the categorisation of fields of "
            u"research activity within Australia and New Zealand.",
            lang=u"en")))
    ontoannot(g, FOR08)
    ontoversion(g, FOR08)

    # every class gets the same four triples: type, superclass, label
    # and comment
    hierarchy = (
        (FOR08.FOR, SKOS.Concept,
         u'FOR 2008 Code', u'Superclass for FOR 2008 codes'),
        (FOR08.FOR2, FOR08.FOR,
         u'FOR 2008 2 digit Code', u'Class for FOR 2008 2 digit codes'),
        (FOR08.FOR4, FOR08.FOR,
         u'FOR 2008 4 digit Code', u'Class for FOR 2008 4 digit codes'),
        (FOR08.FOR6, FOR08.FOR,
         u'FOR 2008 6 digit Code', u'Class for FOR 2008 6 digit codes'),
    )
    for cls, parent, label, comment in hierarchy:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # create property definition to hold code
    g.add((FOR08.code, RDF.type, OWL.DatatypeProperty))
    g.add((FOR08.code, RDFS.domain, FOR08.FOR))
    g.add((FOR08.code, RDFS.range, XSD.string))

    division = {}
    group = {}
    field = {}

    # the context manager makes sure the file handle is released
    with open('anzsrc_data/for08.csv') as csvfile:
        rows = csv.reader(csvfile)
        next(rows)  # skip the first row (presumably the header)
        for div, gr, fi, code in rows:
            # each row repeats the titles of all three levels: remember a
            # title the first time and compare it on every later row
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = div
            elif division[divcode] != div:
                print("WARNING division %d %s differs from %d %s" % (
                    divcode, division[divcode], divcode, div))

            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = gr
            elif group[grcode] != gr:
                print("WARNING group %d %s differs from %d %s" % (
                    grcode, group[grcode], grcode, gr))

            ficode = int(code)
            if ficode not in field:
                field[ficode] = fi
            elif field[ficode] != fi:
                print("WARNING field %d %s differs from %d %s" % (
                    ficode, field[ficode], ficode, fi))

    # check against mapping:
    with open('anzsrc_data/for08-rfcd.csv') as csvfile:
        rows = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(rows)
        for row in rows:
            # rows whose first cell is not an integer are ignored
            try:
                mapped = int(row[0])
            except ValueError:
                continue
            if mapped not in field:
                print('WARNING: missing code  %s' % (row,))

    # each print takes one pre-formatted string, so the output is the same
    # with the print statement and the print function
    print('FOR')
    print('Divisions (22): %d' % len(division))
    print('Groups (157): %d' % len(group))
    print('Field (1238): %d' % len(field))

    for code, title in division.items():
        createNode(g, FOR08, FOR08.FOR2, '%02d' % code, title, None)

    # the last createNode argument is the padded code minus its last two
    # digits
    for code, title in group.items():
        padded = '%04d' % code
        createNode(g, FOR08, FOR08.FOR4, padded, title, padded[:2])

    for code, title in field.items():
        padded = '%06d' % code
        createNode(g, FOR08, FOR08.FOR6, padded, title, padded[:4])

    return g
Beispiel #7
0
def genseo08():
    """
    Generate the SEO 2008 ontology (seo08.owl).

    Builds a graph holding the ontology header, the SEO class hierarchy
    and one node per code.  Division, group and objective titles are read
    from ``anzsrc_data/seo08.csv``; the first column of
    ``anzsrc_data/seo08-seo98.csv`` is then checked against the collected
    objective codes.  Warnings and a summary are printed to stdout.

    NOTE(review): ``sector`` and ``code2sector`` are not assigned in this
    function (and the sector column of the csv file is not used here);
    presumably they are module-level tables - confirm.

    Returns the populated graph.
    """
    g = Graph()
    g.add((SEO08, RDF.type, OWL.Ontology))
    g.add((SEO08, RDF.type, SKOS.ConceptScheme))
    g.add((SEO08, RDFS.label, Literal(u'SEO 2008 Ontology')))
    g.add((SEO08, RDFS.comment, Literal(u'An ontology that provides classes '
                                        u'codes and hierarchical information '
                                        u'about SEO 2008 codes.')))
    g.add((SEO08, DC.title, Literal(u"Australian and New Zealand Standard "
                                    u"Research Classification (ANZSRC): "
                                    u"Socio-Economic Objective.", lang=u"en")))
    g.add((SEO08, DC.description, Literal(
        u"The ANZSRC SEO classification "
        u"allows R&D activity in Australia and New Zealand to be "
        u"categorised according to the intended purpose or outcome of the "
        u"research, rather than the processes or techniques used in order "
        u"to achieve this objective."
        u"\n"
        u"The purpose categories include processes, products, health, "
        u"education and other social and environmental aspects in "
        u"Australia and New Zealand that R&D activity aims to improve.",
        lang=u"en")))
    ontoannot(g, SEO08)
    ontoversion(g, SEO08)

    # (class, superclass, label, comment) for every class of the hierarchy
    class_specs = (
        (SEO08.SEO, SKOS.Concept,
         u'SEO 2008 Code', u'Superclass for SEO 2008 codes'),
        (SEO08.SEOSection, SEO08.SEO,
         u'SEO 2008 Section Code', u'Class for SEO 2008 Section codes'),
        (SEO08.SEO2, SEO08.SEO,
         u'SEO 2008 2 digit Code', u'Class for SEO 2008 2 digit codes'),
        (SEO08.SEO4, SEO08.SEO,
         u'SEO 2008 4 digit Code', u'Class for SEO 2008 4 digit codes'),
        (SEO08.SEO6, SEO08.SEO,
         u'SEO 2008 6 digit Code', u'Class for SEO 2008 6 digit codes'),
    )
    for cls, parent, label, comment in class_specs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    division = {}
    group = {}
    objective = {}

    # 'with' closes the file again once all rows have been consumed
    with open('anzsrc_data/seo08.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # skip the first row (presumably the header)
        # the sector column is read but not used in this function
        for _sec, div, gr, obj, code in reader:
            # every row carries the titles of all levels, so a title is
            # stored on first sight and compared on later rows
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = div
            elif division[divcode] != div:
                print("WARNING division %d %s differs from %d %s" % (
                    divcode, division[divcode], divcode, div))

            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = gr
            elif group[grcode] != gr:
                print("WARNING group %d %s differs from %d %s" % (
                    grcode, group[grcode], grcode, gr))

            obcode = int(code)
            if obcode not in objective:
                objective[obcode] = obj
            elif objective[obcode] != obj:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, obj))

    # single-argument print(...) yields the same output whether print is a
    # statement or a function
    print('SEO 08')
    print('Sectors (5): %d' % len(sector))
    print('Divisions (17): %d' % len(division))
    print('Groups (119): %d' % len(group))
    print('Objective (847): %d' % len(objective))

    # check against mapping:
    with open('anzsrc_data/seo08-seo98.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # the first five rows are skipped
            next(reader)
        for row in reader:
            # rows whose first cell is not an integer are ignored
            try:
                mapped = int(row[0])
            except ValueError:
                continue
            if mapped not in objective:
                print('WARNING: missing code  %s' % (row,))

    for code, title in sector.items():
        createNode(g, SEO08, SEO08.SEOSection, code, title, None)

    for code, title in division.items():
        createNode(g, SEO08, SEO08.SEO2, u'%02d' % code, title,
                   code2sector[code])

    # for groups and objectives the last argument is the zero padded code
    # with its last two digits removed
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO08, SEO08.SEO4, padded, title, padded[:2])

    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO08, SEO08.SEO6, padded, title, padded[:4])

    return g