def retrieveTermListMetadata(githubBaseUri):
    """Collect the filename, namespace prefix and namespace URI of each term list.

    Downloads ``term-lists/term-lists.csv`` (appended to ``githubBaseUri``)
    via ``http_library.retrieveData(url, 'csv', ',')`` and, for every data row
    whose ``list`` cell equals an entry of ``termLists``, records values taken
    from that row.

    NOTE: ``termLists`` is not a parameter of this function; it is read from
    the enclosing (module) scope and must be defined before this is called.

    Args:
        githubBaseUri: string prefix to which the relative CSV path is
            appended.

    Returns:
        A three-item list ``[listFilename, listNamespace, listUri]`` of
        dictionaries keyed by term list identifier:

        * ``listFilename`` - ``'audubon'`` when the row's
          ``vann_preferredNamespacePrefix`` is ``'ac'``, otherwise that prefix
          followed by ``'-for-ac'``.
        * ``listNamespace`` - the row's ``vann_preferredNamespacePrefix``.
        * ``listUri`` - the row's ``vann_preferredNamespaceUri``.
    """
    # retrieve term list metadata from Github
    # (presumably a list of rows, header row first - the code indexes it so)
    metadataUrl = githubBaseUri + 'term-lists/term-lists.csv'
    rows = http_library.retrieveData(metadataUrl, 'csv', ',')

    # determine which columns contain the namespace info; if a heading is
    # repeated, the last occurrence wins
    for position, heading in enumerate(rows[0]):
        if heading == 'list':
            listColumn = position
        elif heading == 'vann_preferredNamespacePrefix':
            prefixColumn = position
        elif heading == 'vann_preferredNamespaceUri':
            uriColumn = position

    listFilename, listNamespace, listUri = {}, {}, {}
    for record in rows[1:]:  # skip the header row
        for termList in termLists:
            if termList == record[listColumn]:
                prefix = record[prefixColumn]
                # make a dictionary of namespaces
                listNamespace[termList] = prefix
                # make a dictionary of URIs
                listUri[termList] = record[uriColumn]
                # make a dictionary of filenames
                listFilename[termList] = (
                    'audubon' if prefix == 'ac' else prefix + '-for-ac'
                )
    return [listFilename, listNamespace, listUri]
def createMasterMetadataTable(termLists, listMetadata):
    """Build one combined table of term metadata for the given term lists.

    For each term list, the CSV at
    ``githubBaseUri + <filename> + '/' + <filename> + '.csv'`` is fetched via
    ``http_library.retrieveData(url, 'csv', ',')`` and one output row is
    appended for every data row of that file.

    NOTE: ``githubBaseUri`` is not a parameter of this function; it is read
    from the enclosing (module) scope and must be defined before this is
    called.

    Args:
        termLists: iterable of term list identifiers; each must be a key of
            the dictionaries in ``listMetadata``.
        listMetadata: sequence whose first three items are dictionaries keyed
            by term list identifier, in the order (filename, namespace, URI)
            - the shape returned by ``retrieveTermListMetadata``.

    Returns:
        A list of 11-item lists. Each holds the term list's namespace and URI
        followed by the row's ``term_localName``, ``label``,
        ``tdwgutility_layer``, ``tdwgutility_required``,
        ``tdwgutility_repeatable``, ``rdfs_comment``, ``skos_scopeNote``,
        ``dcterms_description`` and ``tdwgutility_organizedInClass`` values,
        in that order.

    Raises:
        ValueError: if a downloaded table's header lacks any of the nine
            columns listed above.
        KeyError: if a term list identifier is absent from one of the
            ``listMetadata`` dictionaries.
    """
    fileNameDict = listMetadata[0]
    namespaceDict = listMetadata[1]
    uriDict = listMetadata[2]

    # Output order of the per-term fields; also the set of required headings.
    requiredColumns = (
        'term_localName',
        'label',
        'tdwgutility_layer',
        'tdwgutility_required',
        'tdwgutility_repeatable',
        'rdfs_comment',
        'skos_scopeNote',
        'dcterms_description',
        'tdwgutility_organizedInClass',
    )

    masterTable = []
    for termList in termLists:
        # retrieve term metadata for a particular list from Github
        fileName = fileNameDict[termList]
        dataUrl = githubBaseUri + fileName + '/' + fileName + '.csv'
        table = http_library.retrieveData(dataUrl, 'csv', ',')
        header = table[0]

        # Resolve the column positions afresh for every file. Previously the
        # indexes lived in locals that survived from one term list to the
        # next, so a file missing a heading silently reused the previous
        # file's position (or hit UnboundLocalError on the first file).
        # If a heading is repeated, the last occurrence wins, as before.
        positionOf = {name: index for index, name in enumerate(header)}
        missing = [name for name in requiredColumns if name not in positionOf]
        if missing:
            raise ValueError(
                'Missing column(s) ' + ', '.join(missing) + ' in ' + dataUrl
            )
        fieldColumns = [positionOf[name] for name in requiredColumns]

        namespace = namespaceDict[termList]
        uri = uriDict[termList]
        for record in table[1:]:  # skip the header row
            masterTable.append(
                [namespace, uri] + [record[column] for column in fieldColumns]
            )
    return masterTable
def retrieveVocabularyInfo(githubBaseUri):
    """List the term lists that belong to the ``http://rs.tdwg.org/ac/`` vocabulary.

    Downloads ``vocabularies/vocabularies-members.csv`` (appended to
    ``githubBaseUri``) via ``http_library.retrieveData(url, 'csv', ',')`` and
    scans its data rows.

    Args:
        githubBaseUri: string prefix to which the relative CSV path is
            appended.

    Returns:
        A list with the ``termList`` cell of every data row whose
        ``vocabulary`` cell equals ``'http://rs.tdwg.org/ac/'``, in file
        order.
    """
    membersUrl = githubBaseUri + 'vocabularies/vocabularies-members.csv'
    # presumably a list of rows, header row first - the code indexes it so
    rows = http_library.retrieveData(membersUrl, 'csv', ',')

    # determine which column contains the vocab and term list ids; if a
    # heading is repeated, the last occurrence wins
    for position, heading in enumerate(rows[0]):
        if heading == 'termList':
            termListColumn = position
        elif heading == 'vocabulary':
            vocabularyColumn = position

    # store the identifiers of the term lists
    termLists = []
    for record in rows[1:]:  # skip the header row
        if record[vocabularyColumn] == 'http://rs.tdwg.org/ac/':
            termLists.append(record[termListColumn])
    return termLists