def arnsberg():
    """Scrape the Arnsberg portal and write the harvested data to the DB.

    Gathers the raw entries, converts each one to the DB row format,
    then replaces the portal's existing data in the database.
    """
    raw_entries = gather()
    # Convert every gathered entry into the row shape the DB layer expects.
    db_rows = [import_data(entry) for entry in raw_entries]
    metautils.setsettings(settings)
    metautils.addSimpleDataToDB(db_rows, portalname, checked=True,
                                accepted=True, remove_data=True)
def bayern():
    """Scrape the Bavarian portal and write the harvested data to the DB.

    Resolves the catalog entry URLs, fetches and imports each one, stores
    the result (replacing the portal's existing data), and returns the
    list of imported entries.
    """
    entry_urls = catalog_entry_urls()
    pages = [fetch(url) for url in entry_urls]
    imported = [import_data(page) for page in pages]
    metautils.setsettings(settings)
    metautils.addSimpleDataToDB(imported, portalname, checked=True,
                                accepted=True, remove_data=True)
    return imported
def badenWuerttenberg():
    """Scrape the Baden-Wuerttemberg catalog and write it to the DB.

    Walks the catalog page list, scrapes every entry page, converts the
    entries to DB rows and replaces the portal's existing data.
    """
    print('Get Catalog Entries')
    pages = getCatalogPages()
    # Each page yields a list of item dicts; flatten them into one list.
    items_per_page = [scrapeCatalogPageList(page) for page in pages]
    items = list(itertools.chain(*items_per_page))
    print('Scrape Catalog Entries')
    entries = [scrapeCatalogEntryPage(item) for item in items]
    db_rows = [toDB(entry) for entry in entries]
    print('Write to db')
    metautils.setsettings(settings)
    metautils.addSimpleDataToDB(db_rows, portalname, checked=True,
                                accepted=True, remove_data=True)
def braunschweigGeoportal():
    """Harvest the Braunschweig geoportal catalog and write it to the DB.

    Fetches the catalog, dumps the raw XML to ``braunschweigMetaDataFile``
    for later inspection, scrapes each record and replaces the portal's
    existing data in the database.
    """
    print('Get catalog records')
    catalog = getRecords()
    xml_text = etree.tostring(catalog, pretty_print=True)
    # Keep an on-disk copy of the raw catalog XML.
    # NOTE(review): .encode('utf8') on the tostring() result assumes an
    # ASCII-safe serialization under Python 2 -- confirm against lxml's
    # default tostring encoding.
    with open(braunschweigMetaDataFile, 'w') as dump:
        dump.write(xml_text.encode('utf8'))
    print('Scrape catalog record entries')
    raw_records = extractRecords(catalog)
    record_dicts = [extractData(rec) for rec in raw_records]
    record_dicts = [scrapeData(rec) for rec in record_dicts]
    db_rows = [recordToDB(rec) for rec in record_dicts]
    print('Write to db')
    metautils.setsettings(settings)
    metautils.addSimpleDataToDB(db_rows, portalname, checked=True,
                                accepted=True, remove_data=True)
# Deduplicate records by title: the first occurrence is kept, later
# duplicates only contribute their categories to the kept record.
# NOTE(review): nesting reconstructed from a collapsed source line --
# the extend() is read as part of the else branch; confirm against VCS.
recordsdict = {}
for record in allrecords:
    if record['title'] not in recordsdict:
        recordsdict[record['title']] = record
    else:
        if (verbose):
            print record['title'] + ' in ' + str(record['categories']) + ' is already in ' + str(recordsdict[record['title']]['categories']) + '. Transferring category.'
        recordsdict[record['title']]['categories'].extend(record['categories'])
allrecords = recordsdict.values()
finalrecords = []
#Expand categories
for record in allrecords:
    # Snapshot the record (before normalization below) as its own
    # 'metadata' payload.
    record['metadata'] = record.copy()
    record['source'] = 'd'
    record['description'] = None
    record['costs'] = None
    record['metadata_xml'] = None
    # Map the portal's category names onto the ODM category set.
    odm_cats = metautils.govDataLongToODM(metautils.arraytocsv(record['categories']), checkAll=True)
    if len(odm_cats) > 0:
        record['categories'] = odm_cats
    else:
        # Fallback bucket; German for 'not yet categorized'.
        record['categories'] = ['Noch nicht kategorisiert']
    finalrecords.append(record)
if (verbose):
    print 'Done. Adding to DB.'
#Write data to the DB
metautils.setsettings(settings)
#Add data
metautils.addSimpleDataToDB(datafordb=finalrecords, originating_portal='http://www.bochum.de/opendata', checked=True, accepted=True, remove_data=True)
# NOTE(review): this chunk begins mid-logic -- the extend() below clearly
# belongs to a dedup loop/else branch from preceding context not visible
# here; indentation of the first statement must be confirmed in the file.
# Merge the duplicate's categories into the already-kept record.
recordsdict[record['title']]['categories'].extend(record['categories'])
allrecords = recordsdict.values()
finalrecords = []
#Expand categories
for record in allrecords:
    # Snapshot the record (before normalization below) as its own
    # 'metadata' payload.
    record['metadata'] = record.copy()
    record['source'] = 'd'
    record['description'] = None
    record['costs'] = None
    record['metadata_xml'] = None
    # Map the portal's category names onto the ODM category set.
    odm_cats = metautils.govDataLongToODM(metautils.arraytocsv( record['categories']), checkAll=True)
    if len(odm_cats) > 0:
        record['categories'] = odm_cats
    else:
        # Fallback bucket; German for 'not yet categorized'.
        record['categories'] = ['Noch nicht kategorisiert']
    finalrecords.append(record)
if (verbose):
    print 'Done. Adding to DB.'
#Write data to the DB
metautils.setsettings(settings)
#Add data
metautils.addSimpleDataToDB(datafordb=finalrecords, originating_portal='http://www.bochum.de/opendata', checked=True, accepted=True, remove_data=True)