def fgdc_to_ogp_csv(fgdc_path, return_header=False):
    """Convert one FGDC metadata XML file into a single OGP-ingest CSV row.

    Parameters:
        fgdc_path: path to the FGDC XML file to parse.
        return_header: when True, return a ``(header_row, csv_row)`` tuple;
            otherwise return just ``csv_row``.

    NOTE(review): a second definition of this same function appears later in
    this file and shadows this one at import time — confirm which version
    (timeperd/caldate + https here vs. lineage/pubdate + http below) is the
    intended survivor.
    """
    with open(fgdc_path) as f:
        xml = BeautifulStoneSoup(f)

    csv_fields = {}
    csv_fields['DataType'] = xml.find("direct").text
    # Bounding box straight from the FGDC bounding-coordinate elements.
    csv_fields['MinX'] = float(xml.find("westbc").text)
    csv_fields['MaxX'] = float(xml.find("eastbc").text)
    csv_fields['MinY'] = float(xml.find("southbc").text)
    csv_fields['MaxY'] = float(xml.find("northbc").text)
    # Derived geometry fields expected by the OGP Solr schema.
    csv_fields['CenterX'] = (csv_fields['MinX'] + csv_fields['MaxX']) / 2
    csv_fields['CenterY'] = (csv_fields['MinY'] + csv_fields['MaxY']) / 2
    csv_fields['HalfWidth'] = csv_fields['MaxX'] - csv_fields['CenterX']
    csv_fields['HalfHeight'] = csv_fields['MaxY'] - csv_fields['CenterY']
    csv_fields['Area'] = 4 * csv_fields['HalfWidth'] * csv_fields['HalfHeight']
    csv_fields['Institution'] = "UNH"
    csv_fields['WorkspaceName'] = "ogp"
    csv_fields['Name'] = name_from_filepath(fgdc_path)
    csv_fields['LayerId'] = "%s.%s" % (csv_fields['Institution'],
                                       csv_fields['Name'])
    csv_fields['timestamp'] = ogp_timestamp_for_now()
    csv_fields['Availability'] = "Online"
    csv_fields['GeoReferenced'] = "TRUE"

    # BUGFIX: the old code popped the *last* keyword to the front of the
    # list ([a, b, c] -> "c, a, b"); a plain join preserves document order
    # and avoids the quadratic string += accumulation.
    themes = xml.findAll("themekey")
    csv_fields['ThemeKeywords'] = '"%s"' % ', '.join(t.text for t in themes)
    places = xml.findAll("placekey")
    csv_fields['PlaceKeywords'] = '"%s"' % ', '.join(p.text for p in places)

    # pubdate is correct according to the FGDC spec, but OGP expects the
    # date to be in the same format as TimeStamp
    csv_fields['ContentDate'] = content_date_for_map(
        xml.find("timeperd").find("caldate").text)
    # Multiple <origin> elements may appear; the last one is used here.
    csv_fields['Originator'] = xml.findAll("origin")[-1].text
    csv_fields['LayerDisplayName'] = xml.find("title").text.replace(
        "Historic Digital Raster Graphic - ", "")
    csv_fields['Publisher'] = xml.find("publish").text
    csv_fields['Access'] = "Public"
    csv_fields['Abstract'] = xml.find("abstract").text
    csv_fields['Location'] = '{"wms": ["https://place.sr.unh.edu:8080/geoserver/wms"]}'
    csv_fields['FgdcText'] = unicode(xml)

    # Emit values in sorted-key order so the data row lines up with the
    # header row produced below.
    fields = sorted(csv_fields.keys())
    data = []
    stringIO = cStringIO.StringIO()
    writer = UnicodeWriter(stringIO)
    for field in fields:
        v = csv_fields[field]
        if isinstance(v, float):  # IDIOM: was `type(...) == float`
            v = str(v)
        data.append(v)
    writer.writerow(data)
    csv_row = stringIO.getvalue()
    stringIO.close()

    if return_header:
        stringIO = StringIO.StringIO()
        writer = csv.writer(stringIO)
        writer.writerow(fields)
        header_row = stringIO.getvalue()
        stringIO.close()
        return (header_row, csv_row)
    else:
        return csv_row
def fgdc_to_ogp_csv(fgdc_path, return_header=False):
    """Build an OGP-ingest CSV row from an FGDC metadata XML file.

    Returns ``(header_row, csv_row)`` when *return_header* is true, else
    just ``csv_row``.

    NOTE(review): this re-definition shadows an earlier function of the same
    name in this file; it reads ContentDate from lineage/pubdate and uses an
    http WMS endpoint — verify this is the intended version.
    """
    def quoted(tags):
        # Double-quoted, comma-separated keyword list. The last tag is
        # emitted first, then the rest in document order (matches the
        # historical output of this converter).
        texts = [tag.text for tag in tags]
        ordered = ([texts[-1]] + texts[:-1]) if texts else []
        return '"%s"' % ', '.join(ordered)

    with open(fgdc_path) as handle:
        xml = BeautifulStoneSoup(handle)

    row = {}
    row['DataType'] = xml.find("direct").text

    # Bounding box and derived geometry.
    west = float(xml.find("westbc").text)
    east = float(xml.find("eastbc").text)
    south = float(xml.find("southbc").text)
    north = float(xml.find("northbc").text)
    row['MinX'], row['MaxX'] = west, east
    row['MinY'], row['MaxY'] = south, north
    row['CenterX'] = (west + east) / 2
    row['CenterY'] = (south + north) / 2
    row['HalfWidth'] = east - row['CenterX']
    row['HalfHeight'] = north - row['CenterY']
    row['Area'] = 4 * row['HalfWidth'] * row['HalfHeight']

    # Fixed institutional metadata.
    row['Institution'] = "UNH"
    row['WorkspaceName'] = "ogp"
    row['Name'] = name_from_filepath(fgdc_path)
    row['LayerId'] = "%s.%s" % (row['Institution'], row['Name'])
    row['timestamp'] = ogp_timestamp_for_now()
    row['Availability'] = "Online"
    row['GeoReferenced'] = "TRUE"

    row['ThemeKeywords'] = quoted(xml.findAll("themekey"))
    row['PlaceKeywords'] = quoted(xml.findAll("placekey"))

    # pubdate is correct according to the FGDC spec, but OGP expects the
    # date to be in the same format as TimeStamp
    row['ContentDate'] = content_date_for_map(
        xml.find("lineage").find("pubdate").text)
    row['Originator'] = xml.findAll("origin")[-1].text
    row['LayerDisplayName'] = xml.find("title").text.replace(
        "Historic Digital Raster Graphic - ", "")
    row['Publisher'] = xml.find("publish").text
    row['Access'] = "Public"
    row['Abstract'] = xml.find("abstract").text
    row['Location'] = '{"wms": ["http://place.sr.unh.edu:8080/geoserver/wms"]}'
    row['FgdcText'] = unicode(xml)

    # Sorted keys keep the data row aligned with the optional header row.
    fields = sorted(row.keys())
    buf = cStringIO.StringIO()
    UnicodeWriter(buf).writerow(
        [str(row[f]) if type(row[f]) == float else row[f] for f in fields])
    csv_row = buf.getvalue()
    buf.close()

    if not return_header:
        return csv_row

    buf = StringIO.StringIO()
    csv.writer(buf).writerow(fields)
    header_row = buf.getvalue()
    buf.close()
    return (header_row, csv_row)
def content_date_for_map(date):
    """Return an OGP-formatted timestamp for a map's content date.

    If *date* is a four-character string (an FGDC-style bare year such as
    ``"1956"``), convert it with ``ogp_timestamp_for_year``; anything else
    falls back to the current time via ``ogp_timestamp_for_now``.
    """
    # IDIOM: isinstance replaces the `type(x) == unicode or type(x) == str`
    # chain (this is Python 2 code, hence `unicode`).
    if isinstance(date, (str, unicode)) and len(date) == 4:
        return ogp_timestamp_for_year(date)
    return ogp_timestamp_for_now()