def from_xml(cls, xml_folder, name="XML Dataset"):
    """
    Generates a dataset from a folder of XML annotations and corresponding images.

    :param xml_folder: folder containing paired ``.jpg`` and ``.xml`` files
    :type xml_folder: pathlib.Path
    :param name: name of the generated dataset
    :raise ImportError: raised if the ``xmljson`` package cannot be imported
    """
    from xmljson import badgerfish as bf
    from xml.etree.ElementTree import fromstring

    dataset = cls(name)
    image_paths = list(xml_folder.glob("*.jpg"))

    # First pass: collect every category name used in the annotations.
    categories = []
    for image_path in image_paths:
        with open(image_path.with_suffix(".xml"), "r") as f:
            xml = bf.data(fromstring(f.read()))
        # Handle single object case: badgerfish yields a dict, not a list
        if not isinstance(xml["annotation"]["object"], list):
            xml["annotation"]["object"] = [xml["annotation"]["object"]]
        for ann in xml["annotation"]["object"]:
            categories.append(ann["name"]["$"])
    # sort for deterministic category ids across runs
    xml_categories = {
        cat: Category(cat, id=idx + 1)
        for idx, cat in enumerate(sorted(set(categories)))
    }

    # Second pass: build images and attach their bounding-box annotations.
    for idx, image_path in enumerate(image_paths):
        image = Image.from_path(str(image_path))
        image.id = idx
        image.dataset = name
        with open(image_path.with_suffix(".xml"), "r") as f:
            xml = bf.data(fromstring(f.read()))
        if not isinstance(xml["annotation"]["object"], list):
            xml["annotation"]["object"] = [xml["annotation"]["object"]]
        for ann in xml["annotation"]["object"]:
            box = ann["bndbox"]
            cat = ann["name"]["$"]
            bbox = [int(box["xmin"]["$"]), int(box["ymin"]["$"]),
                    int(box["xmax"]["$"]), int(box["ymax"]["$"])]
            image.add(Annotation(image=image, bbox=bbox,
                                 category=xml_categories[cat]))
        dataset.add(image)
    return dataset
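# The "$" and "@" lookups above follow the BadgerFish convention used in most
# of these snippets: element text lands under a "$" key and attributes get an
# "@" prefix. A minimal standalone illustration (the sample XML is made up,
# not part of the original code):
from xml.etree.ElementTree import fromstring
from xmljson import badgerfish as bf

doc = bf.data(fromstring('<object kind="dog"><name>Rex</name></object>'))
assert doc["object"]["@kind"] == "dog"      # attribute -> "@"-prefixed key
assert doc["object"]["name"]["$"] == "Rex"  # element text -> "$" key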
def __init__(self, source):
    """Initializes a Podcast instance based on XML data from a string or file path."""
    logger.debug("Initializing podcast from source: %s", source)
    if os.path.isfile(source):
        tree = ET.parse(source)
        data = bf.data(tree.getroot())
    else:
        data = bf.data(fromstring(source))
    channel = data["rss"]["channel"]
    self.source = source
    self.channel = channel
    self.author = channel.get("author", {}).get("$")
    self.copyright = channel.get("copyright", {}).get("$")
    self.description = channel.get("description", {}).get("$")
    self.explicit = channel.get("explicit", {}).get("$")
    self.generator = channel.get("generator", {}).get("$")
    self.language = channel.get("language", {}).get("$")
    self.managing_editor = channel.get("managingEditor", {}).get("$")
    self.new_feed_url = channel.get("new-feed-url", {}).get("$")
    self.pub_date = channel.get("pubDate", {}).get("$")
    self.subtitle = channel.get("subtitle", {}).get("$")
    self.summary = channel.get("summary", {}).get("$")
    self.title = channel.get("title", {}).get("$")
    self.type = channel.get("type", {}).get("$")
    self.web_master = channel.get("webMaster", {}).get("$")
def get_canadian_weather(city, province=None): # Get city list entries = badgerfish.data( fromstring( get_cached_url( "http://dd.weather.gc.ca/citypage_weather/xml/siteList.xml", timedelta(days=30), )))["siteList"]["site"] matching_entries = [ entry for entry in entries if entry["nameEn"]["$"] == city and ( province is None or entry["provinceCode"]["$"] == province) ] assert len( matching_entries ) == 1, f"Your provided city/province matched {len(matching_entries)} entries: {matching_entries}" entry = matching_entries[0] # Get city forecast forecast = badgerfish.data( fromstring( get_cached_url( f"""http://dd.weather.gc.ca/citypage_weather/xml/{entry["provinceCode"]["$"]}/{entry["@code"]}_e.xml""", timedelta(hours=1), )))["siteData"] return forecast
def get_congress_votes(year, roll_num, congress, session):
    import pandas as pd
    import requests
    from xmljson import badgerfish as bf
    from xml.etree.ElementTree import fromstring
    from pandas.io.json import json_normalize

    # Roll numbers are zero-padded to three digits in the clerk.house.gov URLs.
    roll_num = str(roll_num).zfill(3)
    url = 'http://clerk.house.gov/evs/{}/roll{}.xml'.format(year, roll_num)
    page = requests.get(url)
    try:
        parsed = pd.DataFrame(bf.data(fromstring(page.content)))
        df = json_normalize(
            parsed.loc['vote-data', 'rollcall-vote']['recorded-vote'])
        try:
            df.columns = ['member_full', 'bioguide_id', 'party', 'role',
                          'name', 'state', 'unaccented-name', 'vote']
            df = df[['member_full', 'bioguide_id', 'party', 'role',
                     'state', 'vote']]
        except ValueError:
            # older rolls lack the bioguide_id and name columns
            df.columns = ['member_full', 'party', 'role', 'state', 'vote']
            df.loc[:, 'bioguide_id'] = None
            df = df[['member_full', 'bioguide_id', 'party', 'role',
                     'state', 'vote']]
        df.loc[:, 'year'] = year
        df.loc[:, 'roll'] = roll_num
        df.loc[:, 'congress'] = congress
        df.loc[:, 'session'] = session
        df.loc[:, 'date'] = pd.to_datetime(
            json_normalize(parsed.loc['vote-metadata', 'rollcall-vote'])
            .loc[0, 'action-date.$'])
        return df
    except KeyError:
        print('No date for this vote')
def _dochits_to_objset(self, docHits):
    objset = []
    # iterate through docHits
    for d in docHits:
        # need to descend two layers in API for object metadata
        url_object = d.text
        obj_resp = requests.get(url_object,
                                auth=HTTPBasicAuth(self.uname.strip(),
                                                   self.pswd.strip()))
        objTree = ET.fromstring(obj_resp.content)
        for mdataRef in objTree.findall(
                '{http://preservica.com/EntityAPI/v6.0}'
                'AdditionalInformation/{http://preservica.com/'
                'EntityAPI/v6.0}Metadata/{http://preservica.com/'
                'EntityAPI/v6.0}Fragment[@schema="http://www.open'
                'archives.org/OAI/2.0/oai_dc/"]'):
            url_mdata = mdataRef.text
            mdata_resp = requests.get(url_mdata,
                                      auth=HTTPBasicAuth(self.uname.strip(),
                                                         self.pswd.strip()))
            mdataTree = ET.fromstring(mdata_resp.content)
            # convert each oai_dc record to JSON, stripping the DC namespace
            records = [
                json.dumps(badgerfish.data(x)).replace(
                    '{http://purl.org/dc/elements/1.1/}', '')
                for x in mdataTree.findall(
                    '{http://preservica.com/XIP/v6.0}MetadataContainer/'
                    '{http://preservica.com/XIP/v6.0}Content/'
                    '{http://www.openarchives.org/OAI/2.0/oai_dc/}dc')
            ]
            # need to inject Preservica ID into json for isShownAt/isShownBy
            preservica_id = url_object.split('information-objects/')[1]
            jRecord = json.loads(records[0])
            jRecord.update({"preservica_id": {"$": preservica_id}})
            objset.append(jRecord)
    self.doc_current += len(docHits)
    if self.url_next:
        self.url_API = self.url_next
    return objset
def handle(self, *args, **options):
    r = requests.get(XML_SOURCE)
    root = ET.fromstring(r.text)
    amount = 0
    for item in root.iter('item'):
        d = bf.data(item)
        images = []
        # image_link is optional; a plain key lookup would raise KeyError
        if d['item'].get('image_link'):
            images.append(d['item']['image_link']['$'])
        data = {
            'identifier': d['item']['id']['$'],
            'title': d['item']['title']['$'],
            'start_date': start_date,  # assumed to be defined elsewhere in this command
            'kind': d['item']['vertragsart']['$'],
            'link': d['item']['link']['$'],
            'description': d['item']['description']['$'],
            'location': d['item']['standort']['$'],
            'images': images,
            'contact_email': CONTACT_EMAIL,
            'company': {
                'name': d['item']['unternehmen']['$'],
                'address': {
                    'street': '',
                    'zip': '',
                    'city': d['item']['standort']['$'],
                }
            }
        }
        print(data['kind'])
        amount += 1
    print(amount)
def formatEmailBodyToJson(emailBody):
    ## Flatten this email text into a single line of text suitable for translation into json.
    # Emails come out of google with a line limit, and the continuation character is an
    # equals sign: look for an equals sign followed by a newline and remove it.
    emailBody = emailBody.replace(b'=\n', b'')
    # This data from google also has some quoted-printable artifacts: "=3D" encodes an
    # equals sign, and after the replacement above we are just left with "3D". Remove it.
    emailBody = emailBody.replace(b'3D', b'')
    # The media companies also pollute their xml with a namespace declaration that makes
    # conversion to json impossible. Remove it. This is all found inside the <Infringement> tag.
    emailBody = (emailBody[:emailBody.find(b'xmlns=')]
                 + emailBody[emailBody.find(b'.xsd"') + 5:])
    # At this stage we still have the entire email; we only want the XML data. Look for the
    # start of the XML. The XML typically ends with the closing Infringement tag, so look
    # for that to signify the end.
    xmlstart = emailBody.find(b"<?xml")
    xmlend = emailBody.find(b"</Infringement>") + len(b"</Infringement>")
    # Slice the email text into just the XML now that we have the start and end positions.
    emailBody = emailBody[xmlstart:xmlend]
    # Convert this XML into json data.
    jsondata = loads(dumps(bf.data(fromstring(emailBody))))
    return jsondata
def do_sub_element(el_list, el_type):
    # call to ENA for additional elements
    # element types to extract
    do_if = ['SUBMISSION', 'STUDY', 'SAMPLE', 'EXPERIMENT']
    out_list = list()
    if '-' in el_type:
        short_form_type = el_type[el_type.index('-') + 1:]
        if short_form_type in do_if:
            # normalise a single accession into a list
            if isinstance(el_list, str):
                el_list = [el_list]
            for el in el_list:
                # do lookup
                resp = requests.get('http://www.ebi.ac.uk/ena/data/view/'
                                    + el + '%26display%3Dxml').content
                st = resp.decode('utf-8')
                et = fromstring(st)
                data = bf.data(et)
                x = iterate_over_attributes(data, short_form_type)
                out_list.append(x)
    return out_list
def vmID(vcloud_token, vapp):
    try:
        url = "https://10.10.21.155/api/vApp/" + vapp
        headers = {
            'Content-Type': "application/vnd.vmware.admin.organization+xml",
            'x-vcloud-authorization': vcloud_token,
            'Accept': "application/*+xml;version=31.0",
        }
        response = requests.get(url, verify=False, headers=headers)
        doc = ET.fromstring(response.text.encode('utf-8'))
        # serialize, strip the vCloud namespace, then reparse as plain JSON
        jsonStr = json.dumps(bf.data(doc))
        jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}", "")
        y = json.loads(jsonStr)
        print(y)
        print(y["VApp"]["Children"]["Vm"]["@name"])
        print(y["VApp"]["Children"]["Vm"]["@href"])
        vm_id = y["VApp"]["Children"]["Vm"]["@href"].split("/")
        print(vm_id[5])
        return vm_id[5], False
    except Exception:
        print("false")
        return "false", True
def vapp_id(vcloud_token, vdc_id):
    try:
        url = "https://10.10.21.155/api/vdc/" + vdc_id
        headers = {
            'Content-Type': "application/vnd.vmware.admin.organization+xml",
            'x-vcloud-authorization': vcloud_token,
            'Accept': "application/*+xml;version=31.0",
        }
        response = requests.get(url, verify=False, headers=headers)
        doc = ET.fromstring(response.text.encode('utf-8'))
        # serialize, strip the vCloud namespace, then reparse as plain JSON
        jsonStr = json.dumps(bf.data(doc))
        jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}", "")
        y = json.loads(jsonStr)
        href = y["Vdc"]["ResourceEntities"]["ResourceEntity"]["@href"]
        print(href, "debug")
        vapp_id = href.split("/")
        print(vapp_id[5])
        return vapp_id[5], False
    except Exception:
        print("false")
        return "false", True
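# The dump-replace-reload sequence in the vCloud helpers above repeats for
# every call. A minimal hedged refactoring (the helper name and constant are
# illustrative, not part of the original code): parse the response once and
# strip the namespace prefix in a single place.
import json
import xml.etree.ElementTree as ET
from xmljson import badgerfish as bf

VCLOUD_NS = "{http://www.vmware.com/vcloud/v1.5}"

def vcloud_xml_to_dict(xml_text, ns=VCLOUD_NS):
    """Convert a vCloud XML payload to a plain dict with the namespace removed."""
    doc = ET.fromstring(xml_text.encode('utf-8'))
    return json.loads(json.dumps(bf.data(doc)).replace(ns, ""))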
def load_gpx(file_location):
    # bf.data already returns a dict; no need to round-trip through
    # dumps/eval (eval of serialized JSON is unsafe and unnecessary)
    with open(file_location, 'rt') as f:
        my_data = bf.data(fromstring(f.read()))
    a = '{http://www.topografix.com/GPX/1/1}'
    S = []
    md = None
    origin = None
    for p in my_data[a + "gpx"][a + "trk"][a + "trkseg"][a + "trkpt"]:
        datetime_object = datetime.strptime(p[a + "time"]['$'],
                                            '%Y-%m-%dT%H:%M:%SZ')
        # track the earliest timestamp as the origin of the trace
        if md is None or datetime_object < md:
            md = datetime_object
            origin = (float(p['@lat']), float(p['@lon']))
        S.append(
            multi_sensor_point(datetime_object, float(p['@lat']),
                               float(p[a + "ele"]['$']), float(p['@lon']),
                               None, None))
    # duplets of (seconds since start, (lat, lon, elevation))
    X = [[(x.datetime - md).total_seconds(),
          (x.latitude, x.longitude, x.elevation)] for x in S]
    return sorted(X, key=operator.itemgetter(0))
def topiczoom(text):
    def remove_namespaces(e):
        # strip the namespace prefix from every tag in the tree
        # (getiterator() was removed in Python 3.9; iter() replaces it)
        for elem in e.iter():
            if not hasattr(elem.tag, 'find'):
                continue
            i = elem.tag.find('}')
            if i >= 0:
                elem.tag = elem.tag[i + 1:]
        return e

    params = (('lang', 'de'),)
    response = requests.post('http://twittopic.topiczoom.de/quickindex.xml',
                             params=params, data=text)
    feedjson = bf.data(remove_namespaces(fromstring(response.text)))
    topiczoom_list = feedjson["Envelope"]["Body"]["TZTopicSet"]["TZTopic"]
    topiczoom_list = sorted(topiczoom_list, reverse=True,
                            key=lambda x: x["@weight"])
    return topiczoom_list
def get_GO_terms(pdb_id):
    pdb, chain = pdb_id[:4], pdb_id[4:]
    req = requests.get('http://www.rcsb.org/pdb/rest/goTerms?structureId=%s.%s'
                       % (pdb, chain))
    if req.status_code != 200:
        raise requests.HTTPError('HTTP Error %s' % req.status_code)
    data = bf.data(fromstring(req.content))['goTerms']
    return [] if 'term' not in data else data['term']
def create_sessions(hostname, username, password):
    url = "https://" + hostname + "/api/sessions"
    # b64encode needs bytes; decode back to str for the header value
    credentials = base64.b64encode(
        "{}:{}".format(username, password).encode('utf-8')).decode('ascii')
    headers = {
        'Accept': "application/*+xml;version=31.0",
        'Authorization': "Basic " + credentials
    }
    response = requests.post(url, verify=False, data="", headers=headers)
    try:
        if response.status_code == 200:
            print(response)
            # the session token is returned in the x-vcloud-authorization header
            return response.headers.get("x-vcloud-authorization")
    except Exception:
        print(response)
def storeTree(item):
    print(f'{item.name} start: {datetime.now()}')
    actXSD = xmlschema.XMLSchema(
        '/Users/john/Development/HumAI_data/Schema/iati-activities-schema.xsd')
    orgXSD = xmlschema.XMLSchema(
        '/Users/john/Development/HumAI_data/Schema/iati-organisations-schema.xsd'
    )
    db = firestore.Client()
    try:
        tree = ET.parse(item)
    except ET.ParseError as exp:
        print('Parse error:', exp, file=sys.stderr)
    else:
        tree_ = bf.data(tree.getroot())
        if "iati-activities" in tree_:
            print('Prune activity ', item.name)
            # pruneTree(db, None, tree_, actXSD)
        elif "iati-organisations" in tree_:
            print('Prune organisation ', item.name)
            # pruneTree(db, None, tree_, orgXSD)
    print(f'{item.name} end: {datetime.now()}')
def json(self, conversion: _Text = 'badgerfish') -> Mapping:
    """A JSON Representation of the XML. Default is badgerfish.

    :param conversion: Which conversion method to use.
        (`learn more <https://github.com/sanand0/xmljson#conventions>`_)
    """
    if not self._json:
        # compare strings with ==, not identity (`is`)
        if conversion == 'badgerfish':
            from xmljson import badgerfish as serializer
        elif conversion == 'abdera':
            from xmljson import abdera as serializer
        elif conversion == 'cobra':
            from xmljson import cobra as serializer
        elif conversion == 'gdata':
            from xmljson import gdata as serializer
        elif conversion == 'parker':
            from xmljson import parker as serializer
        elif conversion == 'yahoo':
            from xmljson import yahoo as serializer
        else:
            raise ValueError('Unknown conversion: %s' % conversion)
        self._json = json.dumps(serializer.data(etree.fromstring(self.xml)))
    return self._json
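# The conversion names accepted above map to xmljson's built-in conventions,
# which differ mainly in how they encode text and attributes. A small
# standalone comparison of the two most common ones (the sample XML is made up):
from xml.etree.ElementTree import fromstring
from xmljson import badgerfish, parker

xml = fromstring('<root><x a="one">hi</x></root>')
# badgerfish keeps attributes ("@a") and text ("$"); returns OrderedDicts
print(badgerfish.data(xml))  # ~ {'root': {'x': {'@a': 'one', '$': 'hi'}}}
# parker drops the root element and all attributes
print(parker.data(xml))      # ~ {'x': 'hi'}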
def handle_xml_pull(request):
    if 'query' in request.GET:
        xmlFileId = request.GET['query']
    else:
        xmlFileId = request.POST['fileId']
    try:
        f = XmlSession.objects.get(id__exact=xmlFileId)
    except ObjectDoesNotExist:
        return redirect('/displayer/?error=xml-lost')
    result = xel.parseString(str(f.etreeString))
    root = result.root
    if root is None:
        return redirect('/displayer/?error=xml-parse-error')
    request.session['fileId'] = xmlFileId
    context = {
        "file_json": dumps(bf.data(root)),
        "delete_list": f.delete_list,
        "file_name": f.xml_file_name,
        "parse_error": dumps(result.error)
    }
    return render(request, 'displayer/select.html', context)
def get_all_departments(): api_response = requests.get( 'http://courses.illinois.edu/cisapp/explorer/catalog/2017/spring.xml') api_json = bf.data(fromstring(api_response.text)) departments_arr = api_json['{http://rest.cis.illinois.edu}term'][ 'subjects']['subject'] return departments_arr
def xml_to_json(input_path, output):
    i = 0
    template_output = output
    # BadgerFish (bf): uses "$" for text content and "@" to prefix attributes.
    # fromstring parses an XML section from a string constant.
    with open(input_path + ".xml", "r") as fin:
        jsonOut = bf.data(fromstring(fin.read()))
    # If the output file name already exists, append an incrementing suffix
    # depending on how many copies are already there.
    while os.path.isfile(output + '.json'):
        if output == template_output:
            output += str(i)
        else:
            output = template_output + str(i)
        i += 1
    with open(output + ".json", "w+") as newFile:
        json.dump(jsonOut, newFile, ensure_ascii=False)
def get_senate_by_gov():
    import pandas as pd
    import requests
    import urllib.request
    from xmljson import badgerfish as bf
    from xml.etree.ElementTree import fromstring
    from pandas.io.json import json_normalize

    # Some of the urls don't work the first time, but by setting a proxy
    # requests sends info to senate.gov to connect to the page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    }
    url = 'http://www.senate.gov/general/contact_information/senators_cfm.xml'
    print(url)
    r = requests.get(url, headers=headers,
                     proxies=urllib.request.getproxies())
    df = json_normalize(
        pd.DataFrame(bf.data(fromstring(r.content)))
        .loc['member', 'contact_information'])
    # strip the badgerfish "$" suffix and dots from the column names;
    # regex=False is needed, since "$" and "." are regex metacharacters
    df.columns = (df.columns.str.replace('$', '', regex=False)
                            .str.replace('.', '', regex=False))
    return df
def getUpdatedPropertyDetails(zpid):
    payload = {'zws-id': ZWS_ID, 'zpid': zpid}
    response = requests.get(build_url(GET_UPDATED_PROPERTY_DETAILS_API_NAME),
                            params=payload)
    # round-trip through json to turn badgerfish's OrderedDicts into
    # plain dicts and lists
    res_json = loads(dumps(bf.data(fromstring(response.text))))
    return res_json
def show(request):
    if request.method == "POST":
        # e.g. address="2114 Bigelow Ave", citystatezip="Seattle, WA"
        propert = request.POST.get('address')
        l = propert.split(',')
        address = l[0]
        citystatezip = l[1]
        url = ("http://www.zillow.com/webservice/GetDeepSearchResults.htm"
               "?zws-id=X1-ZWz19lq7ppy70r_305s0&address=%s&citystatezip=%s"
               % (address, citystatezip))
        response = requests.get(url)
        d = bf.data(fromstring(response.content))
        # bf.data already returns mappings; no dict() re-wrapping needed
        result = d['{http://www.zillow.com/static/xsd/SearchResults.xsd}'
                   'searchresults']['response']['results']['result']
        addr = result['address']
        final_dct = {
            'bedrooms': result['bedrooms']['$'],
            'bathrooms': result['bathrooms']['$'],
            'city': addr['city']['$'],
            'state': addr['state']['$'],
            'street': addr['street']['$'],
        }
        name = request.POST.get('name')
        print(name)
        return render(request, 'edit_your_property.html',
                      {'data': final_dct, 'zipcode': citystatezip,
                       'address': l[0], 'name': name})
def xliff_to_json(filename):
    """
    Converts a single XLIFF file to a JSON equivalent.

    Removes some excess data such as notes (context).
    Requires a lot of memory to complete as files are large.
    """
    filename = folder + filename
    with codecs.open(filename, 'r', 'utf-8') as fin:
        xliff = fin.read()
    jsonx = bf.data(fromstring(xliff))
    for file in jsonx['xliff']['file']:
        # do some cleanup
        if 'group' in file['body']:
            file['body'].pop('group')
        # a file with a single string yields a dict rather than a list
        units = file['body']['trans-unit']
        if not isinstance(units, list):
            units = [units]
        # go through all strings and remove notes to reduce file size
        for unit in units:
            unit.pop('note', None)
    # save new content as JSON file
    with open(filename + '.2.json', 'w', encoding='utf8') as fout:
        json.dump(jsonx, fout, ensure_ascii=False)
def get_vdc(cluster, vcloud_token, org_id):
    url = "https://%s/api/org/%s" % (cluster['vcloud_ip'], org_id)
    headers = {
        'x-vcloud-authorization': vcloud_token,
        'Accept': "application/*+xml;version=31.0",
        'Content-Type': "application/vnd.vmware.admin.user+xml"
    }
    try:
        response = requests.get(url, verify=False, headers=headers)
        doc = ET.fromstring(response.text.encode('utf-8'))
        jsonStr = json.dumps(bf.data(doc))
        jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}", "")
        vdc_objs = json.loads(jsonStr)
        links = vdc_objs["Org"]["Link"]
        if links == {}:
            print("No vdc list found")
            return None, False
        # collect every link that points at a VDC, then return the full list
        vdc_list = []
        for link in links:
            if link["@type"] == "application/vnd.vmware.vcloud.vdc+xml":
                vdc_list.append(link["@name"])
        return vdc_list, False
    except Exception as e:
        print(e)
        return None, True
def utility():
    # convert every XML file in the current directory to JSON
    for f in glob.glob('*.xml'):
        n = f.rsplit('.', 1)[0]
        with open(f, "r") as i:
            j = bf.data(fromstring(i.read()))
        with open(n + ".json", "w+") as newFile:
            json.dump(j, newFile, ensure_ascii=False)
def get_json(self):  # TODO: drop the intermediate JSON step, or keep it?
    """
    Returns a string with the full JSON structure of the current object,
    using the BadgerFish convention.

    :return: str()
    """
    E_tree = self.__xml_element()
    js = badgerfish.data(E_tree)
    return json.dumps(js)
def convertXMLtoJSON(filename):
    with open(filename) as f:
        XML_CONTENT = f.read()
    root = ET.fromstring(XML_CONTENT)
    JSON_CONTENT = bf.data(root)
    # strip the .xml extension before appending .json
    if filename.endswith('.xml'):
        filename = filename[:-4]
    saveJSONtoFile(JSON_CONTENT, filename + '.json')
def convert(xmlfile):
    """
    convert(xmlfile)

    Takes an XML string, converts it, and returns the JSON equivalent.
    """
    tree = fromstring(xmlfile)
    jsonfile = jsonify(bf.data(tree))
    return jsonfile
def traffic_log(self, nlogs=None, filter=None):
    ''' go through traffic log '''
    # needs extra_qs='dir=backward' // bug PAN-74932
    self.xapi.log(log_type='traffic', nlogs=nlogs, filter=filter,
                  extra_qs='dir=backward')
    return bf.data(fromstring(self.xapi.xml_root().encode('utf-8')))
def getCompsZpids(zpid, count=25, rentzestimate=False):
    # Zillow's GetComps API spells the parameter "rentzestimate"
    payload = {'zws-id': ZWS_ID, 'zpid': zpid, 'count': count,
               'rentzestimate': rentzestimate}
    response = requests.get(build_url(GET_COMPS_API_NAME), params=payload)
    # round-trip through json to turn badgerfish's OrderedDicts into
    # plain dicts and lists
    res_json = loads(dumps(bf.data(fromstring(response.text))))
    comp_zpids = [x['$'] for x in gen_dict_extract('zpid', res_json)]
    return comp_zpids
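# gen_dict_extract is not defined in the snippet above; a minimal sketch of
# the helper it appears to rely on (the name and behaviour are assumptions
# based on a common recipe): recursively yield every value stored under `key`
# anywhere in a nested dict/list structure.
def gen_dict_extract(key, var):
    if isinstance(var, dict):
        for k, v in var.items():
            if k == key:
                yield v
            if isinstance(v, (dict, list)):
                yield from gen_dict_extract(key, v)
    elif isinstance(var, list):
        for item in var:
            yield from gen_dict_extract(key, item)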
def convert_xml_to_json(self, eles):
    print('convert to json...')
    kanjis = [bf.data(e) for e in eles]
    print('clean...')
    # keep only the payload under each <character> root element
    kanjis = [k['character'] for k in kanjis]
    print(kanjis[0])
    print(kanjis[1])
def __init__(self, url_harvest, extra_data): '''Grab file and copy to local temp file''' super(CMISAtomFeedFetcher, self).__init__(url_harvest, extra_data) # parse extra data for username,password uname, pswd = extra_data.split(',') resp = requests.get(url_harvest, auth=HTTPBasicAuth(uname.strip(), pswd.strip())) self.tree = ET.fromstring(resp.content) self.objects = [badgerfish.data(x) for x in self.tree.findall('./{http://www.w3.org/2005/Atom}' 'entry/{http://docs.oasis-open.org/' 'ns/cmis/restatom/200908/}children//' '{http://www.w3.org/2005/Atom}entry') ] self.objects_iter = iter(self.objects)
def oaiItemHarvest(oaiURL, identifier, autoNum):
    # fetch a single OAI-PMH record
    params = urllib.parse.urlencode({'verb': 'GetRecord',
                                     'metadataPrefix': 'dcs',
                                     'identifier': identifier})
    requestURL = oaiURL + "?" + params
    with urllib.request.urlopen(requestURL) as response:
        uRes = response.read().decode('utf-8')
    # write the raw XML alongside its JSON conversion
    oaiXML = xml.dom.minidom.parseString(uRes.encode('utf-8'))
    with codecs.open(str(autoNum) + '.xml', 'w', encoding='utf-8') as dataz:
        oaiXML.writexml(dataz, indent='', newl='')
    with open(str(autoNum) + '.json', 'w') as outFile:
        json.dump(bf.data(fromstring(uRes.encode('utf-8'))), outFile)
def asJSON(self):
    if self.content is None:
        return None
    jdata = bf.data(fromstring(self.asXML()))
    # json.dumps always emits str in Python 3; no encoding argument is needed
    return json.dumps(jdata, indent=self.indent)
def xmlToJson(self, xmlData=None):
    """Converts XML to JSON as the service"""
    jsonData = ''
    if xmlData:
        jsonData = dumps(bf.data(fromstring(xmlData)))
    return jsonData