def get_forecast_from_api():
    """
    Using api.met.no to get the weather forecast.

    :return data: list of per-time forecast entries (BadgerFish dicts)
                  taken from the API response
    """
    # Define endpoint and parameters
    # (lat and lon are for the weather station at Bygdøy used for weather
    # forecast at Fornebu)
    endpoint = "https://api.met.no/weatherapi/locationforecast/1.9/"
    parameters = {"lat": 59.90, "lon": 10.69, "msl": 15}
    try:
        # Pass the query string explicitly via `params` and fail fast on
        # HTTP error codes: the original only caught transport errors, so
        # a 4xx/5xx error page would reach the XML parser and fail there
        # with a confusing message. HTTPError is a RequestException
        # subclass, so the existing handler still applies.
        r = requests.get(endpoint, params=parameters)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Not possible to get the weather forecast from api.met.no")
        print(e)
        sys.exit()

    # Handling the XML response: BadgerFish converts the XML tree into a
    # dict; the json.dumps/json.loads round trip normalizes it to plain
    # dicts and lists.
    bf = BadgerFish()
    json_string = json.dumps(bf.data(et.fromstring(r.content)))
    json_dict = json.loads(json_string)
    # Only the list of per-time forecast entries is relevant to callers.
    data = json_dict["weatherdata"]["product"]["time"]
    return data
def get_reports():
    """Parse every XML report file under ``path_r`` into BadgerFish dicts."""
    converter = BadgerFish(dict_type=dict)

    def _load(entry):
        # Each report lives directly under path_r; read and convert it.
        with open(path_r + '/' + entry, "r") as handle:
            return converter.data(fromstring(handle.read()))

    return [_load(name) for name in listdir(path_r)]
def convertXmlToDict(xmlContent):
    """Convert an XML string into an OrderedDict using BadgerFish notation."""
    import xml.etree.ElementTree as ET
    from xmljson import BadgerFish
    from collections import OrderedDict

    # xml_fromstring=False keeps attribute/text values as plain strings
    # instead of converting them to ints/floats/bools.
    converter = BadgerFish(dict_type=OrderedDict, xml_fromstring=False)
    tree = ET.fromstring(xmlContent)
    # Round-trip through a JSON string to normalize the structure.
    return json.loads(json.dumps(converter.data(tree)))
def xmlToJson(data):
    """
    Convert an XML response body into a dictionary via BadgerFish notation.

    BadgerFish uses "$" for XML text content and "@" to prefix attributes.
    """
    raw = data.text
    # Strip the (single) default namespace declaration so the resulting
    # dictionary keys are not namespace-qualified.
    cleaned = re.sub(' xmlns="[^"]+"', '', raw, count=1)
    converter = BadgerFish(dict_type=dict)
    return converter.data(fromstring(cleaned))
def hpfeedsend(self, eformat):
    """ workaround if hpfeeds broker is offline as otherwise hpfeeds lib will loop connection attempt """
    # Probe the broker with a raw TCP connect (2 s timeout) before handing
    # control to the hpfeeds library, which would otherwise retry forever.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(2)
    result = sock.connect_ex((self.ECFG["host"], int(self.ECFG["port"])))
    sock.close()
    if result != 0:
        """ broker unavailable """
        # connect_ex returned a non-zero errno: skip submission this round.
        self.logger.warning(
            f"HPFEEDS broker is configured to {self.ECFG['host']}:{self.ECFG['port']} but is currently unavailable. Disabling hpfeeds submission for this round!",
            '2')
        return (False)
    try:
        hpc = ''
        # Use a TLS broker connection when a certificate file is configured
        # ("none" disables TLS); reconnect=False to avoid retry loops.
        if self.ECFG["tlscert"].lower() != "none":
            hpc = hpfeeds.new(self.ECFG["host"], int(self.ECFG["port"]),
                              self.ECFG["ident"], self.ECFG["secret"],
                              certfile=self.ECFG["tlscert"],
                              reconnect=False)
            print(
                f' -> Connecting to {hpc.brokername}/{self.ECFG["host"]}:{self.ECFG["port"]} via TLS'
            )
        else:
            hpc = hpfeeds.new(self.ECFG["host"], int(self.ECFG["port"]),
                              self.ECFG["ident"], self.ECFG["secret"],
                              reconnect=False)
            print(
                f' -> Connecting to {hpc.brokername}/{self.ECFG["host"]}:{self.ECFG["port"]} via none TLS'
            )
        """ remove auth header """
        # Strip Authentication elements so credentials are never published.
        etree.strip_elements(self.esm, "Authentication")
        # Publish each collected event either as raw XML bytes or as
        # BadgerFish-notation JSON, depending on the requested format.
        for i in range(0, len(self.esm)):
            if eformat == "xml":
                hpc.publish(
                    self.ECFG["channels"],
                    etree.tostring(self.esm[i], pretty_print=False))
            if eformat == "json":
                bf = BadgerFish(dict_type=OrderedDict)
                hpc.publish(self.ECFG["channels"],
                            json.dumps(bf.data(self.esm[i])))
        return (True)
    except hpfeeds.FeedException as e:
        self.logger.error(f"HPFeeds Error ({e})", '2')
        return (False)
def xml2json(element, conv='parker'):
    """Takes an XML record and returns the json representation of it."""
    # Map convention name -> converter class; xml_fromstring=str keeps every
    # value as text instead of type-guessing.
    factories = {'bf': BadgerFish, 'parker': Parker}
    factory = factories.get(conv)
    if factory is None:
        logging.critical('Invalid XML2JSON Convention: ' + conv)
        raise ValueError('The parameter @conv should be "bf" or "parker" not ' + conv)
    convention = factory(xml_fromstring=str)
    data = convention.data(element)
    return json.dumps(data, indent=' ', ensure_ascii=False)
def listen_enter(opts, vars):
    """Create the BadgerFish converter this listener will use."""
    # OrderedDict preserves the order of attributes and children;
    # opts['convert'] decides whether string values get type-converted.
    vars['bf'] = BadgerFish(dict_type=OrderedDict,
                            xml_fromstring=opts['convert'])
def get_building_info(self, prop_id):
    """
    Get the building info based on a property

    Args:
        prop_id : the property ID

    Returns:
        Dictionary : BadgerFish output of XML
    """
    resource = '{0}/building/{1}'.format(self.domain, prop_id)
    self.logger.debug("Pulling data from {0}".format(resource))
    response = self.session.get(resource)
    # Raise the HTTP error for any non-OK status code.
    if response.status_code != requests.codes.ok:
        return response.raise_for_status()
    # Parse the XML payload and convert it into a plain dict.
    converter = BadgerFish(dict_type=dict)
    return converter.data(Et.fromstring(response.text))
def get_account_info(self):
    """
    Get Account information for the current user

    Returns:
        Dictionary : BadgerFish style representation of Energy Star
        response e.g.:
            account_info = client.get_account_info()
            account_id = account_info["account"]["id"]["$"]

    Notes:
        BadgerFish : http://www.sklar.com/badgerfish/
    """
    resource = self.domain + "/account"
    self.logger.debug("Pulling data from {0}".format(resource))
    response = self.session.get(resource)
    # Raise the HTTP error for any non-OK status code.
    if response.status_code != requests.codes.ok:
        return response.raise_for_status()
    # Convert the XML body into its BadgerFish dictionary form.
    converter = BadgerFish(dict_type=dict)
    return converter.data(Et.fromstring(response.text))
def xmlToJson(data):
    """Parse an HTML/XML string and return its BadgerFish dictionary form."""
    converter = BadgerFish(dict_type=OrderedDict)
    root = lxml.html.fromstring(data)
    return converter.data(root)
    return new_sent


def is_ascii(s):
    # True when every character in s is a 7-bit ASCII code point.
    return all(ord(c) < 128 for c in s)


# Global vocabularies shared by the processing steps:
# word/lemma string -> id, plus the reverse id -> string mappings.
wordDict = {}
revWordDict = {}
lemmaDict = {}
revLemmaDict = {}

if __name__ == '__main__':
    # BadgerFish converter preserving element/attribute order.
    bf = BadgerFish(dict_type=OrderedDict)
    # WikiQA corpus splits, expected in the current working directory.
    trainFile = open("WikiQA-train.tsv")
    trainFiles = [trainFile]
    testFile = open("WikiQA-test.tsv")
    testFiles = [testFile]
    validFile = open("WikiQA-dev.tsv")
    validFiles = [validFile]
    fileList = [trainFiles, testFiles, validFiles]
    # Diagnostic output file (continues past this excerpt).
    debugFile = open("debug", "w")
from pytorch_pretrained_bert import BertTokenizer, BertForNextSentencePrediction, BertConfig
import requests
import pandas as pd
import regex as re
from sklearn.model_selection import train_test_split
import time
from fastprogress import progress_bar
from xmljson import BadgerFish
from xml.etree.ElementTree import fromstring

# Fetch the NYT Technology RSS feed and decode the XML payload to text.
r = requests.get('https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml'
                 ).content.decode()

# BadgerFish maps XML text content to "$" keys and attributes to "@" keys.
bf = BadgerFish(dict_type=dict)

# Convert the XML document into a plain Python dictionary.
xml_dict = dict(bf.data(fromstring(r)))

# Collect the headline of every <item> element in the feed.
title_list = [entry['title']['$'] for entry in xml_dict['rss']['channel']['item']]
print(title_list[:10])
print(len(title_list))

# Collect the description that accompanies each headline.
desc_list = [entry['description']['$'] for entry in xml_dict['rss']['channel']['item']]
def _get_collection_data(filename):
    """
    Load an XML collection file and parse it into BadgerFish form.

    :param filename: path to an existing XML file
    :return: tuple of (Path, OrderedDict) — the resolved path and the
             BadgerFish representation of the document root
    """
    path = Path(filename)
    assert path.is_file()
    bf = BadgerFish(dict_type=OrderedDict)
    # read_bytes() opens and closes the file deterministically; the
    # original open(path, 'rb').read() leaked the handle until GC.
    return path, bf.data(fromstring(path.read_bytes()))
logging.info("New KMZ files are added", extra={"verbosity": 1})
ll = len(added)
# Process every newly added KMZ archive.
for zk in range(ll):
    print("Added: ", ", ".join(added))
    dir = config.get("Section1", "path1")
    fname = os.path.join(dir, added[zk])
    logging.info("KMZ file is passed as argument to kmz_to_kml function", extra={"verbosity": 1})
    # Unpack the KMZ archive into its KML document.
    kmlfile = kmz_to_kml(fname)
    # Parse once with minidom for the generic node dump ...
    dom = md.parse(kmlfile)
    root = dom.documentElement
    logging.info("Obtained kml file is generically parsed", extra={"verbosity": 1})
    print_node(root)
    # ... and once with ElementTree for the JSON conversions below.
    tree = ET.parse(kmlfile)
    root1 = tree.getroot()
    xmlstr = ET.tostring(root1, encoding='utf8', method='xml')
    bf = BadgerFish(dict_type=OrderedDict)
    # bf_str keeps all values as raw strings (no int/float coercion).
    bf_str = BadgerFish(xml_fromstring=False)
    # Output 1: BadgerFish JSON rendering of the KML document.
    t_n = os.path.splitext(config.get("Section2", "path1"))[0] + os.path.splitext(basename(fname))[0] + ".geojson"
    f_file = open(t_n, 'w')
    logging.info("kml file is converted to geojson format1", extra={"verbosity": 1})
    f_file.write(dumps(bf_str.data(fromstring(xmlstr))))
    f_file.close()
    # Output 2: xmltodict rendering of the same document, pretty-printed.
    o = xmltodict.parse(xmlstr)
    t_s = os.path.splitext(config.get("Section2", "path2"))[0] + os.path.splitext(basename(fname))[0] + ".geojson"
    g_file = open(t_s, 'w')
    rr = json.dumps(o)
    logging.info("kml file is converted to geojson format2", extra={"verbosity": 1})
    g_file.write(json.dumps(o, sort_keys=False, indent=4, separators=(',', ': ')))
    g_file.close()
    # Output 3 (writing continues past this excerpt).
    t_k = os.path.splitext(config.get("Section2", "path3"))[0] + os.path.splitext(basename(fname))[0] + ".geojson"
    h_file = open(t_k, 'w')
def xml2json(xmlfile):
    """
    Parse an XML file and return its BadgerFish (OrderedDict) form.

    :param xmlfile: path of the XML file to read
    :return: OrderedDict in BadgerFish notation
    """
    bf = BadgerFish(dict_type=OrderedDict)
    # Close the file deterministically; the original open(...).read()
    # leaked the file handle until garbage collection.
    with open(xmlfile) as handle:
        return bf.data(lxml.etree.fromstring(handle.read()))
logging.info("output file is : %s", outfile)
# Ensure the data directory exists before any output is written.
if not os.path.exists(dir_data):
    os.makedirs(dir_data)
if limit != 0:
    logging.info("input file limit activated %s definitions", str(limit))
# check input file presency
if not os.path.exists(infile):
    logging.error("input file %s not found", infile)
    sys.exit(8)
# BadgerFish converter; xml_fromstring=False keeps values as raw strings.
bf = BadgerFish(dict_type=OrderedDict, xml_fromstring=False)
# read xml structure
with open(infile) as fd:
    text = fd.read()
xml_json = bf.data(ET.fromstring(text))
# test purpose : print json structure
# json_print(xml_json, 0)
# Attributes to extract per element type (dict continues past excerpt).
attr = {}
attr.update(
    {"Topology_Container": ["@name", "@model_type", "@model_handle"]})
attr.update({
    "Device": ["@name", "@network_address", "@model_type", "@model_handle"]
#!/usr/bin/env python2 # -*- coding: utf-8 -*- from __future__ import unicode_literals from optparse import OptionParser import os import sys import codecs import lxml from lxml.etree import tostring, Element from lxml import etree import re from xmljson import BadgerFish from collections import OrderedDict, defaultdict, Counter bf = BadgerFish(dict_type=dict) """ Module for converting the ABSA XML data into a tokenized vertical format """ sys.stdout = codecs.getwriter('utf-8')(sys.__stdout__) sys.stderr = codecs.getwriter('utf-8')(sys.__stderr__) sys.stdin = codecs.getreader('utf-8')(sys.__stdin__) # Tokenizer pattern pattern = r'''(?x)(?u) # set flag (?x) to allow verbose regexps
def xml_to_dict(path_xml):
    """
    Read an XML file and return its BadgerFish dictionary representation.

    :param path_xml: path to a UTF-8 encoded XML file
    :return: dict in BadgerFish notation ("$" = text, "@" = attributes)
    """
    with open(path_xml, 'r', encoding='utf-8') as reader:
        content = reader.read()
    # The original print(ss) dumped the whole file to stdout — leftover
    # debugging output, removed.
    return BadgerFish(dict_type=dict).data(fromstring(content))