def test_json_from_url(self):
    data = Json2xml.fromurl('https://coderwall.com/vinitcool76.json').data
    data_object = Json2xml(data)
    xml_output = data_object.json2xml()
    dict_from_xml = xmltodict.parse(xml_output)
    # since it's a valid XML, xml to dict is able to load it and return
    # elements from under the all tag of xml
    self.assertTrue(type(dict_from_xml['all']) == OrderedDict)
def test_json_from_file_works(self):
    data = Json2xml.fromjsonfile('examples/example.json').data
    data_object = Json2xml(data)
    xml_output = data_object.json2xml()
    dict_from_xml = xmltodict.parse(xml_output)
    # since it's a valid XML, xml to dict is able to load it and return
    # elements from under the all tag of xml
    self.assertTrue(type(dict_from_xml['all']) == OrderedDict)
def json2xml(jsonfile, URL=False):
    try:
        from src.json2xml import Json2xml
    except ImportError:
        return "PLEASE INSTALL JSON2XML"
    if URL is False:
        # treat the argument as a path to a local JSON file
        data = Json2xml.fromjsonfile(jsonfile).data
    else:
        # treat the argument as a URL pointing at a JSON document
        data = Json2xml.fromurl(jsonfile).data
    data_object = Json2xml(data)
    return data_object.json2xml()
def test_json_from_string(self):
    data = Json2xml.fromstring(
        '{"login":"******","id":1,"avatar_url":"https://avatars0.githubusercontent.com/u/1?v=4"}'
    ).data
    data_object = Json2xml(data)
    xml_output = data_object.json2xml()
    dict_from_xml = xmltodict.parse(xml_output)
    print('keys', dict_from_xml)
    # since it's a valid XML, xml to dict is able to load it and return
    # elements from under the all tag of xml
    self.assertTrue(type(dict_from_xml['all']) == OrderedDict)
def main(argv=None):
    parser = argparse.ArgumentParser(description='Utility to convert json to valid xml.')
    parser.add_argument('--url', dest='url', action='store')
    parser.add_argument('--file', dest='file', action='store')
    args = parser.parse_args()
    if args.url:
        url = args.url
        data = Json2xml.fromurl(url)
        print(Json2xml.json2xml(data))
    if args.file:
        file = args.file
        data = Json2xml.fromjsonfile(file)
        print(Json2xml.json2xml(data))
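# A minimal entry-point sketch for the CLI above; the module filename used in the
# invocation examples is only an assumption, not taken from the original project.
#
#   python json2xml_cli.py --file examples/example.json
#   python json2xml_cli.py --url https://coderwall.com/vinitcool76.json

if __name__ == '__main__':
    main()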
def get_lga(collection):
    params = request.args
    format = params.get('format', 'xml')
    if not lgaStr in params and not filterStr in params:
        response = jsonify(error="Empty input")
        return reponse_format(response, format, True), 400
    if filterStr in params:
        queryStr, queryList = bnf2db.covert2DB(params[filterStr])
        result = DB.getDataByFilter(queryStr, queryList, collection)
        if not result:
            response = jsonify(error="Can't find it in DB")
            return reponse_format(response, format, True), 404
        result = dumps(result, indent=4)
        response = Response()
        response.data = result
        return reponse_format(response, format, True), 200
    suburbList = paramHandle(params)
    if not suburbList:
        response = jsonify(error="Invalid input, can't find a matching suburb")
        return reponse_format(response, format, True), 404
    fetch_list, exist_list = DB.checkListExistInDB(suburbList, collection)
    if len(exist_list) == 0:
        return jsonify(error="Can't find suburb info in DB"), 404
    data = DB.getListFile(collection, exist_list)
    data_object = Json2xml(data)
    jsonData = dumps(data, indent=4)
    xmlData = data_object.json2xml()
    formatData = xmlData
    if format == "json":
        formatData = jsonData
    return formatData
def reponse_format(response, format, isATOM):
    response.headers['Content-Type'] = 'application/xml'
    if format == 'json':
        response.headers['Content-Type'] = 'application/json'
        return response
    data = loads(response.data)
    data_object = Json2xml(data)
    xml = data_object.json2xml()
    response.data = xml
    return response
def get_all_lga(collection):
    params = request.form
    format = params.get('format', 'xml')
    data = DB.getAllFile(collection)
    data_object = Json2xml(data)
    jsonData = dumps(data, indent=4)
    xmlData = data_object.json2xml()
    formatData = xmlData
    if format == "json":
        formatData = jsonData
    return formatData
def get_single_lga(collection, suburb):
    params = request.args
    format = params.get('format', 'json')
    if not getPostcode.is_downloadable_suburb(suburb):
        response = jsonify(error="Invalid input")
        return reponse_format(response, format, True), 400
    data = DB.getFile(collection, suburb)
    if not data:
        return jsonify(error="Can't find " + suburb + " info in DB"), 404
    jsonData = dumps(data, indent=4)
    data_object = Json2xml(data)
    xmlData = data_object.json2xml()
    formatData = xmlData
    if format == "json":
        formatData = jsonData
    return formatData
def get_xml_configuration(final_content):
    '''
    :param final_content: json configuration
    :return: Returns xml configuration
    '''
    conf = {}
    conf_lst = []
    for data in final_content:
        conf_inner = {}
        name = data
        value = final_content[data]
        conf_inner['name'] = name
        conf_inner['value'] = value
        conf_lst.append(conf_inner)
    conf['property'] = conf_lst
    data_object = Json2xml(conf)
    conf_xml = data_object.json2xml()
    conf_xml = conf_xml.replace('<all>', '<configuration>')
    conf_xml = conf_xml.replace('</all>', '</configuration>')
    final_configuration = static_xml_content + conf_xml
    return final_configuration
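# A minimal sketch of how get_xml_configuration might be exercised; the sample
# key/value pair below is an assumption for illustration only, and
# static_xml_content is expected to be the XML header already defined in the
# original module.
if __name__ == '__main__':
    sample_conf = {'fs.defaultFS': 'hdfs://localhost:9000'}
    # The helper wraps each key/value in a <property> element and renames the
    # <all> root produced by Json2xml to <configuration>.
    print(get_xml_configuration(sample_conf))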
from src.json2xml import Json2xml

data = Json2xml.fromurl('https://coderwall.com/vinitcool76.json').data
data_object = Json2xml(data)
xml_data = data_object.json2xml()  # xml output

with open('star.xml', 'w') as file:
    file.write(xml_data)
def toXml(self):
    data = Json2xml.fromstring(self.toJson()).data
    dataConverter = Json2xml(data)
    return dataConverter.json2xml()
def json2xml(json_data):
    # Uses a third-party library for the conversion:
    # https://github.com/vinitkumar/json2xml
    data_object = Json2xml(json_data)
    xml_output = data_object.json2xml()
    return xml_output
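# A small usage sketch for the helper above; the sample payload is made up for
# illustration and is not part of the original snippet.
if __name__ == '__main__':
    sample = {'login': 'octocat', 'id': 1}
    # Prints the dict serialized as XML under the library's default <all> root tag.
    print(json2xml(sample))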
def test_is_json_from_file_works(self):
    data = Json2xml.fromjsonfile('examples/example.json').data
    data_object = Json2xml(data)
    xml_output = data_object.json2xml()
    htmlkeys = xml_output.XML_FORMATTERS.keys()
    self.assertTrue('html' in htmlkeys)
def get(self, buoy_id, buoy_data_type):
    # figure out the desired response format, json or XML
    response_mime = flask.request.accept_mimetypes.best_match(
        ['application/json', 'application/xml'])
    if buoy_data_type not in noaa_data_sets:
        response = {
            "message": "Unsupported NOAA data set {buoy_data_type}".format(
                buoy_data_type=buoy_data_type)
        }
        if response_mime == 'application/xml':
            response_data = Json2xml(response).json2xml()
        else:
            response_data = json.dumps(response)
        return flask.Response(response_data, status=400, content_type=response_mime)
    bouy_request_url = noaa_buoy_url.format(buoyid=buoy_id, data_type=buoy_data_type)
    bouy_request = requests.get(bouy_request_url)
    if bouy_request.status_code == 404:
        # Pass through a 404 response from NOAA to the wave_thing client. This is how NOAA indicates
        # that a buoy does not have a data set
        return flask.Response("", status=404)
    elif bouy_request.status_code != 200:
        # The request to NOAA failed. Since this script didn't itself fail, return a 502: bad gateway message
        response = {
            "message": "NOAA URL {noaa_url} returned response code {request_status_code}. Expecting 200"
            .format(noaa_url=bouy_request_url, request_status_code=bouy_request.status_code),
            "upstream code": bouy_request.status_code
        }
        if response_mime == 'application/xml':
            response_data = Json2xml(response).json2xml()
        else:
            response_data = json.dumps(response)
        return flask.Response(response_data, status=502, content_type=response_mime)
    else:
        # request was good.
        buoy_response = {
            "buoy_id": buoy_id,
            "data_type": buoy_data_type,
            "data_points": [],
            "request_timestamp_utc": str(arrow.now('utc'))
        }
        # Ignore the comment lines that start with hash
        data_lines = filter(lambda line: not line.startswith("#"), bouy_request.text.splitlines())
        # Loop over the remaining lines and pass the content to the appropriate decoding function as
        # that is mapped to the data type string. IE data type 'txt' -> txt_response_to_data_points
        for data_line in data_lines:
            buoy_response["data_points"].append(
                noaa_data_sets[buoy_data_type](
                    *timestamp_from_noaa_format_and_normalize_for_missing_data(
                        data_line.split())))
        if response_mime == 'application/xml':
            response_data = Json2xml(buoy_response).json2xml()
        else:
            response_data = json.dumps(buoy_response)
        return flask.Response(response_data, status=200, content_type=response_mime)
def parse(self, response):
    linkitem = LinkItem()
    linkitem['url'] = response.url
    linkitem['response'] = response.status
    linkitem['parsable'] = any(d in response.url for d in parsable_domain_list)
    yield linkitem
    try:
        rawhtml = response.xpath('//html').extract()[0]
        article = DP(html=rawhtml, url=response.url)
        article.get_domaininfo()
        article.inspect_date()
        url_retrieved = []
        url_validate = re.compile(r'^https?')
        # logging.info(article.date_flag)
        # logging.info(article.has_more)
        if article.date_flag:
            article.inspect_article()
            article.clean_data()
            if article.content_flag:
                articleitem = ArticleItem()
                instanceitem = InstanceItem()
                linkritem = LinkRItem()
                articleitem['author'] = article.author
                articleitem['url'] = response.url
                articleitem['title'] = article.title
                articleitem['datetime'] = article.unixtime
                articleitem['domain'] = article.domain
                yield articleitem
                # main article as an instance
                instanceitem['author'] = article.author
                instanceitem['url'] = response.url
                instanceitem['datetime'] = article.datetime
                instanceitem['unixtime'] = article.unixtime
                instanceitem['type'] = 'Article'
                instanceitem['text_body'] = article.content
                instanceitem['text_body_html'] = article.content_html
                instanceitem['likes'] = article.likes
                instanceitem['links_contained'] = []
                instanceitem['relevance'] = article.content_flag
                instanceitem['gen_time'] = time.time()
                for link in article.links:
                    if not url_validate.search(str(link['href'])) == None:
                        instanceitem['links_contained'].append(link['href'])
                        linkritem['link_from'] = response.url
                        linkritem['link_to'] = link['href']
                        linkritem['gen_time'] = instanceitem['gen_time']
                        yield linkritem
                        url_retrieved.append(str(link['href']))
                        yield scrapy.Request(str(link['href']), callback=self.parse)
                instanceitem['links_contained'] = ','.join(instanceitem['links_contained'])
                yield instanceitem
            if article.has_more:
                instance = IP(url=response.url)
                if instance.domain in json2xml_list:
                    instance.get_instanceinfo_json()
                    # logging.info(instance.json_xpath)
                    json_data = Json2xml.fromstring(response.xpath(instance.json_xpath).extract_first()).data
                    json_object = Json2xml(json_data).json2xml()
                    instance_iter = BeautifulSoup(json_object, 'lxml').select(instance.instance_selector)
                    # logging.info(len(instance_iter))
                    for i in instance_iter:
                        instanceitem['author'] = i.find(instance.author_selector).get_text()
                        instanceitem['url'] = response.url
                        instanceitem['datetime'] = i.find_all(instance.datetime_selector)[-1].get_text()
                        instanceitem['unixtime'] = time.mktime(dateparser.parse(instanceitem['datetime']).timetuple())
                        instanceitem['type'] = 'Comment'
                        instanceitem['text_body_html'] = ''
                        instanceitem['text_body'] = i.find_all(instance.content_selector)[-1].get_text()
                        instanceitem['likes'] = ''
                        instanceitem['id'] = i.find_all('url')[-1].get_text()
                        instanceitem['reply_to'] = ''
                        instanceitem['links_contained'] = re.findall(r'(https?://[^\s]+)', instanceitem['text_body'])
                        instanceitem['relevance'] = article.content_flag
                        instanceitem['gen_time'] = time.time()
                        for link in instanceitem['links_contained']:
                            if not url_validate.search(str(link)) == None:
                                linkritem['link_from'] = response.url
                                linkritem['link_to'] = str(link)
                                linkritem['gen_time'] = instanceitem['gen_time']
                                yield linkritem
                                url_retrieved.append(str(link))
                                yield scrapy.Request(str(link), callback=self.parse)
                        instanceitem['links_contained'] = ','.join(instanceitem['links_contained'])
                        if not instanceitem['text_body'] == None:
                            yield instanceitem
                else:
                    instance.get_instanceinfo()
                    instance_iter = response.xpath(instance.instance_xpath)
                    for i in instance_iter:
                        instanceitem['author'] = i.xpath(instance.author_xpath).extract_first()
                        instanceitem['url'] = response.url
                        instanceitem['datetime'] = i.xpath(instance.datetime_xpath).extract_first()
                        instanceitem['unixtime'] = time.mktime(dateparser.parse(instanceitem['datetime']).timetuple())
                        instanceitem['type'] = 'Comment'
                        instanceitem['text_body_html'] = i.xpath(instance.content_html_xpath).extract_first()
                        instanceitem['likes'] = i.xpath(instance.likes_xpath).extract_first()
                        instanceitem['id'] = i.xpath(instance.id_xpath).extract_first()
                        instanceitem['reply_to'] = i.xpath(instance.reply_to_xpath).extract_first()
                        instanceitem['links_contained'] = i.xpath(instance.links_contained_xpath).extract()
                        instanceitem['relevance'] = article.content_flag
                        instanceitem['gen_time'] = time.time()
                        for link in instanceitem['links_contained']:
                            if not url_validate.search(str(link)) == None:
                                linkritem['link_from'] = response.url
                                linkritem['link_to'] = str(link)
                                linkritem['gen_time'] = instanceitem['gen_time']
                                yield linkritem
                                url_retrieved.append(str(link))
                                yield scrapy.Request(str(link), callback=self.parse)
                        instanceitem['links_contained'] = ','.join(instanceitem['links_contained'])
                        if not instanceitem['text_body_html'] == None:
                            instanceitem['text_body'] = BeautifulSoup(instanceitem['text_body_html'], 'lxml').get_text().strip()
                            yield instanceitem
        # if not len(url_retrieved) == 0:
        #     url_retrieved = list(set(url_retrieved))
        #     urlfile = open('urls.txt', 'a')
        #     for link in url_retrieved:
        #         urlfile.write("{}\n".format(link))
        #         yield scrapy.Request(link, callback=self.parse)
    except Exception as e:
        pass