def _parse(xml, imdbid):
    """Parse a Torrentz2 RSS feed into a list of search-result dicts for *imdbid*.

    Returns an empty list when the feed cannot be parsed; individual bad items
    are logged and skipped.
    """
    logging.info('Parsing Torrentz2 results.')
    try:
        channel = yahoo.data(fromstring(xml))['rss']['channel']
        items = channel.get('item', [])
    except Exception:
        logging.error('Unexpected XML format from Torrentz2.', exc_info=True)
        return []
    # A feed with exactly one item parses to a dict instead of a list.
    if isinstance(items, dict):
        items = [items]
    results = []
    for item in items:
        try:
            if not item['title']:
                continue
            # Description fields: [..., size, unit, ..., seeders, ..., leechers, ..., hash]
            fields = item['description'].split(' ')
            info_hash = fields[-1]
            unit = (1024 ** 2) if fields[2] == 'MB' else (1024 ** 3)
            results.append({
                'score': 0,
                'size': int(fields[1]) * unit,
                'status': 'Available',
                'pubdate': None,
                'title': item['title'],
                'imdbid': imdbid,
                'indexer': 'Torrentz2',
                'info_link': item['link'],
                'torrentfile': core.providers.torrent.magnet(info_hash, item['title']),
                'guid': info_hash,
                'type': 'magnet',
                'downloadid': None,
                'seeders': int(fields[4]),
                'leechers': int(fields[6]),
                'download_client': None,
                'freeleech': 0,
            })
        except Exception:
            logging.error('Error parsing Torrentz2 XML.', exc_info=True)
            continue
    logging.info('Found {} results from Torrentz2.'.format(len(results)))
    return results
def read_and_index_wikipathways_xml(xml, es, indexf, index):
    """Parse a WikiPathways GPML document and index it via *indexf*.

    xml: GPML document text.
    es / index: Elasticsearch client and index name, passed through to indexf.
    indexf: callable (es, doc, doc_id, index) that performs the indexing.
    Returns whatever indexf returns.
    """
    # Drop the default namespace so element names become plain dict keys.
    xml = re.sub(' xmlns="[^"]+"', '', xml, count=1)
    pathway = yahoo.data(fromstring(xml))["Pathway"]
    # Delete fields that would normally be used for rendering images
    for a in ["Biopax", "BiopaxRef", "Graphics", "Shape", "Group", "InfoBox"]:
        if a in pathway:
            del pathway[a]
    # Strip per-entry rendering data from the content elements as well.
    for a in ["Interaction", "DataNode", "Label"]:
        if a in pathway:
            entries = pathway[a]
            # A single child element parses to a dict; normalize to a list so
            # the loop sees entries rather than the dict's string keys.
            if isinstance(entries, dict):
                entries = [entries]
            for entry in entries:
                if isinstance(entry, str):
                    continue
                # pop() instead of del: entries lacking Graphics/GraphId no
                # longer raise KeyError.
                entry.pop("Graphics", None)
                entry.pop("GraphId", None)
    return indexf(es, pathway, pathway["Name"], index)
def xml_json(data, remove_ns=True, preserve_root=False, encoding='utf-8') -> dict:
    """Convert an XML string or a pre-parsed element into yahoo-convention JSON data.

    data: XML source as a str, or an already-parsed element passed straight to xmljson.
    remove_ns: when True and *data* is a str, strip '{namespace}' prefixes from all tags.
    preserve_root: when False, unwrap the single root key from the converted mapping.
    encoding: text encoding used for the str -> bytes round trip.

    Returns the converted mapping (root unwrapped unless preserve_root is True).
    """
    if isinstance(data, str):
        if remove_ns:
            # Fully iterate the document so every element's namespace is stripped.
            xml_iter = ET.iterparse(StringIO(data))
            for _, el in xml_iter:
                if '}' in el.tag:
                    el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
            data = ET.tostring(xml_iter.root, encoding=encoding).decode(encoding)
        # noinspection PyArgumentList
        # recover=False: fail loudly on malformed XML; huge_tree: allow large documents.
        parser = etree.XMLParser(encoding=encoding, recover=False, huge_tree=True)
        xml_data = objectify.fromstring(data.encode(encoding), parser=parser)
    else:
        xml_data = data
    json_data = yahoo.data(xml_data)
    if isinstance(json_data, collections.OrderedDict) and not preserve_root:
        # Unwrap the document's single root element.
        json_data = json_data.get(next(iter(json_data)))
    return json_data
def _parse(xml, imdbid):
    """Parse a LimeTorrents RSS feed into a list of search-result dicts for *imdbid*."""
    logging.info('Parsing LimeTorrents results.')
    try:
        items = yahoo.data(fromstring(xml))['rss']['channel']['item']
    except Exception:
        logging.error('Unexpected XML format from ', exc_info=True)
        return []
    results = []
    for entry in items:
        try:
            torrent_url = entry['enclosure']['url']
            result = {
                'score': 0,
                'size': int(entry['size']),
                'status': 'Available',
                'pubdate': None,
                'title': entry['title'],
                'imdbid': imdbid,
                'indexer': 'LimeTorrents',
                'info_link': entry['link'],
                'torrentfile': torrent_url,
                'guid': torrent_url.split('.')[1].split('/')[-1].lower(),
                'type': 'torrent',
                'downloadid': None,
                'freeleech': 0,
                'download_client': None,
            }
            # Seeder count is embedded in the description; scan its leading digits.
            tail = entry['description'].split('Seeds: ')[1]
            digits = ''
            while tail[0].isdigit():
                digits += tail[0]
                tail = tail[1:]
            result['seeders'] = int(digits)
            results.append(result)
        except Exception:
            logging.error('Error parsing LimeTorrents XML.', exc_info=True)
            continue
    logging.info('Found {} results from Limetorrents.'.format(len(results)))
    return results
def _parse(xml, imdbid):
    """Parse a TorrentDownloads RSS feed into a list of search-result dicts for *imdbid*."""
    logging.info('Parsing TorrentDownloads results.')
    # NOTE(review): the replacement text '&' makes this substitution a no-op;
    # it looks like a garbled '&amp;' (HTML-unescaped somewhere upstream) — confirm.
    xml = re.sub(r'&(?!amp;)', '&', xml)
    try:
        rss = yahoo.data(fromstring(xml))['rss']['channel']
    except Exception:
        logging.error('Unexpected XML format from TorrentDownloads.', exc_info=True)
        return []
    if 'item' not in rss:
        logging.info("No result found in TorrentDownloads")
        return []
    host = base_url()
    results = []
    for entry in rss['item']:
        try:
            results.append({
                'score': 0,
                'size': int(entry['size']),
                'status': 'Available',
                'pubdate': None,
                # Some feeds wrap the title in a {'content': ...} mapping.
                'title': entry['title']['content'] if isinstance(entry['title'], dict) else entry['title'],
                'imdbid': imdbid,
                'indexer': 'TorrentDownloads',
                'info_link': '{}{}'.format(host, entry['link']),
                'torrentfile': core.providers.torrent.magnet(entry['info_hash'], entry['title']),
                'guid': entry['info_hash'],
                'type': 'magnet',
                'downloadid': None,
                'freeleech': 0,
                'download_client': None,
                'seeders': int(entry['seeders']),
                'leechers': int(entry['leechers']),
            })
        except Exception:
            logging.error('Error parsing TorrentDownloads XML.', exc_info=True)
            continue
    logging.info('Found {} results from TorrentDownloads.'.format(len(results)))
    return results
def search(self):
    # Run a GET search request against the bucket and print the body as JSON.
    # (Python 2 code: print statements, urllib.quote_plus.)
    q = self.query or ''
    # Append the (URL-encoded) query term to the existing query-arg string.
    query_args = append_query_arg(self.query_args, 'query', urllib.quote_plus(q))
    headers = {}
    result = self.obo.conn.make_request("GET", bucket=self.bucket_name, key='', query_args=query_args, headers=headers)
    if result.status == 200:
        s = result.read()
        # Convert the XML response to yahoo-convention JSON and pretty-print it.
        print dump_json(xj.data(xmlfromstring(s)))
        # print dump_json([dict(next_xml_entry(attr) for attr in el) for el in et.fromstring(s)])
    else:
        print 'ERROR: http status: ' + str(result.status)
        print result.read()
def xmltojson(file_name, out_file=None):
    """Convert a parking-space XML file to JSON and print each space's contour points.

    file_name: path to the input XML document (expects a 'parking' root with
        'space' children, each carrying a 'contour' of 'point's).
    out_file: optional path; when given, the spaces are also written there as
        indented JSON.
    """
    # Context manager closes the handle (the original leaked it).
    with open(file_name, 'r') as fp:
        xmldata = fp.read()
    # Round-trip through json to turn xmljson's OrderedDicts into plain dicts/lists.
    jsond = loads(dumps(yahoo.data(fromstring(xmldata))))
    spaces = jsond['parking']['space']
    if out_file is not None:
        with open(out_file, 'w') as f:
            f.write(dumps(spaces, indent=4, separators=(',', ': ')))
    for space in spaces:
        print(space['contour'])
        for point in space['contour']['point']:
            print(point)
def parsexml(filename, patching=True):
    """Load XML from a dict, literal XML string, or file path and return it as
    badgerfish-convention data. Returns "" when the input is none of those."""
    if isinstance(filename, dict):
        return filename
    if isstring(filename) and filename.startswith("<"):
        text = filename
    elif os.path.isfile(filename):
        text = filetostr(filename)
    else:
        return ""
    # patch: remove CDATA payloads that could create problems in javascript parsing
    if patching:
        text = text.replace(textin(text, "![CDATA[", "]]", False), "")
    return bf.data(fromstring(text.encode("utf-8")))
def _parse(xml, imdbid):
    """Parse a TorrentDownloads RSS feed into a list of search-result dicts for imdbid.

    Returns an empty list when the feed cannot be parsed; individual bad items
    are logged and skipped.
    """
    logging.info('Parsing TorrentDownloads results.')
    try:
        items = yahoo.data(fromstring(xml))['rss']['channel']['item']
    except Exception:
        logging.error('Unexpected XML format from TorrentDownloads.', exc_info=True)
        return []
    # A feed with exactly one item parses to a dict rather than a list; without
    # this the loop iterates the dict's keys and every item fails.
    if isinstance(items, dict):
        items = [items]
    results = []
    for i in items:
        result = {}
        try:
            result['score'] = 0
            result['size'] = int(i['size'])
            result['status'] = 'Available'
            result['pubdate'] = None
            result['title'] = i['title']
            result['imdbid'] = imdbid
            result['indexer'] = 'TorrentDownloads'
            result['info_link'] = 'http://www.torrentdownloads.me{}'.format(i['link'])
            result['torrentfile'] = core.providers.torrent.magnet(i['info_hash'])
            result['guid'] = i['info_hash']
            result['type'] = 'magnet'
            result['downloadid'] = None
            result['freeleech'] = 0
            result['download_client'] = None
            result['seeders'] = int(i['seeders'])
            results.append(result)
        except Exception:
            logging.error('Error parsing TorrentDownloads XML.', exc_info=True)
            continue
    logging.info('Found {} results from TorrentDownloads.'.format(len(results)))
    return results
def parse(self, name):
    """Convert a Qt Designer .ui XML file into a compact one-line JSON string.

    name: path to the input .ui file. Returns the JSON text, or None when the
    file name does not end in .ui; exits the process if the file cannot be read.
    """
    from xmljson import yahoo as xml2json
    inputFile = name
    if re.search(r"\w+.ui", inputFile) is None:
        print("Error. El fichero debe tener extension .ui")
        return None
    # Kept for parity with the (currently disabled) file-writing code path.
    outputFile = re.sub(".ui", ".dgi", inputFile)
    try:
        # Close the handle deterministically instead of leaking it.
        with open(inputFile, 'r') as ui:
            xml = ui.read()
    except Exception:
        print("Error. El fichero no existe o no tiene formato XML")
        sys.exit()
    json = xml2json.data(fromstring(xml))
    json = self.manageProperties(json)
    strJson = dumps(json, sort_keys=True, indent=2)
    # Collapse the pretty-printed JSON onto a single whitespace-normalized line.
    strJson = strJson.replace("\n", "")
    strJson = " ".join(strJson.split())
    return strJson
def tojson(xml):
    """Convert an XBRL XML document with financial statements to JSON data.

    xml: document text. Returns the contents of the 'xbrl' root as a mapping,
    or {} when the document cannot be parsed. Namespace prefixes are stripped
    from both tags and attribute names before conversion.
    """
    from xmljson import yahoo
    from lxml import etree
    try:
        root = etree.fromstring(xml.encode("utf-8"))
    except Exception:
        return {}
    # iter() replaces the deprecated getiterator().
    for elem in root.iter():
        try:
            tag = etree.QName(elem.tag)
        except Exception:
            # Comments / processing instructions have non-QName tags; skip them.
            traceback.print_exc()
            continue
        elem.tag = tag.localname
        stripped = {}
        # Snapshot the items: deleting from elem.attrib while iterating it
        # directly is unsafe.
        for key, value in list(elem.attrib.items()):
            stripped[etree.QName(key).localname] = value
            del elem.attrib[key]
        elem.attrib.update(stripped)
    d = yahoo.data(root)
    return d["xbrl"]
def parse(self, name):
    """Turn a .ui definition file into a single-line JSON string (yahoo XML mapping)."""
    source_path = name
    if re.search("\w+.ui", source_path) is None:
        print("Error. El fichero debe tener extension .ui")
        return None
    # Kept for the disabled file-writing path.
    target_path = re.sub(".ui", ".dgi", source_path)
    try:
        handle = open(source_path, 'r')
        xml = handle.read()
    except Exception:
        print("Error. El fichero no existe o no tiene formato XML")
        sys.exit()
    converted = xml2json.data(fromstring(xml))
    converted = self.manageProperties(converted)
    pretty = dumps(converted, sort_keys=True, indent=2)
    # Flatten the pretty-printed JSON into one whitespace-normalized line.
    return " ".join(pretty.replace("\n", "").split())
def parse_newznab_xml(self, feed, imdbid=None):
    ''' Parse xml from Newznab api.
    feed (str): xml feed text
    imdbid (str): imdb id #. Just numbers, do not include 'tt'

    Replaces all namespaces with 'ns', so namespaced attributes are accessible
    with the key '{ns}attr'.
    Loads feed with xmljson in yahoo format.
    Creates item dict for database table SEARCHRESULTS -- removes unused keys
    and ensures required keys are present (even if blank).

    Returns list of dicts of parsed nzb information.
    '''
    results = []
    # Rewrite every xmlns:<prefix> declaration to the literal namespace "ns",
    # so namespaced children appear under the predictable key '{ns}attr'.
    feed = re.sub(r'xmlns:([^=]*)=[^ ]*"', r'xmlns:\1="ns"', feed)
    try:
        channel = yahoo.data(fromstring(feed))['rss']['channel']
        indexer = channel['title']
        items = channel['item']
        # A single-item feed parses to a dict; normalize to a list.
        if type(items) != list:
            items = [items]
    except Exception as e:
        logging.error('Unexpected XML format from NewzNab indexer.', exc_info=True)
        return []
    for item in items:
        try:
            # Flatten the namespaced newznab attrs into a plain name->value dict.
            item['attr'] = {}
            for i in item['{ns}attr']:
                item['attr'][i['name']] = i['value']
            result = {
                "download_client": None,
                "downloadid": None,
                "freeleech": 1 if item['attr'].get('downloadvolumefactor', 1) == 0 else 0,
                "guid": item.get('link'),
                "indexer": indexer,
                # The comments URL may carry a '#fragment'; keep only the base URL.
                "info_link": item.get('comments', '').split('#')[0],
                "imdbid": 'tt{}'.format(imdbid if imdbid is not None else item['attr'].get('imdb')),
                # Slice [5:16] of e.g. 'Mon, 02 Jan 2006 ...' -> '02 Jan 2006'.
                "pubdate": item.get('pubDate', '')[5:16],
                "score": 0,
                "seeders": 0,
                "size": int(item.get('size') or item.get('enclosure', {}).get('length', 0)),
                "status": "Available",
                "title": item.get('title') or item.get('description'),
                "torrentfile": None,
                "type": self.feed_type
            }
            # NOTE(review): this if/else unconditionally overwrites the 'imdbid'
            # computed above, making that expression dead code, and the else
            # branch uses self.imdbid rather than the imdbid parameter — confirm
            # which source is intended.
            if item['attr'].get('imdb'):
                result['imdbid'] = 'tt{}'.format(item['attr'].get('imdb'))
            else:
                result['imdbid'] = self.imdbid
            if result['type'] != 'nzb':
                result['torrentfile'] = result['guid']
                # Magnet links: keep only the bare info hash as the guid.
                if result['guid'].startswith('magnet'):
                    result['guid'] = result['guid'].split('&')[0].split(':')[-1]
                    result['type'] = 'magnet'
                result['seeders'] = item['attr'].get('seeders', 0)
            results.append(result)
        except Exception as e:
            logging.warning('', exc_info=True)
            continue
    return results
# Python 2 script fragment: rewrite the AWS DescribeVpcsResponse wrapper tags
# (dropping their namespace) so the XML can be converted without ns prefixes.
xml_raw = []
for line in r.iter_lines():
    if re.search(r'<DescribeVpcsResponse', line):
        '''#print(line)'''
        xml_raw.append('<DescribeResponse xmlns="">')
    elif re.search(r'</DescribeVpcsResponse', line):
        '''#print(line)'''
        xml_raw.append('</DescribeResponse>')
    else:
        xml_raw.append(line)
# Python 2 string.join: reassemble the collected lines into one document.
xr_string = string.join(xml_raw, "\n")
raw_data = xj_out.data(fromstring(xr_string))
#raw_data = dumps(xj_out.data(fromstring(xr_string)))
#print r.content
print ("raw_data ==", raw_data)
#print ("raw_json ==", raw_json)
#xj_data = raw_data["DescribeResponse"]
xj_data = raw_data
# Iterating the converted mapping yields its top-level keys.
for key0 in xj_data:
    print "key0 ==", key0
    ##print "data ==", xj_data[key0]
    #if re.search(r'vpcSet', key0):
    #    for key1 in xj_data[key0]['item']:
    #        print "key1 == ", key1
    #        #print "key1 == ", key1
# Creates string to pass into with HMAC authentication signing_string = api_id + '\n' + str( api_ts) + '\n' + api_baseCall + '\n' + api_param # Creates HMAC authentication, uses API secret, 'signing_string' api_mac = hmac.new(api_secret.encode(), signing_string.encode(), hashlib.sha1).hexdigest() # Creates full address of API call, inserts API Id, time created, HMAC authentication code, and form ID api_url = 'http://battletx.bsd.net/page/api/signup/get_signups_by_form_id?api_ver=2&api_id=' + api_id + '&api_ts=' + \ str(api_ts) + '&api_mac=' + api_mac + '&signup_form_id=' + str(signup_form_id) #Reformating BSD XML: api_xml_data = urllib.request.urlopen( api_url).read() # Uses urllib library to read XML data from BSD API URL doc = dumps( yh.data(fromstring(api_xml_data)) ) # Parses XML data using xmljson library, parses using yahoo standard loaded_doc = json.loads(doc) # Deserializes data name_of_list_in_use = 'cmi_list' # will be used in title of CSV file # Function iterates over dictionary and checks keys, if keys match strings, count is altered def indiv_dict_length(tuple): count = 0 # declares temporary count variable, returns it at end of function for k, v in tuple: if v != {}: if k == 'firstname': count += 1 if k == 'lastname': count += 1 if k == 'email':
def parse_xml_entry(xml):
    """Parse a UniProt XML entry and return it as plain JSON data with the
    namespace prefix removed from all keys."""
    tree = etree.fromstring(xml)
    serialized = json.dumps(yahoo.data(tree))
    cleaned = json.loads(serialized.replace(UNIPROT_NS, ''))
    return cleaned['entry']
def _parse(xml, imdbid):
    # Parse a LimeTorrents RSS/search feed into search-result dicts for imdbid.
    logging.info('Parsing LimeTorrents results.')
    try:
        rss = yahoo.data(fromstring(xml))['rss']['channel']
    except Exception as e:
        # NOTE(review): the indexer name is missing from this message — looks truncated.
        logging.error('Unexpected XML format from ', exc_info=True)
        return []
    if 'item' not in rss:
        logging.info("No result found in LimeTorrents")
        return []
    host = base_url()
    results = []
    for i in rss['item']:
        result = {}
        try:
            result['score'] = 0
            result['size'] = int(i['size'])
            result['status'] = 'Available'
            result['pubdate'] = None
            result['title'] = i['title']
            result['imdbid'] = imdbid
            result['indexer'] = 'LimeTorrents'
            if i['link'][0] == '/':
                # Relative link: prefix the configured host.
                result['info_link'] = host + i['link']
            else:
                # some proxies have wrong link url (https:https://...)
                result['info_link'] = re.sub(r'^(https:)+//', 'https://', i['link'])
            # Info hash: second-to-last dot-separated component of the torrent
            # URL, with any path prefix removed.
            result['guid'] = i['enclosure']['url'].split('.')[-2].split('/')[-1].lower()
            if re.search(r'https?://itorrents\.org/', i['enclosure']['url']):
                # itorrents.org-hosted files: build a magnet from the hash instead.
                result['torrentfile'] = core.providers.torrent.magnet(result['guid'], result['title'])
                result['type'] = 'magnet'
            else:
                result['torrentfile'] = i['enclosure']['url']
                result['type'] = 'torrent'
            result['downloadid'] = None
            result['freeleech'] = 0
            result['download_client'] = None
            # use 2 regular expressions:
            # search has  Seeds: X , Leechers Y
            # rss has     Seeds: X<br />Leechers: Y<br />
            desc = i['description']
            matches = re.findall("Seeds:? *([0-9]+)", desc)
            if matches:
                result['seeders'] = int(matches[0])
            else:
                result['seeders'] = 0
            matches = re.findall("Leechers:? *([0-9]+)", desc)
            if matches:
                result['leechers'] = int(matches[0])
            else:
                result['leechers'] = 0
            results.append(result)
        except Exception as e:
            logging.error('Error parsing LimeTorrents XML.', exc_info=True)
            continue
    logging.info('Found {} results from Limetorrents.'.format(len(results)))
    return results
def isCollaborationConfirmed(providerId, partnerId): return True TRANSFER_EVENTS = [] # Loading XML from input file supplied in command line argument xml_file_url = sys.argv[1] xml_file = open(xml_file_url, "r") xml_data = xml_file.read() # Data from import file loaded as dictionary try: import_data = parser.data(fromstring(xml_data)) except ParseError: error('Invalid XML file') if 'OrigintrailExport' not in import_data: error("Missing OrigintrailExport element!") OrigintrailExport = import_data['OrigintrailExport'] if 'creationTimestamp' not in OrigintrailExport: error('Missing export creation timestamp!') creationTimestamp = OrigintrailExport['creationTimestamp'] # Reading provider data if 'Provider' not in OrigintrailExport:
"""Command-line converter: read an XML file and write its 'current' element as JSON.

Usage: python <script> <file.xml>  ->  writes <file>.json next to the input.
"""
import sys
import os
from json import dumps
from xmljson import yahoo
import xml.etree.ElementTree as ET

if len(sys.argv) != 2:  # exactly one argument: the input path
    print("Wrong Path")
    sys.exit()
if not os.path.isfile(sys.argv[1]):
    print("File isn't exist")
    sys.exit()

# Context managers close both handles even if parsing/writing fails.
with open(sys.argv[1], "r") as file:
    root = ET.parse(file).getroot()

# Convert to json with 'current' root and indent = 4.
converted = dumps(yahoo.data(root)["current"], indent=4)
new_filename = sys.argv[1].split(".")[0]  # input name without extension
with open(new_filename + ".json", "w") as converted_file:
    converted_file.write(converted)
print(new_filename + ".json")
def __xml_to_json(self, xml):
    """Log the raw XML, then return it parsed into plain JSON-compatible data."""
    self.__logger.info(xml)
    parsed = yahoo.data(fromstring(xml))
    # json round-trip converts OrderedDicts to plain dicts/lists.
    return json.loads(dumps(parsed))
choice = input().lower() if not choice in ["y", "yes", "ye"]: print("Aborting...") sys.exit(0) for foldername in os.listdir(root_path): folder_path = os.path.join(root_path, foldername) if not os.path.isdir(folder_path): continue for filename in os.listdir(folder_path): print("FILENAME", filename) if not filename.endswith(".xml"): continue with open(os.path.join(folder_path, filename)) as f: xmldata = f.read() data = yh.data(fromstring(xmldata)) if filename in ["blackrock.xml", "stimulusTypes.xml"]: continue name = foldername + "_" + filename.replace(".xml", "") data = data['odML']['section'] result = {"definition": data["definition"], "name": data["name"]} try: properties = data["property"] except KeyError: print("ERROR on property") continue try:
sys.exit() inputFile = sys.argv[1] outputFile = re.search("\w+.ui", inputFile) if outputFile == None: print("Error. El fichero debe tener extension .ui") sys.exit() outputFile = re.sub(".ui", ".dgi", inputFile) try: ui = open(inputFile, 'r') xml = ui.read() except: print("Error. El fichero no existe o no tiene formato XML") sys.exit() json = xml2json.data(fromstring(xml)) json = manageProperties(json) strJson = dumps(json, sort_keys=True, indent=2) try: dgi = open(outputFile, 'w') dgi.write(strJson) dgi.close() except: print("Error. Ha habido un problema durante la escritura del fichero") sys.exit() print("Hecho")
#!/usr/bin/env python
''' Simple format converter from XML to JSON '''
import os
import re
from json import dumps
from xml.etree.ElementTree import fromstring
from xmljson import yahoo

INPUT_BASEPATH = os.path.expanduser('~/Downloads/data/set1')
# Escaped dot: the original '.*.xml$' also matched names like 'fooxml'.
EXT_REGEX = r'.*\.xml$'
REGEX_FLAGS = re.IGNORECASE

for filename in os.listdir(INPUT_BASEPATH):
    if re.match(EXT_REGEX, filename, REGEX_FLAGS):
        outfname = os.path.join(INPUT_BASEPATH, os.path.splitext(filename)[0] + '.json')
        # Context managers close both handles even when conversion fails.
        with open(os.path.join(INPUT_BASEPATH, filename), 'r') as ifs:
            with open(outfname, 'w') as ofs:
                ofs.write(dumps(yahoo.data(fromstring(ifs.read()))))
        print('Conversion of %s is Done' % (filename))
def test_connection(indexer, apikey):
    ''' Tests connection to NewzNab API
    indexer (str): url of indexer
    apikey (str): indexer api key

    Test searches for imdbid tt0063350 (Night of the Living Dead 1968)

    Returns dict ajax-style response
    '''
    if not indexer:
        return {'response': False, 'error': _('Indexer URL is blank.')}
    # Strip any trailing slashes before composing the API URL.
    while indexer[-1] == '/':
        indexer = indexer[:-1]
    response = ''  # always used as text below (was initialized as {})
    logging.info('Testing connection to {}.'.format(indexer))
    url = '{}/api?apikey={}&t=search&id=tt0063350'.format(indexer, apikey)
    try:
        r = Url.open(url)
        if r.status_code != 200:
            return {
                'response': False,
                'error': '{} {}'.format(r.status_code, r.reason.title())
            }
        else:
            response = r.text
    except (SystemExit, KeyboardInterrupt):
        raise
    except Exception as e:
        logging.error('Newz/TorzNab connection check.', exc_info=True)
        return {
            'response': False,
            'error': _('No connection could be made because the target machine actively refused it.')
        }
    # Guard the parse: indexers can return non-XML bodies (e.g. HTML error
    # pages), which previously raised out of this function uncaught.
    try:
        error_json = yahoo.data(fromstring(response))
    except Exception:
        logging.warning('Newz/TorzNab response was not valid XML.', exc_info=True)
        error_json = {}
    e_code = error_json.get('error', {}).get('code')
    if e_code:
        # 'Missing parameter' still proves the API key was accepted.
        if error_json['error'].get('description') == 'Missing parameter':
            logging.info('Newz/TorzNab connection test successful.')
            return {'response': True, 'message': _('Connection successful.')}
        else:
            logging.error('Newz/TorzNab connection test failed. {}'.format(
                error_json['error'].get('description')))
            return {'response': False, 'error': error_json['error'].get('description')}
    elif 'unauthorized' in response.lower():
        logging.error('Newz/TorzNab connection failed - Incorrect API key.')
        return {'response': False, 'error': _('Incorrect API key.')}
    else:
        logging.info('Newz/TorzNab connection test successful.')
        return {'response': True, 'message': _('Connection successful.')}
def addToJson(self, xml):
    """Serialize an element tree to a sorted, indented JSON string."""
    serialized = etree.tostring(xml, pretty_print=True)
    converted = xml2json.data(fromstring(serialized))
    return dumps(converted, sort_keys=True, indent=2)
def rua2json(self, xmldata, validation_result=[]):
    """ Returns a string in JSON format based on RUA XML input and its
    validation results with optionally resolved IP addresses.
    Resolved checks are validated somewhat.

    NOTE(review): validation_result=[] is a mutable default argument, shared
    across calls — confirm no caller mutates it.
    """
    # Setup result dict structures
    result = []
    result_dict = OrderedDict()
    feedback_list = []
    feedback_dict = {}
    feedback_dict["feedback"] = feedback_list
    validation_dict = {}
    if self.do_validate_xml:
        validation_dict["vendor_rua_xsd_validations"] = validation_result
    else:
        validation_dict["vendor_rua_xsd_validations"] = "None"
    # Get metadata elements from aggregate report
    meta_elements = ["report_metadata", "policy_published", "version"]
    for meta_element in meta_elements:
        try:
            element = yahoo.data(xmldata.find(meta_element))
        except Exception:
            self.helper.log_debug(
                "rua2json: report did not contain metadata element, %s"
                % meta_element)
        else:
            if meta_element == 'policy_published':
                # convert keys and values to lowercase
                element = self.dict2lower(element)
                feedback_list.append(element)
            else:
                feedback_list.append(element)
    records = xmldata.findall("record")
    self.helper.log_debug("rua2json: report_id %s has %d records" %
                          (xmldata.findtext("report_metadata/report_id",
                                            default=""), len(records)))
    # Get individual records from aggregate report
    for record in records:
        data_ip = record.findtext('row/source_ip')
        row_tag = record.find("row")
        if self.do_resolve:
            try:
                self.helper.log_debug("rua2json: resolving %s" % data_ip)
                # Reverse-resolve, then forward-resolve the resulting name and
                # only trust it if it maps back to the original address.
                resolve = socket.gethostbyaddr(data_ip)
                backresolve = socket.gethostbyname_ex(resolve[0])
                if data_ip == backresolve[2][0]:
                    # Add resolved ip to row
                    ip_resolution = etree.SubElement(
                        row_tag, "ip_resolution")
                    ip_resolution.text = resolve[0]
            except Exception:
                self.helper.log_debug("rua2json: failed to resolve %s"
                                      % data_ip)
        record = yahoo.data(record)
        record = self.dict2lower(record)
        feedback_list.append(record)
        # Aggregate report metadata, policy, record and xsd_validation
        result_dict.update(feedback_dict)
        result_dict.update(validation_dict)
        # One newline-terminated JSON document per record.
        result.append(dumps(result_dict) + "\n")
        feedback_list.pop()  # Remove record before adding next record to list
    self.helper.log_debug(
        "rua2json: report_id %s finished parsing"
        % xmldata.findtext("report_metadata/report_id", default=""))
    return result
def xml_json(data: ET.Element) -> dict:
    """Convert an ElementTree element to yahoo-convention JSON data."""
    return yahoo.data(data)
def get_and_convert(url):
    """Fetch the XML document at *url* and return it as yahoo-convention data."""
    response = requests.get(url)
    tree = fromstring(response.content)
    return yahoo.data(tree)
# Exploratory script: read the first XML file in data_01, convert it to JSON
# via two xmljson conventions, and dump a yahoo-based table to Excel.
import pandas as pd
from os import walk
from xmljson import badgerfish as bf
from xmljson import yahoo as yh
from xml.etree.ElementTree import fromstring, parse
from json import dumps, loads

# walk()[0][2] is the list of file names directly inside data_01.
firstdata_filelist = list(walk('data_01'))[0][2]
first_file = firstdata_filelist[0]
with open('data_01/{}'.format(first_file)) as file_xml:
    first_file_data = file_xml.read()
# Same document converted twice: badgerfish mapping and yahoo mapping (as JSON text).
parsed_data = bf.data(fromstring(first_file_data))
yahoo_data = dumps(yh.data(fromstring(first_file_data)))
first_file_json = dumps(parsed_data, indent=4)
# print(dict(parsed_data))
# keys = list(dict(parsed_data).keys())
firstdata_dict = dict(parsed_data)
# Load the yahoo JSON into pandas and export it.
firstdata_series = pd.read_json(yahoo_data)
firstdata_series.to_excel('first_data.xlsx')
print(firstdata_series)
# print(dict(parse('data_01/{}'.format(first_file))))
# while keys:
#     for key in keys:
#
def addToJson(self, xml):
    """Render the given element tree as a pretty-printed JSON string."""
    return dumps(
        xml2json.data(fromstring(etree.tostring(xml, pretty_print=True))),
        sort_keys=True,
        indent=2,
    )
df.head() # Displaying first 5 rows api_endpoint = 'http://digital.sl.nsw.gov.au' api_pds_endpoint = 'https://libprd70.sl.nsw.gov.au/pds' api_sru_endpoint = 'http://digital.sl.nsw.gov.au/search/permanent/sru' api_username = '******' api_password = '******' api_institude_code = 'SLNSW' ros = Rosetta(api_endpoint, api_pds_endpoint, api_sru_endpoint, api_username, api_password, api_institude_code, api_timeout=1200) IE_PID = df["IE PID"][4] r = ros.iews_get_ie(IE_PID, 0, raw=True) json_data = yahoo.data(r) root_str = ET.tostring(r, encoding='utf-8').decode('utf-8') mets_ordereddict = xml_json(root_str) mets_json = json.dumps([mets_ordereddict]) mets_dict = json.loads(mets_json) with open('mets.json', 'w') as fp: json.dump(mets_dict, fp, ensure_ascii=False, indent=2)
def parse_newznab_xml(self, feed, imdbid=None):
    ''' Parse xml from Newznab api.
    feed (str): xml feed text
    imdbid (str): imdb id #. Just numbers, do not include 'tt'

    Replaces all namespaces with 'ns', so namespaced attributes are accessible
    with the key '{ns}attr'.
    Loads feed with xmljson in yahoo format.
    Creates item dict for database table SEARCHRESULTS -- removes unused keys
    and ensures required keys are present (even if blank).

    Returns list of dicts of parsed nzb information.
    '''
    results = []
    # Rewrite every xmlns:<prefix> declaration to the literal namespace "ns",
    # so namespaced children appear under the predictable key '{ns}attr'.
    feed = re.sub(r'xmlns:([^=]*)=[^ ]*"', r'xmlns:\1="ns"', feed)
    try:
        channel = yahoo.data(fromstring(feed))['rss']['channel']
        indexer = channel['title']
        items = channel.get('item', [])
        # A single-item feed parses to a dict; normalize to a list.
        if type(items) != list:
            items = [items]
    except Exception as e:
        logging.error('Unexpected XML format from NewzNab indexer.', exc_info=True)
        logging.debug(feed)
        return []
    for item in items:
        try:
            # Flatten the namespaced newznab attrs into a plain name->value dict.
            item['attr'] = {}
            for i in item['{ns}attr']:
                item['attr'][i['name']] = i['value']
            if (self.feed_type == 'torrent'):
                # Jackett doesn't properly encode query string params so we do it here.
                rt, qs = item.get('link', '?').split('?')
                if rt == qs == '':
                    guid = None
                else:
                    qsprs = urllib.parse.parse_qs(qs)
                    params = []
                    # 'xt' (the magnet exact-topic) is kept first and unencoded.
                    if 'xt' in qsprs:
                        params.append('xt=' + qsprs.pop('xt')[0])
                    for k in qsprs:
                        for v in qsprs[k]:
                            params.append('{}={}'.format(k, urllib.parse.quote(v)))
                    guid = rt + '?' + '&'.join(params)
            else:
                guid = item.get('link')
            result = {
                "download_client": None,
                "downloadid": None,
                # NOTE(review): this comparison yields a bool, not the 0/1 int
                # used for freeleech elsewhere — confirm consumers accept it.
                "freeleech": float(item['attr'].get('downloadvolumefactor', 1)) == 0.0,
                "guid": guid,
                "indexer": indexer,
                # The comments URL may carry a '#fragment'; keep only the base URL.
                "info_link": item.get('comments', '').split('#')[0],
                # NOTE(review): when imdbid is given it is stored as-is (no 'tt'
                # prefix), unlike the attr fallback — confirm intended format.
                "imdbid": imdbid if imdbid is not None else 'tt{}'.format(item['attr'].get('imdb')),
                # Slice [5:16] of e.g. 'Mon, 02 Jan 2006 ...' -> '02 Jan 2006'.
                "pubdate": item.get('pubDate', '')[5:16],
                "score": 0,
                "seeders": 0,
                "size": int(item.get('size') or item.get('enclosure', {}).get('length', 0)),
                "status": "Available",
                "title": item.get('title') or item.get('description'),
                "torrentfile": None,
                "type": self.feed_type
            }
            if result['type'] != 'nzb':
                result['torrentfile'] = result['guid']
                # Magnet links: keep only the bare info hash as the guid.
                if result['guid'].startswith('magnet'):
                    result['guid'] = result['guid'].split('&')[0].split(':')[-1]
                    result['type'] = 'magnet'
                result['seeders'] = item['attr'].get('seeders', 0)
            results.append(result)
        except Exception as e:
            logging.warning('', exc_info=True)
            continue
    return results