Exemple #1
0
def _parse(xml, imdbid):
    """Parse Torrentz2 RSS search results into a list of result dicts.

    xml (str): RSS feed text.
    imdbid (str): imdb id attached to every parsed result.

    Returns list of result dicts (empty when the feed is malformed).
    """
    logging.info('Parsing Torrentz2 results.')

    try:
        channel = yahoo.data(fromstring(xml))['rss']['channel']
        items = channel.get('item', [])
    except Exception as e:
        logging.error('Unexpected XML format from Torrentz2.', exc_info=True)
        return []

    if isinstance(items, dict):
        # A feed with exactly one entry parses to a dict rather than a list.
        items = [items]

    results = []
    for entry in items:
        try:
            if not entry['title']:
                continue
            # Description layout: '... <size> <MB|GB> ... <seeds> ... <leeches> ... <hash>'
            fields = entry['description'].split(' ')
            info_hash = fields[-1]
            multiplier = (1024 ** 2) if fields[2] == 'MB' else (1024 ** 3)

            results.append({
                'score': 0,
                'size': int(fields[1]) * multiplier,
                'status': 'Available',
                'pubdate': None,
                'title': entry['title'],
                'imdbid': imdbid,
                'indexer': 'Torrentz2',
                'info_link': entry['link'],
                'torrentfile': core.providers.torrent.magnet(info_hash, entry['title']),
                'guid': info_hash,
                'type': 'magnet',
                'downloadid': None,
                'seeders': int(fields[4]),
                'leechers': int(fields[6]),
                'download_client': None,
                'freeleech': 0,
            })
        except Exception as e:
            logging.error('Error parsing Torrentz2 XML.', exc_info=True)
            continue

    logging.info('Found {} results from Torrentz2.'.format(len(results)))
    return results
def read_and_index_wikipathways_xml(xml, es, indexf, index):
    """Strip rendering-only fields from a WikiPathways GPML document and index it.

    xml (str): GPML document text.
    es: Elasticsearch client handed through to indexf.
    indexf: callable (es, doc, doc_id, index) performing the indexing.
    index (str): target index name.

    Returns whatever indexf returns.
    """
    # Drop the default namespace declaration so tag lookups need no prefix.
    xml = re.sub(' xmlns="[^"]+"', '', xml, count=1)
    pathway = yahoo.data(fromstring(xml))["Pathway"]
    # Delete fields that would normally be used for rendering images.
    for field in ("Biopax", "BiopaxRef", "Graphics", "Shape", "Group", "InfoBox"):
        pathway.pop(field, None)
    for field in ("Interaction", "DataNode", "Label"):
        if field not in pathway:
            continue
        for node in pathway[field]:
            if isinstance(node, str):
                continue
            del node["Graphics"]
            node.pop("GraphId", None)
    return indexf(es, pathway, pathway["Name"], index)
Exemple #3
0
def xml_json(data, remove_ns=True, preserve_root=False, encoding='utf-8') -> dict:
    """Convert an XML string (or pre-parsed element) to a dict via xmljson's yahoo convention.

    data: XML text, or an element object passed straight to yahoo.data().
    remove_ns (bool): strip namespace qualifiers from every tag first.
    preserve_root (bool): keep the root element as the top-level key; otherwise unwrap it.
    encoding (str): encoding used when re-serialising / re-parsing the tree.
    """
    # BUG FIX: use isinstance() instead of `type(x) ==` -- the old checks
    # failed for str/OrderedDict subclasses and are unidiomatic.
    if isinstance(data, str):
        if remove_ns:
            xml_data = ET.iterparse(StringIO(data))
            for _, el in xml_data:
                if '}' in el.tag:
                    el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
            data = ET.tostring(xml_data.root, encoding=encoding).decode(encoding)
        encoded_data = data.encode(encoding)
        # noinspection PyArgumentList
        parser = etree.XMLParser(encoding=encoding, recover=False, huge_tree=True)
        xml_data = objectify.fromstring(encoded_data, parser=parser)
    else:
        xml_data = data
    json_data = yahoo.data(xml_data)
    if isinstance(json_data, collections.OrderedDict) and not preserve_root:
        # Unwrap the single root element.
        json_data = json_data.get(list(json_data.keys())[0])
    return json_data
Exemple #4
0
def _parse(xml, imdbid):
    """Parse LimeTorrents RSS search results into a list of result dicts.

    xml (str): RSS feed text.
    imdbid (str): imdb id attached to every parsed result.

    Returns list of result dicts (empty when the feed is malformed).
    """
    logging.info('Parsing LimeTorrents results.')

    try:
        items = yahoo.data(fromstring(xml))['rss']['channel']['item']
    except Exception as e:
        # BUG FIX: the indexer name was missing from this log message.
        logging.error('Unexpected XML format from LimeTorrents.', exc_info=True)
        return []

    if isinstance(items, dict):
        # CONSISTENCY FIX: a feed with exactly one entry parses to a dict;
        # iterating it directly yields key strings and drops the result.
        # The sibling Torrentz2 parser already does this.
        items = [items]

    results = []
    for i in items:
        result = {}
        try:
            result['score'] = 0
            result['size'] = int(i['size'])
            result['status'] = 'Available'
            result['pubdate'] = None
            result['title'] = i['title']
            result['imdbid'] = imdbid
            result['indexer'] = 'LimeTorrents'
            result['info_link'] = i['link']
            result['torrentfile'] = i['enclosure']['url']
            # Info hash: second dotted component, minus any path prefix.
            result['guid'] = result['torrentfile'].split('.')[1].split(
                '/')[-1].lower()
            result['type'] = 'torrent'
            result['downloadid'] = None
            result['freeleech'] = 0
            result['download_client'] = None

            # ROBUSTNESS FIX: pull the digit run after 'Seeds:' with a regex
            # (matching the other LimeTorrents parser) instead of a manual
            # character scan that raised and dropped the whole result when
            # the description had no 'Seeds: ' section.
            seed_match = re.search(r'Seeds:? *([0-9]+)', i['description'])
            result['seeders'] = int(seed_match.group(1)) if seed_match else 0

            results.append(result)
        except Exception as e:
            logging.error('Error parsing LimeTorrents XML.', exc_info=True)
            continue

    logging.info('Found {} results from Limetorrents.'.format(len(results)))
    return results
def _parse(xml, imdbid):
    """Parse TorrentDownloads RSS search results into a list of result dicts.

    xml (str): RSS feed text; may contain unescaped '&' characters.
    imdbid (str): imdb id attached to every parsed result.

    Returns list of result dicts (empty on malformed feeds or no items).
    """
    logging.info('Parsing TorrentDownloads results.')

    # BUG FIX: the replacement text was '&', making the substitution a no-op.
    # Bare ampersands must become '&amp;' or fromstring() rejects the feed.
    xml = re.sub(r'&(?!amp;)', '&amp;', xml)
    try:
        rss = yahoo.data(fromstring(xml))['rss']['channel']
    except Exception as e:
        logging.error('Unexpected XML format from TorrentDownloads.', exc_info=True)
        return []

    if 'item' not in rss:
        logging.info("No result found in TorrentDownloads")
        return []

    items = rss['item']
    if isinstance(items, dict):
        # CONSISTENCY FIX: a single-entry feed parses to a dict, not a list.
        items = [items]

    host = base_url()
    results = []
    for i in items:
        result = {}
        try:
            result['score'] = 0
            result['size'] = int(i['size'])
            result['status'] = 'Available'
            result['pubdate'] = None
            # Titles wrapped in CDATA parse to a {'content': ...} dict.
            result['title'] = i['title']['content'] if isinstance(i['title'], dict) else i['title']
            result['imdbid'] = imdbid
            result['indexer'] = 'TorrentDownloads'
            result['info_link'] = '{}{}'.format(host, i['link'])
            result['torrentfile'] = core.providers.torrent.magnet(i['info_hash'], i['title'])
            result['guid'] = i['info_hash']
            result['type'] = 'magnet'
            result['downloadid'] = None
            result['freeleech'] = 0
            result['download_client'] = None
            result['seeders'] = int(i['seeders'])
            result['leechers'] = int(i['leechers'])

            results.append(result)
        except Exception as e:
            logging.error('Error parsing TorrentDownloads XML.', exc_info=True)
            continue

    logging.info('Found {} results from TorrentDownloads.'.format(len(results)))
    return results
Exemple #6
0
    def search(self):
        """Run the stored query against the bucket and dump the XML response as JSON.

        NOTE(review): Python 2 syntax (print statements, urllib.quote_plus) --
        this method will not run under Python 3 without porting.
        """
        # URL-encode the query text and append it to the existing query args.
        q = self.query or ''
        query_args = append_query_arg(self.query_args, 'query',
                                      urllib.quote_plus(q))

        headers = {}

        result = self.obo.conn.make_request("GET",
                                            bucket=self.bucket_name,
                                            key='',
                                            query_args=query_args,
                                            headers=headers)
        if result.status == 200:
            # Convert the XML body to JSON and pretty-print it.
            s = result.read()
            print dump_json(xj.data(xmlfromstring(s)))
            # print dump_json([dict(next_xml_entry(attr) for attr in el) for el in et.fromstring(s)])

        else:
            print 'ERROR: http status: ' + str(result.status)
            print result.read()
Exemple #7
0
def xmltojson(file_name, out_file=None):
    """Convert a parking-space XML file to JSON and print each space's contour points.

    file_name (str): path to the input XML document.
    out_file (str): optional path; when given, the parsed spaces are written
        there as indented JSON.
    """
    # BUG FIX: the input file handle was previously opened and never closed.
    with open(file_name, 'r') as fp:
        xmldata = fp.read()

    # Round-trip through json to turn OrderedDicts into plain structures.
    jsond = loads(dumps(yahoo.data(fromstring(xmldata))))

    spaces = jsond['parking']['space']

    # IDIOM FIX: 'out_file is not None' instead of 'not out_file is None';
    # the output handle is also closed via a context manager now.
    if out_file is not None:
        with open(out_file, 'w') as f:
            f.write(dumps(spaces, indent=4, separators=(',', ': ')))

    for space in spaces:
        print(space['contour'])
        for point in space['contour']['point']:
            print(point)
Exemple #8
0
def parsexml(filename, patching=True):
    """
    Parse an XML document into badgerfish-style JSON.

    filename: an already-parsed dict (returned unchanged), a literal XML
        string (starts with '<'), or a path to an XML file.
    patching: when True, strip CDATA payloads that could break downstream
        javascript parsing.

    Returns the parsed data, or "" when the input is neither XML nor a file.
    """
    if isinstance(filename, dict):
        return filename
    if isstring(filename) and filename.startswith("<"):
        text = filename
    elif os.path.isfile(filename):
        text = filetostr(filename)
    else:
        return ""
    if patching:
        # Remove the first CDATA section found (problematic downstream).
        cdata = textin(text, "![CDATA[", "]]", False)
        text = text.replace(cdata, "")
        # text = text.replace("&lt;","<")
    return bf.data(fromstring(text.encode("utf-8")))
def _parse(xml, imdbid):
    """Parse TorrentDownloads RSS search results into a list of result dicts.

    xml (str): RSS feed text.
    imdbid (str): imdb id attached to every parsed result.

    Returns list of result dicts (empty when the feed is malformed).
    """
    logging.info('Parsing TorrentDownloads results.')

    try:
        items = yahoo.data(fromstring(xml))['rss']['channel']['item']
    except Exception as e:
        logging.error('Unexpected XML format from TorrentDownloads.',
                      exc_info=True)
        return []

    results = []
    for item in items:
        try:
            results.append({
                'score': 0,
                'size': int(item['size']),
                'status': 'Available',
                'pubdate': None,
                'title': item['title'],
                'imdbid': imdbid,
                'indexer': 'TorrentDownloads',
                'info_link': 'http://www.torrentdownloads.me{}'.format(
                    item['link']),
                # NOTE(review): other parsers in this file pass a title to
                # magnet() as a second argument -- confirm its signature.
                'torrentfile': core.providers.torrent.magnet(
                    item['info_hash']),
                'guid': item['info_hash'],
                'type': 'magnet',
                'downloadid': None,
                'freeleech': 0,
                'download_client': None,
                'seeders': int(item['seeders']),
            })
        except Exception as e:
            logging.error('Error parsing TorrentDownloads XML.', exc_info=True)
            continue

    logging.info('Found {} results from TorrentDownloads.'.format(
        len(results)))
    return results
Exemple #10
0
    def parse(self, name):
        """Convert a Qt .ui XML file into a compact one-line .dgi JSON string.

        name (str): path of the input file; must end in '.ui'.

        Returns the JSON string, or None when the filename is not a .ui file.
        Exits the process when the file cannot be read.
        """
        from xmljson import yahoo as xml2json
        inputFile = name
        # BUG FIX: escape the dot -- the old pattern "\w+.ui" also matched
        # names like 'formXui' because '.' matched any character.
        if re.search(r"\w+\.ui", inputFile) is None:
            print("Error. El fichero debe tener extension .ui")
            return None

        try:
            # BUG FIX: close the file handle (it previously leaked); the
            # unused outputFile computation (dead code -- the write was
            # commented out) has been removed.
            with open(inputFile, 'r') as ui:
                xml = ui.read()
        except Exception:
            print("Error. El fichero no existe o no tiene formato XML")
            sys.exit()

        json = xml2json.data(fromstring(xml))
        json = self.manageProperties(json)
        strJson = dumps(json, sort_keys=True, indent=2)

        # Collapse the pretty-printed JSON onto a single line.
        strJson = strJson.replace("\n", "")
        strJson = " ".join(strJson.split())
        return strJson
Exemple #11
0
def tojson(xml):
    """Convert XML document with financial statements to JSON.

    xml (str): XBRL document text.

    Returns the contents of the 'xbrl' root element as a dict, or {} when
    the document cannot be parsed.
    """
    from xmljson import yahoo
    from lxml import etree
    try:
        root = etree.fromstring(xml.encode("utf-8"))
    except Exception:
        # FIX: narrowed from a bare 'except:' which also swallowed
        # SystemExit/KeyboardInterrupt.
        return {}
    # Strip namespaces from every tag and attribute name.
    for elem in root.iter():  # FIX: iter() replaces deprecated getiterator()
        try:
            tag = etree.QName(elem.tag)
        except Exception:
            # Comments / processing instructions have non-string tags.
            traceback.print_exc()
            continue
        elem.tag = tag.localname
        stripped = {}
        # FIX: iterate a snapshot of the items so deleting entries while
        # looping cannot misbehave.
        for key, value in list(elem.attrib.items()):
            stripped[etree.QName(key).localname] = value
            del elem.attrib[key]
        elem.attrib.update(stripped)

    return yahoo.data(root)["xbrl"]
Exemple #12
0
    def parse(self, name):
        """Convert a Qt .ui XML file into a compact one-line .dgi JSON string.

        name (str): path of the input file; must end in '.ui'.

        Returns the JSON string, or None when the filename is not a .ui file.
        Exits the process when the file cannot be read.
        """
        inputFile = name
        # BUG FIX: escape the dot -- "\w+.ui" also matched e.g. 'formXui'.
        if re.search(r"\w+\.ui", inputFile) is None:
            print("Error. El fichero debe tener extension .ui")
            return None

        try:
            # BUG FIX: close the file handle (it previously leaked); the
            # unused outputFile computation (the write was commented out)
            # has been removed as dead code.
            with open(inputFile, 'r') as ui:
                xml = ui.read()
        except Exception:
            print("Error. El fichero no existe o no tiene formato XML")
            sys.exit()

        json = xml2json.data(fromstring(xml))
        json = self.manageProperties(json)
        strJson = dumps(json, sort_keys=True, indent=2)

        # Collapse the pretty-printed JSON onto a single line.
        strJson = strJson.replace("\n", "")
        strJson = " ".join(strJson.split())
        return strJson
Exemple #13
0
    def parse_newznab_xml(self, feed, imdbid=None):
        ''' Parse xml from Newznab api.
        feed (str): xml feed text
        imdbid (str): imdb id #. Just numbers, do not include 'tt'

        Replaces all namespaces with 'ns', so namespaced attributes are
            accessible with the key '{ns}attr'

        Loads feed with xmljson in yahoo format
        Creates item dict for database table SEARCHRESULTS -- removes unused
            keys and ensures required keys are present (even if blank)

        Returns list of dicts of parsed nzb information.
        '''
        results = []

        # Rewrite every xmlns URL to the literal "ns" so namespaced keys are
        # always addressable as '{ns}attr' regardless of the indexer's URLs.
        feed = re.sub(r'xmlns:([^=]*)=[^ ]*"', r'xmlns:\1="ns"', feed)

        try:
            channel = yahoo.data(fromstring(feed))['rss']['channel']
            indexer = channel['title']
            items = channel['item']
            # A feed with exactly one entry parses to a dict, not a list.
            if type(items) != list:
                items = [items]
        except Exception as e:
            logging.error('Unexpected XML format from NewzNab indexer.',
                          exc_info=True)
            return []

        for item in items:
            try:
                # Flatten the namespaced newznab attributes into a plain dict.
                item['attr'] = {}
                for i in item['{ns}attr']:
                    item['attr'][i['name']] = i['value']

                result = {
                    "download_client":
                    None,
                    "downloadid":
                    None,
                    "freeleech":
                    1
                    if item['attr'].get('downloadvolumefactor', 1) == 0 else 0,
                    "guid":
                    item.get('link'),
                    "indexer":
                    indexer,
                    "info_link":
                    item.get('comments', '').split('#')[0],
                    "imdbid":
                    'tt{}'.format(imdbid if imdbid is not None else
                                  item['attr'].get('imdb')),
                    "pubdate":
                    item.get('pubDate', '')[5:16],
                    "score":
                    0,
                    "seeders":
                    0,
                    "size":
                    int(
                        item.get('size')
                        or item.get('enclosure', {}).get('length', 0)),
                    "status":
                    "Available",
                    "title":
                    item.get('title') or item.get('description'),
                    "torrentfile":
                    None,
                    "type":
                    self.feed_type
                }

                # NOTE(review): this overrides the imdbid computed in the dict
                # above, and the else branch reads self.imdbid rather than the
                # imdbid parameter -- confirm this is intended.
                if item['attr'].get('imdb'):
                    result['imdbid'] = 'tt{}'.format(item['attr'].get('imdb'))
                else:
                    result['imdbid'] = self.imdbid

                # Torrent/magnet feeds: the guid doubles as the torrent file;
                # magnet links are reduced to their info-hash for the guid.
                if result['type'] != 'nzb':
                    result['torrentfile'] = result['guid']
                    if result['guid'].startswith('magnet'):
                        result['guid'] = result['guid'].split('&')[0].split(
                            ':')[-1]
                        result['type'] = 'magnet'

                    result['seeders'] = item['attr'].get('seeders', 0)

                results.append(result)
            except Exception as e:
                logging.warning('', exc_info=True)
                continue

        return results
Exemple #14
0
# Python 2 script: rewrites an EC2 DescribeVpcs XML response (dropping its
# namespace) and converts it to JSON with xmljson.
xml_raw = []
for line in r.iter_lines():
  if re.search(r'<DescribeVpcsResponse',line):
    '''#print(line)'''
    # Replace the namespaced root open tag with a namespace-free one.
    xml_raw.append('<DescribeResponse xmlns="">')
  elif re.search(r'</DescribeVpcsResponse',line):
    '''#print(line)'''
    xml_raw.append('</DescribeResponse>')
  else:
    xml_raw.append(line)

# Python 2 idiom; "\n".join(xml_raw) is the modern equivalent.
xr_string=string.join(xml_raw, "\n")


# Convert the patched XML into a nested dict.
raw_data = xj_out.data(fromstring(xr_string))
#raw_data = dumps(xj_out.data(fromstring(xr_string)))

#print r.content
print ("raw_data ==", raw_data)
#print ("raw_json ==", raw_json)
#xj_data = raw_data["DescribeResponse"]

# Walk the top-level keys of the converted document.
xj_data = raw_data
for key0 in xj_data:
  print "key0 ==", key0
  ##print "data ==", xj_data[key0]
  #if re.search(r'vpcSet', key0):
  #  for key1 in xj_data[key0]['item']:
  #    print "key1 == ", key1
  #    #print "key1 == ", key1
# Creates string to pass into with HMAC authentication
signing_string = api_id + '\n' + str(
    api_ts) + '\n' + api_baseCall + '\n' + api_param
# Creates HMAC authentication, uses API secret, 'signing_string'
api_mac = hmac.new(api_secret.encode(), signing_string.encode(),
                   hashlib.sha1).hexdigest()
# Creates full address of API call, inserts API Id, time created, HMAC authentication code, and form ID
api_url = 'http://battletx.bsd.net/page/api/signup/get_signups_by_form_id?api_ver=2&api_id=' + api_id + '&api_ts=' + \
          str(api_ts) + '&api_mac=' + api_mac + '&signup_form_id=' + str(signup_form_id)

#Reformating BSD XML:
# NOTE(review): no error handling around the network call -- urlopen raises on failure.
api_xml_data = urllib.request.urlopen(
    api_url).read()  # Uses urllib library to read XML data from BSD API URL
doc = dumps(
    yh.data(fromstring(api_xml_data))
)  # Parses XML data using xmljson library, parses using yahoo standard
loaded_doc = json.loads(doc)  # Deserializes data
name_of_list_in_use = 'cmi_list'  # will be used in title of CSV file


# Function iterates over dictionary and checks keys, if keys match strings, count is altered
def indiv_dict_length(tuple):
    count = 0  # declares temporary count variable, returns it at end of function
    for k, v in tuple:
        if v != {}:
            if k == 'firstname':
                count += 1
            if k == 'lastname':
                count += 1
            if k == 'email':
Exemple #16
0
def parse_xml_entry(xml):
    """Convert a UniProt XML entry to a dict, dropping the UniProt namespace."""
    converted = yahoo.data(etree.fromstring(xml))
    as_text = json.dumps(converted).replace(UNIPROT_NS, '')
    return json.loads(as_text)['entry']
def _parse(xml, imdbid):
    """Parse LimeTorrents RSS/search results into a list of result dicts.

    xml (str): feed text.
    imdbid (str): imdb id attached to every parsed result.

    Returns list of result dicts (empty on malformed feeds or no items).
    """
    logging.info('Parsing LimeTorrents results.')

    try:
        rss = yahoo.data(fromstring(xml))['rss']['channel']
    except Exception as e:
        # NOTE(review): the indexer name is missing from this log message.
        logging.error('Unexpected XML format from ', exc_info=True)
        return []

    if 'item' not in rss:
        logging.info("No result found in LimeTorrents")
        return []

    host = base_url()
    results = []
    for i in rss['item']:
        result = {}
        try:
            result['score'] = 0
            result['size'] = int(i['size'])
            result['status'] = 'Available'
            result['pubdate'] = None
            result['title'] = i['title']
            result['imdbid'] = imdbid
            result['indexer'] = 'LimeTorrents'
            # Relative links are joined to the configured host.
            if i['link'][0] == '/':
                result['info_link'] = host + i['link']
            else:  # some proxies have wrong link url (https:https://...)
                result['info_link'] = re.sub(r'^(https:)+//', 'https://',
                                             i['link'])
            # Info hash: last path component of the enclosure URL, minus the
            # file extension.
            result['guid'] = i['enclosure']['url'].split('.')[-2].split(
                '/')[-1].lower()
            # itorrents.org enclosures are replaced with a magnet built from
            # the extracted hash.
            if re.search(r'https?://itorrents\.org/', i['enclosure']['url']):
                result['torrentfile'] = core.providers.torrent.magnet(
                    result['guid'], result['title'])
                result['type'] = 'magnet'
            else:
                result['torrentfile'] = i['enclosure']['url']
                result['type'] = 'torrent'
            result['downloadid'] = None
            result['freeleech'] = 0
            result['download_client'] = None

            # use 2 regular expressions
            # search has Seeds: X , Leechers Y
            # rss has Seeds: X<br />Leechers: Y<br />
            desc = i['description']
            matches = re.findall("Seeds:? *([0-9]+)", desc)
            if matches:
                result['seeders'] = int(matches[0])
            else:
                result['seeders'] = 0

            matches = re.findall("Leechers:? *([0-9]+)", desc)
            if matches:
                result['leechers'] = int(matches[0])
            else:
                result['leechers'] = 0

            results.append(result)
        except Exception as e:
            logging.error('Error parsing LimeTorrents XML.', exc_info=True)
            continue

    logging.info('Found {} results from Limetorrents.'.format(len(results)))
    return results
Exemple #18
0
def isCollaborationConfirmed(providerId, partnerId):
    """Stub check: collaboration between provider and partner is always confirmed."""
    confirmed = True
    return confirmed


# Accumulates transfer events parsed from the export file.
TRANSFER_EVENTS = []

# Loading XML from input file supplied in command line argument
xml_file_url = sys.argv[1]
# NOTE(review): the file handle is never closed -- consider a 'with' block.
xml_file = open(xml_file_url, "r")
xml_data = xml_file.read()

# Data from import file loaded as dictionary

try:
    import_data = parser.data(fromstring(xml_data))
except ParseError:
    error('Invalid XML file')

# Validate the expected OriginTrail export structure before reading fields.
if 'OrigintrailExport' not in import_data:
    error("Missing OrigintrailExport element!")

OrigintrailExport = import_data['OrigintrailExport']

if 'creationTimestamp' not in OrigintrailExport:
    error('Missing export creation timestamp!')

creationTimestamp = OrigintrailExport['creationTimestamp']

# Reading provider data
if 'Provider' not in OrigintrailExport:
Exemple #19
0
import sys
import os
from json import dumps
from xmljson import yahoo  as yahoo
import xml.etree.ElementTree as ET

# Exactly one command-line argument (the XML file path) is required.
if len(sys.argv) != 2:
    print("Wrong Path")
    exit()
# The argument must point at an existing file.
if os.path.isfile(sys.argv[1]):
    file = open(sys.argv[1], "r")
else:
    print("File isn't exist")
    exit()
root = ET.parse(file).getroot()
# Serialise the 'current' element as indented JSON.
converted_json = dumps(yahoo.data(root)["current"], indent=4)
base_name = sys.argv[1].split(".")[0]  # strip the .xml extension
with open(base_name + ".json", "w") as json_file:
    json_file.write(converted_json)
file.close()
print(base_name + ".json")
Exemple #20
0
 def __xml_to_json(self, xml):
     """Log the raw XML, then convert it to a plain dict via the yahoo convention."""
     self.__logger.info(xml)
     return json.loads(dumps(yahoo.data(fromstring(xml))))
Exemple #21
0
# Confirm with the user before processing; anything but yes aborts.
choice = input().lower()
if not choice in ["y", "yes", "ye"]:
    print("Aborting...")
    sys.exit(0)

# Walk every sub-folder of root_path and convert each odML XML file found.
for foldername in os.listdir(root_path):
    folder_path = os.path.join(root_path, foldername)
    if not os.path.isdir(folder_path):
        continue
    for filename in os.listdir(folder_path):
        print("FILENAME", filename)
        if not filename.endswith(".xml"):
            continue
        with open(os.path.join(folder_path, filename)) as f:
            xmldata = f.read()
            data = yh.data(fromstring(xmldata))

        # Known files that should not be converted.
        if filename in ["blackrock.xml", "stimulusTypes.xml"]:
            continue

        name = foldername + "_" + filename.replace(".xml", "")

        # Keep only the section's definition and name.
        data = data['odML']['section']
        result = {"definition": data["definition"], "name": data["name"]}
        try:
            properties = data["property"]
        except KeyError:
            print("ERROR on property")
            continue

        try:
Exemple #22
0
	sys.exit()

# Script entry: expects the .ui file path as the first CLI argument.
inputFile = sys.argv[1]
# NOTE(review): the dot is unescaped, so names like 'formXui' also match;
# r"\w+\.ui" is likely intended.
outputFile = re.search("\w+.ui", inputFile)

# NOTE(review): comparison with None should use 'is None'.
if outputFile == None:
	print("Error. El fichero debe tener extension .ui")
	sys.exit()

# Derive the output name by swapping the extension.
outputFile = re.sub(".ui", ".dgi", inputFile)

# NOTE(review): the input handle 'ui' is never closed; bare 'except:' also
# swallows SystemExit/KeyboardInterrupt.
try:
	ui = open(inputFile, 'r')
	xml = ui.read()
except:
	print("Error. El fichero no existe o no tiene formato XML")
	sys.exit()

# Convert the .ui XML to JSON and post-process its properties.
json = xml2json.data(fromstring(xml))
json = manageProperties(json)
strJson = dumps(json, sort_keys=True, indent=2)

try:
	dgi = open(outputFile, 'w')
	dgi.write(strJson)
	dgi.close()
except:
	print("Error. Ha habido un problema durante la escritura del fichero")
	sys.exit()

print("Hecho")
Exemple #23
0
#!/usr/bin/env python
''' Simple format converter from XML to JSON '''

import os
import re
from json import dumps
from xml.etree.ElementTree import fromstring
from xmljson import yahoo

INPUT_BASEPATH = os.path.expanduser('~/Downloads/data/set1')
# BUG FIX: escape the dot so only real '.xml' files match -- the old
# pattern '.*.xml$' also matched names like 'fooxml'.
EXT_REGEX = r'.*\.xml$'
REGEX_FLAGS = re.IGNORECASE

# Convert every XML file in the input directory to a sibling .json file.
for filename in os.listdir(INPUT_BASEPATH):
    if re.match(EXT_REGEX, filename, REGEX_FLAGS):
        outfname = os.path.join(INPUT_BASEPATH,
                                os.path.splitext(filename)[0] + '.json')
        # FIX: context managers close both handles even when parsing fails.
        with open(os.path.join(INPUT_BASEPATH, filename), 'r') as ifs, \
                open(outfname, 'w') as ofs:
            ofs.write(dumps(yahoo.data(fromstring(ifs.read()))))

        print('Conversion of %s is Done' % (filename))
Exemple #24
0
    def test_connection(indexer, apikey):
        ''' Tests connection to NewzNab API
        indexer (str): url of indexer
        apikey (str): indexer api key

        Test searches for imdbid tt0063350 (Night of the Living Dead 1968)

        Returns dict ajax-style response
        '''

        if not indexer:
            return {'response': False, 'error': _('Indexer URL is blank.')}

        # Strip any trailing slashes so the /api path joins cleanly.
        while indexer[-1] == '/':
            indexer = indexer[:-1]

        response = {}

        logging.info('Testing connection to {}.'.format(indexer))

        url = '{}/api?apikey={}&t=search&id=tt0063350'.format(indexer, apikey)

        try:
            r = Url.open(url)
            if r.status_code != 200:
                return {
                    'response': False,
                    'error': '{} {}'.format(r.status_code, r.reason.title())
                }
            else:
                response = r.text
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            logging.error('Newz/TorzNab connection check.', exc_info=True)
            return {
                'response':
                False,
                'error':
                _('No connection could be made because the target machine actively refused it.'
                  )
            }

        # A 'Missing parameter' error means the API key was accepted and only
        # the search parameters were incomplete, i.e. the connection works.
        error_json = yahoo.data(fromstring(response))

        e_code = error_json.get('error', {}).get('code')
        if e_code:
            if error_json['error'].get('description') == 'Missing parameter':
                logging.info('Newz/TorzNab connection test successful.')
                return {
                    'response': True,
                    'message': _('Connection successful.')
                }
            else:
                logging.error('Newz/TorzNab connection test failed. {}'.format(
                    error_json['error'].get('description')))
                return {
                    'response': False,
                    'error': error_json['error'].get('description')
                }
        elif 'unauthorized' in response.lower():
            logging.error(
                'Newz/TorzNab connection failed - Incorrect API key.')
            return {'response': False, 'error': _('Incorrect API key.')}
        else:
            logging.info('Newz/TorzNab connection test successful.')
            return {'response': True, 'message': _('Connection successful.')}
Exemple #25
0
 def addToJson(self, xml):
     """Serialise an lxml element and return it as a sorted, indented JSON string."""
     xml_text = etree.tostring(xml, pretty_print=True)
     converted = xml2json.data(fromstring(xml_text))
     return dumps(converted, sort_keys=True, indent=2)
Exemple #26
0
 def rua2json(self, xmldata, validation_result=[]):
     """ Returns a string in JSON format based on RUA XML input and its validation results
         with optionally resolved IP addresses. Resolved checks are validated somewhat.
     """
     # NOTE(review): mutable default argument validation_result=[] is shared
     # across calls -- it is only read here, but confirm no caller mutates it.
     # Setup result dict structures
     result = []
     result_dict = OrderedDict()
     feedback_list = []
     feedback_dict = {}
     feedback_dict["feedback"] = feedback_list
     validation_dict = {}
     if self.do_validate_xml:
         validation_dict["vendor_rua_xsd_validations"] = validation_result
     else:
         validation_dict["vendor_rua_xsd_validations"] = "None"
     # Get metadata elements from aggregate report
     meta_elements = ["report_metadata", "policy_published", "version"]
     for meta_element in meta_elements:
         try:
             element = yahoo.data(xmldata.find(meta_element))
         except Exception:
             self.helper.log_debug(
                 "rua2json: report did not contain metadata element, %s" %
                 meta_element)
         else:
             if meta_element == 'policy_published':
                 # convert keys and values to lowercase
                 element = self.dict2lower(element)
                 feedback_list.append(element)
             else:
                 feedback_list.append(element)
     records = xmldata.findall("record")
     self.helper.log_debug("rua2json: report_id %s has %d records" %
                           (xmldata.findtext("report_metadata/report_id",
                                             default=""), len(records)))
     # Get individual records from aggregate report
     for record in records:
         data_ip = record.findtext('row/source_ip')
         row_tag = record.find("row")
         if self.do_resolve:
             try:
                 self.helper.log_debug("rua2json: resolving %s" % data_ip)
                 resolve = socket.gethostbyaddr(data_ip)
                 backresolve = socket.gethostbyname_ex(resolve[0])
                 # Only trust the name when the forward lookup of the
                 # reverse name maps back to the original address.
                 if data_ip == backresolve[2][0]:
                     # Add resolved ip to row
                     ip_resolution = etree.SubElement(
                         row_tag, "ip_resolution")
                     ip_resolution.text = resolve[0]
             except Exception:
                 self.helper.log_debug("rua2json: failed to resolve %s" %
                                       data_ip)
         record = yahoo.data(record)
         record = self.dict2lower(record)
         feedback_list.append(record)
         # Aggregate report metadata, policy, record and xsd_validation
         result_dict.update(feedback_dict)
         result_dict.update(validation_dict)
         result.append(dumps(result_dict) + "\n")
         feedback_list.pop(
         )  # Remove record before adding next record to list
     self.helper.log_debug(
         "rua2json: report_id %s finished parsing" %
         xmldata.findtext("report_metadata/report_id", default=""))
     return result
Exemple #27
0
def xml_json(data: ET.Element) -> dict:
    """Convert an ElementTree element to a dict using xmljson's yahoo convention."""
    return yahoo.data(data)
Exemple #28
0
def get_and_convert(url):
    """Fetch *url* and convert the XML response body to a dict (yahoo convention)."""
    response = requests.get(url)
    return yahoo.data(fromstring(response.content))
Exemple #29
0
import pandas as pd
from os import walk
from xmljson import badgerfish as bf
from xmljson import yahoo as yh
from xml.etree.ElementTree import fromstring, parse
from json import dumps, loads

# Filenames of the 'data_01' directory (walk()[0][2] is the file list).
firstdata_filelist = list(walk('data_01'))[0][2]

first_file = firstdata_filelist[0]
with open('data_01/{}'.format(first_file)) as file_xml:
    first_file_data = file_xml.read()
# Convert the same document with both badgerfish and yahoo conventions.
parsed_data = bf.data(fromstring(first_file_data))
yahoo_data = dumps(yh.data(fromstring(first_file_data)))
first_file_json = dumps(parsed_data, indent=4)

# print(dict(parsed_data))

# keys = list(dict(parsed_data).keys())
# Load the yahoo-style JSON into pandas and export it as a spreadsheet.
firstdata_dict = dict(parsed_data)
firstdata_series = pd.read_json(yahoo_data)
firstdata_series.to_excel('first_data.xlsx')
print(firstdata_series)
# print(dict(parse('data_01/{}'.format(first_file))))
# while keys:
#     for key in keys:
#
Exemple #30
0
 def addToJson(self, xml):
     """Serialize an lxml element to a sorted, pretty-printed JSON string."""
     pretty_xml = etree.tostring(xml, pretty_print=True)
     as_dict = xml2json.data(fromstring(pretty_xml))
     return dumps(as_dict, sort_keys=True, indent=2)
Exemple #31
0
# Script: fetch an Intellectual Entity (IE) METS record from a Rosetta
# repository and dump it to mets.json.
# NOTE(review): `df` is defined outside this fragment — presumably a
# DataFrame of IE PIDs loaded earlier; this looks like a notebook cell
# (df.head() without print only renders in a notebook). Verify.
df.head()  # Displaying first 5 rows

api_endpoint = 'http://digital.sl.nsw.gov.au'
api_pds_endpoint = 'https://libprd70.sl.nsw.gov.au/pds'
api_sru_endpoint = 'http://digital.sl.nsw.gov.au/search/permanent/sru'

# Credentials redacted in this copy of the script.
api_username = '******'
api_password = '******'
api_institude_code = 'SLNSW'

ros = Rosetta(api_endpoint,
              api_pds_endpoint,
              api_sru_endpoint,
              api_username,
              api_password,
              api_institude_code,
              api_timeout=1200)

# Fifth row's IE PID; raw=True presumably returns an XML element. TODO confirm.
IE_PID = df["IE PID"][4]
r = ros.iews_get_ie(IE_PID, 0, raw=True)

json_data = yahoo.data(r)
root_str = ET.tostring(r, encoding='utf-8').decode('utf-8')

# NOTE(review): xml_json elsewhere takes an ET.Element, but root_str here is
# a string — this call likely relies on a different xml_json; confirm.
mets_ordereddict = xml_json(root_str)
mets_json = json.dumps([mets_ordereddict])
mets_dict = json.loads(mets_json)

with open('mets.json', 'w') as fp:
    json.dump(mets_dict, fp, ensure_ascii=False, indent=2)
Exemple #32
0
    def parse_newznab_xml(self, feed, imdbid=None):
        ''' Parse xml from Newznab api.
        feed (str): xml feed text
        imdbid (str): imdb id #. Just numbers, do not include 'tt'

        Replaces all namespaces with 'ns', so namespaced attributes are
            accessible with the key '{ns}attr'

        Loads feed with xmljson in yahoo format
        Creates item dict for database table SEARCHRESULTS -- removes unused
            keys and ensures required keys are present (even if blank)

        Returns list of dicts of parsed nzb information.
        '''
        results = []

        # Rewrite every xmlns:prefix="..." declaration to point at the literal
        # URI "ns", so namespaced children all parse as '{ns}name'.
        feed = re.sub(r'xmlns:([^=]*)=[^ ]*"', r'xmlns:\1="ns"', feed)

        try:
            channel = yahoo.data(fromstring(feed))['rss']['channel']
            indexer = channel['title']
            items = channel.get('item', [])
            # yahoo.data collapses a single <item> to a dict; normalize to list.
            if type(items) != list:
                items = [items]
        except Exception as e:
            logging.error('Unexpected XML format from NewzNab indexer.',
                          exc_info=True)
            logging.debug(feed)
            return []

        for item in items:
            try:
                # Flatten the namespaced <newznab:attr name=... value=...>
                # elements into a plain name->value dict.
                item['attr'] = {}
                for i in item['{ns}attr']:
                    item['attr'][i['name']] = i['value']

                if (self.feed_type == 'torrent'):
                    # Jackett doesn't properly encode query string params so we do it here.
                    rt, qs = item.get('link', '?').split('?')
                    if rt == qs == '':
                        guid = None
                    else:
                        qsprs = urllib.parse.parse_qs(qs)
                        params = []
                        # Keep the magnet 'xt' (exact topic) param first, unquoted.
                        if 'xt' in qsprs:
                            params.append('xt=' + qsprs.pop('xt')[0])
                        for k in qsprs:
                            for v in qsprs[k]:
                                params.append('{}={}'.format(
                                    k, urllib.parse.quote(v)))
                        guid = rt + '?' + '&'.join(params)
                else:
                    guid = item.get('link')

                result = {
                    "download_client":
                    None,
                    "downloadid":
                    None,
                    # downloadvolumefactor == 0 means the torrent is freeleech.
                    "freeleech":
                    float(item['attr'].get('downloadvolumefactor', 1)) == 0.0,
                    "guid":
                    guid,
                    "indexer":
                    indexer,
                    # Drop any '#comment-N' fragment from the comments URL.
                    "info_link":
                    item.get('comments', '').split('#')[0],
                    "imdbid":
                    imdbid if imdbid is not None else 'tt{}'.format(
                        item['attr'].get('imdb')),
                    # Slice [5:16] of an RFC 2822 pubDate ('Mon, 01 Jan 2000...')
                    # yields the 'DD Mon YYYY' portion.
                    "pubdate":
                    item.get('pubDate', '')[5:16],
                    "score":
                    0,
                    "seeders":
                    0,
                    "size":
                    int(
                        item.get('size')
                        or item.get('enclosure', {}).get('length', 0)),
                    "status":
                    "Available",
                    "title":
                    item.get('title') or item.get('description'),
                    "torrentfile":
                    None,
                    "type":
                    self.feed_type
                }

                if result['type'] != 'nzb':
                    result['torrentfile'] = result['guid']
                    # For magnet links, the guid becomes the bare info-hash
                    # (text after the last ':' of the xt param, before '&').
                    if result['guid'].startswith('magnet'):
                        result['guid'] = result['guid'].split('&')[0].split(
                            ':')[-1]
                        result['type'] = 'magnet'

                    result['seeders'] = item['attr'].get('seeders', 0)

                results.append(result)
            except Exception as e:
                # Skip malformed items but keep parsing the rest of the feed.
                logging.warning('', exc_info=True)
                continue

        return results