Ejemplo n.º 1
0
    def from_xml(cls, xml_folder, name="XML Dataset"):
        """
        Generates a dataset from a folder with XML and corresponding images

        Every ``*.jpg`` file found in ``xml_folder`` is paired with the
        ``.xml`` file that has the same stem.  Each ``object`` entry of the
        XML becomes one bounding-box annotation on the image.

        :param xml_folder: folder holding the images and their XML files
        :type xml_folder: pathlib.Path
        :param name: name given to the dataset and stored on every image
        :type name: str
        :return: the populated dataset
        :raise ImportError: if ``xmljson`` is not installed
        """
        from xmljson import badgerfish as bf
        from xml.etree.ElementTree import fromstring

        def read_objects(image_path):
            # Read the XML that sits next to the image; `with` guarantees the
            # file handle is closed again.
            with open(image_path.with_suffix(".xml"), "r") as handle:
                xml = bf.data(fromstring(handle.read()))
            # An image without any annotated object has no "object" key.
            objects = xml["annotation"].get("object", [])
            # Handle single object case (BadgerFish yields a dict, not a list)
            return objects if isinstance(objects, list) else [objects]

        dataset = cls(name)
        image_paths = list(xml_folder.glob("*.jpg"))

        # Parse every XML exactly once and reuse the result for both the
        # category scan and the annotation pass.
        objects_per_image = [read_objects(imgp) for imgp in image_paths]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # category ids are reproducible between runs.
        category_names = dict.fromkeys(
            ann["name"]["$"]
            for objects in objects_per_image
            for ann in objects
        )
        xml_categories = {
            cat: Category(cat, id=idx + 1)
            for idx, cat in enumerate(category_names)
        }

        for idx, (imgp, objects) in enumerate(
                zip(image_paths, objects_per_image)):
            image = Image.from_path(str(imgp))
            image.id = idx
            image.dataset = name

            for ann in objects:
                box = ann["bndbox"]
                # Box is stored as [xmin, ymin, xmax, ymax]
                bbox = [int(box["xmin"]["$"]), int(box["ymin"]["$"]),
                        int(box["xmax"]["$"]), int(box["ymax"]["$"])]

                image.add(Annotation(image=image,
                                     bbox=bbox,
                                     category=xml_categories[ann["name"]["$"]]))

            dataset.add(image)
        return dataset
Ejemplo n.º 2
0
    def __init__(self, source):
        """Build a Podcast from RSS XML given as a file path or as an XML string.

        If ``source`` names an existing file it is parsed from disk, otherwise
        ``source`` itself is parsed as XML text.  Channel-level fields are
        copied onto the instance; a field missing from the feed becomes None.
        """

        logger.debug("Initializing podcast from source: " + source)

        if not os.path.isfile(source):
            data = bf.data(fromstring(source))
        else:
            data = bf.data(ET.parse(source).getroot())

        channel = data.get("rss").get("channel")

        def text_of(tag):
            # BadgerFish stores element text under "$"; absent tag -> None.
            return channel.get(tag, {}).get("$")

        self.source = source
        self.channel = channel
        self.author = text_of("author")
        self.copyright = text_of("copyright")
        self.description = text_of("description")
        self.explicit = text_of("explicit")
        self.generator = text_of("generator")
        self.language = text_of("language")
        self.managing_editor = text_of("managingEditor")
        self.new_feed_url = text_of("new-feed-url")
        self.pub_date = text_of("pubDate")
        self.subtitle = text_of("subtitle")
        self.summary = text_of("summary")
        self.title = text_of("title")
        self.type = text_of("type")
        self.web_master = text_of("webMaster")
Ejemplo n.º 3
0
def get_canadian_weather(city, province=None):
    """Return the BadgerFish ``siteData`` forecast for one Canadian city.

    The site list is looked up first (fetched through ``get_cached_url`` with
    a 30 day window) and must contain exactly one entry whose English name is
    ``city`` and, when ``province`` is given, whose province code matches.
    The forecast for that entry is then fetched with a one hour window.

    :raises AssertionError: if zero or more than one site matches.
    """

    # Get city list
    site_list_xml = get_cached_url(
        "http://dd.weather.gc.ca/citypage_weather/xml/siteList.xml",
        timedelta(days=30),
    )
    entries = badgerfish.data(fromstring(site_list_xml))["siteList"]["site"]

    def is_match(candidate):
        if candidate["nameEn"]["$"] != city:
            return False
        return province is None or candidate["provinceCode"]["$"] == province

    matching_entries = [candidate for candidate in entries
                        if is_match(candidate)]
    assert len(
        matching_entries
    ) == 1, f"Your provided city/province matched {len(matching_entries)} entries: {matching_entries}"
    entry = matching_entries[0]

    # Get city forecast
    forecast_xml = get_cached_url(
        f"""http://dd.weather.gc.ca/citypage_weather/xml/{entry["provinceCode"]["$"]}/{entry["@code"]}_e.xml""",
        timedelta(hours=1),
    )
    return badgerfish.data(fromstring(forecast_xml))["siteData"]
Ejemplo n.º 4
0
def get_congress_votes(year, roll_num, congress, session):
    """Download one House roll-call vote and return it as a DataFrame.

    :param year: year used in the clerk.house.gov URL
    :param roll_num: roll-call number; one- and two-digit values are
        zero-padded to three characters for the URL
    :param congress: value stored in the ``congress`` column
    :param session: value stored in the ``session`` column
    :return: DataFrame with one row per recorded vote (columns
        member_full, bioguide_id, party, role, state, vote, year, roll,
        congress, session, date), or ``None`` when the expected sections
        are missing from the XML (KeyError).
    """
    import pandas as pd
    import requests
    from xmljson import badgerfish as bf
    from xml.etree.ElementTree import fromstring
    from pandas.io.json import json_normalize

    # Zero-pad short roll numbers; other lengths are left untouched.
    if len(str(roll_num)) in (1, 2):
        roll_num = str(roll_num).zfill(3)

    url = 'http://clerk.house.gov/evs/{}/roll{}.xml'.format(year, roll_num)
    page = requests.get(url)
    try:
        # Parse the payload once; both the votes and the metadata come
        # from the same frame.
        rollcall = pd.DataFrame(bf.data(fromstring(page.content)))
        df = json_normalize(
            rollcall.loc['vote-data', 'rollcall-vote']['recorded-vote'])
        try:
            df.columns = [
                'member_full', 'bioguide_id', 'party', 'role', 'name',
                u'state', 'unaccented-name', 'vote'
            ]
        except ValueError:
            # Length mismatch: the feed has the five-column layout that
            # carries no bioguide id.
            df.columns = ['member_full', 'party', 'role', 'state', 'vote']
            df.loc[:, 'bioguide_id'] = None
        df = df[[
            'member_full', 'bioguide_id', 'party', 'role', u'state', 'vote'
        ]]

        df.loc[:, 'year'] = year
        df.loc[:, 'roll'] = roll_num
        df.loc[:, 'congress'] = congress
        df.loc[:, 'session'] = session
        df.loc[:, 'date'] = pd.to_datetime(
            json_normalize(
                rollcall.loc['vote-metadata',
                             'rollcall-vote']).loc[0, 'action-date.$'])

        return df
    except KeyError:
        # No vote data / no date for this vote: signal "nothing" to the caller.
        return None
Ejemplo n.º 5
0
 def _dochits_to_objset(self, docHits):
     """Resolve each hit to its Dublin Core record and return them as dicts.

     For every element of ``docHits`` the object URL (the element text) is
     fetched, the oai_dc metadata fragment URL is read from the response,
     that fragment is fetched, and its ``dc`` element is converted to a
     BadgerFish dict with the DC namespace prefix removed.  The Preservica
     id taken from the object URL is injected under ``preservica_id``.

     Hits with no oai_dc fragment or no ``dc`` element are skipped instead
     of reusing the previous hit's metadata URL or raising.

     Side effects: ``self.doc_current`` grows by ``len(docHits)`` and
     ``self.url_API`` advances to ``self.url_next`` when that is set.
     """
     auth = HTTPBasicAuth(self.uname.strip(), self.pswd.strip())
     fragment_path = ('{http://preservica.com/EntityAPI/v6.0}'
                      'AdditionalInformation/{http://preservica.com/'
                      'EntityAPI/v6.0}Metadata/{http://preservica.com/'
                      'EntityAPI/v6.0}Fragment[@schema="http://www.open'
                      'archives.org/OAI/2.0/oai_dc/"]')
     dc_path = ('{http://preservica.com/XIP/v6.0}MetadataContainer/'
                '{http://preservica.com/XIP/v6.0}Content/'
                '{http://www.openarchives.org/OAI/2.0/oai_dc/}dc')

     objset = []
     #iterate through docHits
     for d in docHits:
         # need to descend two layers in API for object metadata
         url_object = d.text
         obj_resp = requests.get(url_object, auth=auth)
         objTree = ET.fromstring(obj_resp.content)

         # Reset per hit so a hit without a fragment never inherits the
         # previous hit's URL; the last matching fragment wins.
         url_mdata = None
         for mdataRef in objTree.findall(fragment_path):
             url_mdata = mdataRef.text
         if url_mdata is None:
             continue

         mdata_resp = requests.get(url_mdata, auth=auth)
         mdataTree = ET.fromstring(mdata_resp.content)
         records = [
             # strip namespace from JSON
             json.dumps(badgerfish.data(x)).replace('{http://purl.org/dc/elements/1.1/}','')
             for x in mdataTree.findall(dc_path)
         ]
         if not records:
             continue

         # need to inject Preservica ID into json for isShownAt/isShownBy
         preservica_id = url_object.split('information-objects/')[1]
         jRecord = json.loads(records[0])
         jRecord.update({"preservica_id": { "$": preservica_id } })
         objset.append(jRecord)
     self.doc_current += len(docHits)
     if self.url_next:
         self.url_API = self.url_next
     return objset
Ejemplo n.º 6
0
    def handle(self, *args, **options):
        """Fetch XML_SOURCE and build a job-data dict for every ``item`` element.

        Each dict is only built and its ``kind`` printed; nothing is stored.
        The number of processed items is printed at the end.
        """
        response = requests.get(XML_SOURCE)

        count = 0
        for element in ET.fromstring(response.text).iter('item'):
            d = bf.data(element)
            images = [d['item']['image_link']['$']] if d['item']['image_link'] else []
            entry = d['item']

            # NOTE(review): `start_date` is not defined in this method; it
            # must come from module scope -- confirm.
            data = dict([
                ('identifier', entry['id']['$']),
                ('title', entry['title']['$']),
                ('start_date', start_date),
                ('kind', entry['vertragsart']['$']),
                ('link', entry['link']['$']),
                ('description', entry['description']['$']),
                ('location', entry['standort']['$']),
                ('images', images),
                ('contact_email', CONTACT_EMAIL),
                ('company', {
                    'name': entry['unternehmen']['$'],
                    'address': {
                        'street': '',
                        'zip': '',
                        'city': entry['standort']['$'],
                    }
                }),
            ])
            print(data['kind'])
            count += 1
        print(count)
Ejemplo n.º 7
0
def formatEmailBodyToJson(emailBody):
    """Extract the Infringement XML from a raw email body and return it as JSON data.

    ``emailBody`` is a bytes object.  The steps are:

    1. remove soft line breaks (``=`` followed by a newline);
    2. remove every literal ``3D`` left behind by the mail encoding;
    3. cut out everything from the first ``xmlns=`` up to and including the
       first ``.xsd"`` (the namespace/schema attributes);
    4. keep only the span from ``<?xml`` to the closing ``</Infringement>``;
    5. parse that XML and convert it via BadgerFish into plain JSON types.
    """
    closing_tag = b"</Infringement>"

    # Steps 1 and 2: undo line continuation, drop the '3D' artifacts.
    cleaned = emailBody.replace(b'=\n', b'').replace(b'3D', b'')

    # Step 3: strip the namespace/schema attributes that break conversion.
    attrs_begin = cleaned.find(b'xmlns=')
    attrs_finish = cleaned.find(b'.xsd\"') + 5
    cleaned = cleaned[:attrs_begin] + cleaned[attrs_finish:]

    # Step 4: isolate the XML document inside the email.
    xml_begin = cleaned.find(b"<?xml")
    xml_finish = cleaned.find(closing_tag) + len(closing_tag)
    xml_payload = cleaned[xml_begin:xml_finish]

    # Step 5: the dumps/loads round trip turns the result into plain dicts.
    return loads(dumps(bf.data(fromstring(xml_payload))))
Ejemplo n.º 8
0
def do_sub_element(el_list, el_type):
    """Look up additional elements at ENA and return their attributes.

    :param el_list: one accession (str) or a list of accessions
    :param el_type: element type; when it contains a '-', only the part
        after the first '-' is used, otherwise the whole value is used
    :return: list with one ``iterate_over_attributes`` result per accession;
        empty when the type is not one of SUBMISSION, STUDY, SAMPLE or
        EXPERIMENT
    """
    # element types worth a lookup
    do_if = ('SUBMISSION', 'STUDY', 'SAMPLE', 'EXPERIMENT')

    # Previously a type without '-' left short_form_type unbound and raised
    # UnboundLocalError; fall back to the type as given.
    _, dash, suffix = el_type.partition('-')
    short_form_type = suffix if dash else el_type

    out_list = []
    if short_form_type not in do_if:
        return out_list

    if isinstance(el_list, str):
        el_list = [el_list]

    for el in el_list:
        # do lookup
        resp = requests.get('http://www.ebi.ac.uk/ena/data/view/' + el +
                            '%26display%3Dxml').content
        data = bf.data(fromstring(resp.decode('utf-8')))
        out_list.append(iterate_over_attributes(data, short_form_type))

    return out_list
Ejemplo n.º 9
0
def vmID(vcloud_token, vapp):
    """Return the id of the VM inside a vApp.

    :param vcloud_token: value sent as ``x-vcloud-authorization``
    :param vapp: vApp id appended to the API URL
    :return: ``(vm_id, False)`` on success, ``("false", True)`` on any error;
        the id is the sixth '/'-separated piece of the VM's ``@href``
    """
    try:
        url = "https://10.10.21.155/api/vApp/" + vapp

        headers = {
            'Content-Type': "application/vnd.vmware.admin.organization+xml",
            'x-vcloud-authorization': vcloud_token,
            'Accept': "application/*+xml;version=31.0",
        }

        # NOTE(review): verify=False turns off TLS certificate checking.
        response = requests.request("GET", url, verify=False, headers=headers)
        doc = ET.fromstring(response.text.encode('utf-8'))
        # Round-trip through JSON text to drop the vCloud namespace prefix
        # from every key.
        jsonStr = json.dumps(bf.data(doc))
        jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}", "")
        y = json.loads(jsonStr)
        # Single-argument print() calls behave the same on Python 2 and 3.
        print(y)
        vm = y["VApp"]["Children"]["Vm"]
        print(vm["@name"])
        print(vm["@href"])
        vm_id = vm["@href"].split("/")
        print(vm_id[5])
        return vm_id[5], False
    except Exception:
        print("false")
        return "false", True
Ejemplo n.º 10
0
def vapp_id(vcloud_token, vdc_id):
    """Return the id of the resource entity (vApp) inside a VDC.

    :param vcloud_token: value sent as ``x-vcloud-authorization``
    :param vdc_id: VDC id appended to the API URL
    :return: ``(vapp_id, False)`` on success, ``("false", True)`` on any
        error; the id is the sixth '/'-separated piece of the entity's
        ``@href``
    """

    try:
        url = "https://10.10.21.155/api/vdc/" + vdc_id + ""

        headers = {
            'Content-Type': "application/vnd.vmware.admin.organization+xml",
            'x-vcloud-authorization': vcloud_token,
            'Accept': "application/*+xml;version=31.0",
        }

        # NOTE(review): verify=False turns off TLS certificate checking.
        response = requests.request("GET", url, verify=False, headers=headers)
        doc = ET.fromstring(response.text.encode('utf-8'))
        # Round-trip through JSON text to drop the vCloud namespace prefix
        # from every key.
        jsonStr = json.dumps(bf.data(doc))
        jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}", "")
        y = json.loads(jsonStr)
        href = y["Vdc"]["ResourceEntities"]["ResourceEntity"]["@href"]
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("%s debug" % href)
        href_parts = href.split("/")
        print(href_parts[5])
        return href_parts[5], False
    except Exception:
        print("false")
        return "false", True
def load_gpx(file_location):
    """Load the track points of a GPX 1.1 file as a time-sorted list.

    :param file_location: path of the GPX file
    :return: list of ``[seconds, (latitude, longitude, elevation)]`` entries
        sorted by ``seconds``, where ``seconds`` is the offset from the
        earliest track point
    """
    # Local import: only `dumps` is known to be in scope at module level.
    import json

    with open(file_location, 'rt') as gpx_file:
        parsed = bf.data(fromstring(gpx_file.read()))
    # json.loads instead of eval(): eval executes the text as Python code
    # and breaks on JSON's null/true/false.
    my_data = json.loads(dumps(parsed))

    a = '{http://www.topografix.com/GPX/1/1}'
    S = []
    md = None  # earliest timestamp seen so far
    for p in my_data[a + "gpx"][a + "trk"][a + "trkseg"][a + "trkpt"]:
        datetime_object = datetime.strptime(p[a + "time"]['$'],
                                            '%Y-%m-%dT%H:%M:%SZ')
        if md is None or datetime_object < md:
            md = datetime_object
        S.append(
            multi_sensor_point(datetime_object, float(p['@lat']),
                               float(p[a + "ele"]['$']), float(p['@lon']),
                               None, None))

    ##  Duplet of (time, coordinates) :  (time, (lat, lon, ele))
    X = [[(x.datetime - md).total_seconds(),
          (x.latitude, x.longitude, x.elevation)] for x in S]
    return sorted(X, key=operator.itemgetter(0))
Ejemplo n.º 12
0
def topiczoom(text):
    """Send ``text`` to the TopicZoom quick index and return its topics.

    :param text: text posted as the request body (``lang=de``)
    :return: list of BadgerFish topic dicts sorted by ``@weight``, highest
        first
    """
    def remove_namespaces(e):
        # Element.iter() replaces getiterator(), which was removed in
        # Python 3.9.
        for elem in e.iter():
            if not hasattr(elem.tag, 'find'): continue  # (1)
            i = elem.tag.find('}')
            if i >= 0:
                elem.tag = elem.tag[i + 1:]
        return (e)

    params = (('lang', 'de'), )

    response = requests.post('http://twittopic.topiczoom.de/quickindex.xml',
                             params=params,
                             data=text)

    feedjson = bf.data(remove_namespaces(fromstring(response.text)))

    topiczoom_list = feedjson["Envelope"]["Body"]["TZTopicSet"]["TZTopic"]
    # BadgerFish yields a dict, not a list, when there is only one topic.
    if not isinstance(topiczoom_list, list):
        topiczoom_list = [topiczoom_list]

    topiczoom_list = sorted(topiczoom_list,
                            reverse=True,
                            key=lambda x: x["@weight"])

    return (topiczoom_list)
Ejemplo n.º 13
0
def get_GO_terms(pdb_id):
    """Fetch the GO terms RCSB reports for a PDB chain.

    :param pdb_id: first four characters are the structure id, the rest is
        the chain id
    :return: the ``term`` entry of the ``goTerms`` response, or ``[]`` when
        the response has none
    :raises requests.HTTPError: if the status code is not 200
    """
    structure = pdb_id[:4]
    chain = pdb_id[4:]
    url = 'http://www.rcsb.org/pdb/rest/goTerms?structureId=%s.%s' % (structure, chain)
    req = requests.get(url)
    if req.status_code != 200:
        raise requests.HTTPError('HTTP Error %s' % req.status_code)
    data = bf.data(fromstring(req.content))['goTerms']
    if 'term' in data:
        return data['term']
    return []
Ejemplo n.º 14
0
def create_sessions(hostname, username, password):
    """Open a vCloud API session and return its authorization token.

    :param hostname: host used to build ``https://<hostname>/api/sessions``
    :param username: user name for HTTP Basic authentication
    :param password: password for HTTP Basic authentication
    :return: the ``x-vcloud-authorization`` response header on a 200 whose
        body parses as XML; ``None`` otherwise
    """
    url = "https://" + hostname + "/api/sessions"

    payload = ""
    # b64encode needs bytes on Python 3; encode first and decode the result
    # so the header value is text on both Python 2 and 3.
    credentials = (username + ":" + password).encode("utf-8")
    headers = {
        'Accept': "application/*+xml;version=31.0",
        'Authorization': "Basic " + base64.b64encode(credentials).decode("ascii")
    }

    # NOTE(review): verify=False turns off TLS certificate checking.
    response = requests.request("POST",
                                url,
                                verify=False,
                                data=payload,
                                headers=headers)
    try:
        if response.status_code == 200:
            print(response)
            # The body is parsed only to confirm it is well-formed XML; a
            # parse failure lands in the except branch below.
            doc = ET.fromstring(response.text.encode('utf-8'))
            jsonStr = json.dumps(bf.data(doc))
            jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}",
                                      "")
            json.loads(jsonStr)
            return response.headers.get("x-vcloud-authorization")
    except Exception:
        print(response)
    return None
Ejemplo n.º 15
0
def storeTree(item):
    """Parse one IATI XML file and report whether it holds activities or organisations.

    :param item: object accepted by ``ET.parse`` that also has a ``.name``
        attribute (used in the progress messages)

    A parse failure is reported on stderr and the file is skipped.  The
    schemas and the Firestore client are created for the ``pruneTree`` calls,
    which are currently disabled.
    """
    print(f'{item.name} start: {datetime.now()}')
    actXSD = xmlschema.XMLSchema(
        '/Users/john/Development/HumAI_data/Schema/iati-activities-schema.xsd')
    orgXSD = xmlschema.XMLSchema(
        '/Users/john/Development/HumAI_data/Schema/iati-organisations-schema.xsd'
    )

    db = firestore.Client()

    try:
        tree = ET.parse(item)
    except ET.ParseError as exp:
        # The former `parse_error += 1` acted on a local that was never
        # initialised and raised UnboundLocalError inside this handler.
        print('Parse error:', exp, file=sys.stderr)
    else:
        tree_ = bf.data(tree.getroot())
        if "iati-activities" in tree_:
            print('Prune activity ', item.name)
            # disabled: pruneTree(db, None, tree_, actXSD)
        elif "iati-organisations" in tree_:
            print('Prune organisation ', item.name)
            # disabled: pruneTree(db, None, tree_, orgXSD)
    print(f'{item.name} end: {datetime.now()}')
Ejemplo n.º 16
0
    def json(self, conversion: _Text = 'badgerfish') -> Mapping:
        """A JSON Representation of the XML.  Default is badgerfish.

        The result is a JSON string that is computed once and cached on the
        instance; later calls return the cached value whatever ``conversion``
        they pass.

        :param conversion: Which conversion method to use. (`learn more <https://github.com/sanand0/xmljson#conventions>`_)
        :raises ValueError: if ``conversion`` is not a supported convention.
        """
        if not self._json:
            supported = ('badgerfish', 'abdera', 'cobra', 'gdata', 'parker',
                         'yahoo')
            # Compare by value: `is` on strings tests object identity and
            # only matched by accident of interning.
            if conversion not in supported:
                raise ValueError(
                    'Unknown conversion %r; expected one of %s' %
                    (conversion, ', '.join(supported)))

            import xmljson
            serializer = getattr(xmljson, conversion)

            self._json = json.dumps(serializer.data(etree.fromstring(
                self.xml)))

        return self._json
Ejemplo n.º 17
0
def handle_xml_pull(request):
    """Render the selection page for a stored XML session.

    The session id is read from the ``query`` GET parameter when present,
    otherwise from the ``fileId`` POST field.  Redirects to the displayer
    with ``error=xml-lost`` when the session does not exist and with
    ``error=xml-parse-error`` when parsing yields no root.
    """
    xmlFileId = (request.GET['query']
                 if 'query' in request.GET else request.POST['fileId'])

    try:
        f = XmlSession.objects.get(id__exact=xmlFileId)
    except ObjectDoesNotExist:
        return redirect('/displayer/?error=xml-lost')

    result = xel.parseString(str(f.etreeString))
    root = result.root

    if root is None:
        return redirect('/displayer/?error=xml-parse-error')

    # Remember which file this session works on.
    request.session['fileId'] = xmlFileId
    context = dict(
        file_json=dumps(bf.data(root)),
        delete_list=f.delete_list,
        file_name=f.xml_file_name,
        parse_error=dumps(result.error),
    )
    return render(request, 'displayer/select.html', context)
Ejemplo n.º 18
0
def get_all_departments():
    """Return the ``subject`` entries of the Spring 2017 course catalog.

    The catalog XML is downloaded from courses.illinois.edu and converted
    with BadgerFish; the value found under term -> subjects -> subject is
    returned unchanged.
    """
    catalog_url = 'http://courses.illinois.edu/cisapp/explorer/catalog/2017/spring.xml'
    catalog = bf.data(fromstring(requests.get(catalog_url).text))
    term = catalog['{http://rest.cis.illinois.edu}term']
    return term['subjects']['subject']
Ejemplo n.º 19
0
def xml_to_json(input, output):
    """Convert ``<input>.xml`` to a BadgerFish JSON file named after ``output``.

    BadgerFish uses "$" for text content and "@" to prefix attributes.

    If ``<output>.json`` already exists, a counter (0, 1, 2, ...) is appended
    to the original output name until an unused file name is found.
    """
    base_name = output
    suffix = 0

    with open(input + ".xml", "r") as xml_file:
        jsonOut = bf.data(fromstring(xml_file.read()))

        # Look for a free output name while the input is still open.
        while os.path.isfile(output + '.json'):
            print("lastoutput"+output[-1])
            print("i "+str(suffix))
            # Always derive the candidate from the original name so the
            # counters do not pile up (name0, name1, ... not name01).
            output = base_name + str(suffix)
            suffix += 1

    with open(output + ".json", "w+") as newFile:
        json.dump(jsonOut, newFile, ensure_ascii=False)
def get_senate_by_gov(df):
    """Download the senate.gov contact list and return it as a DataFrame.

    Some of the urls don't work the first time, so the request is sent with
    a browser User-Agent and the system proxy settings.

    :param df: ignored; kept so existing callers keep working
    :return: DataFrame with one row per ``member`` entry; '$' and '.' are
        removed from the column names
    """
    import pandas as pd
    import requests
    from xmljson import badgerfish as bf
    from xml.etree.ElementTree import fromstring
    from pandas.io.json import json_normalize
    try:
        from urllib.request import getproxies  # Python 3
    except ImportError:
        from urllib import getproxies  # Python 2

    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    }
    url = 'http://www.senate.gov/general/contact_information/senators_cfm.xml'
    print(url)
    r = requests.get(url, headers=headers, proxies=getproxies())

    df = json_normalize(pd.DataFrame(bf.data(fromstring(r.content))).loc['member', 'contact_information'])
    # regex=False: '$' and '.' are meant literally.  As regular expressions
    # they would match end-of-string and any character.
    df.columns = (df.columns.str.replace('$', '', regex=False)
                  .str.replace('.', '', regex=False))

    return df
Ejemplo n.º 21
0
def getUpdatedPropertyDetails(zpid):
    """Call the updated-property-details API for ``zpid`` and return plain JSON data.

    The XML response is converted with BadgerFish and round-tripped through
    JSON text so the result consists of plain dicts and lists.
    """
    query = dict([('zws-id', ZWS_ID), ('zpid', zpid)])
    endpoint = build_url(GET_UPDATED_PROPERTY_DETAILS_API_NAME)
    response = requests.get(endpoint, params=query)
    converted = bf.data(fromstring(response.text))
    return loads(dumps(converted))
Ejemplo n.º 22
0
def show(request):
	"""Look up a property on Zillow and render the edit form with the result.

	Expects a POST whose ``address`` field has the form
	``"<street address>,<city state zip>"`` and an optional ``name`` field.
	"""
	# NOTE(review): a non-POST request falls through and returns None --
	# confirm whether that is intended.
	if request.method=="POST":
		propert = request.POST.get('address')
		l=propert.split(',')
		address = l[0]
		citystatezip = l[1]
		url = "http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz19lq7ppy70r_305s0&address=%s&citystatezip=%s"%(address, citystatezip)
		response = requests.get(url)
		d = bf.data(fromstring(response.content))
		searchresults = d['{http://www.zillow.com/static/xsd/SearchResults.xsd}searchresults']
		result = searchresults['response']['results']['result']
		found_address = result['address']
		final_dct = {}
		# 'bedrooms' used to be filled with the bathrooms value.
		final_dct['bedrooms'] = result['bedrooms']['$']
		final_dct['bathrooms'] = result['bathrooms']['$']
		final_dct['city'] = found_address['city']['$']
		final_dct['state'] = found_address['state']['$']
		final_dct['street'] = found_address['street']['$']
		name = request.POST.get('name')
		# Single-argument print() behaves the same on Python 2 and 3.
		print(name)
		return render(request,'edit_your_property.html', {'data':final_dct,'zipcode':citystatezip, 'address':l[0], 'name':name})
Ejemplo n.º 23
0
def xliff_to_json(filename):
    """ Converts a single XLIFF file to a JSON equivalent.
    Removes some excess data such as notes (context).
    Requires a lot of memory to complete as files are large.

    ``filename`` is relative to the module-level ``folder``; the result is
    written next to the source as ``<filename>.2.json``.
    """
    def as_list(node):
        # BadgerFish gives a dict for a single child and a list for several;
        # normalise so both can be iterated the same way.
        return node if isinstance(node, list) else [node]

    filename = folder + filename
    with codecs.open(filename, 'r', 'utf-8') as fin:
        jsonx = bf.data(fromstring(fin.read()))

    for file_node in as_list(jsonx['xliff']['file']):
        body = file_node['body']
        # do some cleanup
        body.pop('group', None)
        # go through all strings and remove notes to reduce file size
        for unit in as_list(body.get('trans-unit', [])):
            unit.pop('note', None)

    # save new content as JSON file
    with open(filename + '.2.json', 'w', encoding='utf8') as fout:
        json.dump(jsonx, fout, ensure_ascii=False)
Ejemplo n.º 24
0
def get_vdc(cluster, vcloud_token, org_id):
    """List the names of the VDCs linked from an organisation.

    :param cluster: mapping with the vCloud address under ``vcloud_ip``
    :param vcloud_token: value sent as ``x-vcloud-authorization``
    :param org_id: organisation id appended to the API URL
    :return: ``(names, False)`` on success, ``(None, False)`` when the
        organisation has no links, ``(None, True)`` on any error
    """
    url = "https://%s/api/org/%s" % (cluster['vcloud_ip'], org_id)
    headers = {
        'x-vcloud-authorization': vcloud_token,
        'Accept': "application/*+xml;version=31.0",
        'Content-Type': "application/vnd.vmware.admin.user+xml"
    }
    try:
        # NOTE(review): verify=False turns off TLS certificate checking.
        response = requests.request("GET", url, verify=False, headers=headers)
        doc = ET.fromstring(response.text.encode('utf-8'))
        # Round-trip through JSON text to drop the vCloud namespace prefix.
        jsonStr = json.dumps(bf.data(doc))
        jsonStr = jsonStr.replace("{http://www.vmware.com/vcloud/v1.5}", "")
        vdc_objs = json.loads(jsonStr)

        links = vdc_objs["Org"]["Link"]
        if links == {}:
            print("Never have vdc list")
            return None, False
        # A single <Link> comes back as a dict rather than a list.
        if not isinstance(links, list):
            links = [links]
        # Build the list once; it used to be reset on every iteration, so
        # only the last link could ever be reported.
        vdc_list = [
            link["@name"] for link in links
            if link.get("@type") == "application/vnd.vmware.vcloud.vdc+xml"
        ]
        return vdc_list, False
    except Exception as e:
        print(e)
    return None, True
Ejemplo n.º 25
0
def utility():
    """Convert every ``*.xml`` file in the current directory to BadgerFish JSON.

    ``name.xml`` is written to ``name.json`` in the same directory.
    """
    import os

    for f in glob.glob('*.xml'):
        # splitext keeps dots inside the stem ("a.b.xml" -> "a.b"), whereas
        # splitting on "." and taking [-2] reduced that name to "b".
        n = os.path.splitext(f)[0]
        with open(f, "r") as i:
            j = bf.data(fromstring(i.read()))
        with open(n + ".json", "w+") as newFile:
            json.dump(j, newFile, ensure_ascii=False)
Ejemplo n.º 26
0
 def get_json(self):  # todo: remove JSON or not?
     """
       Returns a string with the full JSON structure of the current object.
       The BadgerFish convention is used.
     :return: str()
     """
     return json.dumps(badgerfish.data(self.__xml_element()))
Ejemplo n.º 27
0
def convertXMLtoJSON(filename):
    """Convert an XML file to BadgerFish JSON and save it next to the source.

    :param filename: path of the XML file; a trailing ``.xml`` is replaced
        by ``.json`` for the output name, any other name just gets ``.json``
        appended
    """
    # `with` closes the file; it used to stay open.
    with open(filename) as xml_file:
        root = ET.fromstring(xml_file.read())
    JSON_CONTENT = bf.data(root)
    if filename.endswith('.xml'):
        filename = filename[:-4]
    saveJSONtoFile(JSON_CONTENT, filename + '.json')
Ejemplo n.º 28
0
def convert(xmlfile):
	"""
	convert(xmlfile)
	Parse the XML text in ``xmlfile``, convert it with BadgerFish and
	return whatever ``jsonify`` produces for the result.
	"""
	return jsonify(bf.data(fromstring(xmlfile)))
Ejemplo n.º 29
0
 def traffic_log(self, nlogs=None, filter=None):
     ''' Query the traffic log and return the reply as BadgerFish data.

     :param nlogs: passed through to ``xapi.log``
     :param filter: passed through to ``xapi.log``
     :return: BadgerFish conversion of the XML root of the reply
     '''
     # needs extra_qs='dir=backward' // bug PAN-74932
     self.xapi.log(log_type='traffic',
                   nlogs=nlogs,
                   filter=filter,
                   extra_qs='dir=backward')
     # Read the reply from the connection that issued the query; this used
     # to go through a global `pa` object instead of `self`.
     return bf.data(fromstring(self.xapi.xml_root().encode('utf-8')))
Ejemplo n.º 30
0
def getCompsZpids(zpid, count=25, rentzestimat=False):
    """Return the zpid values found anywhere in the comps response for ``zpid``.

    :param zpid: property id to fetch comparables for
    :param count: number of comparables requested
    :param rentzestimat: sent to the API under the key ``rentzestimat``
    :return: list with the ``$`` value of every ``zpid`` entry in the reply
    """
    # NOTE(review): the key is spelled 'rentzestimat'; check it against the
    # API's expected parameter name.
    payload = dict([
        ('zws-id', ZWS_ID),
        ('zpid', zpid),
        ('count', count),
        ('rentzestimat', rentzestimat),
    ])
    response = requests.get(build_url(GET_COMPS_API_NAME), params=payload)
    res_json = loads(dumps(bf.data(fromstring(response.text))))
    comp_zpids = []
    for match in gen_dict_extract('zpid', res_json):
        comp_zpids.append(match['$'])
    return comp_zpids
Ejemplo n.º 31
0
    def convert_xml_to_json(self, eles):
        """Convert XML elements to BadgerFish dicts and print the first two.

        :param eles: iterable of XML elements, each expected to convert to a
            dict with a ``character`` key
        :return: None; the result is only printed
        """
        # Single-argument print() behaves the same on Python 2 and 3.
        print('convert to json...')
        kanjis = [bf.data(e) for e in eles]

        print('clean..')
        kanjis = [k['character'] for k in kanjis]

        # Slicing avoids an IndexError when fewer than two elements exist.
        for kanji in kanjis[:2]:
            print(kanji)
Ejemplo n.º 32
0
 def __init__(self, url_harvest, extra_data):
     '''Fetch the Atom feed and convert its child entries to BadgerFish.

     ``extra_data`` is a "username,password" string used for HTTP Basic
     authentication.  The parsed feed is kept in ``self.tree``, the
     converted entries in ``self.objects`` and an iterator over them in
     ``self.objects_iter``.
     '''
     super(CMISAtomFeedFetcher, self).__init__(url_harvest, extra_data)
     # parse extra data for username,password
     uname, pswd = extra_data.split(',')
     credentials = HTTPBasicAuth(uname.strip(), pswd.strip())
     resp = requests.get(url_harvest, auth=credentials)
     self.tree = ET.fromstring(resp.content)
     # Atom entries nested anywhere below the top-level entry's children.
     entry_path = ('./{http://www.w3.org/2005/Atom}'
                   'entry/{http://docs.oasis-open.org/'
                   'ns/cmis/restatom/200908/}children//'
                   '{http://www.w3.org/2005/Atom}entry')
     self.objects = []
     for entry in self.tree.findall(entry_path):
         self.objects.append(badgerfish.data(entry))
     self.objects_iter = iter(self.objects)
Ejemplo n.º 33
0
def oaiItemHarvest(oaiURL, identifier, autoNum):
    """Fetch one OAI record and store it as ``<autoNum>.xml`` and ``<autoNum>.json``.

    A ``GetRecord`` request with ``metadataPrefix=dcs`` is sent for
    ``identifier``.  The XML is written as received (no extra indentation),
    and its BadgerFish conversion is written as JSON.
    """
    query = urllib.urlencode({'verb': 'GetRecord',
                              'metadataPrefix': 'dcs',
                              'identifier': identifier})
    response = urllib2.urlopen(urllib2.Request(oaiURL + "?" + query))
    # Decode once to make sure the payload is valid UTF-8.
    uRes = codecs.decode(response.read(), 'utf-8')
    stem = str(autoNum)

    oaiXML = xml.dom.minidom.parseString(uRes.encode("utf-8"))
    with codecs.open(stem + '.xml', 'w', encoding='utf-8') as dataz:
        oaiXML.writexml(dataz, indent='', newl='')

    record = bf.data(fromstring(uRes.encode('utf-8')))
    with open(stem + '.json', 'wb') as outFile:
        json.dump(record, outFile)
    return
 def asJSON(self):
   """Return the object as a BadgerFish JSON string, or None without content.

   The XML from ``self.asXML()`` is converted and serialised with
   ``self.indent`` and the configured encoding.
   """
   if self.content is not None:
     converted = bf.data(fromstring(self.asXML()))
     return json.dumps(converted, indent=self.indent, encoding=d1_config.ENCODING)
   return None
Ejemplo n.º 35
0
 def xmlToJson(self, xmlData = None):
     """Return ``xmlData`` converted to a BadgerFish JSON string.

     An empty or missing ``xmlData`` yields an empty string.
     """
     if not xmlData:
         return ''
     return dumps(bf.data(fromstring(xmlData)))