def pars(self, delimiter=';', clear_data=True):
    """Parse the configured CSV and TEI files, process them, write results.

    Feeds ``self.csv_name`` through :func:`csv.reader` into
    ``self.processCSV``, then objectifies ``self.tei_name``, processes every
    ``text/body/entry`` block via ``self.processBlock``, and serializes the
    tree to ``result.tei``.  The accumulated ``self.error_log`` is dumped as
    JSON to ``eror_log.txt``.

    :param delimiter: field delimiter handed to the CSV reader.
    :param clear_data: when True, run ``self.clearData`` over the serialized
        output before writing it.
    :raises ValueError: if ``csv_name`` or ``tei_name`` is not set.
    """
    # FIX: removed the unreachable `return` statements that followed each
    # `raise` in the original.
    if not self.csv_name:
        raise ValueError('csv_name file name error')
    if not self.tei_name:
        raise ValueError('tei_name file name error')
    with open(self.csv_name, 'r') as csvfile:
        iterator = csv.reader(csvfile, delimiter=delimiter)
        self.processCSV(iterator)
    with open(self.tei_name, 'r', encoding="utf-8") as tiefile:
        xml = tiefile.read()
    tei_ = objectify.XML(xml)
    # Remember the default namespace and the full prefix map for later use.
    self.namespase = tei_.nsmap[None]
    self.fullnamespase = tei_.nsmap
    entry_ = tei_['text']['body']['entry']
    for block in entry_:
        self.processBlock(block)
    with open('result.tei', 'wb') as f:
        result_data = etree.tostring(tei_, encoding='UTF-8', pretty_print=True)
        if clear_data:
            result_data = self.clearData(result_data)
        f.write(result_data)
    # NOTE(review): 'eror_log.txt' is misspelled but kept as-is -- renaming
    # it would break anything that reads this file.
    with open('eror_log.txt', 'w') as f:
        json.dump(self.error_log, f)
def search_result(request):
    """Render a paginated Naver book-search result page.

    Query string: ``book_name`` (required search term) and ``curPage``
    (optional, positive page number).
    """
    book_name = request.GET['book_name']
    bookApi = NaverOpenApiBook()
    start = 1
    pageRowCnt = 10
    # Translate the requested page into the API's 1-based row offset.
    if request.GET.has_key('curPage'):
        if int(request.GET['curPage']) > 0:
            curPage = int(request.GET['curPage'])
            start = (curPage * pageRowCnt) + 1
    resBody = bookApi.searchBaseInfo(book_name, start)
    rss = objectify.XML(resBody)
    # BUG FIX: ``.text`` is a string; in Python 2 a str always compares
    # greater than an int, so the 1000 cap fired on *every* request.
    # Convert to int so the comparison is numeric.
    total_row_cnt = int(rss.channel.total.text)
    if total_row_cnt > 1000:  # presumably the API serves at most 1000 rows
        total_row_cnt = 1000
    pageNavigation = PageNavigation(request, total_row_cnt)
    return render_to_response('book/book_search_result.html', {
        'rss': rss,
        'cur_page': pageNavigation.curPage,
        'page_navigation_max_cnt': pageNavigation.pageNavigationMaxCnt,
        'page_row_cnt': pageNavigation.pageRowCnt,
        'page_navigation_html': pageNavigation.pageNavigationHtml,
        'book_list_total_row_cnt': total_row_cnt,
    }, context_instance=RequestContext(request))
def put_import_task_status(self, worker_id=None, task_id=None, status=None,
                           volume_id=None, bytes_converted=None,
                           error_code=None):
    """Report import-task progress via PutInstanceImportTaskStatus.

    :param worker_id: instance id of the reporting worker.
    :param task_id: id of the import task (required).
    :param status: task status string (required).
    :param volume_id: optional id of the target volume.
    :param bytes_converted: optional progress counter.
    :param error_code: optional error code to attach.
    :returns: True while the task should keep running; False once the
        backend reports ``<cancelled>true</cancelled>``.
    :raises RuntimeError: if ``task_id`` or ``status`` is missing.
    :raises httplib.HTTPException: on any non-200 response.
    """
    if task_id is None or status is None:
        raise RuntimeError("Invalid parameters")
    params = {
        'InstanceId': worker_id,
        'ImportTaskId': task_id,
        'Status': status
    }
    # FIX: use identity checks for None throughout (the original mixed
    # ``!= None`` with ``is not None``).
    if bytes_converted is not None:
        params['BytesConverted'] = bytes_converted
    if volume_id is not None:
        params['VolumeId'] = volume_id
    if error_code is not None:
        params['ErrorCode'] = error_code
    resp = self.conn.make_request('PutInstanceImportTaskStatus', params,
                                  path='/', verb='POST')
    if resp.status != 200:
        raise httplib.HTTPException(resp.status, resp.reason, resp.read())
    root = objectify.XML(resp.read())
    # Expanded from a hard-to-read conditional expression: cancelled text
    # of 'true' means the task was cancelled -> return False.
    if hasattr(root, 'cancelled'):
        return 'true' != root.cancelled.text
    return True
class API(object): def __init__(self, geo=GEO, par=PARTNER_ID, key=KEY, **defaults): self.par = par self.key = key self.geo = geo self.URL = 'http://xoap.weather.com/weather/local/%s' % self.geo defaults['par'] = self.par defaults['key'] = self.key defaults['cc'] = '*' defaults['link'] = 'xoap' defaults['prod'] = 'xoap' defaults['unit'] = 'm' defaults['dayf'] = 5 serialized = self._encode(defaults) logger.debug('About to call %s with params %r' % (self.URL, defaults)) try: c = urllib2.urlopen(self.URL, serialized) except urllib2.URLError, e: raise WeatherError("Unexpected error while talking to server: %s" % (e, )) resp = c.read() root = etree.fromstring(resp) objects = objectify.XML(resp) logging.debug('Result: %r' % resp) if root.tag == 'error': msg = root.find('err').text raise WeatherError(msg) self.response = resp self.root = root self.objects = objects
def _decode_message(self, data):
    """Decode a message from an XML string.

    Args:
        data (str): XML serialization of an 'AbstractMessage' object.

    Returns:
        AbstractMessage. decoded message.

    Raises:
        ParsingError: scheme validation failed.
        AttributeError: message tag name is not a valid message class name.
            the scheme should validate that this case will never happen.
    """
    root = objectify.XML(data)
    if not self.scheme.validate(root):
        scheme_errors = self.scheme.error_log.filter_from_errors()
        raise ParsingError("Scheme validation failed. reason: %r"
                           % scheme_errors)
    # The root tag names the concrete message class; each child becomes a
    # constructor keyword argument.
    message_class = getattr(messages, root.tag)
    kwargs = {child.tag: self._decode(child)
              for child in root.iterchildren()}
    return message_class(**kwargs)
def test_xml2dict(self):
    # xml2dict should map repeated sibling tags ("test") to a list of
    # dicts, and a unique tag ("AttrList") to a plain nested dict.
    root = objectify.XML("""
    <root>
        <test>
            <attr>Data1</attr>
        </test>
        <test>
            <attr>Data1</attr>
            <attrtwo>2</attrtwo>
        </test>
        <AttrList>
            <attr>TEST</attr>
        </AttrList>
    </root>
    """)
    assert xml2dict(root) == {
        "test": [{
            "attr": "Data1"
        }, {
            "attrtwo": "2",
            "attr": "Data1"
        }],
        "AttrList": {
            "attr": "TEST"
        }
    }
    # Type1/Type2 are project fixtures: Type1 converts cleanly, while
    # Type2 is expected to make xml2dict raise.
    assert xml2dict(Type1()) == {"prop1": "TEST"}
    with pytest.raises(Exception):
        xml2dict(Type2())
def objectify(self): """ Takes a xml source an returns it as an object The source can be any of the following: - a file name/path - a file object - a file-like object - a URL using the HTTP or FTP protocol """ try: self.root = objectify.XML(self.Xml) except ValueError: self.Xml = etree.tostring(self.Xml) self.root = objectify.XML(self.Xml) except Exception, e: print "Objectify error: %s " % e
def __parse(self, file_path):
    """Populate self (a mapping) from the OME-XML metadata of a TIFF file.

    Reads the first page's ``image_description`` tag, objectifies the
    OME-XML, and copies pixel attributes into self according to the
    ``tagInfoMapfloat`` / ``tagInfoMapint`` key maps.

    :param file_path: path of the TIFF file to parse.
    """
    tiff_file = tf.TiffFile(file_path)
    page = tiff_file.pages[0]
    ome_xml = objectify.XML(page.tags['image_description'].value)
    pixel_info = ome_xml['Image']['Pixels']  # AttributeError if tag missing
    for tag, ome_key in self.tagInfoMapfloat.items():
        self[tag] = np.float64(pixel_info.attrib[ome_key])
    for tag, ome_key in self.tagInfoMapint.items():
        # FIX: np.int was an alias of the builtin int and was removed in
        # NumPy 1.24; use int directly (identical behavior).
        self[tag] = int(pixel_info.attrib[ome_key])
    self["bits_per_sample"] = page.bits_per_sample
    # Derive the file type from the class name by stripping a 13-char
    # suffix -- presumably a '...MetaDataParser'-style name; TODO confirm
    # against the subclasses.
    self["file_type"] = self.__class__.__name__[:-13]
def parse_xml(xml):
    """Extract the term strings from a service response document.

    The field holding the term depends on the response type (spatial
    references use ``SRSName``, units use ``UnitsType``, everything else
    uses ``Term``).

    :param xml: XML string (or source accepted by objectify.XML).
    :returns: deduplicated list of term strings (order not preserved).
    """
    root = objectify.XML(xml)
    records = root.Records.getchildren()
    # The root tag is loop-invariant: pick the extraction branch once
    # instead of re-testing it for every record.
    if root.tag == 'GetSpatialReferencesResponse':
        termarr = [r.SRSName.text for r in records]
    elif root.tag == 'GetUnitsResponse':
        termarr = [r.UnitsType.text for r in records]
    else:
        termarr = [r.Term.text for r in records]
    return list(set(termarr))
def objectify(self):
    """
    Parses the XML string and generates an objectified structure of the
    document, which is stored in the variable root.
    The source can be any of the following:
    - a file name/path
    - a file object
    - a file-like object
    - a URL using the HTTP or FTP protocol
    """
    # Bail out early when lxml.objectify could not be imported.
    if not is_available_xml_objectify:
        print "Module lxml.objectify not available"
        return
    try:
        self.root = objectify.XML(self.Xml)
    except ValueError:
        # self.Xml was not a parseable string; assume it is an element
        # tree, serialize it, and retry.
        self.Xml=etree.tostring(self.Xml)
        self.root = objectify.XML(self.Xml)
    except Exception, e:
        # Best-effort: report and continue without a root object.
        print "Objectify error: %s " % e
def __init__(self, objectifiedElement=None, xmlFileName=None, xmlStr=None,
             rootag='root', attrib=None, nsmap=None, **kwargs):
    u'''initialize from ObjectifiedElement or xml file or xml string or
    create a brand new.

    Arguments
    ---------
    objectifiedElement : ObjectifiedElement, optional
        an ObjectifiedElement object.
    xmlFileName : str, optional
        xml's filename.
    xmlStr : str, optional
        xml's content.
    rootag : str, optional
        create ObjectifiedElement instance which root tag's name is rootag.
    attrib, nsmap, kwargs : optional
        refer to objectify.Element()
    '''
    self._____o____ = None
    if objectifiedElement is not None:
        self._____o____ = objectifiedElement
    elif xmlFileName:
        # BUG FIX: objectify.XML() parses an XML *string*, so passing a
        # file name raised a syntax error on the name itself.  Parse the
        # file and keep its objectified root element instead.
        self._____o____ = objectify.parse(xmlFileName).getroot()
    elif xmlStr:
        self._____o____ = objectify.fromstring(xmlStr)
    else:
        self._____o____ = objectify.Element(rootag, attrib=attrib,
                                            nsmap=nsmap, **kwargs)
def test_save_to_file(tmpdir):
    """save() must create the file and serialize attributes as XML tags."""
    class Person(ComplexType):
        def __init__(self, fname, lname):
            self.fname = fname
            self.lname = lname

    person = Person('token', 'black')
    target = os.path.join(tmpdir.strpath, 'person.xml')
    # The file must not pre-exist; save() is what creates it.
    assert os.path.isfile(target) is False
    person.save(target)
    assert os.path.isfile(target) is True
    # Round-trip: the written XML carries both fields as child elements.
    serialized = io.open(target, 'rb').read()
    root = objectify.XML(serialized)
    assert root.fname == 'token'
    assert root.lname == 'black'
def __init__(self, html_url, xsl_str):
    """Fetch an HTML page, XSL-transform it, and objectify the result.

    :param html_url: URL (or path) of the HTML document to load.
    :param xsl_str: XSL stylesheet as a string.
    """
    # Compile the transform once from the stylesheet string.
    xsl_transformer = etree.XSLT(etree.XML(xsl_str))
    self._html_tree = html.parse(html_url)
    # Absolutize links so the transformed output works outside the
    # original base URL.
    self._html_tree.getroot().make_links_absolute()
    # Re-serialize the HTML as well-formed XML for the XSL step.
    self._clean_html_str = etree.tostring(self._html_tree,
                                          pretty_print=True, method="xml")
    self._html_with_xml_header_str = '<?xml version="1.0"?>' + self._clean_html_str
    self._html_with_xml_header_tree = etree.XML(
        self._html_with_xml_header_str)
    self._xml_tree = xsl_transformer(self._html_with_xml_header_tree)
    self._xml_str = str(self._xml_tree)
    self._xml_pyobject = objectify.XML(self._xml_str)
    # Strip objectify's py:pytype annotations from the final tree.
    objectify.deannotate(self._xml_pyobject)
def insert(request):
    """Create a book entry from a scraped Naver detail page plus the API.

    Scrapes the user score and total page count out of the detail HTML,
    fetches structured book data from the detail-info API, then validates
    and saves a BookForm.  Redirects to the book list on success; on form
    failure returns an alert page.
    """
    bookApi = NaverOpenApiBook()
    resBody = bookApi.searchDetailForNaver(request.POST['link'])
    bSoup = BeautifulSoup(resBody, "html.parser")
    bookInfoInner = bSoup.find('div', class_='book_info_inner')
    # The score text ends with the Korean word for 'points'; strip it.
    total_score = bookInfoInner.find(
        'div', class_='txt_desc').find('strong').get_text().replace(u'점', '')
    total_page = 0
    # Find the <em> labelled with the Korean word for 'pages'; its next
    # sibling holds the page count.
    emList = bookInfoInner.findAll('em')
    for emInfo in emList:
        if unicode(emInfo.get_text().strip()).find(u'페이지') > -1:
            total_page = emInfo.next_sibling.strip()
    # The posted ISBN field holds two space-separated codes; the second is
    # used for the detail lookup -- presumably the ISBN-13; TODO confirm.
    resBody = bookApi.searchDetailInfo(request.POST['query'],
                                       request.POST['isbn'].split(' ')[1])
    rss = objectify.XML(resBody)
    bookFormData = {
        'user_score': total_score,
        'current_page': 0,
        'total_page': total_page,
        'isbn': unicode(rss.channel.item[0].isbn).split(' ')[1],
        'book_name': unicode(rss.channel.item[0].title),
        'author': unicode(rss.channel.item[0].author),
        'publisher': unicode(rss.channel.item[0].publisher),
        'pub_date': unicode(rss.channel.item[0].pubdate),
        'image_url': rss.channel.item[0].image,
        'link_naver': rss.channel.item[0].link,
        'user': request.session.get('id')
    }
    bookForm = BookForm(bookFormData)
    if bookForm.is_valid():
        bookForm.save()
    else:
        # Log the validation errors, then show the failure alert.
        print bookForm.errors
        return HttpResponse(
            '<script>alert("폼 유효성 검사 실패!");history.back();</script>')
    return HttpResponseRedirect("/book/book_list")
def parse_schema(self, schema_file):
    """
    Parses the given XSD file using the lxml library then returns a
    tuple ``(elements, includes, complex_types)``.

    :param schema_file: XSD file name, resolved against ``self.input_path``.
    :returns: tuple of (elements dict, schemaLocation list, complex-type
        dict); each is empty when the corresponding section is absent.
    """
    try:
        schema_path = os.path.join(self.input_path, schema_file)
        with open(schema_path) as xml_file:
            data = xml_file.read()
    except Exception as ex:
        print("Error occurred while opening the XSD file: ", ex)
        sys.exit(1)
    try:
        f = objectify.XML(data)
    except Exception as ex:
        print("Invalid XSD file: ", schema_file, ex)
        sys.exit(1)
    # objectify raises AttributeError for an absent child tag; the
    # handlers below treat that as "section not present".
    try:
        includes = [i.get('schemaLocation') for i in f.include]
    except AttributeError:  # FIX: narrowed from a bare except
        includes = []  # no xsd:include found
    try:
        types = [Node(c).to_dict() for c in f.complexType]
    except Exception:
        complex_types = {}  # no xsd:complexType found
    else:
        complex_types = dict(
            (node.get('name'), node.get('content')) for node in types)
    try:
        elements = {}
        for e in f.element:
            node = Node(e, complex_types).to_dict()
            content = node.get('content')
            if content:
                # NOTE(review): this branch re-reads the same key and is a
                # no-op; presumably a different transform was intended for
                # annotation nodes -- confirm before changing.
                if content.get('type') == 'annotation':
                    content = node['content']
                elements[node.get('name')] = content
    except Exception:
        elements = {}  # no xsd:element found
    return elements, includes, complex_types
def parse_file(filename, path='settle'):
    """Parse a settlement XML file into a date-indexed pandas DataFrame.

    Each output row merges an instrument's attributes (plus constructed
    Instrument/Underlying objects) with one of its <Full> market-data
    entries.

    :param filename: file name of the settlement XML.
    :param path: directory the file lives in.
    :returns: DataFrame indexed by the 'date' column.
    """
    with open(os.path.join(path, filename), 'rb') as fh:
        obj = objectify.XML(fh.read())
    data = obj.Batch.MktDataFull
    arr = []
    for i in data:
        inst = dict(i.Instrmt.attrib)
        # Optional underlying leg; absent for outright instruments.
        und = Underlying.from_xml(
            **i.Undly.attrib) if hasattr(i, 'Undly') else None
        inst['instrument'] = Instrument.from_xml(Undly=und, **inst)
        inst['underlying'] = und
        # First attribute value of the MktDataFull element -- presumably
        # the business date in '%Y-%m-%d'; verify against the feed spec.
        inst['date'] = datetime.datetime.strptime(i.values()[0], '%Y-%m-%d')
        for full in i.Full:
            # One DataFrame row per <Full> entry, tagged with the
            # instrument-level data.
            full = dict(full.attrib)
            full.update(inst)
            arr.append(full)
    df = pd.DataFrame(arr)
    df.index = df['date']
    return df
def process_message(self, message, timestamp=None):
    """Optionally validate an XML message, then emit it as a plain Event.

    When ``self.validate`` is set, the message is checked against the XML
    schema fetched from ``self.location``; a validation failure is logged
    but the message is still forwarded via ``self.send_event``.

    :param message: raw XML string.
    :param timestamp: optional timestamp copied onto the emitted event.
    """
    from lxml import objectify
    self.logger.debug("Received: %s ", message)
    if self.validate:
        from lxml import etree
        # Fetch and cache the schema lazily on first use.
        if self.schema is None:
            self.schema = etree.XMLSchema(
                file=urllib.urlopen(self.location))
        parsed_message = etree.fromstring(message)
        if not parsed_message.nsmap.has_key(
                None):  #if no default namespace specified, set it
            # Rebuild the element with self.namespace as the default
            # namespace, then re-parse the serialized copy.
            new_element = etree.Element(parsed_message.tag,
                                        attrib=parsed_message.attrib,
                                        nsmap={None: self.namespace})
            new_element.extend(parsed_message.getchildren())
            parsed_message = etree.fromstring(etree.tostring(new_element))
        if parsed_message.tag.startswith('{' + self.namespace + '}'):
            if not self.schema.validate(parsed_message):
                # Validation failure is logged, not fatal.
                log = self.schema.error_log
                error = log.last_error
                self.logger.error("XML validation error: %s", error)
        else:
            self.logger.info(
                'Element not in standard namespace, considered as extension: %s',
                parsed_message.tag)
    event = objectify.XML(message)
    # Strip the namespace prefix from the tag to get the bare event type.
    event._type = event.tag.replace('{' + self.namespace + '}', '')
    # transform the objectified XML to a pure python object to be able to add typed fields
    pure_event = Event()
    for el in event.iterchildren():
        pure_event.__setattr__(el.tag, el)
    if (timestamp):
        pure_event.timestamp = timestamp
    self.send_event(pure_event)
# coding: utf-8 from lxml import objectify with open('C:/Users/Leto/PycharmProjects/SINner/sinner/data/metatypes.xml' ) as file: file.readline() metatypes = objectify.XML(file.read()).metatypes with open( 'C:/Users/Leto/PycharmProjects/SINner/sinner/data/skills.xml') as file: file.readline() skills = objectify.XML(file.read()).skills #skill_groups = objectify.XML(file.read()).skillgroups #skill_categories = objectify.XML(file.read()).categories with open('C:/Users/Leto/PycharmProjects/SINner/sinner/data/gear.xml') as file: file.readline() xml = file.read() gears = objectify.XML(xml).gears gear_categories = objectify.XML(xml).categories
def parse_schema(self, schema_file): """ Parses the given XSD file using the lxml library then returns a dictionary containing parsing results. """ elements = {} try: schema_path = os.path.join(self.input_path, schema_file) with open(schema_path) as xml_file: data = xml_file.read() except Exception, ex: print "Error occurred while opening the XSD file: ", ex sys.exit(1) try: f = objectify.XML(data) except Exception, ex: print "Invalid XSD file: ", schema_file, ex sys.exit(1) try: includes = [i.get('schemaLocation') for i in f.include] except: includes = [] # no xsd:include found try: types = [Node(c).to_dict() for c in f.complexType] except Exception, ex: complex_types = {} # no xsd:complexType found, else: complex_types = dict(
xml = xml.read() except urllib2.HTTPError, e: print e return except ValueError: xml = etree.parse(source) try: if not isinstance(xml, str): xml = etree.tostring(xml) except: print "parser error (to string)" return try: root = objectify.XML(xml) except: print "objectify error" return return root def populate_model(root): """ Populates classes of specmodel """ molecules = {} radtranss = {} states = {}
def process_message(message):
    """Objectify an XML message, tag it with its own element name, and
    hand it to the event queue."""
    from lxml import objectify

    parsed = objectify.XML(message)
    # NOTE: attribute assignment on an objectified element stores the
    # value as a child node -- presumably intended here; confirm.
    parsed.type = parsed.tag
    enqueue_event(parsed)