def openFile():
    """Ask the user for a .txt or .docx file and load its text into ``text``.

    A file-open dialog restricted to plain-text and Office Open XML files is
    shown. If the dialog returns ``None`` nothing happens. Otherwise the
    content of the chosen file replaces the current content of the
    module-level ``text`` object (via its ``delete``/``insert`` methods):

    * ``.txt``  -- the file is read as-is.
    * ``.docx`` -- ``word/document.xml`` is read from the archive, the text
      nodes (``TEXT``) of each paragraph element (``PARA``) are concatenated,
      and non-empty paragraphs are joined with a blank line between them.
    * anything else -- a message is printed and ``text`` is left untouched.
    """
    f = askopenfile(mode="r", filetypes=[("Text Files", '.txt'), ("Office Open XML", '.docx')])
    if f is None:
        return

    # The dialog hands back an already-open file; make sure it is closed on
    # every path, including errors while parsing the document.
    try:
        ext = splitext(f.name)[1]
        if ext == ".txt":
            print(ext)
            content = f.read()
        elif ext == ".docx":
            # A .docx file is a zip archive; re-open it by name as such.
            with zipfile.ZipFile(f.name) as doc:
                xml_content = doc.read('word/document.xml')
            tree = XML(xml_content)
            paragraphs = []
            # Element.iter() replaces getiterator(), removed in Python 3.9.
            for paragraph in tree.iter(PARA):
                texts = [node.text for node in paragraph.iter(TEXT) if node.text]
                if texts:
                    paragraphs.append(''.join(texts))
            content = '\n\n'.join(paragraphs)
        else:
            print("Oops! not a supported file type.")
            return
    finally:
        f.close()

    text.delete(0.0, END)
    text.insert(0.0, content)
def _process_response(self, response, returntype=dict):
    """Parse an XML response string into a dict or a list of dicts.

    :param response: XML document as a string.
    :param returntype: ``dict`` (default) to get an
        ``xmldict.XmlDictConfig`` built from the root element, or ``list``
        to get a list of flat ``{tag: text}`` dicts.
    :returns: the parsed response in the requested shape.
    :raises RemoteError: if the parsed data contains an ``"error"`` key.
    :raises InvalidParameterError: if ``returntype`` is neither ``dict``
        nor ``list``.

    When the module-level ``DEBUG`` flag is truthy, the raw response and the
    parsed result are printed.
    """
    if DEBUG:
        print(response)

    # Parse XML
    root = XML(response)

    if returntype == dict:
        # Dict response.
        # NOTE(review): assumes XmlDictConfig supports the ``in`` operator
        # like a dict (it previously relied on has_key) -- confirm.
        response_data = xmldict.XmlDictConfig(root)
        if "error" in response_data:
            raise RemoteError(response_data)
    elif returntype == list:
        # List response: walk every element in document order and start a
        # new record each time the record tag is seen again.
        response_data = []
        return_dict = {}
        # Element.iter() replaces getiterator(), removed in Python 3.9.
        for elem in root.iter():
            # NOTE(review): ``root_tag`` is not defined in this function; it
            # is presumably a module-level name -- confirm.
            if elem.tag == root_tag and elem.tag in return_dict:
                response_data.append(return_dict)
                return_dict = {elem.tag: elem.text}
            else:
                return_dict[elem.tag] = elem.text
            if "error" in return_dict:
                raise RemoteError(return_dict)
        # add final dict to the list
        response_data.append(return_dict)
    else:
        raise InvalidParameterError("unkown datatype: %s" % (returntype))

    if DEBUG:
        print(response_data)
    return response_data
def parse_docx(file_path: Path) -> "Optional[str]":
    """Extract the plain text of a ``.docx`` file.

    A ``.docx`` file is a zip archive of XML documents; the body text lives
    in ``word/document.xml``.
    NOTE: http://xmlstackoverflow.blogspot.com/2014/09/reading-doc-extension-file-elementtree.html

    Args:
        file_path: Path of the ``.docx`` file to read.

    Returns:
        The text of all non-empty paragraphs joined by single spaces, or
        ``None`` when ``word/document.xml`` cannot be read from the archive
        (the failure is printed and logged).

    Within a paragraph, each text run has non-breaking spaces replaced by
    ordinary spaces and surrounding spaces stripped before the runs are
    concatenated without a separator.
    NOTE(review): stripping every run drops spaces that sit at run
    boundaries, which can glue adjacent words together -- confirm this is
    intended.
    """
    word_namespace = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    para_tag = word_namespace + "p"
    text_tag = word_namespace + "t"

    with ZipFile(file_path) as document:
        try:
            xml_content = document.read("word/document.xml")
        except Exception:
            # Best effort: report the failure and give the caller None.
            print(f"FAILED:{document}")
            # logging.exception records the message together with the
            # active traceback; the argument is bound to a %s placeholder.
            logging.exception("Failed to parse document %s", file_path)
            return None

    tree = XML(xml_content)
    paragraphs = []
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    for paragraph in tree.iter(para_tag):
        texts = [
            node.text.replace("\xa0", " ").strip(" ")
            for node in paragraph.iter(text_tag)
            if node.text
        ]
        if texts:
            paragraphs.append("".join(texts))
    return " ".join(paragraphs)
def get_docx_text(self, path):
    """
    Take the path of a docx file as argument, return the text in unicode.

    ``word/document.xml`` is read from the archive at ``path``. For each
    paragraph element (``PARA``) the non-empty text nodes (``TEXT``) are
    concatenated; paragraphs that yield text are joined with a blank line
    between them. Paragraphs without text are skipped.
    """
    # The context manager closes the archive even if the read fails.
    with zipfile.ZipFile(path) as document:
        xml_content = document.read('word/document.xml')

    tree = XML(xml_content)
    paragraphs = []
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    for paragraph in tree.iter(PARA):
        texts = [node.text for node in paragraph.iter(TEXT) if node.text]
        if texts:
            paragraphs.append(''.join(texts))
    return '\n\n'.join(paragraphs)
def read_docx_file(file, block_size=4096):
    """Read the paragraph text out of a ``.docx`` file.

    :param file: anything ``zipfile.ZipFile`` accepts (a path or a file-like
        object).
    :param block_size: unused by this function; kept so existing callers
        that pass it keep working.
    :returns: a ``(success, payload)`` tuple. On success ``payload`` is the
        text of all non-empty paragraphs joined with newlines. On failure
        (the archive cannot be opened, ``word/document.xml`` cannot be read,
        or the XML cannot be parsed) ``success`` is ``False`` and
        ``payload`` is the error message with every ``zip`` replaced by
        ``docx``.
    """
    word_namespace = \
        '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    par = word_namespace + 'p'
    text = word_namespace + 't'

    try:
        with zipfile.ZipFile(file) as document:
            xml_content = document.read('word/document.xml')
        tree = XML(xml_content)
    except Exception as e:
        # Deliberately broad: every failure is reported through the return
        # value instead of an exception.
        return False, str(e).replace('zip', 'docx')

    paragraphs = []
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    for paragraph in tree.iter(par):
        texts = [node.text for node in paragraph.iter(text) if node.text]
        if texts:
            paragraphs.append(''.join(texts))
    return True, '\n'.join(paragraphs)
def word2text(self):
    """Return the text of the Word document at ``self.location``.

    ``word/document.xml`` is read from the archive. For each paragraph
    element (``PARA``) the non-empty text nodes (``TEXT``) are normalized
    to Unicode NFKD form and concatenated; the result is stripped of
    surrounding whitespace.

    :returns: list of paragraph strings, in document order. Paragraphs that
        are empty after stripping are omitted.
    """
    # The context manager closes the archive even if the read fails.
    with zipfile.ZipFile(self.location) as document:
        xml_content = document.read('word/document.xml')

    tree = XML(xml_content)
    paragraphs = []
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    for paragraph in tree.iter(PARA):
        runs = [
            unicodedata.normalize("NFKD", node.text)
            for node in paragraph.iter(TEXT)
            if node.text
        ]
        # Strip the joined paragraph rather than each run, so spaces at run
        # boundaries are kept.
        cleaned = ''.join(runs).strip()
        if cleaned:
            paragraphs.append(cleaned)
    return paragraphs
#Response from fuzzwork: #{"3532":{"buy":{"weightedAverage":"7522844.02163","max":"11500000.06","min":"343368.68","stddev":"3969289.36861","median":"9750000.015","volume":"104.0","orderCount":"10","percentile":"11500000.0538"},"sell":{"weightedAverage":"29857450.9729","max":"68499999.98","min":"15399998.88","stddev":"13610076.8519","median":"18900000.01","volume":"120.0","orderCount":"21","percentile":"15399998.8933"}}} #END RESPONSE if (contents[0:5] == "<?xml"): # Parse the XML response from EVE-Central # See usage here for ElementTree: http://effbot.org/zone/element-index.htm tree = XML(contents) # From a string if (verbose == 1): print "Data received from fuzzwork:" print tree #print "XML Element count = ", len(tree) # the tree root is the toplevel html element tree_map = dict((c, p) for p in tree.getiterator() for c in p) root = tree for c in root.getchildren(): if (verbose == 1): print c.tag for d in c.getchildren(): if (verbose == 1): print " ", d.tag for e in d.getchildren(): if (verbose == 1): print " ", e.tag for f in e.getchildren(): if (verbose == 1): print " ", f.tag, " = ", f.text if ((e.tag == "sell")
#!/usr/bin/env python # -*- coding: UTF-8 -*- from xml.etree.ElementTree import XML parsed = XML(''' <root> <group> <child id="a">Questo è il figlio "a".</child> <child id="b">Questo è il figlio "b".</child> </group> <group> <child id="c">Questo è il figlio "c".</child> </group> </root> ''') print 'parsed =', parsed for elem in parsed.getiterator(): print elem.tag if elem.text is not None and elem.text.strip(): print ' text: "%s"' % elem.text if elem.tail is not None and elem.tail.strip(): print ' tail: "%s"' % elem.tail for name, value in sorted(elem.attrib.items()): print ' %-4s = "%s"' % (name, value) print
def refund(self, payment):
    """
    Refunds one Nordea payment in full.

    Note that the refund operation executes another payment at the bank
    and that those two payments are not explicitly linked. As a result,
    if you query the original payment after refund, OK is still returned.

    More properly implemented refunds would actually return an another
    Payment instance that describes the refund.

    The request is signed with an upper-case MD5 hex digest over the
    request fields (in the fixed order below) and the merchant secret,
    each followed by "&". The XML response is validated the same way:
    the fields listed in REFUND_RESP_PARAMS that are present in the
    response, followed by the merchant secret.

    Returns a dict mapping every SOLOPMT_* tag of the response to its text.

    Raises PaymentInvalidMacError if the response carries no MAC or the
    MAC does not match the calculated one.

    FIXME: Refund works in the sense that it executes the refund. However,
    the response MAC calculation has been reported to fail even though
    everything went ok, in which case an exception is raised after the
    refund has already been executed.
    """
    # FIXME: we need for parameters flexible forward and backwards
    # parameter marshalling methods
    import hashlib
    import urllib2
    from urllib import urlencode
    from xml.etree.ElementTree import XML
    from exceptions import PaymentInvalidMacError

    print("refund() called")

    data = {}
    data["SOLOPMT_VERSION"] = "0001"
    data["SOLOPMT_TIMESTMP"] = "199911161024590003"  # FIXME
    data["SOLOPMT_RCV_ID"] = self.get_setting("merchant_key")
    data["SOLOPMT_LANGUAGE"] = "1"  # FIXME
    data["SOLOPMT_RESPTYPE"] = "xml"  # "html" or "xml"
    data["SOLOPMT_RESPDATA"] = "text/xml"  # FIXME
    data["SOLOPMT_STAMP"] = payment.get_value("code")
    data["SOLOPMT_AMOUNT"] = payment.get_value("amount")
    data["SOLOPMT_CUR"] = payment.get_value("currency")
    data["SOLOPMT_KEYVERS"] = "0001"  # FIXME
    data["SOLOPMT_ALG"] = "01"

    # Field order for the request MAC; fields absent from ``data`` are
    # skipped, "secret" stands for the merchant secret.
    order = ("VERSION", "TIMESTMP", "RCV_ID", "LANGUAGE", "RESPTYPE",
             "RESPDATA", "RESPDETL", "STAMP", "REF", "AMOUNT", "CUR",
             "REF2", "KEYVERS", "ALG", "secret")
    parts = []
    for p in order:
        if p == "secret":
            parts.append(self.get_setting("merchant_secret"))
        else:
            key = "SOLOPMT_%s" % p
            if key in data:
                parts.append(data[key])
    s = "".join([part + "&" for part in parts])

    # hashlib.md5 yields the same digest as the deprecated ``md5`` module.
    data["SOLOPMT_MAC"] = hashlib.md5(s).hexdigest().upper()
    print("MAC-0: %s" % data["SOLOPMT_MAC"])

    con = urllib2.urlopen(self.REFUND_URL, urlencode(data))
    try:
        resp = con.read()
    finally:
        con.close()

    REFUND_RESP_PARAMS = (
        "SOLOPMT_VERSION",
        "SOLOPMT_TIMESTMP",
        "SOLOPMT_RCV_ID",
        "SOLOPMT_RESPCODE",
        "SOLOPMT_STAMP",
        "SOLOPMT_RCV_ACCOUNT",
        "SOLOPMT_REF",
        "SOLOPMT_DATE",
        "SOLOPMT_AMOUNT",
        "SOLOPMT_PAID",
        "SOLOPMT_CUR",
        "SOLOPMT_STATUS",
        "SOLOPMT_KEYVERS",
        "SOLOPMT_ALG",
    )

    # The response received from the bank is the one that gets validated.
    respdata = {}
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    for e in XML(resp).iter():
        if e.tag.startswith("SOLOPMT_"):
            respdata[e.tag] = e.text

    mac_fields = []
    for p in REFUND_RESP_PARAMS:
        if p in respdata:
            mac_fields.append(respdata[p])
            print("USE: %s" % p)
        else:
            print("NOT: %s" % p)
    mac_fields.append(self.get_setting("merchant_secret"))
    # The MAC input contains the merchant secret, so it is never printed.
    macs = "".join([field + "&" for field in mac_fields])

    expected_mac = hashlib.md5(macs).hexdigest().upper()
    # A response without a MAC is treated as an invalid MAC.
    respmac = respdata.get("SOLOPMT_MAC")
    print("MAC-A %s" % expected_mac)
    print("MAC-B %s" % respmac)

    if respmac != expected_mac:
        raise PaymentInvalidMacError("Response MAC is invalid.")

    return respdata
def query(self, payment):
    """
    Queries Nordea for the status of one payment.

    The Nordea payment query interface has two basic operations modes: one
    targeted at operator instigated queries and another for automated
    queries. The RESPTYPE variable determines the mode of operation.

    With RESPTYPE being 'html' the mode targeted at human operators is
    invoked. You post a form and it returns a HTML page with the payment
    details. If you've set the RESPDATA parameter, there is a form on the
    details page that will post the payment details to the specified URL.

    On the other hand if RESPTYPE is 'xml' the post to Nordea will return
    an XML document with the payment details that can then be processed in
    anyway that makes sense.

    Here only the automated query is implemented and is intended for
    background queries.

    Returns a tuple (ok, respdata): ``ok`` is True when the response's
    SOLOPMT_RESPCODE is "OK", and ``respdata`` maps the response fields
    listed in QUERY_RESP_PARAMS to their text.

    Raises PaymentInvalidMacError if the response MAC does not match the
    MAC calculated from the response fields and the merchant secret.
    """
    # FIXME: we need for parameters flexible forward and backwards
    # parameter marshalling methods
    import hashlib
    import urllib2
    from urllib import urlencode
    from xml.etree.ElementTree import XML

    data = {}
    data["SOLOPMT_VERSION"] = "0001"
    data["SOLOPMT_TIMESTMP"] = "199911161024590001"  # FIXME
    data["SOLOPMT_RCV_ID"] = self.get_setting("merchant_key")
    data["SOLOPMT_LANGUAGE"] = "1"  # FIXME
    data["SOLOPMT_RESPTYPE"] = "xml"  # "html" or "xml"
    data["SOLOPMT_RESPDATA"] = "text/xml"  # FIXME
    data["SOLOPMT_STAMP"] = payment.get_value("code")
    data["SOLOPMT_KEYVERS"] = "0001"  # FIXME
    data["SOLOPMT_ALG"] = "01"

    # Field order for the request MAC; fields absent from ``data`` are
    # skipped, "secret" stands for the merchant secret.
    order = ("VERSION", "TIMESTMP", "RCV_ID", "LANGUAGE", "RESPTYPE",
             "RESPDATA", "RESPDETL", "STAMP", "REF", "AMOUNT", "CUR",
             "KEYVERS", "ALG", "secret")
    parts = []
    for p in order:
        if p == "secret":
            parts.append(self.get_setting("merchant_secret"))
        else:
            key = "SOLOPMT_%s" % p
            if key in data:
                parts.append(data[key])
    s = "".join([part + "&" for part in parts])

    # hashlib.md5 yields the same digest as the deprecated ``md5`` module.
    data["SOLOPMT_MAC"] = hashlib.md5(s).hexdigest().upper()

    con = urllib2.urlopen(self.QUERY_URL, urlencode(data))
    try:
        resp = con.read()
    finally:
        con.close()

    QUERY_RESP_PARAMS = (
        "SOLOPMT_VERSION",
        "SOLOPMT_TIMESTMP",
        "SOLOPMT_RCV_ID",
        "SOLOPMT_RESPCODE",
        "SOLOPMT_STAMP",
        "SOLOPMT_RCV_ACCOUNT",
        "SOLOPMT_REF",
        "SOLOPMT_DATE",
        "SOLOPMT_AMOUNT",
        "SOLOPMT_CUR",
        "SOLOPMT_PAID",
        "SOLOPMT_STATUS",
        "SOLOPMT_KEYVERS",
        "SOLOPMT_ALG",
    )

    mac_fields = []
    respdata = {}
    respmac = ""
    # The response MAC is built from the known fields in the order they
    # appear in the document.
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    for e in XML(resp).iter():
        if e.tag in QUERY_RESP_PARAMS:
            mac_fields.append(e.text)
            respdata[e.tag] = e.text
        if e.tag == "SOLOPMT_MAC":
            respmac = e.text
    mac_fields.append(self.get_setting("merchant_secret"))
    macs = "".join([field + "&" for field in mac_fields])

    if respmac != hashlib.md5(macs).hexdigest().upper():
        raise PaymentInvalidMacError("Response MAC is invalid.")

    # A response without a response code is reported as not OK.
    return (respdata.get("SOLOPMT_RESPCODE") == "OK", respdata)
def htmlify(self):
    """Return an HTML fragment for this item, chosen by ``self.type``.

    Reads ``self.type``, ``self.path`` and ``self.filename``. Handled types:

    * ``'image'``: an ``<img>`` tag pointing at ``../img/<filename>``; files
      whose name contains both ``.noteify`` and ``.png`` yield ``''``.
    * ``'code'``: the HTML-escaped file content inside ``<pre><code>``.
    * ``'PDF'``: the PDF is saved as ``<name>.noteify.png`` next to the
      original (unless such output already exists), then one ``<img>`` tag
      per matching PNG in that directory is emitted.
    * ``'Markdown'``: whatever ``markdown.Converter().convert(path)``
      returns.
    * ``'Powerpoint PPTX'``: the presentation is exported as PNG through
      the PowerPoint COM interface; the returned fragment is ``''``.
    * ``'Excel XLSX'``: each sheet is written to ``.xltemp.csv`` and
      converted with ``self.csv_to_html``; ``''`` if the workbook cannot
      be opened.
    * ``'Word DOCX'``: one ``<p>`` per non-empty paragraph, HTML-escaped.
    * ``'CSV'``: ``self.csv_to_html(filename=self.path)``.

    Any other type yields ``''``.
    """
    # html.escape on Python 3, cgi.escape on Python 2; both take ``quote``.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    html = ''
    if self.type == 'image':
        # PNGs carrying the '.noteify' marker are emitted by the PDF branch.
        if '.noteify' in self.filename and '.png' in self.filename:
            html = ''
        else:
            html = '<img src="../img/{}" height="750" />'.format(self.filename)
    elif self.type == 'code':
        with open(self.path, 'rU') as source:
            code = source.read()
        # Escape so that '<', '>' and '&' in the source show up literally.
        html = '<pre style="background-color:rgba(0, 0, 255, 0.2);"><code>{}</code></pre>'.format(
            escape(code, quote=False)
        )
    elif self.type == 'PDF':
        from wand.image import Image
        import os
        import os.path
        filename_root = self.filename.split('.pdf')[0]
        new_filename = filename_root + '.noteify.png'
        directory = self.path.split(self.filename)[0]
        already_rendered = (
            os.path.exists(directory + new_filename)
            or os.path.exists(directory + filename_root + '-1.png')
        )
        if not already_rendered:
            with Image(filename=self.path, resolution=300) as img:
                try:
                    img.save(filename=directory + new_filename)
                except Exception:
                    # Best effort: a failed save just means no images are
                    # found below.
                    pass
        # NOTE(review): this replaces self.filename (a string) with a list
        # of file names; kept because other code may rely on it -- confirm.
        self.filename = sorted([
            filename for filename in os.listdir(directory)
            if filename_root in filename
            and '.noteify' in filename
            and '.png' in filename
        ])
        for filename in self.filename:
            html += '<img src="../pdf/{}" height="750" />'.format(filename)
    elif self.type == 'Markdown':
        import markdown
        converter = markdown.Converter()
        html = converter.convert(self.path)
    elif self.type == 'Powerpoint PPTX':
        from comtypes import client
        powerpoint = client.CreateObject('Powerpoint.Application')
        powerpoint.Presentations.Open(self.path)
        powerpoint.ActivePresentation.Export(self.path, 'PNG')
        powerpoint.ActivePresentation.Close()
        powerpoint.Quit()
    elif self.type == 'Excel XLSX':
        import xlrd
        import csv
        try:
            workbook = xlrd.open_workbook(self.path)
        except xlrd.biffh.XLRDError:
            html = ''
        else:
            for sheet_name in workbook.sheet_names():
                sheet = workbook.sheet_by_name(sheet_name)
                with open('.xltemp.csv', 'w') as f:
                    c = csv.writer(f, quoting=csv.QUOTE_ALL)
                    for rownum in xrange(sheet.nrows):
                        try:
                            c.writerow(sheet.row_values(rownum))
                        except UnicodeEncodeError:
                            # Rows that cannot be encoded become one empty cell.
                            c.writerow([''])
                # NOTE(review): each sheet overwrites the previous result,
                # so only the last sheet is returned -- confirm intended.
                html = self.csv_to_html(filename='.xltemp.csv')
    elif self.type == 'Word DOCX':
        from xml.etree.ElementTree import XML
        import zipfile
        WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
        PARA = WORD_NAMESPACE + 'p'
        TEXT = WORD_NAMESPACE + 't'
        # The context manager closes the archive even if the read fails.
        with zipfile.ZipFile(self.path) as docx:
            xml_content = docx.read('word/document.xml')
        tree = XML(xml_content)
        paragraphs = []
        # Element.iter() replaces getiterator(), removed in Python 3.9.
        for paragraph in tree.iter(PARA):
            texts = [node.text for node in paragraph.iter(TEXT) if node.text]
            if texts:
                paragraphs.append(escape(''.join(texts), quote=False))
        html = '<p>' + '</p><p>'.join(paragraphs) + '</p>'
    elif self.type == 'CSV':
        html = self.csv_to_html(filename=self.path)
    else:
        html = ''
    return html