/
miscellaneous.py
98 lines (79 loc) · 2.79 KB
/
miscellaneous.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/python
# -*- coding: utf-8 -*-
import functools
import os, re
import pprint, threading
import time
import traceback
from datetime import datetime
import HTMLParser

import wx
# Directory named 'temp' located next to this module; get_save_path() creates
# it on demand.
TEMP_PATH = os.path.join(os.path.dirname(__file__), 'temp')
# Text accumulated by print_(..., info=True); print_flush() sends it and
# resets it to the empty string.
print_buffer = ''
# HTMLParser instance shared by unescape(); stays None until the first call.
html_parser = None
def unescape(html):
    """Return `html` after passing it through ``HTMLParser.unescape``.

    The parser object is stored in the module-level ``html_parser`` variable:
    it is created the first time this function runs and reused afterwards.
    """
    global html_parser
    if html_parser:
        parser = html_parser
    else:
        # First call: build the shared parser and remember it for later calls.
        parser = html_parser = HTMLParser.HTMLParser()
    return parser.unescape(html)
def thread_func(time_delay = 0.01):
    """Decorator factory that runs the decorated function in a new thread.

    Usage::

        @thread_func(0.5)
        def work(a, b=0): ...

    Calling ``work(...)`` starts a ``threading.Thread`` and returns ``None``
    immediately. Inside that thread the code sleeps for `time_delay` seconds
    and then calls the original function with the same positional and
    keyword arguments. The function's return value is discarded.

    time_delay -- seconds to sleep in the worker thread before the call.
    """
    def decorator(func):
        # wraps() keeps func.__name__ visible on the wrapper, which matters
        # when another decorator (e.g. one that reports the name) is stacked
        # on top of this one.
        @functools.wraps(func)
        def launcher(*args, **kwargs):
            def worker():
                # The delay happens in the worker, so the caller never blocks.
                time.sleep(time_delay)
                func(*args, **kwargs)
            threading.Thread(target = worker).start()
        return launcher
    return decorator
def pass_exception(func):
    """Decorator that reports, instead of propagating, exceptions from `func`.

    The wrapper forwards all arguments to `func` and returns its result.
    If `func` raises an ``Exception``, the wrapper prints a line containing
    the function name and the exception, prints the formatted traceback,
    and returns ``None``.
    """
    @functools.wraps(func)
    def _pass_exception(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Deliberate best-effort behaviour: report and fall through so the
            # caller keeps running. Single-argument print(...) produces the
            # same output as the print statement on Python 2.
            print('%s RUN EXCEPTION: %s\n' % (func.__name__, e))
            print(traceback.format_exc())
    return _pass_exception
def p_(str):
    """Pretty-print `str` with ``pprint.pprint``.

    The argument is handed to pprint unchanged, so it may be any object.
    """
    # NOTE: the parameter name shadows the built-in ``str``; it is kept as-is
    # because callers may pass it by keyword.
    pretty = pprint.pprint
    pretty(str)
def print_(str, info = False, output_time = False):
    """Print `str`, optionally time-stamped, and optionally buffer it.

    str         -- value to print (formatted with ``%s``).
    info        -- when true, the stripped line plus a newline is also
                   appended to the module-level ``print_buffer``.
    output_time -- when true, the line is prefixed with ``[HH:MM:SS]`` taken
                   from ``datetime.now()``.
    """
    global print_buffer
    if output_time:
        stamp = '[%s]' % datetime.now().strftime('%H:%M:%S')
    else:
        stamp = ''
    line = '%s%s' % (stamp, str)
    print(line)
    if info:
        print_buffer += '%s\n' % line.strip()
def print_flush():
    """Pass the buffered text to ``wx.msg`` and then empty the buffer.

    NOTE(review): ``wx.msg`` is not defined in this file; what it does with
    the text should be confirmed against the ``wx`` module actually imported.
    """
    global print_buffer
    pending = print_buffer
    wx.msg(pending)
    # Reset only after the call above has returned.
    print_buffer = ''
def chinese(word):
    """Return `word` as unicode text.

    Values that are already unicode text are returned unchanged. Re-decoding
    them would implicitly encode to ASCII first on Python 2 and fail for
    non-ASCII characters. Anything else is decoded with
    ``word.decode('utf-8')``, exactly as before.

    Raises UnicodeDecodeError if the bytes are not valid UTF-8.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(word, text_type):
        return word
    return word.decode('utf-8')
def get_save_path():
    """Return ``TEMP_PATH``, creating the directory first if it is missing."""
    already_there = os.path.isdir(TEMP_PATH)
    if not already_there:
        os.makedirs(TEMP_PATH)
    return TEMP_PATH
def clear_save():
    """Delete the .html, .png and .jpg files in the save directory.

    Only entries directly inside the directory returned by get_save_path()
    are considered, and the extension comparison is case-sensitive. After
    removal, the number of deleted files is reported through print_().
    """
    folder = get_save_path()
    removable = ('.html', '.png', '.jpg')
    doomed = []
    # Collect every target before deleting anything.
    for entry in os.listdir(folder):
        extension = os.path.splitext(entry)[-1]
        if extension in removable:
            doomed.append(os.path.join(folder, entry))
    for path in doomed:
        os.remove(path)
    print_('clear all [%d] temp files!' % len(doomed))
def html_contents(html, start_tag, end_tag):
    """Return every text span found between `start_tag` and `end_tag`.

    html      -- text to search.
    start_tag -- opening delimiter. If it does not end with '>', any
                 characters up to the next '>' are skipped, so '<p' also
                 matches '<p class="x">'.
    end_tag   -- closing delimiter.

    Both tags are inserted into a regular expression without escaping, so
    regex metacharacters in them are interpreted as such. Matching is
    non-greedy and spans newlines. Returns a list of strings, empty when
    nothing matches.
    """
    template = '%s(.*?)%s' if start_tag.endswith('>') else '%s[^>]*>(.*?)%s'
    matcher = re.compile(template % (start_tag, end_tag), re.MULTILINE|re.DOTALL)
    return matcher.findall(html)
def html_content(html, start_tag, end_tag):
    """Return the first span that html_contents() finds for the given tags.

    Raises IndexError when there is no match.
    """
    matches = html_contents(html, start_tag, end_tag)
    return matches[0]
def html_attributes(html, start_tag, attr):
    """Return the quoted values of attribute `attr` on tags matching `start_tag`.

    html      -- text to search.
    start_tag -- beginning of the tag, e.g. '<a'; used as a regex fragment.
    attr      -- attribute name; used as a regex fragment.

    Values may be wrapped in single or double quotes. Unquoted values are
    not matched. Returns a list of strings, empty when nothing matches.
    """
    matcher = re.compile(
        '%s[^>]*%s=["\']([^"\']*)["\']' % (start_tag, attr),
        re.MULTILINE|re.DOTALL
    )
    return matcher.findall(html)
def html_attribute(html, start_tag, attr):
    """Return the first value that html_attributes() finds for `attr`.

    Raises IndexError when there is no match.
    """
    values = html_attributes(html, start_tag, attr)
    return values[0]
def pause():
    """Block on a console prompt; typing 'g' drops into the ipdb debugger.

    Any other input simply returns. ipdb is imported only when requested.
    """
    c = raw_input('pause... (Press g for pdb, others continue)')
    if c != 'g':
        return
    import ipdb
    ipdb.set_trace()
def print_element(element):
    """Pretty-print a header line followed by the element's 'outerHTML'.

    `element` must provide ``get_attribute(name)``.
    NOTE(review): presumably a Selenium WebElement; confirm against callers.
    """
    p_('The outerHTML of element:')
    markup = element.get_attribute('outerHTML')
    p_(markup)