def getCategoryUrl(site="", url=""):
    catDb = openTable(tableName=global_setting['catTable'])
    r = session.get(url)
    if not r.text:
        return False
    soup = BeautifulSoup(r.text)
    for level1 in soup.select('.classify_books'):
        curLevel1 = level1.select('.classify_title')[0].text
        curLevel1 = re.sub(r'\s', '', curLevel1)
        for level2 in level1.select('.classify_kind'):
            curLevel2 = level2.select('.classify_kind_name')[0].text
            curLevel2 = re.sub(r'\s', '', curLevel2)
            for level3 in level2.select('ul li a'):
                #curLevel3 = re.sub('\s', '', level3.text)
                curLevel3 = level3.text.strip()
                curlUrl = level3['href']
                retFind = re.findall(r'\/cp(.*)\.html', curlUrl)
                if retFind:
                    curCatID = retFind[0]
                    catType = 'book'
                else:
                    retFind = re.findall(r'\/cid(.*)\.html', curlUrl)
                    if retFind:
                        curCatID = retFind[0]
                        catType = 'nonbook'
                if retFind:
                    if catDb.find({'catId': curCatID}).count() > 0:
                        logger.debug('category %s exists, skip\n' % (curCatID))
                    else:
                        catDb.insert({'catId': curCatID, 'level1': curLevel1,
                                      'level2': curLevel2, 'level3': curLevel3,
                                      'catUrl': curlUrl, 'catType': catType,
                                      'site': site})
    return True
def _sanitize(self, data):
    retv = ''
    if data.find('\x1b') != -1:
        # The data contains ANSI escape characters: keep only printable
        # characters, then strip the colour-code remnants and
        # shell-special characters.
        tmp = filter(lambda x: x in string.printable, data)
        retv += re.sub(r'(\{|\}|\*|\%)', '', re.sub(r'\[[0-9\;]+m', '', tmp))
        return retv
    return data
def _clean_text(self, text):
    """
    Cleans up text before we make it into an HTML tree:
        1. Nukes <![CDATA stuff.
        2. Nukes XML encoding declarations
        3. Replaces </br> with <br/>
        4. Nukes invalid bytes in input
        5. ?
    """
    # Remove <![CDATA because it causes breakage in lxml.
    text = re.sub(r"<!\[CDATA\[", u"", text)
    text = re.sub(r"\]\]>", u"", text)

    # Remove <?xml> declaration in Unicode objects, because it causes an error:
    # "ValueError: Unicode strings with encoding declaration are not supported."
    # Note that the error only occurs if the <?xml> tag has an "encoding"
    # attribute, but we remove it in all cases, as there's no downside to
    # removing it. This moves our encoding detection to chardet, rather than
    # lxml.
    if isinstance(text, unicode):
        text = re.sub(r"^\s*<\?xml\s+.*?\?>", "", text)

    # Fix </br>
    text = re.sub("</br>", "<br/>", text)

    # Fix invalid bytes
    # (http://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python)
    # Note the \U escapes for the astral range: inside a character class,
    # \u10000 would parse as \u1000 followed by a literal '0'.
    text = re.sub(u"[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD"
                  u"\U00010000-\U0010FFFF]+", "", text)

    return text
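# A minimal usage sketch for _clean_text above, assuming Python 2 (the
# function checks isinstance(text, unicode)); self is unused, so None
# stands in for it here:
sample = u'<![CDATA[hello]]> broken</br>line'
print _clean_text(None, sample)  # prints: hello broken<br/>line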
def _parse_productions(self):
    """
    Parse the current contents of the textwidget buffer, to create
    a list of productions.
    """
    productions = []

    # Get the text, normalize it, and split it into lines.
    text = self._textwidget.get('1.0', 'end')
    text = re.sub(self.ARROW, '->', text)
    text = re.sub('\t', ' ', text)
    lines = text.split('\n')

    # Convert each line to a CFG production
    for line in lines:
        line = line.strip()
        if line == '':
            continue
        productions += parse_cfg_production(line)
        #if line.strip() == '': continue
        #if not CFGEditor._PRODUCTION_RE.match(line):
        #    raise ValueError('Bad production string %r' % line)
        #
        #(lhs_str, rhs_str) = line.split('->')
        #lhs = Nonterminal(lhs_str.strip())
        #rhs = []
        #def parse_token(match, rhs=rhs):
        #    token = match.group()
        #    if token[0] in "'\"": rhs.append(token[1:-1])
        #    else: rhs.append(Nonterminal(token))
        #    return ''
        #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
        #
        #productions.append(Production(lhs, *rhs))

    return productions
def file_num_sort(a, b):
    a_num = re.sub(r'[^0-9]+', '', a)
    b_num = re.sub(r'[^0-9]+', '', b)
    if a_num == '' or b_num == '':
        return cmp(a, b)
    else:
        return cmp(int(a_num), int(b_num))
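# A minimal usage sketch for file_num_sort, assuming Python 2 (cmp() and
# the cmp= argument to sorted() are gone in Python 3); the filenames are
# hypothetical:
example_files = ['shot10.png', 'shot2.png', 'shot1.png']
print sorted(example_files, cmp=file_num_sort)
# prints: ['shot1.png', 'shot2.png', 'shot10.png']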
def makeIdentifier(self, string):
    string = re.sub(r"\s+", " ", string.strip())
    string = unicodedata.normalize('NFKD', safeEncode(string))
    string = re.sub(r"['\"!?@#$&%^*\(\)_+\.,;:/]", "", string)
    string = re.sub(r"[_ ]+", "_", string)
    string = string.strip('_')
    return string.strip().lower()
def shortcodify(name):
    # strip out all characters that are not word characters or whitespace
    nons = re.sub(r'[^\w\s]', "", name)
    # replace all whitespace with dashes
    nonw = re.sub(r'\s', '-', nons)
    # return the lowercase version of the string
    return nonw.lower()
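# A minimal usage sketch for shortcodify (hypothetical input):
print shortcodify("Hello, World! Foo")  # prints: hello-world-foo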
def GetFootnotes(self, doc, plainPrefix, prefix):
    chapterFootnotes = []
    for ppp in html.tostring(doc).split(r'<a name="P'):
        footnote = ppp.partition('"><b>')
        footnoteNo = footnote[0].partition('"')[0]
        verse = re.sub(r'^[^#]*#', '', footnote[0])
        if verse[0] != 'W':
            continue
        footnoteText = re.sub(r'otworz\.php\?skrot=', r'#W',
                              footnote[2].partition(' - ')[2]).strip()
        subs = (
            # remove trailing whitespaces
            (r'\s+<br>', r'<br>'),
            # change class name
            (r'skrot', r'przypis'),
            # fix href
            ('%20', ''),
            ('%C5%821', 'l'),
            ('%C5%822', 'L'),
            # one newline is enough
            (r'<br><br>', r'<br>'),
            # <div> tags were not open
            (r'<br></div>', r'<br>')
        )
        for fromPattern, toPattern in subs:
            footnoteText = re.sub(fromPattern, toPattern, footnoteText)
        verse = re.sub('W', ',', verse)
        chapterFootnotes.append('<a id="P' + plainPrefix + 'P' + footnoteNo +
                                '" href="#W' + plainPrefix + verse +
                                '" class="przypis"> [' + prefix + verse +
                                ']</a> ' + footnoteText)
        #chapterFootnotes.append('<a id="' + plainPrefix + 'P' + footnoteNo + '" href="#' + plainPrefix + verse + '" class="przypis"> [' + plainPrefix + verse + ']</a> ' + footnoteText)
    self.footnotes.append("\n".join(chapterFootnotes))
def copy_template():
    config_prompt(template)
    shutil.copytree(template, name)

    if os.path.exists('%s/%s' % (name, 'config.yaml')):
        os.remove('%s/%s' % (name, 'config.yaml'))

    for dirname, dirnames, files in os.walk(name):
        for d in dirnames:
            if d == options.template:
                shutil.copytree('%s/%s' % (dirname, d), '%s/%s' % (dirname, name))
                shutil.rmtree('%s/%s' % (dirname, d))

    for dirname, dirnames, files in os.walk(name):
        for filename in files:
            f = open('%s/%s' % (dirname, filename), 'r')
            lines = f.readlines()
            f.close()
            first_pass = [re.sub(r'{{\s*(\w+)\s*}}', replace_variable, line)
                          for line in lines]
            new_lines = [re.sub(r'__config_(\w+)__', replace_variable, line)
                         for line in first_pass]
            f = open('%s/%s' % (dirname, filename), 'w')
            f.write(''.join(new_lines))
            f.close()
def clean_word(word):
    """Removes any potential non-word characters"""
    word = re.sub(r"[0-9]* ", "", word)
    word = re.sub(r"[\s]*", "", word)
    word = word.replace('\n', '')
    word = word.replace('\r', '')
    return word
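# A minimal usage sketch for clean_word (hypothetical input): the leading
# "12 " and all whitespace are stripped away.
print clean_word("12 word\r\n")  # prints: word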
def main():
    cur_dir = os.path.dirname(__file__)
    os.chdir(os.path.join(cur_dir, ".."))
    modules = sys.argv[1:]

    if not modules:
        modules = ['django_evolution']

    p = subprocess.Popen(['pyflakes'] + modules,
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         close_fds=True)
    contents = p.stdout.readlines()

    # Read in the exclusions file
    exclusions = {}
    fp = open(os.path.join(cur_dir, "pyflakes.exclude"), "r")

    for line in fp.readlines():
        if not line.startswith("#"):
            exclusions[line.rstrip()] = 1

    fp.close()

    # Now filter them
    for line in contents:
        line = line.rstrip()
        test_line = re.sub(r':[0-9]+:', r':*:', line, 1)
        test_line = re.sub(r'line [0-9]+', r'line *', test_line)

        if test_line not in exclusions:
            print line
def convert_corpus(filepath, mapping, alignment,
                   begin="xxBeGiN142xx", end="xxEnD142xx"):
    general_corpus = ''
    with open(filepath, 'rb') as f:
        general_corpus = re.sub('(' + begin + r'\W+)+', ' . ', f.read())
        general_corpus = re.sub(r'\n+', ' this_is_n3wline ', general_corpus)

    corpus = []
    for token in general_corpus.split():
        if token.strip() == '.':
            if len(corpus) > 0:
                if '\n' not in corpus[-1]:
                    # If the token is punctuation assign a random punctuation.
                    corpus[-1] = corpus[-1] + random.choice(
                        ['.', '.', '.', ',', ',', ',', '!', '?'])
        elif token.strip() == 'this_is_n3wline':
            corpus[-1] = corpus[-1] + '.\n\n'
        elif alignment[token] in mapping:
            if len(corpus) > 0 and re.search(r'[\n\.!?]', corpus[-1]):
                corpus.append(mapping[alignment[token]].capitalize().strip())
            else:
                corpus.append(mapping[alignment[token]].strip())

    corpus[0] = corpus[0].capitalize()
    output = ' '.join(corpus)
    output = re.sub(r' +', ' ', output)
    output = re.sub(r'\n+ ', '\n\n', output)
    return output
def html_remove_image_history(doc):
    """
    Remove image history and links to information.
    """
    doc = re.sub(r"<h2>Image history</h2>[\s\S]+?</ul>", r"", doc)
    doc = re.sub(r"<h2>Image links</h2>[\s\S]+?</ul>", r"", doc)
    return doc
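# A minimal usage sketch for html_remove_image_history (hypothetical markup):
page = '<p>intro</p><h2>Image history</h2><ul><li>rev 1</li></ul><p>rest</p>'
print html_remove_image_history(page)  # prints: <p>intro</p><p>rest</p>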
def chunkifier(conc_text, bytes, kwic=False, highlight=False):
    """Divides the passage in three:
    * from the beginning to the first hit (not included)
    * from the first hit to the end of the last hit
    * from the end of the last hit to the end of the passage
    Returns a tuple containing all three parts of the passage"""
    #conc_text = re.sub("[ \n\r]+\w*$", "", conc_text)  ## no words cut out, or worse, no broken multiple-byte chars
    conc_start = conc_text[:bytes[0]]
    conc_middle = ''
    end_byte = 0
    for pos, word_byte in enumerate(bytes):
        if highlight:
            text, end_byte = highlighter(conc_text[word_byte:])
            end_byte = word_byte + end_byte
        else:
            text_chunks = re.split("([^ \.,;:?!\'\-\"\n\r\t\(\)]+)",
                                   conc_text[word_byte:])
            end_byte = word_byte + len(text_chunks[1])
            text = text_chunks[1]
        conc_middle += text
        if len(bytes) > pos + 1:
            conc_middle += conc_text[end_byte:bytes[pos + 1]]
    conc_end = conc_text[end_byte:]
    ## Make sure we have no words cut out
    conc_start = re.sub("^[^ ]+ ", "", conc_start)
    conc_end = re.sub(" [^ ]+$", "", conc_end)
    return conc_start, conc_middle, conc_end
def sendGPS(self, head, cmd):
    maxcnt = 100
    string = head + cmd
    res = ""
    print "GPS SEND: '%s'" % string
    self.dev.flushInput()
    self.dev.write(string + "\r\n")
    for j in xrange(maxcnt):
        res = self.dev.readline()
        if len(res) > 0:
            res = re.sub(r"^\s+", "", res)
            res = re.sub(r"\s+$", "", res)
            print "RAW GPS REPLY: '%s'" % res
            pos = res.find(head)
            if pos != -1:
                res = res[pos:].split("*")[0]
                print "GPS REPLY: '%s'" % res
                return res
        else:
            print "ZERO REPLY"
            return None
    print "sendGPS: FAILED: '%s'" % res
def ToC(testament, books):
    url = 'http://biblia.deon.pl/index.php'
    response = urllib2.urlopen(url).read()
    doc = html.fromstring(response)
    for entry, href in zip(doc.xpath('.//tr[@valign="top"][' + testament + ']/td/a'),
                           books):
        print re.sub(r'class=\"ks\" href=\".*?\"',
                     r'href="#K' + unicodeToPlain(href) + r'"',
                     html.tostring(entry)) + '<br>'
    print '<br><br>'
def getPanelInfo(self, doc, strXPath):
    try:
        npos = doc.text_content().find(strXPath)
        if npos == -1:
            return ""
        strContent = doc.text_content()[npos:-1]
        npos = strContent.find("})")
        if npos == -1:
            return ""
        strContent = strContent[0:npos + 1]
        strContent = strContent[strContent.find("\"html\":\"") + 8:-4]
        if "v2" in self.xpathType:
            strContent = strContent.decode('unicode-escape')
        strContent = re.sub(r"(\\n)*(\\t)*(\\ /)*(\\)*", "", strContent)
        strContent = re.sub(r"\\/", "/", strContent)
        if strContent:
            # Unescape the HTML entities left in the extracted fragment.
            strContent = strContent.replace("&lt;", "<") \
                                   .replace("&gt;", ">") \
                                   .replace("&nbsp;", "")
        else:
            return ""
    except Exception:
        s = sys.exc_info()
        msg = (u"getPanelInfo Error %s happened on line %d"
               % (s[1], s[2].tb_lineno))
        logger.error(msg)
        return ""
    return strContent
def start(self):
    keyword = getattr(settings, 'USHAHIDI_KEYWORD', '')
    self.default_response = getattr(settings, 'USHAHIDI_RESPONSE',
                                    'Thank you for your report.')
    self.error_response = getattr(settings, 'USHAHIDI_ERROR',
                                  "Due to some error, we're unable to process your message. Please resend.")
    self.pattern = re.compile(r"^\s*(?:%s)(?:[\s,;:]+(.+))?$" % (keyword))
    self.trigger_url = re.sub(r'\$\{sender_number\}', '%(sender)s',
                              getattr(settings, 'USHAHIDI_TRIGGER_URL'))
    self.trigger_url = re.sub(r'\$\{message_content\}', '%(message)s',
                              self.trigger_url)
def gen_xkcd_sub(msg, hook=False):
    # http://xkcd.com/1288/
    substitutions = {
        'witnesses': 'these dudes I know',
        'allegedly': 'kinda probably',
        'new study': 'tumblr post',
        'rebuild': 'avenge',
        'space': 'SPAAAAAACCCEEEEE',
        'google glass': 'virtual boy',
        'smartphone': 'pokedex',
        'electric': 'atomic',
        'senator': 'elf-lord',
        'car': 'cat',
        'election': 'eating contest',
        'congressional leaders': 'river spirits',
        'homeland security': 'homestar runner',
        'could not be reached for comment': 'is guilty and everyone knows it'
    }
    # http://xkcd.com/1031/
    substitutions['keyboard'] = 'leopard'
    # http://xkcd.com/1418/
    substitutions['force'] = 'horse'
    output = msg
    # Note: the trailing "or True" makes this condition unconditionally true,
    # so the hook probability check is effectively disabled.
    if not hook or random() < 0.001 or True:
        for text, replacement in substitutions.items():
            if text in output:
                output = re.sub(r"\b%s\b" % text, replacement, output)
        output = re.sub(r'(.*)(?:-ass )(.*)', r'\1 ass-\2', output)
    if msg == output:
        return None if hook else msg
    else:
        return output
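# A minimal usage sketch for gen_xkcd_sub (hypothetical headline; with the
# default hook=False the substituted text is returned directly):
print gen_xkcd_sub("senator pushes new study on space")
# prints: elf-lord pushes tumblr post on SPAAAAAACCCEEEEE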
def injection_test(payload, http_request_method, url):
    # Check if the defined method is GET (default).
    if http_request_method == "GET":
        # Check whether the 'INJECT_HERE' tag is specified.
        #url = parameters.do_GET_check(url)

        # Encoding spaces.
        payload = payload.replace(" ", "%20")

        # Define the vulnerable parameter
        vuln_parameter = parameters.vuln_GET_param(url)
        target = re.sub(settings.INJECT_TAG, payload, url)
        request = urllib2.Request(target)

        # Check if extra headers are defined.
        headers.do_check(request)
        try:
            # Get the response of the request
            response = get_request_response(request)
        except KeyboardInterrupt:
            response = None

    # Check if the defined method is POST.
    else:
        parameter = menu.options.data
        parameter = urllib2.unquote(parameter)

        # Check whether the 'INJECT_HERE' tag is specified.
        parameter = parameters.do_POST_check(parameter)

        # Define the POST data
        if settings.IS_JSON == False:
            data = re.sub(settings.INJECT_TAG, payload, parameter)
            request = urllib2.Request(url, data)
        else:
            payload = payload.replace("\"", "\\\"")
            data = re.sub(settings.INJECT_TAG, urllib.unquote(payload), parameter)
            try:
                data = json.loads(data, strict=False)
            except:
                pass
            request = urllib2.Request(url, json.dumps(data))

        # Check if extra headers are defined.
        headers.do_check(request)

        # Define the vulnerable parameter
        vuln_parameter = parameters.vuln_POST_param(parameter, url)
        try:
            # Get the response of the request
            response = get_request_response(request)
        except KeyboardInterrupt:
            response = None

    return response, vuln_parameter
def extract_bow_v2_features(train, test, test_contains_labels=False):
    '''
    Performs feature extraction for another simple tfidf model used for
    ensembling purposes.
    '''
    s_data = []
    s_labels = []
    t_data = []
    t_labels = []
    stemmer = PorterStemmer()
    for i, row in train.iterrows():
        s = (" ").join(["q" + z for z in BeautifulSoup(train["search_term"][i], "lxml").get_text(" ").split(" ")]) \
            + " " \
            + (" ").join(["z" + z for z in BeautifulSoup(train.product_title[i], "lxml").get_text(" ").split(" ")]) \
            + " " \
            + BeautifulSoup(train.product_description[i], "lxml").get_text(" ")
        s = re.sub("[^a-zA-Z0-9]", " ", s)
        s = (" ").join([stemmer.stem(z) for z in s.split(" ")])
        s_data.append(s)
        s_labels.append(str(train["relevance"][i]))
    for i, row in test.iterrows():
        s = (" ").join(["q" + z for z in BeautifulSoup(test["search_term"][i], "lxml").get_text().split(" ")]) \
            + " " \
            + (" ").join(["z" + z for z in BeautifulSoup(test.product_title[i], "lxml").get_text().split(" ")]) \
            + " " \
            + BeautifulSoup(test.product_description[i], "lxml").get_text()
        s = re.sub("[^a-zA-Z0-9]", " ", s)
        s = (" ").join([stemmer.stem(z) for z in s.split(" ")])
        t_data.append(s)
        if test_contains_labels:
            t_labels.append(str(test["relevance"][i]))
    return (s_data, s_labels, t_data, t_labels)
def parse_profile(file_name):
    return_dict = dict()
    with open(file_name) as reader:
        for line in reader.readlines():
            line = re.sub(r"export\s+", "", line.strip())
            if "=" in line:
                key, value = line.split("=", 1)
                # Values that are wrapped in tics: remove the tics but
                # otherwise leave as is
                if value.startswith(TIC):
                    # Remove first tic and everything after the last tic
                    last_tic_position = value.rindex(TIC)
                    value = value[1:last_tic_position]
                    return_dict[key] = value
                    continue
                # Values that are wrapped in quotes: remove the quotes and
                # optional trailing comment
                elif value.startswith(QUOTE):
                    value = re.sub(r'^"(.+?)".+', r'\g<1>', value)
                # Values that are followed by whitespace or comments: remove
                # the whitespace and/or comments
                else:
                    value = re.sub(r'(#|\s+).*', '', value)
                # Find embedded shell variables and replace them with their
                # values
                for variable in re.findall(r"\$\{?\w+\}?", value):
                    dict_key = variable.strip("${}")
                    value = value.replace(variable, return_dict.get(dict_key, ""))
                # Add this key to the dictionary
                return_dict[key] = value
    return return_dict
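# A minimal usage sketch for parse_profile; TIC and QUOTE are constants the
# function references but does not define, so plausible values are assumed
# here. The profile file is hypothetical:
TIC = "'"
QUOTE = '"'
with open('demo_profile.sh', 'w') as f:
    f.write('export BASE=/opt/app # install root\n')
    f.write('export LOG_DIR="${BASE}/logs" # log directory\n')
print parse_profile('demo_profile.sh')
# prints: {'BASE': '/opt/app', 'LOG_DIR': '/opt/app/logs'} (key order may vary)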
def htmlify(self, text):
    t = text.strip()
    #t = xml.sax.saxutils.escape(t)
    t = "<p>%s</p>" % t
    t = re.sub('\n\n+', '</p><p>', t)
    t = re.sub('\n', '<br>', t)
    return t
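# A minimal usage sketch for htmlify: blank lines become paragraph breaks,
# single newlines become <br>. self is unused, so None stands in for it:
print htmlify(None, "para one\n\npara two\nsame para")
# prints: <p>para one</p><p>para two<br>same para</p>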
def main(argv):
    (type, address, action, key, value) = parse_cli()
    base_url = ''
    address = re.sub('http://', '', address)
    if type == 'etcd':
        base_url = address + '/v2/keys/'
    elif type == 'consul':
        base_url = address + '/v1/kv/'
    base_url = re.sub(r'\/+', '/', base_url)
    base_url = 'http://' + base_url
    base_url = re.sub(r'\/+$', '', base_url)
    if action.lower() == 'set':
        set_key_value(base_url, key, value)
    elif action.lower() == 'get':
        value = parse_value(get_key_value(base_url, key), type)
        if value is not None:
            print value
    elif action.lower() == 'delete':
        delete_key_value(base_url, key)
def classifyText(text, params):
    start_time = params.my_time()

    # clean
    try:
        text = params.cleaner.clean_html(text)
    except:
        pass
    text = re.sub('<.*?>', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()

    # Tokenize
    tokens = re.findall('[a-z]+', text)

    # Remove stop words
    tokens_2 = []
    for t in tokens:
        if t not in params.stopword_list:
            tokens_2.append(t)
    # print tokens_2

    # Stem
    stems = []
    for t in tokens_2:
        stem = params.porterStemmer.stem(t, 0, len(t) - 1)
        stems.append(stem)

    z = 0  # params.linear_classifier['{{intercept}}'] + .6
    for s in stems:
        if s in params.linear_classifier:
            # print s, params.linear_classifier[s]
            z += params.linear_classifier[s]

    end_time = params.my_time()
    return (z < 0, [start_time, end_time, len(stems), z,
                    1 / (1 + math.exp(-z)), int(z > 0)])
def parse_list(self, page):
    # Remove null bytes
    page = re.sub(r'\0', r' ', page)
    # Remove sequences of '''''''
    page = re.sub(r"'+", "'", page)
    reader = csv.DictReader(StringIO(page), quoting=csv.QUOTE_ALL,
                            escapechar='\\')

    # There is one row in the data for each violation, not just each
    # inspection. Violations from the same inspection will be contiguous,
    # so roll up the violations until we see a different inspection.
    current_record = None
    for row in reader:
        if row['CITY'] != 'CHARLOTTE':
            continue
        row['comments'] = []

        # Strip any leading zeros. Both 01 and 1 appear sometimes, but
        # they mean the same thing.
        item_id = row['ITEM_NUM'].lstrip('0')

        violation = {'id': item_id,
                     'value': row['ITEM_VALUE'],
                     'comment': row['COMMENT']}

        if current_record is None:
            current_record = row
            current_record['violation'] = [violation]
        elif (current_record['FAC_NAME'] != row['FAC_NAME'] or
              current_record['DATE'] != row['DATE']):
            yield current_record
            current_record = row
            current_record['violation'] = [violation]
        else:
            current_record['violation'].append(violation)

    # The final record won't be yielded from the loop above because it has
    # no following record to trigger it, so yield it here.
    yield current_record
def obfuscate_codeblocks(source):
    """Method for obfuscating codeblocks contents.

    It can often be useful to temporarily obfuscate codeblocks contents in
    order to perform some tasks safely and then re-introduce them.

    Parameters
    ----------
    source : str
        string (as single stream) containing the source

    Returns
    -------
    protected_contents : list
        list of str containing the contents of codeblocks
    str
        source with codeblocks contents obfuscated and replaced by a safe
        placeholder

    >>> source = '``` my code block ``` other contents'
    >>> prot, ob_source = obfuscate_codeblocks(source)
    >>> prot[0][2]
    '``` my code block ```'
    >>> ob_source
    '$PROTECTED-1 other contents'
    """
    obfuscate_source = source
    protected_contents = []
    for match in re.finditer(__regex_codeblock__, obfuscate_source):
        protected_contents.append([match.start(), match.end(), match.group()])
        obfuscate_source = re.sub(__regex_codeblock__,
                                  '$PROTECTED-' + str(len(protected_contents)),
                                  obfuscate_source, 1)
    for match in re.finditer(__regex_codeblock_html__, obfuscate_source):
        protected_contents.append([match.start(), match.end(), match.group()])
        obfuscate_source = re.sub(__regex_codeblock_html__,
                                  '$PROTECTED-' + str(len(protected_contents)),
                                  obfuscate_source, 1)
    return protected_contents, obfuscate_source
def GetBook(self, book):
    self.footnotes = []
    self.content = []
    counter = 1
    plainBook = unicodeToPlain(book)
    while True:
        url = 'http://www.biblia.deon.pl/otworz.php'
        values = {'ksiega': book.encode('iso8859_2'), 'rozdzial': str(counter)}
        data = urllib.urlencode(values)
        response = urllib2.urlopen(urllib2.Request(url, data)).read()
        doc = html.fromstring(response)
        if counter == 1:
            BookTitle = (doc.findall('.//span[@style="font-size:22px;"]')[0])
            self.content.append(re.sub(
                r'</span>', r'</div>',
                re.sub(r'<span style=\"font-size:22px;\"',
                       r'<br><br><a name="K' + plainBook + r'"></a><div class="tytul"',
                       html.tostring(BookTitle))))
            ChaptersInBook = len(doc.findall('.//select[@name="rozdzial"]/option'))
        else:
            self.content.append('<br><br>')
        plainPrefix = plainBook + str(counter)
        self.content.append('<div class="numer">' + str(counter) + '</div>')
        Book.GetContent(self, doc.xpath('//div[@class="tresc"]')[0], plainPrefix)
        Book.GetFootnotes(self,
                          doc.xpath('//td[@width="150"]/table/tr[5]/td/div[1]')[0],
                          plainPrefix,
                          unicodeToReference(book) + ' ' + str(counter))
        if counter == ChaptersInBook:
            self.content.append('<br><br>' + "".join(self.footnotes))
            break
        counter += 1
def sendCmd(self, cmd, trg=None, det=None, val=None, shouldBeAnswer=True):
    string = str(cmd)
    if trg != None:
        string = string + "," + str(trg)
    if det != None:
        string = string + "," + str(det)
    if val != None:
        string = string + "," + str(val)
    self.dev.write(string + "\r\n")
    res = self.dev.readline()
    res = re.sub(r"^\s+", "", res)
    res = re.sub(r"\s+$", "", res)
    if (not shouldBeAnswer) and len(res) == 0:
        print "CMD='%s' NORET" % (string)
        return 0
    print "CMD='%s' RET='%s'" % (string, res)
    if res == 'SYNTAX ERROR':
        # sometimes you get a 'SYNTAX ERROR' while the command will succeed
        # if you try again
        if self.retries <= self.max_retries:
            self.retries = self.retries + 1
            time.sleep(0.1)
            return self.sendCmd(cmd, trg, det, val, shouldBeAnswer)
        else:
            print "IO ERROR: max_retries reached for command ", cmd
    else:
        self.retries = 0
    return res
def __init__(self, responseObj=None, arguments=None):
    super(MXOptionResponse, self).__init__(responseObj=responseObj,
                                           arguments=arguments)
    htmlTree = html.fromstring(self.getContentAsText())
    kwargs = {}
    for tr in htmlTree.xpath('//div[@id="quotes"]/section/section/table/tbody/tr'):
        td = tr.xpath('./td')
        # For each header in the table row
        for i, th in enumerate(tr.xpath('./th')):
            th_text = th.text.strip().lower() if th.text is not None else ''
            # If it is a mapped name
            if th_text in self._MAPPING:
                # Throw exception if no value is present
                if len(td) <= i:
                    raise Exception('Failed to match value for "%s" using index %d'
                                    % (th_text, i))
                # Assume all values are float, trim any spacing or symbols
                # TODO fix me
                if td[i].text == '--':
                    td[i].text = '-1'
                kwargs[self._MAPPING[th_text]] = float(
                    re.sub(r'\s*([-+]?(?:\d*[.])?\d+).*', r'\g<1>', td[i].text))

    self._instrument = arguments['instrument']

    # TODO Retrieve this from the HTML? We can probably stick with
    # hardcoding from the name
    optionType = 'CALL' if 'C' in self._instrument.upper() else 'PUT'
    kwargs['_strikePrice'] = float(
        re.sub(r'\w+\s+[0-9]+(?:C|P)([-+]?(?:\d*[.])?\d+)', r'\g<1>',
               self._instrument))

    # TODO Retrieve this from the HTML?
    expirationStr = re.sub(r'\w+\s+(\d+)(?:C|P).*', r'\g<1>', self._instrument)
    kwargs['_expirationSec'] = time.mktime(time.strptime(expirationStr, '%y%m%d'))

    self._option = MXOption(optionType=optionType, **kwargs)
def clean_item_data(obj):
    if not obj:
        return ""
    data = obj.text
    # Remove everything up to and including the last colon on each line
    # (strips "Label:"-style prefixes), then trim surrounding whitespace.
    data = re.sub(r".*?:", "", data)
    return data.strip()
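# A minimal usage sketch for clean_item_data, with a hypothetical stand-in
# for the element object it expects (anything truthy with a .text attribute):
class FakeItem(object):
    text = "Price: $12.50 "

print clean_item_data(FakeItem())  # prints: $12.50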
def handle_str(string):
    # Replace non-breaking-space entities with plain spaces, then turn
    # Windows line endings into HTML line breaks.
    string = re.sub('&nbsp;', ' ', string)
    string = re.sub('\r\n', '<br/>', string)
    return 'Content: %s' % string
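# A minimal usage sketch for handle_str (hypothetical input):
print handle_str("line one\r\nline&nbsp;two")
# prints: Content: line one<br/>line two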
#!C:\Users\aats\PycharmProjects\SU_python_fundamentals\phone_book_console_app\venv\Scripts\python.exe
# EASY-INSTALL-ENTRY-SCRIPT: 'pip==19.0.3','console_scripts','pip3.7'
__requires__ = 'pip==19.0.3'
import re
import sys

from pkg_resources import load_entry_point

if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit(
        load_entry_point('pip==19.0.3', 'console_scripts', 'pip3.7')()
    )