def getOffset(self, symbol):
    """Return the signed offset encoded by the markers in a note symbol.

    A run of dots gives a negative offset (minus the run length, "Low") and
    a run of apostrophes gives a positive offset (the run length, "High").
    Only the first run of each kind is measured.  When both kinds are
    present the apostrophe run wins because it is evaluated last.

    Args:
        symbol: Note symbol text, e.g. ``"S.."`` or ``"g'"``.

    Returns:
        int: 0 when the symbol carries neither marker.
    """
    offset = 0
    low = reSearch(r"\.+", symbol)
    if low is not None:
        # start - end == -(number of dots)
        offset = low.start() - low.end()
    high = reSearch(r"'+", symbol)
    if high is not None:
        # end - start == number of apostrophes
        offset = high.end() - high.start()
    return offset
def getValue(self, st):
    """Parse the comma-separated value list found after the delimiter.

    Each item is either a double-quoted string or an unquoted run of
    characters containing no quote, comma or '#'.  Every item is passed
    through ``self.decode`` and collected in a ``CVal`` accumulator.

    Returns:
        The result of ``CVal.get()`` when the whole string was consumed,
        or None on any syntax error (leading comma, missing value,
        trailing comma, or anything other than a comma after a value).
    """
    values = CVal()
    rest = st.strip()
    if rest.startswith(','):
        return None                      # value list may not begin with a comma
    while True:
        found = reSearch(r'^("[^"]*")|^([^",#]+)', rest)
        if found is None:
            return None                  # a value was expected here
        begin, stop = found.span()
        token = rest[begin:stop].strip()
        if token.startswith('"'):
            token = token[1:-1]          # drop the surrounding quotes
        values.add(self.decode(token))
        rest = rest[stop:].lstrip()
        if rest == '':
            return values.get()          # clean end of input
        if not rest.startswith(','):
            return None                  # something other than a comma follows
        rest = rest[1:].lstrip()
        if rest == '':
            return None                  # dangling comma with no value after it
def routine_get_links(url, param: type(linkParam)):
    """Fetch ``url`` and pick one link from the page according to ``param``.

    Candidate ``(name, link)`` pairs come either from ``param.element_method``
    or, when that is None, from the XPath expressions on ``param``.  They are
    optionally filtered by the ``param.series_details`` regex, made absolute
    with ``param.baseUrl``, stored on ``LogHandler.links`` and indexed by
    ``param.skip`` (clamped to the last entry when too large).

    Returns:
        The selected link, or None when the page could not be loaded, the
        extraction failed, or no candidate remained.
    """
    tree = get_content(url)
    if tree is None:
        return

    if param.element_method is not None:
        links = param.element_method(tree, param.element_method_args)
    else:
        links = []
        try:
            for node in tree.xpath(param.element_xpath):
                title = node.xpath(param.element_name_xpath)
                href = node.xpath(param.element_link_xpath)[0]
                links.append((title, href))
        except Exception:
            return
    if len(links) == 0:
        return

    # links = [(name, link), ...]
    if param.series_details is not None:
        links = [entry for entry in links
                 if reSearch(param.series_details, ' '.join(entry[0]))]
        if len(links) == 0:
            print('Episode not found!')
            return

    absolute = []
    for entry in links:
        target = entry[1]
        absolute.append(target if is_url(target) else param.baseUrl + target)
    LogHandler.links = absolute

    index = param.skip
    if param.skip >= len(absolute):
        print('skip exceeded list; downloading last subtitle')
        index = len(absolute) - 1
    chosen = absolute[index]
    return chosen if is_url(chosen) else param.baseUrl + chosen
def Read(settingfile):
    """Read a preset settings file and return its usable lines.

    Every line that contains ``//`` anywhere is treated as a comment and
    skipped.  Blank lines are skipped too: previously a blank line (for
    instance the empty string produced by a trailing newline) raised
    IndexError, which discarded every setting already read.  Each remaining
    line is expected to look like ``key;readable|value`` and is echoed to
    stdout before being collected.

    Args:
        settingfile: Path of the preset settings file (read as UTF-8).

    Returns:
        list[str]: The raw non-comment, non-blank lines, or None when the
        file cannot be read or a line is malformed (the traceback is
        printed in that case).
    """
    try:
        with open(settingfile, 'r', encoding='utf-8') as f:
            setting = f.read()
        setting_out = []  # lines returned to the caller; '//' comment lines excluded
        for line in setting.split('\n'):
            if '//' in line or not line.strip():
                continue
            setting_text = line.split('|')
            key_part = setting_text[0].split(';')
            # NOTE(review): the backslashes in this literal look like collapsed
            # line continuations; the runtime text is kept exactly as written.
            print(color.green + "\\ {key}:{value} 可读状态:{bool}\\ ".format(
                key=key_part[0],
                value=setting_text[1],
                bool=key_part[1]))
            setting_out.append(line)
        return setting_out
    except Exception:
        print(traceback.format_exc())
        return None
def east(code):
    '''[code] Individual stock announcements.

    Downloads the quote page for ``code`` from quote.eastmoney.com and
    collects every anchor inside the element with id ``stockcanlendar``.

    Returns a ``(rows, headers)`` tuple where each row is
    ``[anchor text, href, date found in the anchor text]``.
    '''
    page_url = 'http://quote.eastmoney.com/' + code.lower() + '.html'
    response = requests.get(page_url, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    anchors = soup.find(id='stockcanlendar').find_all('a')
    canlander = [
        [
            anchor.text,
            anchor.attrs['href'],
            reSearch(r'\d+年\d+月\d+日', anchor.text)[0],
        ]
        for anchor in anchors
    ]
    return (canlander, ('标题', '链接', '日期'))
def extrair(dados, regex=None, coluna=None, ficheiroSaida=None):
    """Filter the rows and save the matching ones to a CSV file.

    Args:
        dados (sequence of dict): Data rows.  Must support indexing, because
            the first row supplies the column names.
        regex (str, optional): Regular expression selecting the rows of
            interest.  Defaults to ``.*``.
        coluna (str, optional): Column the regular expression is tested
            against.  Defaults to ``wgm_username``.
        ficheiroSaida (str, optional): Path of the file the processed data
            is written to.  Defaults to ``out.csv``.

    Raises:
        UserWarning: When ``coluna`` is not a key of the first row.
    """
    # Apply the documented defaults; a literal None previously reached
    # reSearch / the column lookup and failed.
    if regex is None:
        regex = '.*'
    if coluna is None:
        coluna = 'wgm_username'
    if ficheiroSaida is None:
        ficheiroSaida = 'out.csv'

    if coluna not in dados[0]:
        raise UserWarning(f'"{coluna}" não é uma coluna válida.')

    filtered = filter(lambda dic: reSearch(regex, dic[coluna]), dados)
    escrever_csv_dicionario(ficheiroSaida, filtered, dados[0].keys())
def parse(text):
    """Turn a ';'-separated quote payload into table rows.

    Each non-blank entry must contain a double-quoted, comma-separated field
    list; the code is the text between the last underscore and the '='
    preceding it.  Fields 0, 2, 3 and 31 are used as name, previous close,
    price and time.  The time of every entry is printed as it is parsed.

    Returns:
        tuple: ``(rows, cols)`` with rows of
        ``[code, name, price, percent, time]`` and the header tuple.
    """
    cols = ('代码', '名称', '价格', '涨跌幅', '时间')
    rows = []
    for entry in text.split(';'):
        if not entry.strip():
            continue
        fields = entry.split('"')[1].split(',')
        code = reSearch(r'[^_]+(?==)', entry)[0]
        price = float(fields[3])
        close = float(fields[2])
        change = str(round(100 * (price - close) / close, 2)) + '%'
        if not price < close:
            # rises and unchanged prices get an explicit plus sign
            change = '+' + change
        stamp = fields[31]
        print(stamp)
        rows.append([code, fields[0], price, change, stamp])
    return (rows, cols)
def save(self):
    """Save the in-memory configuration to the file on disk.

    The existing file is read into a buffer so that lines this object does
    not know about are preserved.  For every item, the matching
    ``key<delimiter>value`` line in the buffer is replaced (or removed when
    the value is None); keys not yet present are appended.

    Returns:
        bool: True when the file was written (``self.changed`` is reset),
        False when writing failed.
    """
    try:
        # Read the current file content into the buffer
        with open(self.fileName, 'rt') as cf:
            buf = cf.read()
    except Exception:
        LOGGER.warning(
            'Config file access error, a new file (%s) will be created',
            self.fileName)
        buf = ''
    # Leave exactly one newline at the end.  A regex substitution on
    # r'[\n]*$' also replaces the empty match at the very end on
    # Python >= 3.7, which adds one more blank line on every save.
    buf = buf.rstrip('\n') + '\n'
    for key, value in self.items():
        if value is None:
            res = ''  # Remove 'key=value' from the file if value is None
            LOGGER.debug('Config value \'%s\' will be removed', key)
        else:
            # Make a line with the value
            res = self.delimiter.join(
                [key, ', '.join(self.encode(val) for val in CVal(value))]) + '\n'
            LOGGER.debug('Config value to save: %s', res[:-1])
        # Find the line with this key in the buffer.
        # NOTE(review): key and delimiter are interpolated into the pattern
        # unescaped; this assumes they contain no regex metacharacters.
        sRe = reSearch(r'^[ \t]*["]?%s["]?[ \t]*%s.+\n' % (key, self.delimiter),
                       buf, flags=reM)
        if sRe is not None:
            # Replace via a function so that backslashes in the new line are
            # inserted literally instead of being read as template escapes.
            buf = sRe.re.sub(lambda _match, line=res: line, buf)
        elif res != '':
            # Key not present yet and there is something to store
            buf += res
    try:
        with open(self.fileName, 'wt') as cf:
            cf.write(buf)  # Write the updated buffer to the file
    except Exception:
        LOGGER.error('Config file write error: %s', self.fileName)
        return False
    LOGGER.info('Config written: %s', self.fileName)
    self.changed = False  # Reset the "unsaved changes" flag
    return True
def zhihu():
    '''Zhihu hot list (client hot list).

    Downloads ZHIHU_HOT_URL, extracts the JSON embedded in the
    ``js-initialData`` script tag and builds one row per entry of
    ``initialState.topstory.hotList``.

    Returns a ``(rows, cols, 1)`` tuple; ``rows`` is empty when the script
    tag or the hot list is missing.
    '''
    page = requests.get(ZHIHU_HOT_URL, headers=HEADERS).text
    cols = ['索引', '标题', '描述', '回答数', '热度', '链接']
    rows = []
    found = reSearch(
        r'<script id="js-initialData" type="text/json">(?P<jsonstr>[\s\S]+?)</script>',
        page)
    if found:
        jsonstr = strip(found.group('jsonstr'))
        obj = jsonloads(jsonstr)
        hot_list = getdeep(obj, 'initialState', 'topstory', 'hotList') or []
        for i, item in enumerate(hot_list):
            # Values follow the order of `cols`: the answer count belongs under
            # '回答数' and the metrics text under '热度' (they were swapped).
            rows.append([
                i + 1,
                getdeep(item, 'target', 'titleArea', 'text'),
                getdeep(item, 'target', 'excerptArea', 'text'),
                getdeep(item, 'feedSpecific', 'answerCount'),
                getdeep(item, 'target', 'metricsArea', 'text'),
                # getdeep(item, 'target', 'imageArea', 'url'),
                getdeep(item, 'target', 'link', 'url'),
            ])
    return (rows, cols, 1)
#%%
if __name__=="__main__":
    # Manual smoke test: build the pipeline, run it over a sample paragraph
    # and print what the 'QBComponent' pipe extracted.
    nlp = GetNLP(commoditiesIdNm, commoditiesIdCode, )
    QBComp = nlp.get_pipe('QBComponent')
    text = ' ' + """Hartley platinum project, Zimbabwe; Hot Briquetted Iron plant, Yandi iron ore mine expansion and Beenup titanium minerals project, Western Australia; Cannington silver, lead, zinc project and Crinum coking coal mine, Queensland, Australia; Mt Owen thermal coal development, New South Wales, Australia; and Samarco pellet plant expansion, Brazil. The Northwest Territories Diamonds project in Canada is subject to a number of approvals. $41.669 million (1996 – $39.538 million). lead manager""" + ' '
    doc = nlp(text)
    print( doc.user_data, '\n')
    # print( [QBComp.countriesIdNm[ctrid] for ctrid in doc._.countries], '\n')
    print( [QBComp.commoditiesIdNm[commid] for commid in doc._.commodities] , '\n')
    # print( [QBComp.sitesIdNm[sid] for sid in doc._.sites] , '\n')
    # print( [QBComp.companiesIdNm[cid] for cid in doc._.companies] , '\n')
    # print( doc._.units, '\n', doc._.unitTypes , '\n')

    # Imported locally so this demo does not depend on module-level imports.
    from re import split as resplit, search as reSearch

    # Re-run the pipeline line by line and show, for every token flagged as
    # carrying a unit, the numeric part found in its text.
    for doc in nlp.pipe( resplit('\n|\t|\r\n', text) ):
        for tok in doc:
            if tok._.hasUnit:
                # Raw string: '\d' in a normal literal is an invalid escape.
                number = reSearch(r' \d|[\d\.,]+', tok.text)
                # A unit token without any digit prints None instead of
                # raising AttributeError on .group().
                print(tok.text, number.group() if number else None, tok._.ut, tok.sent.text)
def search(param: type(searchParam)):
    """Search one site for the current video and let the user pick a result.

    The query URL is ``param.baseurl`` plus the quoted ``video.name``.  When
    a season is requested and a result title contains that season's name,
    the first such result is offered directly; otherwise all results are
    listed (annotated when the year matches) and the user chooses a number.

    Returns:
        The chosen URL (prefixed with ``param.site`` when relative); None
        when nothing was found, the user declined or chose 0, or no valid
        number was entered in three attempts; 0 when the prompt was
        interrupted with Ctrl-C.
    """
    print(f"-----------\n{param.site}\n-----------")
    url = param.baseurl + quote(video.name)
    tree = get_content(url)
    if tree is None:
        print(f"{param.site} get conent error")
        return

    if param.noresult_method:
        found_result = param.noresult_method(tree)
    else:
        found_result = if_find_result(tree, param.noresultxpath, param.noresulttext)
    if not found_result:
        print('Not found')
        return

    if param.result_method:
        try:
            search_results = param.result_method(tree)
        except Exception:
            return
    else:
        search_results = get_result(tree, param.result)
    if search_results is None or len(search_results) == 0:
        print('No element found')
        return

    season_name = seasons.get(str(video.season)) if video.season > 0 else None
    if season_name is not None and any(season_name in result[0] for result in search_results):
        url = [link[1] for link in search_results if season_name in link[0]][0]
        print('Continue with this site? (Y/n)')
        if input() == 'n':
            return
        return url if is_url(url) else param.site + url

    for index, item in enumerate(search_results):
        if str(video.year) in item[0] and video.year != 0:
            leftover = item[0].replace(video.name, '').replace(str(video.year), '')
            # The hyphen is placed last so it is a literal.  Written as
            # '()-:' it formed the range ')'..':' and also ignored digits
            # and '*', '+', ',', '/', so a sequel number left in the title
            # still counted as a complete match.
            if reSearch(r'[^ _.():-]', leftover):
                extra = f'{GREEN} : year matched{WHITE}'
            else:
                extra = f'{GREEN} : complete match{WHITE}'
        else:
            extra = ''
        print(f"{index + 1}) {item[0]}{extra}")
    print(f'0) {RED}not in list{WHITE}')

    choice = -1
    for _ in range(3):
        try:
            choice = int(input('Choose a movie: '))
        except KeyboardInterrupt:
            return 0
        except Exception:
            print("Enter a correct number")
            continue
        if 0 <= choice <= len(search_results):
            break
        choice = -1
        print("Enter a correct number")
    if choice <= 0:
        return
    url = search_results[choice - 1][1]
    return url if is_url(url) else param.site + url
def tryNumber(s: str):
    """Convert ``s`` to a number when it is a plain unsigned decimal.

    Strings made only of digits become ``int``; strings with digits and a
    single dot become ``float``.  Anything else, including ``''`` and
    ``'.'``, is returned unchanged.

    Args:
        s: Text to convert.

    Returns:
        int | float | str: The converted number, or ``s`` itself.
    """
    if s == '' or s == '.':
        return s
    r = reSearch(r'^\d*(\.?)\d*$', s)
    if not r:
        return s
    try:
        return float(s) if r.group(1) else int(s)
    except ValueError:
        # '$' also matches just before a trailing newline, so "\n" or ".\n"
        # pass the pattern although they hold no digits.
        return s
def parse(self, symbols):
    """Translate a whitespace-separated symbol string into notes and tempo factors.

    Accepted symbols are a single note letter with optional dot/apostrophe
    markers and trailing ``~`` characters, a parenthesised group of such
    notes (without ``~``), or ``-``.

    * Single note: the pitch from ``Pitch.getPitch`` plus ``self.baseOffset``;
      tempo factor is 1 plus the number of ``~`` characters.
    * Group ``(...)``: one note per member, each with tempo factor
      ``2 / number_of_members``.
    * A symbol equal to ``PASS``: note value 0 with the single-note tempo factor.

    Args:
        symbols: The symbol string to parse.

    Returns:
        tuple[list, list]: ``(notes, tempoFactors)``, index-aligned.

    Raises:
        ValueError: With a message starting with ``"Syntax Error: "`` when a
            symbol is malformed or cannot be converted to a pitch.
    """
    note_letters = "SrRgGmMPdDnN"
    symbol_regex = r"^[SrRgGmMPdDnN](?:\.*|'*)~*$|^\((?:[SrRgGmMPdDnN](?:\.*|'*))+\)$|^-$"
    group_regex = r"^\((?:[SrRgGmMPdDnN](?:\.*|'*))+\)$"

    notes = []
    tempoFactors = []
    pitch = Pitch()
    for symbol in symbols.split():
        try:
            if reSearch(symbol_regex, symbol) is None:
                raise ValueError("Syntax Error: " + symbol)

            if reSearch(group_regex, symbol) is not None:
                # Count the members first: every note in the group shares
                # the same tempo factor.
                members = sum(1 for ch in symbol[1:-1] if ch in note_letters)
                currentSymbol = symbol[1]
                for ch in symbol[2:]:
                    if ch in note_letters or ch == ')':
                        # A new letter (or the closing parenthesis) ends the
                        # note collected so far.
                        offset = self.getOffset(currentSymbol)
                        try:
                            val = pitch.getPitch(currentSymbol[0], offset) + self.baseOffset
                        except Exception as e:
                            raise ValueError("Syntax Error: " + str(e)) from e
                        notes.append(val)
                        tempoFactors.append(2 / members)
                        currentSymbol = ch
                    else:
                        currentSymbol += ch
                continue

            offset = self.getOffset(symbol)
            tempoFactor = 1
            tempoPattern = reSearch("~+", symbol)
            if tempoPattern is not None:
                tempoFactor = tempoPattern.end() - tempoPattern.start() + 1
            if symbol == PASS:
                tempoFactors.append(tempoFactor)
                notes.append(0)
                continue
            try:
                val = pitch.getPitch(symbol[0], offset) + self.baseOffset
            except Exception as e:
                raise ValueError("Syntax Error: " + symbol) from e
            notes.append(val)
            tempoFactors.append(tempoFactor)
        except ValueError:
            # Already carries the "Syntax Error: " prefix; wrapping it again
            # produced "Syntax Error: Syntax Error: ...".
            raise
        except Exception as e:
            raise ValueError("Syntax Error: " + str(e)) from e
    return notes, tempoFactors
def writeModemConfig():
    """Validate the values entered in the input fields and send the valid ones to the modem.

    Every field is read from its ``input_*`` variable and checked; a value
    that fails its check becomes ``"NULL"`` and is not sent.  Each remaining
    value is sent as ``set <name> <value>`` through ``sendModemCommand``
    with a 0.1 s pause after each command.  ``readModemConfig()`` is called
    once at the end.

    Checks applied:
        * yes/no options: exactly ``"yes"`` or ``"no"``.
        * IP fields: a complete dotted quad with each octet in 0-255.
        * callsign: 1 to 13 characters.
        * frequency: ``4xx.xxx`` above 420.000 and up to 450.000.
        * frequency shift: ``[-]d.ddd`` within -10.000 .. 10.000.
        * RF power, modulation, radio network ID: one of the listed values.

    ``client_static_IP`` is validated but not sent, and ``client_req_size``
    is never sent.
    """
    cmd_set = "set"

    # --- yes/no options -------------------------------------------------
    yes_no_inputs = [
        input_Is_master.get(),
        input_Radio_on_at_start.get(),
        input_DHCP_active.get(),
        input_Client_static_IP.get(),
        input_Telnet_active.get(),
        input_Telnet_routed.get(),
        input_Def_route_active.get(),
        input_DNS_active.get(),
    ]
    yes_no_values = []
    for entered in yes_no_inputs:
        if entered in ("yes", "no"):
            print(entered)
            yes_no_values.append(entered)
        else:
            yes_no_values.append("NULL")

    # --- IP fields ------------------------------------------------------
    ip_inputs = [
        input_Modem_IP.get(),
        input_Netmask.get(),
        input_DNS_value.get(),
        input_Def_route_val.get(),
        input_IP_begin.get(),
    ]
    octet = r"(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)"
    # Anchored at the end as well: without it "1.2.3.4anything" was accepted
    # and forwarded to the modem.
    regexIP = "^" + r"\.".join([octet] * 4) + r"\Z"
    ip_values = [
        entered if entered != "" and reSearch(regexIP, entered) else "NULL"
        for entered in ip_inputs
    ]

    # --- callsign -------------------------------------------------------
    callsign_in = input_Callsign.get()
    if callsign_in != "" and len(callsign_in) <= 13:
        Callsign2m = callsign_in
        print("Callsign set to:",Callsign2m)
    else:
        Callsign2m = "NULL"
        print("Callsign not Set or too long")

    # --- frequency: 420 to 450 ------------------------------------------
    regexFreq = r"^(4[2-4][0-9])\.([0-9][0-9][0-9])$"
    freq_in = input_Frequency.get()
    in_band = bool(reSearch(regexFreq, freq_in)) and len(freq_in) == 7 and freq_in != "420.000"
    if in_band or freq_in == "450.000":
        Frequency2m = freq_in
        print("Freq set to", Frequency2m)
    else:
        Frequency2m = "NULL"
        print("Freq to high, to low or wrong input")

    # --- frequency shift ------------------------------------------------
    fSvar = input_Freq_shift.get()
    # Debug output kept from the earlier character check (printed once).
    wrongchars = sum(1 for ch in fSvar if ch not in "-.0123456789")
    print("Wrongchars: ", wrongchars)
    # One pattern plus a range check instead of character-by-character
    # tests followed by int(): inputs such as ".123", "-.123" or "1,.000"
    # used to pass the character test and then raise ValueError.
    if reSearch(r"^-?[0-9]+\.[0-9]{3}\Z", fSvar) and abs(float(fSvar)) <= 10:
        Freq_shift2m = fSvar
    else:
        Freq_shift2m = "NULL"
        print("Freq-Shift to high, to low or wrong input")

    # --- enumerated values ----------------------------------------------
    # Exact membership instead of substring search in a comma-separated
    # string, which also accepted fragments such as "0", "1" or ",".
    rf_power_allowed = {"2", "3", "4", "5", "6", "7", "8", "9", "10",
                        "11", "12", "14", "16", "20"}
    rf_power_in = input_RF_power.get()
    if rf_power_in in rf_power_allowed:
        RF_power2m = rf_power_in
        print("RF Power set to:", RF_power2m)
    else:
        RF_power2m = "NULL"
        print("Wrong RF_power value.")

    modulation_allowed = {"11", "12", "13", "14", "20", "21", "22", "23", "24"}
    modulation_in = input_Modulation.get()
    if modulation_in in modulation_allowed:
        Modulation2m = modulation_in
        print("Modulation set to:", Modulation2m)
    else:
        Modulation2m = "NULL"
        print("Wrong Modulaton Value")

    netw_id_allowed = {str(number) for number in range(16)}
    netw_id_in = input_Radio_netw_ID.get()
    if netw_id_in in netw_id_allowed:
        Radio_netw_ID2m = netw_id_in
        print("Radio NW ID set to: ", Radio_netw_ID2m)
    else:
        Radio_netw_ID2m = "NULL"
        print("Wrong Radio Network ID input.")

    # --- push values to the NPR modem -----------------------------------
    cmd_and_val2mList = [
        [Callsign2m,"callsign"],
        [Frequency2m,"frequency"],
        [Freq_shift2m,"freq_shift"],
        [RF_power2m,"RF_power"],
        [Modulation2m,"modulation"],
        [Radio_netw_ID2m,"radio_netw_ID"],
        [yes_no_values[0],"is_master"],
        [yes_no_values[1],"radio_on_at_start"],
        [yes_no_values[2],"DHCP_active"],
        # yes_no_values[3] (client static IP) is not sent
        [yes_no_values[4],"telnet_active"],
        [yes_no_values[5],"telnet_routed"],
        [yes_no_values[6],"def_route_active"],
        [yes_no_values[7],"DNS_active"],
        ["NULL","client_req_size"],
        [ip_values[0],"modem_IP"],
        [ip_values[1],"netmask"],
        [ip_values[2],"DNS_value"],
        [ip_values[3],"def_route_val"],
        [ip_values[4],"IP_begin"],
    ]
    for value, name in cmd_and_val2mList:
        if value == "NULL":
            continue
        cmd2m = str(cmd_set + " " + name + " " + value)
        mfeedback = sendModemCommand(cmd2m)
        time.sleep(0.1)
        print("Feedback from CMD send: ", mfeedback)
        print(type(cmd2m), cmd2m)

    # Refresh the displayed configuration from the modem.
    readModemConfig()