def mc2reco(fi, version="v5r4p0", newpath=""):
    """Converts allGamma-vXrYpZ_100GeV_10TeV-p2.noOrb.740485.mc.root
    to allGamma-v5r4p0_100GeV_10TeV-p2.noOrb.740485.reco.root"""
    vtag = research(r"v\dr\dp\d", fi)
    if vtag is None:
        vtag = research(r"r\d{4}", fi)
    vtag = vtag.group(0)
    # Lastly, replace the path.
    if fi.startswith("root:"):
        fi = "/%s" % fi.split("//")[2]
    fname = basename(fi)
    path = dirname(fi)
    task = basename(path)
    npath = opjoin(newpath, task)
    fout = opjoin(npath, fname)
    fout = fout.replace(".mc", ".reco")
    # The version tag shouldn't occur more than 10 times; without this cap,
    # an MC version equal to the reco version would yield an infinite loop!
    max_occ = 10
    occ = 0
    while vtag in fout:
        fout = fout.replace(vtag, version)
        occ += 1
        if occ >= max_occ:
            break
    return fout

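# A hedged usage sketch for mc2reco (the path below is hypothetical; assumes
# this module's helpers research/basename/dirname/opjoin are in scope):
#
#   mc2reco("/eos/tasks/allGamma-v5r4p0_100GeV_10TeV-p2/"
#           "allGamma-v5r4p0_100GeV_10TeV-p2.noOrb.740485.mc.root",
#           version="v6r0p10", newpath="/eos/reco")
#   # -> "/eos/reco/allGamma-v6r0p10_100GeV_10TeV-p2/
#   #     allGamma-v6r0p10_100GeV_10TeV-p2.noOrb.740485.reco.root"
#
# Note that the version tag is replaced everywhere it occurs, including in
# the task directory component.
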
def __init__(self, apikey, compr=False):
    self.apikey = apikey
    # Get the nonce value from the BTC-E API.
    res = self.call('getInfo')
    if res['success'] == 0:
        if 'invalid nonce' in res['error']:
            self._nonce = int(research(r'\d+', res['error']).group())

def team_test_page_generator(info):
    soup = Bs(markup=get(url=info.TeamsPage).text, features='html.parser')
    rec_by_team = soup.find(id=info.TeamsGroup)
    items = rec_by_team.find_all('a')
    for item in items:
        m = research(r'id=(\d+)?', item.attrs['href'])
        yield info.TeamTestSeriesPageTemplate.replace('<id>', m.groups()[0])

def argument_checker_host(HTTPHandler, host, path):
    """Initialize HTTPHandler, check host connectivity, and check whether
    Bludit is running on the web server.
    Returns the Bludit cookie and the CSRF token."""
    header = {
        "Host": host,
        "User-Agent": "GoogleBot",
        "Connection": "KeepAlive"
    }
    HTTPHandler.request("GET", path.rstrip("/") + "/admin/", headers=header)
    # Checking host connectivity
    try:
        response = HTTPHandler.getresponse()
    except:
        ERROR("argument_checker_host", "host seems down")
    # Checking token and cookie existence
    try:
        token = research('input.+?name="tokenCSRF".+?value="(.+?)"',
                         response.read().decode()).group(1)
        cookie = response.getheader("Set-Cookie").split(";")[0]
    except:
        ERROR("argument_checker_host", "bludit cookie and token not found")
    print("[*] Page Cookie: ", cookie)
    print("[*] Login page Token: ", token)
    return cookie, token

def _download_video(dmp, v, folder):
    """dmp: Dumper class"""
    if 'platform' in v:
        return _download_external(v['player'], folder)
    else:
        if 'player' not in v:
            return False
        if 'height' not in v:
            v['height'] = (480 if 'photo_800' in v else
                           360 if 'photo_320' in v else
                           240)
        url = v['player'] if ('access_key' not in v) \
            else f"{v['player']}?access_key={v['access_key']}"
        data = urlopen(url).read()
        try:
            return _download(
                dmp,
                research(
                    b'https://cs.*vkuservideo.*' +
                    str(min(v['height'], v['width'])
                        if ('width' in v) else v['height']).encode() +
                    b'.mp4',
                    data).group(0).decode(),
                folder,
                name=v['title'] + '_' + str(v['id']),
                ext='mp4')
        except AttributeError:
            return False

def get_ground_by_id(self, ground_row: Tag):
    ground_link = ground_row.find('a').attrs['href']
    ground_id = research(r'\/([^\/]+?)\.html$', ground_link).group(1)
    if not self.__grounds.get(ground_id, False):
        soup = Bs(markup=get(url=ground_link).text, features='html.parser')
        m = rematch(pattern=r'^(.+?)\s*\|\s*(.+?)\s*\|',
                    string=soup.title.get_text())
        self.__grounds[ground_id] = Ground(m.group(1), m.group(2))
    return self.__grounds[ground_id]

def team_series_page_generator(info, page, completedseries: CompletedSeriesList):
    if page == 'https://stats.espncricinfo.com/ci/engine/records/team/series_results.html?class=1;id=46;type=team':
        print(page)
    soup = Bs(markup=get(url=page).text, features='html.parser')
    country: str = research(r'^(.+?)\s+Cricket', soup.title.get_text()).group(1)
    print(f"Getting for: {country}")
    series_body = soup.table.tbody
    if series_body:
        for row in series_body.find_all('tr'):
            series_page = row.a.attrs["href"]
            series_id: int = int(
                research(r'\/(\d+?).html', series_page).group(1))
            if completedseries.is_series_complete(series_id=series_id):
                continue
            else:
                yield get_series_results_page_url(
                    f'{info.HomeCenter}{series_page}')

def find_meta(meta):
    """Extract __*meta*__ from META_FILE."""
    meta_match = research(
        r"(?m)^__{meta}__ = ['\"]([^'\"]*)['\"]".format(meta=meta),
        META_FILE)
    if meta_match:
        return meta_match.group(1)
    raise RuntimeError("Unable to find __{meta}__ string.".format(meta=meta))

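# Hedged, self-contained sketch of the find_meta pattern using plain
# re.search (the sample file content below is hypothetical):
def _find_meta_demo():
    import re
    sample = "__title__ = \"mypkg\"\n__version__ = '1.2.3'\n"
    m = re.search(r"(?m)^__{meta}__ = ['\"]([^'\"]*)['\"]".format(meta="version"),
                  sample)
    assert m.group(1) == "1.2.3"
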
def find_ranking():
    for f in os.listdir("./temp"):
        if research("input", f):
            url_list = get_search_urls(os.path.join("./temp/", f))
            p = Pool(processes=8)
            # prod_x has only one argument x (y is fixed to 10)
            prod_x = partial(list_ranking_in_serp,
                             source=os.path.splitext(basename(f))[0])
            print(p.map(prod_x, url_list))

def __get_match_details(match_details_card: Tag, grounds: MatchGrounds) -> tuple:
    """From the match card, get: Test match number, ground name, and the
    country played in."""
    test_number: int = None
    for row_count, row in enumerate(match_details_card.find_all('tr')):
        if row_count == 0:
            ground: Ground = grounds.get_ground_by_id(row)
        if row.td.get_text() == 'Match number':
            if research(pattern=r'(\d+?)\s*$',
                        string=row.find_all('td')[1].get_text()) is None:
                # Some match numbers carry a letter suffix (e.g. "2389a").
                test_number = research(
                    pattern=r'(\d+?[a-z])\s*$',
                    string=row.find_all('td')[1].get_text()).group(1)
            else:
                test_number = research(
                    pattern=r'(\d+?)\s*$',
                    string=row.find_all('td')[1].get_text()).group(1)
    return (test_number, ground.name, ground.country)

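# Hedged sketch of the two match-number patterns using plain re.search
# (the cell texts are hypothetical):
def _match_number_demo():
    import re
    assert re.search(r'(\d+?)\s*$', 'Test no. 2389 ').group(1) == '2389'
    assert re.search(r'(\d+?)\s*$', 'Test no. 2389a') is None
    assert re.search(r'(\d+?[a-z])\s*$', 'Test no. 2389a').group(1) == '2389a'
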
def top_websites(self, top_number):
    with suppress(Exception):
        top_websites = research(self.top_pattern, top_number)
        if top_websites:
            sites = [d for d in self.websites_entries
                     if d.get('global_rank') != 0]
            sites = sorted(sites, key=lambda x: x['global_rank'])
            for site in sites[:int(top_websites.group(1))]:
                self.search_and_change(site, {"selected": "true"})
            return True
    return False

def __get_ground_name_and_country(groundTag: Tag, grounds: dict) -> Ground:
    ground_link = groundTag.find('a').attrs['href']
    ground_id = research(r'\/([^\/]+?)\.html$', ground_link).group(1)
    ground = grounds.get(ground_id, False)
    if not ground:
        soup = Bs(markup=get(url=ground_link).text, features='html.parser')
        m = rematch(pattern=r'^(.+?)\s*\|\s*(.+?)\s*\|',
                    string=soup.title.get_text())
        grounds[ground_id] = Ground(m.group(1), m.group(2))
    return grounds[ground_id]

def check_kbe(error_msg):
    for err in knowledge_based_errors:
        if research(err, error_msg, flags=reIGNORECASE):
            sol = knowledge_based_errors[err] \
                .replace("@INSTALL_PREFIX@", opts.install_prefix) \
                .replace("@ARCH@", opts.architecture)
            if err not in knowledge_based_found:
                cmspkg_print("ERROR: Following error found.\n  %s" % error_msg)
                cmspkg_print("Solution:\n  %s" % sol)
                knowledge_based_found[err] = 1
            if opts.IgnoreKbe:
                return True
            exit(1)
    return False

def find_ranking_with_df():
    for f in os.listdir("./temp"):
        if research("excel_input", f):
            df = load_dataframe_from_excel(os.path.join("./temp/", f))
            title_list = df['title'].tolist()
            type_list = df['product_type'].tolist()
            parallel_serp(title_list, type_list)

def status(self, box='INBOX'):
    """Return a dict of status counters for `box` (empty dict on failure)."""
    status, resp = self.cnx.status(
        box, '(MESSAGES RECENT UIDNEXT UIDVALIDITY UNSEEN)')
    if status == self.OK:
        data = research(self.REG_SATUS, Io.str(resp[self.K_HEAD]))
        l = resplit(' ', data.group(2))
        dic = {'BOX': box}
        # The list alternates between counter names and their values.
        for i in range(0, len(l), 2):
            dic[l[i]] = int(l[i + 1])
    else:
        dic = {}
    return dic

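# A hedged sketch of the parsing above, assuming a typical IMAP STATUS reply
# (the exact REG_SATUS pattern is defined elsewhere in this class; the reply
# below is hypothetical):
#
#   'INBOX (MESSAGES 3 RECENT 0 UIDNEXT 7 UIDVALIDITY 1 UNSEEN 1)'
#
# would produce:
#
#   {'BOX': 'INBOX', 'MESSAGES': 3, 'RECENT': 0, 'UIDNEXT': 7,
#    'UIDVALIDITY': 1, 'UNSEEN': 1}
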
def set_series_complete(self, series_results_page: str) -> None:
    series_fixtures_page = series_results_page.replace(
        'match-results', 'match-schedule-fixtures')
    series_id: int = int(
        research(r'\-(\d+?)\/match-schedule-fixtures$',
                 series_fixtures_page).group(1))
    if not self.is_series_complete(series_id=series_id):
        soup = Bs(markup=get(series_fixtures_page).text,
                  features='html.parser')
        # A series with no remaining fixtures is complete.
        if len(soup.find_all(class_='no-matches-container')) == 1:
            self.__completed_series.append(series_id)

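# Hedged sketch of the series-id extraction using plain re.search (the URL
# below is hypothetical):
def _series_id_demo():
    import re
    url = 'https://example.org/series/icc-wtc-1195334/match-schedule-fixtures'
    assert re.search(r'\-(\d+?)\/match-schedule-fixtures$',
                     url).group(1) == '1195334'
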
def __init__(self, conf, box='INBOX', noBoxCreat=False):
    """Set up the account and mailbox tree, then connect."""
    if conf.host is not None and research('yahoo.com', conf.host) is not None:
        self.DRAFTS = self.DRAFTS[:-1]
    self.conf = conf
    self.rootBox = box
    self.BOXS = {}
    self.cnx = None
    self.cnxusr = None
    self.noBoxCreat = noBoxCreat
    self.switchAccount(self.conf, self.rootBox, True)

def buscarContacto():
    searcher = input("Enter who you would like to search for: ")
    for key, value in contacts.items():
        for contact in value.items():
            # Use a regex to partially match whatever the user entered.
            if research(rf"{searcher.lower()}", contact[0].lower()):
                print("-", contact[0])
    print("\n\n")
    input("Press enter to continue")
    print("\n\n\n")

def _check_and_fail_with_replacement_tags(data, message):
    """Searches a string for replacement tags and aborts if found.
    Should prevent running the program with an insufficient environment.

    :param data: The string to search
    :param message: A message prepended to the failure message
    """
    result = research("(%%[A-Z0-9_-]+%%)", data)
    if result:
        fail(message + ": Found unresolved replacement tags "
             "(missing env setting?): {}\n"
             "Ignore with -f setting".format(", ".join(
                 map(lambda x: "'{}'".format(x), result.groups()))))

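# Hedged, self-contained sketch of the replacement-tag pattern using plain
# re.search (the input string is hypothetical):
def _replacement_tag_demo():
    import re
    m = re.search("(%%[A-Z0-9_-]+%%)", "db_host = %%DB_HOST%%")
    assert m.group(1) == "%%DB_HOST%%"
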
def __get_match_details(match_details_card: Tag, grounds) -> tuple:
    test_number: int = None
    ground_name: str = None
    ground_country: str = None
    for row_count, row in enumerate(match_details_card.find_all('tr')):
        if row_count == 0:
            ground: Ground = __get_ground_name_and_country(row, grounds)
        if row.td.get_text() == 'Match number':
            test_number = research(
                pattern=r'(\d+?)\s*$',
                string=row.find_all('td')[1].get_text()).group(1)
    return (test_number, ground.name, ground.country)

def argument_checker_credentials(HTTPHandler, host, path, cookie, token,
                                 username, password):
    """Try to log in with the given credentials; returns the dashboard token."""
    bodydata = "tokenCSRF={}&username={}&password={}&save=".format(
        token, username, password)
    header = {
        "Host": host,
        "User-Agent": "GoogleBot",
        "X-Forwarded-For": "{}.{}.{}.{}".format(randint(10, 255),
                                                randint(10, 255),
                                                randint(10, 255),
                                                randint(10, 255)),
        "Connection": "close",
        "Content-Type": "application/x-www-form-urlencoded",
        "Content-Length": len(bodydata),
        "DNT": "1",
        "Cookie": cookie
    }
    HTTPHandler.request("POST", path.rstrip("/") + "/admin/",
                        body=bodydata, headers=header)
    # Checking host connectivity again
    try:
        response = HTTPHandler.getresponse()
    except:
        ERROR("argument_checker_credentials", "unable to send POST request")
    if response.getheader("Location") is None:
        ERROR("argument_checker_credentials", "invalid credentials")
    else:
        header = {
            "Host": host,
            "User-Agent": "GoogleBot",
            "Connection": "close",
            "DNT": "1",
            "Cookie": cookie
        }
        HTTPHandler.request("GET", response.getheader("Location"),
                            headers=header)
        response = HTTPHandler.getresponse()
        token = research('var tokenCSRF = "(.+?)"',
                         response.read().decode()).group(1)
        print("[*] Dashboard page token: ", token)
        return token

def save(info: CricketInfo, page):
    m = research(r'/([^/]+?)/([^/]+?)/[^/]+?$', page)
    filepath = m.group(1)
    filename = m.group(2)
    if not exists(f"data.files/match.files/{filepath}/{filename}.zip"):
        Path(f"data.files/match.files/{filepath}").mkdir(parents=True,
                                                         exist_ok=True)
        zf = ZipFile(f"data.files/match.files/{filepath}/{filename}.zip",
                     mode='w', compression=ZIP_DEFLATED)
        soup = Bs(markup=get(url=page).text, features='html.parser')
        zf.writestr('matcharchive', str(soup.find(class_=info.MainContentX)))
        zf.close()

def get_nics():
    ''' List nics '''
    nics = []
    nics_list = listdir('/sys/class/net/')
    for nic_name in nics_list:
        if research('(br|bond|ens|enp|eth|one|ten|fourty)[0-9]+', nic_name):
            # Interface should be in "up" state in order to get carrier status
            Popen("ip li set dev " + nic_name + " up", shell=True, stdout=PIPE)
            with open("/sys/class/net/" + nic_name + "/carrier", 'r') as f:
                try:
                    carrier = int(f.read())
                except:
                    carrier = 0
            bond = ""
            if path.isfile("/sys/class/net/" + nic_name + "/master/uevent"):
                with open("/sys/class/net/" + nic_name + "/master/uevent", 'r') as f:
                    for line in f:
                        sline = line.strip()
                        if 'INTERFACE=bond' in sline:
                            bond = sline.split('=')[1]
            if len(bond) == 0:
                with open("/sys/class/net/" + nic_name + "/address", 'r') as f:
                    macaddr = f.read().strip()
            else:
                # For bonded slaves, take the permanent HW address from the
                # bonding status file.
                with open("/proc/net/bonding/" + bond, 'r') as f:
                    line = f.readline()
                    if_struct = False
                    while line:
                        sline = line.strip()
                        if 'Slave Interface: ' + nic_name in sline and not if_struct:
                            if_struct = True
                        if 'Permanent HW addr: ' in sline and if_struct:
                            macaddr = sline.split()[3]
                            break
                        line = f.readline()
            with open("/sys/class/net/" + nic_name + "/mtu", 'r') as f:
                mtu = f.read()
            ip = str(get_ip(nic_name))
            nics.append([nic_name, ip, macaddr, carrier, mtu])
    return sorted(nics)

def get_ten_pci():
    ''' List PCI addresses of "ten*" (10G) nics '''
    nics = []
    nics_list = listdir('/sys/class/net/')
    for nic_name in nics_list:
        if research('ten[0-9]+', nic_name):
            with open("/sys/class/net/" + nic_name + "/device/uevent", 'r') as f:
                for line in f:
                    sline = line.strip()
                    if "PCI_SLOT_NAME=" in sline:
                        nics.append([nic_name, sline.split("=")[1]])
    return sorted(nics)

def make_wrapper(infile, outfile):
    """Creates a new wrapper file, expanding ${VAR} references from the
    environment as they are encountered."""
    lines = open(infile, 'r').readlines()
    lines_out = []
    for line in lines:
        var = research(r"\$\{\D+\}", line)
        tline = line
        if var is not None:
            match = var.group(0)
            key = match.replace("$", "").replace("{", "").replace("}", "")
            tline = tline.replace(match, "{KEY}".format(KEY=environ[key]))
        lines_out.append(tline)
    of = open(outfile, 'w')
    of.write("".join(lines_out))
    of.close()

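# A hedged sketch of the ${VAR} expansion (the template line is hypothetical;
# assumes environ is os.environ with DATADIR=/data set):
#
#   input line:  'export WRAPPER_DATA=${DATADIR}'
#   output line: 'export WRAPPER_DATA=/data'
#
# Note: the pattern \$\{\D+\} is greedy, so it effectively assumes at most
# one ${...} reference per line (and no digits in variable names).
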
def POST_request(HTTPHandler, head: dict, data, path):
    """Try to log in: if successful return True, otherwise return a fresh
    token to keep trying (or False if no token was found)."""
    HTTPHandler.request("POST", path, body=data, headers=head)
    response = HTTPHandler.getresponse()
    token = research('input.+?name="tokenCSRF".+?value="(.+?)"',
                     response.read().decode())
    new_path = response.getheader("Location")
    if new_path is not None:
        return True
    elif token is None:
        print("\n[!] No token found...")
        return False
    else:
        token = token.group(1)
        return token

def __init__(self, apikey, proxy, **connkw):
    """Initialization of the BTC-E Trade API v1.

    @raise APIError: when the error is not an 'invalid nonce' error
    @param apikey: Trade API Key {'Key': 'KEY', 'Secret': 'SECRET'}
    @param **connkw: ... (see: 'BTCEConnection' class)"""
    super(TradeAPIv1, self).__init__(proxy, **connkw)
    self._apikey = apikey  # type 'dict' (keys: 'Key', 'Secret')
    self._nonce = None  # type 'int' (min/max: 1 to 4294967294)
    try:
        # Get the nonce value from the BTC-E API.
        self.apirequest('/tapi', self._apikey, nonce=None)
    except APIError as error:
        if 'invalid nonce' not in error.message:
            raise
        self._nonce = int(research(r'\d+', error.message).group())

def count_up(name: str, suffix: str = "_") -> str:
    """Search for a counter pattern in name. Append the pattern if it is
    not found, or add +1 to an existing pattern.

    :param name: Name that should be made unique.
    :param suffix: Will be attached to the very end of name.
    :return: modified name
    """
    # Search for the suffix and a number at the very end of name.
    search = research(r"[" + suffix + r"]([0-9]+)$", name)
    # Found something?
    if search:
        # What number was found?
        num = int(search.group(0)[1:])
        # Add +1 to that number.
        return name[: -len(search.group(0))] + suffix + str(num + 1)
    return name + suffix + "0"

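# A hedged usage sketch for count_up (hypothetical names; since `suffix` is
# interpolated into a character class, a single-character suffix is assumed):
#
#   count_up("layer")     # -> "layer_0"
#   count_up("layer_0")   # -> "layer_1"
#   count_up("layer_12")  # -> "layer_13"
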
def __init__(self, apikey, **connkw):
    """Initialization of the BTC-E Trade API v1.

    @raise APIError: when the error is not an 'invalid nonce' error
    @param apikey: Trade API Key {'Key': 'KEY', 'Secret': 'SECRET'}
    @param **connkw: ... (see: 'BTCEConnection' class)"""
    self._apikey = apikey  #: type 'dict' (keys: 'Key', 'Secret')
    self._nonce = None  #: type 'int' (min/max: 1 to 4294967294)
    super(TradeAPIv1, self).__init__(**connkw)
    # Get the nonce parameter from the BTC-E API.
    try:
        self.apirequest('/tapi', self._apikey, nonce=self._nonce)
    except APIError as error:
        if 'invalid nonce' not in error.message:
            raise  # re-raise other BTC-E API errors
        self._nonce = int(research(r'\d+', error.message).group())

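# Hedged sketch of the nonce recovery using plain re.search; the error text
# is hypothetical (it only mimics the shape of an 'invalid nonce' message):
def _nonce_recovery_demo():
    import re
    msg = "invalid nonce parameter; you should send:123"
    assert int(re.search(r'\d+', msg).group()) == 123
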
def argument_checker_host(HTTPHandler, host, path):
    header = {
        "Host": host,
        "User-Agent": "GoogleBot",
        "Connection": "KeepAlive",
    }
    HTTPHandler.request("GET", path, headers=header)
    try:
        response = HTTPHandler.getresponse()
    except:
        ERROR("argument_checker_host", "host seems down")
    try:
        token = research('input.+?name="tokenCSRF".+?value="(.+?)"',
                         response.read().decode()).group(1)
        cookie = response.getheader("Set-Cookie").split(";")[0]
    except:
        ERROR("argument_checker_host", "bludit cookie and token not found")
    return HTTPHandler, cookie, token

def mergefile():
    files = os.listdir("/home/zhluo/Project/CRC/data_nazhang/step16_macs2")
    bamdir = "/home/zhluo/Project/CRC/data_nazhang/step15_merge"
    bamlist = os.listdir(bamdir)
    for file in bamlist:
        if not re.search("sort.bam", file):
            continue
        # Fixed: was `re.research`, which does not exist in the re module.
        if re.search("Inpu.sort.bam", file):
            continue
        sample = file.replace(".sort.bam", "")
        sampleid = sample
        if sample.split("_")[0] == "7weeks" or sample.split("_")[0] == "10weeks":
            tissue = "tumor"
        else:
            tissue = "normal"
        factor = sample.split("_")[1]
        bamreads = file
        bamcontrol = sample.split("_")[0] + "_Inpu.sort.bam"

def list_ranking_in_serp(url, source='output_1'):
    add_execution_deplay()
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')
    links = soup.find_all('h3', class_='r')
    # Remove image results (iterate over a copy, since removing items from
    # the list being iterated skips elements).
    for link in links[:]:
        if 'Images' in link.text:
            links.remove(link)
    column_to_write = [research('q=(.+?)&oq', url).group(1)]
    # List the first ten results.
    for search in links[:10]:
        column_to_write.append(
            [search.a['href'].split("//")[-1].split("/")[0]])
        print("URL: ", search.a['href'])
        print("Domain: ", search.a['href'].split("//")[-1].split("/")[0])
    write_ranking(column_to_write,
                  file="./" + source + "_output-" + str(os.getpid()) + ".txt")

def get_nics_csv(delim=","): ''' List nics in csv format :param delim: Delimiter char. Type: str ''' header = "server,nic_name,ip_addr,mac_addr,link,chassis_id,chassis_name,port_mac,port_descr\n" io = "" # Try to reuse lldp output if possible try: lldp_info = Popen("lldpcli -f keyvalue s n s", shell=True, stdout=PIPE).communicate()[0] except: lldp_info = "" for nic in get_nics(): lldp = "" nic_name = nic[0] if research('(one|ten|fourty)[0-9]+', nic_name): # Check if we can fetch lldp data for that nic for line in lldp_info.splitlines(): chassis = 'lldp.' + nic[0] + '.chassis' port = 'lldp.' + nic[0] + '.port' if chassis in line or port in line: lldp += delim + line.split('=')[1] if not lldp: lldp = delim + delim + delim + delim io += __salt__['config.get']('id') + \ delim + nic_name + \ delim + str(nic[1]).strip() + \ delim + str(nic[2]).strip() + \ delim + str(nic[3]).strip() + \ delim + str(nic[4]).strip() + \ lldp + "\n" return header + io
def download_video(v, folder):
    if 'platform' in v:
        return download_external(v['player'], folder)
    else:
        if 'player' not in v:
            return False
        if 'height' not in v:
            v['height'] = (480 if 'photo_800' in v else
                           360 if 'photo_320' in v else
                           240)
        url = v['player'] if ('access_key' not in v) \
            else f"{v['player']}?access_key={v['access_key']}"
        data = urlopen(url).read()
        try:
            download(
                research(
                    b'https://cs.*vkuservideo.*' +
                    str(min(v['height'], v['width'])
                        if ('width' in v) else v['height']).encode() +
                    b'.mp4',
                    data).group(0).decode(),
                folder,
                name=v['title'] + '_' + str(v['id']),
                ext='mp4')
        except AttributeError:
            return False

def _update_size(self):
    if self._w is not None and self._h is not None:
        return
    xenvs = {
        'DISPLAY': ':1',
    }
    try:
        output = gsp.check_output(
            ['x11vnc', '-query', 'dpy_x,dpy_y'],
            env=xenvs).decode('utf-8')
        mobj = research(r'dpy_x:(\d+).*dpy_y:(\d+)', output)
        if mobj is not None:
            w, h = int(mobj.group(1)), int(mobj.group(2))
            changed = False
            if self._w != w:
                changed = True
                self._w = w
            if self._h != h:
                changed = True
                self._h = h
            if changed:
                self.notify()
    except gsp.CalledProcessError as e:
        log.warn('failed to get display size: ' + str(e))

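# Hedged sketch of the x11vnc geometry parsing using plain re.search (the
# output string is hypothetical, assuming a single-line reply):
def _display_size_demo():
    import re
    out = 'dpy_x:1280,dpy_y:800'
    m = re.search(r'dpy_x:(\d+).*dpy_y:(\d+)', out)
    assert (int(m.group(1)), int(m.group(2))) == (1280, 800)
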
def __nextsince(self):
    for line in self.__scrape:
        m = research(self.__nxt_rx, line)
        if m:
            return m.group(1)
    return ''

def __pageimages(self):
    for line in self.__scrape:
        imagesearch = research(self.__img_rx, line)
        if imagesearch and not research('square', imagesearch.group(0)):
            yield '%s.%s' % (imagesearch.group(2), imagesearch.group(4))
    return ''