def run(self):
    global results
    while True:
        try:
            folder = self.queue.get().rstrip()
            resource = host + '/' + folder
            try:
                # Test the combined auth+proxy case first; placed after the
                # single-option branches it would never be reached.
                if auth_defined and proxy_defined:
                    url = requesocks.get(resource, proxies=proxy_address,
                                         auth=(auth_user, auth_password),
                                         headers=headers)
                elif auth_defined:
                    url = requesocks.get(resource,
                                         auth=(auth_user, auth_password),
                                         headers=headers)
                elif proxy_defined:
                    url = requesocks.get(resource, proxies=Proxies,
                                         headers=headers)
                elif cookies_defined:
                    url = requesocks.get(resource, cookies=cookies,
                                         headers=headers)
                else:
                    url = requesocks.get(resource, headers=headers)
                if url.status_code not in [200, 401, 403, 404]:
                    results['others'].append(folder)
                else:
                    display_message("[%s] %s/%s" % (url.status_code, host, folder))
                    results[url.status_code].append(folder)
            except requesocks.ConnectionError as e:
                print R + "\n ERROR: Connection Error - Check host is correct or exists" + W
                sys.exit()
            self.queue.task_done()
        except SystemExit:
            # Shutdown
            pass
def query(mode, nid, rel):
    if mode not in MODES:
        print("wrong mode: " + mode)
        return
    if mode == DEPLOY:
        requests.get(prefix_deploy + nid + suffix(rel))
    elif mode == DEV:
        requests.get(prefix_dev + nid + suffix(rel), proxies=proxies)
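# A minimal usage sketch for query(), assuming module-level names roughly
# like the following (all illustrative, not from the original source):
#
#   MODES = (DEPLOY, DEV) = ('deploy', 'dev')
#   prefix_deploy = 'http://deploy.example.com/node/'
#   prefix_dev = 'http://dev.example.com/node/'
#   proxies = {'http': 'socks5://127.0.0.1:1080'}
#   def suffix(rel):
#       return '?rel=' + rel
#
#   query(DEPLOY, '42', 'stable')   # hits the deploy endpoint directly
#   query(DEV, '42', 'stable')      # hits the dev endpoint through the proxy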
def download_torrent(url, filename):
    result = requests.get(url, proxies=proxies)
    result.encoding = 'utf-8'
    soup = BeautifulSoup(result.text)
    download_link = soup.select('.viewdownloadbutton a')[0]['href']
    torrent_file = requests.get(download_link, proxies=proxies)
    # The 'with' block closes the file; an explicit close() is redundant
    with open(filename, 'wb') as torrent:
        torrent.write(torrent_file.content)
    print filename
def process_img(poster, filename):
    if poster is None or poster == 'N/A':
        return
    img_url = poster[:-7] + config.img_size + '.jpg'
    img_file = os.path.join(config.images_folder, filename + '.jpg')
    with open(img_file, 'wb') as img_fh:
        try:
            if set_proxy:
                img_fh.write(requests.get(img_url, proxies=config.proxyDict).content)
            else:
                img_fh.write(requests.get(img_url).content)
        except requests.RequestException as e:
            # Swallow download errors; the file is simply left empty
            pass
def GetURL(host, target):
    global NotFound, Found, Forbidden, Other, LinesLeft, Lines
    sys.stdout.write("\r\x1b[K \033[31m%d \033[0mFound, \033[33m%d \033[0mForbidden, "
                     "\033[32m%d \033[0mNotFound, \033[37m%d \033[0mOther, "
                     "\033[37m%d \033[0mPercent Left"
                     % (Found, Forbidden, NotFound, Other, LinesLeft * 100 / Lines))
    sys.stdout.flush()
    resource = target + '/' + host.rstrip()
    try:
        # Test the combined auth+proxy case first; placed after the
        # single-option branches it would never be reached.
        if AuthOpt and ProxyOpt:
            url = requesocks.get(resource, proxies=Proxy_Addr,
                                 auth=(Auth_User, Auth_Pwd), headers=headers)
        elif AuthOpt:
            url = requesocks.get(resource, auth=(Auth_User, Auth_Pwd), headers=headers)
        elif ProxyOpt:
            url = requesocks.get(resource, proxies=Proxies, headers=headers)
        elif CookiesOpt:
            url = requesocks.get(resource, cookies=cookies, headers=headers)
        else:
            url = requesocks.get(resource, headers=headers)
        code = url.status_code
        LinesLeft -= 1  # every branch below decremented this; hoisted once
        if code == 401:
            Other += 1
            outputfile.write("<A HREF='" + target + "/" + host + "'>" + target + '/' + host
                             + " - <STRONG>REQUIRES AUTHENTICATION</STRONG><br>\n")
            if results['V']:
                sys.stdout.write("\r\x1b[K\033[33m %s/%s\033[0m-REQUIRES AUTHENTICATION"
                                 % (target, host))  # NOTE: doesn't print after the counter line
                sys.stdout.flush()
        elif code == 403:
            Forbidden += 1
        elif code == 404:
            NotFound += 1
        elif code == 200:
            Found += 1
            outputfile.write("<A HREF='" + target + "/" + host + "'>" + target + '/' + host + "<br>\n")
            if results['V']:
                sys.stdout.write("\r\x1b[K\033[31m %s/%s\033[0m-FOUND"
                                 % (target, host))  # NOTE: doesn't print after the counter line
                sys.stdout.flush()
        else:
            Other += 1
    except requesocks.ConnectionError as e:
        outputfile.write("We failed to reach a server.<br>Reason: Connection Error</BODY></HTML>")
        outputfile.close()
        print R + "\n ERROR: Connection Error - Check target is correct or exists" + W
        sys.exit()
def getProxy():
    # Reach pachong.org through a local SOCKS5 proxy
    socks.set_default_proxy(socks.SOCKS5, '127.0.0.1', 1080)
    socket.socket = socks.socksocket
    r = requesocks.get(proxyUrl)
    html = r.text.encode('utf-8')
    # Match the <script> declarations defined in the page header
    reg_script_head = '<script type.*?>(.*?)</script>'
    pattern_script_head = re.compile(reg_script_head, re.S)
    result_of_script_head = re.findall(pattern_script_head, html)
    # Match the port (written by inline JavaScript)
    reg_port = '<td><script>(.*?)</script>'
    pattern_port = re.compile(reg_port, re.S)
    result_of_port = re.findall(pattern_port, html)
    # Match the IP address
    reg_ip = '<td>([0-9]+(?:\.[0-9]+){0,3})</td>'
    pattern_ip = re.compile(reg_ip, re.S)
    result_of_ip = re.findall(pattern_ip, html)
    for i, item in enumerate(result_of_ip):
        # Evaluate the page's port-obfuscation JavaScript to recover the port
        jsevalPort = result_of_script_head[2] + result_of_port[i]
        js = '''
        function add(){
            %s
        }
        add()''' % jsevalPort.replace('document.write', 'return')
        result = js2py.eval_js(js)
        ip_port[item] = result
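# A small illustrative check (not from the original source) for entries
# harvested into ip_port above, before relying on them; assumes requesocks
# accepts requests-style 'socks5://' proxy URLs:
def check_proxy(ip, port, timeout=5):
    p = {'http': 'socks5://%s:%s' % (ip, port),
         'https': 'socks5://%s:%s' % (ip, port)}
    try:
        return requesocks.get('http://ip-api.com/json', proxies=p,
                              timeout=timeout).status_code == 200
    except Exception:
        return False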
def search_list(url, max_id):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/29.0.1547.57 Safari/537.36'
    }
    result = requests.get(url, headers=headers, proxies=proxies)
    result.encoding = 'utf-8'
    print url, result.status_code, result.headers['date']
    soup = BeautifulSoup(result.text)
    keywords = load_keywords()
    last_num_reg = re.compile('\\d+$')
    is_end = False
    new_max_id = max_id
    for link in soup.select('.tlistname a'):
        link_id = int(last_num_reg.findall(link['href'])[0])
        if link_id > max_id:
            for key in keywords:
                if key in link.text:
                    filename = ('/home/himmel/torrent/'
                                + link.text.replace("/", "").replace(" ", "")
                                + '.torrent')
                    if not os.path.isfile(filename):
                        download_torrent(link['href'], filename)
                        time.sleep(5)
        if link_id < max_id:
            is_end = True
        if link_id > new_max_id:
            new_max_id = link_id
    if new_max_id > get_max_id():
        set_max_id(new_max_id)
    return is_end
def hit_the_search_api(self, query_url):
    tried_proxies = 0
    while True:
        search_response = requests.get(query_url, proxies=self.proxy)
        try:
            response_dict = json.loads(search_response.text)
            results = response_dict['responseData']
        except (ValueError, KeyError):
            # The tuple form is required: 'except ValueError, KeyError:'
            # only catches ValueError and binds it to the name KeyError.
            msg = u'HTTP %s\n%s' % (search_response.status_code, search_response.text)
            raise GoogleAPIError(msg)
        status = response_dict['responseStatus']
        details = response_dict['responseDetails']
        if status == 403 and self.use_proxy:
            self.switch_to_next_proxy()
            tried_proxies += 1
            if tried_proxies >= (len(settings.PROXY_LIST) / 2 + 1):
                msg = ('Tried half the proxies but all received a 403 FORBIDDEN '
                       'response from the Google API. Either wait for a while '
                       'or add more proxies to the PROXY_LIST in settings')
                raise GoogleAPIError(msg)
            continue
        elif status != 200:
            raise GoogleAPIError(details)
        return results
def log_ip_info():
    # Get the apparent public IP and location info through the proxies
    print streams.PROXIES
    ip_info = json.loads(
        requesocks.get('http://ip-api.com/json', proxies=streams.PROXIES).text)
    logger.info('IP address: {0}'.format(ip_info['query']))
    logger.info('Location: {0}'.format(ip_info['country']))
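# streams.PROXIES is assumed to be a requests-style proxy mapping, e.g.
# (Tor defaults, illustrative only):
#
#   PROXIES = {'http': 'socks5://127.0.0.1:9050',
#              'https': 'socks5://127.0.0.1:9050'}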
def download_file(target, remote_file, local_file):
    print "\x1b[1;34m{+} Target: %s\x1b[0m" % (target)
    print "\x1b[1;34m{+} Dumping %s\x1b[0m" % (remote_file)
    target_url = "https://%s//common/download.php?file=%s" % (target, encode(text=remote_file))
    #print target_url  # debugging
    try:
        r = requesocks.get(url=target_url, proxies=proxies, verify=False)
    except Exception as e:
        sys.exit("Exception hit, printing stacktrace...\n%s" % (str(e)))
def default(self):
    import time
    controller = tor.conn()
    proxies = {
        "http": "socks5://localhost:9050/",
        "https": "socks5://localhost:9050/",
    }
    resp = requests.get("https://api.ipify.org", proxies=proxies)
    print(resp.text)
    # Ask Tor for a new circuit, then wait for it to be built
    controller.signal(Signal.NEWNYM)
    print("Rotating Tor circuit...")
    time.sleep(5)
    resp = requests.get("https://api.ipify.org", proxies=proxies)
    print(resp.text)
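# A minimal sketch of the helper assumed above, if tor.conn() wraps the stem
# library; the control port and authentication details are assumptions:
from stem import Signal
from stem.control import Controller

def conn():
    controller = Controller.from_port(port=9051)  # Tor's default ControlPort
    controller.authenticate()  # cookie auth, or a password if configured
    return controller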
def get_all_picture():
    #session = requesocks.session()
    proxies = {
        'http': 'socks5://127.0.0.1:9050',
        'https': 'socks5://127.0.0.1:9050'
    }
    all_products_list = db.query(Product).all()
    print len(all_products_list)
    count = 1
    for product in all_products_list:
        product_p = product.product_other_picture.split('|')
        print('Process' + str(count))
        if len(product_p) < 2 and product.product_test_one_flag == 0:
            while True:
                soup = None
                try:
                    req = requesocks.get(product.partner_url, proxies=proxies)
                    #time.sleep(random.randint(0, 5))
                    soup = BeautifulSoup.BeautifulSoup(req.text)
                    print product.partner_url
                    #print soup
                    name2 = []
                    for t in soup.findAll("span", {"class": "img-thumb-item"}):
                        name2.append(t.next['src'])
                    name = re.findall(r'[^/]+\.jpg$', product.product_picture)
                    print name
                    print name2
                    assert name2 != []
                    true = []
                    for s in name2:
                        true.append(re.sub(ur'[^/]+\.jpg$', name[0], s))
                    print 'OK_PARS' + str(count)
                    count += 1
                    product.product_other_picture = '|'.join(true)
                    db.session.add(product)
                    db.session.commit()
                    break
                except Exception:
                    # If the page uses the alternate image-viewer markup, flag
                    # the product and move on; otherwise rotate the circuit and
                    # retry. Guard against soup being None when the request
                    # itself failed.
                    if soup is not None and soup.findAll("div", {"class": "ui-image-viewer-thumb-wrap"}):
                        print 'OK_SetFlag_to_1'
                        product.product_test_one_flag = 1
                        db.session.add(product)
                        db.session.commit()
                        count += 1
                        break
                    else:
                        print 'Change Proxy'
                        renew_connection()
        else:
            count += 1
def run(self):
    logging.debug('Starting thread: %s..' % self.name)
    while True:
        try:
            url = self.urls.get(False)  # non-blocking get
            if url is None:
                break
        except:
            # Queue is empty: stop the worker
            break
        full_url = 'http://%s/%s' % (self.host, url)
        print('Checking: %s' % full_url)
        if self.proxies is None:
            r = get(full_url)
        else:
            r = get(full_url, proxies=self.proxies)
        if r.status_code == 200:
            print('[+] Valid url found : %s' % full_url)
        else:
            logging.debug('[-] %s => %i' % (full_url, r.status_code))
        self.urls.task_done()
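# A sketch of how this worker might be wired up; the class name UrlChecker
# and its constructor arguments are assumptions, not from the original source:
import Queue

def scan(host, wordlist, proxies=None, n_threads=10):
    q = Queue.Queue()
    for path in wordlist:
        q.put(path)
    for _ in range(n_threads):
        worker = UrlChecker(host=host, urls=q, proxies=proxies)  # hypothetical Thread subclass with run() above
        worker.daemon = True
        worker.start()
    q.join()  # blocks until every queued path has been task_done()'d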
def getSock(self):
    # Scrape SOCKS5 proxies from both listing pages; on any failure,
    # wait a minute and retry (via recursion).
    ip = []
    port = []
    for page in ('http://www.cz88.net/proxy/socks5.shtml',
                 'http://www.cz88.net/proxy/socks5_2.shtml'):
        try:
            contents = requests.get(page, timeout=5).content
        except:
            time.sleep(60)
            return self.getSock()
        # Skip the first match on each page (the table header)
        ip += re.findall('''class="ip">(.+?)</div>''', contents, re.S)[1:]
        port += re.findall('''class="port">(.+?)</div>''', contents, re.S)[1:]
    self.dict['ip'] = ip
    self.dict['port'] = port
    if len(self.dict['ip']) == 0:
        time.sleep(60)
        return self.getSock()
    return True
def get_instagram(url):
    response = None
    try:
        response = get(url, proxies=get_proxies(), timeout=60.00)
    except (Exception, SSLError):
        pass
    if not response:
        return
    if not response.status_code == 200:
        return
    try:
        return Selector(text=response.text).xpath(
            '//meta[@property="og:image"]/@content').extract()[0]
    except IndexError:
        pass
    return ''
def get_facebook(url):
    response = None
    try:
        response = get(url, proxies=get_proxies(), timeout=60.00)
    except (Exception, SSLError):
        pass
    if not response:
        return
    if not response.status_code == 200:
        return
    try:
        return Selector(text=response.text).xpath(
            '//img[@id="fbPhotoImage"]/@src').extract()[0]
    except IndexError:
        pass
    return ''
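# get_proxies(), used by the two scrapers above, is assumed to return a
# requests-style mapping; a minimal sketch with Tor defaults (illustrative
# only, not the original implementation):
def get_proxies():
    return {'http': 'socks5://127.0.0.1:9050',
            'https': 'socks5://127.0.0.1:9050'}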
def ss_speed_test(*args, **kwargs):
    try:
        local_port = kwargs.get('local_port')
        socks5_proxies = {
            'http': 'socks5://127.0.0.1:{}'.format(local_port),
            'https': 'socks5://127.0.0.1:{}'.format(local_port)
        }
        start = time.time()
        resp = requesocks.get('https://www.google.com.hk',
                              proxies=socks5_proxies,
                              timeout=kwargs.get('timeout', 5))
        cost = time.time() - start
        if resp.status_code == requesocks.status_codes.codes.ok:
            # KB/s: bytes fetched / 1024 / elapsed seconds
            return int(len(resp.text) / 1024 / cost)
    except Exception:
        return 0
    return 0  # non-200 responses previously fell through and returned None
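# Illustrative call (the port number is an assumption): returns approximate
# download speed in KB/s through a local shadowsocks client, or 0 on failure.
#
#   speed = ss_speed_test(local_port=1080, timeout=5)
#   print 'approx. %d KB/s' % speed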
def get_imdb_info(self):
    quoted_title = urllib.quote_plus(self.title)
    request = requesocks.get(
        'http://www.omdbapi.com/?t={0}&y=&plot=short&r=json'.format(quoted_title))
    data = json.loads(request.text)
    return data
def siteBurst(url, wordlist, status_code, verbose):
    words = wordlist.split()
    # Extensions for most common web languages:
    # ASP, CSS, ColdFusion, Erlang, Flash, HTML, Java, JavaScript,
    # Perl, PHP, Python, Ruby, SSI, XML and a few others...
    #extention = ['', '.asp', '.aspx', '.axd', '.asx', '.asmx', '.ashx', '.cfm', '.css', '.yaws', '.swf', '.html', '.htm', '.xhtml', '.jhtml', '.jsp', '.jspx', '.wss', '.do', '.action', '.js', '.pl', '.php', '.php4', '.php3', '.phtml', '.py', '.rb', '.rhtml', '.shtml', '.xml', '.rss', '.svg', '.cgi', '.dll', '.txt']
    # Keeping only commonly found extensions
    extention = ['', '.asp', '.aspx', '.html', '.php', '.txt']
    print("We have " + str(len(words) * len(extention)) + " combinations to try!")
    false_check = False  # has the user confirmed the results are not false positives?
    directory = list()
    pages = list()
    for word in words:
        false_positive = 0  # how many extensions hit for a single word
        for ext in extention:
            try:
                req = requesocks.get(url + word + ext)  # format -> [http://example.com][index][.html]
            except:
                print("Encountered an ERROR! Try again.")
                continue  # without this, 'req' below would be stale or unbound
            code = req.status_code
            if str(code) in status_code.split(','):  # the site sent a positive response code
                false_positive += 1
                if ext == '':  # directory
                    print("[#] " + url + word + ext + " directory found !")
                    directory.append(url + word + ext)
                else:  # non-directory file
                    print("[+] " + url + word + ext + " found !")
                    pages.append(url + word + ext)
                if false_check is False:  # false-positive check
                    if false_positive > 5:
                        print("We may be getting false positive results!")
                        print("Please check the above links manually and tell if those links are valid or not.")
                        user_check = raw_input("Are the links valid ? (yes/no) :")
                        if user_check.lower() == "yes":
                            false_check = True
                        else:
                            print("Sorry, please try to check for response codes manually and then provide them using -s <response codes>")
                            quit()
            else:  # the site sent a negative response code
                if bool(verbose) == True:
                    print("[*] " + url + word + ext + " not found !")
                else:
                    continue
    print("\n\n")
    # Print the webpages found
    width = max([len(page) for page in pages] or [0])  # avoid max() on an empty list
    print("===" + "=" * width + "===")
    print("WebPages Found :")
    print("===" + "=" * width + "===")
    for page in pages:
        print("[+] " + page)
    print("===" + "=" * width + "===")
    print('')
    # Print the directories found
    width = max([len(folder) for folder in directory] or [0])
    print("===" + "=" * width + "===")
    print("Directories Found :")
    print("===" + "=" * width + "===")
    for folder in directory:
        print("[#] " + folder)
    print("===" + "=" * width + "===")
def get_clipboard():
    # Read a URL from the Windows clipboard (the enclosing def was implied
    # by the call site below)
    import win32clipboard
    win32clipboard.OpenClipboard()
    ret = win32clipboard.GetClipboardData()
    win32clipboard.CloseClipboard()
    return ret

try:
    proxies = {
        'http': 'socks5://127.0.0.1:8090',
        'https': 'socks5://127.0.0.1:8090'
    }
    tor_host = 'http://storebt.com'
    url_1 = get_clipboard()
    r_1 = requests.get(url_1, proxies=proxies, timeout=10)
    tree_1 = html.fromstring(r_1.text)
    url_2 = tree_1.xpath(
        '/html/body/div[1]/div[2]/div/div/table/tr[10]/td[2]/a[2]/@href')[0]
    r_2 = requests.get(url_2, proxies=proxies, timeout=10)
    tree_2 = html.fromstring(r_2.text)
    url_3 = tor_host + tree_2.xpath('/html/body/div[1]/a/@href')[0]
    filename = url_3.split('/')[-1]
    r_3 = requests.get(url_3, proxies=proxies, timeout=10)
    with open(filename, 'wb') as f:
        for chunk in r_3.iter_content(1024):
            f.write(chunk)
except Exception as e:
    print(e)  # report clipboard/network/download failures
proxies = {}

# Our environment returns Solr results in the Zulu time zone
zulu = timezone('Zulu')
solr_time = datetime.now(zulu)
current_solr_date = solr_time.strftime("%Y-%m-%d")

# Hard to anticipate your query here; this variable will require some interpretation
print("What is the url of your solr query? (everything before /select)")
solr_url = raw_input()

# Read in your source list
with open(args.input) as f:
    source_list = f.readlines()

for source in source_list:
    url = '%s/select?q=source%%3A%s&sort=dateCreated+desc&rows=10&wt=python' % (
        solr_url.rstrip(), source.rstrip())
    r = requests.get(url, proxies=proxies)
    # wt=python returns a Python literal; eval() of remote content is a
    # hazard if the Solr endpoint is not trusted
    result = eval(r.content)
    flag = 0
    for doc in result["response"]["docs"]:
        doc_date = doc["dateCreated"].split("T", 1)[0]
        if doc_date == current_solr_date:
            flag = 1
    if flag == 1:
        print "%s has had assets ingested today." % (source.rstrip())
    else:
        print "%s has NOT had assets ingested today" % (source.rstrip())
    print "\n"
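# A safer variant (a sketch, not from the original source): ask Solr for
# JSON and parse it with the requests helper instead of eval():
#
#   url = '%s/select?q=source%%3A%s&sort=dateCreated+desc&rows=10&wt=json' % (
#       solr_url.rstrip(), source.rstrip())
#   result = requests.get(url, proxies=proxies).json()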
def get_tweets(q):
    tweets = []
    params = {
        'composed_count': '0',
        'f': 'realtime',
        'include_available_features': '0',
        'include_entities': '0',
        'include_new_items_bar': 'false',
        'interval': '30000',
        'latent_count': '0',
        'q': q,
        'src': 'typd',
    }
    url = 'https://twitter.com/i/search/timeline'
    referer = urlparse(
        furl(url).add({
            'f': 'realtime',
            'q': q,
            'src': 'typd',
        }).url
    )
    while True:
        response = None
        try:
            response = get(
                'https://twitter.com/i/search/timeline',
                headers={
                    'Accept': '*/*',
                    'Accept-Encoding': 'identity, deflate, compress, gzip',
                    'referer': '%(path)s?%(query)s' % {
                        'path': referer.path,
                        'query': referer.query,
                    },
                    'user-agent': get_user_agent(),
                    'x-requested-with': 'XMLHttpRequest',
                    'x-twitter-polling': 'true',
                },
                params=params,
                proxies=get_proxies(),
                timeout=60.00
            )
        except (Exception, SSLError):
            break
        if not response:
            break
        if not response.status_code == 200:
            break
        contents = None
        try:
            contents = loads(response.text)
        except (JSONDecodeError, TypeError, ValueError):
            break
        if not contents:
            break
        if 'items_html' not in contents:
            break
        for tweet in Selector(text=contents['items_html']).xpath('//div[@data-tweet-id]'):
            tweet = get_tweet(tweet)
            if tweet:
                tweets.append(tweet)
        if len(tweets) >= 500:
            break
        if not contents['has_more_items']:
            break
        if 'scroll_cursor' in contents:
            params['scroll_cursor'] = contents['scroll_cursor']
    return tweets
def main():
    fabric.api.env.use_ssh_config = True
    fabric.api.env.forward_agent = True
    parser = argparse.ArgumentParser(description="(re-)produce rscfl experiments")
    parser.add_argument('-n', '--exp', dest="exp_name", default="noname",
                        help="Experiment name")
    parser.add_argument('-c', '--config', dest="exp_cfg", default="nocfg",
                        help="Experiment configuration")
    parser.add_argument('-s', '--scripts', dest="out_dir",
                        default="%s/rscfl_exp" % os.environ["HOME"],
                        help="Destination directory for experiment data. "
                             "This must exist on fg_load_vm and it must contain"
                             " all the data processing scripts")
    parser.add_argument('--meta', dest="meta", default="nometa",
                        help="Additional description/metadata for experiment")
    parser.add_argument('--fg_load_vm', dest="fg_load_vm", default=fg_load_vm,
                        help="Machine driving fg load (ab), data pre-processing"
                             " and plotting")
    parser.add_argument('--bg_load_vm', dest="bg_load_vm", default=bg_load_vm,
                        help="Machine driving bg load (stress), and controlling"
                             " contention (no of VMs, containers etc.)")
    parser.add_argument('--target_vm', dest="target_vm", default=target_vm,
                        help="Machine running rscfl and lighttpd (or different"
                             " target process)")
    parser.add_argument('--proxy', dest="proxy", default=None,
                        help="Set proxy for http requests")
    parser.add_argument('--manual', dest="manual_exp", action="store_true",
                        help="Manually run experiment. You will be guided"
                             " step-by-step in what needs to be done. This"
                             " overrides -c (--config)")
    args = parser.parse_args()
    fabric.api.output["stdout"] = False
    fabric.api.output["running"] = False
    proxies = {}
    if args.proxy is not None:
        import requesocks as requests
        proxies["http"] = args.proxy
        proxies["https"] = args.proxy
    else:
        import requests
    # Load experiment config unless running manually:
    cfg_json = {}  # stays empty under --manual; guarded with .get() below
    if not args.manual_exp:
        if args.exp_cfg == "nocfg":
            print("You must specify an experiment configuration file if not passing --manual")
        else:
            print("Loading experiment configuration...")
            cfg_file = open(args.exp_cfg)
            cfg_json = json.load(cfg_file)
            cfg_file.close()
            if args.exp_name == "noname":
                args.exp_name = cfg_json['exp-name']
            if args.meta == "nometa":
                args.meta = cfg_json['exp-descr']

    ## Here we go, preparing global experiment metadata
    msg = """
Checklist (please verify that the following are true):
 * iptables configured on {0};
 * rscfl is running on {0}, release build;
 * lighttpd is running on {0};
 * bash scripts you run have the #!/bin/bash directive

If one of those conditions is false, expect the script to stall and fail."""
    run_DAQ = False
    if cfg_json.get('exp-run-DAQ') == "True":
        run_DAQ = True
    if run_DAQ == True:
        print(msg.format(args.target_vm))

    # Create experiment directory on fg_load_vm
    exp_dir = fabric.api.execute(create_experiment_dir, args.out_dir,
                                 args.exp_name, args.meta,
                                 hosts=args.fg_load_vm)
    # Infer basic experiment metadata (virt/no_virt, rscfl version, uname, etc)
    (out_path, result_path, data_path, exp_dir_name) = exp_dir[args.fg_load_vm]
    config_vars['exp_dir'] = out_path
    config_vars['exp_dir_name'] = exp_dir_name
    config_vars['script_dir'] = args.out_dir
    config_vars['data_dir'] = data_path
    config_vars['result_dir'] = result_path
    config_vars['target_vm'] = args.target_vm
    base_meta = {}
    if run_DAQ == True:
        base_meta = fabric.api.execute(get_target_meta, hosts=args.target_vm)
    else:
        base_meta[args.target_vm] = ", \"daq\": \"False\""
    script_rev = fabric.api.execute(get_script_rev, args.out_dir,
                                    hosts=args.fg_load_vm)
    fabric.api.execute(add_meta, out_path, script_rev[args.fg_load_vm],
                       hosts=args.fg_load_vm)
    fabric.api.execute(add_meta, out_path, base_meta[args.target_vm],
                       hosts=args.fg_load_vm)
    fabric.api.execute(copy_to_remote, args.exp_cfg, out_path + "/config.json",
                       hosts=args.fg_load_vm)

    if args.manual_exp == True:
        confirm = fabric.contrib.console.confirm(msg.format(args.target_vm))
        if not confirm:
            end_meta(out_path, args.fg_load_vm)
            return
        # Reset lighttpd accounting data
        requests.get("http://%s/rscfl/clear" % args.target_vm, proxies=proxies)
        # Send mark for id 0 (required)
        payload = {'mark': 'exp_%s' % args.exp_name}
        requests.post("http://%s/mark" % args.target_vm, payload, proxies=proxies)

        # Guided Experiment -- stage 1
        # (running the experiment and the background load)
        bg_load_meta = get_experiment_load_meta(
            args.bg_load_vm,
            "Start background load script on %s and then confirm (Y)" % args.bg_load_vm)
        fabric.api.execute(add_meta, out_path, ">bg_load=\n" + bg_load_meta,
                           hosts=args.fg_load_vm)
        fg_load_meta = get_experiment_load_meta(
            args.fg_load_vm,
            "Confirm (Y) after starting the foreground load (ab) on %s" % args.fg_load_vm)
        fabric.api.execute(add_meta, out_path, ">fg_load=\n" + fg_load_meta,
                           hosts=args.fg_load_vm)
    else:
        if cfg_json['exp-run-DAQ'] == "True":
            # Reset lighttpd accounting data
            requests.get("http://%s/rscfl/clear" % args.target_vm, proxies=proxies)
            # Send mark for id 0 (required)
            payload = {'mark': 'exp_%s' % args.exp_name}
            requests.post("http://%s/mark" % args.target_vm, payload, proxies=proxies)

        # Run background load
        bgld = cfg_json['bg-load']
        outStream = BgLoadScanIO(args, cfg_json)
        if bgld['run'] == "True":
            bg_load_cmd = config_process_vars(bgld['start'], cfg_json)
            bg_load_cmd_esc = bg_load_cmd.replace('"', '\\\\"')
            fabric.api.execute(add_meta, out_path,
                               ", \"bg_load_cmd\": \"%s\"" % bg_load_cmd_esc,
                               hosts=args.fg_load_vm)
            fabric.api.output["stdout"] = True
            fabric.api.execute(run_bg_load, bg_load_cmd, outStream,
                               hosts=args.bg_load_vm)
            fabric.api.output["stdout"] = False

        # Run foreground load
        # <this is triggered by the stdout of the background load and executed
        # by BgLoadScanIO>
        fgld = cfg_json['fg-load']
        if bgld['run'] == "False" and fgld['run'] == "True":
            outStream.write("$ESTART")
        outStream.close()

        # Stop bg load (no reason to keep loading the vms)
        if bgld['run'] == "True":
            stop_bg_cmd = config_process_vars(cfg_json['stop-bg-load'], cfg_json)
            fabric.api.execute(run_cmd, stop_bg_cmd, "Stopping background load",
                               hosts=args.bg_load_vm)

        # Run processing
        if fgld['run'] == "True":
            process_cmd = config_process_vars(cfg_json['fg-process-raw']['script'],
                                              cfg_json)
            process_cmd = process_cmd + " " + config_vars['script_dir'] + "/"
            fabric.api.execute(run_cmd, process_cmd,
                               "Parsing experiment data on %s" % args.fg_load_vm,
                               hosts=args.fg_load_vm)

    # DAQ done
    local_vars = {}

    # Train model
    tm = cfg_json['train-model']
    train_file = ""
    training_meta = ", \"training\": { \"run\": \"" + tm['run'] + "\""
    if tm['run'] == "True":
        bm_file = os.path.join(config_vars['script_dir'], tm['bare-metal-exp'],
                               "data", tm['bm-sdat'])
        vm_file = config_process_vars(tm['virt-sdat'], cfg_json)
        out_tfile = config_process_vars(tm['out'], cfg_json)
        train_file = out_tfile
        local_vars['bm_file'] = bm_file
        local_vars['vm_file'] = vm_file
        local_vars['out_tfile'] = out_tfile
        local_vars['train_file'] = out_tfile
        training_meta = training_meta + ", \"bare-metal\": \"" + bm_file + "\""
        training_meta = training_meta + ", \"virt\": \"" + vm_file + "\""
        train_script = config_process_vars(tm['script'], cfg_json, local_vars)
        fabric.api.execute(run_cmd, train_script,
                           "Training gaussian process, into %s:%s" %
                           (args.fg_load_vm, local_vars['out_tfile']),
                           hosts=args.fg_load_vm)
        for aux_out in tm['aux-out']:
            aux_out_s = config_process_vars(aux_out, cfg_json)
            fabric.api.execute(copy_file, aux_out_s, config_vars['result_dir'],
                               True, hosts=args.fg_load_vm)
    elif tm['run'] == "External":
        train_fp = os.path.join(config_vars['script_dir'], tm['use-from'],
                                "data", tm['name'])
        training_meta = training_meta + ", \"file\": \"" + train_fp + "\""
        local_vars['train_file'] = train_fp
    elif tm['run'] == "False":
        print("Skipping gaussian process training phase")
    training_meta = training_meta + " }"
    fabric.api.execute(add_meta, out_path, training_meta, hosts=args.fg_load_vm)

    data_fp = []
    out_fp = []

    # plot-scatter
    pscttr = cfg_json['plot-scatter']
    run_scatter = False
    if pscttr['run'] == "True":
        run_scatter = True
        data_fp.append(config_process_vars(cfg_json['fg-process-raw']['out'][1],
                                           cfg_json))
        out_fp.append(os.path.join(result_path,
                                   config_process_vars(pscttr['out'], cfg_json)))
    elif pscttr['run'] == "External":
        run_scatter = True
        if type(pscttr['name']) in (list,):
            for idx, file_name in enumerate(pscttr['name']):
                data_fp.insert(idx, os.path.join(config_vars['script_dir'],
                                                 pscttr['use-from'], "data",
                                                 file_name))
                out_fp.insert(idx, os.path.join(result_path,
                                                config_process_vars(pscttr['out'][idx],
                                                                    cfg_json)))
        else:
            data_fp.append(os.path.join(config_vars['script_dir'],
                                        pscttr['use-from'], "data",
                                        pscttr['name']))
            out_fp.append(os.path.join(result_path,
                                       config_process_vars(pscttr['out'], cfg_json)))
    if run_scatter == True:
        for idx, data_file in enumerate(data_fp):
            local_vars['d_file_path'] = data_file
            local_vars['out_file'] = out_fp[idx]
            scttr_script = config_process_vars(pscttr['script'], cfg_json, local_vars)
            fabric.api.execute(run_cmd, scttr_script,
                               "Scatter plot latency vs sched-out [%d of %d]"
                               % (idx + 1, len(data_fp)),
                               hosts=args.fg_load_vm)

    # plot-inducedlat-hist
    data_fp = []
    out_fp = []
    pilh = cfg_json['plot-inducedlat-hist']
    run_pilh = False
    if pilh['run'] == "True":
        run_pilh = True
        data_fp.append(config_process_vars(cfg_json['fg-process-raw']['out'][1],
                                           cfg_json))
        out_fp.append(os.path.join(result_path,
                                   config_process_vars(pilh['out'], cfg_json)))
    elif pilh['run'] == "External":
        run_pilh = True
        if type(pilh['name']) in (list,):
            for idx, file_name in enumerate(pilh['name']):
                data_fp.insert(idx, os.path.join(config_vars['script_dir'],
                                                 pilh['use-from'], "data",
                                                 file_name))
            for idx, out_name in enumerate(pilh['out']):
                out_fp.insert(idx, os.path.join(result_path,
                                                config_process_vars(out_name, cfg_json)))
        else:
            data_fp.append(os.path.join(config_vars['script_dir'],
                                        pilh['use-from'], "data", pilh['name']))
            out_fp.append(os.path.join(result_path,
                                       config_process_vars(pilh['out'], cfg_json)))
    if run_pilh == True:
        if 'multiple-file-args' in pilh.keys() and pilh['multiple-file-args'] == "True":
            # All data files feed a single script invocation
            for idx, data_file in enumerate(data_fp):
                local_vars['d_file_path' + str(idx)] = data_file
            local_vars['out_file'] = out_fp[0]
            pilh_script = config_process_vars(pilh['script'], cfg_json, local_vars)
            fabric.api.execute(run_cmd, pilh_script,
                               "Histogram of hypervisor-induced latency",
                               hosts=args.fg_load_vm)
        else:
            for idx, data_file in enumerate(data_fp):
                local_vars['d_file_path'] = data_file
                local_vars['out_file'] = out_fp[idx]
                pilh_script = config_process_vars(pilh['script'], cfg_json, local_vars)
                fabric.api.execute(run_cmd, pilh_script,
                                   "Histograms of hypervisor-induced latency [%d of %d]"
                                   % (idx + 1, len(data_fp)),
                                   hosts=args.fg_load_vm)

    end_meta(out_path, args.fg_load_vm)
    print("Copying results locally")
    fabric.api.execute(copy_from_remote, config_vars['result_dir'],
                       os.path.join(".", exp_dir_name), hosts=args.fg_load_vm)
    fabric.api.execute(copy_from_remote,
                       os.path.join(config_vars['exp_dir'], "meta"),
                       os.path.join(".", exp_dir_name), hosts=args.fg_load_vm)
    fabric.api.execute(copy_from_remote,
                       os.path.join(config_vars['exp_dir'], "config.json"),
                       os.path.join(".", exp_dir_name), hosts=args.fg_load_vm)
    print("Teleporting unicorns from another dimension...[Experiment Done]")