def process_response(response):
    if 300 <= response.status < 400:
        visited.append(sent_links.pop(0))
        link_queue.insert(0, response.location)
    elif 500 <= response.status < 600:
        link = sent_links.pop(0)
        link_queue.insert(0, link)
    elif response.status == 200 and response.status_message == 'OK':
        visited.append(sent_links.pop(0))
        soup = BeautifulSoup(response.content, "html.parser")
        a_tags = soup.find_all('a')
        h2_tags = soup.find_all('h2', {'class': 'secret_flag'})
        for h2_tag in h2_tags:
            if h2_tag.contents[0].split(" ")[1] not in secret_flags:
                secret_flags.append(h2_tag.contents[0].split(" ")[1])
        for a_tag in a_tags:
            if urljoin(default_netloc, a_tag['href']) not in visited:
                parsed = urlparse(a_tag['href'])
                if (parsed.netloc == '' or parsed.netloc == urlparse(default_netloc).netloc) and \
                        (parsed.scheme == 'http' or parsed.scheme == ''):
                    if parsed.netloc == '':
                        link_queue.append(urljoin(default_netloc, a_tag['href']))
                    else:
                        link_queue.append(a_tag['href'])

def Relative_URL_Checker(url, originator_url):
    from urlparse import urljoin, urlparse
    if url[0] == u'/':
        parsed_url = urlparse(url)
        origParsed = urlparse(originator_url)
        hostname = u"http://" + origParsed.netloc
        # The original called urlparse.urljoin() on the urlparse *function*,
        # which fails; import and call urljoin from the urlparse module instead.
        new_full_url = urljoin(hostname, parsed_url.path)
        return new_full_url
    else:
        return url

def hostname(url):
    from urlparse import urlparse
    if url:
        if '//' in url:
            return urlparse(url).hostname
        else:
            url = 'http://' + url
            return urlparse(url).hostname
    else:
        return None

def path(url):
    from urlparse import urlparse
    if url:
        if '//' in url:
            return urlparse(url).path
        else:
            url = 'http://' + url
            return urlparse(url).path
    else:
        return None

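# Quick sanity check for the hostname()/path() helpers above (illustrative
# only): the 'http://' fallback is what lets scheme-less input parse into a
# proper netloc and path instead of being treated as one long path.
#   hostname('example.com/a/b')          -> 'example.com'
#   path('example.com/a/b')              -> '/a/b'
#   hostname('https://example.com/a/b')  -> 'example.com'
#   hostname(None)                       -> None
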
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
        urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
        urlparse(url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            return urlunparse((scheme, netloc, path, params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path, params, query, fragment))
    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path, params, query, fragment))
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                    and segments[i - 1] not in ('', '..')):
                del segments[i - 1:i + 1]
                break
            i = i + 1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))

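# Behaviour sketch for the urljoin() above (it mirrors the stdlib urlparse
# semantics it is based on): relative references resolve against the base's
# directory, absolute paths replace it, and '..' segments collapse.
#   urljoin('http://a/b/c', 'd')          -> 'http://a/b/d'
#   urljoin('http://a/b/c', '../d')       -> 'http://a/d'
#   urljoin('http://a/b/c', '/d')         -> 'http://a/d'
#   urljoin('http://a/b/c', 'http://x/y') -> 'http://x/y'
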
def ProcessResource(url):
    for key in VISITED.keys():
        if key == url:
            return VISITED[url]
    print("ProcessResource " + url)
    o = urlparse(url)
    path = o.path
    regex = re.compile(r".*/")
    fileNameRoot = regex.sub("", path)
    fileName = fileNameRoot
    targetFileName = ""
    # Ensure that fileName is unique
    index = 0
    while True:
        targetFileName = CONFIG["output_dir"] + "/" + fileName
        if not os.path.exists(targetFileName):
            break
        # '++index' is a no-op in Python; increment explicitly so the
        # candidate name actually changes on each collision.
        index += 1
        fileName = "%d_%s" % (index, fileNameRoot)
    urllib.urlretrieve(url, targetFileName)
    print("Stored mapping " + url + " to " + fileName)
    VISITED[url] = fileName
    return fileName

def get_facebook_info(url, size='large'):
    """Fetch basic Facebook profile info and the profile photo.

    Available photo sizes: small, normal, large.
    """
    import base64
    import json
    import urllib2
    from urlparse import urlparse

    profile_info = {}
    try:
        res_parse = urlparse(url)
        used_id = res_parse.path.replace('/', '')
        # Get info. json.loads replaces the original eval(): the Graph API
        # returns JSON, which may contain null/true/false that eval cannot handle.
        url_info = 'http://graph.facebook.com/%s' % (used_id)
        profile_info = json.loads(urllib2.urlopen(url_info).read())
        # Get profile photo
        url_photo = 'http://graph.facebook.com/%s/picture?type=%s' % (profile_info['username'], size)
        source = urllib2.urlopen(url_photo).read()
        if source:
            profile_info['photo'] = base64.b64encode(source)
    except Exception:
        pass
    return profile_info

def _init_client(self):
    '''Parse the host URL and initialize a client connection.'''
    if not isinstance(self._host, (str, unicode)):
        return self._host

    # To force scheme and netloc behavior. Easily a bug here but let it
    # go for now
    if '//' not in self._host:
        self._host = '//' + self._host
    location = urlparse(self._host)

    if location.scheme in ('', 'redis'):
        if ':' in location.netloc:
            host, port = location.netloc.split(':')
        else:
            host, port = location.netloc, 6379
        # TODO: better matching here
        if location.path in ('', '/'):
            db = 0
        else:
            db = location.path[1:]
        return Redis(host=host, port=int(port), db=int(db))

    raise ValueError("unsupported scheme", location.scheme)

def do_POST(self):
    url = self.path
    parsed = urlparse(url)
    query_dict = parse_qs(parsed.query)
    token = query_dict.get("csrfProxyCookie", ["NOTFOUND"])[0]
    query_list = parse_qsl(parsed.query)
    query_list = [(key, value) for key, value in query_list
                  if key != "csrfProxyCookie"]
    tidied_query = urllib.urlencode(query_list)
    self.path = urlunparse((
        parsed.scheme,
        parsed.netloc,
        parsed.path,
        parsed.params,
        tidied_query,
        parsed.fragment))
    cookie = "csrfProxyCookie={}".format(token)
    if any(cookie in header for header in self.headers.getallmatchingheaders("cookie")):
        self.proxy_request("POST")
    else:
        print "CSRF attempt caught!"
        print "Cookie was {}".format(self.headers.getallmatchingheaders("cookie"))
        print "Token was {}".format(token)
        self.send_response(401, "Unauthorized")
        self.end_headers()
        self.wfile.write("401 - CSRF attempt detected")
        self.wfile.close()

def __init__(self, url, pretend):
    print 'Fetching from Klassikaraadio'
    self.hostname = urlparse(url).hostname
    self.pretend = pretend
    response = urllib2.urlopen(url)
    html = response.read().decode('utf-8', 'ignore')
    parsed_html = BeautifulSoup(html)
    self.title = parsed_html.body.find('h1').text
    if not os.path.exists(self.title):
        os.makedirs(self.title)
    container = parsed_html.body.find('div', attrs={'class': 'sisu_content'})
    episode_urls = []
    for link in container.find_all('a'):
        target = link.get('href')
        if '/helid?' in target:
            episode_urls.append(target)
    total = len(episode_urls)
    i = 1
    for episode in episode_urls:
        self.fetchEpisode(episode, i, total)
        i += 1

def getpicname(path):
    """Retrieve the picture filename from a URL."""
    if os.path.splitext(path)[1] == "":
        return None
    pr = urlparse(path)
    path = "http://" + pr[1] + pr[2]
    return os.path.split(path)[1]

def parseComment(self, response):
    sel = Selector(response)
    movieItem = MovieItem()
    movieItem['id'] = response.meta['id']
    commentLinks = sel.xpath(
        '//div[@id="comments"]/div[contains(@class, "comment-item")]')
    commentLinks.extract()
    comments = []
    for index, commentLink in enumerate(commentLinks):
        comment = {}
        comment['avatar'] = commentLink.xpath(
            'div[@class="avatar"]/a/img/@src').extract().pop()
        comment['uid'] = commentLink.xpath(
            'div[@class="comment"]//span[@class="comment-info"]/a/@href').re(
            r"http://movie.douban.com/people/(.*)/").pop()
        comment['name'] = commentLink.xpath(
            'div[@class="comment"]//span[@class="comment-info"]/a/text()').extract().pop()
        comment['comment'] = commentLink.xpath(
            'div[@class="comment"]/p/text()').extract().pop()
        dateStr = commentLink.xpath(
            'div[@class="comment"]/h3/span[@class="comment-info"]/span/text()').re(r'\d+-\d+-\d+').pop()
        comment['date'] = datetime.strptime(dateStr, "%Y-%m-%d")
        comment['vote'] = int(commentLink.xpath(
            'div[@class="comment"]//span[@class="comment-vote"]/span[contains(@class, "votes")]/text()').extract().pop())
        comments.append(comment)
    movieItem['comments'] = comments
    yield movieItem

    paginator = sel.xpath(
        '//div[@id="paginator"]/a[@class="next"]/@href').extract()
    parsedUrl = urlparse(response.url)
    # Early return left in place from the original (pagination disabled);
    # the request below is therefore unreachable.
    return
    yield Request(url=parsedUrl.scheme + '://' + parsedUrl.netloc + parsedUrl.path + paginator.pop(),
                  callback=self.parseComment,
                  meta={'id': response.meta['id']})

def file_key(self, url):
    """Return a key built from the URL's netloc and the SHA1 hash of the URL."""
    file_guid = hashlib.sha1(url).hexdigest()
    return '%s_%s' % (urlparse(url).netloc, file_guid)

def parse_git_show(remote, verb):
    fetch_url = ""
    for line in run_command("git remote show -n %s" % remote).split("\n"):
        if line.strip().startswith("%s" % verb):
            fetch_url = line.split()[2]

    parsed_url = urlparse(fetch_url)
    project_name = parsed_url.path.lstrip("/")
    if project_name.endswith(".git"):
        project_name = project_name[:-4]

    hostname = parsed_url.netloc
    username = None
    port = parsed_url.port

    if VERBOSE:
        print("Found origin %s URL:" % verb, fetch_url)

    # Workaround bug in urlparse on OSX
    if parsed_url.scheme == "ssh" and parsed_url.path[:2] == "//":
        hostname = parsed_url.path[2:].split("/")[0]

    if "@" in hostname:
        (username, hostname) = hostname.split("@")
    if ":" in hostname:
        (hostname, port) = hostname.split(":")

    # Is origin an ssh location? Let's pull more info
    if parsed_url.scheme == "ssh" and port is None:
        port = 22

    if port is not None:
        port = str(port)

    return (hostname, username, port, project_name)

def download_torrent(self, url):
    content = self.httpRequest.getHttpContent(url)
    if content == "":
        return False
    f = GetIdList()
    f.feed(content)
    boundary = "----WebKitFormBoundarydMcOM7W0mij63Igr"
    parts = []
    for k, v in f.formdata.items():
        parts.append('--' + boundary)
        parts.append('Content-Disposition: form-data; name="' + k + '"')
        parts.append('')
        parts.append(v)
    parts.append('--' + boundary + '--')
    parts.append('\r\n')
    postdata = '\r\n'.join(parts)
    r = urlparse(url)
    downloadUrl = "http://" + r.netloc + "/" + f.action
    content = self.httpRequest.getHttpContent(
        downloadUrl, postdata,
        {"Content-Type": "multipart/form-data; boundary=" + boundary})
    if content == "":
        return False
    filename = f.formdata['ref'] + ".torrent"
    f = open(filename, "wb")
    f.write(content)
    f.close()
    return True

def getGoogleResults(self, pluginname, latest, cve):
    try:
        gs = GoogleSearch("inurl:'wp-content/plugins/" + pluginname + "'", random_agent=True)
        gs.results_per_page = 100
        numberOfprocessed = 0
        self.all_run = []
        for i in range(int(limitForSearch)):
            results = gs.get_results()
            if not results:
                break
            # Semaphore for writing in order to the screen
            self.checkSimultaneus = threading.Semaphore(int(NumThreats))
            # Semaphore for writing to file
            self.writeFile = threading.Semaphore(int(NumThreats) - 1)
            for res in results:
                self.checkSimultaneus.acquire()
                host_name = urlparse(res.url.encode()).hostname
                # Create thread
                t = threading.Thread(target=self.__getGoogleResults,
                                     args=(host_name, latest, pluginname, cve))
                self.all_run.append(t)
                # Run thread
                self.all_run[len(self.all_run) - 1].start()
    except SearchError, e:
        print "Search failed: %s" % e

def urlappend(self, link, u):
    global domain
    urls = urlparse(u)
    if urls.scheme not in ["", "http", "https"]:
        return None
    print "In appending", urls
    if urls.fragment == "":
        if urls.netloc == "":
            # not shown domain urls
            m = p11.search(u)
            tp = p10.search(link)
            if m != None:
                if m.group(1) != None:  # path
                    if tp != None:
                        # print 'return', tp.group(1)+'/'+m.group(1)
                        return tp.group(1) + "/" + m.group(1)
                    else:
                        # print 'return', link+'/'+m.group(1)
                        return link + "/" + m.group(1)
                elif m.group(3) != None:  # ./path
                    if tp != None:
                        return tp.group(1) + "/" + m.group(3)
                    else:
                        return link + "/" + m.group(3)
                elif m.group(4) != None:  # /path
                    return domain + "/" + m.group(4)
                else:  # ../path
                    k = m.group(5)
                    k1 = tp.group(1)
                    l = r"^../(.*)"
                    pl = re.compile(l, re.IGNORECASE)
                    while pl.search(k) != None:
                        m1 = pl.search(k)
                        tmp = p10.search(k1)
                        if tmp != None:
                            k1 = tmp.group(1)
                            k = m1.group(1)
                        else:
                            tmp = p10.search(k1)
                            if tmp != None:
                                k1 = tmp.group(1)
                            return k1 + "/" + k
                    else:
                        return k1 + "/" + k
        else:
            if urls.netloc[-len(domain):] == domain:
                # shown domain and sub domain urls
                if urls.query != "":
                    return urls.netloc + urls.path + urls.params + "?" + urls.query
                else:
                    return urls.netloc + urls.path + urls.params
            else:
                # print 'inside :81'
                m2 = p8.search(urls.netloc)
                if m2 != None:
                    if m2.group(1)[-len(domain):] == domain:
                        if urls.query != "":
                            return urls.netloc + urls.path + urls.params + "?" + urls.query
                        else:
                            return urls.netloc + urls.path + urls.params

def attacking(self, target, rounds):
    print "[Info] [AI] ICMP Broadcast (SMURF) is redirecting: [", rounds, "base stations ]"
    if target.startswith('http://'):
        target = target.replace('http://', '')
        sport = 80
    elif target.startswith('https://'):
        target = target.replace('https://', '')
        sport = 443
    try:
        ip = socket.gethostbyname(target)
    except:
        try:
            import dns.resolver
            r = dns.resolver.Resolver()
            r.nameservers = ['8.8.8.8', '8.8.4.4']  # Google DNS resolvers
            url = urlparse(target)
            a = r.query(url.netloc, "A")  # A record
            for rd in a:
                ip = str(rd)
        except:
            ip = target
    if ip == "127.0.0.1" or ip == "localhost":
        print "[Info] [AI] [SMURF] Sending message '1/0 %====D 2 Ur ;-0' to 'localhost' -> [OK!]\n"
        return
    smurfize(ip, sport, rounds)  # attack with SMURF using threading

def render(self, context):
    o = urlparse(self.media_url)
    if o.hostname is None:
        domain = 'http://%s' % (Site.objects.get_current().domain, )
        return "%s" % urljoin(domain, self.media_url)
    else:
        return self.media_url

def application_identification(server_banner, url):
    found_application_extension = False
    if settings.VERBOSITY_LEVEL >= 1:
        info_msg = "Identifying the target application ... "
        sys.stdout.write(settings.print_info_msg(info_msg))
        sys.stdout.flush()
    root, application_extension = splitext(urlparse(url).path)
    settings.TARGET_APPLICATION = application_extension[1:].upper()

    if settings.TARGET_APPLICATION:
        found_application_extension = True
        if settings.VERBOSITY_LEVEL >= 1:
            print "[ " + Fore.GREEN + "SUCCEED" + Style.RESET_ALL + " ]"
            success_msg = "The target application was identified as "
            success_msg += settings.TARGET_APPLICATION + Style.RESET_ALL + "."
            print settings.print_success_msg(success_msg)

        # Check for unsupported target applications
        for i in range(0, len(settings.UNSUPPORTED_TARGET_APPLICATION)):
            if settings.TARGET_APPLICATION.lower() in settings.UNSUPPORTED_TARGET_APPLICATION[i].lower():
                err_msg = settings.TARGET_APPLICATION + " exploitation is not yet supported."
                print settings.print_critical_msg(err_msg)
                raise SystemExit()

    if not found_application_extension:
        if settings.VERBOSITY_LEVEL >= 1:
            print "[ " + Fore.RED + "FAILED" + Style.RESET_ALL + " ]"
        warn_msg = "Heuristics have failed to identify target application."
        print settings.print_warning_msg(warn_msg)

def ZeusApi_copy_vela(Change_Param_Url):
    old_sign_urlparse = urlparse(Change_Param_Url)
    old_sign_newParams = {}
    old_sign_urlparams = parse_qs(old_sign_urlparse.query)
    print old_sign_urlparams
    for old_sign_param in old_sign_urlparams:
        old_sign_paramValue = old_sign_urlparams[old_sign_param]
        old_sign_newParams[old_sign_param] = old_sign_paramValue[0]
    print old_sign_newParams
    old_sign_urlkeys = old_sign_newParams['keys'].split(',')
    print old_sign_urlkeys
    old_sign_urlkeys.sort()
    tmp = ''
    for i in old_sign_urlkeys:
        # print i
        # print old_sign_newParams[i]
        if old_sign_newParams.has_key(i):
            tmp += '%s%s' % (i, old_sign_newParams[i])
        else:
            tmp += '%s%s' % (i, "")
    print "tmp:" + tmp
    m = hashlib.md5(tmp)
    # The new, re-computed 'sign' value
    old_sign_newParams['sign'] = m.hexdigest().upper()
    print old_sign_newParams['sign']
    old_newsign = old_sign_newParams['sign']
    return old_newsign

def __init__(self, url):
    global domain
    global hreflist
    global headers
    global count
    global picdic
    global path
    rp = robotparser.RobotFileParser()
    self.rp = rp
    headers["User-Agent"] = "vrsec crawling robot"
    headers["If-Modified-Since"] = "Sat, 29 Oct 1994 19:43:31 GMT"
    headers["Accept"] = "text/html"
    urlstr = urlparse(url)
    print " urlparsed: ", urlstr
    domain = p9.search(urlstr[1]).group(1)
    self.osexception((path + domain))
    self.path = path + domain + "/"
    print " domain name: ", domain
    self.urlopenexception(domain, domain)
    # urlretrieve returns a tuple of two objects (filename, mime_hdrs)
    print " picdic: ", picdic
    self.urlpickle()
    self.domain = domain

def playURLRVideo(url, name, type_):
    dialog_progress_title = 'URL Resolver'
    dialog_progress_YTDL = xbmcgui.DialogProgressBG()
    dialog_progress_YTDL.create(dialog_progress_title)
    dialog_progress_YTDL.update(10, dialog_progress_title, translation(32014))

    from urlparse import urlparse
    parsed_uri = urlparse(url)
    domain = '{uri.netloc}'.format(uri=parsed_uri)

    try:
        import urlresolver
        # hmf = urlresolver.HostedMediaFile(url)
        dialog_progress_YTDL.update(20, dialog_progress_title, translation(32012))
        media_url = urlresolver.resolve(url)
        dialog_progress_YTDL.update(80, dialog_progress_title, translation(32013))
        if media_url:
            log('  URLResolver stream url=' + repr(media_url))
            pl = xbmc.PlayList(xbmc.PLAYLIST_VIDEO)
            pl.clear()
            pl.add(media_url, xbmcgui.ListItem(name))
            xbmc.Player().play(pl, windowed=False)  # scripts play video like this.
        else:
            log("  Can't URL Resolve:" + repr(url))
            xbmc_notify('URLresolver', translation(32192), icon="type_urlr.png")  # Failed to get playable url
    except Exception as e:
        xbmc_notify('URLresolver:' + domain, str(e), icon="type_urlr.png")
    dialog_progress_YTDL.close()

def analysisdomain(urls):
    # Note: the original tuple had a missing comma and missing leading dots
    # ('edu.cn', 'gov.cn' '.org.cn'), which silently produced the entry
    # 'gov.cn.org.cn'; fixed to '.edu.cn', '.gov.cn', '.org.cn'.
    topHostPostfix = (
        '.com', '.cn', '.la', '.io', '.co', '.info', '.net', '.org', '.me', '.mobi',
        '.us', '.biz', '.xxx', '.ca', '.co.jp', '.com.cn', '.net.cn', '.edu.cn', '.gov.cn',
        '.org.cn', '.mx', '.tv', '.ws', '.ag', '.com.ag', '.net.ag',
        '.org.ag', '.am', '.asia', '.at', '.be', '.com.br', '.net.br',
        '.bz', '.com.bz', '.net.bz', '.cc', '.com.co', '.net.co',
        '.nom.co', '.de', '.es', '.com.es', '.nom.es', '.org.es',
        '.eu', '.fm', '.fr', '.gs', '.in', '.co.in', '.firm.in', '.gen.in',
        '.ind.in', '.net.in', '.org.in', '.it', '.jobs', '.jp', '.ms',
        '.com.mx', '.nl', '.nu', '.co.nz', '.net.nz', '.org.nz',
        '.se', '.tc', '.tk', '.tw', '.com.tw', '.idv.tw', '.org.tw',
        '.hk', '.co.uk', '.me.uk', '.org.uk', '.vg', '.com.hk')
    regx = r'[^\.]+(' + '|'.join([h.replace('.', r'\.') for h in topHostPostfix]) + ')$'
    pattern = re.compile(regx, re.IGNORECASE)
    hosts = []
    for url in urls:
        # print url
        parts = urlparse(url)
        host = parts.netloc
        m = pattern.search(host)
        res = m.group() if m else host
        if res:
            hosts.append(res)
    return hosts

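# Illustrative check of analysisdomain() above (assumes it is defined in this
# module): the regex strips sub-domains down to the registered domain, falling
# back to the raw netloc when no known suffix matches.
#   analysisdomain(['http://www.example.com.cn/page'])  -> ['example.com.cn']
#   analysisdomain(['https://sub.test.io/'])            -> ['test.io']
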
def getUrl(self):
    """ """
    # Incomplete in the original snippet: the parsed pieces are computed
    # but never combined or returned.
    url_obj = urlparse(self.url)
    scheme = self.getScheme(url_obj)
    hostname = self.getHostname(url_obj)

def setup(bot):
    global url_finder, exclusion_char, api_url, api_key, api_user, api_private
    if bot.config.bookie.api_url:
        try:
            # say we have "https://example.com/prefix/api/v1/admin/account?api_key=XXXXXX"
            p = urlparse(bot.config.bookie.api_url)
            # "https://example.com"
            api_url = p.scheme + "://" + p.netloc
            # "/prefix"
            prefix = p.path.split(api_suffix)[0]
            if prefix:
                api_url += prefix
            # "/api/v1/"
            api_url += api_suffix
            # the path element after api_suffix, that is, "admin"
            api_user = p.path.split(api_suffix)[1].split("/")[0]
            # "XXXXXX"
            api_key = p.query.split("=")[1]
        except Exception as e:
            raise ConfigurationError("Bookie api_url badly formatted: %s" % str(e))
    else:
        raise ConfigurationError("Bookie module not configured")

    api_private = validate_private(bot.config.bookie.private)

    if bot.config.has_option("url", "exclusion_char"):
        exclusion_char = bot.config.url.exclusion_char
    url_finder = re.compile(r"(?u)(.*?)\s*(%s?(?:http|https|ftp)(?:://\S+)\s*(.*?))" % (exclusion_char))

    if bot.config.bookie.auto:
        if not bot.memory.contains("url_callbacks"):
            bot.memory["url_callbacks"] = tools.SopelMemory()
        bot.memory["url_callbacks"][re.compile(".*")] = bmark

def attacking(self, target, rounds):
    print "[Info] [AI] TCP Starvation (NUKE) is ready to fire: [", rounds, "nukes ]"
    if target.startswith('http://'):
        target = target.replace('http://', '')
        port = 80
    elif target.startswith('https://'):
        target = target.replace('https://', '')
        port = 443
    try:
        ip = socket.gethostbyname(target)
    except:
        try:
            import dns.resolver
            r = dns.resolver.Resolver()
            r.nameservers = ['8.8.8.8', '8.8.4.4']  # Google DNS resolvers
            url = urlparse(target)
            a = r.query(url.netloc, "A")  # A record
            for rd in a:
                ip = str(rd)
        except:
            ip = target
    if ip == "127.0.0.1" or ip == "localhost":
        print "[Info] [AI] [NUKE] Sending message '1/0 %====D 2 Ur ;-0' to 'localhost' -> [OK!]\n"
        return
    nukeize(ip, port, rounds)  # attack with NUKE using threading

def sanitize_redirect(host, redirect_to):
    """
    Given the hostname and an untrusted URL to redirect to, this method
    tests it to make sure it isn't garbage/harmful and returns it,
    otherwise returns None, similar to how it's done in
    django.contrib.auth.views.

    >>> print sanitize_redirect('myapp.com', None)
    None
    >>> print sanitize_redirect('myapp.com', '')
    None
    >>> print sanitize_redirect('myapp.com', {})
    None
    >>> print sanitize_redirect('myapp.com', 'http://notmyapp.com/path/')
    None
    >>> print sanitize_redirect('myapp.com', 'http://myapp.com/path/')
    http://myapp.com/path/
    >>> print sanitize_redirect('myapp.com', '/path/')
    /path/
    """
    # Quick sanity check.
    if not redirect_to:
        return None
    # Heavier security check, don't allow redirection to a different host.
    try:
        netloc = urlparse(redirect_to)[1]
    except TypeError:  # not a valid redirect_to value
        return None
    if netloc and netloc != host:
        return None
    return redirect_to

def handle_relay(self):
    """Provided a URL such as localhost:7777 or app.marathon.mesos:7777,
    relay will ping that URL (http://localhost:7777/ping) and respond back.
    It is used for network testing in a cluster.
    """
    query = urlparse(self.path).query
    query_components = dict(qc.split("=") for qc in query.split("&"))
    logging.info(query_components)
    full_url = 'http://{}/ping'.format(query_components['url'])
    url_req = Request(full_url, headers={"User-Agent": "Mozilla/5.0"})
    response = urlopen(url_req)
    res = response.read()
    status = response_status(response)
    logging.debug("Relay request is %s, %s", res, status)
    self.send_response(status)
    self.send_header('Content-type', 'text/html')
    self.end_headers()
    self.wfile.write(res)
    marathonId = os.getenv("MARATHON_APP_ID", "NO_MARATHON_APP_ID_SET")
    msg = "\nRelay from {}".format(marathonId)
    self.wfile.write(byte_type(msg, "UTF-8"))
    return

def sendRequests(csrf, session, s):
    while len(link_queue) > 0:
        try:
            link = link_queue.pop(0)
        except IndexError:
            break
        if link not in visited:
            # print len(sent_links)
            parsed = urlparse(link)
            linkrequest = HTTPrequest()
            linkrequest.type = "GET"
            linkrequest.version = "1.1"
            linkrequest.host = parsed.netloc
            linkrequest.path = parsed.path
            linkrequest.cookies['csrf'] = csrf
            linkrequest.cookies['sessionid'] = session
            linkrequest.connection = "Keep-Alive"
            # linkrequest.encoding = "gzip"
            try:
                sendRequestOnly(linkrequest, s)
                sent_links.append(link)
                print(link + " " + str(len(secret_flags)))
            except socket.error as err:
                link_queue.insert(0, link)
                # s.close()
                break

def __validateUri(self, url):
    splitUri = urlparse(url)
    return splitUri.netloc + splitUri.path

try:
    with open('HostsList', 'r') as fp:
        HostsList = [''.join(i.split()) for i in fp.readlines() if len(i) > 4]
        if not HostsList:
            sys.exit(putColor('[X]', 'red') + 'HostsList is empty')
except Exception, e:
    if str(e):
        sys.exit(putColor('[X]', 'red') + 'Where is your HostsList?')
    else:
        HostsList = [HostsList]

Sites = []
for Host in HostsList:
    Host = urlparse(Host)
    Host = Host.netloc if Host.netloc else Host.path
    print '[!]Searching Sites for:', Host, '\n'
    if Host == '':
        print putColor('[X]', 'red') + 'Error Host or IP'
        continue
    # ---------------- API ----------------------
    Sites.extend(dns_aizhan_com.search(Host, s))
    Sites.extend(webscan_cc.search(Host, s))
    Sites.extend(site_ip138_com.search(Host, s))
    Sites.extend(bing_com.search(Host, s))
    # ---------------- end ----------------------

def listLinksInComment(url, name, type_):
    from domains import parse_reddit_link, build_DirectoryItem_url_based_on_media_type
    from utils import markdown_to_bbcode, unescape
    from guis import progressBG

    log('listLinksInComment:%s:%s' % (type_, url))

    directory_items = []
    author = ""
    ShowOnlyCommentsWithlink = False
    if type_ == 'linksOnly':
        ShowOnlyCommentsWithlink = True

    url = urllib.quote_plus(url, safe=':/?&')
    if '?' in url:
        url = url.split('?', 1)[0] + '.json?' + url.split('?', 1)[1]
    else:
        url += '.json'

    loading_indicator = progressBG(translation(30024))
    loading_indicator.update(0, 'Retrieving comments')

    content = reddit_request(url)
    if not content:
        loading_indicator.end()
        return

    loading_indicator.update(10, 'Parsing')
    content = json.loads(content)

    del harvest[:]
    r_linkHunter(content[0]['data']['children'])

    try:
        submitter = content[0]['data']['children'][0]['data']['author']
    except:
        submitter = ''
    try:
        post_title = content[0]['data']['children'][0]['data']['title']
    except:
        post_title = ''

    r_linkHunter(content[1]['data']['children'])

    comment_score = 0
    loading_indicator.set_tick_total(len(harvest))

    for i, h in enumerate(harvest):
        try:
            comment_score = h[0]
            link_url = h[2]
            desc100 = h[3].replace('\n', ' ')[0:100]  # first 100 characters of description
            kind = h[6]  # reddit uses t1 for user comments and t3 for OP text of the post, like a poster describing the post.
            d = h[5]     # depth of the comment
            tab = " " * d if d > 0 else "-"

            from urlparse import urlparse
            domain = '{uri.netloc}'.format(uri=urlparse(link_url))
            author = h[7]
            DirectoryItem_url = ''

            if comment_score < int_CommentTreshold:
                continue

            ld = parse_reddit_link(link_url=link_url, assume_is_video=False, needs_preview=True, get_playable_url=True)

            if kind == 't1':
                list_title = r"[COLOR cadetblue]%3d[/COLOR] %s" % (h[0], tab)
            elif kind == 't3':
                list_title = r"[COLOR cadetblue]Title [/COLOR] %s" % (tab)

            plot = h[3].replace('](', '] (')
            plot = markdown_to_bbcode(plot)
            plot = unescape(plot)  # convert html entities e.g.:(')

            liz = xbmcgui.ListItem(label=list_title + ': ' + desc100)
            liz.setInfo(type="Video", infoLabels={"Title": h[1], "plot": plot, "studio": domain, "votes": str(comment_score), "director": author})
            isFolder = False

            if link_url:
                DirectoryItem_url, setProperty_IsPlayable, isFolder, title_prefix = build_DirectoryItem_url_based_on_media_type(ld, link_url)
                liz.setProperty('IsPlayable', setProperty_IsPlayable)
                liz.setProperty('url', DirectoryItem_url)  # <-- needed by the xml gui skin
                liz.setPath(DirectoryItem_url)

                if domain:
                    plot = "  [COLOR greenyellow][%s] %s" % (domain, plot) + "[/COLOR]"
                else:
                    plot = "  [COLOR greenyellow][%s]" % (plot) + "[/COLOR]"
                liz.setLabel(list_title + plot)

                if ld:
                    liz.setArt({"thumb": ld.poster, "poster": ld.poster, "banner": ld.poster, "fanart": ld.poster, "landscape": ld.poster})

                if DirectoryItem_url:
                    directory_items.append((DirectoryItem_url, liz, isFolder,))
            else:
                if not ShowOnlyCommentsWithlink:
                    result = h[3].replace('](', '] (')
                    result = markdown_to_bbcode(result)
                    liz = xbmcgui.ListItem(label=list_title + desc100)
                    liz.setInfo(type="Video", infoLabels={"Title": h[1], "plot": result, "studio": domain, "votes": str(h[0]), "director": author})
                    liz.setProperty('IsPlayable', 'false')
                    directory_items.append(("", liz, False,))
        except Exception as e:
            log('  EXCEPTION:' + str(e))

        loading_indicator.tick(1, desc100)
    loading_indicator.end()

    xbmcplugin.setContent(pluginhandle, "movies")  # files, songs, artists, albums, movies, tvshows, episodes, musicvideos
    xbmcplugin.setPluginCategory(pluginhandle, 'Comments')
    xbmcplugin.addDirectoryItems(handle=pluginhandle, items=directory_items)
    xbmcplugin.endOfDirectory(pluginhandle)

    if comments_viewMode:
        xbmc.executebuiltin('Container.SetViewMode(%s)' % comments_viewMode)

def validate_inputs(config, args, unknown_args):
    error_arr = []
    try:
        config.read(pil.config_path)

        if args.download:
            pil.dl_user = args.download
            if args.downloadfollowing or args.batchfile:
                logger.banner()
                logger.warn("Please use only one download method. Use -h for more information.")
                logger.separator()
                return False
        elif not args.clean and not args.info and not args.assemble and not args.downloadfollowing and not args.batchfile:
            logger.banner()
            logger.error("Please use a download method. Use -h for more information.")
            logger.separator()
            return False

        if helpers.bool_str_parse(config.get('pyinstalive', 'log_to_file')) == "Invalid":
            pil.log_to_file = True
            error_arr.append(['log_to_file', 'True'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'log_to_file')):
            pil.log_to_file = True
        else:
            pil.log_to_file = False

        logger.banner()

        if args.batchfile:
            if os.path.isfile(args.batchfile):
                pil.dl_batchusers = [user.rstrip('\n') for user in open(args.batchfile)]
                if not pil.dl_batchusers:
                    logger.error("The specified file is empty.")
                    logger.separator()
                    return False
                else:
                    logger.info("Downloading {:d} users from batch file.".format(len(pil.dl_batchusers)))
                    logger.separator()
            else:
                logger.error('The specified file does not exist.')
                logger.separator()
                return False

        if unknown_args:
            pil.uargs = unknown_args
            logger.warn("The following unknown argument(s) were provided and will be ignored: ")
            logger.warn('    ' + ' '.join(unknown_args))
            logger.separator()

        pil.ig_user = config.get('pyinstalive', 'username')
        pil.ig_pass = config.get('pyinstalive', 'password')
        pil.dl_path = config.get('pyinstalive', 'download_path')
        pil.run_at_start = config.get('pyinstalive', 'run_at_start')
        pil.run_at_finish = config.get('pyinstalive', 'run_at_finish')
        pil.ffmpeg_path = config.get('pyinstalive', 'ffmpeg_path')
        pil.args = args
        pil.config = config
        pil.proxy = config.get('pyinstalive', 'proxy')

        if args.configpath:
            pil.config_path = args.configpath
            if not os.path.isfile(pil.config_path):
                pil.config_path = os.path.join(os.getcwd(), "pyinstalive.ini")
                logger.warn("Custom config path is invalid, falling back to default path: {:s}".format(pil.config_path))
                logger.separator()

        if args.dlpath:
            pil.dl_path = args.dlpath

        if helpers.bool_str_parse(config.get('pyinstalive', 'show_cookie_expiry')) == "Invalid":
            pil.show_cookie_expiry = False
            error_arr.append(['show_cookie_expiry', 'False'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'show_cookie_expiry')):
            pil.show_cookie_expiry = True
        else:
            pil.show_cookie_expiry = False

        if helpers.bool_str_parse(config.get('pyinstalive', 'use_locks')) == "Invalid":
            pil.use_locks = False
            error_arr.append(['use_locks', 'False'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'use_locks')):
            pil.use_locks = True
        else:
            pil.use_locks = False

        if helpers.bool_str_parse(config.get('pyinstalive', 'clear_temp_files')) == "Invalid":
            pil.clear_temp_files = False
            error_arr.append(['clear_temp_files', 'False'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'clear_temp_files')):
            pil.clear_temp_files = True
        else:
            pil.clear_temp_files = False

        if helpers.bool_str_parse(config.get('pyinstalive', 'do_heartbeat')) == "Invalid":
            pil.do_heartbeat = True
            error_arr.append(['do_heartbeat', 'True'])
        if helpers.bool_str_parse(config.get('pyinstalive', 'do_heartbeat')):
            pil.do_heartbeat = True
        if args.noheartbeat or not helpers.bool_str_parse(config.get('pyinstalive', 'do_heartbeat')):
            pil.do_heartbeat = False
            logger.warn("Getting livestream heartbeat is disabled, this may cause degraded performance.")
            logger.separator()

        if not args.nolives and helpers.bool_str_parse(config.get('pyinstalive', 'download_lives')) == "Invalid":
            pil.dl_lives = True
            error_arr.append(['download_lives', 'True'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'download_lives')):
            pil.dl_lives = True
        else:
            pil.dl_lives = False

        if not args.noreplays and helpers.bool_str_parse(config.get('pyinstalive', 'download_replays')) == "Invalid":
            pil.dl_replays = True
            error_arr.append(['download_replays', 'True'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'download_replays')):
            pil.dl_replays = True
        else:
            pil.dl_replays = False

        if helpers.bool_str_parse(config.get('pyinstalive', 'download_comments')) == "Invalid":
            pil.dl_comments = True
            error_arr.append(['download_comments', 'True'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'download_comments')):
            pil.dl_comments = True
        else:
            pil.dl_comments = False

        if args.nolives:
            pil.dl_lives = False

        if args.noreplays:
            pil.dl_replays = False

        if not pil.dl_lives and not pil.dl_replays:
            logger.error("You have disabled both livestream and replay downloading.")
            logger.error("Please enable at least one of them and try again.")
            logger.separator()
            return False

        if pil.ffmpeg_path:
            if not os.path.isfile(pil.ffmpeg_path):
                pil.ffmpeg_path = None
                cmd = "where" if platform.system() == "Windows" else "which"
                logger.warn("Custom ffmpeg binary path is invalid, falling back to default path: {:s}".format(
                    subprocess.check_output([cmd, 'ffmpeg']).decode('UTF-8').rstrip()))
            else:
                logger.binfo("Overriding ffmpeg binary path: {:s}".format(pil.ffmpeg_path))

        if not pil.ig_user or not len(pil.ig_user):
            raise Exception("Invalid value for 'username'. This value is required.")

        if not pil.ig_pass or not len(pil.ig_pass):
            raise Exception("Invalid value for 'password'. This value is required.")

        if not pil.dl_path.endswith('/'):
            pil.dl_path = pil.dl_path + '/'
        if not pil.dl_path or not os.path.exists(pil.dl_path):
            pil.dl_path = os.getcwd()
            if not args.dlpath:
                error_arr.append(['download_path', os.getcwd()])
            else:
                logger.warn("Custom config path is invalid, falling back to default path: {:s}".format(pil.dl_path))
                logger.separator()

        if pil.proxy and pil.proxy != '':
            parsed_url = urlparse(pil.proxy)
            if not parsed_url.netloc or not parsed_url.scheme:
                error_arr.append(['proxy', 'None'])
                pil.proxy = None

        if error_arr:
            for error in error_arr:
                logger.warn("Invalid value for '{:s}'. Using default value: {:s}".format(error[0], error[1]))
                logger.separator()

        if args.info:
            helpers.show_info()
            return False
        elif args.clean:
            helpers.clean_download_dir()
            return False
        elif args.assemble:
            pil.assemble_arg = args.assemble
            assembler.assemble()
            return False

        return True
    except Exception as e:
        logger.error("An error occurred: {:s}".format(str(e)))
        logger.error("Make sure the config file and given arguments are valid and try again.")
        logger.separator()
        return False

def get_domain(url):
    return urlparse(url)[1]

def find_domain(url):
    from urlparse import urlparse
    # return url.split("//")[-1].split("/")[0]
    domain = urlparse(url).hostname.split('.')[1]
    return domain

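# Caveat for find_domain() above (illustrative): taking hostname.split('.')[1]
# assumes a 'www.'-style prefix, so the result shifts when there is none:
#   find_domain('http://www.example.com/x') -> 'example'
#   find_domain('http://example.com/x')     -> 'com'
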
# -*- coding: utf-8 -*-
import pymssql
import sys
import os
from urlparse import *

reload(sys)
sys.setdefaultencoding('utf8')
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

conn = pymssql.connect('10.101.7.128', 'sa', 'Password01!',
                       'ZhongYan_SustainableDevelpoment', charset='utf8')
cursor = conn.cursor()

# sql = "exec PROC_INSERT_PARSE_RESULT @parse_id='%s',@index_id='%s',@index_base_id='%s',@data=N'%s',@source_id='%s',@relation=%s" % ('9FFE6E7D56A543ABA612B63203AEB9F1', 'FD306F8D6C04409F8EA9D780C30FD5B8', '00A14D4BAE92CA443FAAC16A8B171BC', '阿斯顿自行车阿斯顿现在', '02647911-2DCF-4086-94AB-658934B84663', '2.13221')
# sql = "insert into TB_CRAWL_RESULT(ID,PARSE_HISTORY_ID,SADP_INDEX_ID,SADP_INDEX_BASE_DATA_ID,DATA) values ('%s','%s','%s','%s',N'%s')" % (str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4()), '啊实打实大苏打')
# cursor.execute(sql)
# conn.commit()

cursor.execute("SELECT URL,ID FROM [dbo].[View_COMPANY_SOURCE]")
row = cursor.fetchone()
while row:
    ym = urlparse(row[0]).netloc
    print ym
    row = cursor.fetchone()

def pre_request_hook(req):
    if 'Host' not in req.headers:
        hostname = urlparse(req.full_url)[1]
        req.headers['Host'] = hostname

def get_suffix1(p):
    if len(p) == 1:
        # return "pure domain"
        return "nil"
    fields = p.split("/")
    if len(fields) == 0 or len(fields) == 1:
        return "null"
    fields1 = fields[len(fields) - 1].split(".")
    if len(fields1) == 0 or len(fields1) == 1:
        return "null"
    else:
        return fields1[len(fields1) - 1]


for line in fd.readlines():
    fields = line.strip().split("'")
    domain = fields[13]
    if len(domain) < 3:
        continue
    hit_miss = fields[25]
    if len(hit_miss) < 2:
        continue
    if len(fields[15]) < 5:
        continue
    result = urlparse(fields[15])
    sfx = get_suffix1(result.path)
    print sfx

def __call__(self, env, start_response):
    def custom_start_response(status, headers):
        if self.delay_auth_decision:
            headers.append(
                ('WWW-Authenticate', "Basic realm='Use guest/guest'"))
        return start_response(status, headers)

    # Prep headers to proxy request to remote service
    proxy_headers = env.copy()
    user = ''

    # Look for authentication
    if 'HTTP_AUTHORIZATION' not in env:
        # No credentials were provided
        if self.delay_auth_decision:
            _decorate_request_headers("X_IDENTITY_STATUS", "Invalid",
                                      proxy_headers, env)
        else:
            # If the user isn't authenticated, we reject the request and
            # return 401 indicating we need Basic Auth credentials.
            ret = HTTPUnauthorized(
                "Authentication required",
                [('WWW-Authenticate', 'Basic realm="Use guest/guest"')])
            return ret(env, start_response)
    else:
        # Claims were provided - validate them
        import base64
        auth_header = env['HTTP_AUTHORIZATION']
        _auth_type, encoded_creds = auth_header.split(None, 1)
        user, password = base64.b64decode(encoded_creds).split(':', 1)
        if not self.validateCreds(user, password):
            # Claims were rejected
            if not self.delay_auth_decision:
                # Reject request (or ask for valid claims)
                ret = HTTPUnauthorized(
                    "Authentication required",
                    [('WWW-Authenticate', 'Basic realm="Use guest/guest"')])
                return ret(env, start_response)
            else:
                # Claims are valid, forward request
                _decorate_request_headers("X_IDENTITY_STATUS", "Invalid",
                                          proxy_headers, env)

        # TODO(Ziad): add additional details we may need,
        #             like tenant and group info
        _decorate_request_headers('X_AUTHORIZATION', "Proxy %s" % user,
                                  proxy_headers, env)
        _decorate_request_headers("X_IDENTITY_STATUS", "Confirmed",
                                  proxy_headers, env)
        _decorate_request_headers('X_TENANT', 'blank', proxy_headers, env)

    # Auth processed, headers added, now decide how to pass on the call
    if self.app:
        # Pass to downstream WSGI component
        env['HTTP_AUTHORIZATION'] = "Basic %s" % self.service_pass
        return self.app(env, custom_start_response)

    proxy_headers['AUTHORIZATION'] = "Basic %s" % self.service_pass
    # We are forwarding to a remote service (no downstream WSGI app)
    req = Request(proxy_headers)
    parsed = urlparse(req.url)
    conn = http_connect(self.service_host, self.service_port,
                        req.method, parsed.path,
                        proxy_headers,
                        ssl=(self.service_protocol == 'https'))
    resp = conn.getresponse()
    data = resp.read()
    # TODO(ziad): use a more sophisticated proxy
    # we are rewriting the headers now
    return Response(status=resp.status, body=data)(env, start_response)

def get_domain(url):
    return urlparse(url, scheme='')[1]

def main():
    module = AnsibleModule(argument_spec=dict(
        group_id=dict(default=None),
        artifact_id=dict(default=None),
        version=dict(default="latest"),
        classifier=dict(default=None),
        extension=dict(default='jar'),
        repository_url=dict(default=None),
        username=dict(default=None, aliases=['aws_secret_key']),
        password=dict(default=None, no_log=True, aliases=['aws_secret_access_key']),
        state=dict(default="present", choices=["present", "absent"]),  # TODO - Implement a "latest" state
        timeout=dict(default=10, type='int'),
        dest=dict(type="path", default=None),
        validate_certs=dict(required=False, default=True, type='bool'),
    ))

    repository_url = module.params["repository_url"]
    if not repository_url:
        repository_url = "http://repo1.maven.org/maven2"

    try:
        parsed_url = urlparse(repository_url)
    except AttributeError as e:
        module.fail_json(msg='url parsing went wrong %s' % e)

    if parsed_url.scheme == 's3' and not HAS_BOTO:
        module.fail_json(
            msg='boto3 required for this module, when using s3:// repository URLs')

    group_id = module.params["group_id"]
    artifact_id = module.params["artifact_id"]
    version = module.params["version"]
    classifier = module.params["classifier"]
    extension = module.params["extension"]
    repository_username = module.params["username"]
    repository_password = module.params["password"]
    state = module.params["state"]
    dest = module.params["dest"]

    # downloader = MavenDownloader(module, repository_url, repository_username, repository_password)
    downloader = MavenDownloader(module, repository_url)

    try:
        artifact = Artifact(group_id, artifact_id, version, classifier, extension)
    except ValueError as e:
        module.fail_json(msg=e.args[0])

    prev_state = "absent"
    if os.path.isdir(dest):
        dest = posixpath.join(dest, artifact_id + "-" + version + "." + extension)
    if os.path.lexists(dest) and downloader.verify_md5(
            dest, downloader.find_uri_for_artifact(artifact) + '.md5'):
        prev_state = "present"
    else:
        path = os.path.dirname(dest)
        if not os.path.exists(path):
            os.makedirs(path)

    if prev_state == "present":
        module.exit_json(dest=dest, state=state, changed=False)

    try:
        if downloader.download(artifact, dest):
            module.exit_json(state=state, dest=dest, group_id=group_id,
                             artifact_id=artifact_id, version=version,
                             classifier=classifier, extension=extension,
                             repository_url=repository_url, changed=True)
        else:
            module.fail_json(msg="Unable to download the artifact")
    except ValueError as e:
        module.fail_json(msg=e.args[0])

# STEP 4: scrape those pages in google with beautiful soup web scraper
pages = []
pages.append([])
pages.append([])
'''
EXAMPLE:
# Add elements to empty lists.
elements[0].append(1)
elements[0].append(2)
elements[1].append(3)
elements[1].append(4)
'''
for result in search_results:  # each result is a GoogleResult object
    # Layer 0 of pages: the string-ified HTML of the result page.
    # (Fetching the page first is an assumption here; BeautifulSoup needs the
    # markup itself, not just the link. urllib2 is assumed to be imported
    # elsewhere in this script.)
    html = urllib2.urlopen(result.link).read()
    pages[0].append(str(BeautifulSoup(html, 'html.parser')))
    # Layer 1 of pages: the corresponding homepage URL of the same result.
    pages[1].append(str(urlparse(result.link).netloc))
'''
So in my mind it kinda looks like this:
|-------------|-------------|
|HTML OF PAGE1|HTML OF PAGE2| <-----pages[0]
|-------------|-------------|      ETC.
|PAGE1.com    |PaGe2.net    | <-----pages[1]
|-------------|-------------|
'''
goodchars = {"1", "2", "3", "4",
f = open("dmozURLS", "r")
f2 = open("dmozURLsranked", "w")
for line in f:
    url = line.split(",")[0]
    indom = 0
    outdom = 0
    suburls = []
    try:
        html = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}).text
    except:
        continue
    soup = BeautifulSoup(html)
    try:
        parsed_uri = urlparse(url)
        domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
    except:
        domain = '++++++'
    for link in soup.find_all('a', href=True):
        try:
            # Call urlparse() directly, matching the import used above;
            # urlparse.urlparse() would fail when urlparse is the function.
            n = urlparse(link['href']).netloc
        except:
            continue
        if domain in link['href'] or not bool(n):
            indom += 1
        else:
            outdom += 1
            suburls.append(link['href'])
    # except:
    #     continue

def host_injection(url, vuln_parameter, payload):
    payload = urlparse(url).netloc + payload

    def inject_host(url, vuln_parameter, payload, proxy):
        if proxy == None:
            opener = urllib2.build_opener()
        else:
            opener = urllib2.build_opener(proxy)

        # Check if defined POST data
        if menu.options.data:
            menu.options.data = settings.USER_DEFINED_POST_DATA
            request = urllib2.Request(url, menu.options.data)
        else:
            url = parameters.get_url_part(url)
            request = urllib2.Request(url)

        # Check if defined extra headers.
        headers.do_check(request)
        payload = checks.newline_fixation(payload)
        request.add_header('Host', payload)
        try:
            headers.check_http_traffic(request)
            response = opener.open(request)
            return response
        except ValueError:
            pass

    if settings.TIME_RELATIVE_ATTACK:
        start = 0
        end = 0
        start = time.time()

    proxy = None
    # response = inject_host(url, vuln_parameter, payload, proxy)
    # Check if defined any HTTP Proxy.
    if menu.options.proxy:
        try:
            proxy = urllib2.ProxyHandler({settings.SCHEME: menu.options.proxy})
            response = inject_host(url, vuln_parameter, payload, proxy)
        except urllib2.HTTPError, err_msg:
            if str(err_msg.code) == settings.INTERNAL_SERVER_ERROR:
                response = False
            elif settings.IGNORE_ERR_MSG == False:
                err = str(err_msg) + "."
                if not settings.VERBOSITY_LEVEL >= 1 and settings.TIME_BASED_STATE == False or \
                        settings.VERBOSITY_LEVEL >= 1 and settings.EVAL_BASED_STATE == None:
                    print("")
                if settings.VERBOSITY_LEVEL >= 1 and settings.LOAD_SESSION == False:
                    print("")
                print(settings.print_critical_msg(err))
                continue_tests = checks.continue_tests(err_msg)
                if continue_tests == True:
                    settings.IGNORE_ERR_MSG = True
                else:
                    raise SystemExit()
                response = False
        except urllib2.URLError, err_msg:
            err_msg = str(err_msg.reason).split(" ")[2:]
            err_msg = ' '.join(err_msg) + "."
            if settings.VERBOSITY_LEVEL >= 1 and settings.LOAD_SESSION == False:
                print("")
            print(settings.print_critical_msg(err_msg))
            raise SystemExit()

def register_node(self, address):
    url = urlparse(address)
    if url.netloc:
        self.nodes.add(url)

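# Note on register_node() above (illustrative): urlparse() only populates
# netloc when the address carries a scheme or starts with '//', so a bare
# 'host:port' string fails the check and is silently skipped.
#   urlparse('http://192.168.0.5:5000').netloc -> '192.168.0.5:5000'   (added)
#   urlparse('192.168.0.5:5000').netloc        -> ''                   (skipped)
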
def GetModelList(self, dName):
    """Get the list of files from the dName directory."""
    ### imports are here because the simulator (PyDEVS or PyPDEVS) requires it
    from DomainInterface.DomainBehavior import DomainBehavior

    ### list of py files from __init__.py
    if LibraryTree.EXT_LIB_PYTHON_FLAG:
        ### list of py files from a URL
        if dName.startswith('http'):
            o = urlparse(dName)
            c = httplib.HTTPConnection(o.netloc)
            c.request('GET', o.path + '/__init__.py')
            r = c.getresponse()
            code = r.read()
            if r.status == 200:
                exec code
                tmp = filter(
                    lambda s: s.replace('\n', '').replace('\t', '').replace(',', '').replace('"', "").replace('\'', "").strip(),
                    __all__)
                ### test whether the content of the __init__.py file is a python file (isfile equivalent)
                py_file_list = [
                    s for s in tmp
                    if 'python' in urlopen(dName + '/' + s + '.py').info().type
                ]
            else:
                py_file_list = []
            return py_file_list
        else:
            try:
                name_list = getFileListFromInit(os.path.join(dName, '__init__.py'))
                py_file_list = []
                for s in name_list:
                    python_file = os.path.join(dName, s + '.py')
                    ### test that only python files are listed (the user may write directory
                    ### names into the __init__.py file, so directory names are dropped)
                    if os.path.isfile(python_file):
                        cls = GetClass(python_file)
                        if cls is not None and not isinstance(cls, tuple):
                            ### only models that inherit from DomainBehavior are shown in the lib
                            if issubclass(cls, DomainBehavior):
                                py_file_list.append(s)
                            else:
                                sys.stderr.write(_("%s not imported : Class is not DomainBehavior \n" % (s)))
                        ### If cls is a tuple, there is an error but we load the model so it can be corrected.
                        ### If it is not a DEVS model, DnD does not allow instantiation, and once the error
                        ### is corrected it does not appear before an update.
                        else:
                            py_file_list.append(s)
            except Exception, info:
                py_file_list = []
                # if dName contains a python file, __init__.py is forced
                for f in os.listdir(dName):
                    if f.endswith('.py'):
                        sys.stderr.write(_("%s not imported : %s \n" % (dName, info)))
                        break