Example #1
    def process_response(response):


        if 300 <= response.status < 400:
            visited.append(sent_links.pop(0))
            link_queue.insert(0,response.location)
        elif 500 <= response.status < 600:
            link = sent_links.pop(0)
            link_queue.insert(0,link)
        elif response.status == 200 and response.status_message == 'OK':
            visited.append(sent_links.pop(0))
            soup = BeautifulSoup(response.content,"html.parser")

            a_tags = soup.find_all('a')
            h2_tags = soup.find_all('h2',{'class':'secret_flag'})

            for h2_tag in h2_tags:
                if h2_tag.contents[0].split(" ")[1] not in secret_flags:
                    secret_flags.append(h2_tag.contents[0].split(" ")[1])

            for a_tag in a_tags:
                if urljoin(default_netloc,a_tag['href']) not in visited:
                    parsed = urlparse(a_tag['href'])
                    if (parsed.netloc == '' or parsed.netloc == urlparse(default_netloc).netloc) and \
                        (parsed.scheme == 'http' or parsed.scheme == ''):
                        if parsed.netloc == '':
                            link_queue.append(urljoin(default_netloc,a_tag['href']))
                        else:
                            link_queue.append(a_tag['href'])
Example #2
def Relative_URL_Checker(url,originator_url):
    if url[0] == u'/':
        parsed_url = urlparse(url)
        origParsed = urlparse(originator_url)
        hostname = u"http://" + origParsed.netloc
        new_full_url = urljoin(hostname, parsed_url.path)
        return new_full_url
    else:
        return url
Example #3
def hostname(url):
  from urlparse import urlparse
  if url:
    if '//' in url:
      return urlparse(url).hostname
    else:
      url = 'http://' + url
      return urlparse(url).hostname
  else:
    return None
Example #4
def path(url):
  from urlparse import urlparse
  if url:
    if '//' in url:
      return urlparse(url).path
    else:
      url = 'http://' + url
      return urlparse(url).path
  else:
    return None
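Both helpers above prepend a scheme when the input lacks '//', so that urlparse can actually populate the hostname. A minimal illustration of the intended behavior (hypothetical inputs, Python 2 urlparse as imported above):

print(hostname('example.com/a/b'))         # 'example.com'  (scheme added before parsing)
print(hostname('http://example.com/a/b'))  # 'example.com'
print(path('example.com/a/b'))             # '/a/b'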
Example #5
 def urljoin(base, url, allow_fragments=True):
     """Join a base URL and a possibly relative URL to form an absolute
     interpretation of the latter."""
     if not base:
         return url
     if not url:
         return base
     bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
             urlparse(base, '', allow_fragments)
     scheme, netloc, path, params, query, fragment = \
             urlparse(url, bscheme, allow_fragments)
     if scheme != bscheme or scheme not in uses_relative:
         return url
     if scheme in uses_netloc:
         if netloc:
             return urlunparse((scheme, netloc, path,
                                params, query, fragment))
         netloc = bnetloc
     if path[:1] == '/':
         return urlunparse((scheme, netloc, path,
                            params, query, fragment))
     if not path and not params:
         path = bpath
         params = bparams
         if not query:
             query = bquery
         return urlunparse((scheme, netloc, path,
                            params, query, fragment))
     segments = bpath.split('/')[:-1] + path.split('/')
     # XXX The stuff below is bogus in various ways...
     if segments[-1] == '.':
         segments[-1] = ''
     while '.' in segments:
         segments.remove('.')
     while 1:
         i = 1
         n = len(segments) - 1
         while i < n:
             if (segments[i] == '..'
                 and segments[i-1] not in ('', '..')):
                 del segments[i-1:i+1]
                 break
             i = i+1
         else:
             break
     if segments == ['', '..']:
         segments[-1] = ''
     elif len(segments) >= 2 and segments[-1] == '..':
         segments[-2:] = ['']
     return urlunparse((scheme, netloc, '/'.join(segments),
                        params, query, fragment))
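This is essentially the standard library's urljoin; a few calls illustrating the joining rules implemented above:

print(urljoin('http://example.com/a/b.html', 'c.html'))   # http://example.com/a/c.html
print(urljoin('http://example.com/a/b.html', '/c.html'))  # http://example.com/c.html
print(urljoin('http://example.com/a/', '../c.html'))      # http://example.com/c.html
print(urljoin('http://example.com/a/b.html', ''))         # http://example.com/a/b.html (empty url returns the base)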
Example #6
def ProcessResource(url):

    if url in VISITED:
        return VISITED[url]

    print("ProcessResource " + url)

    o = urlparse(url)
    path = o.path
    regex = re.compile(".*\/")
    fileNameRoot = regex.sub("", path)

    fileName = fileNameRoot
    targetFileName = ""

    # Ensure that fileName is unique
    index = 0
    while True:
        targetFileName = CONFIG["output_dir"] + "/" + fileName
        if not os.path.exists(targetFileName):
            break
        index += 1
        fileName = "%d_%s" % (index, fileNameRoot)

    urllib.urlretrieve(url, targetFileName)

    print("Stored mapping " + url + " to " + fileName)
    VISITED[url] = fileName

    return fileName
Example #7
def get_facebook_info(url, size='large'):
    """
    Image sizes that can be requested:
     * small
     * normal
     * large
    """
    import base64
    import json
    import urllib2
    from urlparse import urlparse

    profile_info = {}
    try:
        res_parse = urlparse(url)
        used_id = res_parse.path.replace('/', '')

        # Get info (the Graph API returns JSON, so parse it rather than eval() it)
        url_info = 'http://graph.facebook.com/%s' % (used_id)
        profile_info = json.loads(urllib2.urlopen(url_info).read())

        # Get profile photo
        url_photo = 'http://graph.facebook.com/%s/picture?type=%s' % (profile_info['username'], size)
        source = urllib2.urlopen(url_photo).read()
        if source:
            profile_info['photo'] = base64.b64encode(source)
    except Exception:
        pass
    return profile_info
Example #8
  def _init_client(self):
    '''
    Parse the host URL and initialize a client connection.
    '''
    if not isinstance(self._host, (str,unicode)):
      return self._host


    # To force scheme and netloc behavior. Easily a bug here but let it 
    # go for now
    if '//' not in self._host:
      self._host = '//'+self._host
    location = urlparse(self._host)

    if location.scheme in ('','redis'):
      if ':' in location.netloc:
        host,port = location.netloc.split(':')
      else:
        host,port = location.netloc,6379

      # TODO: better matching here
      if location.path in ('', '/'):
        db = 0
      else:
        db = location.path[1:]

      return Redis(host=host, port=int(port), db=int(db))
      

    raise ValueError("unsupported scheme", location.scheme)      
Example #9
 def do_POST(self):
     url = self.path
     parsed = urlparse(url)
     query_dict = parse_qs(parsed.query)
     token = query_dict.get("csrfProxyCookie", ["NOTFOUND"])[0]
     query_list = parse_qsl(parsed.query)
     query_list = [(key, value) for key, value in query_list if key != "csrfProxyCookie"]
     tidied_query = urllib.urlencode(query_list)
     self.path = urlunparse((
         parsed.scheme,
         parsed.netloc,
         parsed.path,
         parsed.params,
         tidied_query,
         parsed.fragment))
     cookie = "csrfProxyCookie={}".format(token)
     if any(cookie in header for header in self.headers.getallmatchingheaders("cookie")):
         self.proxy_request("POST")
     else:
         print "CSRF attempt caught!"
         print "Cookie was {}".format(self.headers.getallmatchingheaders("cookie"))
         print "Token was {}".format(token)
         self.send_response(401, "Unauthorized")
         self.end_headers()
         self.wfile.write("401 - CSRF attempt detected")
         self.wfile.close()
Example #10
	def __init__(self, url, pretend):
		print 'Fetching from Klassikaraadio'
		self.hostname = urlparse(url).hostname
		self.pretend = pretend

		response = urllib2.urlopen(url)
		html = response.read().decode('utf-8', 'ignore')
		
		parsed_html = BeautifulSoup(html)
		self.title = parsed_html.body.find('h1').text
		
		if not os.path.exists(self.title):
			os.makedirs(self.title)
		
		container = parsed_html.body.find('div', attrs={'class': 'sisu_content'})
		
		episode_urls = []
		for link in container.find_all('a'):
			target = link.get('href')
			if '/helid?' in target:
				episode_urls.append(target)
		
		total = len(episode_urls)
		i = 1
		for episode in episode_urls:
			self.fetchEpisode(episode, i, total)
			i += 1
Example #11
def getpicname(path):
    """    retrieve the filename of a url        """
    if os.path.splitext(path)[1] == "":
        return None
    pr = urlparse(path)
    path = "http://" + pr[1] + pr[2]
    return os.path.split(path)[1]
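Illustrative calls (hypothetical URLs; re-parsing through urlparse is what drops the query string):

print(getpicname('http://example.com/images/photo.jpg?x=1'))  # 'photo.jpg'
print(getpicname('http://example.com/images/'))               # None (no file extension)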
Example #12
 def parseComment(self, response):
     sel = Selector(response)
     movieItem = MovieItem()
     movieItem['id'] = response.meta['id']
     commentLinks = sel.xpath(
         '//div[@id="comments"]/div[contains(@class, "comment-item")]')
     commentLinks.extract()
     comments = []
     for index, commentLink in enumerate(commentLinks):
         comment = {}
         comment['avatar'] = commentLink.xpath(
             'div[@class="avatar"]/a/img/@src').extract().pop()
         comment['uid'] = commentLink.xpath('div[@class="comment"]//span[@class="comment-info"]/a/@href').re(
             r"http://movie.douban.com/people/(.*)/").pop()
         comment['name'] = commentLink.xpath(
             'div[@class="comment"]//span[@class="comment-info"]/a/text()').extract().pop()
         comment['comment'] = commentLink.xpath(
             'div[@class="comment"]/p/text()').extract().pop()
         dateStr = commentLink.xpath(
             'div[@class="comment"]/h3/span[@class="comment-info"]/span/text()').re(r'\d+-\d+-\d+').pop()
         comment['date'] = datetime.strptime(dateStr, "%Y-%m-%d")
         comment['vote'] = int(
             commentLink.xpath('div[@class="comment"]//span[@class="comment-vote"]/span[contains(@class, "votes")]/text()').extract().pop())
         comments.append(comment)
     movieItem['comments'] = comments
     yield movieItem
     paginator = sel.xpath(
         '//div[@id="paginator"]/a[@class="next"]/@href').extract()
     parsedUrl = urlparse(response.url)
     return  # yan dd
     yield Request(url=parsedUrl.scheme + '://' + parsedUrl.netloc + parsedUrl.path + paginator.pop(), callback=self.parseComment, meta={'id': response.meta['id']})
Example #13
    def file_key(self, url):
        """
            return the SHA1 hash of the file url
        """

        file_guid = hashlib.sha1(url).hexdigest()
        return '%s_%s' % (urlparse(url).netloc, file_guid)
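The resulting key pairs the host with a digest of the full URL; roughly (placeholder digest, not a real value):

# file_key('http://example.com/files/data.zip')
#   -> 'example.com_<40-character sha1 hex digest of the url>'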
Example #14
def parse_git_show(remote, verb):
    fetch_url = ""
    for line in run_command("git remote show -n %s" % remote).split("\n"):
        if line.strip().startswith("%s" % verb):
            fetch_url = line.split()[2]

    parsed_url = urlparse(fetch_url)
    project_name = parsed_url.path.lstrip("/")
    if project_name.endswith(".git"):
        project_name = project_name[:-4]

    hostname = parsed_url.netloc
    username = None
    port = parsed_url.port

    if VERBOSE:
        print("Found origin %s URL:" % verb, fetch_url)

    # Workaround bug in urlparse on OSX
    if parsed_url.scheme == "ssh" and parsed_url.path[:2] == "//":
        hostname = parsed_url.path[2:].split("/")[0]

    if "@" in hostname:
        (username, hostname) = hostname.split("@")
    if ":" in hostname:
        (hostname, port) = hostname.split(":")

    # Is origin an ssh location? Let's pull more info
    if parsed_url.scheme == "ssh" and port is None:
        port = 22

    if port is not None:
        port = str(port)

    return (hostname, username, port, project_name)
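For a typical ssh-style remote, the parsing above works out roughly as follows (hypothetical remote URL):

# fetch_url = 'ssh://user@review.example.org:29418/project/name.git'
# parse_git_show(...) -> ('review.example.org', 'user', '29418', 'project/name')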
Example #15
    def download_torrent(self, url):
        content = self.httpRequest.getHttpContent(url)
        if(content==""):
            return False
        f = GetIdList()
        f.feed(content)

        boundary = "----WebKitFormBoundarydMcOM7W0mij63Igr"
        parts=[]
        for k,v in f.formdata.items():
            parts.append('--' + boundary)
            parts.append('Content-Disposition: form-data; name="'+k+'"')
            parts.append('')
            parts.append(v)
        parts.append('--' + boundary + '--')
        parts.append('\r\n')
        postdata = '\r\n'.join(parts)

        r = urlparse(url)
        downloadUrl="http://"+r.netloc+"/"+f.action
        content = self.httpRequest.getHttpContent(downloadUrl, postdata, { "Content-Type":"multipart/form-data; boundary="+boundary})
        if(content==""):
            return False
        filename=f.formdata['ref']+".torrent"
        f=open(filename, "wb")
        f.write(content)
        f.close()
        return True
Example #16
    def getGoogleResults(self, pluginname, latest, cve):
        try:
            gs = GoogleSearch("inurl:'wp-content/plugins/" + pluginname + "'", random_agent=True)
            gs.results_per_page = 100

            numberOfprocessed = 0
            self.all_run = []

            for i in range(int(limitForSearch)):
                results = gs.get_results()
                if not results:
                    break

                # Semaphore for write in order to screen
                self.checkSimultaneus = threading.Semaphore(int(NumThreats))
                # Semaphore for write to file
                self.writeFile = threading.Semaphore(int(NumThreats) - 1)

                for res in results:
                    self.checkSimultaneus.acquire()
                    host_name = urlparse(res.url.encode()).hostname
                    # Create thread
                    t = threading.Thread(target=self.__getGoogleResults, args=(host_name, latest, pluginname, cve))
                    self.all_run.append(t)
                    # run thread
                    self.all_run[len(self.all_run) - 1].start()

        except SearchError, e:
            print "Search failed: %s" % e
Example #17
    def urlappend(self, link, u):
        global domain
        urls = urlparse(u)
        if urls.scheme not in ["", "http", "https"]:
            return None
        print "In appending", urls
        if urls.fragment == "":
            if urls.netloc == "":  # not shown domain urls
                m = p11.search(u)
                tp = p10.search(link)
                if m != None:
                    if m.group(1) != None:  # path
                        if tp != None:
                            # print 'return',tp.group(1)+'/'+m.group(1)
                            return tp.group(1) + "/" + m.group(1)
                        else:
                            # print 'return',link+'/'+m.group(1)
                            return link + "/" + m.group(1)

                    elif m.group(3) != None:  # ./path
                        if tp != None:
                            return tp.group(1) + "/" + m.group(3)
                        else:
                            return link + "/" + m.group(3)
                    elif m.group(4) != None:  # /path
                        return domain + "/" + m.group(4)
                    else:  # ../path
                        k = m.group(5)
                        k1 = tp.group(1)
                        l = r"^../(.*)"
                        pl = re.compile(l, re.IGNORECASE)
                        while pl.search(k) != None:
                            m1 = pl.search(k)
                            tmp = p10.search(k1)
                            if tmp != None:
                                k1 = tmp.group(1)
                            k = m1.group(1)
                        else:
                            tmp = p10.search(k1)
                            if tmp != None:
                                k1 = tmp.group(1)
                                return k1 + "/" + k
                            else:
                                return k1 + "/" + k

            else:
                if urls.netloc[-(domain.__len__()) :] == domain:  # shown domain and sub domain urls
                    if urls.query != "":
                        return urls.netloc + urls.path + urls.params + "?" + urls.query
                    else:
                        return urls.netloc + urls.path + urls.params
                else:
                    # print 'inside :81'
                    m2 = p8.search(urls.netloc)
                    if m2 != None:
                        if m2.group(1)[-(domain.__len__()) :] == domain:
                            if urls.query != "":
                                return urls.netloc + urls.path + urls.params + "?" + urls.query
                            else:
                                return urls.netloc + urls.path + urls.params
Example #18
 def attacking(self, target, rounds):
     print "[Info] [AI] ICMP Broadcast (SMURF) is redirecting: [" , rounds, "base stations ]"
     if target.startswith('http://'):
         target = target.replace('http://','')
         sport = 80
     elif target.startswith('https://'):
         target = target.replace('https://','')
         sport = 443
     try:
         ip = socket.gethostbyname(target)
     except:
         try:
             import dns.resolver
             r = dns.resolver.Resolver()
             r.nameservers = ['8.8.8.8', '8.8.4.4'] # google DNS resolvers
             url = urlparse(target)
             a = r.query(url.netloc, "A") # A record
             for rd in a:
                 ip = str(rd)
         except:
             ip = target
     if ip == "127.0.0.1" or ip == "localhost":
         print "[Info] [AI] [SMURF] Sending message '1/0 %====D 2 Ur ;-0' to 'localhost' -> [OK!]\n"
         return
     smurfize(ip, sport, rounds) # attack with SMURF using threading
Example #19
 def render(self, context):
     o = urlparse(self.media_url)
     if o.hostname is None:
         domain = 'http://%s' % (Site.objects.get_current().domain, )
         return "%s" % urljoin(domain, self.media_url)
     else:
         return self.media_url
Example #20
def application_identification(server_banner, url):
  found_application_extension = False
  if settings.VERBOSITY_LEVEL >= 1:
    info_msg = "Identifying the target application ... " 
    sys.stdout.write(settings.print_info_msg(info_msg))
    sys.stdout.flush()
  root, application_extension = splitext(urlparse(url).path)
  settings.TARGET_APPLICATION = application_extension[1:].upper()
  
  if settings.TARGET_APPLICATION:
    found_application_extension = True
    if settings.VERBOSITY_LEVEL >= 1:
      print "[ " + Fore.GREEN + "SUCCEED" + Style.RESET_ALL + " ]"           
      success_msg = "The target application was identified as " 
      success_msg += settings.TARGET_APPLICATION + Style.RESET_ALL + "."
      print settings.print_success_msg(success_msg)

    # Check for unsupported target applications
    for i in range(0,len(settings.UNSUPPORTED_TARGET_APPLICATION)):
      if settings.TARGET_APPLICATION.lower() in settings.UNSUPPORTED_TARGET_APPLICATION[i].lower():
        err_msg = settings.TARGET_APPLICATION + " exploitation is not yet supported."  
        print settings.print_critical_msg(err_msg)
        raise SystemExit()

  if not found_application_extension:
    if settings.VERBOSITY_LEVEL >= 1:
      print "[ " + Fore.RED + "FAILED" + Style.RESET_ALL + " ]"
    warn_msg = "Heuristics have failed to identify target application."
    print settings.print_warning_msg(warn_msg)
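For instance (hypothetical target URL), the extension of the path component drives the result:

# url = 'http://example.com/index.php'
# splitext(urlparse(url).path) -> ('/index', '.php')  =>  settings.TARGET_APPLICATION == 'PHP'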
Example #21
def ZeusApi_copy_vela(Change_Param_Url):
    old_sign_urlparse = urlparse( Change_Param_Url )
    old_sign_newParams = {}
    old_sign_urlparams = parse_qs(old_sign_urlparse.query)
    print old_sign_urlparams
    for old_sign_param in old_sign_urlparams:
        old_sign_paramValue = old_sign_urlparams[old_sign_param]
        old_sign_newParams[old_sign_param] = old_sign_paramValue[0]

    print old_sign_newParams
    old_sign_urlkeys = old_sign_newParams['keys'].split(',')
    print old_sign_urlkeys
    old_sign_urlkeys.sort()
    tmp = ''

    for i in old_sign_urlkeys:
        #print i
        #print old_sign_newParams[i]
        if old_sign_newParams.has_key( i ):
            tmp += '%s%s' % (i, old_sign_newParams[i])
        else:
            tmp += '%s%s' % (i, "")
    print "tmp:" + tmp
    m = hashlib.md5(tmp)
    # the new encrypted sign
    old_sign_newParams['sign'] = m.hexdigest().upper()
    print old_sign_newParams['sign']
    old_newsign=old_sign_newParams['sign']
    return old_newsign
Example #22
    def __init__(self, url):
        global domain
        global hreflist
        global headers
        global count
        global picdic
        global path

        rp = robotparser.RobotFileParser()
        self.rp = rp
        headers["User-Agent"] = "vrsec crawling robot"
        headers["If-Modified-Since"] = "Sat, 29 Oct 1994 19:43:31 GMT"
        headers["Accept"] = "text/html"

        urlstr = urlparse(url)
        print "		urlparsed: ", urlstr

        domain = p9.search(urlstr[1]).group(1)

        self.osexception((path + domain))
        self.path = path + domain + "/"
        print "		domain name: ", domain
        self.urlopenexception(domain, domain)  # urlretrieve returns a tuple of two objects(filename,mime_hdrs)
        print "		picdic: ", picdic
        self.urlpickle()

        self.domain = domain
Example #23
def playURLRVideo(url, name, type_):
    dialog_progress_title='URL Resolver'
    dialog_progress_YTDL = xbmcgui.DialogProgressBG()
    dialog_progress_YTDL.create(dialog_progress_title )
    dialog_progress_YTDL.update(10,dialog_progress_title,translation(32014)  )

    from urlparse import urlparse
    parsed_uri = urlparse( url )
    domain = '{uri.netloc}'.format(uri=parsed_uri)
    try:
        import urlresolver
        #hmf = urlresolver.HostedMediaFile(url)
        dialog_progress_YTDL.update(20,dialog_progress_title,translation(32012)  )

        media_url = urlresolver.resolve(url)
        dialog_progress_YTDL.update(80,dialog_progress_title,translation(32013)  )
        if media_url:
            log( '  URLResolver stream url=' + repr(media_url ))

            pl = xbmc.PlayList(xbmc.PLAYLIST_VIDEO)
            pl.clear()
            pl.add(media_url, xbmcgui.ListItem(name))
            xbmc.Player().play(pl, windowed=False)  #scripts play video like this.
        else:
            log( "  Can't URL Resolve:" + repr(url))
            xbmc_notify('URLresolver', translation(32192),icon="type_urlr.png" )  #Failed to get playable url
    except Exception as e:
        xbmc_notify('URLresolver:'+domain, str(e),icon="type_urlr.png" )
    dialog_progress_YTDL.close()
Example #24
def analysisdomain(urls):
    topHostPostfix = (
    '.com','.cn','.la','.io','.co','.info','.net','.org','.me','.mobi',
    '.us','.biz','.xxx','.ca','.co.jp','.com.cn','.net.cn','.edu.cn','.gov.cn',
    '.org.cn','.mx','.tv','.ws','.ag','.com.ag','.net.ag',
    '.org.ag','.am','.asia','.at','.be','.com.br','.net.br',
    '.bz','.com.bz','.net.bz','.cc','.com.co','.net.co',
    '.nom.co','.de','.es','.com.es','.nom.es','.org.es',
    '.eu','.fm','.fr','.gs','.in','.co.in','.firm.in','.gen.in',
    '.ind.in','.net.in','.org.in','.it','.jobs','.jp','.ms',
    '.com.mx','.nl','.nu','.co.nz','.net.nz','.org.nz',
    '.se','.tc','.tk','.tw','.com.tw','.idv.tw','.org.tw',
    '.hk','.co.uk','.me.uk','.org.uk','.vg', ".com.hk")

    regx = r'[^\.]+('+'|'.join([h.replace('.',r'\.') for h in topHostPostfix])+')$'
    pattern = re.compile(regx,re.IGNORECASE)
    hosts=[]
    for url in urls:
        #print url
        parts = urlparse(url)
        host = parts.netloc
        m = pattern.search(host)
        res =  m.group() if m else host
        if res:
            hosts.append(res)
    return hosts
Example #25
    def getUrl(self):
        """\

        """
        url_obj = urlparse(self.url)
        scheme = self.getScheme(url_obj)
        hostname = self.getHostname(url_obj)
Example #26
def setup(bot):
    global url_finder, exclusion_char, api_url, api_key, api_user, api_private

    if bot.config.bookie.api_url:
        try:
            # say we have "https://example.com/prefix/api/v1/admin/account?api_key=XXXXXX"
            p = urlparse(bot.config.bookie.api_url)
            # "https://example.com"
            api_url = p.scheme + "://" + p.netloc
            # "/prefix"
            prefix = p.path.split(api_suffix)[0]
            if prefix:
                api_url += prefix
            # "/api/v1/"
            api_url += api_suffix
            # the path element after api_suffix
            # that is, "admin"
            api_user = p.path.split(api_suffix)[1].split("/")[0]
            # "XXXXXX"
            api_key = p.query.split("=")[1]
        except Exception as e:
            raise ConfigurationError("Bookie api_url badly formatted: %s" % str(e))
    else:
        raise ConfigurationError("Bookie module not configured")

    api_private = validate_private(bot.config.bookie.private)
    if bot.config.has_option("url", "exclusion_char"):
        exclusion_char = bot.config.url.exclusion_char

    url_finder = re.compile(r"(?u)(.*?)\s*(%s?(?:http|https|ftp)(?:://\S+)\s*(.*?))" % (exclusion_char))
    if bot.config.bookie.auto:
        if not bot.memory.contains("url_callbacks"):
            bot.memory["url_callbacks"] = tools.SopelMemory()
        bot.memory["url_callbacks"][re.compile(".*")] = bmark
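For the sample URL in the comments above, the pieces come out as follows (a comment-only sketch, assuming api_suffix == '/api/v1/'):

# p        = urlparse('https://example.com/prefix/api/v1/admin/account?api_key=XXXXXX')
# api_url  = 'https://example.com' + '/prefix' + '/api/v1/'   # 'https://example.com/prefix/api/v1/'
# api_user = 'admin'
# api_key  = 'XXXXXX'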
Example #27
 def attacking(self, target, rounds):
     print "[Info] [AI] TCP Starvation (NUKE) is ready to fire: [" , rounds, "nukes ]"
     if target.startswith('http://'):
         target = target.replace('http://','')
         port = 80
     elif target.startswith('https://'):
         target = target.replace('https://','')
         port = 443
     try:
         ip = socket.gethostbyname(target)
     except:
         try:
             import dns.resolver
             r = dns.resolver.Resolver()
             r.nameservers = ['8.8.8.8', '8.8.4.4'] # google DNS resolvers
             url = urlparse(target)
             a = r.query(url.netloc, "A") # A record
             for rd in a:
                 ip = str(rd)
         except:
             ip = target
     if ip == "127.0.0.1" or ip == "localhost":
         print "[Info] [AI] [NUKE] Sending message '1/0 %====D 2 Ur ;-0' to 'localhost' -> [OK!]\n"
         return
     nukeize(ip, port, rounds) # attack with NUKE using threading
Example #28
def sanitize_redirect(host, redirect_to):
    """
    Given the hostname and an untrusted URL to redirect to,
    this method tests it to make sure it isn't garbage/harmful
    and returns it, else returns None, similar as how's it done
    on django.contrib.auth.views.

    >>> print sanitize_redirect('myapp.com', None)
    None
    >>> print sanitize_redirect('myapp.com', '')
    None
    >>> print sanitize_redirect('myapp.com', {})
    None
    >>> print sanitize_redirect('myapp.com', 'http://notmyapp.com/path/')
    None
    >>> print sanitize_redirect('myapp.com', 'http://myapp.com/path/')
    http://myapp.com/path/
    >>> print sanitize_redirect('myapp.com', '/path/')
    /path/
    """
    # Quick sanity check.
    if not redirect_to:
        return None

    # Heavier security check, don't allow redirection to a different host.
    try:
        netloc = urlparse(redirect_to)[1]
    except TypeError:  # not valid redirect_to value
        return None
    if netloc and netloc != host:
        return None
    return redirect_to
Example #29
        def handle_relay(self):
            """
                Provided a URL such as localhost:7777 or app.marathon.mesos:7777, the relay
                will ping that url (http://localhost:7777/ping) and respond back.
                It is used for network testing in a cluster.
            """
            query = urlparse(self.path).query
            query_components = dict(qc.split("=") for qc in query.split("&"))
            logging.info(query_components)
            full_url = 'http://{}/ping'.format(query_components['url'])

            url_req = Request(full_url, headers={"User-Agent": "Mozilla/5.0"})
            response = urlopen(url_req)
            res = response.read()
            status = response_status(response)
            logging.debug("Relay request is %s, %s", res, status)

            self.send_response(status)
            self.send_header('Content-type', 'text/html')
            self.end_headers()

            self.wfile.write(res)
            marathonId = os.getenv("MARATHON_APP_ID", "NO_MARATHON_APP_ID_SET")
            msg = "\nRelay from {}".format(marathonId)
            self.wfile.write(byte_type(msg, "UTF-8"))

            return
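For a request path like '/relay?url=localhost:7777' (hypothetical), the parsing above yields:

# query            == 'url=localhost:7777'
# query_components == {'url': 'localhost:7777'}
# full_url         == 'http://localhost:7777/ping'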
Example #30
    def sendRequests(csrf,session,s):

        while link_queue.__len__() > 0:


            try:
                link = link_queue.pop(0)
            except IndexError:
                break
            if link not in visited:
                #print sent_links.__len__()

                parsed = urlparse(link)
                linkrequest = HTTPrequest()
                linkrequest.type = "GET"
                linkrequest.version = "1.1"
                linkrequest.host = parsed.netloc
                linkrequest.path = parsed.path
                linkrequest.cookies['csrf'] = csrf
                linkrequest.cookies['sessionid'] = session
                linkrequest.connection = "Keep-Alive"
                #linkrequest.encoding = "gzip"

                try:
                    sendRequestOnly(linkrequest,s)
                    sent_links.append(link)
                    print(link + " "+ str(secret_flags.__len__()))

                except socket.error as err:
                    link_queue.insert(0,link)
                    #s.close()
                    break
Example #31
 def __validateUri(self, url):
     splitUri = urlparse(url)
     return splitUri.netloc + splitUri.path
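The effect is to strip the scheme, query and fragment, keeping only host plus path (hypothetical URL):

# self.__validateUri('http://example.com/a/b?x=1')  ->  'example.com/a/b'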
Example #32
    try:
        with open('HostsList', 'r') as fp:
            HostsList = [
                ''.join(i.split()) for i in fp.readlines() if len(i) > 4
            ]
            if not HostsList:
                sys.exit(putColor('[X]', 'red') + 'HostsList is empty')
    except Exception, e:
        if str(e):
            sys.exit(putColor('[X]', 'red') + 'Where is your HostsList?')
else:
    HostsList = [HostsList]

Sites = []
for Host in HostsList:
    Host = urlparse(Host)
    Host = Host.netloc if Host.netloc else Host.path
    print '[!]Searching Sites for:', Host, '\n'
    if Host == '':
        print putColor('[X]', 'red') + 'Error Host or IP'
        continue

    # ---------------- API ----------------------

    Sites.extend(dns_aizhan_com.search(Host, s))
    Sites.extend(webscan_cc.search(Host, s))
    Sites.extend(site_ip138_com.search(Host, s))
    Sites.extend(bing_com.search(Host, s))

    # ---------------- end ----------------------
Example #33
def listLinksInComment(url, name, type_):
    from domains import parse_reddit_link, build_DirectoryItem_url_based_on_media_type
    from utils import markdown_to_bbcode, unescape
    from guis import progressBG

    log('listLinksInComment:%s:%s' %(type_,url) )


    directory_items=[]
    author=""
    ShowOnlyCommentsWithlink=False

    if type_=='linksOnly':
        ShowOnlyCommentsWithlink=True

    url=urllib.quote_plus(url,safe=':/?&')
    if '?' in url:
        url=url.split('?', 1)[0]+'.json?'+url.split('?', 1)[1]
    else:
        url+= '.json'

    loading_indicator=progressBG(translation(30024))
    loading_indicator.update(0,'Retrieving comments')

    content = reddit_request(url)
    if not content:
        loading_indicator.end()
        return

    loading_indicator.update(10,'Parsing')
    content = json.loads(content)

    del harvest[:]

    r_linkHunter(content[0]['data']['children'])

    try:submitter=content[0]['data']['children'][0]['data']['author']
    except: submitter=''

    try:post_title=content[0]['data']['children'][0]['data']['title']
    except:post_title=''

    r_linkHunter(content[1]['data']['children'])

    comment_score=0

    loading_indicator.set_tick_total(len(harvest))

    for i, h in enumerate(harvest):
        try:

            comment_score=h[0]

            link_url=h[2]
            desc100=h[3].replace('\n',' ')[0:100] #first 100 characters of description

            kind=h[6] #reddit uses t1 for user comments and t3 for OP text of the post. like a poster describing the post.
            d=h[5]   #depth of the comment

            tab=" "*d if d>0 else "-"

            from urlparse import urlparse
            domain = '{uri.netloc}'.format( uri=urlparse( link_url ) )

            author=h[7]
            DirectoryItem_url=''

            if comment_score < int_CommentTreshold:
                continue


            ld=parse_reddit_link(link_url=link_url, assume_is_video=False, needs_preview=True, get_playable_url=True )

            if kind=='t1':
                list_title=r"[COLOR cadetblue]%3d[/COLOR] %s" %( h[0], tab )
            elif kind=='t3':
                list_title=r"[COLOR cadetblue]Title [/COLOR] %s" %( tab )

            plot=h[3].replace('](', '] (')
            plot= markdown_to_bbcode(plot)
            plot=unescape(plot)  #convert html entities e.g.:(&#39;)

            liz=xbmcgui.ListItem(label=list_title +': '+ desc100)

            liz.setInfo( type="Video", infoLabels={ "Title": h[1], "plot": plot, "studio": domain, "votes": str(comment_score), "director": author  } )
            isFolder=False

            if link_url:
                DirectoryItem_url, setProperty_IsPlayable, isFolder, title_prefix = build_DirectoryItem_url_based_on_media_type(ld, link_url)

                liz.setProperty('IsPlayable', setProperty_IsPlayable)
                liz.setProperty('url', DirectoryItem_url)  #<-- needed by the xml gui skin
                liz.setPath(DirectoryItem_url)

                if domain:
                    plot= "  [COLOR greenyellow][%s] %s"%(domain, plot )  + "[/COLOR]"
                else:
                    plot= "  [COLOR greenyellow][%s]"%( plot ) + "[/COLOR]"
                liz.setLabel(list_title+plot)

                if ld:
                    liz.setArt({"thumb": ld.poster, "poster":ld.poster, "banner":ld.poster, "fanart":ld.poster, "landscape":ld.poster   })

            if DirectoryItem_url:

                directory_items.append( (DirectoryItem_url, liz, isFolder,) )

            else:

                if not ShowOnlyCommentsWithlink:
                    result=h[3].replace('](', '] (')
                    result=markdown_to_bbcode(result)
                    liz=xbmcgui.ListItem(label=list_title + desc100)
                    liz.setInfo( type="Video", infoLabels={ "Title": h[1], "plot": result, "studio": domain, "votes": str(h[0]), "director": author } )
                    liz.setProperty('IsPlayable', 'false')

                    directory_items.append( ("", liz, False,) )

        except Exception as e:
            log('  EXCEPTION:' + str(e) )


        loading_indicator.tick(1, desc100)
    loading_indicator.end()

    xbmcplugin.setContent(pluginhandle, "movies")    #files, songs, artists, albums, movies, tvshows, episodes, musicvideos
    xbmcplugin.setPluginCategory(pluginhandle,'Comments')

    xbmcplugin.addDirectoryItems(handle=pluginhandle, items=directory_items )
    xbmcplugin.endOfDirectory(pluginhandle)

    if comments_viewMode:
        xbmc.executebuiltin('Container.SetViewMode(%s)' %comments_viewMode)
Example #34
def validate_inputs(config, args, unknown_args):
    error_arr = []
    try:
        config.read(pil.config_path)

        if args.download:
            pil.dl_user = args.download
            if args.downloadfollowing or args.batchfile:
                logger.banner()
                logger.warn(
                    "Please use only one download method. Use -h for more information."
                )
                logger.separator()
                return False
        elif not args.clean and not args.info and not args.assemble and not args.downloadfollowing and not args.batchfile:
            logger.banner()
            logger.error(
                "Please use a download method. Use -h for more information.")
            logger.separator()
            return False

        if helpers.bool_str_parse(config.get('pyinstalive',
                                             'log_to_file')) == "Invalid":
            pil.log_to_file = True
            error_arr.append(['log_to_file', 'True'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'log_to_file')):
            pil.log_to_file = True
        else:
            pil.log_to_file = False

        logger.banner()

        if args.batchfile:
            if os.path.isfile(args.batchfile):
                pil.dl_batchusers = [
                    user.rstrip('\n') for user in open(args.batchfile)
                ]
                if not pil.dl_batchusers:
                    logger.error("The specified file is empty.")
                    logger.separator()
                    return False
                else:
                    logger.info(
                        "Downloading {:d} users from batch file.".format(
                            len(pil.dl_batchusers)))
                    logger.separator()
            else:
                logger.error('The specified file does not exist.')
                logger.separator()
                return False

        if unknown_args:
            pil.uargs = unknown_args
            logger.warn(
                "The following unknown argument(s) were provided and will be ignored: "
            )
            logger.warn('    ' + ' '.join(unknown_args))
            logger.separator()

        pil.ig_user = config.get('pyinstalive', 'username')
        pil.ig_pass = config.get('pyinstalive', 'password')
        pil.dl_path = config.get('pyinstalive', 'download_path')
        pil.run_at_start = config.get('pyinstalive', 'run_at_start')
        pil.run_at_finish = config.get('pyinstalive', 'run_at_finish')
        pil.ffmpeg_path = config.get('pyinstalive', 'ffmpeg_path')
        pil.args = args
        pil.config = config
        pil.proxy = config.get('pyinstalive', 'proxy')

        if args.configpath:
            pil.config_path = args.configpath
            if not os.path.isfile(pil.config_path):
                pil.config_path = os.path.join(os.getcwd(), "pyinstalive.ini")
                logger.warn(
                    "Custom config path is invalid, falling back to default path: {:s}"
                    .format(pil.config_path))
                logger.separator()

        if args.dlpath:
            pil.dl_path = args.dlpath

        if helpers.bool_str_parse(
                config.get('pyinstalive', 'show_cookie_expiry')) == "Invalid":
            pil.show_cookie_expiry = False
            error_arr.append(['show_cookie_expiry', 'False'])
        elif helpers.bool_str_parse(
                config.get('pyinstalive', 'show_cookie_expiry')):
            pil.show_cookie_expiry = True
        else:
            pil.show_cookie_expiry = False

        if helpers.bool_str_parse(config.get('pyinstalive',
                                             'use_locks')) == "Invalid":
            pil.use_locks = False
            error_arr.append(['use_locks', 'False'])
        elif helpers.bool_str_parse(config.get('pyinstalive', 'use_locks')):
            pil.use_locks = True
        else:
            pil.use_locks = False

        if helpers.bool_str_parse(config.get('pyinstalive',
                                             'clear_temp_files')) == "Invalid":
            pil.clear_temp_files = False
            error_arr.append(['clear_temp_files', 'False'])
        elif helpers.bool_str_parse(
                config.get('pyinstalive', 'clear_temp_files')):
            pil.clear_temp_files = True
        else:
            pil.clear_temp_files = False

        if helpers.bool_str_parse(config.get('pyinstalive',
                                             'do_heartbeat')) == "Invalid":
            pil.do_heartbeat = True
            error_arr.append(['do_heartbeat', 'True'])
        if helpers.bool_str_parse(config.get('pyinstalive', 'do_heartbeat')):
            pil.do_heartbeat = True
        if args.noheartbeat or not helpers.bool_str_parse(
                config.get('pyinstalive', 'do_heartbeat')):
            pil.do_heartbeat = False
            logger.warn(
                "Getting livestream heartbeat is disabled, this may cause degraded performance."
            )
            logger.separator()

        if not args.nolives and helpers.bool_str_parse(
                config.get('pyinstalive', 'download_lives')) == "Invalid":
            pil.dl_lives = True
            error_arr.append(['download_lives', 'True'])
        elif helpers.bool_str_parse(config.get('pyinstalive',
                                               'download_lives')):
            pil.dl_lives = True
        else:
            pil.dl_lives = False

        if not args.noreplays and helpers.bool_str_parse(
                config.get('pyinstalive', 'download_replays')) == "Invalid":
            pil.dl_replays = True
            error_arr.append(['download_replays', 'True'])
        elif helpers.bool_str_parse(
                config.get('pyinstalive', 'download_replays')):
            pil.dl_replays = True
        else:
            pil.dl_replays = False

        if helpers.bool_str_parse(
                config.get('pyinstalive', 'download_comments')) == "Invalid":
            pil.dl_comments = True
            error_arr.append(['download_comments', 'True'])
        elif helpers.bool_str_parse(
                config.get('pyinstalive', 'download_comments')):
            pil.dl_comments = True
        else:
            pil.dl_comments = False

        if args.nolives:
            pil.dl_lives = False

        if args.noreplays:
            pil.dl_replays = False

        if not pil.dl_lives and not pil.dl_replays:
            logger.error(
                "You have disabled both livestream and replay downloading.")
            logger.error("Please enable at least one of them and try again.")
            logger.separator()
            return False

        if pil.ffmpeg_path:
            if not os.path.isfile(pil.ffmpeg_path):
                pil.ffmpeg_path = None
                cmd = "where" if platform.system() == "Windows" else "which"
                logger.warn(
                    "Custom ffmpeg binary path is invalid, falling back to default path: {:s}"
                    .format(
                        subprocess.check_output([cmd, 'ffmpeg'
                                                 ]).decode('UTF-8').rstrip()))
            else:
                logger.binfo("Overriding ffmpeg binary path: {:s}".format(
                    pil.ffmpeg_path))

        if not pil.ig_user or not len(pil.ig_user):
            raise Exception(
                "Invalid value for 'username'. This value is required.")

        if not pil.ig_pass or not len(pil.ig_pass):
            raise Exception(
                "Invalid value for 'password'. This value is required.")

        if not pil.dl_path.endswith('/'):
            pil.dl_path = pil.dl_path + '/'
        if not pil.dl_path or not os.path.exists(pil.dl_path):
            pil.dl_path = os.getcwd()
            if not args.dlpath:
                error_arr.append(['download_path', os.getcwd()])
            else:
                logger.warn(
                    "Custom download path is invalid, falling back to default path: {:s}"
                    .format(pil.dl_path))
                logger.separator()

        if pil.proxy and pil.proxy != '':
            parsed_url = urlparse(pil.proxy)
            if not parsed_url.netloc or not parsed_url.scheme:
                error_arr.append(['proxy', 'None'])
                pil.proxy = None

        if error_arr:
            for error in error_arr:
                logger.warn(
                    "Invalid value for '{:s}'. Using default value: {:s}".
                    format(error[0], error[1]))
                logger.separator()

        if args.info:
            helpers.show_info()
            return False
        elif args.clean:
            helpers.clean_download_dir()
            return False
        elif args.assemble:
            pil.assemble_arg = args.assemble
            assembler.assemble()
            return False

        return True
    except Exception as e:
        logger.error("An error occurred: {:s}".format(str(e)))
        logger.error(
            "Make sure the config file and given arguments are valid and try again."
        )
        logger.separator()
        return False
Example #35
def get_domain(url):
    return urlparse(url)[1]
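urlparse(url)[1] is the netloc component, which (unlike .hostname) keeps any userinfo and port:

print(get_domain('http://user@example.com:8080/path'))  # 'user@example.com:8080'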
Example #36
def find_domain(url):
    from urlparse import urlparse
    #return url.split("//")[-1].split("/")[0]
    domain = urlparse(url).hostname.split('.')[1]
    return domain
Example #37
# -*- coding: utf-8 -*-
import pymssql
import sys
import os
from urlparse import *

reload(sys)
sys.setdefaultencoding('utf8')
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

conn = pymssql.connect('10.101.7.128',
                       'sa',
                       'Password01!',
                       'ZhongYan_SustainableDevelpoment',
                       charset='utf8')
cursor = conn.cursor()
# sql = "exec PROC_INSERT_PARSE_RESULT @parse_id='%s',@index_id='%s',@index_base_id='%s',@data=N'%s',@source_id='%s',@relation=%s" % ('9FFE6E7D56A543ABA612B63203AEB9F1', 'FD306F8D6C04409F8EA9D780C30FD5B8', '00A14D4BAE92CA443FAAC16A8B171BC', '阿斯顿自行车阿斯顿现在','02647911-2DCF-4086-94AB-658934B84663', '2.13221')
#
# # sql = "insert into TB_CRAWL_RESULT(ID,PARSE_HISTORY_ID,SADP_INDEX_ID,SADP_INDEX_BASE_DATA_ID,DATA) values ('%s','%s','%s','%s',N'%s')" % (str(uuid.uuid4()),str(uuid.uuid4()),str(uuid.uuid4()),str(uuid.uuid4()),'啊实打实大苏打')
# cursor.execute(sql)
#
# conn.commit()

cursor.execute("SELECT URL,ID FROM [dbo].[View_COMPANY_SOURCE]")
row = cursor.fetchone()
while row:
    ym = urlparse(row[0]).netloc
    print ym
    row = cursor.fetchone()
Example #38
def pre_request_hook(req):
    if 'Host' not in req.headers:
        hostname = urlparse(req.full_url)[1]
        req.headers['Host'] = hostname
Example #39
def get_suffix1(p):
    if len(p) == 1:
        #return "pure domain"
        return "nil"
    fields = p.split("/")
    if len(fields) == 0 or len(fields) == 1:
        return "null"
    fields1 = fields[len(fields) - 1].split(".")
    if len(fields1) == 0 or len(fields1) == 1:
        return "null"
    else:
        return fields1[len(fields1) - 1]
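A few illustrative inputs for get_suffix1 (these are the path components later produced by urlparse):

print(get_suffix1('/'))                  # 'nil'   (bare domain, path is just '/')
print(get_suffix1('/images/photo.jpg'))  # 'jpg'
print(get_suffix1('/images/photo'))      # 'null'  (last segment has no extension)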


for line in fd.readlines():
    fields = line.strip().split("'")
    domain = fields[13]
    if len(domain) < 3:
        continue

    hit_miss = fields[25]
    if len(hit_miss) < 2:
        continue
    if len(fields[15]) < 5:
        continue

    result = urlparse(fields[15])

    sfx = get_suffix1(result.path)
    print sfx
Example #40
    def __call__(self, env, start_response):
        def custom_start_response(status, headers):
            if self.delay_auth_decision:
                headers.append(
                    ('WWW-Authenticate', "Basic realm='Use guest/guest'"))
            return start_response(status, headers)

        #Prep headers to proxy request to remote service
        proxy_headers = env.copy()
        user = ''

        #Look for authentication
        if 'HTTP_AUTHORIZATION' not in env:
            #No credentials were provided
            if self.delay_auth_decision:
                _decorate_request_headers("X_IDENTITY_STATUS", "Invalid",
                                          proxy_headers, env)
            else:
                # If the user isn't authenticated, we reject the request and
                # return 401 indicating we need Basic Auth credentials.
                ret = HTTPUnauthorized(
                    "Authentication required",
                    [('WWW-Authenticate', 'Basic realm="Use guest/guest"')])
                return ret(env, start_response)
        else:
            # Claims were provided - validate them
            import base64
            auth_header = env['HTTP_AUTHORIZATION']
            _auth_type, encoded_creds = auth_header.split(None, 1)
            user, password = base64.b64decode(encoded_creds).split(':', 1)
            if not self.validateCreds(user, password):
                #Claims were rejected
                if not self.delay_auth_decision:
                    # Reject request (or ask for valid claims)
                    ret = HTTPUnauthorized("Authentication required", [
                        ('WWW-Authenticate', 'Basic realm="Use guest/guest"')
                    ])
                    return ret(env, start_response)
                else:
                    # Claims were rejected, but the auth decision is delayed; mark identity invalid and forward
                    _decorate_request_headers("X_IDENTITY_STATUS", "Invalid",
                                              proxy_headers, env)

            # TODO(Ziad): add additional details we may need,
            #             like tenant and group info
            _decorate_request_headers('X_AUTHORIZATION', "Proxy %s" % user,
                                      proxy_headers, env)
            _decorate_request_headers("X_IDENTITY_STATUS", "Confirmed",
                                      proxy_headers, env)
            _decorate_request_headers('X_TENANT', 'blank', proxy_headers, env)
            #Auth processed, headers added now decide how to pass on the call
            if self.app:
                # Pass to downstream WSGI component
                env['HTTP_AUTHORIZATION'] = "Basic %s" % self.service_pass
                return self.app(env, custom_start_response)

            proxy_headers['AUTHORIZATION'] = "Basic %s" % self.service_pass
            # We are forwarding to a remote service (no downstream WSGI app)
            req = Request(proxy_headers)
            parsed = urlparse(req.url)
            conn = http_connect(self.service_host, self.service_port, \
                                req.method, parsed.path, \
                                proxy_headers, \
                                ssl=(self.service_protocol == 'https'))
            resp = conn.getresponse()
            data = resp.read()
            #TODO(ziad): use a more sophisticated proxy
            # we are rewriting the headers now
            return Response(status=resp.status, body=data)(env, start_response)
Example #41
def get_domain(url):
    return urlparse(url, scheme='')[1]
Example #42
def main():
    module = AnsibleModule(argument_spec=dict(
        group_id=dict(default=None),
        artifact_id=dict(default=None),
        version=dict(default="latest"),
        classifier=dict(default=None),
        extension=dict(default='jar'),
        repository_url=dict(default=None),
        username=dict(default=None, aliases=['aws_secret_key']),
        password=dict(
            default=None, no_log=True, aliases=['aws_secret_access_key']),
        state=dict(default="present", choices=["present", "absent"]
                   ),  # TODO - Implement a "latest" state
        timeout=dict(default=10, type='int'),
        dest=dict(type="path", default=None),
        validate_certs=dict(required=False, default=True, type='bool'),
    ))

    repository_url = module.params["repository_url"]
    if not repository_url:
        repository_url = "http://repo1.maven.org/maven2"

    try:
        parsed_url = urlparse(repository_url)
    except AttributeError as e:
        module.fail_json(msg='url parsing went wrong %s' % e)

    if parsed_url.scheme == 's3' and not HAS_BOTO:
        module.fail_json(
            msg=
            'boto3 required for this module, when using s3:// repository URLs')

    group_id = module.params["group_id"]
    artifact_id = module.params["artifact_id"]
    version = module.params["version"]
    classifier = module.params["classifier"]
    extension = module.params["extension"]
    repository_username = module.params["username"]
    repository_password = module.params["password"]
    state = module.params["state"]
    dest = module.params["dest"]

    #downloader = MavenDownloader(module, repository_url, repository_username, repository_password)
    downloader = MavenDownloader(module, repository_url)

    try:
        artifact = Artifact(group_id, artifact_id, version, classifier,
                            extension)
    except ValueError as e:
        module.fail_json(msg=e.args[0])

    prev_state = "absent"
    if os.path.isdir(dest):
        dest = posixpath.join(dest,
                              artifact_id + "-" + version + "." + extension)
    if os.path.lexists(dest) and downloader.verify_md5(
            dest,
            downloader.find_uri_for_artifact(artifact) + '.md5'):
        prev_state = "present"
    else:
        path = os.path.dirname(dest)
        if not os.path.exists(path):
            os.makedirs(path)

    if prev_state == "present":
        module.exit_json(dest=dest, state=state, changed=False)

    try:
        if downloader.download(artifact, dest):
            module.exit_json(state=state,
                             dest=dest,
                             group_id=group_id,
                             artifact_id=artifact_id,
                             version=version,
                             classifier=classifier,
                             extension=extension,
                             repository_url=repository_url,
                             changed=True)
        else:
            module.fail_json(msg="Unable to download the artifact")
    except ValueError as e:
        module.fail_json(msg=e.args[0])
Example #43
#STEP 4: scrape those pages in google with beautiful soup web scraper
pages = []
pages.append([])
pages.append([])
''' EXAMPLE:
# Add elements to empty lists.
elements[0].append(1)
elements[0].append(2)

elements[1].append(3)
elements[1].append(4)
'''
for result in search_results:  # GoogleResult objects
  pages[0].append(str(BeautifulSoup(result.link, 'html.parser')))  # layer 1 of pages (adds the string-ified html)
  pages[1].append(str(urlparse(result.link).netloc))               # layer 2, corresponds with layer 1 (adds the homepage url)
  
'''
So in my mind it kinda looks like this:

|-------------|-------------|
|HTML OF PAGE1|HTML OF PAGE2|             <-----pages[0]
|-------------|-------------|  ETC.
|PAGE1.com    |PaGe2.net    |             <-----pages[1]
|-------------|-------------|
'''

goodchars= {"1",
"2",
"3",
"4",
Example #44
f=open("dmozURLS","r")
f2=open("dmozURLsranked","w")

for line in f:
	url=line.split(",")[0]
	indom=0
	outdom=0
	suburls=[]
	try:
		html = requests.get(url,headers={'User-Agent': 'Mozilla/5.0'}).text
	except:
		continue
	soup=BeautifulSoup(html)
	try:
		parsed_uri = urlparse(url)
		domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
	except:
		domain='++++++'
	for link in soup.find_all('a', href=True):
		try:
			n=urlparse(link['href']).netloc
		except:
			continue
		if domain in link['href'] or not bool(n):
			indom+=1
		else:
			outdom+=1
		suburls.append(link['href'])
		#except:
		#	continue
Example #45
def host_injection(url, vuln_parameter, payload):
  
  payload = urlparse(url).netloc + payload

  def inject_host(url, vuln_parameter, payload, proxy):

    if proxy == None:
      opener = urllib2.build_opener()
    else:
      opener = urllib2.build_opener(proxy)

    # Check if defined POST data
    if menu.options.data:
      menu.options.data = settings.USER_DEFINED_POST_DATA
      request = urllib2.Request(url, menu.options.data)
    else:
      url = parameters.get_url_part(url)
      request = urllib2.Request(url)
    #Check if defined extra headers.
    headers.do_check(request)
    payload = checks.newline_fixation(payload)  
    request.add_header('Host', payload)
    try:
      headers.check_http_traffic(request)
      response = opener.open(request)
      return response
    except ValueError:
      pass

  if settings.TIME_RELATIVE_ATTACK :
    start = 0
    end = 0
    start = time.time()

  proxy = None 
  #response = inject_host(url, vuln_parameter, payload, proxy)
  # Check if defined any HTTP Proxy.
  if menu.options.proxy:
    try:
      proxy = urllib2.ProxyHandler({settings.SCHEME : menu.options.proxy})
      response = inject_host(url, vuln_parameter, payload, proxy)
    except urllib2.HTTPError, err_msg:
      if str(err_msg.code) == settings.INTERNAL_SERVER_ERROR:
        response = False  
      elif settings.IGNORE_ERR_MSG == False:
        err = str(err_msg) + "."
        if not settings.VERBOSITY_LEVEL >= 1 and settings.TIME_BASED_STATE == False or \
          settings.VERBOSITY_LEVEL >= 1 and settings.EVAL_BASED_STATE == None:
          print("")
        if settings.VERBOSITY_LEVEL >= 1 and settings.LOAD_SESSION == False:
          print("") 
        print(settings.print_critical_msg(err))
        continue_tests = checks.continue_tests(err_msg)
        if continue_tests == True:
          settings.IGNORE_ERR_MSG = True
        else:
          raise SystemExit()
      response = False 
    except urllib2.URLError, err_msg:
      err_msg = str(err_msg.reason).split(" ")[2:]
      err_msg = ' '.join(err_msg)+ "."
      if settings.VERBOSITY_LEVEL >= 1 and settings.LOAD_SESSION == False:
        print("")
      print(settings.print_critical_msg(err_msg))
      raise SystemExit()
Example #46
 def register_node(self, address):
     url = urlparse(address)
     if url.netloc:
         self.nodes.add(url)
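For example (hypothetical node address), the guard only stores addresses that parse to a netloc:

# self.register_node('http://192.168.0.5:5000')  -> parsed url added to self.nodes
# self.register_node('not a url')                -> no netloc, nothing stored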
Example #47
    def GetModelList(self, dName):
        """ Get the list of files from dName directory.
		"""

        ### import are here because the simulator (PyDEVS or PyPDEVS) require it
        from DomainInterface.DomainBehavior import DomainBehavior

        ### list of py file from __init__.py
        if LibraryTree.EXT_LIB_PYTHON_FLAG:

            ### list of py file from url
            if dName.startswith('http'):

                o = urlparse(dName)
                c = httplib.HTTPConnection(o.netloc)
                c.request('GET', o.path + '/__init__.py')

                r = c.getresponse()
                code = r.read()

                if r.status == 200:
                    exec code
                    tmp = filter(
                        lambda s: s.replace('\n', '').replace('\t', '').
                        replace(',', '').replace('"', "").replace(
                            '\'', "").strip(), __all__)
                    ### test if the content of __init__.py file is python file (isfile equivalent)
                    py_file_list = [
                        s for s in tmp
                        if 'python' in urlopen(dName + '/' + s +
                                               '.py').info().type
                    ]

                else:
                    py_file_list = []

                return py_file_list
            else:
                try:
                    name_list = getFileListFromInit(
                        os.path.join(dName, '__init__.py'))
                    py_file_list = []

                    for s in name_list:
                        python_file = os.path.join(dName, s + '.py')
                        ### test if tmp is only composed by python file (case of the user write into the __init__.py file directory name is possible ! then we delete the directory names)
                        if os.path.isfile(python_file):

                            cls = GetClass(python_file)

                            if cls is not None and not isinstance(cls, tuple):

                                ### only model that herite from DomainBehavior is shown in lib
                                if issubclass(cls, DomainBehavior):
                                    py_file_list.append(s)
                                else:
                                    sys.stderr.write(
                                        _("%s not imported : Class is not DomainBehavior \n"
                                          % (s)))

                            ### If cls is tuple, there is an error but we load the model to correct it.
                            ### If its not DEVS model, the Dnd don't allows the instantiation and when the error is corrected, it don't appear before a update.
                            else:

                                py_file_list.append(s)

                except Exception, info:
                    py_file_list = []
                    # if dName contains a python file, __init__.py is forced
                    for f in os.listdir(dName):
                        if f.endswith('.py'):
                            sys.stderr.write(
                                _("%s not imported : %s \n" % (dName, info)))
                            break