Python extract_headersの例、core.utils.extract_headers Pythonの例

コード例 #1

0

ファイルを表示

ファイル: http.py プロジェクト: itsdivyanshjain/python-nuclei

    def handle_raw(self, raw_data):
        """Parse a raw HTTP request template and store its parts on ``self``.

        The ``{{BaseURL}}`` and ``{{Hostname}}`` placeholders are replaced
        with ``self.Url``. The text is then split at the first blank line
        into head and body, and the head is split at the first newline into
        the request line and the header block.

        Sets ``self.Method``, ``self.Path`` (a one-element list holding
        ``self.Url`` plus the path token), ``self.Headers`` (``{}`` when
        there is no header block, otherwise the result of
        ``extract_headers``) and ``self.Body`` (``None`` when there is no
        blank line).

        Args:
            raw_data: Raw request text with newline-separated lines.

        Raises:
            IndexError: If the request line has no space-separated second
                token to use as the path.
        """
        self.Body = None
        self.Headers = {}

        # NOTE(review): both placeholders expand to self.Url; confirm that
        # '{{Hostname}}' is not meant to be only the host part of the URL.
        raw_data = raw_data.replace('{{BaseURL}}', self.Url)
        raw_data = raw_data.replace('{{Hostname}}', self.Url)

        # Split on the first blank line only. partition() needs no sentinel
        # string, so a payload that happens to contain the old marker text
        # no longer breaks the unpacking.
        head, blank_line, body = raw_data.partition('\n\n')
        if blank_line:
            raw_data = head
            self.Body = body

        # Everything after the first newline of the head is the header block.
        request_line, newline, header_block = raw_data.partition('\n')
        if newline:
            raw_data = request_line
            self.Headers = extract_headers(header_block)

        tokens = raw_data.split(' ')
        self.Method = tokens[0]
        self.Path = [self.Url + tokens[1]]

コード例 #2

0

ファイルを表示

ファイル: photon.py プロジェクト: xingsu56/Photon

external = set()  # URLs that don't belong to the target i.e. out-of-scope
# URLs that have get params in them e.g. example.com/page.php?id=2
fuzzable = set()
endpoints = set()  # URLs found from javascript files
processed = set()  # URLs that have been crawled
# URLs that belong to the target i.e. in-scope
internal = set(args.seeds)

everything = []
bad_intel = set()  # Unclean intel urls
bad_scripts = set()  # Unclean javascript file urls

core.config.verbose = verbose

# When `headers` is truthy, rebind it to the parsed output of prompt()
if headers:
    headers = extract_headers(prompt())

# If the user hasn't supplied the root URL with http(s), we will handle it
if main_inp.startswith('http'):
    main_url = main_inp
else:
    # Probe HTTPS first and fall back to plain HTTP if the request fails.
    # Catch Exception rather than using a bare except so that Ctrl-C and
    # SystemExit are not silently turned into an "http://" fallback.
    try:
        requests.get('https://' + main_inp)
    except Exception:
        main_url = 'http://' + main_inp
    else:
        main_url = 'https://' + main_inp

schema = main_url.split('//')[0]  # https: or http:?
# Adding the root URL to internal for crawling
internal.add(main_url)
# Extracts host out of the URL

コード例 #3

0

ファイルを表示

ファイル: photon.py プロジェクト: tyutzhangyukang/PythonTools

custom = set()  # Strings matched by the custom regex pattern
failed = set()  # URLs that could not be crawled
scripts = set()  # JavaScript files
external = set()  # URLs outside the target site's scope
fuzzable = set()  # URLs that have GET params in them, e.g. example.com/page.php?id=2
endpoints = set()  # URLs found in JavaScript files
processed = set()  # URLs that have already been crawled
internal = set(args.seeds)  # URLs inside the target site's scope

bad_intel = set()  # Invalid site URLs
bad_scripts = set()  # Invalid JavaScript file URLs

# Handle the HTTP request headers
if headers:
    headers = extract_headers(
        prompt()
    )  # Uses extract_headers() from core/utils.py and prompt() from core/prompt.py

# If the user has supplied a URL
if args.root:
    main_inp = args.root
    if main_inp.endswith('/'):  # If the URL ends with '/'
        # Strip it with a slice because it may cause problems later in the code
        main_inp = main_inp[:-1]
# If the user has not supplied a URL
else:
    print('\n' + parser.format_help().lower())  # Print the command-line usage help and exit
    quit()

# 如果用户提供的 root url 没有 http 或者 http(s)，需要处理一下数据
if main_inp.startswith('http'):  # 如果是以 http 开头则直接使用该 URL

コード例 #4

0

ファイルを表示

ファイル: photon.py プロジェクト: lucmichalski/M-moire

    def post(self, target, level_):
        """Run a crawl against ``target`` and return the collected datasets.

        The method builds the command-line parser and calls
        ``parse_args()``, then overrides several parsed values: the root URL
        becomes ``"http://" + target``, the level becomes ``int(level_)``,
        threads is set to 30, key extraction and the wayback option are
        enabled, and the export format is forced to ``"json"``. It then
        crawls, scans JavaScript files, filters intel, writes the datasets
        to the ``results`` directory and prints a summary.

        Crawl state is kept in module-level globals (see the ``global``
        declarations below), which this method rebinds on every call.

        Args:
            target: Host (and optional path) appended to ``"http://"`` to
                form the root URL.
            level_: Crawl depth; converted with ``int()``.

        Returns:
            A ``(results, 200)`` tuple. ``results`` is the dict of dataset
            name to list built before export (plus ``'subdomains'`` when
            ``--dns`` is set).
        """

        global keys
        global files
        global intel
        global robots
        global custom
        global failed
        global scripts
        global external
        global fuzzable
        global endpoints
        global processed
        global internal
        global main_url
        global delay
        global cook
        global headers
        global timeout
        global host
        global proxies
        global user_agents
        global only_urls
        global bad_intel
        global bad_scripts
        global clone
        global schema
        global args
        global supress_regex
        global results

        results = {}
        # Disable SSL related warnings
        warnings.filterwarnings('ignore')

        # Processing command line arguments
        parser = argparse.ArgumentParser()
        # Options
        parser.add_argument('-u', '--url', help='root url', dest='root')
        parser.add_argument('-c', '--cookie', help='cookie', dest='cook')
        parser.add_argument('-r',
                            '--regex',
                            help='regex pattern',
                            dest='regex')
        parser.add_argument('-e',
                            '--export',
                            help='export format',
                            dest='export',
                            choices=['csv', 'json'])
        parser.add_argument('-o',
                            '--output',
                            help='output directory',
                            dest='output')
        parser.add_argument('-l',
                            '--level',
                            help='levels to crawl',
                            dest='level',
                            type=int)
        parser.add_argument('-t',
                            '--threads',
                            help='number of threads',
                            dest='threads',
                            type=int)
        parser.add_argument('-d',
                            '--delay',
                            help='delay between requests',
                            dest='delay',
                            type=float)
        parser.add_argument('-v',
                            '--verbose',
                            help='verbose output',
                            dest='verbose',
                            action='store_true')
        parser.add_argument('-s',
                            '--seeds',
                            help='additional seed URLs',
                            dest='seeds',
                            nargs="+",
                            default=[])
        parser.add_argument('--stdout',
                            help='send variables to stdout',
                            dest='std')
        parser.add_argument('--user-agent',
                            help='custom user agent(s)',
                            dest='user_agent')
        parser.add_argument('--exclude',
                            help='exclude URLs matching this regex',
                            dest='exclude')
        parser.add_argument('--timeout',
                            help='http request timeout',
                            dest='timeout',
                            type=float)
        parser.add_argument('-p',
                            '--proxy',
                            help='Proxy server IP:PORT or DOMAIN:PORT',
                            dest='proxies',
                            type=proxy_type)

        # Switches
        parser.add_argument('--clone',
                            help='clone the website locally',
                            dest='clone',
                            action='store_true')
        parser.add_argument('--headers',
                            help='add headers',
                            dest='headers',
                            action='store_true')
        parser.add_argument('--dns',
                            help='enumerate subdomains and DNS data',
                            dest='dns',
                            action='store_true')
        parser.add_argument('--keys',
                            help='find secret keys',
                            dest='api',
                            action='store_true')
        parser.add_argument('--update',
                            help='update photon',
                            dest='update',
                            action='store_true')
        parser.add_argument('--only-urls',
                            help='only extract URLs',
                            dest='only_urls',
                            action='store_true')
        parser.add_argument('--wayback',
                            help='fetch URLs from archive.org as seeds',
                            dest='archive',
                            action='store_true')
        args = parser.parse_args()

        # Debug output of the parsed values, followed by the overrides that
        # replace them with the method's own arguments and fixed settings.
        print("------------------------------------------------")
        print(args.root)
        print(type(args.level))
        print(type(args.threads))
        print(args.api)
        print(args.archive)
        print(args.export)
        args.root = "http://" + target
        args.level = int(level_)
        args.threads = 30
        args.api = True
        args.archive = True
        args.export = "json"

        # If the user has supplied --update argument
        if args.update:
            updater()
            quit()

        # If the user has supplied a URL
        if args.root:
            main_inp = args.root
            if main_inp.endswith('/'):
                # We will remove it as it can cause problems later in the code
                main_inp = main_inp[:-1]
        # If the user hasn't supplied an URL
        else:
            print('\n' + parser.format_help().lower())
            quit()

        clone = args.clone
        headers = args.headers  # prompt for headers
        verbose = args.verbose  # verbose output
        delay = args.delay or 0  # Delay between requests
        timeout = args.timeout or 6  # HTTP request timeout
        cook = args.cook or None  # Cookie
        api = bool(
            args.api)  # Extract high entropy strings i.e. API keys and stuff

        proxies = []
        if args.proxies:
            print("%s Testing proxies, can take a while..." % info)
            for proxy in args.proxies:
                if is_good_proxy(proxy):
                    proxies.append(proxy)
                else:
                    print("%s Proxy %s doesn't seem to work or timedout" %
                          (bad, proxy['http']))
            print("%s Done" % info)
            if not proxies:
                print("%s no working proxies, quitting!" % bad)
                exit()
        else:
            proxies.append(None)

        crawl_level = args.level or 2  # Crawling level
        thread_count = args.threads or 2  # Number of threads
        only_urls = bool(args.only_urls)  # Only URLs mode is off by default

        # Variables we are gonna use later to store stuff
        keys = set()  # High entropy strings, prolly secret keys
        files = set()  # The pdf, css, png, etc files.
        intel = set(
        )  # The email addresses, website accounts, AWS buckets etc.
        robots = set()  # The entries of robots.txt
        custom = set()  # Strings extracted by custom regex pattern
        failed = set()  # URLs that photon failed to crawl
        scripts = set()  # THe Javascript files
        external = set(
        )  # URLs that don't belong to the target i.e. out-of-scope
        # URLs that have get params in them e.g. example.com/page.php?id=2
        fuzzable = set()
        endpoints = set()  # URLs found from javascript files
        processed = set(['dummy'])  # URLs that have been crawled
        # URLs that belong to the target i.e. in-scope
        internal = set(args.seeds)

        everything = []
        bad_scripts = set()  # Unclean javascript file urls
        bad_intel = set()  # needed for intel filtering

        core.config.verbose = verbose

        if headers:
            # Store the prompt result under a different name: assigning to
            # `prompt` here would make it a local of this method and the
            # call itself would raise UnboundLocalError.
            try:
                header_text = prompt()
            except FileNotFoundError as e:
                print('Could not load headers prompt: {}'.format(e))
                quit()
            headers = extract_headers(header_text)

        # If the user hasn't supplied the root URL with http(s), we will handle it
        if main_inp.startswith('http'):
            main_url = main_inp
        else:
            # Probe HTTPS first; fall back to HTTP on failure. Exception
            # (not a bare except) keeps Ctrl-C and SystemExit working.
            try:
                requests.get('https://' + main_inp,
                             proxies=random.choice(proxies))
                main_url = 'https://' + main_inp
            except Exception:
                main_url = 'http://' + main_inp

        schema = main_url.split('//')[0]  # https: or http:?
        # Adding the root URL to internal for crawling
        internal.add(main_url)
        # Extracts host out of the URL
        host = urlparse(main_url).netloc

        # NOTE(review): the next assignment overrides this one, so --output
        # is currently ignored; confirm that is intended.
        output_dir = args.output or host
        output_dir = "results"

        try:
            domain = top_level(main_url)
        except Exception:
            domain = host

        if args.user_agent:
            user_agents = args.user_agent.split(',')
        else:
            with open(sys.path[0] + '/core/user-agents.txt', 'r') as uas:
                user_agents = [agent.strip('\n') for agent in uas]

        supress_regex = False

        # Records the time at which crawling started
        then = time.time()

        # Step 1. Extract urls from robots.txt & sitemap.xml
        zap(main_url, args.archive, domain, host, internal, robots, proxies)

        # This is so the level 1 emails are parsed as well
        internal = set(remove_regex(internal, args.exclude))

        # Step 2. Crawl recursively to the limit specified in "crawl_level"
        for level in range(crawl_level):
            # Links to crawl = (all links - already crawled links) - links not to crawl
            links = remove_regex(internal - processed, args.exclude)
            # If links to crawl are 0 i.e. all links have been crawled
            if not links:
                break
            # if crawled links are somehow more than all links. Possible? ;/
            elif len(internal) <= len(processed):
                if len(internal) > 2 + len(args.seeds):
                    break
            print('%s Level %i: %i URLs' % (run, level + 1, len(links)))
            try:
                flash(self.extractor, links, thread_count)
            except KeyboardInterrupt:
                print('')
                break

        if not only_urls:
            for match in bad_scripts:
                if match.startswith(main_url):
                    scripts.add(match)
                elif match.startswith('/') and not match.startswith('//'):
                    scripts.add(main_url + match)
                elif not match.startswith('http') and not match.startswith(
                        '//'):
                    scripts.add(main_url + '/' + match)
            # Step 3. Scan the JavaScript files for endpoints
            print('%s Crawling %i JavaScript files' % (run, len(scripts)))
            flash(self.jscanner, scripts, thread_count)

            for url in internal:
                if '=' in url:
                    fuzzable.add(url)

            for match, intel_name, url in bad_intel:
                if isinstance(match, tuple):
                    for x in match:  # Because "match" is a tuple
                        if x != '':  # If the value isn't empty
                            if intel_name == "CREDIT_CARD":
                                # NOTE(review): luhn() receives the whole
                                # tuple here, not the element x; verify.
                                if not luhn(match):
                                    # garbage number
                                    continue
                            intel.add("%s:%s" % (intel_name, x))
                else:
                    if intel_name == "CREDIT_CARD":
                        if not luhn(match):
                            # garbage number
                            continue
                    intel.add("%s:%s:%s" % (url, intel_name, match))
                # NOTE(review): this scan of `external` is nested inside the
                # bad_intel loop, so it repeats per item and never runs when
                # bad_intel is empty; confirm whether it should be hoisted.
                for url in external:
                    try:
                        if top_level(url, fix_protocol=True) in INTELS:
                            intel.add(url)
                    except Exception:
                        # Best effort: skip URLs top_level() cannot handle
                        pass

        # Records the time at which crawling stopped
        now = time.time()
        # Finds total time taken
        diff = (now - then)
        minutes, seconds, time_per_request = timer(diff, processed)

        # Step 4. Save the results
        if not os.path.exists(output_dir):  # if the directory doesn't exist
            os.mkdir(output_dir)  # create a new directory

        datasets = [
            files, intel, robots, custom, failed, internal, scripts, external,
            fuzzable, endpoints, keys
        ]
        dataset_names = [
            'files', 'intel', 'robots', 'custom', 'failed', 'internal',
            'scripts', 'external', 'fuzzable', 'endpoints', 'keys'
        ]

        writer(datasets, dataset_names, output_dir)
        # Printing out results
        print(('%s-%s' % (red, end)) * 50)
        for dataset, dataset_name in zip(datasets, dataset_names):
            if dataset:
                print('%s %s: %s' %
                      (good, dataset_name.capitalize(), len(dataset)))
        print(('%s-%s' % (red, end)) * 50)

        print('%s Total requests made: %i' % (info, len(processed)))
        print('%s Total time taken: %i minutes %i seconds' %
              (info, minutes, seconds))
        print('%s Requests per second: %i' %
              (info, int(len(processed) / diff)))

        # Rebuild the datasets as a name -> list mapping for export/return
        datasets = {
            'files': list(files),
            'intel': list(intel),
            'robots': list(robots),
            'custom': list(custom),
            'failed': list(failed),
            'internal': list(internal),
            'scripts': list(scripts),
            'external': list(external),
            'fuzzable': list(fuzzable),
            'endpoints': list(endpoints),
            'keys': list(keys)
        }

        if args.dns:
            print('%s Enumerating subdomains' % run)
            from plugins.find_subdomains import find_subdomains
            subdomains = find_subdomains(domain)
            print('%s %i subdomains found' % (info, len(subdomains)))
            writer([subdomains], ['subdomains'], output_dir)
            datasets['subdomains'] = subdomains
            from plugins.dnsdumpster import dnsdumpster
            print('%s Generating DNS map' % run)
            dnsdumpster(domain, output_dir)

        if args.export:
            from plugins.exporter import exporter
            # exporter(directory, format, datasets)
            results = datasets
            exporter(output_dir, args.export, datasets)

        print('%s Results saved in %s%s%s directory' %
              (good, green, output_dir, end))

        if args.std:
            for string in datasets[args.std]:
                sys.stdout.write(string + '\n')

        return results, 200

コード例 #5

0

ファイルを表示

ファイル: photon.py プロジェクト: security-geeks/Photon

# URLs that belong to the target i.e. in-scope
internal = set(args.seeds)

everything = []
bad_scripts = set()  # Unclean javascript file urls
bad_intel = set() # needed for intel filtering

core.config.verbose = verbose

if headers:
    # NOTE: this rebinds the module-level name `prompt` from the callable
    # to its return value, so prompt() cannot be called again afterwards.
    try:
        prompt = prompt()
    except FileNotFoundError as e:
        print('Could not load headers prompt: {}'.format(e))
        quit()
    headers = extract_headers(prompt)

# If the user hasn't supplied the root URL with http(s), we will handle it
if main_inp.startswith('http'):
    main_url = main_inp
else:
    # Probe HTTPS first and fall back to plain HTTP if the request fails.
    # Catch Exception rather than using a bare except so that Ctrl-C and
    # SystemExit are not silently turned into an "http://" fallback.
    try:
        requests.get('https://' + main_inp, proxies=random.choice(proxies))
    except Exception:
        main_url = 'http://' + main_inp
    else:
        main_url = 'https://' + main_inp

schema = main_url.split('//')[0] # https: or http:?
# Adding the root URL to internal for crawling
internal.add(main_url)
# Extracts host out of the URL