Code Example #1
def main(page_url, page_count):
    count = 1
    # cl (the project's crawler module), requests, and directory_path
    # are defined elsewhere in the project.
    page = cl.Crawl(page_url)
    page.link = []
    page.parse_aTags("div.thread-item a")
    page.set_link("", "")
    for i in page.link:
        # Skip the link that points back to the board index itself.
        if i == "https://www.pttweb.cc/bbs/Gossiping":
            continue
        post = cl.Crawl(i)
        post.link = []
        post.parse_aTags("a.externalHref")
        post.set_link("", "")
        print(post.link)
        for t in post.link:
            # Keep only direct i.imgur.com image links, skipping GIFs.
            if "i.imgur.com" not in t:
                continue
            if ".gif" in t:
                continue
            file_path = directory_path + "Page" + str(
                page_count) + " Photo" + str(count) + ".jpg"
            count += 1
            # Fetching with requests directly, instead of instantiating a
            # new Crawl object per image, keeps the download loop fast.
            hplink = requests.get(t)
            post.download(file_path, hplink)
        print("==========================")
Code Example #2
File: views.py  Project: zhyshkevich/Searching_engine
def indexURL(request):
    urls = []
    site = request.POST.get('query')

    # The query field may contain several space-separated URLs.
    if str(site).find(' ') != -1:
        urls.extend(str(site).split(" "))
    else:
        urls.append(site)

    # Optionally merge in URLs uploaded as a file, one per line.
    file_links = request.FILES.get('file_links')
    if file_links:
        for url in file_links:
            # Lines read from an uploaded file arrive as bytes.
            urls.append(url.decode().strip())

    for url in urls:
        if len(str(url)) < 5:
            continue
        if str(url).find('http') != 0:
            url = "http://" + str(url)
        print(url)
        crawler.Crawl(url)
        print("started indexing")

        # Pass the callable itself, not indexer.indexing() -- calling it
        # here would run the job once in the parent process and hand
        # Process a None target, so the workers would do nothing.
        workers = []
        for i in range(4):
            worker = mp.Process(target=indexer.indexing)
            workers.append(worker)
            worker.start()

        for i in range(4):
            workers[i].join(None)

    return render(request, 'indexURL.html')
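The multiprocessing detail in this view is worth isolating: mp.Process must receive the callable itself, not the result of calling it. A minimal standalone demonstration of the correct pattern (the indexing function here is a stand-in for the project's indexer.indexing):

import multiprocessing as mp

def indexing():
    # Stand-in for indexer.indexing(); the real work would happen here.
    print("indexing in", mp.current_process().name)

if __name__ == "__main__":
    # Pass the callable, not indexing() -- calling it would run the job
    # once in the parent and hand Process a None target.
    workers = [mp.Process(target=indexing) for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()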
Code Example #3
def xss(args):
    if args.url:
        links = []

        path = os.getcwd() + '/lib/website_scanner/xss'
        sys.path.insert(0, path)

        if args.this:
            colors.success('Performing XSS Vulnerability Scan on : {}'.format(
                args.url))
            links.append(args.url)
        else:
            colors.success('Collecting all the links, crawling : {}'.format(
                args.url))

            try:
                import crawler
                crawlObj = crawler.Crawl(url=args.url)
                links = crawlObj.getList()
            except ImportError:
                colors.error('Could not import the required module.')
                LOGGER.error('[-] Could not import the required module.')
            except Exception as e:
                LOGGER.error(e)

        try:
            import xss

            xssScanObj = xss.XSS(url=links,
                                 payload_file=os.getcwd() +
                                 '/payloads/xss_payloads.txt')
            xssScanObj.initiateEngine()
        except ImportError:
            colors.error('Could not import the required module')
            LOGGER.error('[-] Could not import the required module')
            sys.exit(1)
        except Exception as e:
            LOGGER.error(e)
    else:
        colors.error('Please enter a URL for XSS Scanning')
        LOGGER.error('[-] Please enter a URL for XSS Scanning')
        sys.exit(1)
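The xss.XSS engine imported above lives in the project's lib/website_scanner/xss directory and is not shown on this page. As a rough, illustrative sketch only, not the project's implementation, a reflected-XSS check with this interface typically loads the payload file and probes each collected link:

# Illustrative sketch of a reflected-XSS engine with the same interface;
# the project's real xss.XSS class may work quite differently.
import requests

class XSS:
    def __init__(self, url, payload_file):
        self.links = url  # list of URLs collected by the crawler
        with open(payload_file) as f:
            self.payloads = [line.strip() for line in f if line.strip()]

    def initiateEngine(self):
        for link in self.links:
            for payload in self.payloads:
                # Inject the payload as a query parameter ("q" is an
                # arbitrary choice here) and check for verbatim reflection.
                resp = requests.get(link, params={"q": payload})
                if payload in resp.text:
                    print("[possible XSS]", link, payload)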
Code Example #4
File: main.py  Project: yoengbae/WebCrawler
def __init__(self):
    super().__init__()
    self.crawl = crawler.Crawl()
    self.ui = uic.loadUi("asd.ui", self)  # load the Qt Designer .ui file
    self.ui.show()
    self.working = False
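uic.loadUi is PyQt's standard way to load a Qt Designer .ui file onto a widget at runtime, which is what the constructor above does. A self-contained version of the same pattern, assuming PyQt5 and omitting the project's crawler.Crawl dependency:

import sys
from PyQt5 import uic
from PyQt5.QtWidgets import QApplication, QMainWindow

class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        # Load the Designer layout directly onto this window; widgets
        # defined in asd.ui become attributes of self.
        uic.loadUi("asd.ui", self)

if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = MainWindow()
    win.show()
    sys.exit(app.exec_())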
Code Example #5
    if args.xss:
        if args.url:
            links = []

            path = os.getcwd() + '/lib/website_scanner/xss'
            sys.path.insert(0, path)

            if args.this:
                colors.success('Performing XSS Vulnerability Scan on : {}'.format(args.url))
                links.append(args.url)
            else:
                colors.success('Collecting all the links, crawling : {}'.format(args.url))

                try:
                    import crawler
                    crawlObj = crawler.Crawl(url=args.url)
                    links = crawlObj.getList()
                except ImportError:
                    colors.error('Could not import the required module.')
                    LOGGER.error('[-] Could not import the required module.')
                except Exception as e:
                    LOGGER.error(e)

            try:
                import xss

                xssScanObj = xss.XSS(url=links,
                                     payload_file=os.getcwd()+'/payloads/xss_payloads.txt')
                xssScanObj.initiateEngine()
            except ImportError:
                colors.error('Could not import the required module')
                LOGGER.error('[-] Could not import the required module')
                sys.exit(1)
            except Exception as e:
                LOGGER.error(e)