Python CrawlInfo Examples

Programming Language: Python

Namespace/Package Name: crawler.models

Class/Type: CrawlInfo

Examples at hotexamples.com: 4

Python CrawlInfo - 4 examples found. These are the top-rated real-world Python examples of crawler.models.CrawlInfo, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

save(4)

Example #1

Show file

File: startcrawlfrom.py Project: moinfar/ResearchGate-Analyser

    def handle(self, *args, **options):
        """Create a crawl record and queue the crawl task for it.

        Reads ``options['url'][0]`` as the initial URL, ``options['limit']``
        as the crawl limit and ``options['N'][0]`` as the second argument
        handed to ``start_crawl``. A ``CrawlInfo`` row is saved first so that
        its id can name the output directory under
        ``managed_data/crawled_publications/``.
        """
        start_url = options['url'][0]
        record = CrawlInfo(init_url=start_url, limit=options['limit'])
        record.save()

        # The directory is keyed by the id assigned when the record is saved.
        output_dir = "managed_data/crawled_publications/%d" % record.id
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        start_crawl.delay(record.id, options['N'][0])

Example #2

Show file

File: startauthorcrawlfrom.py Project: moinfar/ResearchGate-Analyser

    def handle(self, *args, **options):
        """Create an author crawl record and queue the crawl of its start page.

        Reads ``options['url'][0]``, ``options['limit']`` and
        ``options['branch_factor'][0]``. The saved ``CrawlInfo`` has type
        ``"author"``, ``i_limit`` fixed at 0 and ``o_limit`` set to the branch
        factor. Its id names the output directory under
        ``managed_data/crawled_authors/``.
        """
        record = CrawlInfo(
            init_url=options['url'][0],
            limit=options['limit'],
            type="author",
            i_limit=0,
            o_limit=options['branch_factor'][0],
        )
        record.save()

        # The directory is keyed by the id assigned when the record is saved.
        output_dir = "managed_data/crawled_authors/%d" % record.id
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # The researcher id is derived from the URL stored on the record.
        researcher_id = InformationDownloader.get_researcher_id_from_url(record.init_url)
        crawl_author_pages.delay(record.id, researcher_id)

Example #3

Show file

File: views.py Project: moinfar/ResearchGate-Analyser

def crawl_author_page(request):
    """Start an author crawl for the URLs given in the ``urls`` GET parameter.

    When ``urls`` is absent, the ``crawl_authors.html`` template is rendered.
    Otherwise a ``CrawlInfo`` of type ``'author'`` is saved (``init_url`` holds
    the raw ``urls`` value, ``i_limit`` is 0, ``limit`` and ``o_limit`` come
    from the ``limit`` and ``branch_factor`` GET parameters), its output
    directory under ``managed_data/crawled_authors/`` is created if missing,
    one ``crawl_author_pages`` task is queued per URL, and the response
    redirects to ``/crawl/status/<id>/``.

    URLs are newline-separated. Each entry is stripped of surrounding
    whitespace and blank entries are skipped, so a value such as
    ``"a\\r\\nb\\n"`` yields ``["a", "b"]`` rather than ``["a\\r", "b", ""]``.
    """
    raw_urls = request.GET.get('urls')
    if raw_urls is None:
        return render(request, 'crawl_authors.html')

    # Multi-line form values commonly arrive with CRLF line endings and a
    # trailing newline; splitting on "\n" alone would leave "\r" on each URL
    # and queue a task for an empty string.
    urls = [entry.strip() for entry in raw_urls.split("\n") if entry.strip()]

    crawl_info = CrawlInfo(init_url=raw_urls, limit=request.GET.get('limit'),
                           i_limit=0, o_limit=request.GET.get('branch_factor'), type='author')
    crawl_info.save()

    # The directory is keyed by the id assigned when the record is saved.
    output_dir = "managed_data/crawled_authors/%d" % crawl_info.id
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for url in urls:
        author_id = InformationDownloader.get_researcher_id_from_url(url)
        crawl_author_pages.delay(crawl_info.id, author_id)

    return redirect("/crawl/status/%d/" % crawl_info.id)

Example #4

Show file

File: views.py Project: moinfar/ResearchGate-Analyser

def crawl_page(request):
    """Start a publication crawl for the URLs given in the ``urls`` GET parameter.

    When ``urls`` is absent, the ``crawl.html`` template is rendered.
    Otherwise a ``CrawlInfo`` is saved (``init_url`` holds the raw ``urls``
    value; ``limit``, ``i_limit`` and ``o_limit`` come from the ``limit``,
    ``in_degree_limit`` and ``out_degree_limit`` GET parameters), its output
    directory under ``managed_data/crawled_publications/`` is created if
    missing, one task is queued per URL, and the response redirects to
    ``/crawl/status/<id>/``.

    URLs containing ``"/publication/"`` are queued via
    ``crawl_publication_page`` with the publication id extracted from the
    URL. Every other URL queues ``start_crawl`` with the crawl id and
    ``int(out_degree_limit)``.

    URLs are newline-separated. Each entry is stripped of surrounding
    whitespace and blank entries are skipped, so a value such as
    ``"a\\r\\nb\\n"`` yields ``["a", "b"]`` rather than ``["a\\r", "b", ""]``.

    Raises:
        TypeError, ValueError: from ``int()`` when a non-publication URL is
            present and ``out_degree_limit`` is missing or not an integer.
    """
    raw_urls = request.GET.get('urls')
    if raw_urls is None:
        return render(request, 'crawl.html')

    # Multi-line form values commonly arrive with CRLF line endings and a
    # trailing newline; splitting on "\n" alone would leave "\r" on each URL
    # and queue a task for an empty string.
    urls = [entry.strip() for entry in raw_urls.split("\n") if entry.strip()]

    crawl_info = CrawlInfo(init_url=raw_urls, limit=request.GET.get('limit'),
                           i_limit=request.GET.get('in_degree_limit'),
                           o_limit=request.GET.get('out_degree_limit'))
    crawl_info.save()

    # The directory is keyed by the id assigned when the record is saved.
    output_dir = "managed_data/crawled_publications/%d" % crawl_info.id
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for url in urls:
        if "/publication/" in url:
            publication_id = InformationDownloader.get_publication_id_from_url(url)
            crawl_publication_page.delay(crawl_info.id, publication_id)
        else:
            # NOTE(review): the URL itself is not passed to start_crawl;
            # presumably the task reads init_url from the CrawlInfo - confirm.
            # The conversion stays inside this branch so it only runs (and
            # can only fail) when a non-publication URL is present.
            start_crawl.delay(crawl_info.id, int(request.GET.get('out_degree_limit')))

    return redirect("/crawl/status/%d/" % crawl_info.id)