Example #1
0
# -*- coding: utf-8 -*-
"""
Pravda news articles spider
"""

from grab.tools.logs import default_logging

from spiders.pravda_archive import PravdaArchiveSpider
from config import default_spider_params


if __name__ == '__main__':
    # Script entry point: set up logging, run the Pravda archive spider and
    # print the statistics it renders afterwards.
    default_logging()

    # The banner names the spider that is actually launched here (the old
    # text was copied from a different launcher).  A parenthesised
    # single-argument print behaves identically on Python 2 and also parses
    # on Python 3.
    print("Scrape Pravda news archive")
    bot = PravdaArchiveSpider(**default_spider_params())

    # NOTE(review): timeout values are presumably seconds -- confirm against
    # the Grab documentation.
    bot.setup_grab(timeout=4096, connect_timeout=10)
    bot.run()
    print(bot.render_stats())
Example #2
0
# -*- coding: utf-8 -*-
from random import randint
from grab.tools.logs import default_logging

from spiders.gis import StoreSpider
from config import default_spider_params


def my_headers():
    """Build a dict of HTTP request headers with randomised q-values.

    The trailing ``q=0.N`` digit of the ``Accept``, ``Accept-Language`` and
    ``Accept-Charset`` headers is drawn with ``randint`` on every call, so
    consecutive calls may return different header values.  ``Keep-Alive``
    and ``Expect`` are constant.
    """
    # Draw the three digits in the same order the headers are listed so a
    # seeded random generator yields the same result as before.
    accept_digit = randint(2, 5)
    language_digit = randint(5, 9)
    charset_digit = randint(5, 7)

    headers = {}
    headers['Accept'] = (
        'text/xml,application/xml,application/xhtml+xml,'
        'text/html;q=0.9,text/plain;q=0.8,image/png,'
        '*/*;q=0.' + str(accept_digit)
    )
    headers['Accept-Language'] = 'ru-ru,ru;q=0.' + str(language_digit)
    headers['Accept-Charset'] = (
        'utf-8,windows-1251;q=0.7,*;q=0.' + str(charset_digit)
    )
    headers['Keep-Alive'] = '300'
    headers['Expect'] = ''
    return headers

if __name__ == '__main__':
    # Script entry point: set up logging, run StoreSpider with the request
    # headers built by my_headers(), then print the rendered statistics.
    default_logging()

    bot = StoreSpider(**default_spider_params())
    # my_headers() is called once here, so the same header values are handed
    # to setup_grab for the whole run.
    # NOTE(review): timeout values are presumably seconds -- confirm against
    # the Grab documentation.
    bot.setup_grab(timeout=4096, connect_timeout=10, headers=my_headers())
    bot.run()
    # Parenthesised single-argument print behaves identically on Python 2
    # and also parses on Python 3.
    print(bot.render_stats())
Example #3
0
# -*- coding: utf-8 -*-

from random import randint
from grab import Grab
from grab.spider import Spider, Task
from grab.tools.logs import default_logging

from spiders.simple import SimpleSpider

from config import default_spider_params, Session


def my_headers():
    """Return HTTP request headers whose q-values vary from call to call.

    The last digit of the final ``q=0.N`` weight in ``Accept``,
    ``Accept-Language`` and ``Accept-Charset`` is chosen with ``randint``;
    ``Keep-Alive`` and ``Expect`` always carry the same values.
    """
    # The random draws happen in header order (Accept, Accept-Language,
    # Accept-Charset), matching the order of the returned entries.
    accept = ''.join([
        'text/xml,application/xml,application/xhtml+xml,',
        'text/html;q=0.9,text/plain;q=0.8,image/png,',
        '*/*;q=0.',
        str(randint(2, 5)),
    ])
    language = 'ru-ru,ru;q=0.' + str(randint(5, 9))
    charset = 'utf-8,windows-1251;q=0.7,*;q=0.' + str(randint(5, 7))

    return dict([
        ('Accept', accept),
        ('Accept-Language', language),
        ('Accept-Charset', charset),
        ('Keep-Alive', '300'),
        ('Expect', ''),
    ])

if __name__ == '__main__':
    # Script entry point: set up logging, run SimpleSpider with the request
    # headers built by my_headers(), then print the rendered statistics.
    default_logging()

    bot = SimpleSpider(**default_spider_params())
    # my_headers() is evaluated once, so one set of header values is passed
    # to setup_grab for the whole run.
    # NOTE(review): timeout values are presumably seconds -- confirm against
    # the Grab documentation.
    bot.setup_grab(timeout=4096, connect_timeout=10, headers=my_headers())

    bot.run()
    # Parenthesised single-argument print behaves identically on Python 2
    # and also parses on Python 3.
    print(bot.render_stats())
from optparse import OptionParser

from grab import Grab
from grab.spider import Spider, Task
from grab.tools.logs import default_logging

from spiders.explore import ExploreSpider
from spiders.lang_python import LangPythonSpider
from config import default_spider_params, Session

if __name__ == '__main__':
    # Script entry point: pick a spider from the command line, route it
    # through a proxy list, run it and print the rendered statistics.
    default_logging()
    parser = OptionParser()

    # command line options
    parser.add_option("-p", "--python", action="store_true",
                      dest="parse_python", default=False,
                      help="run LangPythonSpider instead of the default "
                           "ExploreSpider")
    # The proxy list location used to be hard-coded; the default keeps the
    # original path so existing invocations behave the same.
    parser.add_option("--proxylist", dest="proxylist",
                      default='/var/proxylist.txt',
                      help="path to the proxy list file "
                           "[default: %default]")

    # Positional arguments are not used by this script.
    options, _ = parser.parse_args()

    # Parenthesised single-argument print behaves identically on Python 2
    # and also parses on Python 3.
    if options.parse_python:
        print("Scape python projects")
        bot = LangPythonSpider(**default_spider_params())
    else:
        print("Scrape trandings")
        bot = ExploreSpider(**default_spider_params())

    bot.setup_proxylist(options.proxylist, 'http', auto_change=True)
    # NOTE(review): timeout values are presumably seconds -- confirm against
    # the Grab documentation.
    bot.setup_grab(timeout=4096, connect_timeout=10)
    bot.run()
    print(bot.render_stats())