Example #1
from scrapy.cmdline import execute
from scrapy.settings import CrawlerSettings  # legacy Scrapy 0.x settings API


def valid_proxy():
    module_ = __import__('crawler.httpproxy.settings', {}, {}, [''])
    values = {u'RETRY_ENABLED': 0,
              u'DOWNLOAD_TIMEOUT': 1,
              }
    settings = CrawlerSettings(module_, values=values)
    execute(argv=["scrapy", "crawl", "BaiDuHomePageSpider"], settings=settings)
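Note: CrawlerSettings, used in this and several of the later examples, comes from the old Scrapy 0.x API; it was deprecated and later removed. As a rough, untested sketch of the same idea on a current Scrapy release (the spider name and the settings overrides are taken from the example above, and it assumes the script runs inside the Scrapy project), CrawlerProcess can be used instead:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def valid_proxy():
    # Load the project settings and apply the same per-run overrides as above.
    settings = get_project_settings()
    settings.set('RETRY_ENABLED', 0)
    settings.set('DOWNLOAD_TIMEOUT', 1)
    process = CrawlerProcess(settings)
    process.crawl('BaiDuHomePageSpider')  # spider name from the example above
    process.start()  # blocks until the crawl finishes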
Example #2
import logging
import time

from scrapy import cmdline


def main2(i, q):
    while True:
        pth = q.get()
        logging.debug(pth)
        cmdline.execute(['scrapy', 'runspider', 'code6s_allinone2.py', '-a', 'pth=' + pth])
        time.sleep(2)
Example #3
def main():
    # project_environment() is a context manager defined elsewhere in the
    # original project (not shown here).
    with project_environment():
        from scrapy.cmdline import execute
        execute()
Example #4
    def run(self):
        feconfig = self.configdata[const.FE_CONFIG]
        try:
            # ==================================================================
            # If the city has its own entry, use it; otherwise fall back to the
            # default config.
            # ==================================================================
            city_config = eval(feconfig[self.city_name])
        except Exception:
            city_config = {}

        start_page = city_config.get(const.START_PAGE,
                                     feconfig[const.DEFAULT_START_PAGE])
        end_page = city_config.get(const.END_PAGE,
                                   feconfig[const.DEFAULT_END_PAGE])

        values = {
            const.CONFIG_DATA: self.configdata,
            const.START_PAGE: int(start_page),
            const.END_PAGE: int(end_page),
        }

        settings_path = u'crawler.shc.fe.settings'
        module_import = __import__(settings_path, {}, {}, [''])
        settings = CrawlerSettings(module_import, values=values)
        execute(argv=["scrapy", "crawl", 'SHCSpider'], settings=settings)
Example #5
    def run(self):
        values = configdata.get(DetailSpiderConst.DetailStatusSettings, {})
        values[const.DETAIL_LIST] = self.cis
        values.update(**{
            const.CONFIG_DATA: self.configdata,
        })

        if ScrapyConst.Console in values:
            if values[ScrapyConst.Console] == u'1':  # out to console
                values[ScrapyConst.LOG_FILE] = None
            else:
                log_dir = values.get(ScrapyConst.LOG_DIR, os.getcwd())

                if not os.path.exists(log_dir):
                    os.makedirs(log_dir)

                if ScrapyConst.LOG_FILE in values:
                    log_file = values[ScrapyConst.LOG_FILE]
                    values[ScrapyConst.LOG_FILE] = os.sep.join([log_dir, log_file])

        settings_path = u'crawler.shc.fe.settings'
        module_import = __import__(settings_path, {}, {}, [''])
        settings = CrawlerSettings(module_import, values=values)
        execute(argv=["scrapy", "crawl", 'CarStatusSpider'], settings=settings)
Example #6
from scrapy import cmdline


def work():
    # projectLists = ['GLYD', 'YBJR']
    # cmdline.execute("scrapy crawl mininova".split())
    # for i in xrange(len(projectLists)):

    cmdline.execute("scrapy crawl GLYD".split())
    # Never reached: cmdline.execute() exits the process (via sys.exit) once
    # the crawl command finishes.
    print("hello world.")
Example #7
    def run(self):
        try:
            settings = CrawlerSettings(__import__(self.dp.dfcfg.settingspath, {}, {}, ['']))
            execute(argv=["scrapy", "crawl", self.dp.dfcfg.spidername], settings=settings)
        except Exception:
            # Re-raise unchanged; a bare raise preserves the original traceback.
            raise
Example #8
import argparse
import os
import re

from scrapy import cmdline


def main():
    # TODO: generate a range of dates.
    #
    # Parse the date arguments:
    #  - when calling the tsn spider, these are dates bounding a range in the
    #    current season
    #  - when calling the nhl spider, these are the season start year and the
    #    season end year
    parser = argparse.ArgumentParser()
    parser.add_argument("startDate", help="input a start date: mm/dd/yr")
    parser.add_argument("endDate", help="input an end date: mm/dd/yr")
    args = parser.parse_args()
    print(args)

    # First delete the boxscoreAddressList output from previous executions.
    try:
        os.remove("gameCrawlerItems.json")
    except OSError:
        pass

    # Run scrapy with the dates and output file as parameters; the spider
    # writes the JSON object boxscoreAddressList.
    if re.match(r'(\d)(\d)(\d)(\d) (\d)(\d)(\d)(\d)', args.startDate + ' ' + args.endDate):
        print("season specified - scraping nhl")
        execString = "scrapy crawl nhl -a seasonStart=%s -a seasonEnd=%s" % (args.startDate, args.endDate)
    else:
        print("date range specified - scraping tsn")
        execString = "scrapy crawl tsn -a startDate=%s -a endDate=%s" % (args.startDate, args.endDate)

    cmdline.execute(execString.split())
Example #9
def fetch_proxy():
    module_ = __import__('crawler.httpproxy.settings', {}, {}, [''])
    values = {u'DOWNLOAD_DELAY': 0,
              u'DOWNLOAD_TIMEOUT': 1,
              u'RETRY_ENABLED': 0,
              }

    settings = CrawlerSettings(module_, values=values)
    execute(argv=["scrapy", "crawl", "FiveOneNewHTTPProxySpider"], settings=settings)
Example #10
import sys

from scrapy.cmdline import execute


def main():
    # args: city, district
    args = sys.argv[1:]
    args_str = "scrapy crawl dbhouse"
    level = ["city", "area"]
    for i in range(len(args)):
        args_str = args_str + " -a " + level[i] + "=" + args[i]

    print(args_str)
    execute(args_str.split())
Example #11
    def run(self):
        try:
            values = self.build_values()
        except Exception:
            raise
        settings_path = u'crawler.shc.fe.settings'
        module_import = __import__(settings_path, {}, {}, [''])
        settings = CrawlerSettings(module_import, values=values)
        execute(argv=["scrapy", "crawl", 'SHCSpider'], settings=settings)
Example #12
import os

from scrapy.cmdline import execute


def test():
    try:
        os.unlink("scraped/forums.db")
    except OSError:
        pass

    # cmd = 'scrapy crawl -L INFO -a config=phpbb3 -a url=http://www.raspberrypi.org/phpBB3 generic'
    # cmd = 'scrapy crawl -L INFO -a config=phpbb3.0.x generic -s JOBDIR=crawls/generic-1'
    # cmd = 'scrapy crawl -L INFO -a config=phpbb3.0.x generic'
    # cmd = 'scrapy crawl -L DEBUG -a config=phpbb3.0.x.test generic'
    cmd = 'scrapy crawl -L INFO -a config=qnap generic'
    execute(cmd.split())
Example #13
import logging
import time

from scrapy import cmdline


def main2(i, q):
    while True:
        pth = q.get()
        logging.debug(pth)
        # code6_allinone.main(pth)
        # os.getenv("export SCRAPY_SETTINGS_MODULE=allinone.settings")
        # sys.path.append("/home/desktop/flipkart/allinone")

        # output = subprocess.check_output(['scrapy', 'runspider', 'code6s_allinone.py', '-a', 'pth='+pth])
        # os.environ.get("export PYTHONPATH=/home/desktop/flipkart/allinone/")
        cmdline.execute(['scrapy', 'runspider', 'code6s_allinone.py', '-a', 'pth=' + pth])
        time.sleep(2)
Example #14
def main():
    args = get_args()
    url = args.url
    user = args.login
    password = args.password
    try:
        execute(['scrapy', 'crawl', 'quickscan_spider',
                 '-a', 'url=%s' % url, '-a', 'user=%s' % user, '-a',
                 'pw=%s' % password, '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                 '-a', 'basic=%s' % args.basic, '-a', 'fast=%s' % args.fast])
    except KeyboardInterrupt:
        sys.exit()
Example #15
def main():
    args = get_args()
    rate = args.ratelimit
    if rate not in [None, '0']:
        rate = str(60 / float(rate))
    try:
        execute(['scrapy', 'crawl', 'spider', 
                 '-a', 'url=%s' % args.url, 
                 '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                 '-s', 'DOWNLOAD_DELAY=%s' % rate])
    except KeyboardInterrupt:
        sys.exit()
Example #16
def main():
    args = get_args()
    rate = args.ratelimit
    if rate not in [None, '0']:
        rate = str(60 / float(rate))
    try:
        execute(['scrapy', 'crawl', 'xsscrapy',
                 '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
                 'pw=%s' % args.password, '-a', 'basic=%s' % args.basic, '-a', 'hostlimit=%s' % args.hostlimit,
                 '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                 '-s', 'DOWNLOAD_DELAY=%s' % rate])
    except KeyboardInterrupt:
        sys.exit()
Example #17
import getopt
import sys

from scrapy import cmdline


def main(argv):
    try:
        opts, args = getopt.getopt(argv, 's:')
        if len(opts) == 0:
            raise getopt.GetoptError(u'Argument "-s" is required')
        for opt, arg in opts:
            if opt != '-s' or arg == '':
                raise getopt.GetoptError(u'Argument "-s" is required')
            cmdline.execute(('scrapy crawl ' + arg).split())
            break
    except getopt.GetoptError:
        print('Error!\nUsage: main.py -s <spider name>')
        sys.exit(2)
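For comparison, the same option handling can be written with argparse instead of getopt; this is only an illustrative sketch (the -s option mirrors the example above, and the spider name is passed straight to the crawl command):

import argparse

from scrapy import cmdline


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--spider', required=True,
                        help='name of the spider to run')
    args = parser.parse_args()
    cmdline.execute(['scrapy', 'crawl', args.spider])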
Example #18
from scrapy.cmdline import execute


def main():
    # try:
    #     os.unlink("scraped/forums.db")
    # except OSError:
    #     pass

    # config = Config(file('example.cfg'))
    # print config
    # Start scrapy with some arguments!
    # try:
    cmd = 'scrapy crawl -L INFO -a config=qnap generic -s JOBDIR=crawls/generic-1'
    execute(cmd.split())
Example #19
    def handle(self, *args, **options):
        default_args = ['scrapy']
        argc = len(self._argv)
        if argc >= 4:
            default_args.extend(['crawl', 'walker'])
            default_args.extend(['-a', self._argv[2]])
            default_args.extend(['-a', self._argv[3]])
        else:
            self.stdout.write(self.help)
            return
        execute(default_args)
        # Note: execute() exits the process, so the two writes below are
        # normally never reached.
        self.stdout.write(str(args))
        self.stdout.write(str(options))
Example #20
import logging
import time

from scrapy import cmdline


def main3(i, q):
    for pth in iter(q.get, None):
        try:
            cmdline.execute(['scrapy', 'runspider', 'page5_second_scrapy_amazon.py', '-a', 'pth=%s' % pth])
            print(pth)
        except:
            # The bare except also swallows the SystemExit raised by
            # cmdline.execute() when the command finishes.
            pass

        logging.debug(pth)

        time.sleep(i + 2)
        q.task_done()

    q.task_done()
Example #21
    def handle(self, *args, **options):
        scrapydir = get_scrapyroot()
        chdir(scrapydir)
        default_args = ['scrapy', 'crawl']

        if len(self._argv) == 3:
            default_args.append(self._argv[2])
        else:
            self.stdout.write(self.help)
            return
        try:
            execute(default_args)
        except KeyError:
            self.stdout.write('spider not found')
Example #22
import logging
import time

from scrapy import cmdline


def main3(i, q):
    for pth in iter(q.get, None):
        try:
            cmdline.execute(["scrapy", "runspider", "page3_second_scrapy_homeshop18.py", "-a", "pth=%s" % (pth)])
            print(pth)
        except:
            # The bare except also swallows the SystemExit raised by
            # cmdline.execute() when the command finishes.
            pass

        logging.debug(pth)

        time.sleep(i + 2)
        q.task_done()

    q.task_done()
Example #23
def fetch51anonymousfreeproxy():
    # configdata, FetchProxySpiderConst and ScrapyConst come from the
    # surrounding project (imports not shown in this example).
    values = configdata.get(FetchProxySpiderConst.FetchFOAnonymousProxySettings, {})
    values[ScrapyConst.DOWNLOAD_TIMEOUT] = int(values.get(ScrapyConst.DOWNLOAD_TIMEOUT, 0))
    if ScrapyConst.Console in values:
        if values[ScrapyConst.Console] == u'1':  # out to console
            values[ScrapyConst.LOG_FILE] = None
        else:
            log_dir = values.get(ScrapyConst.LOG_DIR, os.getcwd())
            if ScrapyConst.LOG_FILE in values:
                log_file = values[ScrapyConst.LOG_FILE]
                values[ScrapyConst.LOG_FILE] = os.sep.join([log_dir, log_file])

    settings = CrawlerSettings(None, values=values)
    execute(argv=["scrapy", "crawl", "FOAnonymousSpider"], settings=settings)
Example #24
    def handle(self, *args, **options):
        if (not len(args) == 1) or (args[0] == u"help"):
            self.stdout.write(u"Usage: {0}\n".format(self.args))
            self.stdout.write(self.help)
        else:
            # Take a filename from the command line to crawl
            default = [u""]
            default.append(u"crawl")
            default.append(u"all")
            default.append(u"-s")
            default.append(u"URLS=" + unicode(args[0]))  # Python 2; use str() on Python 3

            from scrapy.cmdline import execute

            execute(default)
Example #25
def main():
    args = get_args()
    rate = args.ratelimit
    if rate not in [None, '0']:
        rate = str(60 / float(rate))
    try:
        cookie_key = args.cookie.split('=',1)[0] if args.cookie else None
        cookie_value = ''.join(args.cookie.split('=',1)[1:]) if args.cookie else None
        execute(['scrapy', 'crawl', 'xsscrapy',
                 '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
                 'pw=%s' % args.password, '-a', 'basic=%s' % args.basic,
                 '-a', 'cookie_key=%s' % cookie_key, '-a', 'cookie_value=%s' % cookie_value,
                 '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                 '-s', 'DOWNLOAD_DELAY=%s' % rate])
    except KeyboardInterrupt:
        sys.exit()
Example #26
from scrapy import cmdline


def main():
    # os.environ.get("export SCRAPY_SETTINGS_MODULE=allinone.settings")
    # sys.path.append("/home/desktop/flipkart/allinone/")
    # os.environ.get("export PYTHONPATH=/home/desktop/flipkart/allinone/")
    # output = subprocess.check_output(['scrapy',  'crawl', 'collect_link_and_extract',   '-a',  'pth=dirthree08022014/women/womens-footwear/womens-footwear-xx-sports-shoes-xx-bnbcbl/ZEMgear.csv'])
    # time.sleep(2)
    # q.task_done()

    cmdline.execute(
        [
            "scrapy",
            "crawl",
            "collect_link_and_extract",
            "-a",
            "pth=dirthree08022014/women/womens-footwear/womens-footwear-xx-sports-shoes-xx-bnbcbl/ZEMgear.csv",
        ]
    )
Example #27
    def run(self):
        feconfig = self.configdata[const.FE_CONFIG]
        try:
        # ===================================================================
        # If the city has its own entry, use it; otherwise fall back to the
        # default config.
        # ===================================================================
            city_config = eval(feconfig[self.city_name])
        except Exception:
            city_config = {}
        
        start_page = city_config.get(const.START_PAGE,
                             feconfig[const.DEFAULT_START_PAGE])
        end_page = city_config.get(const.END_PAGE,
                                   feconfig[const.DEFAULT_END_PAGE])
#        values = {
#                  const.CONFIG_DATA:self.configdata,
#                  const.START_PAGE:int(start_page),
#                  const.END_PAGE:int(end_page),
#                  }
#        settings = u'crawler.shc.fe.settings'
#        module_import = __import__(settings, {}, {}, [''])
#        settings = CrawlerSettings(module_import, values=values)
#        execute(argv=["scrapy", "crawl", 'SHCSpider' ], settings=settings)

        values = configdata.get(ListSpiderConst.ListSettings, {})
        
        values.update(**{
                  const.CONFIG_DATA:self.configdata,
                  const.START_PAGE:int(start_page),
                  const.END_PAGE:int(end_page),
                  })
        
        if ScrapyConst.Console in values:
            if values[ScrapyConst.Console] == u'1':  # out to console
                values[ScrapyConst.LOG_FILE] = None
            else:
                log_dir = values.get(ScrapyConst.LOG_DIR, os.getcwd())
                if ScrapyConst.LOG_FILE in values:
                    log_file = values[ScrapyConst.LOG_FILE]
                    values[ScrapyConst.LOG_FILE] = os.sep.join([log_dir, log_file])
                    
        settings_path = u'crawler.shc.fe.settings'
        module_import = __import__(settings_path, {}, {}, [''])
        settings = CrawlerSettings(module_import, values=values)
        execute(argv=["scrapy", "crawl", 'SHCSpider' ], settings=settings)
Example #28
def main():
    args = get_args()
    url = args.url
    user = args.login
    password = args.password
    if args.basic:
        basic = 'true'
    else:
        basic = 'false'
    if args.connections:
        conns = args.connections
    # Note: if args.connections is not set, conns is never assigned and the
    # execute() call below raises a NameError.
    try:
        execute(['scrapy', 'crawl', 'xsscrapy',
                 '-a', 'url=%s' % url, '-a', 'user=%s' % user, '-a',
                 'pw=%s' % password, '-s', 'CONCURRENT_REQUESTS=%s' % conns,
                 '-a', 'basic=%s' % basic])
    except KeyboardInterrupt:
        sys.exit()
Example #29
    def run(self):
        values = configdata.get(const.vpsettings, {})
        values[AppConst.proxies] = self.proxies
        values[const.DOWNLOAD_TIMEOUT] = int(values.get(const.DOWNLOAD_TIMEOUT, 5))
        if const.Console in values:
            if values[const.Console] == u'1':  # out to console
                values[const.LOG_FILE] = None
            else:
                log_dir = values.get(const.LOG_DIR, os.getcwd())
                if const.LOG_FILE in values:
                    logfile_prefix = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                    log_file = '%s_%s' % (logfile_prefix, values[const.LOG_FILE])
                    values[const.LOG_FILE] = os.sep.join([log_dir, log_file])

        # valid_urls is defined elsewhere in the original project (not shown here).
        values[const.RETRY_TIMES] = len(valid_urls)
        settings_path = u'vp.settings'
        module_import = __import__(settings_path, {}, {}, [''])

        settings = CrawlerSettings(module_import, values=values)
        execute(argv=["scrapy", "crawl", 'SOSOSpider'], settings=settings)
Example #30
import json
import multiprocessing
import os

from scrapy.cmdline import execute


def parse_course(url):
    if os.path.exists('video_url.json'):
        os.remove('video_url.json')
    try:
        execute([
            'scrapy', 'crawl', 'video', '-o', 'video_url.json', '-a', 'url={}'.format(url)
        ])
    except SystemExit:
        # execute() ends with sys.exit(); catching SystemExit lets the rest of
        # this function run after the crawl finishes.
        print(u'Crawled course video urls.')
    with open('video_url.json') as fp:
        rv = json.loads(fp.read())
    rv.sort()  # note: rv is a list of dicts, so this sort only works on Python 2
    count = len(rv)
    print(u'Start downloading... total %d items' % count)
    # processes = 2
    pool = multiprocessing.Pool(2)
    for i in range(count):
        pool.apply_async(download_file, args=(rv[i].get('url'), rv[i].get('title')))
    pool.close()
    pool.join()
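download_file() is not defined in this example. A minimal sketch of what it might look like, assuming the requests library is available and that each crawled item carries 'url' and 'title' fields (the .mp4 extension is just a guess for illustration):

import requests


def download_file(url, title):
    # Hypothetical helper, not part of the original example: stream the file
    # to disk under the item's title.
    response = requests.get(url, stream=True)
    response.raise_for_status()
    with open('%s.mp4' % title, 'wb') as fp:
        for chunk in response.iter_content(chunk_size=8192):
            fp.write(chunk)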
Example #31

    def parse(self, response):
        filename = response.url.split("/")[-2]
        logger.info("filename is {0}".format(filename))
        for book in response.xpath("//li[@class='subject-item']"):
            content = ComonItemLoader(BookItem(), selector=book)

            content.add_css("book_name", "div.info h2 a::attr(title)")
            content.add_css("auth_info", "div.info div.pub::text")
            content.add_css("point", "div.star.clearfix span.rating_nums::text")
            content.add_css("person_num", "div.star.clearfix span.pl::text")
            content.add_css("resume", "div.info p::text")
            content.add_css("book_detail_url", "div.info h2 a::attr(href)")
            ci2 = content.load_item()
            # logger.info("!!!!!!!{0}".format(ci2))
            yield ci2

        # next_page = response.css("div.paginator span.next a::attr(href)").extract_first()
        # if next_page and self.page_count < 3:
        #     yield scrapy.Request(self.ORIGIN_URL + next_page, callback=self.parse)
        #     self.page_count += 1
        # with open("Resources/report/" + filename, 'wb') as f:
        #     f.write(response.body)


if __name__ == '__main__':
    # ds = DoubanSpider()
    # ds.parse()
    cmdline.execute("scrapy crawl douban".split())
Example #32
# -*- coding: utf-8 -*-

from scrapy.cmdline import execute

import sys
import os

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
execute(["scrapy", "crawl", "btdidi"])
Example #33
# -*- coding: utf-8 -*-
import os
import sys
from scrapy.cmdline import execute
filename = os.path.dirname(os.path.abspath(__file__))
sys.path.append(filename)
execute(['scrapy', 'crawl', 'tianqiSpider'])
Example #34
import os
import sys
from scrapy.cmdline import execute

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Crawl movie reviews and their replies
SPIDER_NAME = "movie_review"

execute(["scrapy", "crawl", SPIDER_NAME])
Example #35
from scrapy import cmdline
name = 'example'
cmdline.execute(
    'scrapy crawl {} -s LOG_FILE=cuiqingcai.log'.format(name).split())
Example #36
from scrapy import cmdline

cmdline.execute(
    'scrapy crawl northCapital -a startDate=2020-06-30 -a duration=7'.split())
Example #37
from scrapy import cmdline
cmdline.execute('scrapy crawl douban_spider -o ../../output.json'.split())
Example #38
from scrapy.cmdline import execute

import sys
import os

sys.path.append(os.path.dirname(os.path.abspath(__file__)))

execute(["scrapy", "crawl", "distributed_spider"])
Example #39
#!/usr/python3.6.0/bin/python3.6

# -*- coding: utf-8 -*-
import re
import sys

from scrapy.cmdline import execute

if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])

    args = [sys.argv[0], "crawl", "zongheng"]

    sys.exit(execute(args))
Example #40
# coding=utf-8
from scrapy.cmdline import execute

execute('scrapy crawl carrankSP'.split())
Example #41
from scrapy import cmdline
cmdline.execute("scrapy crawl fcbtexas".split())
Example #42
from scrapy.cmdline import execute
execute(['scrapy', 'crawl', 'search'])  # the third element is the name of the spider to run
Example #43
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@version: python 3.7.0
@author: liuxuchao
@contact: [email protected]
@software: PyCharm
@file: run.py
@time: 2020-05-07 23:35
"""

from scrapy import cmdline

name = 'huawa1'
cmd = 'scrapy crawl {0}'.format(name)
cmdline.execute(cmd.split())
Example #44
import os
import sys
from scrapy.cmdline import execute

# Add the project directory to sys.path.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Print these paths to see what they contain; it makes it easier to
# understand why the line above is written this way.
print(os.path.abspath(__file__))
print(os.path.dirname(os.path.abspath(__file__)))

# Start the spider.
execute(["scrapy", "crawl", "tencentJob"])
Example #45
from scrapy import cmdline
cmdline.execute('scrapy crawl pm'.split())
Example #46
from scrapy import cmdline
cmdline.execute('scrapy crawl examples -o examples.json'.split())
Example #47
from scrapy.cmdline import execute
import sys
import os

sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# execute(["scrapy","crawl","jobbole"])
# execute(["scrapy","crawl","zhihu"])
execute(["scrapy", "crawl", "lagou"])
Example #48
import os

from scrapy.cmdline import execute
import sys
# sys.path.append("/root/PycharmProjects/flasktools/SpiderUsingScrapy/ArticleSpider")
print(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# execute(["scrapy", "crawl", "jobbole"])
execute(["scrapy", "crawl", "zhihu"])
Example #49
from scrapy import cmdline
# cmdline.execute("scrapy crawl douban_spider -o douban.csv".split())
cmdline.execute("scrapy crawl douban_spider".split())
Example #50
# -*- coding: utf-8 -*-

import scrapy.cmdline as cmd

cmd.execute('scrapy crawl douban'.split())
Example #51
from scrapy import cmdline
cmdline.execute("scrapy crawl bancosardegnait".split())
Example #52
# -*- coding: utf-8 -*-
'''
@time: 2018/8/22 20:22
@author: Jack Luo
@file: rum.py
'''
'''
Download the images and the music at the same time; they share one pipeline.
'''
# TODO: 1. inside each album, add a thumbnail of the current image rather than the full-size image

from scrapy.cmdline import execute

execute('scrapy crawl luo'.split())
# execute('scrapy crawl luo -o luowang.xml'.split())
Example #53
        # Fragment of a spider callback; name_rule, res and LoseMenItem are
        # defined in earlier parts of the original file (not shown in this
        # snippet).
        cardNum_rule = '"cardNum":"(.*?)"'
        areaName_rule = '"areaName":"(.*?)"'
        courtName_rule = '"courtName":"(.*?)"'
        gistId_rule = '"gistId":"(.*?)"'
        duty_rule = '"duty":"(.*?)"'
        performance_rule = '"performance":"(.*?)"'

        name = re.findall(name_rule, res.text)
        cardNum = re.findall(cardNum_rule, res.text)
        areaName = re.findall(areaName_rule, res.text)
        courtName = re.findall(courtName_rule, res.text)
        gistId = re.findall(gistId_rule, res.text)
        duty = re.findall(duty_rule, res.text)
        performance = re.findall(performance_rule, res.text)
        for i in name:
            # Note: list.index() returns the first match, so duplicate names
            # all map to the same row; enumerate() would be more robust.
            index = name.index(i)
            item = LoseMenItem()
            item['name'] = name[index]
            item['cardNum'] = cardNum[index]
            item['areaName'] = areaName[index]
            item['courtName'] = courtName[index]
            item['gistId'] = gistId[index]
            item['duty'] = duty[index]
            item['performance'] = performance[index]
            yield item


if __name__ == '__main__':
    from scrapy import cmdline
    cmdline.execute(['scrapy', 'crawl', 'poor_men'])
Example #54
from scrapy import cmdline
cmdline.execute("scrapy crawl huangye888".split())
Example #55
from scrapy import cmdline

cmdline.execute("scrapy crawl weibostock -o items.json".split())
Example #56
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from scrapy.cmdline import execute

execute("scrapy crawl GameSpider -s JOBDIR=jobs".split())
Example #57
from scrapy.cmdline import execute
import os
import sys

base_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(base_path)
execute(['scrapy','crawl','douban'])
Example #58
from scrapy.cmdline import execute  # calling this runs a scrapy command
import sys
import os

# The project directory has to be on sys.path for the scrapy command to work;
# you can print it to see what it is.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# print(os.path.dirname(os.path.abspath(__file__)))
# execute(['scrapy', 'crawl', 'HuaErJie'])
# execute(['scrapy', 'crawl', 'hejnews'])
execute(['scrapy', 'crawl', 'proxySpider'])
Example #59
# -*- coding:utf-8 -*-
__author__ = 'neuclil'

from scrapy.cmdline import execute

import sys
import os

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# execute(["scrapy", "crawl", "sz_stock_exchange"])
execute(["scrapy", "crawl", "cninfo"])
Example #60
#-*-coding:utf-8-*-
__author__ = 'Dzr'
from scrapy import cmdline

cmdline.execute("scrapy crawl atguigu_teacher".split())