Exemple #1
0
def main(source_, citylist_, period_):
	"""Crawl the given sources, retry failed cities, archive, and optionally repeat.

	Args:
		source_: Comma-separated source names. A falsy value means there is
			nothing to crawl and the function returns immediately.
		citylist_: Comma-separated city names, or a falsy value, which is
			handed to ``AsyncPipeline.start`` unchanged.
		period_: Number of hours to sleep between rounds. A falsy value means
			a single round. It can be replaced each round by the ``period``
			entry found in the ``crawl_config`` settings.

	The module-level ``error_cities`` and ``error_messages`` dicts are read
	for the retry pass and rebound to empty dicts before each new round.
	"""
	global error_cities
	global error_messages
	source = source_
	citylist = citylist_
	period = period_
	# Each message is pre-formatted into one string so the output is the same
	# under Python 2's print statement and Python 3's print function (the
	# double space matches what ``print "label: ", value`` used to emit).
	if source:
		source = source.split(",")
		print("source:  %s" % (source,))
	if citylist:
		citylist = citylist.split(",")
		print("city list:  %s" % (citylist,))

	# ``source`` never changes inside the loop, so checking once is enough.
	if not source:
		return

	while True:
		app = AsyncPipeline()
		app.start(source, citylist)

		# rescue
		# Iterate over a snapshot of the keys: the pipeline started below may
		# write to the shared dict, and mutating a dict while iterating over it
		# directly raises RuntimeError.
		# NOTE(review): a source whose list is empty is still retried with an
		# empty city list -- confirm that is what AsyncPipeline.start expects.
		for src in list(error_cities):
			print("Try to rescue %s" % (src,))
			remain_cities = error_cities[src]
			error_cities[src] = []
			error_messages[src] = []
			app = AsyncPipeline()
			app.start([src], remain_cities)

		# archive first
		archiver = Archiver()
		for src in source:
			archiver.archive(src, src, True)  # False: archive locally, True: archive to S3

		# repeat
		if not period:
			break
		time.sleep(int(period) * 3600)

		# Start the next round with clean error bookkeeping.
		error_cities = {}
		error_messages = {}

		# check config
		# Only the first source that has an entry in the config is consulted.
		stop_crawl = 0
		config = CheckConfig().check('crawl_config')
		for src in source:
			if src in config:
				settings = config[src]
				if "period" in settings:
					period = settings["period"]
				if "stop" in settings:
					stop_crawl = settings["stop"]
				break
		if stop_crawl == 1:
			break
Exemple #2
0
def _crawl_site(site, crawler, parser, citylist, sleep_interval):
	"""Run the pipeline for one site.

	Without a city list, ``start`` is called and whatever it returns as
	failed is passed once to ``rescue``. With a city list, only ``rescue``
	is called, on exactly those cities.
	"""
	app = Pipeline(crawler, parser, None, site)
	if citylist:
		app.rescue(citylist, sleep_interval)
		return
	error = app.start(sleep_interval)
	if len(error) > 0:
		app.rescue(error, sleep_interval)


def main(source_, citylist_, period_):
	"""Crawl the requested sites, archive the results, and optionally repeat.

	Args:
		source_: Comma-separated site names. A falsy value means there is
			nothing to crawl and the function returns immediately.
		citylist_: Comma-separated city names. When given, only these cities
			are processed (through ``Pipeline.rescue``) instead of a full crawl.
		period_: Number of hours to sleep between rounds. A falsy value means
			a single round. It can be replaced each round by the ``period``
			entry found in the ``crawl_config`` settings.
	"""
	source = source_
	citylist = citylist_
	period = period_
	# Each message is pre-formatted into one string so the output is the same
	# under Python 2's print statement and Python 3's print function (the
	# double space matches what ``print "label: ", value`` used to emit).
	if source:
		source = source.split(",")
		print("source:  %s" % (source,))
	if citylist:
		citylist = citylist.split(",")
		print("city list:  %s" % (citylist,))

	# ``source`` never changes inside the loop, so checking once is enough.
	if not source:
		return

	# Sites are always crawled in this fixed order, whatever order they appear
	# in ``source``. The factories are lambdas so that crawler/parser classes
	# are only looked up and instantiated for sites that were requested.
	site_factories = (
		("meituan", lambda: (MeituanCrawler(), MeituanParser())),
		("nuomi", lambda: (NuomiCrawler(), NuomiParser())),
		("lashou", lambda: (LashouCrawler(), LashouParser())),
		("wowo", lambda: (WowoCrawler(), WowoParser())),
		("dida", lambda: (DidaCrawler(), DidaParser())),
		("dianping", lambda: (DianpingCrawler(), DianpingParser())),
		("manzuo", lambda: (ManzuoCrawler(), ManzuoParser())),
		("ftuan", lambda: (FtuanCrawler(), FtuanParser())),
		("wuba", lambda: (WubaCrawler(), WubaParser())),
	)

	while True:
		sleep_interval = (0, 30)
		for site, make_parts in site_factories:
			if site in source:
				crawler, parser = make_parts()
				_crawl_site(site, crawler, parser, citylist, sleep_interval)

		# archive first
		archiver = Archiver()
		for src in source:
			archiver.archive(src, src, True)  # False: archive locally, True: archive to S3

		# repeat
		if not period:
			break
		time.sleep(int(period) * 3600)

		# check config file
		# Only the first source that has an entry in the config is consulted.
		stop_crawl = 0
		config = CheckConfig().check('crawl_config')
		for src in source:
			if src in config:
				settings = config[src]
				if "period" in settings:
					period = settings["period"]
				if "stop" in settings:
					stop_crawl = settings["stop"]
				break
		if stop_crawl == 1:
			break
Exemple #3
0
# -*- coding: utf-8 -*-
import __init__
from futuquant import *
from data_acquisition import *
from Config import Config
from check_config import CheckConfig
import sys

def _exit_unless_ok(ret, msg):
    """Print the (ret, msg) pair and exit with status 1 unless ret is RET_OK."""
    if ret != RET_OK:
        print(ret, msg)
        sys.exit(1)


# Run the configuration check first; stop here if it reports a failure.
cc = CheckConfig()
ret, msg = cc.check_all()
_exit_unless_ok(ret, msg)

config = Config()

# Stock codes handed to quote_test below.
# NOTE(review): 'HK.01299' is listed twice -- confirm whether that is intended.
big_sub_codes = [
    'HK.02318', 'HK.02828', 'HK.00939', 'HK.01093', 'HK.01299', 'HK.00175',
    'HK.01299', 'HK.01833', 'HK.00005', 'HK.00883', 'HK.00388', 'HK.01398',
    'HK.01114', 'HK.02800', 'HK.02018', 'HK.03988', 'HK.00386', 'HK.01211'
]

# Run the quote test against the configured host and port; exit on failure.
ret, msg = quote_test(big_sub_codes, config.host, config.port)
_exit_unless_ok(ret, msg)