コード例 #1
0
ファイル: weibolauncher.py プロジェクト: liangxh/idu
def main():
	"""Parse the command-line options and hand the pending blogs to launch().

	Options:
		-i/--input     input file given to commdatica.load() (required)
		-o/--output    output file given to launch() (required)
		-a/--account   inclusive account index range 'start_idx,end_idx' (required)
		-n/--instance  integer forwarded to launch(), default 5
		-r/--restart   ignore previously downloaded mids and pass 'w' instead
		               of 'a' as the file-type argument of launch()
		-t/--interval  integer forwarded to launch(), default 3

	Prints a message and returns None without launching anything when a
	required option is missing or the account range is malformed.
	"""
	# get parameters from terminal
	optparser = OptionParser()
	optparser.add_option('-i', '--input', action='store', type='string', dest='infile')
	optparser.add_option('-o', '--output', action='store', type='string', dest='outfile')
	optparser.add_option('-a', '--account', action='store', type='string', dest='acc_range')
	optparser.add_option('-n', '--instance', action='store', type='int', dest='n_instance', default=5)
	optparser.add_option('-r', '--restart', action='store_true', dest='restart', default=False)
	optparser.add_option('-t', '--interval', action='store', type='int', dest='interval', default=3)

	opts, _ = optparser.parse_args()

	if not opts.infile:
		print('-i infile not specified')
		return

	if not opts.outfile:
		print('-o outfile not specified')
		return

	if not opts.acc_range:
		print('-a (start_idx,end_idx) not specified')
		return

	# re.match only anchors at the start of the string, so the pattern is
	# anchored at the end too; otherwise values such as '1,2x' or '1,2,3'
	# would be accepted and silently truncated to '1,2'.
	m = re.match(r'(\d+),(\d+)$', opts.acc_range)
	if not m:
		print('-a start_idx,end_idx should contain no space')
		return
	start_idx, end_idx = int(m.group(1)), int(m.group(2))

	ftype = 'w' if opts.restart else 'a'

	# prepare the accounts (end_idx is inclusive, hence the + 1)
	all_accounts = weiboparser.load_accounts()
	accounts = all_accounts[start_idx:end_idx + 1]

	# prepare the blog infos
	all_bloginfo = commdatica.load(opts.infile)

	# filter the blogs whose comments have been downloaded
	if opts.restart:
		mids = set()
	else:
		mids = set(downloaded_mids(opts.outfile))
		logger.info('%d downloaded in %s'%(len(mids), opts.outfile))

	bloginfos = [bloginfo for bloginfo in all_bloginfo if bloginfo.mid not in mids]

	launch(opts.outfile, accounts, bloginfos, ftype, opts.n_instance, opts.interval)
コード例 #2
0
ファイル: weibolauncher.py プロジェクト: liangxh/weibo
def test():
	"""Run launch() on a small batch of blogs whose comments are not downloaded yet."""
	# only the first 25 accounts take part in the test run
	accounts = weiboparser.load_accounts()[:25]

	candidates = commdatica.load()

	# do not download comments for the same blog again
	seen_mids = set(downloaded_mids())
	pending = []
	for info in candidates:
		if info.mid not in seen_mids:
			pending.append(info)

	# keep the batch small: at most 8 blogs
	launch(JSONS_COMMENT, accounts, pending[:8], 4)