def setUp(self):
    """Build a fake wiki crawl job and start the RPC server for its queue.

    Sets ``self.dir`` (a fresh temporary directory), ``self.job``,
    ``self.rpc_server`` and ``self.loader``, initialises the loader's
    message queue with this host as the only node, and serves the RPC
    server from a daemon thread.
    """
    # NOTE(review): inside ``[^...]`` the characters ``(``, ``|`` and ``)``
    # are literals, so they are excluded from the match as well as ``:``
    # and ``/`` -- confirm that this is intended.
    url_patterns = UrlPatterns(
        Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$',
            'wiki_item', FakeWikiParser))
    fake_user_conf = Config(StringIO(user_conf))

    self.dir = tempfile.mkdtemp()
    self.job = Job(
        'fake wiki crawler', url_patterns, MechanizeOpener,
        ['http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F', ],
        user_conf=fake_user_conf)

    # The job's configured port identifies the single local node and is
    # also the port the RPC server binds to.
    local_node = 'localhost:%s' % self.job.context.job.port
    nodes = [local_node, ]

    self.rpc_server = ColaRPCServer(
        ('localhost', self.job.context.job.port))
    self.loader = JobLoader(self.job)
    self.loader.init_mq(self.rpc_server, nodes, local_node, self.dir)

    # Use the ``daemon`` attribute rather than the deprecated setDaemon()
    # so the serving thread never keeps the test process alive.
    thd = threading.Thread(target=self.rpc_server.serve_forever)
    thd.daemon = True
    thd.start()
def __init__(self, user_conf=None, **user_defines):
    """Combine the main, user and keyword settings onto this instance.

    ``user_conf`` may be ``None`` (an empty ``PropertyObject`` is used), a
    ``str`` (handed to ``Config``), or any other object, which is stored
    as given. The three sources are folded into one ``PropertyObject`` in
    the order main_conf, user_conf, user_defines, and every resulting key
    that does not start with ``'_'`` is copied onto ``self``.
    """
    self.main_conf = main_conf

    if user_conf is None:
        self.user_conf = PropertyObject({})
    elif isinstance(user_conf, str):
        self.user_conf = Config(user_conf)
    else:
        self.user_conf = user_conf

    self.user_defines = PropertyObject(user_defines)

    # Sources are applied in order; presumably later ones override earlier
    # ones the way dict.update does -- verify against PropertyObject.
    merged = PropertyObject({})
    for source in (self.main_conf, self.user_conf, self.user_defines):
        merged.update(source)

    for key in merged:
        if key.startswith('_'):
            continue
        setattr(self, key, getattr(merged, key))
def setUp(self):
    """Create the fake wiki job and record the local node address.

    Sets ``self.dir`` (a fresh temporary directory), ``self.job``,
    ``self.local_node`` (``'localhost:<job port>'``) and ``self.nodes``
    (a list holding only the local node).
    """
    start_urls = [
        'http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F',
    ]
    wiki_rule = Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$',
                    'wiki_item', FakeWikiParser)
    patterns = UrlPatterns(wiki_rule)
    conf = Config(StringIO(user_conf))

    self.dir = tempfile.mkdtemp()
    self.job = Job('fake wiki crawler', patterns, MechanizeOpener,
                   start_urls, user_conf=conf)

    port = self.job.context.job.port
    self.local_node = 'localhost:%s' % port
    self.nodes = [self.local_node]
from cola.core.config import Config
from cola.core.utils import get_ip, import_job_desc, Clock
from cola.core.logs import get_logger
from cola.core.mq import MessageQueue
from cola.core.dedup import FileBloomFilterDeduper, MapDeduper
from cola.core.rpc import ThreadedColaRPCServer, client_call
from cola.core.zip import ZipHandler
from cola.functions.budget import BudgetApplyServer
from cola.functions.speed import SpeedControlServer
from cola.functions.counter import CounterServer
from cola.job import Job, FINISHED, IDLE
from cola.cluster.master import Master
from cola.cluster.worker import Worker

# 'conf' directory located next to this module file.
conf_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'conf')
# Configuration loaded from conf/main.yaml at import time.
main_conf = Config(os.path.join(conf_dir, 'main.yaml'))

# NOTE(review): presumably the number of consecutive idle checks tolerated
# before some action is taken; the constant is not used in the visible
# code -- confirm against its callers.
MAX_IDLE_TIMES = 50


class ContextManager(multiprocessing.managers.SyncManager):
    # Adds nothing of its own; it exists so the typeids below are
    # registered on this subclass rather than on SyncManager itself.
    pass


# Register each shared type on the manager class under the given typeid.
ContextManager.register('FileBloomFilterDeduper', FileBloomFilterDeduper)
ContextManager.register('MapDeduper', MapDeduper)
ContextManager.register('mq', MessageQueue)
ContextManager.register('budget_server', BudgetApplyServer)
ContextManager.register('speed_server', SpeedControlServer)
ContextManager.register('counter_server', CounterServer)
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created on 2013-6-27

@author: Chine
'''

import os

from cola.core.config import Config

# Directory containing this module.
base = os.path.dirname(os.path.abspath(__file__))
# Prefer test.yaml beside this module; fall back to weibosearch.yaml when
# test.yaml does not exist.
user_conf = os.path.join(base, 'test.yaml')
if not os.path.exists(user_conf):
    user_conf = os.path.join(base, 'weibosearch.yaml')
user_config = Config(user_conf)

# Values read from the ``job`` section of the loaded configuration.
mongo_host = user_config.job.mongo.host
mongo_port = user_config.job.mongo.port
db_name = user_config.job.db
instances = user_config.job.instances
def setUp(self):
    """Store a ``Config`` built from an in-memory one-entry config text."""
    conf_stream = StringIO('name: cola-unittest')
    self.simulate_user_conf = Config(conf_stream)
# -*- coding: utf-8 -*-
'''
Copyright (c) 2013 Qin Xuye <*****@*****.**>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created on 2013-5-25

@author: Chine
'''

import os

from cola.core.config import Config

# 'conf' directory one level above the directory that contains this file.
conf_base_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'conf')


def conf_path(name):
    '''Return the path of the file called ``name`` inside conf_base_path.'''
    return os.path.join(conf_base_path, name)


# Main configuration, loaded from conf/main.yaml at import time.
main_conf = Config(conf_path('main.yaml'))
Created on 2013-6-9

@author: Chine
'''

import os

from cola.core.config import Config
from pymongo import MongoClient
import random

# Directory containing this module.
base = os.path.dirname(os.path.abspath(__file__))
# Prefer test.yaml beside this module; fall back to weibo.yaml when
# test.yaml does not exist.
user_conf = os.path.join(base, 'test.yaml')
if not os.path.exists(user_conf):
    user_conf = os.path.join(base, 'weibo.yaml')
user_config = Config(user_conf)

# Start list: the ``uid`` of every entry under ``starts`` in uid.yaml,
# converted to str and then shuffled in place.
startsfile = os.path.join(base, 'uid.yaml')
startlist = Config(startsfile)
starts = [str(start.uid) for start in startlist.starts]
random.shuffle(starts)

# Values read from the ``job`` section of the loaded configuration.
mongo_host = user_config.job.mongo.host
mongo_port = user_config.job.mongo.port
db_name = user_config.job.db

# NOTE(review): the database is queried while this module is being
# imported, so importing it needs a reachable MongoDB server -- confirm
# that this is acceptable for every importer.
client = MongoClient(mongo_host, mongo_port)
db = client[db_name]
# Collect the 'uid' field of each document returned by weibo_user.find().
dbuid = []
for u in db.weibo_user.find():
    dbuid.append(u['uid'])