def start_processes(self):
    """Queue the seed tasks, start worker and data processes, then idle.

    One ``Task`` is put on ``self.task_queue`` for every entry of
    ``self.urls``, each carrying the rule returned by
    ``Rule.find_by_name(self.rule_name)``. ``self.process_count`` processes
    targeting ``self.run_worker`` and one process targeting
    ``self.run_store`` are created, marked as daemons and started.

    The method never returns: it ends in an endless log-and-sleep loop.
    """
    self.is_running = True

    for seed_url in self.urls:
        seed_task = Task(url=seed_url, rule=Rule.find_by_name(self.rule_name))
        self.task_queue.put(seed_task)

    workers = []
    for _ in range(self.process_count):
        workers.append(Process(target=self.run_worker, args=(self, )))
    self.worker_processes = workers
    self.data_process = Process(target=self.run_store, args=(self, ))

    for worker in self.worker_processes:
        worker.daemon = True
        worker.start()
        logger.info('Start worker process: %d', worker.pid)

    # NOTE(review): the pause is assumed to happen once, after all workers
    # have been started and before the data process — confirm against the
    # original layout.
    time.sleep(0.5)

    store = self.data_process
    store.daemon = True
    store.start()
    logger.info('Start data process: %d', store.pid)

    # Presumably keeps the parent alive so the daemon children keep running.
    while True:
        logger.info('main sleep')
        time.sleep(10)
def start_all_threads(self):
    """Create the queues, start the store thread and dispatch tasks forever.

    Sets ``self.is_running`` and builds three queues:

    * ``self.normal_task_queue`` - seeded with one ``Task`` per entry of
      ``self.urls``, each using ``Rule.find_by_name(self.rule_name)``;
    * ``self.proxy_task_queue`` - created empty;
    * ``self.data_queue`` - created empty.

    A thread targeting ``self.run_store`` is started, after which the
    calling thread loops forever, moving tasks from the normal queue into
    ``self.normal_task_pool``. The method never returns.
    """
    self.is_running = True

    # Task queue for url without proxy downloading
    self.normal_task_queue = Queue()
    for url in self.urls:
        self.normal_task_queue.put(
            Task(url=url, rule=Rule.find_by_name(self.rule_name)))

    # Task queue for url with proxy downloading
    self.proxy_task_queue = Queue()

    # Data queue for result storage
    self.data_queue = Queue()

    self.store_thread = Thread(target=self.run_store, args=(self, ))
    self.store_thread.start()

    while True:
        try:
            # Block with a timeout rather than polling with block=False:
            # a non-blocking get in a tight loop spins a CPU core at 100%
            # whenever the queue is empty. The timeout keeps the loop
            # waking up periodically instead of blocking indefinitely.
            task: Task = self.normal_task_queue.get(timeout=1)
        except Empty:
            continue

        # Kept outside the try so that only the queue read is guarded.
        self.normal_task_pool.submit(self.run_worker, task, self)
        logger.info('Submit new task: %s', task.url)
def build_rule(self, differences, records, rule_function):
    """Combine the implicants of every record into a single rule.

    ``self.get_implicants(differences, r, i)`` is called for each ``(r, i)``
    pair in ``records``. Results that are ``None`` are skipped. The first
    non-``None`` result becomes the running token list; each later one is
    merged as ``['OR'] + <running list> + <new result>``.

    Returns:
        ``None`` when no pair produced a result, otherwise
        ``RuleAndFunction(Rule(<token list>), rule_function)``.
    """
    combined = None
    for rec, idx in records:
        implicants = self.get_implicants(differences, rec, idx)
        if implicants is None:
            continue
        if combined is None:
            combined = implicants
        else:
            combined = ['OR'] + combined + implicants

    if combined is None:
        return None
    return RuleAndFunction(Rule(combined), rule_function)
def test(url, rule_name):
    """Execute one task for ``url`` with the named rule and print the outcome.

    Switches ``app_config.cache_mode`` to ``CacheMode.LOCAL_FILE``, runs the
    task, prints the resulting data (raw and pretty-printed) followed by the
    URL and rule name of every sub-task, and returns the data.
    """
    # Imports are kept local and in their original order.
    from task import Task
    from rule.rule import Rule
    import pprint
    from config import app_config, CacheMode

    app_config.cache_mode = CacheMode.LOCAL_FILE
    pretty = pprint.PrettyPrinter(indent=2)

    root_task = Task(url=url, rule=Rule.find_by_name(rule_name))
    result = root_task.execute()

    print('-------------------- 测试结果 --------------------')
    print(result.data)
    pretty.pprint(result.data)

    print('\n-------------------- 提取链接 --------------------')
    for sub_task in result.sub_tasks:
        print(sub_task.url, sub_task.rule.name)

    return result.data
def start_all_threads(self):
    """Seed the task queue and start the worker threads and the store thread.

    A ``Task`` is queued on ``self.task_normal_queue`` for every entry of
    ``self._urls``, each using ``Rule.find_by_name(self._rule_name)``.
    ``self.normal_thread_count`` threads targeting ``self.run_worker`` and
    one thread targeting ``self.run_store`` are created; ``self.data_queue``
    is set to a fresh ``DataQueue``.
    """
    self.is_running = True

    self.task_normal_queue = Queue()
    for seed_url in self._urls:
        seed_task = Task(url=seed_url,
                         rule=Rule.find_by_name(self._rule_name))
        self.task_normal_queue.put(seed_task)

    workers = []
    for _ in range(self.normal_thread_count):
        workers.append(Thread(target=self.run_worker, args=(self, )))
    self.worker_normal_threads = workers

    self.data_queue = DataQueue()
    self.store_thread = Thread(target=self.run_store, args=(self, ))

    for worker in self.worker_normal_threads:
        worker.start()

    # NOTE(review): the pause is assumed to happen once, after all workers
    # have been started and before the store thread — confirm against the
    # original layout.
    time.sleep(0.5)
    self.store_thread.start()
def __init__(self,
             task: Task,
             ok,
             data: dict = None,
             linked_urls: dict = None):
    """Store the result fields and build sub-tasks from ``linked_urls``.

    Args:
        task: Stored as ``self._task``.
        ok: Stored as ``self._ok``.
        data: Stored as ``self._data``.
        linked_urls: Mapping of URL to rule name. Each pair with a truthy
            URL and rule name whose rule is found via ``Rule.find_by_name``
            adds a ``Task`` to ``self._sub_tasks``; any other pair is
            reported with ``print`` and skipped.
    """
    self._ok = ok
    self._data = data
    self._task = task
    self._sub_tasks = []

    if not linked_urls:
        return

    for link, name in linked_urls.items():
        if not (link and name):
            print(
                'Error: cannot make new task because of invalid url or rule:',
                link, name)
            continue

        found = Rule.find_by_name(name)
        if not found:
            print('Error: cannot find rule:', name)
            continue

        self._sub_tasks.append(Task(url=link, rule=found))
def sync(self, source_path, target_path):
    """Copy ``source_path`` to ``target_path`` after interactive confirmation.

    The user is prompted on stdin; any answer other than ``Y``/``y`` aborts
    and returns 0. Files and directory entries for which
    ``rule.check_is_ignore`` is truthy are skipped. Missing target
    directories are created.

    When ``self.pool`` is set, each file copy is handed to the pool via
    ``add_task`` and the return value is that of
    ``self.pool.wait_all_task_done()``. Otherwise ``self._check_copy`` is
    called directly and the sum of its return values is returned.
    """
    rule = Rule(source_path, logger=logger)
    print('将要复制{}到{}?'.format(source_path, target_path))
    answer = input('确定Y/N[N]')
    if answer.upper() != 'Y':
        return 0

    def _sync(source, target):
        # Nothing to do for empty paths or a source that does not exist.
        if not (source and target and os.path.exists(source)):
            return 0

        if os.path.isfile(source):
            if rule.check_is_ignore(source):
                return 0
            if self.pool is None:
                return self._check_copy(source, target)
            self.pool.add_task(self._check_copy, source, target)
            return 0

        # Not a regular file: handle it as a directory and recurse.
        if not os.path.exists(target):
            os.makedirs(target)
            logger.info('[创建文件夹] {}'.format(target))

        copied = 0
        for entry in os.listdir(source):
            child = os.path.join(source, entry)
            if rule.check_is_ignore(child, entry):
                continue
            copied += _sync(child, os.path.join(target, entry))
        return copied

    total = _sync(source_path, target_path)
    if self.pool is None:
        return total
    return self.pool.wait_all_task_done()
def test_replace_vars():
    """Check that ``replace_vars`` substitutes every ``${name}`` placeholder,
    including a name that appears more than once."""
    parser = RuleParser(Rule('app'), "", "")
    for name, value in (('a', 'hello'), ('b', 'bbbbb')):
        parser.set_var(name, value)

    rendered = parser.replace_vars('${a}-${a}-${b}')

    assert rendered == 'hello-hello-bbbbb'
from rule.rule import Rule
import pandas as pd
from model.fuzzifier import Fuzzifier
from data_types.stats import Stats

if __name__ == "__main__":
    # Manual smoke run: build a small table, fuzzify it, and print the
    # result of evaluating one hand-written rule against every row.
    columns = {
        'red': [-10, 1, 2, 5000],
        'green': [-10, 4, 5, 20],
        'blue': [-20, 5, 6, 15],
        'luminance': [-1, 8, 1, 10],
        'value': [3, 5, 4, 3],
    }
    frame = pd.DataFrame(data=columns)
    stats = Stats(frame)

    # The return value is not used below; the call is kept as in the
    # original (NOTE(review): it may modify ``frame`` in place — confirm).
    fuzzy = Fuzzifier().fuzzify(frame, stats)

    rule = Rule([
        "AND",
        "OR", "fuzzy_blue=LOW", "fuzzy_red=HIGH",
        "OR", "fuzzy_blue=HIGH", "fuzzy_red=MEDIUM",
    ])

    for _index, row in frame.iterrows():
        print(rule.evaluate2((row, stats, stats)))