Exemple #1
0
    def start_processes(self):
        """Seed the task queue, launch worker and store processes, then idle.

        One ``Task`` is queued for every entry in ``self.urls``, each with the
        rule looked up via ``Rule.find_by_name(self.rule_name)``.
        ``self.process_count`` worker processes targeting ``self.run_worker``
        and a single process targeting ``self.run_store`` are created; each
        receives this instance as its only argument. All of them are started
        as daemon processes.

        This method never returns: after start-up the caller stays in a loop
        that logs ``'main sleep'`` every 10 seconds.
        """
        self.is_running = True

        for seed_url in self.urls:
            seed_rule = Rule.find_by_name(self.rule_name)
            self.task_queue.put(Task(url=seed_url, rule=seed_rule))

        workers = []
        for _ in range(self.process_count):
            workers.append(Process(target=self.run_worker, args=(self, )))
        self.worker_processes = workers

        self.data_process = Process(target=self.run_store, args=(self, ))

        for worker in self.worker_processes:
            worker.daemon = True
            worker.start()
            logger.info('Start worker process: %d', worker.pid)

            # Stagger worker start-up by half a second each.
            time.sleep(0.5)

        self.data_process.daemon = True
        self.data_process.start()
        logger.info('Start data process: %d', self.data_process.pid)

        # Keep the parent alive; the children are daemons.
        while True:
            logger.info('main sleep')
            time.sleep(10)
    def start_all_threads(self):
        """Create the work queues, start the store thread and dispatch tasks.

        ``self.normal_task_queue`` is filled with one ``Task`` per entry in
        ``self.urls`` (rule looked up via ``Rule.find_by_name``);
        ``self.proxy_task_queue`` and ``self.data_queue`` are created empty.
        A thread targeting ``self.run_store`` is started with this instance as
        its argument.

        The method then loops forever, taking tasks off the normal queue and
        submitting ``self.run_worker(task, self)`` to ``self.normal_task_pool``.
        It never returns.
        """
        self.is_running = True

        # Task queue for url without proxy downloading
        self.normal_task_queue = Queue()
        for url in self.urls:
            self.normal_task_queue.put(
                Task(url=url, rule=Rule.find_by_name(self.rule_name)))
        # Task queue for url with proxy downloading
        self.proxy_task_queue = Queue()
        # Data queue for result storage
        self.data_queue = Queue()

        self.store_thread = Thread(target=self.run_store, args=(self, ))
        self.store_thread.start()

        while True:
            try:
                # Block (with a timeout) rather than polling with
                # block=False: a non-blocking get in a tight loop spins the
                # CPU at 100% whenever the queue is empty. A blocking get
                # still returns as soon as a task becomes available.
                task: Task = self.normal_task_queue.get(timeout=1)
            except Empty:
                # Nothing queued within the timeout; wait again.
                continue

            self.normal_task_pool.submit(self.run_worker, task, self)
            logger.info('Submit new task: %s', task.url)
Exemple #3
0
 def build_rule(self, differences, records, rule_function):
     """Merge the implicants of all records into a single OR-joined rule.

     For every ``(r, i)`` pair in ``records`` the result of
     ``self.get_implicants(differences, r, i)`` is collected; ``None``
     results are skipped. The first usable result seeds the expression and
     each later one is merged as ``['OR'] + accumulated + new``, i.e. the
     operator is placed in front of its operands. (Presumably the
     implicants are lists, since they are concatenated with one; confirm
     against ``get_implicants``.)

     Returns ``None`` when no pair produced implicants, otherwise
     ``RuleAndFunction(Rule(expression), rule_function)``.
     """
     expression = None
     for rec, idx in records:
         implicants = self.get_implicants(differences, rec, idx)
         if implicants is None:
             continue
         if expression is None:
             expression = implicants
         else:
             expression = ['OR'] + expression + implicants

     if expression is None:
         return None
     return RuleAndFunction(Rule(expression), rule_function)
Exemple #4
0
def test(url, rule_name):
    """Execute one task for ``url`` with the named rule and print the outcome.

    ``app_config.cache_mode`` is set to ``CacheMode.LOCAL_FILE`` before the
    task runs. The result data is printed twice (plain and pretty-printed),
    followed by the URL and rule name of every sub-task the result carries.

    Returns the ``data`` attribute of the task result.
    """
    from task import Task
    from rule.rule import Rule
    import pprint

    from config import app_config, CacheMode
    app_config.cache_mode = CacheMode.LOCAL_FILE

    root_task = Task(url=url, rule=Rule.find_by_name(rule_name))
    result = root_task.execute()

    # Header text: "test result"
    print('-------------------- 测试结果 --------------------')
    print(result.data)
    pprint.PrettyPrinter(indent=2).pprint(result.data)

    # Header text: "extracted links"
    print('\n-------------------- 提取链接 --------------------')
    for sub_task in result.sub_tasks:
        print(sub_task.url, sub_task.rule.name)

    return result.data
    def start_all_threads(self):
        """Queue the seed tasks and start the worker and store threads.

        ``self.task_normal_queue`` receives one ``Task`` per entry in
        ``self._urls``, each with the rule looked up via
        ``Rule.find_by_name(self._rule_name)``. ``self.normal_thread_count``
        worker threads targeting ``self.run_worker`` and one thread targeting
        ``self.run_store`` are created, each given this instance as its
        argument. Workers are started half a second apart; the store thread
        is started last.
        """
        self.is_running = True

        self.task_normal_queue = Queue()
        for seed_url in self._urls:
            seed_rule = Rule.find_by_name(self._rule_name)
            self.task_normal_queue.put(Task(url=seed_url, rule=seed_rule))

        workers = []
        for _ in range(self.normal_thread_count):
            workers.append(Thread(target=self.run_worker, args=(self, )))
        self.worker_normal_threads = workers

        self.data_queue = DataQueue()
        self.store_thread = Thread(target=self.run_store, args=(self, ))

        for worker in self.worker_normal_threads:
            worker.start()
            # Stagger worker start-up.
            time.sleep(0.5)

        self.store_thread.start()
Exemple #6
0
 def __init__(self,
              task: Task,
              ok,
              data: dict = None,
              linked_urls: dict = None):
     """Store the outcome of ``task`` and build sub-tasks from linked URLs.

     ``linked_urls`` maps a URL to a rule name. For each entry a ``Task``
     is appended to ``self._sub_tasks`` when both the URL and the rule name
     are truthy and ``Rule.find_by_name`` returns a truthy rule. Entries
     that fail either check are reported with ``print`` and skipped. A
     falsy ``linked_urls`` (``None`` or empty) yields no sub-tasks.
     """
     self._task = task
     self._ok = ok
     self._data = data
     self._sub_tasks = []

     for url, rule_name in (linked_urls or {}).items():
         if not (url and rule_name):
             print(
                 'Error: cannot make new task because of invalid url or rule:',
                 url, rule_name)
             continue

         rule = Rule.find_by_name(rule_name)
         if not rule:
             print('Error: cannot find rule:', rule_name)
             continue

         self._sub_tasks.append(Task(url=url, rule=rule))
Exemple #7
0
    def sync(self, source_path, target_path):
        """Copy ``source_path`` to ``target_path`` after interactive confirmation.

        The user is prompted on stdin; anything other than ``Y``/``y``
        aborts and returns 0. Otherwise the source is walked recursively:
        missing target directories are created, entries for which
        ``rule.check_is_ignore`` is truthy are skipped, and every remaining
        file is handed to ``self._check_copy``, either directly or as a
        task on ``self.pool`` when a pool is configured.

        Returns ``self.pool.wait_all_task_done()`` when a pool is used,
        otherwise the sum of the ``_check_copy`` return values.
        """
        rule = Rule(source_path, logger=logger)
        # Prompt text: "About to copy {} to {}?" / "Confirm Y/N[N]"
        print('将要复制{}到{}?'.format(source_path, target_path))
        answer = input('确定Y/N[N]')
        if answer.upper() != 'Y':
            return 0

        def _sync(source, target):
            # Nothing to do for empty paths or a source that does not exist.
            if not (source and target and os.path.exists(source)):
                return 0

            if not os.path.isfile(source):
                # Non-file source: mirror it as a directory and recurse.
                if not os.path.exists(target):
                    os.makedirs(target)
                    # Log text: "[created folder]"
                    logger.info('[创建文件夹] {}'.format(target))
                copied = 0
                for name in os.listdir(source):
                    child = os.path.join(source, name)
                    if not rule.check_is_ignore(child, name):
                        copied += _sync(child, os.path.join(target, name))
                return copied

            if rule.check_is_ignore(source):
                return 0
            if self.pool is None:
                return self._check_copy(source, target)
            # With a pool the copy is deferred; the pool reports the result.
            self.pool.add_task(self._check_copy, source, target)
            return 0

        total = _sync(source_path, target_path)
        if self.pool is None:
            return total
        return self.pool.wait_all_task_done()
Exemple #8
0
def test_replace_vars():
    """Check that ``replace_vars`` substitutes every ``${name}`` occurrence."""
    parser = RuleParser(Rule('app'), "", "")
    for name, value in (('a', 'hello'), ('b', 'bbbbb')):
        parser.set_var(name, value)

    expected = 'hello-hello-bbbbb'
    assert parser.replace_vars('${a}-${a}-${b}') == expected
Exemple #9
0
from rule.rule import Rule
import pandas as pd
from model.fuzzifier import Fuzzifier
from data_types.stats import Stats

if __name__ == "__main__":
    # Small hand-written sample frame used to exercise a rule by hand.
    d = {
        'red': [-10, 1, 2, 5000],
        'green': [-10, 4, 5, 20],
        'blue': [-20, 5, 6, 15],
        'luminance': [-1, 8, 1, 10],
        'value': [3, 5, 4, 3],
    }
    df = pd.DataFrame(data=d)
    stats = Stats(df)
    # NOTE(review): `fuzzy` is not read again below; the loop iterates the
    # raw `df`. Confirm whether fuzzify() mutates `df` in place or whether
    # the loop was meant to iterate `fuzzy`.
    fuzzy = Fuzzifier().fuzzify(df, stats)
    # Operators precede their operands in the rule's token list.
    rule = Rule([
        "AND",
        "OR", "fuzzy_blue=LOW", "fuzzy_red=HIGH",
        "OR", "fuzzy_blue=HIGH", "fuzzy_red=MEDIUM",
    ])
    for _, row in df.iterrows():
        print(rule.evaluate2((row, stats, stats)))