Exemple #1
0
async def arbiter_last_task():
    """Fit a TF-IDF model on the bag-of-words corpus and save it.

    The dictionary is loaded from ``config.dictionary_path``, the documents
    come from ``nlp_utils.doc_gen(config.bow_corpus_path)`` and the fitted
    model is written to ``config.tfidf_model_path``. Progress messages are
    printed with the current actor's name as a prefix.
    """
    print(get_actor().name + ': Tworzę model tfidf')
    vocabulary = corpora.Dictionary.load(config.dictionary_path)
    bow_documents = nlp_utils.doc_gen(config.bow_corpus_path)
    model = models.TfidfModel(corpus=bow_documents, id2word=vocabulary)
    model.save(config.tfidf_model_path)
    print(get_actor().name + ': Model tfidf utworzony')
async def download_homepages(request, message):
    """Download every URL in ``message`` and pass each page to an extractor.

    One extractor actor is spawned per downloaded page. Each one receives an
    ``'extract_data'`` request carrying its page, and every spawned actor is
    sent ``'stop'`` afterwards, even when one of the requests fails.

    :param request: incoming request; only ``request.actor.logger`` is used.
    :param message: iterable of URLs to download.
    :return: the name of the actor that executed this command.
    """
    print("DOWNLOAD", message)
    current_actor = get_actor()
    request.actor.logger.info("Download homepages: " + str(current_actor) +
                              " Urls: " + str(message))

    # NOTE(review): urlopen is a blocking call, so the actor's event loop is
    # stalled while each page downloads; consider an executor if that matters.
    pages = []
    for url in message:
        with urllib.request.urlopen(url) as conn:
            # Each payload is wrapped in a one-element list, as before.
            pages.append([conn.read()])

    # Start all extractors first, then wait for each of them to come up.
    spawned_actors = [
        spawn(name=current_actor.name + "_extractor_" + str(i))
        for i in range(len(pages))
    ]
    for actor in spawned_actors:
        await actor

    try:
        # Dispatch every request before awaiting any of them.
        sent_actions = [
            send(actor, 'extract_data', page)
            for actor, page in zip(spawned_actors, pages)
        ]
        for action in sent_actions:
            await action
    finally:
        # Always stop the extractors so a failed request does not leak them.
        for actor in spawned_actors:
            await send(actor, 'stop')

    return current_actor.name
Exemple #3
0
async def work(name, docs):
    """Turn each document into a bag-of-words vector and pickle the result.

    :param name: file name appended to ``config.bow_corpus_path`` for output.
    :param docs: iterable of documents, each passed to ``doc2bow`` of the
        object stored under ``extra['dict']`` on the current actor.
    """
    print(get_actor().name + ': Przetwarzam plik ' + name)
    vectors = [get_actor().extra['dict'].doc2bow(item) for item in docs]
    with open(config.bow_corpus_path + name, 'wb') as out:
        pickle.dump(vectors, out)
    # Pause for the configured interval before finishing the task.
    await asyncio.sleep(config.middle_task_wait)
async def work(name, docs):
    """Apply the actor's TF-IDF model to each document and pickle the result.

    :param name: file name appended to ``config.tfidf_corpus_path`` for output.
    :param docs: iterable of documents, each used to index the object stored
        under ``extra['tfidf']`` on the current actor.
    """
    print(get_actor().name + ': Przetwarzam plik ' + name)
    weighted = [get_actor().extra['tfidf'][item] for item in docs]
    with open(config.tfidf_corpus_path + name, 'wb') as out:
        pickle.dump(weighted, out)
    # Pause for the configured interval before finishing the task.
    await asyncio.sleep(config.middle_task_wait)
async def arbiter_last_task():
    """Build the dictionary from the tokenized corpus, prune it and save it.

    Documents are read through
    ``nlp_utils.doc_gen(config.tokenized_corpus_path)`` and the finished
    dictionary is written to ``config.dictionary_path``.
    """
    print(get_actor().name + ': Zaczynam tworzenie słownika')
    documents = nlp_utils.doc_gen(config.tokenized_corpus_path)
    vocabulary = corpora.Dictionary(documents)
    # Prune with no lower bound, an upper bound of 0.5 and no size cap, then
    # compact the ids. (Presumably gensim's Dictionary semantics - confirm.)
    vocabulary.filter_extremes(no_below=0, no_above=0.5, keep_n=None)
    vocabulary.compactify()
    vocabulary.save(config.dictionary_path)
    print(get_actor().name + ': Tworzenie słownika zakończone!')
async def arbiter_last_task():
    """Fit an LSI model on the TF-IDF corpus and save it.

    The dictionary is loaded from ``config.dictionary_path``, the documents
    come from ``nlp_utils.doc_gen(config.tfidf_corpus_path)``, the number of
    topics is ``config.lsi_topics`` and the model is written to
    ``config.lsi_model_path``.
    """
    print(get_actor().name + ': Tworzę model lsi')
    vocabulary = corpora.Dictionary.load(config.dictionary_path)
    tfidf_documents = nlp_utils.doc_gen(config.tfidf_corpus_path)
    model = models.LsiModel(
        corpus=tfidf_documents,
        id2word=vocabulary,
        num_topics=config.lsi_topics,
    )
    model.save(config.lsi_model_path)
    print(get_actor().name + ': Model lsi utworzony')
Exemple #7
0
def work_gen(query):
    """Yield one ``work`` task per stored index for the given query.

    The query goes through ``nlp_utils.extract_tokens_from_markdown`` and then
    through the ``'dict'``, ``'tfidf'`` and ``'lsi'`` objects kept in the
    current actor's ``extra`` mapping. Each ``(name, index)`` pair produced by
    ``nlp_utils.page_gen(config.index_path)`` becomes
    ``partial(work, index, name, <transformed query>)``.

    :param query: raw query text.
    """
    pages = nlp_utils.page_gen(config.index_path)

    tokens = nlp_utils.extract_tokens_from_markdown(query)
    bow = get_actor().extra['dict'].doc2bow(tokens)
    weighted = get_actor().extra['tfidf'][bow]
    projected = get_actor().extra['lsi'][weighted]

    for name, index in pages:
        yield partial(work, index, name, projected)
Exemple #8
0
async def arbiter_last_task():
    """Rank the collected results, print the best ones and pickle them.

    Results are read from ``extra['results']`` of the current actor, ordered
    by descending ``'score'`` and cut to ``config.results_count`` entries
    before being written to ``config.search_result_path``.
    """
    print(get_actor().name + ': Porządkuję wyniki')
    ranked = sorted(get_actor().extra['results'],
                    key=lambda hit: -hit['score'])
    best = ranked[:config.results_count]
    print('\n\n')
    for hit in best:
        print(hit)
    print('\n\n')
    with open(config.search_result_path, 'wb') as out:
        pickle.dump(best, out)
    print(get_actor().name + ': Wyniki zapisano do ' +
          config.search_result_path)
Exemple #9
0
async def work(name, docs):
    """Project documents with the actor's LSI model and build their index.

    The projected corpus is pickled to ``config.lsi_corpus_path + name``. A
    ``similarities.MatrixSimilarity`` index over it is saved under
    ``config.index_path`` as ``index<id>.idx``, where ``<id>`` comes from
    ``nlp_utils.get_id_from_name(name)``.

    :param name: name of the corpus file being processed.
    :param docs: iterable of documents, each used to index ``extra['lsi']``.
    """
    print(get_actor().name + ': Przetwarzam plik ' + name)
    projected = [get_actor().extra['lsi'][item] for item in docs]
    with open(config.lsi_corpus_path + name, 'wb') as out:
        pickle.dump(projected, out)

    similarity_index = similarities.MatrixSimilarity(projected)
    index_file = 'index' + nlp_utils.get_id_from_name(name) + '.idx'
    similarity_index.save(config.index_path + index_file)
    # Pause for the configured interval before finishing the task.
    await asyncio.sleep(config.middle_task_wait)
Exemple #10
0
 def test_registered(self):
     '''Check that 'arbiter' and 'test' are registered with the arbiter.'''
     arbiter = get_actor()
     self.assertTrue(arbiter.is_arbiter())
     self.assertTrue(arbiter.registered)
     # assertIn shows the container in the failure message, unlike
     # assertTrue(x in y).
     for name in ('arbiter', 'test'):
         self.assertIn(name, arbiter.registered)
Exemple #11
0
 def test_registered(self):
     '''Check that 'arbiter' and 'test' are registered with the arbiter.'''
     arbiter = get_actor()
     self.assertTrue(arbiter.is_arbiter())
     self.assertTrue(arbiter.registered)
     # assertIn shows the container in the failure message, unlike
     # assertTrue(x in y).
     for name in ('arbiter', 'test'):
         self.assertIn(name, arbiter.registered)
Exemple #12
0
 def test_arbiter_mailbox(self):
     '''No arbiter or monitor mailbox exposes a ``request`` attribute.'''
     arbiter = get_actor()
     self.assertFalse(hasattr(arbiter.mailbox, 'request'))
     # The same must hold for the mailbox of every monitor
     for mon in arbiter.monitors.values():
         self.assertFalse(hasattr(mon.mailbox, 'request'))
Exemple #13
0
 async def testMeta(self):
     '''Check name, monitor, string form and bind address of the app.'''
     app = await get_application(self.app_cfg.name)
     self.assertEqual(app.name, self.app_cfg.name)
     actor = get_actor()
     app_monitor = actor.get_actor(app.name)
     self.assertTrue(app_monitor.is_running())
     self.assertEqual(app, app_monitor.app)
     self.assertEqual(str(app), app.name)
     self.assertEqual(app.cfg.bind, '127.0.0.1:0')
Exemple #14
0
 def test_arbiter_mailbox(self):
     '''No arbiter or monitor mailbox exposes a ``request`` attribute.'''
     arbiter = get_actor()
     self.assertFalse(hasattr(arbiter.mailbox, 'request'))
     # The same must hold for the mailbox of every monitor
     for mon in arbiter.monitors.values():
         self.assertFalse(hasattr(mon.mailbox, 'request'))
Exemple #15
0
async def work(index, name, query):
    """Score one index against the query and merge the hits into the actor.

    The page is fetched through ``extra['gh'].get_page`` using the id that
    ``nlp_utils.get_id_from_name(name)`` yields. Every similarity in
    ``index[query]`` becomes a ``{'score', 'name', 'url'}`` record. The
    records are merged with ``extra['result']``, ordered by descending score
    and cut to ``config.results_count`` entries, which are stored back.

    :param index: similarity index; ``index[query]`` gives the scores.
    :param name: index file name; also provides the page id.
    :param query: transformed query used to index ``index``.
    """
    print(get_actor().name + ': Przetwarzam plik ' + name)
    page_id = int(nlp_utils.get_id_from_name(name))
    page = get_actor().extra['gh'].get_page(page_id)
    scores = index[query]

    hits = [
        {
            'score': score,
            'name': page[pos]['name'],
            'url': page[pos]['html_url'],
        }
        for pos, score in enumerate(scores)
    ]
    hits.extend(get_actor().extra['result'])
    hits.sort(key=lambda hit: -hit['score'])
    get_actor().extra['result'] = hits[:config.results_count]
    # Pause for the configured interval before finishing the task.
    await asyncio.sleep(config.middle_task_wait)
Exemple #16
0
 def testFunctionFromConfigFile(self):
     '''``connection_made`` from ``tests.utils`` replaces the default and
     survives a pickle round trip.'''
     actor = get_actor()
     settings = config()
     # Before the import the hook returns None.
     self.assertEqual(settings.connection_made(actor), None)
     imported = settings.import_from_module('tests.utils')
     self.assertEqual(imported[0], ('foo', 5))
     # After the import the hook returns the worker it was given.
     self.assertEqual(settings.connection_made(actor), actor)
     payload = pickle.dumps(settings)
     restored = pickle.loads(payload)
     self.assertEqual(restored.connection_made(actor), actor)
Exemple #17
0
 def testFunction(self):
     '''A ``post_fork`` hook set on the config is used and survives a
     pickle round trip.'''
     settings = config()
     actor = get_actor()
     self.assertTrue(settings.post_fork)
     # The default hook returns None.
     self.assertEqual(settings.post_fork(actor), None)
     settings.set('post_fork', post_fork)
     # The replacement hook returns the worker it was given.
     self.assertEqual(settings.post_fork(actor), actor)
     payload = pickle.dumps(settings)
     restored = pickle.loads(payload)
     self.assertEqual(restored.post_fork(actor), actor)
Exemple #18
0
 def testFunction(self):
     '''A ``post_fork`` hook set on the config is used and survives a
     pickle round trip.'''
     settings = config()
     actor = get_actor()
     self.assertTrue(settings.post_fork)
     # The default hook returns None.
     self.assertEqual(settings.post_fork(actor), None)
     settings.set('post_fork', post_fork)
     # The replacement hook returns the worker it was given.
     self.assertEqual(settings.post_fork(actor), actor)
     payload = pickle.dumps(settings)
     restored = pickle.loads(payload)
     self.assertEqual(restored.post_fork(actor), actor)
async def work(arg, gh):
    """Tokenize the readme of every repository on one page and pickle them.

    :param arg: page identifier passed to ``gh.get_page``; also used in the
        output file name ``corpus<arg>.crp``.
    :param gh: object whose ``get_page`` returns an iterable of records with
        ``'name'`` and ``'readme'`` entries.
    """
    repos = gh.get_page(arg)
    tokenized = []
    for entry in repos:
        print(get_actor().name + ': Przetwarzam ' + entry['name'])
        readme_tokens = nlp_utils.extract_tokens_from_markdown(entry['readme'])
        tokenized.append(readme_tokens)
        # Pause for the configured interval after each repository.
        await asyncio.sleep(config.middle_task_wait)
    target = config.tokenized_corpus_path + 'corpus' + str(arg) + '.crp'
    with open(target, 'wb') as out:
        pickle.dump(tokenized, out)
Exemple #20
0
 def testFunctionFromConfigFile(self):
     '''``connection_made`` from ``tests.utils`` replaces the default and
     survives a pickle round trip.'''
     actor = get_actor()
     settings = config()
     # Before the import the hook returns None.
     self.assertEqual(settings.connection_made(actor), None)
     imported = settings.import_from_module('tests.utils')
     self.assertEqual(imported[0], ('foo', 5))
     # After the import the hook returns the worker it was given.
     self.assertEqual(settings.connection_made(actor), actor)
     payload = pickle.dumps(settings)
     restored = pickle.loads(payload)
     self.assertEqual(restored.connection_made(actor), actor)
Exemple #21
0
def dont_run_with_thread(obj):
    '''Decorator that skips ``obj`` unless the current actor's configured
    concurrency is ``'process'``.

    When ``get_actor()`` returns nothing truthy, ``obj`` is returned
    unchanged.
    '''
    actor = get_actor()
    if not actor:
        return obj
    process_only = unittest.skipUnless(
        actor.cfg.concurrency == 'process',
        'Run only when concurrency is process')
    return process_only(obj)
Exemple #22
0
def dont_run_with_thread(obj):
    '''Decorator that skips ``obj`` unless the current actor's configured
    concurrency is ``'process'``.

    When ``get_actor()`` returns nothing truthy, ``obj`` is returned
    unchanged.
    '''
    actor = get_actor()
    if not actor:
        return obj
    process_only = unittest.skipUnless(
        actor.cfg.concurrency == 'process',
        'Run only when concurrency is process')
    return process_only(obj)
Exemple #23
0
 def test_arbiter_object(self):
     '''Check identity, names, monitors and server info of the arbiter.'''
     arbiter = get_actor()
     self.assertEqual(arbiter, get_arbiter())
     self.assertTrue(arbiter.is_arbiter())
     self.assertEqual(arbiter.concurrency.kind, 'arbiter')
     self.assertEqual(arbiter.aid, 'arbiter')
     self.assertEqual(arbiter.name, 'arbiter')
     self.assertTrue(arbiter.monitors)
     self.assertIsNone(arbiter.exit_code)
     info = arbiter.info()
     # assertIn reports the available keys when 'server' is missing.
     self.assertIn('server', info)
     server = info['server']
     self.assertEqual(server['state'], 'running')
Exemple #24
0
 def test_arbiter_object(self):
     '''Check identity, names, monitors and server info of the arbiter.'''
     arbiter = get_actor()
     self.assertEqual(arbiter, get_arbiter())
     self.assertTrue(arbiter.is_arbiter())
     self.assertEqual(arbiter.concurrency.kind, 'arbiter')
     self.assertEqual(arbiter.aid, 'arbiter')
     self.assertEqual(arbiter.name, 'arbiter')
     self.assertTrue(arbiter.monitors)
     self.assertIsNone(arbiter.exit_code)
     info = arbiter.info()
     # assertIn reports the available keys when 'server' is missing.
     self.assertIn('server', info)
     server = info['server']
     self.assertEqual(server['state'], 'running')
Exemple #25
0
 async def test_simple_spawn(self):
     '''Spawn an actor, ping and echo through it, then stop it.'''
     actor_name = 'simple-actor-on-%s' % self.concurrency
     proxy = await self.spawn_actor(name=actor_name)
     arbiter = get_actor()
     proxy_monitor = arbiter.get_actor(proxy.aid)
     self.assertEqual(proxy_monitor, proxy)
     pong = await send(proxy, 'ping')
     self.assertEqual(pong, 'pong')
     echoed = await send(proxy.proxy, 'echo', 'Hello!')
     self.assertEqual(echoed, 'Hello!')
     # ActorTestMixin.stop_actors is called explicitly because
     # ActorTestMixin.tearDown runs on the test-worker domain, while this
     # test runs in the arbiter domain.
     await self.stop_actors(proxy)
     still_alive = await async_while(3, proxy_monitor.is_alive)
     self.assertFalse(still_alive)
Exemple #26
0
 async def test_actor_timeout(self):
     """Spawn an actor with ``timeout=1``, run ``cause_timeout`` on it and
     check that it is stopped and removed from the managed actors."""
     arbiter = get_actor()
     self.assertTrue(arbiter.is_arbiter())
     name = 'bogus-timeout-%s' % self.concurrency
     proxy = await self.spawn_actor(name=name, timeout=1)
     self.assertEqual(proxy.name, name)
     # assertIn/assertNotIn show the container on failure.
     self.assertIn(proxy.aid, arbiter.managed_actors)
     proxy = arbiter.managed_actors[proxy.aid]
     await send(proxy, 'run', cause_timeout)
     # The arbiter should soon start to stop the actor
     await wait_for_stop(self, proxy.aid, True)
     #
     self.assertTrue(proxy.stopping_start)
     self.assertNotIn(proxy.aid, arbiter.managed_actors)
Exemple #27
0
 async def test_terminate(self):
     """Spawn an actor with ``timeout=1``, run ``cause_terminate`` on it and
     wait for it to be stopped."""
     arbiter = get_actor()
     self.assertTrue(arbiter.is_arbiter())
     name = 'bogus-term-%s' % self.concurrency
     proxy = await self.spawn_actor(name=name, timeout=1)
     self.assertEqual(proxy.name, name)
     # assertIn shows the managed actors on failure.
     self.assertIn(proxy.aid, arbiter.managed_actors)
     proxy = arbiter.managed_actors[proxy.aid]
     #
     await send(proxy, 'run', cause_terminate)
     #
     # The arbiter should now terminate the actor
     await wait_for_stop(self, proxy.aid, True)
     #
     self.assertTrue(proxy.stopping_start)
Exemple #28
0
 async def test_actor_timeout(self):
     """Spawn an actor with ``timeout=1``, run ``cause_timeout`` on it and
     check that it is stopped and removed from the managed actors."""
     arbiter = get_actor()
     self.assertTrue(arbiter.is_arbiter())
     name = 'bogus-timeout-%s' % self.concurrency
     proxy = await self.spawn_actor(name=name, timeout=1)
     self.assertEqual(proxy.name, name)
     # assertIn/assertNotIn show the container on failure.
     self.assertIn(proxy.aid, arbiter.managed_actors)
     proxy = arbiter.managed_actors[proxy.aid]
     await send(proxy, 'run', cause_timeout)
     # The arbiter should soon start to stop the actor
     await wait_for_stop(self, proxy.aid, True)
     #
     self.assertTrue(proxy.stopping_start)
     self.assertNotIn(proxy.aid, arbiter.managed_actors)
Exemple #29
0
 async def test_terminate(self):
     """Spawn an actor with ``timeout=1``, run ``cause_terminate`` on it and
     wait for it to be stopped."""
     arbiter = get_actor()
     self.assertTrue(arbiter.is_arbiter())
     name = 'bogus-term-%s' % self.concurrency
     proxy = await self.spawn_actor(name=name, timeout=1)
     self.assertEqual(proxy.name, name)
     # assertIn shows the managed actors on failure.
     self.assertIn(proxy.aid, arbiter.managed_actors)
     proxy = arbiter.managed_actors[proxy.aid]
     #
     await send(proxy, 'run', cause_terminate)
     #
     # The arbiter should now terminate the actor
     await wait_for_stop(self, proxy.aid, True)
     #
     self.assertTrue(proxy.stopping_start)
async def parse_homepages_from_wiki_lvl1(request, message):
    """Extract conference links from a slice of the WikiCFP file and fan them
    out to downloader actors.

    Lines ``message[0]:message[1]`` of ``WIKICFP_FILENAME`` are stripped and
    passed to ``find_conf_link_one``. Every link found is appended to
    ``CONFERENCES_HOMEPAGES_FILENAME``. The links are then split with
    ``split_to_chunks(links, 4)``, and one actor is spawned per chunk and sent
    a ``'download_homepages'`` request. Every spawned actor is sent ``'stop'``
    afterwards, even when one of the requests fails.

    :param request: incoming request; only ``request.actor.logger`` is used.
    :param message: indexable pair ``(start, stop)`` used to slice the lines.
    :return: ``None``.
    """
    print("PARSE_FROM_WIKI", message)
    current_actor = get_actor()
    request.actor.logger.info("Actor: " + str(current_actor) + " Indexes: " + str(message))

    # Context managers close the files even if reading or writing fails.
    with open(WIKICFP_FILENAME, "r") as wikicfp_file:
        wikicfp_file_lines = wikicfp_file.readlines()[message[0]: message[1]]

    conference_links = []
    for line in wikicfp_file_lines:
        stripped_line = line.strip()
        print("Stripped line:", stripped_line)
        conference_link = find_conf_link_one(stripped_line)
        print("conferecne link", conference_link)
        if conference_link is None:
            continue
        conference_links.append(conference_link)

    with open(CONFERENCES_HOMEPAGES_FILENAME, 'a') as homepages_file:
        for link in conference_links:
            homepages_file.write(link + '\n')

    print(conference_links)
    # The chunks are iterated twice below (spawn, then send), so materialise
    # them in case split_to_chunks returns a one-shot iterator.
    conference_links_chunks = list(split_to_chunks(conference_links, 4))

    # Start all downloaders first, then wait for each of them to come up.
    spawned_actors = [
        spawn(name=current_actor.name + "_" + str(i))
        for i in range(len(conference_links_chunks))
    ]
    for actor in spawned_actors:
        await actor

    try:
        # Dispatch every request before awaiting any of them.
        sent_actions = [
            send(actor, 'download_homepages', chunk)
            for actor, chunk in zip(spawned_actors, conference_links_chunks)
        ]
        for action in sent_actions:
            result = await action
            print(result)
    finally:
        # Always stop the downloaders so a failed request does not leak them.
        for actor in spawned_actors:
            await send(actor, 'stop')
Exemple #31
0
 async def __call__(self):
     '''Return ``True`` when the ``<name>_server`` store answers a ping.

     The address is read from the current actor's config. ``False`` is
     returned when it is not set, when ``create_store`` raises
     ``ImproperlyConfigured`` or when the ping raises any ``Exception``.
     '''
     addr = get_actor().cfg.get('%s_server' % self.name)
     if not addr:
         return False
     # Prefix the scheme when the configured address does not carry it.
     if ('%s://' % self.name) not in addr:
         addr = '%s://%s' % (self.name, addr)
     try:
         store = create_store(addr)
     except ImproperlyConfigured:
         return False
     try:
         await store.ping()
     except Exception:
         return False
     return True
Exemple #32
0
 async def __call__(self):
     '''Return ``True`` when the ``<name>_server`` store answers a ping.

     The address is read from the current actor's config. ``False`` is
     returned when it is not set, when ``create_store`` raises
     ``ImproperlyConfigured`` or when the ping raises any ``Exception``.
     '''
     addr = get_actor().cfg.get('%s_server' % self.name)
     if not addr:
         return False
     # Prefix the scheme when the configured address does not carry it.
     if ('%s://' % self.name) not in addr:
         addr = '%s://%s' % (self.name, addr)
     try:
         store = create_store(addr)
     except ImproperlyConfigured:
         return False
     try:
         await store.ping()
     except Exception:
         return False
     return True
Exemple #33
0
 async def test_start(self):
     '''Starting the 'pluto' app registers the 'pluto' and 'rpc_pluto'
     monitors with the arbiter.'''
     monitor_names = ('pluto', 'rpc_pluto')
     arbiter = get_actor()
     app = self.create(name='pluto')
     self.assertTrue(app)
     for monitor_name in monitor_names:
         self.assertFalse(arbiter.get_actor(monitor_name))
     # create the application
     await app.start()
     started = []
     for monitor_name in monitor_names:
         monitor = arbiter.get_actor(monitor_name)
         self.assertTrue(monitor)
         self.assertTrue(monitor.is_monitor())
         started.append(monitor)
     await asyncio.sleep(2)
     for monitor in started:
         await monitor.stop()
Exemple #34
0
    def __call__(self, argv, start=True, get_app=False):
        """Build the pulsar server for this command and optionally start it.

        :param argv: command-line arguments passed to ``self.pulsar_app``.
        :param start: when true and the server has no logger, start the
            server and the arbiter; when false, only build and return.
        :param get_app: with ``start=False``, return the application
            instead of the server.
        :return: ``None`` when ``start`` is true, otherwise ``app`` or
            ``server`` depending on ``get_app``.
        """
        app = self.app
        app.callable.command = self.name
        server = self.pulsar_app(
            argv,
            self.wsgiApp,
            server_software=app.config['SERVER_NAME'],
        )
        if server.cfg.nominify:
            app.params['MINIFIED_MEDIA'] = False

        if start and not server.logger:   # pragma    nocover
            if not get_actor():
                clear_logger()
            app._started = server()
            app.fire_event('on_start', data=server)
            arbiter().start()

        if not start:
            if get_app:
                return app
            return server
Exemple #35
0
 async def test_spawning_in_arbiter(self):
     '''Spawn an actor from the arbiter, check it is managed, then stop it.'''
     arbiter = get_actor()
     self.assertEqual(arbiter.name, 'arbiter')
     # The specialised assertions report the compared values on failure.
     self.assertGreaterEqual(len(arbiter.monitors), 1)
     name = 'testSpawning-%s' % self.concurrency
     future = spawn(name=name, concurrency=self.concurrency)
     # The actor id is listed as managed before the spawn future resolves.
     self.assertIn(future.aid, arbiter.managed_actors)
     proxy = await future
     self.assertEqual(future.aid, proxy.aid)
     self.assertEqual(proxy.name, name)
     self.assertIn(proxy.aid, arbiter.managed_actors)
     self.assertEqual(proxy, arbiter.get_actor(proxy.aid))
     #
     await asyncio.sleep(1)
     # Stop the actor
     result = await send(proxy, 'stop')
     self.assertIsNone(result)
     #
     result = await wait_for_stop(self, proxy.aid)
     self.assertIsNone(result)
Exemple #36
0
 async def test_spawning_in_arbiter(self):
     '''Spawn an actor from the arbiter, check it is managed, then stop it.'''
     arbiter = get_actor()
     self.assertEqual(arbiter.name, 'arbiter')
     # The specialised assertions report the compared values on failure.
     self.assertGreaterEqual(len(arbiter.monitors), 1)
     name = 'testSpawning-%s' % self.concurrency
     future = spawn(name=name, concurrency=self.concurrency)
     # The actor id is listed as managed before the spawn future resolves.
     self.assertIn(future.aid, arbiter.managed_actors)
     proxy = await future
     self.assertEqual(future.aid, proxy.aid)
     self.assertEqual(proxy.name, name)
     self.assertIn(proxy.aid, arbiter.managed_actors)
     self.assertEqual(proxy, arbiter.get_actor(proxy.aid))
     #
     await asyncio.sleep(1)
     # Stop the actor
     result = await send(proxy, 'stop')
     self.assertIsNone(result)
     #
     result = await wait_for_stop(self, proxy.aid)
     self.assertIsNone(result)
Exemple #37
0
 async def test_config_command(self):
     '''Exercise the 'config' command with valid and invalid arguments.'''
     actor_name = 'actor-test-config-%s' % self.concurrency
     proxy = await self.spawn_actor(name=actor_name)
     arbiter = get_actor()
     proxy_monitor = arbiter.get_actor(proxy.aid)
     # (arguments after 'config', expected reply), sent in this order
     exchanges = (
         (('khjkh', 'name'), None),
         (('get', 'concurrency'), self.concurrency),
         (('get', 'concurrency', 'foo'), None),
         (('set', 'max_requests', 1000, 1000), None),
         (('set', 'max_requests', 1000), True),
         (('get', 'max_requests'), 1000),
     )
     for arguments, expected in exchanges:
         reply = await send(proxy, 'config', *arguments)
         self.assertEqual(reply, expected)
     #
     await self.stop_actors(proxy)
     still_alive = await async_while(3, proxy_monitor.is_alive)
     self.assertFalse(still_alive)
Exemple #38
0
 async def test_spawn_from_actor(self):
     '''Spawn an actor, let it spawn a second one, then stop both.'''
     proxy = await self.spawn_actor(name='spawning-actor-%s' %
                                    self.concurrency)
     arbiter = get_actor()
     self.assertTrue(repr(proxy).startswith('spawning-actor-'))
     self.assertEqual(proxy, proxy.proxy)
     proxy_monitor = arbiter.get_actor(proxy.aid)
     # Kept as assertFalse(a != b) so the ``!=`` operator itself is tested.
     self.assertFalse(proxy != proxy_monitor)
     #
     # do the spawning
     name = 'spawned-actor-%s-from-actor' % self.concurrency
     aid = await send(proxy, 'run', spawn_actor_from_actor, name)
     self.assertTrue(aid)
     proxy_monitor2 = arbiter.get_actor(aid)
     self.assertEqual(proxy_monitor2.name, name)
     # assertNotEquals is a deprecated alias removed in Python 3.12.
     self.assertNotEqual(proxy_monitor, proxy_monitor2)
     #
     # stop them
     await self.stop_actors(proxy, proxy_monitor2)
     is_alive = await async_while(3, proxy_monitor.is_alive)
     self.assertFalse(is_alive)
     is_alive = await async_while(3, proxy_monitor2.is_alive)
     self.assertFalse(is_alive)
Exemple #39
0
def actor_init_task():
    """Load the LSI model from ``config.lsi_model_path`` and store it under
    ``extra['lsi']`` on the current actor."""
    print(get_actor().name + ': Wczytuję model lsi...')
    get_actor().extra['lsi'] = models.LsiModel.load(config.lsi_model_path)
Exemple #40
0
 def test_TestSuiteMonitor(self):
     '''The monitor registered as 'test' runs a ``TestSuite`` application.'''
     arbiter = get_actor()
     # The specialised assertions report the offending value on failure.
     self.assertGreaterEqual(len(arbiter.monitors), 1)
     monitor = arbiter.registered['test']
     app = monitor.app
     self.assertIsInstance(app, TestSuite)
Exemple #41
0
 def test_bad_monitor(self):
     '''Adding a monitor under an existing name raises ``KeyError``.'''
     arbiter = get_actor()
     self.assertTrue(arbiter.monitors)
     existing = list(arbiter.monitors.values())
     taken_name = existing[0].name
     self.assertRaises(KeyError, arbiter.add_monitor, taken_name)
Exemple #42
0
 def test_bad_monitor(self):
     '''Adding a monitor under an existing name raises ``KeyError``.'''
     arbiter = get_actor()
     self.assertTrue(arbiter.monitors)
     existing = list(arbiter.monitors.values())
     taken_name = existing[0].name
     self.assertRaises(KeyError, arbiter.add_monitor, taken_name)
Exemple #43
0
 def testMe(self):
     '''``system.process_info`` returns a dict for the current actor's pid.'''
     worker = get_actor()
     info = system.process_info(worker.pid)
     # Also exercise the call without an explicit pid.
     system.process_info()
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(info, dict)