Example #1
def get_taotu_pages(category_url):
    # Find all the pagination URLs under a given category
    print('process category: {0}'.format(category_url))
    soup = commons.soup(category_url, encoding='utf8')
    print('process index: {0}'.format(soup.title))
    last_no = get_last_page_no(soup)
    urls = ['{0}/list_{1}.html'.format(category_url, i) for i in range(2, last_no + 1)]
    # for url in urls:
    # download_by_page(url)
    retry = 0
    while True:
        pool = ThreadPool(4)
        try:
            pool.map(download_by_page, urls)
            pool.close()
            pool.join()
            print('all images downloaded completely.')
            break
        except KeyboardInterrupt as e:
            print('download terminated by user, quit now.', e)
            pool.terminate()
            pool.join()
            break
        except Exception as e:
            pool.terminate()
            pool.join()
            retry += 1
            traceback.print_exc()
            try:
                print('download error: {0}, {1} retry in {2}s'.format(
                    e, retry, retry * 20 % 120))
            except Exception:
                pass
            time.sleep(retry * 20 % 120)
def download_urls_to_zip(zf, urls):
    # Deduplicate the URLs, then download them into the zip file concurrently.
    urls = set(urls)
    pool = ThreadPool(10)
    download_to_zip_func = lambda url: download_url_to_zip(zf, url)
    pool.map(download_to_zip_func, urls)
    pool.close()
    pool.join()
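# The download_url_to_zip() helper used above is not shown in this listing.
# A minimal sketch of what it might look like (hypothetical, not from the
# original source): zipfile.ZipFile is not thread-safe, so writes into the
# shared archive are serialized with a module-level lock.
import os
import threading
import requests

_zip_write_lock = threading.Lock()

def download_url_to_zip(zf, url):
    # Fetch the URL and add its body to the shared ZipFile under the lock.
    try:
        data = requests.get(url, timeout=30).content
    except requests.RequestException:
        return
    arcname = os.path.basename(url) or 'index.html'
    with _zip_write_lock:
        zf.writestr(arcname, data)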
	def __init__(self,records,outfile):
		#cat_xml = "../itma.cat.soutron_20160216.xml"
		print 'Parsing XML data...'
		#records = etree.parse(cat_xml)
		self.counter = 0
		self.roles = ['performer']
		self.locations = []
		cfields = []
		refnos = []

		self.records = records
		[[cfields.append(y) for y in x.xpath('*[self::Creator or self::Contributors]')] for x in records]
		[[self.locations.append(y) for y in x.xpath('GeographicalLocation/text()')] for x in records]
		
		map(self.parse_roles,cfields)
		self.roles = list(set([x.replace('\n','').strip() for x in filter(lambda x:len(x) > 1,self.roles)]))
		self.locations = list(set(self.locations))

		self.role_list = etree.Element("NamedRoles")

		self.cache = {}

		print 'extracting roles...'
		pool = Pool(processes=4)
		pool.map(self.process_recordlist,records)
			
		etree.ElementTree(self.role_list).write(outfile, pretty_print=True)
Example #4
    def test_threading(self):
        pool = ThreadPool(4)
        results = pool.map(self.parser.parse, Dictionary().version.terms)
        self.assertSetEqual({str(t) for t in results}, {'[{0}]'.format(str(t)) for t in Dictionary().version.terms})

        results = pool.map(script, Dictionary().version.terms)
        self.assertSetEqual({str(t) for t in results}, set(Dictionary().version.terms))
Example #5
def get_offline_user_data():
    if DEBUG_MODE:
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'get_offline_user_data')
    if r_session.exists('api_error_info'):
        return

    if datetime.now().minute < 50:
        return

    offline_users = []
    for b_user in r_session.mget(*['user:%s' % name.decode('utf-8') for name in r_session.sdiff('users', *r_session.smembers('global:online.users'))]):
        user_info = json.loads(b_user.decode('utf-8'))

        username = user_info.get('username')

        if not user_info.get('active'): continue

        every_hour_key = 'user:%s:cron_queued' % username
        if r_session.exists(every_hour_key): continue

        offline_users.append(username)

    pool = ThreadPool(processes=5)

    pool.map(get_data, offline_users)
    pool.close()
    pool.join()
Example #6
def build_words_weight():
    st = time.time()
    bigvs = BigVs.objects.all()
    def _build(b):
        data = ArticlePostedResults.active_objects.filter(bigv__v_id=b.v_id, is_correct__in=(0, 1)).values('is_correct').annotate(count=Count('is_correct')).order_by('is_correct')
        sum_c , w, c = 0, 0, 0
        for d in data:
            if d['is_correct'] == 1:
                c = d['count']
            sum_c += d['count']
        if sum_c:
            w = c * 1.0 / sum_c
            c = w * 200
            sum_c = 200
        data = Judgement.objects.filter(article__bigv=b, judge__isnull=False).values('judge').annotate(count=Count('judge')).order_by('judge')
        for d in data:
            if d['judge'] == 'right':
                c += d['count']
            sum_c += d['count']
        if sum_c:
            w = int(round(c * 1.0 / sum_c * 100))
            b.words_weight = w
            b.save()
            print b.name, c, sum_c, w
    pool = Pool(8)
    pool.map(_build, bigvs)
    pool.close()
    pool.join()
    ed = time.time()
    debug('build_words_weight', ed - st)
def load_rowdata_to_mongo_zh(is_incremental):
    print("start loading row data(zh) from JSON file to MongoDB...")
    all_start = timeit.default_timer()
    static = Static()
    bydim_dir = static.output_folder + static.dataset_bydim_folder
    
    client = MongoClient(static.mongo_url, static.mongo_port)
    db = client[static.database_name]
    dataset_col = db[static.dataset_col_name]
    if not is_incremental:
        dataset_col.drop()

    file_path_array = []
    for idx, file in enumerate(os.listdir(bydim_dir)):
        file_path = os.path.join(bydim_dir, file)
        if os.path.isfile(file_path):
            file_path_array.append(file_path)
    print(str(len(file_path_array)) + " files are loaded")

    counter = []
    mapfunc = partial(insert_by_dim, counter=counter, dataset_col=dataset_col, all_start=all_start)
    pool = ThreadPool(12)
    pool.map(mapfunc, file_path_array)
    pool.close() 
    pool.join()
    
    print("All the threads are completed. Total number is " + str(len(counter)) + "\n")
    print("total time cost: " + str(round(timeit.default_timer() - all_start)) + 's')
Example #8
def collect_crystal():
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'collect_crystal')
    pool = ThreadPool(processes=5)

    pool.map(check_collect, (json.loads(c.decode('utf-8')) for c in r_session.smembers('global:auto.collect.cookies')))
    pool.close()    
    pool.join()
Example #9
def test_multi_threading():
    import time
    import random
    from multiprocessing.dummy import Pool

    def op_a(a, b):
        time.sleep(random.random()*.02)
        return a+b

    def op_b(c, b):
        time.sleep(random.random()*.02)
        return c+b

    def op_c(a, b):
        time.sleep(random.random()*.02)
        return a*b

    pipeline = compose(name="pipeline", merge=True)(
        operation(name="op_a", needs=['a', 'b'], provides='c')(op_a),
        operation(name="op_b", needs=['c', 'b'], provides='d')(op_b),
        operation(name="op_c", needs=['a', 'b'], provides='e')(op_c),
    )

    def infer(i):
        # data = open("616039-bradpitt.jpg").read()
        outputs = ["c", "d", "e"]
        results = pipeline({"a": 1, "b":2}, outputs)
        assert tuple(sorted(results.keys())) == tuple(sorted(outputs)), (outputs, results)
        return results

    N = 100
    for i in range(20, 200):
        pool = Pool(i)
        pool.map(infer, range(N))
        pool.close()
Example #10
def main():
    parser = argparse.ArgumentParser(description='Checks a LegalOne application for broken links')
    parser.add_argument('-d', '--domain',
                        help='URL to check for broken links. Ex. http://colucci.release.dco.novajus.com.br',
                        required=True)
    parser.add_argument("-e", '--escritorio',
                        help='Account to check for broken links, Ex. xxxx, where xxx.release.dco.novajus.com.br',
                        required=True)
    parser.add_argument("-l", '--loginpage',
                        help='URL to login on the application. Ex. http://release.dco.novajus.com.br/conta/login',
                        required=True)
    parser.add_argument("-t", '--threads',
                        type=int,
                        help='How many threads searching for broken links at the same time. Default is 10',
                        required=False, default=10)
    args = parser.parse_args()
    loginpage = args.loginpage
    escritorio = args.escritorio
    domain = args.domain
    threads = args.threads
    pages_to_test = queue.Queue(maxsize=0)
    cookie_login = login(domain, escritorio, loginpage)
    pages_to_test.put(domain + "/contatos/contatos/search")
    test_url(cookie_login, pages_to_test, domain, pages_to_test.get())
    while not pages_to_test.empty():
        pool = ThreadPool(threads)
        links_to_check = []
        for x in range(0, threads):
            links_to_check.append(pages_to_test.get())
        partialtest_url = partial(test_url, cookie_login, pages_to_test, domain)
        pool.map(partialtest_url, links_to_check)
        pool.close()
        pool.join()
def simTrans(hosts, prm):
	fname = str(prm.n) + 'nodes.' + str(prm.data_size) + 'MB.' + str(prm.pipes) + 'pipes.out'
	for h in hosts:
		full_name = "results/%04d/%s"%(int(h.name.split('h')[1]), fname)
		os.system("rm %s" % full_name)
		status[h.name] = [0 for i in range(prm.pipes)]
		ip[h.name] = h.IP()
		h.cmdPrint('iperf -s -f M >> %s &'%full_name)
	'''for h1 in hosts:
		for h2 in hosts:
			if h1 == h2:
				continue
			print "Testing %s and %s after running server" % (h1.name, h2.name)
			net.iperf( (h1, h2) )
	'''
	print neib
	status['h1'] = [2 for i in range(prm.pipes)]	#start node
	print status
	k = []
	for h in hosts:
		k.append((h, prm))
	pool = ThreadPool(50)
	pool.map(perNodeProc, k)
	pool.close()
	pool.join()

	for h in hosts:
		h.cmdPrint('kill %iperf')
def main():
    parser = argparse.ArgumentParser(usage='%(prog)s [options] SERVER_URL',
                                     description=__doc__)
    parser.add_argument(
        '-t', '--threads',
        help='Number of threads (simultaneous connections)',
        dest='threads', default=1, type=int)
    parser.add_argument('server', help='URL of server')
    args = parser.parse_args()

    server = args.server

    if not server.startswith('http://'):
        server = 'http://{}'.format(server)

    icons = []
    for font_id, font in fonts.items():
        for char in font['characters']:
            url = os.path.join(server, 'icon', font_id, '000', char)
            icons.append((font_id, char, url))

    icons.sort()

    print('{} icons to test on {} ...'.format(len(icons), args.server))

    if MAX_ICONS:
        icons = icons[:MAX_ICONS]

    pool = Pool(args.threads)
    pool.map(check_icon, icons)
    pool.close()
    pool.join()
Example #13
 def get_proxy(self):
     self._parse_proxy()
     pool = ThreadPool(8)
     pool.map(self._check_proxy, self.proxies)
     pool.close()
     pool.join()
     return self.checked_proxies
def test_upload_chunk__expired_url():
    upload_parts = [{'uploadPresignedUrl': 'https://www.fake.url/fake/news',
                     'partNumber': 420},
                    {'uploadPresignedUrl': 'https://www.google.com',
                     'partNumber': 421},
                    {'uploadPresignedUrl': 'https://rito.pls/',
                     'partNumber': 422},
                    {'uploadPresignedUrl': 'https://never.lucky.gg',
                     'partNumber': 423}
                    ]

    value_doesnt_matter = None
    expired = Value(c_bool, False)
    mocked_get_chunk_function = MagicMock(side_effect=[1, 2, 3, 4])

    with patch.object(multipart_upload, "_put_chunk",
                      side_effect=SynapseHTTPError("useless message",
                                                   response=MagicMock(status_code=403))) as mocked_put_chunk, \
         patch.object(warnings, "warn") as mocked_warn:
        def chunk_upload(part):
            return _upload_chunk(part, completed=value_doesnt_matter, status=value_doesnt_matter, syn=syn,
                                 filename=value_doesnt_matter, get_chunk_function=mocked_get_chunk_function,
                                 fileSize=value_doesnt_matter, partSize=value_doesnt_matter,
                                 t0=value_doesnt_matter, expired=expired, bytes_already_uploaded=value_doesnt_matter)
        # 2 threads both with urls that have expired
        mp = Pool(4)
        mp.map(chunk_upload, upload_parts)
        assert_true(expired.value)

        # assert warnings.warn was only called once
        mocked_warn.assert_called_once_with("The pre-signed upload URL has expired. Restarting upload...\n")

        # assert _put_chunk was called at least once
        assert_greater_equal(len(mocked_put_chunk.call_args_list), 1)
Example #15
def BurstDz(host, path, user, passfile):
    hostuser = host.split('.')
    hostuser = hostuser[len(hostuser)-2]
    hostdir = [hostuser,hostuser+hostuser,'admin'+hostuser,hostuser+'123','manage'+hostuser,hostuser+'123456',hostuser+'admin','123'+hostuser]

    opts_list = []

    f = open(passfile, 'r')
    password = f.read().split()
    dic = password+hostdir
    pool = ThreadPool(10)
    host1 = host+path

    for x in range(len(dic)):
        mima = dic[x]
        opts = {
            'host': host1,
            'user': user,
            'password': mima
        }
        opts_list.append(opts)

    #print hostr
    #print result
    pool.map(LoginDisCuz, opts_list)
    #pool.join()
    print 'All PassWord Run Over'
def format_data(dealer_data):
     start_time = time.time()
     pool = Pool(1)
     dealers=[]
     today = datetime.now()
     for data in dealer_data:
         temp = {}
         temp['id'] = data[0]
         temp['service_advisor_id'] = data[1]
         temp['name'] = data[2]
         temp['phone_number'] = data[3]
         temp['order'] = data[4]
         temp['password'] = data[1]+'@123'
         temp['last_login'] = today
         temp['is_superuser'] = 0
         temp['username'] = data[1]
         temp['first_name'] = ' '
         temp['last_name'] = ' '
         temp['email'] = ''
         temp['is_staff'] = 0
         temp['is_active'] = 1
         temp['date_joined'] = today
         dealers.append(temp)
     pool.map(process_query, dealers)
     end_time = time.time()
     print "..........Total TIME TAKEN.........", end_time-start_time
Example #17
def e_cal(l, cores):
    global LOOPS
    '''
    e calculator.
    This function receives the number of digits of precision to use,
    prints progress while it is working, and returns the value of e.
    '''
    p = Pool()
    getcontext().prec = l
    e = Decimal(0)
    i = 0
    temp = 0
    c = 0
    while True:
        fact = p.map(math.factorial, range(i, i+cores)) #parallel process factorial
        e += sum(p.map(one_div, fact)) #processed factorial will total in here
        i += cores
        c += 1
        LOOPS += 1
        sys.stdout.write("\r%i loops passed." % (c) ) #Print Loop status
        sys.stdout.flush()
        #print i, "loops passed."
        if e == temp:
            break
        temp = e
    sys.stdout.write("\r%i loops passed.\n" % (c) )
    print i
    p.close()
    p.join()

    return e
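# The one_div() helper used by e_cal() above is not shown. A minimal sketch
# (hypothetical, not from the original source), assuming it returns 1/n! at
# the current Decimal precision so the partial sums converge to e:
from decimal import Decimal

def one_div(fact):
    # fact is n! as already computed by math.factorial in the worker pool
    return Decimal(1) / Decimal(fact)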
Example #18
def _main():
    proxies = p.get_all_proxies()
    random.shuffle(proxies)
    pool = ThreadPool(2)
    pool.map(download, proxies[:100])
    pool.close()
    pool.join()
Example #19
def validate_proxies(proxies, test_url=None):
    """
    Return tuple. 
    First element is available proxies list. 
    Second element is unavailable proxies list

    Arguments:
    - `proxies`:
    """
    availabes, unavailables = [], []
    # for proxy in proxies:
    #     if is_proxy_available(proxy, test_url):
    #         availabes.append(proxy)
    #     else:
    #         unavailables.append(proxy)
    # return (availabes, unavailables)

    def worker(proxy):
        if is_proxy_available(proxy, test_url):
            availabes.append(proxy)
        else:
            unavailables.append(proxy)
    
    pool = ThreadPool(10)
    pool.map(worker, proxies)
    pool.close()
    pool.join()
    return (availabes, unavailables)
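# The is_proxy_available() helper assumed by validate_proxies() is not shown.
# A minimal sketch (hypothetical, not from the original source), assuming each
# proxy is an "ip:port" string and availability means an HTTP GET through the
# proxy succeeds within a timeout:
import requests

def is_proxy_available(proxy, test_url=None, timeout=10):
    test_url = test_url or 'http://example.com'
    proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
    try:
        resp = requests.get(test_url, proxies=proxies, timeout=timeout)
        return resp.status_code == 200
    except requests.RequestException:
        return False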
Example #20
    def thread_patch(self):
        urls = self.get_urls()

        pool = ThreadPool(8)
        pool.map(self.patch_news, [(uri, _id) for uri, _id in urls])
        pool.close()
        pool.join()
Example #21
def main():
    mht_list = get_mht_list()
    if not mht_list:
        print u'请确保目录下有mht文件\n'
        return
    print u'共有%s个mht文件中的图片需要备份\n'%len(mht_list)

    print u'请输入你的QQ号码(6-10位纯数字):'
    qq = raw_input()
    print u'正在搜索mht文件中待备份图片,请稍后....'
    get_mht_pic(mht_list)
    if not mht_pic_md5:
        print u'mht文件中未包含可备份图片\n'
        return
    print u'共找到%s张待备份图片'%len(mht_pic_md5)
    # QQ image folder
    documents_path = os.path.join(os.getenv('USERPROFILE'), 'Documents')
    img_path = os.path.join(documents_path, 'Tencent Files', qq, 'Image')
    print u'正在统计QQ聊天记录图片, 请稍后....'
    pic_list = get_pic_list(img_path)
    if not pic_list:
        print u'未找到QQ聊天记录图片文件夹,请确保输入了正确的QQ号码\n'
        main()
        return
    
    pool = ThreadPool(thread_num)
    print u'正在备份....'
    pool.map(backup, pic_list)
    print u'备份完成\n图片保存在当前路径的bak文件夹下\n'
Example #22
 def update_all_posts_thread(self):
     pool = ThreadPool(4)
     posts = list(self.all_posts.values())
     pool.map(self.update_post_detail, posts)
     pool.close()
     pool.join()
     self.save_history()
Example #23
    def test_ip_speed(self):

        # t0l = time.time()
        # pooll = Pool(processes=20)
        # for item in self.ip_items:
        #     pooll.apply(self.ping_one_ip, args=(item.ip,))
        # #pooll.map(self.ping_one_ip, self.ip_items)
        # pooll.close()
        # pooll.join

        t0 = time.time()

        # multi-threaded
        pool = ThreadPool(processes=20)
        pool.map(self.ping_one_ip, range(len(self.ip_items)))
        pool.close()
        pool.join()

        t1 = time.time()
        #print "Total time running multi: %s seconds" % ( str(t0-t0l))
        print "Total time running multi: %s seconds" % ( str(t1-t0))

        # single-threaded
        # for index in range(len(self.ip_items)):
        #     self.ping_one_ip(index)
        # t2 = time.time()
        # print "Total time running multi: %s seconds" % ( str(t1-t0))
        # print "Total time running single: %s seconds" % ( str(t2-t1))

        print len(self.ip_items)
        self.ip_items = [item for item in self.ip_items if item.speed >= 0 and item.speed < 1500.0]    # keep only items that responded within the 1.5 s timeout
        print len(self.ip_items)
Example #24
 def get_web_dict_multithreading(self):
     if POOL_NUM > 1 and POOL_NUM < 50:
         pool = Pool(POOL_NUM)
         logging.info("You are using multiple threads to get info from web:"
                      "  [ %d ]\n" % POOL_NUM)
     else:
         pool = Pool()
     if os.path.isfile(LOCAL_JSON_CACHE_FILE):
         with open(LOCAL_JSON_CACHE_FILE, 'rb') as f:
             local_web_cache_dict = json.loads(f.read())
         species_in_local_json_cache = local_web_cache_dict.keys()
         logging.info(
             '[ CACHE ] Get cache from local JSON file:\n  |- %s' %
             '\n  |- '.join(species_in_local_json_cache))
     else:
         species_in_local_json_cache = []
         local_web_cache_dict = {}
     species_not_in_cache = list(
         set(self.non_repeatitive_species_name_list)
         .difference(set(species_in_local_json_cache)))
     pool.map(self._single_query, species_not_in_cache)
     _web_data_cache_dict.update(local_web_cache_dict)
     with open(LOCAL_JSON_CACHE_FILE, 'wb') as f:
         json.dump(_web_data_cache_dict, f,
                   indent=4, separators=(',', ': '))
         logging.info(
             '[ CACHE ] Write all cache to local JSON file:\n  |- %s' %
             '\n  |- '.join(_web_data_cache_dict.keys()))
     pool.close()
     pool.join()
def main():
	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument('-i',help='comma separated input map files from HMP', required=True)
	parser.add_argument('-s',help='comma separated list of body sites',required=True)
	parser.add_argument('-j',help='threads',type=int,default=1)
	args = parser.parse_args()
	logname = 'import-%s' % strftime("%Y%m%d%H%M")
	logger = make_log(logname)
	map_files = args.i.split(',')
	locs_list = args.s.split(',')
	mapper = {}
	map_to_write = []
	for filename in map_files:
		with open(filename, 'rU') as f:
			r = csv.reader(f,delimiter='\t')
			header = r.next()
			for row in r:
				this_mapper = dict(zip(header,row))
				rid = this_mapper['RunID']
				bs = this_mapper['HMPBodySubsiteHMPBodySite']
				sid = this_mapper['SRS_SampleID']
				if bs not in locs_list:
					continue
				map_to_write.append(make_map_to_write_row(this_mapper))
				if rid not in mapper:
					mapper[rid] = this_mapper
				else:
					logger.warning('found two entries for %s saved just one fastq file but both %s and %s will be added to map' % (rid,sid,mapper[rid]['SRS_SampleID']))
	pool = ThreadPool(args.j)
	holder = [[mapper_item, logger] for nom,mapper_item in mapper.iteritems()]
	pool.map(dummy,holder)
	pool.close()
	pool.join()
	write_map(map_to_write, logger)
	logger.info('done')
Example #26
    def spider(self):
        pages = self.__total_pages()

        pool = ThreadPool(16)
        pool.map(self.make_market_data, range(pages))
        pool.close()
        pool.join()
Example #27
    def test_multithread(self):
        logger = HitLogger(300)

        def single_thread_process(logger):
            time.time = mock.Mock(return_value=0)
            logger.log_hit()
            time.sleep(1)
            time.time = mock.Mock(return_value=10)
            logger.get_hits()
            logger.log_hit()
            logger.get_hits()
            logger.log_hit()
            time.sleep(1)
            time.time = mock.Mock(return_value=11)
            logger.get_hits()
            time.sleep(1)
            time.time = mock.Mock(return_value=100)
            logger.log_hit()
            time.sleep(1)
            time.time = mock.Mock(return_value=200)
            logger.log_hit()
            logger.get_hits()
            time.sleep(1)
            time.time = mock.Mock(return_value=300)
            logger.log_hit()
            logger.get_hits()
            time.sleep(1)
            time.time = mock.Mock(return_value=310)
            logger.get_hits()

        pool = Pool(5)
        pool.map(single_thread_process, [ logger ] * 5)

        input('Press any key to exit...')
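# HitLogger itself is not shown in this example. A minimal thread-safe sketch
# (hypothetical, not from the original source), assuming the constructor takes
# a window length in seconds and get_hits() counts the hits logged within that
# sliding window:
import time
import threading
from collections import deque

class HitLogger(object):
    def __init__(self, window):
        self.window = window
        self._hits = deque()
        self._lock = threading.Lock()

    def _evict(self, now):
        # Drop timestamps that have fallen out of the sliding window.
        while self._hits and self._hits[0] <= now - self.window:
            self._hits.popleft()

    def log_hit(self):
        with self._lock:
            now = time.time()
            self._evict(now)
            self._hits.append(now)

    def get_hits(self):
        with self._lock:
            self._evict(time.time())
            return len(self._hits)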
Example #28
def start_download(page_url):
    links = get_pattern_group(image_pattern, get_url_content(page_url))

    pool = ThreadPool(3)
    pool.map(download, links)
    pool.close()
    pool.join()
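# The download() helper used above is not shown. A minimal sketch
# (hypothetical, not from the original source) that saves each image link to
# a local file named after the last path component of the URL:
import os
import requests

def download(link):
    filename = os.path.basename(link.split('?')[0]) or 'image.bin'
    try:
        resp = requests.get(link, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        return
    with open(filename, 'wb') as f:
        f.write(resp.content)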
def test_generate_in_progress_resizer_option_true(
    redis_cache,
    resizetarget_opts,
    image1_data,
    image1_name,
    tmpdir
):
    config = Config(
        root=str(tmpdir),
        url='/',
        redis_host=redis_cache.redis,
        raise_on_generate_in_progress=True
    )
    resizer = flask_resize.make_resizer(config)

    # Save original file
    resizer.storage_backend.save(image1_name, image1_data)

    def run(x):
        return resizer(image1_name)

    pool = Pool(2)

    with pytest.raises(flask_resize.exc.GenerateInProgress):
        pool.map(run, [None] * 2)
Example #30
def get_reviews(widget, nb_reviews_limit):
    print "fetching users' reviews..."
    soup = BeautifulSoup(widget.text, 'lxml')
    url_basic = soup.body.find('iframe', id = 'the_iframe')['src']
    reviews = []
    start = 1
    while True:
        nb_pages = int(ceil((nb_reviews_limit - len(reviews)) / 7.0))
        urls = [re.sub('min_rating=&', 'min_rating=&page=' + str(i) + '&', url_basic) for i in range(start, nb_pages + start)]
        start += nb_pages
        pool_reviews = ThreadPool(4)
        reviews_url = list(chain.from_iterable(pool_reviews.map(get_reviews_url, urls)))
        pool_reviews.close()
        pool_reviews.join()
        reviews_url = set(reviews_url)
        pool_reviews = ThreadPool(4)
        reviews_this = pool_reviews.map(get_review_single, reviews_url)
        pool_reviews.close()
        pool_reviews.join()
        if reviews_this is None:
            break
        reviews_this_non_empty = filter(lambda x: x != {}, reviews_this)
        if len(reviews_this_non_empty) == 0:
            break
        reviews.extend(reviews_this_non_empty)
        if len(reviews) >= nb_reviews_limit:
            break
    return reviews
Example #31
page = 1

urls = []
for i in xrange(5):
    urls.append(url + str(i + 1))

comments = []


def getsource(url):
    html = requests.get(url)
    selector = etree.HTML(html.text)
    content = selector.xpath(
        '//cc/div[starts-with(@id,"post_content_")]/text()')
    for i in content:
        i = i.replace('\n', '').replace('\t', '').strip()
    comments.extend(content)
    print url
    print len(content)
    print len(comments)


if __name__ == '__main__':
    # content = getsource(urls[0])
    pool.map(getsource, urls)
    for i in comments:
        print i
'''
//*[@id="post_content_79742208605"]
'''
Example #32
 def run(self):
     pool = ThreadPool(4)
     pool.map(self.retrieve_pic, self.process_multiple())
     pool.close()
     pool.join()
Example #33
def start_clawer(user_id):
    time.sleep(2)
    logger.info('开始爬取第%d个用户信息 ' % user_id)
    user_base_information = get_user_base_information(user_id)
    follow_fans = get_user_follow_fans(user_id)
    if user_base_information is not None:
        if follow_fans is not None:
            user_base_information.update(follow_fans)
            logger.debug(user_base_information)
            print(user_base_information.keys())
        put_into_database(user_base_information, user_id)
    else:
        logger.warning('用户%d的信息未爬取' % user_id)


if __name__ == '__main__':
    logger.info('爬虫开始抓取用户信息')
    pool = ThreadPool(processes=4)
    i = 1
    j = 10000
    for k in range(37020):
        pool.map(start_clawer,
                 [t for t in range(i + k * 10000, j + k * 10000 + 1)])
    """
    for i in range(1,370200000):
        pool.apply_async(start_clawer, (i,))
    pool.close()
    pool.join()
    """
Example #34
def get_current_match_details(s, region, champion_id):
    matchlist = cass.get_match_history(summoner=s, champions=[champion_id], begin_index=0, end_index=10)
    len(matchlist) # to fill matchlist

    try:
        pool = Pool(10)
        pool.map(load_match, matchlist)
        pool.close()
        pool.join()
    except:
        pool.close()
        return HttpResponse(status=500)

    response = {}

    q = {}
    leagues = cass.get_league_positions(summoner=s, region=region)
    for league in leagues:
        q[league.queue.value] = {
            'tier': league.tier.value,
            'division': league.division.value,
            'points': league.league_points
        }
        if league.promos is not None:
            q[league.queue.value]['promos'] = league.promos.progress
            q[league.queue.value]['notPlayed'] = league.promos.not_played

    # summoner stats for past 20 matches on a champion
    stats = {
        "kills": 0,
        "deaths": 0,
        "assists": 0,
        "totalCs": 0,
        "cs10": 0,
        "cs20": 0,
        "cs30": 0,
        "gold10": 0,
        "gold20": 0,
        "gold30": 0,
        "wins": 0,
        "losses": 0
    }
    match_history = []

    games10 = 0
    games20 = 0
    games30 = 0
    cs10 = 0
    cs20 = 0
    cs30 = 0
    gold10 = 0
    gold20 = 0
    gold30 = 0
    match_count = 0
    for match in matchlist:
        if (match.region.value == region):
            match_count += 1

            participants = match.participants
            for participant in participants:
                if participant.summoner.id == s.id:
                    user = participant
                    break

            stats["kills"] += user.stats.kills
            stats["deaths"] += user.stats.deaths
            stats["assists"] += user.stats.assists
            stats["totalCs"] += user.stats.total_minions_killed

            if user.stats.win:
                stats["wins"] += 1
                match_history.append(1)
            else:
                stats["losses"] += 1
                match_history.append(0)

            dur = match.duration.seconds
            try:
                if dur > 10 * 60:
                    gold10 += user.timeline.gold_per_min_deltas['0-10'] * 10
                    cs10 += user.timeline.creeps_per_min_deltas['0-10'] * 10
                    games10 += 1
                if dur > 20 * 60:
                    gold20 += (user.timeline.gold_per_min_deltas['0-10'] + user.timeline.gold_per_min_deltas['10-20']) * 10
                    cs20 += (user.timeline.creeps_per_min_deltas['0-10'] + user.timeline.creeps_per_min_deltas['10-20']) * 10
                    games20 += 1
                if dur > 30 * 60:
                    gold30 += (user.timeline.gold_per_min_deltas['0-10'] + user.timeline.gold_per_min_deltas['10-20'] + user.timeline.gold_per_min_deltas['20-30']) * 10
                    cs30 += (user.timeline.creeps_per_min_deltas['0-10'] + user.timeline.creeps_per_min_deltas['10-20'] + user.timeline.creeps_per_min_deltas['20-30']) * 10
                    games30 += 1
            except:
                log.warn('user timeline data does not exist for match %s', match.id)

    stats["kills"] /= 10
    stats["deaths"] /= 10
    stats["assists"] /= 10
    stats["totalCs"] /= 10

    try:
        stats["cs10"] = round(cs10 / games10, 2)
        stats["cs20"] = round(cs20 / games20, 2)
        stats["cs30"] = round(cs30 / games30, 2)
        stats["gold10"] = round(gold10 / games10, 2)
        stats["gold20"] = round(gold20 / games20, 2)
        stats["gold30"] = round(gold30 / games30, 2)
    except:
        # divide by 0
        pass

    build = {}
    boots = {}
    core = {}
    situational = {}
    all_items = {}

    # get recommended build
    if match_count > 0:
        try:
            champ_items = ChampionItems.objects.get(champ_id=user.champion.id)
            items_blob = ujson.loads(champ_items.item_blob)

            blob_items = items_blob.items()
            for item, occurence in blob_items:
                if int(item) in Items.boots:
                    boots[item] = occurence
                elif int(item) in Items.full_items:
                    all_items[item] = occurence

            sorted_all = sorted(all_items, key=all_items.get, reverse=True) 
            core_arr = sorted_all[:3]
            situational_arr = sorted_all[3:8]

            for item in core_arr:
                core[item] = all_items[item]

            for item in situational_arr:
                situational[item] = all_items[item]
        except:
            pass

    build['boots'] = boots
    build['core'] = core
    build['situational'] = situational

    response['stats'] = stats
    response['build'] = build
    response['leagues'] = q

    return response
Example #35
url = 'https://www.pearvideo.com/category_5'
page_text = requests.get(url=url).text

tree = etree.HTML(page_text)
li_list = tree.xpath('//ul[@id="listvideoListUl"]/li')
url_list = []
for i in li_list:
    detail_url = "https://www.pearvideo.com/" + i.xpath('./div/a/@href')[0]
    name = i.xpath('./div/a/div[2]/text()')[0] + '.mp4'
    detail_page = requests.get(url=detail_url).text
    ex = 'srcUrl="(.*?)",vdoUrl'
    video_url = re.findall(ex, detail_page)[0]
    dic = {'name': name, 'url': video_url}
    url_list.append(dic)


def get_video_data(d):
    url = d['url']
    data = requests.get(url=url).content
    print(d['name'], "正在下载。。。")
    with open(d['name'], 'wb') as f:
        f.write(data)
        print(d['name'], "下载成功。。。")


pool = Pool(4)
pool.map(get_video_data, url_list)
pool.close()
pool.join()
Example #36
    def ping(self, targets=list(), filename=str(), status=str()):
        """
        Attempt to ping a list of hosts or networks (can be a single host)
        :param targets: List - Name(s) or IP(s) of the host(s).
        :param filename: String - name of the file containing hosts to ping
        :param status: String - if one of ['alive', 'dead', 'noip'] then only
        return results that have that status. If this is not specified,
        then all results will be returned.
        :return: Type and results depends on whether status is specified:
                 if status == '': return dict: {targets: results}
                 if status != '': return list: targets if targets == status
        """

        if targets and filename:
            raise SyntaxError("You must specify only one of either targets=[] "
                              "or filename=''.")
        elif not targets and not filename:
            raise SyntaxError("You must specify either a list of targets or "
                              "filename='', but not both.")
        elif filename:
            targets = self.read_file(filename)

        my_targets = {'hosts': [], 'nets': []}
        addresses = []

        # Check for valid networks and add hosts and nets to my_targets
        for target in targets:
            # Targets may include networks in the format "network mask", or,
            # a file could contain multiple hosts or IP's on a single line.
            if len(target.split()) > 1:
                target_items = target.split()
                for item in target_items:
                    try:
                        ip = IPAddress(item)
                        # If it is an IPv4 address or mask put in in addresses
                        if ip.version == 4:
                            addresses.append(str(ip))
                    except AddrFormatError:
                        # IP Address not detected, so assume it's a host name
                        my_targets['hosts'].append(item)
                    except ValueError:
                        # CIDR network detected
                        net = IPNetwork(item)
                        # Make sure it is a CIDR address acceptable to fping
                        if net.ip.is_unicast() and net.version == 4 and \
                                net.netmask.netmask_bits() in range(8, 31):
                            my_targets['nets'].append(target_items[0])
                        else:
                            msg = str(str(net) + ':Only IPv4 unicast addresses'
                                      ' with bit masks\n               '
                                      ' from 8 to 30 are supported.')
                            raise AttributeError(msg)
                # Iterate over the IP strings in addresses
                while len(addresses) > 1:
                    ip = IPAddress(addresses[0])
                    mask = IPAddress(addresses[1])
                    # Test to see if IP is unicast, and mask is an actual mask
                    if ip.is_unicast() and mask.is_netmask():
                        net = IPNetwork(str(ip) + '/' + str(
                            mask.netmask_bits()))
                        # Convert ip and mask to CIDR and remove from addresses
                        my_targets['nets'].append(str(net.cidr))
                        addresses.pop(0)
                        addresses.pop(0)
                    elif ip.is_unicast() and not ip.is_netmask():
                        # mask was not a mask so only remove IP and start over
                        my_targets['hosts'].append(str(ip))
                        addresses.pop(0)
                # There could be one more item in addresses, so check it
                if addresses:
                    ip = IPAddress(addresses[0])
                    if ip.is_unicast() and not ip.is_netmask():
                        my_targets['hosts'].append(addresses[0])
                        addresses.pop()
            # target has only one item, so check it
            else:
                try:
                    ip = IPAddress(target)
                    if ip.version == 4 and ip.is_unicast() and \
                            not ip.is_netmask():
                        my_targets['hosts'].append(target)
                    else:
                        msg = str(target + 'Only IPv4 unicast addresses are '
                                  'supported.')
                        raise AttributeError(msg)
                except AddrFormatError:
                    # IP Address not detected, so assume it's a host name
                    my_targets['hosts'].append(target)
                except ValueError:
                    # CIDR network detected
                    net = IPNetwork(target)
                    if net.ip.is_unicast() and net.version == 4 and \
                            net.netmask.netmask_bits() in range(8, 31):
                        my_targets['nets'].append(target)
                    else:
                        msg = str(str(net) + ':Only IPv4 unicast addresses'
                                  ' with bit masks\n               '
                                  ' from 8 to 30 are supported.')
                        raise AttributeError(msg)

        """
        Build the list of commands to run.
        """
        commands = []
        if len(my_targets['hosts']) != 0:
            for target in range(len(my_targets['hosts'])):
                commands.append([self.fping, '-nV', my_targets['hosts'][
                    target]])
        if len(my_targets['nets']) != 0:
            for target in range(len(my_targets['nets'])):
                commands.append([self.fping, '-ngV', my_targets['nets'][
                    target]])

        """
        Start pinging each item in my_targets and return the requested results
        when done.
        """
        pool = ThreadPool(self.num_pools)
        raw_results = pool.map(self.get_results, commands)
        pool.close()
        pool.join()
        self.results = {host: result for host, result in csv.reader(
            ''.join(raw_results).splitlines())}
        if not status:
            return self.results
        elif status == 'alive':
            return self.alive
        elif status == 'dead':
            return self.dead
        elif status == 'noip':
            return self.noip
        else:
            raise SyntaxError("Valid status options are 'alive', 'dead' or "
                              "'noip'")
Example #37
# from multiprocessing import Pool
# import multiprocessing as mp
from multiprocessing.dummy import Pool  # thread-backed ("dummy") pool
import time
import os


def longTimeTask(i):
    print('task: {}, PID: {}'.format(i, os.getpid()))
    time.sleep(2)
    result = 10**30
    return result


if __name__ == '__main__':
    start_time = time.time()
    print('母程序PID', os.getpid())

    # observe the PIDs
    p = Pool(4)
    # data is a list collecting the return value of each call
    data = p.map(longTimeTask, iterable=[2, 4, 6,
                                         8])  # iterable=range(4) or any other iterable also works
    p.close()
    p.join()

    print(data)

    end_time = time.time()
    print('花了 {} 秒'.format(end_time - start_time))
Example #38
##########  #######   #######        ########  ###    ### 
''', 'red')
print banner

try:
    target = [i.strip() for i in open(sys.argv[1], mode='r').readlines()]
except IndexError:
    exit('python3 script.py urilist.txt 50')

usragt = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

def check(uri):
    try:
        if not uri.endswith('/'):
            uri = uri + '/'
        if not uri.startswith('http'):
            uri = 'http://' + uri

        r = requests.get(uri, headers=usragt, timeout=10)
        if r.status_code == 200:
            print (uri+' OK\n')
            with open('200.txt', mode='a') as d:
                d.write(uri + '/\n')
    except:
        pass

mp = Pool(int(sys.argv[2]))
mp.map(check, target)
mp.close()
mp.join()
Example #39
def trigger_tasks(tasks, thread_count):
    pool = ThreadPool(int(thread_count))
    pool.map(Task.run_tests, tasks)
Example #40
                    self.urls.append(make_url(self.domain, url))


urls = []


def test(url):

    print("URLS : ", len(urls))
    ax = Spider(url)
    urls.extend(ax.urls)
    return url


if __name__ == '__main__':
    content = ""
    url = input('Enter the url : ')
    urls.append(url)
    spider = Spider(url)

    print("Will visit :" + str(len(spider.urls)) + " urls")
    pool = ThreadPool(10)
    results = pool.map(test, spider.urls)
    """for i in spider.urls:
        print("URLS : ", len(urls))
        ax = Spider(i)
        urls.extend(ax.urls)
    print(len(urls))"""
    #spider.find_urls(content)
    #print(content)
Example #41
 def initiate_threads():
     _pool = Pool(5)
     _pool.map(traverse_directory, self.valid_directories)
     _pool.close()
     _pool.join()
Example #42
remote_server_ip = socket.gethostbyname(remote_server)
ports = []

print('Please wait, scanning remote host ', remote_server_ip)

socket.setdefaulttimeout(5)


def scan_port(port):
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        res = s.connect_ex((remote_server_ip, port))
        if res == 0:
            print('Port {}: OPEN'.format(port))
        s.close()
    except Exception as e:
        print(str(e))


for i in range(1, 65535):
    ports.append(i)

t1 = datetime.now()

pool = ThreadPool(32)
results = pool.map(scan_port, ports)
pool.close()
pool.join()

print('Multiprocess Scanning Completed in  ', datetime.now() - t1)
Example #43
def paths_to_bids(path_to_dataset, path_to_csv, bids_dir, modality):
    """
         This method converts all the T1 images found in the AIBL dataset
         downloaded in BIDS

         :param path_to_dataset: path_to_dataset
         :param path_to_csv: path to the csv file containing clinical data
         :param bids_dir: path to save the AIBL-T1-dataset converted in a
         BIDS format
         :param modality: string 't1', 'av45', 'flute' or 'pib'

         :return: list of all the images that are potentially converted in a
         BIDS format and saved in the bids_dir. This does not guarantee
         existence
     """
    from os.path import join, exists
    from numpy import nan
    import pandas as pds
    from clinica.utils.stream import cprint
    from multiprocessing.dummy import Pool
    from multiprocessing import cpu_count, Value
    import glob

    if modality.lower() not in ['t1', 'av45', 'flute', 'pib']:
        # This should never be reached
        raise RuntimeError(modality.lower() +
                           ' is not supported for conversion')

    counter = None

    def init(args):
        """ store the counter for later use """
        global counter
        counter = args

    def create_file(image):
        global counter
        subject = image.Subjects_ID
        session = image.Session_ID
        name_of_path = {
            't1': 'Path_to_T1',
            'av45': 'Path_to_pet',
            'flute': 'Path_to_pet',
            'pib': 'Path_to_pet'
        }
        # depending on the dataframe, there are different ways of accessing
        # the image object
        image_path = image[name_of_path[modality]]
        with counter.get_lock():
            counter.value += 1
        if image_path is nan:
            cprint('No path specified for ' + subject + ' in session ' +
                   session)
            return nan
        cprint('[' + modality.upper() + '] Processing subject ' +
               str(subject) + ' - session ' + session + ', ' +
               str(counter.value) + ' / ' + str(total))
        session = viscode_to_session(session)
        # creation of the path
        if modality == 't1':
            output_path = join(bids_dir, 'sub-AIBL' + subject,
                               'ses-' + session, 'anat')
            output_filename = 'sub-AIBL' + subject + '_ses-' + session + '_T1w'
        elif modality in ['flute', 'pib', 'av45']:
            output_path = join(bids_dir, 'sub-AIBL' + subject,
                               'ses-' + session, 'pet')
            output_filename = 'sub-AIBL' + subject + '_ses-' + session \
                              + '_task-rest_acq-' + modality + '_pet'
        # image is saved following BIDS specifications

        if exists(join(output_path, output_filename + '.nii.gz')):
            cprint('Subject ' + str(subject) + ' - session ' + session +
                   ' already processed.')
            output_image = join(output_path, output_filename + '.nii.gz')
        else:
            output_image = dicom_to_nii(subject, output_path, output_filename,
                                        image_path)
        return output_image

    # it reads the dataframe where subject_ID, session_ID and path are saved
    if modality == 't1':
        images = find_path_to_T1(path_to_dataset, path_to_csv)
    else:
        path_to_csv_pet_modality = glob.glob(
            join(path_to_csv, 'aibl_' + modality + 'meta_*.csv'))[0]
        if not exists(path_to_csv_pet_modality):
            raise FileNotFoundError(path_to_csv_pet_modality +
                                    ' file not found in clinical data folder')
        # Latest version of Flutemetamol CSV file (aibl_flutemeta_01-Jun-2018.csv)
        # has an extra column for some rows. However, each CSV file (regarding PET tracers)
        # contains the same columns. The usecols fixes this issue.
        df_pet = pds.read_csv(path_to_csv_pet_modality,
                              sep=',|;',
                              usecols=list(range(0, 36)))
        images = find_path_to_pet_modality(path_to_dataset, df_pet)
    images.to_csv(join(bids_dir, modality + '_paths_aibl.tsv'),
                  index=False,
                  sep='\t',
                  encoding='utf-8')

    counter = Value('i', 0)
    total = images.shape[0]
    # Reshape inputs to give it as a list to the workers
    images_list = []
    for i in range(total):
        images_list.append(images.iloc[i])

    # The initializer is used with the counter variable to keep track of how
    # many files have been processed.
    poolrunner = Pool(cpu_count(), initializer=init, initargs=(counter, ))
    output_file_treated = poolrunner.map(create_file, images_list)
    del counter
    return output_file_treated
Example #44
def main():
    expected_arg = "[A valid PAGE_URL or IMAGE_URL]"
    num_args = len(sys.argv)
    if num_args < 2 or num_args > 2:
        print("\n* INVALID RUN COMMAND! *  Usage:")
        print("classify %s\n" % expected_arg)
    elif num_args == 2:
        url = sys.argv[1]
        valid_url = web_core.is_valid_url(url)
        if not valid_url:
            file_path = url
            if isfile(file_path):
                best_guess = image_base.classify_local_image(file_path)
            elif isdir(file_path):
                best_guess = image_base.classify_folder_images(
                    file_path, return_dict=True)
            else:
                raise Exception("Error: %s is not a valid image path!" % url)
            print("\n*** Best match classification: ***")
            print(best_guess)
            print("")
            return
        content_type = web_core.get_content_type(url)
        if content_type == 'other':
            raise Exception(
                "Error: %s does not evaluate to %s" % (url, expected_arg))
        elif content_type == 'image':
            best_guess = image_base.classify_image_url(url)
            print("\n*** Best match classification: ***")
            print(best_guess)
            print("")
        elif content_type == 'html':
            global images_classified
            image_list = image_base.get_all_images_on_page(url)

            if 'linux2' not in sys.platform and settings.MAX_THREADS > 1:
                # Multi-threading the work when not using Docker Linux
                pool = ThreadPool(settings.MAX_THREADS)
                pool.map(download_and_classify_image, image_list)
                pool.close()
                pool.join()
            else:
                # Single-threading the image classification work
                min_w_h = settings.MIN_W_H  # Minimum size for classification
                for image in image_list:
                    web_core.save_file_as(image, "temp_image.png")
                    image_base.convert_image_file_to_jpg(
                        "downloads_folder/temp_image.png")
                    width, height = image_base.get_image_file_dimensions(
                        "downloads_folder/temp_image.jpg")
                    if width >= min_w_h and height >= min_w_h:
                        best_guess = classify_image.external_run(
                            "downloads_folder/temp_image.jpg")
                        if images_classified == 0:
                            print("\n*** "
                                  "Best match classifications for page images:"
                                  " ***")
                        images_classified += 1
                        print(best_guess)
                        if images_classified >= settings.MAX_IMAGES_PER_PAGE:
                            break

            if images_classified >= settings.MAX_IMAGES_PER_PAGE:
                print("\n(NOTE: Exceeded page classification limit "
                      "of %d images per URL! Stopping early.)" % (
                        settings.MAX_IMAGES_PER_PAGE))

            if images_classified == 0:
                print("\nCould not find images to classify on the page! "
                      "(Min size = %dx%d pixels)" % (
                        settings.MIN_W_H, settings.MIN_W_H))
            print("")
        else:
            raise Exception(
                "Unexpected content type %s. Fix the code!" % content_type)
Example #45
    valid_pred[:, c] = np.round(bag_pred_f)
    if not os.path.isfile('dir/tmp_%d_%d.tar.gz' % (c, c0)):
        os.system('tar -zcf dir/tmp_%d_%d.tar.gz dir/tmp_%d_%d' %
                  (c, c0, c, c0))
    os.system('rm -r dir/tmp_%d_%d' % (c, c0))


pool = ThreadPool(nthreads)
for c0, svmC in enumerate(clist):

    runC = [c for c in range(C) if np.sum(trlbl[:, c]) > 0]
    valid_pred = np.zeros(vlbl.shape)
    # write the mfiles
    #for c in range(C):
    pool.map(train_model_class, runC)
    vccr[c0] = np.mean((vlbl == valid_pred)**2)
    print('>>>>', c0, svmC, vccr[c0])

c0 = np.argmax(vccr)
bestC = clist[c0]

# make prediction on the test set
'''test_pred = np.zeros(tlbl.shape)
temp_sent = np.zeros(gt_sent2.shape)
test_sent_pred = np.zeros(gt_sent.shape)
svmC = bestC
#for c in range(C):
def test_model_class(c):
	global test_pred
	global test_sent_pred
Example #46
import os
import sys
import argparse

from threading import Thread
from multiprocessing.dummy import Pool as ThreadPool 

topdir = os.path.dirname(sys.argv[0]) + '/../../'
topdir = os.path.abspath(topdir)
sys.path.insert(0, topdir)
import iota.harness.infra.utils.parser as parser

cmdargs = argparse.ArgumentParser(description='Cleanup Script')
cmdargs.add_argument('--testbed', dest='testbed_json', default="/warmd.json",
                     help='Testbed JSON file')
GlobalOptions = cmdargs.parse_args()

SSHCMD = "sshpass -p docker ssh -o StrictHostKeyChecking=no"
SCPCMD = "sshpass -p docker scp -o StrictHostKeyChecking=no"
def cleanup_node(node_ip):
    print("Cleaning up %s" % node_ip)
    os.system("%s %s/iota/scripts/cleanup_node.sh root@%s:/tmp/" % (SCPCMD, topdir, node_ip))
    os.system("%s root@%s source /tmp/cleanup_node.sh 2>&1" % (SSHCMD, node_ip))
    return

tbspec = parser.JsonParse(GlobalOptions.testbed_json)
pool = ThreadPool(len(tbspec.Instances))
mgmt_ips = []
for instance in tbspec.Instances:
    mgmt_ips.append(instance.NodeMgmtIP)
results = pool.map(cleanup_node, mgmt_ips)
        ip2 = ch[1]
        taz = str(ip1) + '.' + str(ip2) + '.'
        i = 0
        while i <= 254:
            i += 1
            o = 0
            while o <= 255:
                o += 1
                jembot  = str(taz) + str(i) + '.' + str(o)
                jembots = GREEN + str(taz) + CYAN + str(i) + '.' + str(o) + WHITE
                ngecek  = requests.get("https://sonar.omnisint.io/reverse/"+jembot+"", headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0'}, verify=False, timeout=20)
                if 'null' in ngecek.text or '{"error":' in ngecek.text:
                    print WHITE + "Ranging ==> " + jembots + " => NO"
                else:
                    result = json.loads(ngecek.text)
                    total  = len(result)
                    print WHITE + "Ranging ==> " + jembots + " => " + str(total) + " DOMAINS"
                    for domain in result:
                        open('Resultz.txt', 'a').write(domain + "\n")

    except (requests.exceptions.RequestException, ValueError) as e:
        print WHITE + "Ranging ==> " + jembots + str(e)


site = open(raw_input(Fore.WHITE+'List:~# '),'r').read().replace(' ', '').splitlines()
Thread = raw_input('Thread :~# ')
pool = ThreadPool(int(Thread))
pool.map(scan, site)
pool.close()
pool.join()
Example #48
#!/usr/bin/env python2
# -*- encoding: utf-8 -*-
#hackhttp: https://github.com/BugScanTeam/hackhttp/
#Author: afei

import hackhttp
from multiprocessing.dummy import Pool as ThreadPool


def upload(value):
    h = hackhttp.hackhttp()
    code, head, html, redirect_url, log = h.http(
        "http://afei123.com:8020/upload/x.php")
    print code


pool = ThreadPool(50)
pool.map(upload, range(5000000))
pool.close()
pool.join()
Example #49
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from scrapers2 import *

scrapers = [DraftKingsScraper(), FTDScraper(), FanduelScraper()]

pool = ThreadPool(3)
pool.map(lambda s: s.scrape(), scrapers)
pool.close()
pool.join()

for s in scrapers:
    for c in s.contests[:5]:
        print c
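# Note on the snippet above: passing a lambda to map() only works because
# ThreadPool (multiprocessing.dummy) runs the callable in threads and never
# pickles it. A process-based multiprocessing.Pool would fail to pickle the
# lambda; a module-level function is the picklable equivalent:
def scrape(scraper):
    return scraper.scrape()

# pool.map(scrape, scrapers)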
    print(name)
    # send a request to the detail page URL
    detail_page_text = requests.get(url=detail_url, headers=headers).text
    # parse the video address (url) out of the detail page
    ex = 'srcUrl="(.*?)",vdoUrl'

    video_url = re.findall(ex, detail_page_text)[0]
    print(video_url)
    dic = {'name': name, 'url': video_url}
    urls.append(dic)
    print(urls)


# request the video link to fetch the video's binary data
def get_video_data(dic):
    url = dic['url']
    print(dic['name'], '正在下载......')
    data = requests.get(url=url, headers=headers).content
    # persist it to disk
    with open(dic['name'], 'wb') as fp:
        fp.write(data)
        print(dic['name'], '下载成功!')


# use a thread pool to request the video data (a relatively time-consuming, blocking operation)
pool = Pool(4)
pool.map(get_video_data, urls)

pool.close()
pool.join()
Example #51
    # Load audio file
    with sr.AudioFile(name) as source:
        print("Audio Loaded")
        audio = r.record(source)
    # Transcribe audio file
    print("Reaching transcribing")
    text = r.recognize_google_cloud(
        audio,
        credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS,
        language="hi-IN")
    print(name + " done")
    # sampleout=sampleout+" "+text
    return {"idx": idx, "text": text}


all_text = pool.map(transcribe, enumerate(files))
pool.close()
pool.join()

transcript = ""
print(transcript + " is empty")
# print(all_text+" is all text")
for t in sorted(all_text, key=lambda x: x['idx']):
    #sampleout=sampleout+t['text']
    #total_seconds = t['idx'] * 30
    # Cool shortcut from:
    # https://stackoverflow.com/questions/775049/python-time-seconds-to-hms
    # to get hours, minutes and seconds
    #m, s = divmod(total_seconds, 60)
    #h, m = divmod(m, 60)
Example #52
 def run_map_on_cluster(self) -> List:
     pool = Pool(mp.cpu_count() * 3)
     map_result = pool.map(self.trigger_map, self.slaves)
     return map_result
	TP = (assignment_matrix >= IoU_thresh).sum()
	FP = (assignment_matrix < IoU_thresh).sum()
	FN = len(trains.keys()) - TP
	#stats.append([TP, FP, FN])
	return [TP, FP, FN]

# MAIN PART
# compute the IoUs
for dir_name in glob.glob(test_dir+'*'):
	print dir_name
	
	# threading things
	stats = []
	list_of_files = glob.glob(dir_name+'/'+'*.npy')
	pool = ThreadPool(len(list_of_files))
	stats = pool.map(get_results, list_of_files)

	global_stats.append(stats)
	#global_IoU_vals.append(assignment_matrix)

	# assemble stats
	stats = np.asarray(stats)
	print stats.shape
	avg_TP = float(stats[:,0].sum()) / float(stats.shape[0])
	avg_FP = float(stats[:,1].sum()) / float(stats.shape[0])
	avg_FN = float(stats[:,2].sum()) / float(stats.shape[0])

	# compute P, R
	precision = float(avg_TP) / float(avg_TP + avg_FP)
	recall = float(avg_TP) / float(avg_TP + avg_FN)
Example #54
def calculateParallel(lines, threads=2):
    pool = ThreadPool(threads)
    rslt = pool.map(run_one, lines)
    pool.close()
    pool.join()
    return rslt
Example #55
    def _selection(self, samples_x, samples_y_aggregation, samples_y,
                   x_bounds, x_types, max_resampling_per_x=3,
                   threshold_samplessize_exploitation=12,
                   threshold_samplessize_resampling=50, no_candidates=False,
                   minimize_starting_points=None, minimize_constraints_fun=None):

        next_candidate = None
        candidates = []
        samples_size_all = sum([len(i) for i in samples_y])
        samples_size_unique = len(samples_y)

        # ===== STEP 1: Compute the current optimum =====
        gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
        lm_current = gp_selection.selection(
            "lm",
            samples_y_aggregation,
            x_bounds,
            x_types,
            gp_model['model'],
            minimize_starting_points,
            minimize_constraints_fun=minimize_constraints_fun)
        if not lm_current:
            return None

        if no_candidates is False:
            candidates.append({'hyperparameter': lm_current['hyperparameter'],
                               'expected_mu': lm_current['expected_mu'],
                               'expected_sigma': lm_current['expected_sigma'],
                               'reason': "exploitation_gp"})

            # ===== STEP 2: Get recommended configurations for exploration =====
            results_exploration = gp_selection.selection(
                "lc",
                samples_y_aggregation,
                x_bounds,
                x_types,
                gp_model['model'],
                minimize_starting_points,
                minimize_constraints_fun=minimize_constraints_fun)

            if results_exploration is not None:
                if _num_past_samples(results_exploration['hyperparameter'], samples_x, samples_y) == 0:
                    candidates.append({'hyperparameter': results_exploration['hyperparameter'],
                                       'expected_mu': results_exploration['expected_mu'],
                                       'expected_sigma': results_exploration['expected_sigma'],
                                       'reason': "exploration"})
                    logger.info("DEBUG: 1 exploration candidate selected\n")
            else:
                logger.info("DEBUG: No suitable exploration candidates were found\n")

            # ===== STEP 3: Get recommended configurations for exploitation =====
            if samples_size_all >= threshold_samplessize_exploitation:
                print("Getting candidates for exploitation...\n")
                try:
                    gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
                    results_exploitation = gmm_selection.selection(
                        x_bounds,
                        x_types,
                        gmm['clusteringmodel_good'],
                        gmm['clusteringmodel_bad'],
                        minimize_starting_points,
                        minimize_constraints_fun=minimize_constraints_fun)

                    if results_exploitation is not None:
                        if _num_past_samples(results_exploitation['hyperparameter'], samples_x, samples_y) == 0:
                            candidates.append({'hyperparameter': results_exploitation['hyperparameter'],\
                                               'expected_mu': results_exploitation['expected_mu'],\
                                               'expected_sigma': results_exploitation['expected_sigma'],\
                                               'reason': "exploitation_gmm"})
                            logger.info("DEBUG: 1 exploitation_gmm candidate selected\n")
                    else:
                        logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n")

                except ValueError as exception:
                    # The exception: ValueError: Fitting the mixture model failed
                    # because some components have ill-defined empirical covariance
                    # (for instance caused by singleton or collapsed samples).
                    # Try to decrease the number of components, or increase reg_covar.
                    logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.")
                    logger.info(exception)

            # ===== STEP 4: Get a list of outliers =====
            if (threshold_samplessize_resampling is not None) and \
                        (samples_size_unique >= threshold_samplessize_resampling):
                logger.info("Getting candidates for re-sampling...\n")
                results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)

                if results_outliers is not None:
                    for results_outlier in results_outliers:
                        if _num_past_samples(samples_x[results_outlier['samples_idx']], samples_x, samples_y) < max_resampling_per_x:
                            candidates.append({'hyperparameter': samples_x[results_outlier['samples_idx']],\
                                               'expected_mu': results_outlier['expected_mu'],\
                                               'expected_sigma': results_outlier['expected_sigma'],\
                                               'reason': "resampling"})
                    logger.info("DEBUG: %d re-sampling candidates selected\n", len(results_outliers))
                else:
                    logger.info("DEBUG: No suitable resampling candidates were found\n")

            if candidates:
                # ===== STEP 5: Compute the information gain of each candidate towards the optimum =====
                logger.info("Evaluating information gain of %d candidates...\n", len(candidates))
                next_improvement = 0

                threads_inputs = [[
                    candidate, samples_x, samples_y, x_bounds, x_types,
                    minimize_constraints_fun, minimize_starting_points
                ] for candidate in candidates]
                threads_pool = ThreadPool(4)
                # Evaluate what would happen if we actually sample each candidate
                threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
                threads_pool.close()
                threads_pool.join()

                for threads_result in threads_results:
                    if threads_result['expected_lowest_mu'] < lm_current['expected_mu']:
                        # Information gain
                        temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']

                        if next_improvement > temp_improvement:
                            next_improvement = temp_improvement
                            next_candidate = threads_result['candidate']
            else:
                # ===== STEP 6: If we have no candidates, randomly pick one =====
                logger.info(
                    "DEBUG: No candidates from exploration, exploitation, "
                    "and resampling. We will randomly pick a candidate for next_candidate\n"
                )

                next_candidate = _rand_with_constraints(x_bounds, x_types) \
                                    if minimize_starting_points is None else minimize_starting_points[0]
                next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types)
                expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model'])
                next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
                                  'expected_mu': expected_mu, 'expected_sigma': expected_sigma}

        # ===== STEP 7: If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, take next config as exploration step  =====
        outputs = self._pack_output(lm_current['hyperparameter'])
        ap = random.uniform(0, 1)
        if outputs in self.history_parameters or ap<=self.exploration_probability:
            if next_candidate is not None:
                outputs = self._pack_output(next_candidate['hyperparameter'])
            else:
                random_parameter = _rand_init(x_bounds, x_types, 1)[0]
                outputs = self._pack_output(random_parameter)
        self.history_parameters.append(outputs)
        return outputs
Beispiel #56
0
def _send_bulk(emails, uses_multiprocessing=True, log_level=None):
    # Multiprocessing does not play well with database connection
    # Fix: Close connections on forking process
    # https://groups.google.com/forum/#!topic/django-users/eCAIY9DAfG0
    if uses_multiprocessing:
        db_connection.close()

    if log_level is None:
        log_level = get_log_level()

    sent_emails = []
    failed_emails = []  # This is a list of two tuples (email, exception)
    email_count = len(emails)

    logger.info('Process started, sending %s emails' % email_count)

    def send(email):
        try:
            email.dispatch(log_level=log_level, commit=False,
                           disconnect_after_delivery=False)
            sent_emails.append(email)
            logger.debug('Successfully sent email #%d' % email.id)
        except Exception as e:
            logger.debug('Failed to send email #%d' % email.id)
            failed_emails.append((email, e))

    # Prepare emails before we send these to threads for sending
    # So we don't need to access the DB from within threads
    for email in emails:
        # Sometimes this can fail, for example when trying to render
        # email from a faulty Django template
        try:
            email.prepare_email_message()
        except Exception as e:
            failed_emails.append((email, e))

    number_of_threads = min(get_threads_per_process(), email_count)
    pool = ThreadPool(number_of_threads)

    pool.map(send, emails)
    pool.close()
    pool.join()

    connections.close()

    # Update statuses of sent emails
    email_ids = [email.id for email in sent_emails]
    Email.objects.filter(id__in=email_ids).update(status=STATUS.sent)

    # Update statuses and conditionally requeue failed emails
    num_failed, num_requeued = 0, 0
    max_retries = get_max_retries()
    scheduled_time = timezone.now() + get_retry_timedelta()
    emails_failed = [email for email, _ in failed_emails]

    for email in emails_failed:
        if email.number_of_retries is None:
            email.number_of_retries = 0
        if email.number_of_retries < max_retries:
            email.number_of_retries += 1
            email.status = STATUS.requeued
            email.scheduled_time = scheduled_time
            num_requeued += 1
        else:
            email.status = STATUS.failed
            num_failed += 1

    Email.objects.bulk_update(emails_failed, ['status', 'scheduled_time', 'number_of_retries'])

    # If log level is 0, log nothing, 1 logs only sending failures
    # and 2 means log both successes and failures
    if log_level >= 1:

        logs = []
        for (email, exception) in failed_emails:
            logs.append(
                Log(email=email, status=STATUS.failed,
                    message=str(exception),
                    exception_type=type(exception).__name__)
            )

        if logs:
            Log.objects.bulk_create(logs)

    if log_level == 2:

        logs = []
        for email in sent_emails:
            logs.append(Log(email=email, status=STATUS.sent))

        if logs:
            Log.objects.bulk_create(logs)

    logger.info(
        'Process finished, %s attempted, %s sent, %s failed, %s requeued',
        email_count, len(sent_emails), num_failed, num_requeued,
    )

    return len(sent_emails), num_failed, num_requeued
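The db_connection.close() at the top only matters when _send_bulk runs in a forked worker. A hedged sketch of such a driver (send_bulk_in_parallel and the batching are hypothetical, not the library's actual entry point):

from multiprocessing import Pool


def _send_bulk_star(args):
    emails, log_level = args
    return _send_bulk(emails, uses_multiprocessing=True, log_level=log_level)


def send_bulk_in_parallel(email_batches, processes=4, log_level=None):
    pool = Pool(processes)
    results = pool.map(_send_bulk_star, [(batch, log_level) for batch in email_batches])
    pool.close()
    pool.join()
    # each result is a (sent, failed, requeued) tuple; sum them column-wise
    return [sum(col) for col in zip(*results)]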
Beispiel #57
0
def multi_thread():
    from multiprocessing.dummy import Pool
    pool = Pool(3)
    pool.map(hello, persons)
    pool.close()
    pool.join()
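A runnable, hypothetical completion of this snippet; `hello` and `persons` are not shown in the original:

persons = ['alice', 'bob', 'carol']

def hello(person):
    # placeholder worker; prints a greeting from one of the three pool threads
    print('hello, {0}'.format(person))

multi_thread()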
Beispiel #58
0
def extract_features(extract_fn, image_path, ofile, params={}):

    cwd = os.getcwd()
    image_path=utils.absolute_path(image_path)

    # Read filenames
    files=[]
    if os.path.isdir(image_path):
        for dirpath,_,filenames in os.walk(image_path):
            for f in filenames:
                path=os.path.abspath(os.path.join(dirpath, f))
                if not utils.is_valid_image(path):
                    print("Warning, please provide a valid image: ", f)
                else:
                    files.append(path)
    else:
        files=[image_path]

    files.sort(key=utils.natural_sort_key)

    output_file=utils.absolute_path(ofile)
    
    if os.path.isdir(output_file):
        print("The provided file is a directory:", output_file)
        sys.exit(0)

    if os.path.exists(output_file):
        print("Output file already exists! Continuing ...")
        with open(output_file+".label", 'r') as f:
            labels = [os.path.join(image_path, x) for x in f.read().splitlines()]

        pending_files = [x for x in files if x not in labels]
        files = pending_files
        print("Pending files:", len(files))

    def extract_and_save(path):
        try:
            X = extract_fn(path, **params)
        except Exception as e:
            print("Cannot extract features from", path)
            print(str(e))
            return

        X = X.reshape((1, X.shape[0]))
        lock.acquire()
        with open(output_file, 'a+') as f_handle:
            with open(output_file+".label", 'a+') as f_handle_label:
                numpy.savetxt(f_handle, X)
                f_handle_label.write(os.path.basename(path)+"\n")
        lock.release()

    pool = ThreadPool(cpu_count())
    results = pool.map(extract_and_save, files)
    pool.close()
    pool.join()

    """
    for path in files:
        X = feaext.SRM_extract(path, **params)
        print X.shape
        X = X.reshape((1, X.shape[0]))
        with open(sys.argv[3], 'a+') as f_handle:
            numpy.savetxt(f_handle, X)
    """

    os.chdir(cwd)
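The extract_and_save closure above uses a `lock` that is not defined in this fragment. A hedged sketch of the module-level state it assumes (defining it here is an assumption):

import threading
from multiprocessing import cpu_count
from multiprocessing.dummy import Pool as ThreadPool

# serializes appends to the output file and its .label companion
# from the ThreadPool workers
lock = threading.Lock()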
Beispiel #59
0
RuntimeContext.register_slot('operation_id', '<empty>')
_console_lock = threading.Lock()


def println(msg):
    with _console_lock:
        print(msg)


def work(name):
    println('Entering worker[{}]: {}'.format(name, RuntimeContext))
    RuntimeContext.operation_id = name
    time.sleep(0.01)
    println('Exiting worker[{}]: {}'.format(name, RuntimeContext))


if __name__ == "__main__":
    println('Main thread: {}'.format(RuntimeContext))
    RuntimeContext.operation_id = 'main'
    pool = ThreadPool(2)  # create a thread pool with 2 threads
    pool.map(RuntimeContext.with_current_context(work), [
        'bear',
        'cat',
        'dog',
        'horse',
        'rabbit',
    ])
    pool.close()
    pool.join()
    println('Main thread: {}'.format(RuntimeContext))
Beispiel #60
0
async def periodic(sleep_for): # NOQA [C901]
    while True:
        await asyncio.sleep(sleep_for)

        """ Check that the sites are reachable """

        urls = Urls()
        urls = urls.get_all_urls()
        pool = ThreadPool(len(urls))

        old_urls_443 = pool.map(getcode200.check_443, urls)
        old_urls_80 = pool.map(getcode200.check_80, urls)

        pool.close()
        pool.join()

        new_urls_80 = []
        new_urls_443 = []
        for url_80, url_443, url in zip(old_urls_80, old_urls_443, urls):
            if 'Error' in url_443 or 'False' in url_443:
                new_urls_443.append(url + ' - ' + url_443)
                db.set_alarm_url(url)
            if 'Error' in url_80 or 'False' in url_80:
                new_urls_80.append(url + ' - ' + url_80)
                db.set_alarm_url(url)

        users = ChatUsers()
        names, surnames, usernames, chat_ids = users.get_all_chat_users_with_sub()
        for chat_id in chat_ids:
            if (db.access_check_collegue(str(chat_id)) or db.access_check_admin(str(chat_id))):
                if new_urls_80 != []:
                    answer_80 = 'ALARM!\nNot responding on port 80:\n'
                    answer_80 += '\n'.join(new_urls_80)
                    bot.send_message(chat_id, answer_80, disable_web_page_preview=True)
                if new_urls_443 != []:
                    answer_443 = 'ALARM!\nNot responding on port 443:\n'
                    answer_443 += '\n'.join(new_urls_443)
                    bot.send_message(chat_id, answer_443, disable_web_page_preview=True)

        """ Check the ssl certificates """

        urls = Urls()
        urls = urls.get_all_urls()

        await asyncio.sleep(1)
        data_3 = check_ssl_cirt(urls)

        valid_list = list(map(list, zip(urls, data_3)))

        await asyncio.sleep(1)
        reply = ''
        for j in valid_list:
            if 'invalid' in j[1]:
                db.set_alarm_url(j[0])
                reply += '{}'.format(j[0] + ', ssl: ' + j[1] + '\nPlease, update this url\n')
                users = ChatUsers()
                names, surnames, usernames, chat_ids = users.get_all_chat_users_with_sub()
                for chat_id in chat_ids:
                    if (db.access_check_collegue(str(chat_id)) or db.access_check_admin(str(chat_id))):
                        await bot.send_message(chat_id, reply, disable_notification=True)

        """ Check the servers """

        servers = Servers()
        servers = servers.get_all_servers()
        hosts = []
        usernames = []
        for r in servers:
            n = (r.split('@'))
            host = n[1]
            usname = n[0]

            hosts.append(host)
            usernames.append(usname)
        keys = Keys()
        keys = keys.get_all_keys()

        for i in range(len(hosts)):
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            host_temp = hosts[i]
            username_temp = usernames[i]
            if '-----BEGIN' in keys[i]:
                pr_key_temp = keys[i]
                pr_key_temp = pr_key_temp[1:-1]
                pr_key_temp = StringIO(pr_key_temp)
                privkey = paramiko.RSAKey.from_private_key(pr_key_temp)
            else:
                pr_key_temp = "src/bot/db/" + keys[i]
                try:
                    privkey = paramiko.RSAKey.from_private_key_file(pr_key_temp)
                except SSHException:
                    privkey = paramiko.DSSKey.from_private_key_file(pr_key_temp)
            try:
                ssh.connect(hostname=host_temp, username=username_temp, pkey=privkey)
                logging.debug("Connect Server")
            except Exception:
                logging.exception("Could not connect to %s", host_temp)
                continue
            else:
                logging.debug("Connected, collecting disk usage")
                stdin, stdout, stderr = ssh.exec_command('df --output=pcent /home')
                pcent = []
                for line in stdout:
                    pcent.append(line.strip('\n'))

                stdin, stdout, stderr = ssh.exec_command('df --output=avail --block-size=G /home')
                mem_avail_gb = []
                for line in stdout:
                    mem_avail_gb.append(line.strip('\n'))

                ssh.close()
            pcent_num = pcent[1].replace('%', '')
            pcent_num = int(pcent_num.replace(' ', ''))
            mem_avail_gb_num = mem_avail_gb[1].replace('G', '')
            mem_avail_gb_num = int(mem_avail_gb_num.replace(' ', ''))
            if ((100 - pcent_num) <= memorythreshold_pc) or (mem_avail_gb_num <= memorythreshold_gb):
                db.set_alarm_server(host_temp)
                reply = """{}@{}\nCRITICAL! LOW DISK SPACE!\n{}% Used\n{}Gb Available""".format(
                    username_temp,
                    host_temp,
                    pcent_num,
                    mem_avail_gb_num)
                users = ChatUsers()
                names, surnames, usernames, chat_ids = users.get_all_chat_users_with_sub()
                for chat_id in chat_ids:
                    if (db.access_check_collegue(str(chat_id)) or db.access_check_admin(str(chat_id))):
                        await bot.send_message(chat_id, reply, disable_notification=True)
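The paramiko and ThreadPool work above blocks the event loop while it runs. A hedged sketch of pushing one blocking call onto an executor instead (`run_blocking` is hypothetical, not part of the original):

import asyncio


async def run_blocking(func, *args):
    # run a blocking callable in the default thread-pool executor
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, func, *args)

# e.g. inside periodic():
#     old_urls_443 = await run_blocking(pool.map, getcode200.check_443, urls)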