async def save_to_database(start, stop):
    html = await asyncio.gather(asyncio.ensure_future(run(start, stop)))
    # print('number of items parsed in this pass:', len(html))
    result = [d for d in html[0] if d]  # collect the responses from one request span into a list
    print('result length', len(result))
    pool = Threadpool(64)
    pool.map(parse_resp, result)
    pool.close()
    pool.join()

def start_download(self, nr_of_threads=8):
    if self._authenticated_session is None:
        self._authenticated_session = self.__create_authenticated_sesseion()

    # Create the download folder.
    os.makedirs(self.download_path, exist_ok=True)

    # p = multiprocessing.Pool(nr_of_processes)
    p = Threadpool(nr_of_threads)
    p.map(self._mp_download_wrapper, self.download_urls)
    p.close()
    p.join()

def main(cfg: Dict):
    # get list of job configurations
    jobs = create_jobs(cfg)

    # spawn threads
    pool = Threadpool(cfg["threads"])

    # process jobs in parallel
    for _ in tqdm(pool.imap_unordered(process_job, jobs), total=len(jobs)):
        pass

def mobilize_threads(items, thread_count, function):
    """
    Maps function to some iterable using a thread pool
    :param items: iterable of work items
    :param thread_count: number of worker threads in the pool
    :param function: callable applied to each item
    :return results: list of return values, one per item
    """
    pool = Threadpool(thread_count)
    results = pool.map(function, items)  # Maps function to each item
    pool.close()  # Closes pool
    pool.join()   # Joins pool back to synchronous
    return results

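# A minimal, hypothetical usage sketch for mobilize_threads (not from the original
# source): fetch_status and the URL list below are illustrative placeholders.
from multiprocessing.dummy import Pool as Threadpool
import requests

def fetch_status(url):
    # hypothetical helper: return the HTTP status code for one URL
    return requests.get(url, timeout=10).status_code

urls = ["https://example.com", "https://example.org"]
statuses = mobilize_threads(urls, thread_count=4, function=fetch_status)
print(statuses)
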
def multiListen(self):
    with self.source as source:
        self.audio = self.recognizer.record(source, duration=5, offset=None)
        # audio = self.recognizer.listen(source)
        if self.audio:
            print("Audio complete")
    pool = Threadpool(len(self.providers))
    # results = pool.starmap(self.multiListener, zip(itertools.repeat(self.audio), self.providers))
    results = pool.map(self.multiListener, self.providers)
    pool.close()
    pool.join()
    print(self.results)

# Creating more threads will not help once a single core is maxed out:
# CPython's GIL lets only one thread execute Python bytecode at a time.
# from multiprocessing.dummy import Pool as Threadpool

def f(x):
    # CPU-bound busy loop; runs forever so CPU usage can be observed
    while True:
        x = x + 1

if __name__ == "__main__":
    p = Threadpool(10)
    p.map(f, range(10))
    p.close()

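# A minimal comparison sketch (not from the original source) contrasting a thread
# pool with a process pool on CPU-bound work: under CPython's GIL the thread pool
# gains little, while the process pool can use multiple cores. Sizes are illustrative.
import time
from multiprocessing import Pool
from multiprocessing.dummy import Pool as Threadpool

def burn(n):
    # CPU-bound busy work: count down to zero
    while n:
        n -= 1

if __name__ == "__main__":
    work = [5_000_000] * 8
    for label, pool_cls in (("threads", Threadpool), ("processes", Pool)):
        start = time.time()
        with pool_cls(4) as p:
            p.map(burn, work)
        print(label, round(time.time() - start, 2), "s")
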
        str(product_id))
    response = requests.get(price_url, headers=headers).content
    response_json = json.loads(response)
    for info in response_json:
        return info.get('p')

def save_data(product_list):
    client = pymongo.MongoClient('localhost')
    db = client['product_dict']  # schema
    content = db['list']  # table
    content.insert(product_list)

if __name__ == '__main__':
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    urls = [
        'https://list.jd.com/list.html?cat=9987,653,655&page={}'.format(page)
        for page in range(1, 50)
    ]
    pool = Threadpool(2)
    start_time = time.time()
    pool.map(get_response, urls)
    pool.close()
    pool.join()
    end_time = time.time()
    print('need time {}'.format(str(end_time - start_time)))

def parse_to_ndjson(
    topic="books",
    output_path="topic_articles",
    input_dir="wikipedia_dump",
    partitions_dir="partitions",
    limit=None,
    delete_parsed_files=False,
    multicore=True,
    verbose=True,
):
    """
    Finds all Wikipedia entries for the given topic and converts them to json files

    Parameters
    ----------
    topic : str (default=books)
        The topic that articles should be subset by.

        Note: this corresponds to the type of infobox from Wikipedia articles

    output_path : str (default=topic_articles)
        The name of the final output ndjson file

    input_dir : str (default=wikipedia_dump)
        The path to the directory where the data is stored

    partitions_dir : str (default=partitions)
        The path to the directory where the output should be stored

    limit : int (default=None)
        An optional limit of the number of topic articles per dump file to find

    delete_parsed_files : bool (default=False)
        Whether to delete the separate parsed files after combining them

    multicore : bool (default=True)
        Whether to use multicore processing

    verbose : bool (default=True)
        Whether to show a tqdm progress bar for the processes

    Returns
    -------
    Wikipedia dump files parsed for the given template type and converted to json files
    """
    output_dir = "/".join([i for i in output_path.split("/")[:-1]])
    if not os.path.exists(output_dir):
        print(f"Making {output_dir} directory for the output")
        os.makedirs(output_dir)

    if topic in input_conversion_dict().keys():
        topic = input_conversion_dict()[topic]

    if multicore == True:
        num_cores = os.cpu_count()
    elif multicore == False:
        num_cores = 1
    elif type(multicore) == int:
        num_cores = multicore

    if output_path == None:
        timestr = time.strftime("%Y%m%d-%H%M%S")
        output_path = "parsed_data" + timestr
        output_file_name = output_path + ".ndjson"
    else:
        if output_path[-len(".ndjson"):] != ".ndjson":
            output_file_name = output_path + ".ndjson"
        else:
            output_file_name = output_path

    if not os.path.exists(output_file_name):
        if not os.path.exists(partitions_dir):
            print(f"Making {partitions_dir} directory for the partitions")
            os.makedirs(partitions_dir)

        target_files = [
            input_dir + "/" + f for f in os.listdir(input_dir) if "pages-articles" in f
        ]

        parse_inputs = zip(
            [topic] * len(target_files),
            target_files,
            [partitions_dir] * len(target_files),
            [limit] * len(target_files),
            [False] * len(target_files),
        )

        if __name__ == "wikirec.data_utils":
            with Pool(processes=num_cores) as pool:
                for _ in tqdm(
                    pool.imap_unordered(iterate_and_parse_file, parse_inputs),
                    total=len(target_files),
                    desc="Files partitioned",
                    unit="file",
                    disable=not verbose,
                ):
                    pass

        def read_and_combine_json(file_path):
            """Read in json data from a file_path"""
            data = []
            with open(file_path, "r") as f:
                for l in f.readlines():
                    data.append(json.loads(l))

            return data

        threadpool = Threadpool(processes=num_cores)
        partition_files = [
            partitions_dir + "/" + f
            for f in os.listdir(partitions_dir)
            if f[-len(".ndjson"):] == ".ndjson"
        ]

        if __name__ == "wikirec.data_utils":
            results = threadpool.map(read_and_combine_json, partition_files)

        file_list = list(chain(*results))

        with open(output_file_name, "wt") as fout:
            for f in file_list:
                fout.write(json.dumps(f) + "\n")
        print(f"File {output_file_name} with articles for the given topic saved")

    else:
        print(
            f"File {output_file_name} with articles for the given topic already exists"
        )

    if delete_parsed_files:
        if os.path.exists(partitions_dir):
            print(f"Deleting {partitions_dir} directory")
            os.system(f"rm -rf {partitions_dir}")

    return

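# A hypothetical invocation sketch (not from the original source), assuming the
# working directory contains a "wikipedia_dump" folder with *pages-articles* dump
# files; the paths and topic below are illustrative.
from wikirec import data_utils

data_utils.parse_to_ndjson(
    topic="books",
    output_path="enwiki_books",
    input_dir="wikipedia_dump",
    partitions_dir="partitions",
    limit=None,
    delete_parsed_files=True,
    multicore=True,
    verbose=True,
)
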
# Reference code for saving to the database, copied from elsewhere (can be ignored)
# self.cursor.execute(
#     '''insert into
#     python_xiangche_hefei_hangzhou_copy(building_id, pic_label, oss_urls, commit_time)
#     values (%s,'1',%s, %s)''',
#     (
#         pj,
#         item['images_url'],
#         time_now
#     )
# )
# self.connect.commit()

def run(self):
    # Main run loop
    next_url = self.start_url
    while next_url:
        time.sleep(random.randint(1, 2))
        print(next_url)
        html = self.parse(next_url)
        next_url = self.get_list_url(html)

if __name__ == "__main__":
    a = Cnvdspider()
    # Originally meant to run this multi-threaded, but never needed it and left it
    # alone rather than risk getting the IP banned.
    pool = Threadpool(2)
    a.run()
    # pool.map(a.run(), self.parse())
    pool.close()
    pool.join()

                each_one_content)[0]
            item['last_reply'] = re.findall(
                r'title="最后回复时间">\r\n[\s]*?(\d+:\d+|\d+-\d+)[\s]*?</span>',
                each_one_content)[0]
            item['content'] = re.findall(
                r'threadlist_abs threadlist_abs_onlyline ">\n[\s]*(.*?)\n[\s]*</div>',
                each_one_content)[0]
            print(item)
            self.save_data(item)
        except:
            pass

    def save_data(self, item):
        """Save the data"""
        table.update({'title': item['title']}, {'$set': item}, True)

if __name__ == '__main__':
    start_time = time.time()
    spider = Spider()
    urls = spider.get_urls()
    pool = Threadpool(32)
    pool.map(spider.get_resp, urls)
    pool.close()
    pool.join()
    end_time = time.time()
    total_time = end_time - start_time
    print(total_time)

def calcOpticalFlowPyrLK(prevImg, nextImg, prevPts, nextPts=None, status=None,
                         err=None, winSize=(5, 5), maxLevel=2,
                         criteria=(cv.TERM_CRITERIA_COUNT, 10), flags=[],
                         minEigThreshold=None):
    # check window size
    if winSize[0] % 2 != 1 or winSize[1] % 2 != 1:
        print("winSize must be an odd number!")
        exit(-1)

    # check for flags
    if cv.OPTFLOW_USE_INITIAL_FLOW not in flags:
        nextPts = np.copy(prevPts)

    # set error type
    err_type = 0
    if cv.OPTFLOW_LK_GET_MIN_EIGENVALS in flags:
        err_type = cv.OPTFLOW_LK_GET_MIN_EIGENVALS

    # criteria check
    if (criteria[0] != cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT
            and criteria[0] != cv.TERM_CRITERIA_EPS
            and criteria[0] != cv.TERM_CRITERIA_COUNT):
        print("Wrong criteria passed!")
        exit(-1)

    if status is None:
        status = np.ones((prevPts.shape[0], 1), dtype=bool)
    if err is None:
        err = np.zeros((prevPts.shape[0], 1), dtype=float)

    # initialize multiprocessing pool
    pool = Threadpool(mp.cpu_count())

    # create neighborhood vectors
    Ix_v = np.zeros(winSize[0] * winSize[1])
    Iy_v = np.zeros(winSize[0] * winSize[1])
    It_v = np.zeros(winSize[0] * winSize[1])
    Ixyt_v = (Ix_v, Iy_v, It_v)

    # create weight matrix
    W = cv.getGaussianKernel(winSize[0] * winSize[1], -1)
    W = np.diagflat(W)

    prev_pyramid = buildOpticalFlowPyramid(prevImg, maxLevel)
    next_pyramid = buildOpticalFlowPyramid(nextImg, maxLevel)

    scalePts(nextPts, 1 / (2**(maxLevel + 1)))

    # iterate from highest to lowest level
    for i in range(len(prev_pyramid) - 1, -1, -1):
        # get derivatives
        Ix = cv.Sobel(prev_pyramid[i], -1, 1, 0, 3)
        Iy = cv.Sobel(prev_pyramid[i], -1, 0, 1, 3)
        It = next_pyramid[i] - prev_pyramid[i]
        Ixyt = (Ix, Iy, It)

        # scale points for this pyramid level
        scalePts(nextPts, 2)

        # run everything sequentially
        # for i in range(status.shape[0]):
        #     nextPt, st, error = calcNextPt(Ixyt, Ixyt_v, W,
        #                                    nextPts[i], status[i], err[i],
        #                                    winSize,
        #                                    criteria, err_type, minEigThreshold)
        #     status[i] = st
        #     err[i] = error
        #     nextPts[i] = nextPt

        # run all point calculations in parallel
        result = pool.starmap(
            calcNextPt,
            [(Ixyt, Ixyt_v, W, nextPt, st, error, winSize, criteria, err_type,
              minEigThreshold)
             for nextPt, st, error in zip(nextPts, status, err)])

        j = 0
        for nextPt, st, error in result:
            nextPts[j] = nextPt
            status[j] = st
            err[j] = error
            # print(nextPt, st, error)
            j += 1

    # close up pool
    pool.close()
    pool.join()

    return nextPts, status, err

from multiprocessing.dummy import Pool as Threadpool
from urllib import parse

import requests
import os

# 001 ~ 821
tmp = parse.quote('玄幻奇幻/庆余年_秋水雁翎')
target_url = [
    'http://mp3f.ting89.com:9090/' + tmp + '/%s.mp3' % str(i + 1).zfill(3)
    for i in range(821)
]
PATH = '/Users/jasonzhang/Downloads/庆余年语音小说/'


def download(url):
    path = PATH + url.split('/')[-1]
    if os.path.exists(path) and os.path.getsize(path) > 10:
        return
    res = requests.get(url, stream=False)
    with open(path, "wb") as f:
        f.write(res.content)
    print(url.split('/')[-1])


th = Threadpool(20)
th.map(download, [i for i in target_url])

    # Open the file and load in json
    with open(file_path, 'r') as fin:
        for l in fin.readlines():
            data.append(json.loads(l))
    return data

start = timer()

# List of files to read in
saved_files = [partition_data_dir + x for x in os.listdir(partition_data_dir)]

# Create a threadpool for reading in files
threadpool = Threadpool(processes=12)

# Read in the files as a list of lists
results = threadpool.map(read_data, saved_files)

# Flatten the list of lists to a single list
book_list = list(chain(*results))

end = timer()
print(f'Found {len(book_list)} books in {round(end - start)} seconds.')

if not os.path.exists(os.getcwd() + combined_data_file):
    with open(combined_data_file, 'wt') as fout:
        for book in book_list:
            fout.write(json.dumps(book) + '\n')

import threading
import time
from multiprocessing.dummy import Pool as Threadpool  # part of the standard library

# thread pool
threading.Thread()  # a single thread can also be created via Thread


def print_hello(name):
    print("hello", name)
    time.sleep(2)  # delay


name_list = ["keli", "xiaoxi"]  # define a list

start_time = time.time()  # time.time() comes from the imported time module
pool = Threadpool(1)
pool.map(print_hello, name_list)
pool.close()
pool.join()  # the main thread waits for the worker threads to finish
end_time = time.time()

print("%d second" % (end_time - start_time))  # string formatting with %d

DATABASE_NAME_REF = "TEST_REF"
DATABASE_NAME_CHILD = "TEST_CHILD"
DATABASE_LIST_WORDS = "TEST_WORDS_TEST"
DATABASE_LIST_ERROR = "TEST_ERROR"

reload(sys)  # 2
sys.setdefaultencoding('utf-8')  # 3

dbm.create_tables(dbm.get_sql(DATABASE_NAME_LIST))
dbm.create_tables(dbm.get_sql(DATABASE_NAME_REF))
dbm.create_tables(dbm.get_sql(DATABASE_NAME_CHILD))
dbm.create_tables(dbm.get_sql(DATABASE_LIST_ERROR))
dbm.create_words_tables(DATABASE_LIST_WORDS)
# logging.debug("DataBase(TestOP) is Created!")

pool = Threadpool(100)

start_time = time.time()
# Fetch the index listing
m_dict = getl.get_main_html('https://developer.android.com/index.html')
end_time = time.time()
print u"Fetching the list took {} seconds".format(end_time - start_time)

logging.debug("---------------- List fetch finished, starting database inserts ----------------------")
if m_dict[0]:
    for item in m_dict[0]:
        o = item.split('*')
        sql = us.get_i_sql(DATABASE_NAME_LIST, {'NAME': o[0], 'URL': o[1]})
        dbm.insert_data(sql)

def inverse_kinematic_ga(chain, target_frame, starting_nodes_angles,
                         orientation_weight=.4, max_iter=5, second_chain=None,
                         second_target_frame=None, include_orientation=False,
                         method="ga_simple", population_size=12,
                         mutation_rate=.01, num_generations=500, num_elites=3,
                         distance_acc=.001, orientation_acc=.01):
    starting_nodes_angles = chain.inverse_kinematics(target_frame,
                                                     method="L-BFGS-B")
    if starting_nodes_angles is None:
        raise ValueError("starting_nodes_angles must be specified")

    # Only get the position
    target = target_frame[:3, -1]
    targetQuad = m3d.quaternion.UnitQuaternion(
        m3d.orientation.Orientation(target_frame[:3, :3]))

    def fwki_with_orientation(x):
        import math

        # Calculate position distance
        y = chain.active_to_full(x, starting_nodes_angles)
        fwk = chain.forward_kinematics(y)
        distance = math.sqrt(np.linalg.norm(fwk[:3, -1] - target))
        # print "Position distance: " + str(squared_distance)

        # Calculate orientation distance
        recentQuad = m3d.quaternion.UnitQuaternion(
            m3d.orientation.Orientation(fwk[:3, :3]))
        # targetQuad = m3d.quaternion.UnitQuaternion(m3d.orientation.Orientation(target_frame[:3, :3]))
        orientation_distance = targetQuad.ang_dist(recentQuad)

        # import transforms3d
        # import math
        # k = transforms3d.euler.EulerFuncs()
        # i_al, i_be, i_ga = transforms3d.taitbryan.mat2euler(fwk[:3, :3])
        # t_al, t_be, t_ga = transforms3d.taitbryan.mat2euler(target_frame[:3, :3])
        # d_al = i_al - t_al
        # d_be = i_be - t_be
        # d_ga = i_ga - t_ga
        # orientation_distance = math.sqrt(d_al*d_al + d_be*d_be + d_ga*d_ga)
        # orientation_distance = math.sqrt(d_al*d_al)
        # print "Orientation distance: " + str(orientation_distance)

        # Set the weight randomly for every calculation
        # orientation_weight = 1 - random.random()
        # orientation_weight = random.random()
        w = min(max(0, orientation_weight), 1)  # clamp the weight to [0, 1]
        return ((distance * (1 - w) + orientation_distance * w,
                 distance, orientation_distance))

    def fwki_only_position(x):
        # Calculate position distance
        y = chain.active_to_full(x, starting_nodes_angles)
        fwk = chain.forward_kinematics(y)
        squared_distance = np.linalg.norm(fwk[:3, -1] - target)
        return (squared_distance)

    def eval_func(chromosome):
        # stay within the limits of the joint bounds
        bound_arr = []
        for t in chain.links:
            # print t
            if t.bounds != (None, None):
                bound_arr.append(t.bounds)

        # if a gene is not in bounds, fit the value into the bounds
        x = []
        for t, value in enumerate(chromosome):
            down, up = bound_arr[t]
            if value < down:
                value = down
            if value > up:
                value = up
            x.append(value)

        if (include_orientation):
            opt, dist, or_dist = fwki_with_orientation(x)
            # opt = fwki_only_position(x)
        else:
            opt = fwki_only_position(x)
            dist, or_dist = opt, 0.0  # position-only case: no orientation error tracked

        score = float(opt)
        return score, dist, or_dist

    def refresh_fitness(indiv):
        indiv.fitness, indiv.distance, indiv.orientation_distance = eval_func(
            indiv.gene)

    class Individual(object):
        def __init__(self, gene):
            self.gene = gene
            self.fitness, self.distance, self.orientation_distance = eval_func(
                self.gene)

        def change_gene(self, index, value):
            self.gene[index] = value
            # self.fitness = eval_func(self.gene)

        def refresh_fitness(self):
            self.fitness, self.distance, self.orientation_distance = eval_func(
                self.gene)

        # def __cmp__(self, other):
        #     if hasattr(other, 'fitness'):
        #         return self.fitness.__cmp__(other.fitness)

        def __repr__(self):
            return '{}: {} f: {}'.format(self.__class__.__name__, self.gene,
                                         self.fitness)

    class Population(object):
        def __init__(self):
            self.individuals = []
            self.sorted = False

    def tournamentSelection(pop, num):
        indiNum = random.randint(0, len(pop.individuals) - 1)
        for i in range(num):
            indiOther = random.randint(0, len(pop.individuals) - 1)
            if pop.individuals[indiOther].fitness < pop.individuals[
                    indiNum].fitness:
                indiNum = indiOther
        return indiNum

    def genCrossover(gMom, gDad):
        off = []
        weight = random.random()
        for i in range(len(gMom)):
            off.append((gMom[i] * weight + gDad[i] * (1 - weight)))
            # if random.random() > 0.5:
            #     brother[i] = ((gMom[i] + gDad[i]) / 2) + (gMom[i] - gDad[i]) - random.random()
            #     # brother = gDad.clone()
            # else:
            #     brother[i] = ((gMom[i] + gDad[i]) / 2) + (gDad[i] - gMom[i]) + random.random()
            #     # brother = gMom.clone()
        return (off)

    def mutate(indi):
        off = []
        for i in range(len(indi.gene)):
            off.append(indi.gene[i])
            # if random.random() < indi.fitness * 5:
            if True:
                if random.random() < mutation_rate:
                    # !!!! ToDo strength of mutation, dependent on success
                    off[i] = off[i] + (random.random() * 2 - 1)
        indi.gene = off
        # print "Mutant: gene: " + str(off)
        # raw_input()
        return (off)

    def mutation(parent):
        parent.gene = mutate(parent)
        parent.gene = chain.active_from_full(
            chain.inverse_kinematics(target_frame,
                                     initial_position=chain.active_to_full(
                                         parent.gene, starting_nodes_angles),
                                     method="SLSQP",
                                     include_orientation=True,
                                     max_iter=max_iter * 5))

    def initGene(bounds=None):
        import math
        chrome = np.zeros(chromLength)
        for t in range(chromLength):
            if (bounds == None):
                init = random.uniform(-math.pi, math.pi)
            else:
                up, down = bounds[t]
                init = random.uniform(up, down)
            chrome[t] = init
        # Take out or leave in: optimize at creation
        # chrome = chain.active_from_full(chain.inverse_kinematics(target_frame, initial_position=chain.active_to_full(chrome, starting_nodes_angles), method="SLSQP", include_orientation=True))
        return (chrome)

    chromLength = len(chain.active_from_full(starting_nodes_angles))

    # Build bounds array from URDF data
    bound_arr = []
    for t in chain.links:
        # print t
        if t.bounds != (None, None):
            bound_arr.append(t.bounds)

    # initialize genomes
    pop = Population()
    for i in range(population_size):
        chrome = initGene(bounds=bound_arr)
        # chrome = np.zeros(chromLength)
        # for t in range(chromLength):
        #     init = random.uniform(-3.141, 3.141)
        #     chrome[t] = init
        # !!!! ToDo change parameter of SLSQP optimization.
        # Do not go so deep, to save calculation time
        # chrome = chain.active_from_full(chain.inverse_kinematics(target_frame, initial_position=chain.active_to_full(chrome, starting_nodes_angles), method="SLSQP", include_orientation=True, max_iter=max_iter))
        # indi = Individual(chrome)
        indi = Individual(initGene(bound_arr))
        pop.individuals.append(indi)

    # chrome = chain.active_from_full(chain.inverse_kinematics(target_frame, method="SLSQP", include_orientation=True, max_iter=max_iter))
    # indi = Individual(chrome)
    indi = Individual(initGene(bound_arr))
    pop.individuals.append(indi)

    pop.individuals = sorted(pop.individuals,
                             key=lambda individual: individual.fitness)
    # for individual in pop.individuals:
    #     print individual

    i = 0
    nic = 0  # No improvement counter
    minFitness = 10
    lastFitness = 10
    min_distance = 10
    min_orientation_distance = 10
    acc_reached = False

    # while i < num_generations and minFitness > reachFitness:  # iterate through the generations
    pool = Threadpool()
    while i < num_generations and not acc_reached:  # iterate through the generations
        start_t = time.time()
        # if lastFitness - minFitness < 0.01:
        #     nic += 1
        # else:
        #     lastFitness = minFitness
        #     nic = 0
        i += 1
        # print("(Gen: #%s) Total error: %s\n" % (i, np.sum([ind.fitness for ind in pop.individuals])))
        # print "Min Error: " + str(pop.individuals[0].fitness)

        newPop = Population()
        # winners = np.zeros((params[4], params[3]))  # 20x2

        # clear the population of Individuals with the same fitness
        t = 1
        # Get all individuals out that are nearly equal
        for t in range(1, len(pop.individuals)):
            if np.allclose(pop.individuals[t - 1].fitness,
                           pop.individuals[t].fitness,
                           rtol=1e-03,
                           atol=1e-04):
                pop.individuals[t].gene = initGene(bound_arr)
                # print "cleared"
                # raw_input()

        start_mutation = time.time()
        # get the elites and optimize them with the numerical method
        parents = copy.deepcopy(pop.individuals[:num_elites])
        pool.map(mutation, parents)
        # for t in range(num_elites):
        #     start_mutation_it = time.time()
        #     parent = copy.deepcopy(pop.individuals[t])
        #     print("Copy {}s".format(time.time() - start_mutation_it))
        #     parent.gene = mutate(parent)
        #     print("Mutate {}s".format(time.time() - start_mutation_it))
        #     parent.gene = chain.active_from_full(chain.inverse_kinematics(target_frame, initial_position=chain.active_to_full(parent.gene, starting_nodes_angles), method="SLSQP", include_orientation=True, max_iter=max_iter * 5))
        #     print("Gene {}s".format(time.time() - start_mutation_it))

        for parent in parents:
            if len(newPop.individuals) > 0:
                if not np.array_equal(parent.gene, newPop.individuals[0].gene):
                    # print "Elite: " + str(parent.gene)
                    newPop.individuals.append(parent)
            else:
                # print "First Elite: " + str(parent.gene)
                newPop.individuals.append(parent)

        # print("Mutation Process {}s".format(time.time() - start_mutation))
        start_cross = time.time()

        # Crossover the population, select by tournament selection
        while len(newPop.individuals) < population_size and len(
                pop.individuals) > 2:
            # start_cross_it = time.time()
            momNum = tournamentSelection(pop, 3)
            mom = pop.individuals[momNum].gene
            dadNum = tournamentSelection(pop, 3)
            dad = pop.individuals[dadNum].gene
            # print("Selection {}s".format(time.time() - start_cross_it))
            off = genCrossover(mom, dad)
            indi = Individual(off)
            indi.gene = mutate(indi)

            # invest in optimization dependent on the current generation
            if (random.random() * i > num_generations * 0.5):
                indi.gene = chain.active_from_full(
                    chain.inverse_kinematics(
                        target_frame,
                        initial_position=chain.active_to_full(
                            off, starting_nodes_angles),
                        method="SLSQP",
                        include_orientation=True,
                        max_iter=max_iter))
            # print("Offspring {}s".format(time.time() - start_cross_it))

            if (indi.fitness < pop.individuals[momNum].fitness) or (
                    indi.fitness < pop.individuals[dadNum].fitness):
                if momNum != dadNum:
                    del pop.individuals[dadNum]
                    del pop.individuals[momNum - 1]
                else:
                    del pop.individuals[dadNum]
            newPop.individuals.append(indi)
            # print("Cross_it {}s".format(time.time() - start_cross_it))

        end_cross = time.time()

        # Fill up the rest of the population with new individuals
        while len(newPop.individuals) < population_size:
            indi = Individual(initGene(bound_arr))
            if (random.random() * i > num_generations * 0.5):
                indi.gene = chain.active_from_full(
                    chain.inverse_kinematics(
                        target_frame,
                        initial_position=chain.active_to_full(
                            indi.gene, starting_nodes_angles),
                        method="SLSQP",
                        include_orientation=True,
                        max_iter=max_iter))
            newPop.individuals.append(indi)

        pop = newPop
        start_fit = time.time()
        # Taken out for multithreading
        # for indi in pop.individuals:
        #     indi.refresh_fitness()
        results = pool.map(refresh_fitness, pop.individuals)

        pop.individuals = sorted(pop.individuals,
                                 key=lambda individual: individual.fitness)
        minFitness = pop.individuals[0].fitness
        min_distance = pop.individuals[0].distance
        min_orientation_distance = pop.individuals[0].orientation_distance
        acc_reached = (min_distance < distance_acc
                       and min_orientation_distance < orientation_acc)

        print("Elite Mutation: {}s\n Crossover: {}s\n Fill: {}s\n Fit: {}s".
              format(start_cross - start_t, end_cross - start_cross,
                     start_fit - end_cross, time.time() - start_fit))
        print("Eval time: {}s".format(time.time() - start_t))
        print("End criteria: " + str(acc_reached) + ' ' + str(min_distance) +
              ' ' + str(min_orientation_distance))
        # raw_input()
        # for indi in pop.individuals:
        #     print indi

    # print "Give return"
    # raw_input()
    pool.close()
    pool.join()
    return (chain.active_to_full(pop.individuals[0].gene,
                                 starting_nodes_angles))

    db = client['product_dict']
    content = db['jd']
    content.insert(product_list)

if __name__ == '__main__':
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:54.0) Gecko/20100101 Firefox/54.0'
    }
    urls = [
        'https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&suggest=1.def.0.V00&wq=shouji&cid2=653&cid3=655&page={}&s=57&click=0'
        .format(page) for page in range(1, 10, 2)
    ]
    start_time = time.time()
    pool = Threadpool(5)
    pool.map(get_response, urls)
    pool.close()
    pool.join()
    end_time = time.time()
    print('Took {} seconds'.format(str(end_time - start_time)))

# def print_hello(name):
#     print 'Hello', name
#     time.sleep(2)  # delay
#
# name_list = ['tony', 'xiao', 'lvshe']
# start_time = time.time()
# pool = Threadpool(3)  # create a thread pool; the argument is the number of threads
# pool.map(print_hello, name_list)  # map takes a function and a sequence
# pool.close()

# Decrypt the JS to obtain the cookie
def decrypt_cookie(self, data):
    func_data = "function getClearance(){" + data + "};"
    func_data = func_data.replace("</script>", "")
    func_data = func_data.replace("eval", "return")
    func_data = func_data.replace("<script>", "")
    real_data = js2py.eval_js(func_data)
    str_data = str(real_data())
    str_data = str_data.strip('\'')
    index1 = str_data.find("document.")
    index2 = str_data.find("};if((")
    data = str_data[index1:index2].replace("document.cookie", "cookie")
    data = "function getClearance(){" + data + ";return cookie;}"
    data = js2py.eval_js(data)
    return data()

if __name__ == "__main__":
    a = Cnvdspider()
    # Single-threaded is slow, but it has been stable and has not been blocked by CNVD.
    # Crawling the full CNVD database took about ten days; network interruptions along
    # the way required maintaining the code and re-crawling the affected page ranges.
    pool = Threadpool(1)
    a.run()
    # pool.map(a.run(), self.parse())
    pool.close()
    pool.join()

    with open(config_filename, 'w') as config_out:
        config_out.write(config_data)
    session.disconnect()
    return

# ===================== Main - configuration ==============================================================
# ==========================================================================================================
devices = read_devices('devices-file')
creds = read_devices_creds('encrypted-device-creds', 'cisco')

num_of_threads = input('Enter the number of threads to be in pool (5): ') or '5'
num_of_threads = int(num_of_threads)

# creating a list of parameters to pass to the config_worker method
config_parameter_list = []
for ipaddr, device in devices.items():
    config_parameter_list.append((device, creds[ipaddr]))

starting_time = time()

# ----- creating thread pool and launching config_worker ------#
print('\n------- creating thread pool and launching config_worker method ------\n')
threads = Threadpool(num_of_threads)
results = threads.map(config_worker, config_parameter_list)

print('\n\n------------ End get config threaded, elapsed time= ', time() - starting_time)