def GetAllInspId(): global opener, InspID, is_ip_ok, total p = TPool(10) p.map(GetInspBySkip, range(0, total / 25 + 1)) p.close() p.join() print 'Okay..Begin to save InspID...' f = open(insp_list_filename, 'w') for i in InspID: f.write(i + '\r\n') f.close() print len(InspID), 'inspids have been saved ~'
def MultiProcessingCleanAndSave(): f = codecs.open('infoIndex.txt','r',encoding='utf-8') files = [] for line in f.xreadlines(): files.append(line.split()[0]) f.close() from multiprocessing import Pool from multiprocessing.dummy import Pool as TPool start = time.time() p = TPool(10)#设置线程池 p.map(CleanSegAndSave,files) p.close() p.join() print time.time()-start
def parse_google_location_data(filename):
    """Parse a Google Takeout "Location History" JSON file into a DataFrame.

    Parameters
    ----------
    filename : str
        Path to the Takeout location-history JSON file.

    Returns
    -------
    pandas.DataFrame
        Columns ``lat`` / ``lon`` (degrees, rounded to 5 decimals) and
        ``time`` (datetime64), plus any remaining raw columns.  When an
        ``activity`` column is present it is reduced to the top activity
        type string of the first guess (NaN when unavailable).
    """

    def _first_activity_type(idx):
        # Each raw activity cell is a list of timestamped guesses; keep the
        # top-confidence type of the first one, NaN on any missing piece.
        try:
            cell = frame.activity[idx]
            if isinstance(cell, float):  # NaN placeholder from pandas
                return np.nan
            if len(cell) > 0:
                return cell[0]['activity'][0]['type']
            return np.nan
        except Exception:
            return np.nan

    with open(filename, 'r') as f:
        raw = json.load(f)
    frame = pd.DataFrame(raw['locations'])

    # Drop low-value columns when present (altitude/velocity are kept on
    # purpose — the original deliberately commented those drops out).
    drop_cols = [c for c in ('verticalAccuracy', 'heading') if c in frame.columns]
    if drop_cols:
        frame.drop(columns=drop_cols, inplace=True)

    frame.timestampMs = pd.to_datetime(frame.timestampMs, unit='ms')
    # E7 fields store degrees * 1e7 (the original's 10e6 == 1e7)
    frame.latitudeE7 = np.round(frame.latitudeE7 / 1e7, 5)
    frame.longitudeE7 = np.round(frame.longitudeE7 / 1e7, 5)

    if 'activity' in frame.columns:
        # Map over the index only: iterrows() materializes every row as a
        # Series just to discard it, which is needlessly slow.
        pool = TPool(secrets.CLEANING_THREADS)
        frame.activity = list(pool.map(_first_activity_type, list(frame.index)))
        pool.close()
        pool.join()

    frame.rename(columns={'latitudeE7': 'lat',
                          'longitudeE7': 'lon',
                          'timestampMs': 'time'}, inplace=True)
    return frame
def GetDetailInfo(): global InspID, count, total count = 0 if (len(InspID) != total): InspID = set() f = open(insp_list_filename, 'r') for l in f.xreadlines(): InspID.add(l) f.close() p = TPool(10) p.map(GetDetailInfoByInspId, InspID) p.close() p.join() print "All detail pages are done ..."
def start(self):
    """Start the parser: prompt for a news category (0 = all), whether to
    load full article texts, then download into the matching DB table(s)."""
    variant = [('https://ru.investing.com/news/forex-news/',
                'Новости валютного рынка', 'forex'),
               ('https://ru.investing.com/news/commodities-news/',
                'Новости фьючерсов и сырьевых рынков', 'commodities'),
               ('https://ru.investing.com/news/stock-market-news/',
                'Новости фондовых рынков', 'market'),
               ('https://ru.investing.com/news/economic-indicators/',
                'Экономические показатели', 'indicators'),
               ('https://ru.investing.com/news/economy/',
                'Новости экономики', 'economy'),
               ('https://ru.investing.com/news/cryptocurrency-news/',
                'Новости криптовалют', 'crypto')]
    message = 'Выберите что загружать:\n'
    message += '0) загрузить все\n'
    for i, (url, title, _table) in enumerate(variant, start=1):
        message += '{}) {} - {}\n'.format(i, title, url)
    print(message)
    while True:
        try:
            ans = int(input('номер: ')) - 1
            # Bug fix: negative answers (e.g. "-1") used to wrap via
            # Python's negative indexing and silently pick a category;
            # reject anything outside 0..len(variant) and re-prompt.
            if not -1 <= ans < len(variant):
                raise ValueError(ans)
            address, table_name = variant[ans][0], variant[ans][2]
            break
        except Exception:
            print('введите число от 1 до {}'.format(len(variant)))
    msg = "Загружать полный текст статей? (y/n)\n"
    msg += "это может занять много времени: "
    load_full = input(msg) == 'y'
    if ans != -1:
        # single category
        Db().init_db([table_name])
        self.load(address, table_name, load_full)
    else:
        # "load all": one pool task per category
        threads = input('сколько тредов использовать (рекомендуется 4): ')
        Db().init_db([x[2] for x in variant])
        pool = TPool(int(threads))
        pool.starmap(self.load, [(x[0], x[2], load_full) for x in variant])
        pool.close()
        pool.join()
def exe(filename): if (os.path.exists('ori_txt/' + filename + '.txt')): print filename, 'exists' return f = open('html/' + filename) bs = BeautifulSoup(f.read(), "html.parser") f.close() ori_text = clean_html(bs.get_text()) ori_text = " ".join(jieba.cut(ori_text)) f = open('ori_txt/' + filename + '.txt', 'w') f.write(ori_text) f.close() print filename, 'is ok' f = codecs.open('infoIndex.txt', 'r', encoding='utf-8') filenames = [] for line in f.xreadlines(): ls = line.split() filenames.append(ls[0]) f.close() from multiprocessing import Pool from multiprocessing.dummy import Pool as TPool p = TPool(30) start = datetime.now() p.map(exe, filenames) p.close() p.join() print datetime.now() - start
def lineup_expected_value(team: List[Player], iterations: int = 1000) -> float:
    """Monte-Carlo estimate of a lineup's expected result.

    Runs ``simulate_team`` over the same ``team`` ``iterations`` times on a
    16-worker pool and returns the mean of the simulated outcomes.
    """
    with TPool(16) as workers:
        outcomes = workers.map(simulate_team, repeat(team, iterations))
    return mean(outcomes)
# falseCount += 1 import time from multiprocessing import Pool from multiprocessing.dummy import Pool as TPool def work(i): global mbf ranLen = random.randint(minl, maxl + 1) strr = string.join( random.sample([ 'z', 'y', 'x', 'w', 'v', 'u', 't', 's', 'r', 'q', 'p', 'o', 'n', 'm', 'l', 'k', 'j', 'i', 'h', 'g', 'f', 'e', 'd', 'c', 'b', 'a' ], ranLen)).replace(' ', '') return (strr not in words) and (mbf.lookup(strr)) start = time.time() p = TPool(13) #设置线程池 res = p.map(work, range(1, amount)) p.close() p.join() falseCount = sum(res) print 'falseCount = ', falseCount print 'amount = ', amount print 'The Rate is', falseCount / (amount + 0.0) print 'using ', time.time() - start, 's'