예제 #1
0
def GetAllInspId():
    """Fetch every result page, then save the collected ids to disk.

    ``GetInspBySkip`` is mapped over the page indices
    ``0 .. total // 25`` on a pool of 10 threads (presumably it fills the
    module-level ``InspID`` collection -- confirm against its definition).
    Afterwards every entry of ``InspID`` is written to
    ``insp_list_filename``, one per line, terminated by ``'\\r\\n'``.

    Only module-level names are read here, none are rebound, so no
    ``global`` declaration is required.
    """
    # Explicit floor division: the page count must be an integer for range()
    # no matter how the interpreter treats "/" on two ints.
    page_count = total // 25 + 1

    pool = TPool(10)
    try:
        pool.map(GetInspBySkip, range(0, page_count))
    finally:
        # Release the worker threads even when a page fetch raised.
        pool.close()
        pool.join()

    print('Okay..Begin to save InspID...')
    # The context manager guarantees the list file is flushed and closed
    # even if one of the writes fails.
    with open(insp_list_filename, 'w') as out:
        out.writelines(i + '\r\n' for i in InspID)
    print('%s inspids have been saved ~' % len(InspID))
예제 #2
0
def MultiProcessingCleanAndSave():
    """Run ``CleanSegAndSave`` on every file listed in ``infoIndex.txt``.

    The index file is read as UTF-8; the first whitespace-separated field of
    each non-blank line is taken as a file name.  The names are processed on
    a pool of 10 threads and the elapsed wall-clock time in seconds is
    printed at the end.
    """
    from multiprocessing.dummy import Pool as TPool

    files = []
    with codecs.open('infoIndex.txt', 'r', encoding='utf-8') as index:
        for line in index:
            fields = line.split()
            # Blank lines have no fields; skip them instead of failing with
            # an IndexError.
            if fields:
                files.append(fields[0])

    start = time.time()
    p = TPool(10)  # thread pool
    try:
        p.map(CleanSegAndSave, files)
    finally:
        # Always shut the workers down, even if one task raised.
        p.close()
        p.join()
    print(time.time() - start)
예제 #3
0
def parse_google_location_data(filename):
    """Parse GPS data from a Takeout archive into a DataFrame.

    The JSON file must contain a top-level ``"locations"`` list.  The
    resulting frame is post-processed as follows:

    * ``verticalAccuracy`` and ``heading`` are dropped when present
      (``altitude`` and ``velocity`` are deliberately kept);
    * ``timestampMs`` is converted to datetimes (unit: milliseconds);
    * ``latitudeE7`` / ``longitudeE7`` are divided by 1e7 and rounded to
      5 decimals;
    * when an ``activity`` column exists, each cell is replaced by
      ``cell[0]['activity'][0]['type']`` or NaN if that path is missing;
    * the three columns above are renamed to ``time``, ``lat``, ``lon``.

    Args:
        filename: path of the JSON file to read.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    def arow(cell):
        """Return the first activity type stored in *cell*, else NaN."""
        try:
            # Rows without an activity record hold a float NaN.
            if isinstance(cell, float):
                return np.nan
            if len(cell) > 0:
                return cell[0]['activity'][0]['type']
            return np.nan
        except (TypeError, LookupError):
            # Unsized cell, or a missing key / index along the path.
            return np.nan

    with open(filename, 'r') as f:
        js = json.load(f)

    js = pd.DataFrame(js['locations'])

    # errors='ignore' makes the drop a no-op for columns that are absent.
    js = js.drop(columns=['verticalAccuracy', 'heading'], errors='ignore')

    js.timestampMs = pd.to_datetime(js.timestampMs, unit='ms')
    js.latitudeE7 = np.round(js.latitudeE7 / 1e7, 5)
    js.longitudeE7 = np.round(js.longitudeE7 / 1e7, 5)

    if 'activity' in js.columns:
        pool = TPool(secrets.CLEANING_THREADS)
        try:
            # Map over the cell values directly; materialising iterrows()
            # would build a throw-away Series for every row.
            js['activity'] = pool.map(arow, js['activity'].tolist())
        finally:
            # Release the worker threads even if the mapping failed.
            pool.close()
            pool.join()

    return js.rename(columns={
        'latitudeE7': 'lat',
        'longitudeE7': 'lon',
        'timestampMs': 'time',
    })
예제 #4
0
def GetDetailInfo():
    """Process the detail page of every known inspection id.

    Resets the module-level ``count`` to 0.  When the in-memory ``InspID``
    collection does not hold exactly ``total`` entries, it is rebuilt from
    ``insp_list_filename``.  ``GetDetailInfoByInspId`` is then mapped over
    the ids on a pool of 10 threads.
    """
    global InspID, count
    count = 0
    if len(InspID) != total:
        with open(insp_list_filename, 'r') as id_file:
            # Strip the line terminator so the reloaded ids are the bare
            # values rather than "id\r\n"; blank lines are ignored.
            stripped = (line.strip() for line in id_file)
            InspID = set(entry for entry in stripped if entry)

    p = TPool(10)
    try:
        p.map(GetDetailInfoByInspId, InspID)
    finally:
        # Release the worker threads even when a page failed.
        p.close()
        p.join()

    print("All detail pages are done ...")
예제 #5
0
    def start(self):
        """Run the parser interactively.

        Prints a numbered menu of news sections, asks which one to load
        (``0`` loads all of them) and whether full article texts should be
        fetched.  A single section is loaded directly through ``self.load``;
        for "all", the user is asked for a thread count and every section is
        loaded through a thread pool.
        """
        variant = [('https://ru.investing.com/news/forex-news/',
                    'Новости валютного рынка', 'forex'),
                   ('https://ru.investing.com/news/commodities-news/',
                    'Новости фьючерсов и сырьевых рынков', 'commodities'),
                   ('https://ru.investing.com/news/stock-market-news/',
                    'Новости фондовых рынков', 'market'),
                   ('https://ru.investing.com/news/economic-indicators/',
                    'Экономические показатели', 'indicators'),
                   ('https://ru.investing.com/news/economy/',
                    'Новости экономики', 'economy'),
                   ('https://ru.investing.com/news/cryptocurrency-news/',
                    'Новости криптовалют', 'crypto')]

        message = 'Выберите что загружать:\n'
        message += '0) загрузить все\n'
        message += ''.join(
            '{}) {} - {}\n'.format(number, title, url)
            for number, (url, title, _table) in enumerate(variant, start=1))
        print(message)

        while True:
            try:
                ans = int(input('номер: ')) - 1
            except ValueError:
                # Not a number: fall through to the hint and ask again.
                # Other errors (e.g. EOF on stdin) propagate instead of
                # looping forever.
                ans = None
            # Explicit range check: without it negative numbers would be
            # silently accepted through negative list indexing.
            if ans is not None and -1 <= ans < len(variant):
                break
            print('введите число от 0 до {}'.format(len(variant)))

        msg = "Загружать полный текст статей? (y/n)\n"
        msg += "это может занять много времени: "
        # Accept "y" regardless of case or surrounding whitespace.
        load_full = input(msg).strip().lower() == 'y'

        if ans != -1:
            address, _title, table_name = variant[ans]
            Db().init_db([table_name])
            self.load(address, table_name, load_full)
        else:
            threads = input('сколько тредов использовать (рекомендуется 4): ')
            try:
                thread_count = int(threads)
            except ValueError:
                thread_count = 4
            if thread_count < 1:
                # A pool needs at least one worker; use the recommended
                # value instead of failing after all the prompts.
                thread_count = 4
            Db().init_db([x[2] for x in variant])
            p = TPool(thread_count)
            try:
                p.starmap(self.load,
                          [(x[0], x[2], load_full) for x in variant])
            finally:
                # Release the worker threads even if a download failed.
                p.close()
                p.join()
예제 #6
0
def exe(filename):
    """Convert ``html/<filename>`` into segmented text under ``ori_txt/``.

    If ``ori_txt/<filename>.txt`` already exists the file is skipped.
    Otherwise the HTML is parsed with BeautifulSoup, its text is passed
    through ``clean_html``, tokenised with ``jieba.cut`` and written out as
    space-separated tokens.

    Args:
        filename: name of the file inside the ``html/`` directory.
    """
    target = 'ori_txt/' + filename + '.txt'
    if os.path.exists(target):
        print('%s exists' % filename)
        return

    # Context managers close both handles even if parsing or writing fails.
    with open('html/' + filename) as source:
        bs = BeautifulSoup(source.read(), "html.parser")

    ori_text = clean_html(bs.get_text())
    ori_text = " ".join(jieba.cut(ori_text))

    # Encode explicitly as UTF-8 instead of relying on the interpreter's
    # implicit default encoding when writing non-ASCII text.
    with codecs.open(target, 'w', encoding='utf-8') as sink:
        sink.write(ori_text)
    print('%s is ok' % filename)


# Build the work list: the first whitespace-separated field of every
# non-blank line of the UTF-8 index file.
filenames = []
with codecs.open('infoIndex.txt', 'r', encoding='utf-8') as f:
    for line in f:
        ls = line.split()
        # Skip blank lines instead of failing with an IndexError.
        if ls:
            filenames.append(ls[0])
from multiprocessing import Pool
from multiprocessing.dummy import Pool as TPool

# Convert all files on 30 threads and report the elapsed time.
p = TPool(30)
start = datetime.now()
try:
    p.map(exe, filenames)
finally:
    # Shut the workers down even when a conversion raised.
    p.close()
    p.join()
print(datetime.now() - start)
예제 #7
0
def lineup_expected_value(team: List[Player], iterations: int = 1000) -> float:
    """Return the mean of ``simulate_team(team)`` over repeated runs.

    Args:
        team: the lineup handed unchanged to every simulation run.
        iterations: how many times ``simulate_team`` is invoked.

    Returns:
        ``mean`` of the individual simulation results.
    """
    pool = TPool(16)
    with pool:
        # One reference to the same lineup per requested run.
        lineups = [team] * iterations
        outcomes = pool.map(simulate_team, lineups)
    return mean(outcomes)
예제 #8
0
#         falseCount += 1

import time
from multiprocessing import Pool
from multiprocessing.dummy import Pool as TPool


# Sampling population, kept in the original z..a order so that runs with a
# seeded generator draw the same letters as before.
_LETTERS = 'zyxwvutsrqponmlkjihgfedcba'


def work(i):
    """Run one trial: draw a random string and test it against ``mbf``.

    A string of distinct lowercase letters is drawn, with a random length
    taken from ``random.randint(minl, maxl + 1)``.

    Args:
        i: trial number supplied by ``Pool.map``; not used.

    Returns:
        ``False`` when the string is contained in ``words``; otherwise the
        result of ``mbf.lookup`` for that string.
    """
    # NOTE(review): random.randint includes both endpoints, so a length of
    # maxl + 1 can be drawn -- confirm that this is intended.
    length = random.randint(minl, maxl + 1)
    # random.sample picks without replacement, so no letter repeats.
    candidate = ''.join(random.sample(_LETTERS, length))
    return (candidate not in words) and (mbf.lookup(candidate))


start = time.time()

p = TPool(13)  # thread pool
try:
    # Run exactly `amount` trials so the rate below is computed over the
    # same number of samples it is divided by.
    res = p.map(work, range(amount))
finally:
    # Shut the workers down even when a trial raised.
    p.close()
    p.join()

falseCount = sum(res)
print('falseCount =  %s' % falseCount)
print('amount =  %s' % amount)
# Guard against an empty run instead of dividing by zero.
print('The Rate is %s' % (falseCount / (amount + 0.0) if amount else 0.0))
print('using  %s s' % (time.time() - start))