def test_index_row_dict_from_csv():
    """Time ``csv_io.index_row_dict_from_csv`` in chunked vs one-shot mode.

    Both calls index the CSV at ``data_file_path`` by the ``_id`` column;
    the elapsed times are printed by ``Timer`` (display=True) rather than
    asserted.
    """
    display = True
    # Chunked, iterator-based load.
    with Timer(display=display) as timer:
        table1 = csv_io.index_row_dict_from_csv(
            data_file_path, index_col="_id", iterator=True, chunksize=1000)
    # Whole-file load for comparison.
    with Timer(display=display) as timer:
        table2 = csv_io.index_row_dict_from_csv(
            data_file_path, index_col="_id")
def test():
    """Assert that pandas loads the CSV faster than tablib.

    Fixes vs the original:
    - read the public ``timer.elapsed`` attribute (the sibling benchmark
      tests all use ``.elapsed``; ``_elapsed`` is private to the Timer
      implementation),
    - close the data file deterministically with a ``with`` block instead
      of leaking the handle from ``open(...).read()``.
    """
    with Timer(title="pandas") as timer:
        df = pd.read_csv(data_file_path)
    elapse_pandas = timer.elapsed
    with Timer(title="tablib") as timer:
        with open(data_file_path) as f:
            data = tablib.Dataset().load(f.read())
    elapse_tablib = timer.elapsed
    # Performance assertion: pandas' C parser should beat tablib's
    # pure-Python loader on the same file.
    assert elapse_pandas < elapse_tablib
def test_itertuple():
    """Compare ``DataFrame.itertuples()`` against ``transform.itertuple()``.

    Emits a warning (instead of failing) when the custom iterator is not
    faster, since timing comparisons are environment-dependent.
    """
    display = False
    df = pd.read_csv(data_file_path)
    with Timer(display=display, title="DataFrame.itertuples()") as timer1:
        for _id, a, b in df.itertuples(index=False):
            pass
    with Timer(display=display, title="itertuples()") as timer2:
        for _id, a, b in transform.itertuple(df):
            pass
    if timer1.elapsed <= timer2.elapsed:
        warnings.warn(
            "DataFrame.itertuples() should not faster than itertuples(df)!")
def test_to_index_row_dict():
    """Compare ``DataFrame.iterrows()`` against ``transform.to_index_row_dict()``.

    Builds an index -> row-dict mapping both ways on a 1000-row frame and
    warns (rather than fails) when the helper is not the faster path.
    """
    display = False
    df = create_test_df(1000)
    df.index = df["_id"]
    with Timer(display=display, title="DataFrame.iterrows()") as timer1:
        # Baseline: materialize each row via iterrows().
        table = {ind: dict(row) for ind, row in df.iterrows()}
    with Timer(display=display, title="to_index_row_dict()") as timer2:
        table = transform.to_index_row_dict(df)
        for ind in df.index:
            row = table[ind]
    if timer1.elapsed <= timer2.elapsed:
        warnings.warn("to_index_row_dict() is slower than iterrows()!")
def test_iter_tuple_from_csv():
    """Time three ways of iterating (_id, a, b) tuples out of the CSV.

    Compares plain ``pandas.read_csv().itertuples()`` against
    ``csv_io.iter_tuple_from_csv`` in chunked and one-shot modes.
    NOTE(review): timer2 (chunked mode) is measured but never compared
    against the baseline — presumably intentional, worth confirming.
    """
    display = False
    with Timer(display=display, title="pandas.read_csv()") as timer1:
        for _id, a, b in pd.read_csv(data_file_path).itertuples(index=False):
            pass
    with Timer(display=display, title="iter_tuple_from_csv(iterator=True)") as timer2:
        for _id, a, b in csv_io.iter_tuple_from_csv(
                data_file_path, iterator=True, chunksize=1000):
            pass
    with Timer(display=display, title="iter_tuple_from_csv(iterator=False)") as timer3:
        for _id, a, b in csv_io.iter_tuple_from_csv(
                data_file_path):
            pass
    if timer1.elapsed <= timer3.elapsed:
        warnings.warn(
            "csv_io.iter_tuple_from_csv() is slower than itertuples()!")
# -*- coding: utf-8 -*-

"""Benchmark: download every URL in ``url_list`` concurrently with aiohttp."""

from urls import url_list
import aiohttp
import asyncio
from sfm.timer import DateTimeTimer as Timer


async def fetch(url, session):
    """GET one URL and return its body as text (decode errors ignored)."""
    async with session.get(url) as response:
        return await response.text(errors="ignore")


async def run():
    """Fan out one fetch task per URL, gather them, and print body lengths."""
    tasks = []
    # Keep the session open until every request has completed.
    async with aiohttp.ClientSession() as session:
        for url in url_list:
            tasks.append(asyncio.ensure_future(fetch(url, session)))
        responses = await asyncio.gather(*tasks)
        print([len(html) for html in responses])


with Timer("aiohttp"):
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(run())
    loop.run_until_complete(future)
# -*- coding: utf-8 -*-

"""
grequests is gevent + requests.
"""

from urls import url_list
import grequests
from sfm.timer import DateTimeTimer as Timer


with Timer(title="use grequests"):
    # Build one async GET per URL, then resolve them all concurrently.
    pending = [
        grequests.AsyncRequest(method="GET", url=url)
        for url in url_list
    ]
    completed = grequests.map(pending)
    sizes = [len(res.text) for res in completed]
    print(sizes)
# -*- coding: utf-8 -*-

"""Benchmark: download every URL in ``url_list`` with requests-futures."""

from urls import url_list
from requests_futures.sessions import FuturesSession
from sfm.timer import DateTimeTimer as Timer


ses = FuturesSession()

with Timer("with requests_future"):
    # Kick off all GETs first; .result() then blocks per-future,
    # so the downloads overlap.
    futures = [ses.get(url) for url in url_list]
    results = [len(fut.result().text) for fut in futures]
    print(results)