Esempio n. 1
0
def test_index_row_dict_from_csv():
    """Time ``csv_io.index_row_dict_from_csv`` with and without chunking.

    The function is called twice on ``data_file_path`` with
    ``index_col="_id"``: once with ``iterator=True`` and ``chunksize=1000``,
    and once with only the required arguments.  Nothing is asserted; both
    timers are created with ``display=True``.
    """
    show = True

    # First pass: iterator mode, 1000 rows per chunk.
    with Timer(display=show) as chunked_timer:
        chunked_table = csv_io.index_row_dict_from_csv(
            data_file_path,
            index_col="_id",
            iterator=True,
            chunksize=1000,
        )

    # Second pass: default (non-iterator) mode.
    with Timer(display=show) as plain_timer:
        plain_table = csv_io.index_row_dict_from_csv(
            data_file_path,
            index_col="_id",
        )
Esempio n. 2
0
def test():
    """Check that ``pandas.read_csv`` loads the CSV faster than ``tablib``.

    Both loaders are timed on ``data_file_path``; the test fails when the
    pandas timing is not strictly smaller than the tablib timing.
    """
    with Timer(title="pandas") as timer:
        df = pd.read_csv(data_file_path)
    elapse_pandas = timer._elapsed

    with Timer(title="tablib") as timer:
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(data_file_path) as csv_file:
            data = tablib.Dataset().load(csv_file.read())
    elapse_tablib = timer._elapsed

    assert elapse_pandas < elapse_tablib, (
        "pandas.read_csv() was not faster than tablib")
Esempio n. 3
0
def test_itertuple():
    """Compare ``DataFrame.itertuples`` against ``transform.itertuple``.

    Both iterations run over the same DataFrame read from
    ``data_file_path``, unpacking each row into three values.  A warning
    (not a failure) is issued when ``transform.itertuple`` is not strictly
    faster.
    """
    show = False
    frame = pd.read_csv(data_file_path)

    # Baseline: the pandas built-in row iterator.
    with Timer(display=show, title="DataFrame.itertuples()") as pandas_timer:
        for _id, a, b in frame.itertuples(index=False):
            pass

    # Candidate: the project's own implementation.
    with Timer(display=show, title="itertuples()") as custom_timer:
        for _id, a, b in transform.itertuple(frame):
            pass

    custom_is_faster = custom_timer.elapsed < pandas_timer.elapsed
    if not custom_is_faster:
        warnings.warn(
            "DataFrame.itertuples() should not faster than itertuples(df)!")
Esempio n. 4
0
def test_to_index_row_dict():
    """Compare ``transform.to_index_row_dict`` with a manual ``iterrows`` loop.

    A test frame from ``create_test_df(1000)`` is indexed by its ``_id``
    column.  The baseline builds an ``{index: dict(row)}`` mapping via
    ``DataFrame.iterrows``; the candidate calls ``to_index_row_dict`` and
    then looks up every index once.  A warning (not a failure) is issued
    when the candidate is not strictly faster.
    """
    show = False

    frame = create_test_df(1000)
    frame.index = frame["_id"]

    # Baseline: row-by-row conversion with iterrows().
    with Timer(display=show, title="DataFrame.iterrows()") as baseline_timer:
        lookup = {}
        for key, record in frame.iterrows():
            lookup[key] = dict(record)

    # Candidate: bulk conversion followed by one lookup per index value.
    with Timer(display=show, title="to_index_row_dict()") as candidate_timer:
        lookup = transform.to_index_row_dict(frame)
        for key in frame.index:
            record = lookup[key]

    candidate_is_faster = candidate_timer.elapsed < baseline_timer.elapsed
    if not candidate_is_faster:
        warnings.warn("to_index_row_dict() is slower than iterrows()!")
Esempio n. 5
0
def test_iter_tuple_from_csv():
    """Compare ``csv_io.iter_tuple_from_csv`` with pandas' ``itertuples``.

    Three passes over ``data_file_path`` are timed: pandas ``read_csv`` plus
    ``itertuples``, ``iter_tuple_from_csv`` in iterator mode with
    ``chunksize=1000``, and ``iter_tuple_from_csv`` with default arguments.
    Only the last one is compared against the pandas baseline; a warning
    (not a failure) is issued when it is not strictly faster.
    """
    show = False

    # Baseline: read the whole file with pandas, then iterate rows.
    with Timer(display=show, title="pandas.read_csv()") as pandas_timer:
        whole_frame = pd.read_csv(data_file_path)
        for _id, a, b in whole_frame.itertuples(index=False):
            pass

    # Chunked mode: timed, but not part of the comparison below.
    with Timer(
            display=show,
            title="iter_tuple_from_csv(iterator=True)") as chunked_timer:
        chunked_rows = csv_io.iter_tuple_from_csv(
            data_file_path, iterator=True, chunksize=1000)
        for _id, a, b in chunked_rows:
            pass

    # Default mode: this is the timing checked against the baseline.
    with Timer(
            display=show,
            title="iter_tuple_from_csv(iterator=False)") as plain_timer:
        plain_rows = csv_io.iter_tuple_from_csv(data_file_path)
        for _id, a, b in plain_rows:
            pass

    plain_is_faster = plain_timer.elapsed < pandas_timer.elapsed
    if not plain_is_faster:
        warnings.warn(
            "csv_io.iter_tuple_from_csv() is slower than itertuples()!")
Esempio n. 6
0
# -*- coding: utf-8 -*-

from urls import url_list
import aiohttp
import asyncio
from sfm.timer import DateTimeTimer as Timer


async def fetch(url, session):
    """Issue a GET for ``url`` on ``session`` and return the body as text.

    The body is read with ``errors="ignore"`` passed to the response's
    ``text()`` call.
    """
    async with session.get(url) as reply:
        body = await reply.text(errors="ignore")
    return body


async def run():
    """Fetch every URL in ``url_list`` concurrently and print body lengths.

    All requests share one ``aiohttp.ClientSession``.  Each URL is scheduled
    as its own task, the tasks are awaited together with ``asyncio.gather``,
    and the length of each returned body is printed as a list.
    """
    async with aiohttp.ClientSession() as session:
        # Schedule everything up front so the requests overlap.
        pending = [
            asyncio.ensure_future(fetch(target, session))
            for target in url_list
        ]
        bodies = await asyncio.gather(*pending)
        sizes = [len(body) for body in bodies]
        print(sizes)


# Script entry: time one full run of the aiohttp-based downloader.
with Timer("aiohttp"):
    # Create the event loop explicitly. Relying on asyncio.get_event_loop()
    # or asyncio.ensure_future() to create one implicitly when no loop is
    # running is deprecated in recent Python releases.
    loop = asyncio.new_event_loop()
    try:
        future = loop.create_task(run())
        loop.run_until_complete(future)
    finally:
        # Release the loop's resources even if run() raised.
        loop.close()
Esempio n. 7
0
# -*- coding: utf-8 -*-

"""
grequests is gevent + requests
"""

from urls import url_list
import grequests
from sfm.timer import DateTimeTimer as Timer

# Script entry: time fetching every URL in url_list through grequests.
with Timer(title="use grequests"):
    req_list = [
        grequests.AsyncRequest(method="GET", url=url)
        for url in url_list
    ]
    res_list = grequests.map(req_list)
    # grequests.map() puts None in the result list for any request that
    # failed, so guard before touching ``.text``; failed requests are
    # reported as None instead of crashing the whole run.
    results = [
        len(res.text) if res is not None else None
        for res in res_list
    ]
    print(results)
Esempio n. 8
0
# -*- coding: utf-8 -*-

from urls import url_list
from requests_futures.sessions import FuturesSession
from sfm.timer import DateTimeTimer as Timer

# One shared session; its get() returns a future rather than a response.
ses = FuturesSession()

# Script entry: time fetching every URL in url_list through requests_futures.
with Timer("with requests_future"):
    # Submit all requests first so they can run in the background ...
    future_list = [ses.get(url) for url in url_list]

    # ... then wait on each future in submission order and record body sizes.
    results = []
    for pending in future_list:
        results.append(len(pending.result().text))
    print(results)