Code Example #1
File: single.py Project: ckitagawa/PythonSnippets
def main():
	ts = time()
	download_dir = setup_download_dir()
	links = [l for l in get_links() if l.endswith('.csv')]
	for link in links:
		download_link(download_dir, link)
	print('Took {}s'.format(time() - ts))
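Most of the examples on this page rely on a handful of helpers (setup_download_dir, get_links, download_link) that are defined elsewhere in the respective projects and whose exact signatures vary from project to project. Purely as an illustrative sketch, assuming the Imgur gallery API and a local images directory, such helpers could look like:

import json
import logging
import os
from pathlib import Path
from urllib.request import Request, urlopen

logger = logging.getLogger(__name__)

types = {'image/jpeg', 'image/png'}


def get_links(client_id):
    # Ask the Imgur API for a page of gallery items and keep only the image links
    headers = {'Authorization': 'Client-ID {}'.format(client_id)}
    req = Request('https://api.imgur.com/3/gallery/random/random/', headers=headers)
    with urlopen(req) as resp:
        data = json.loads(resp.read().decode('utf-8'))
    return [item['link'] for item in data['data'] if item.get('type') in types]


def download_link(directory, link):
    # Save the image behind `link` into `directory`, keeping its original file name
    download_path = directory / os.path.basename(link)
    with urlopen(link) as image, download_path.open('wb') as f:
        f.write(image.read())
    logger.info('Downloaded %s', link)


def setup_download_dir(dirname='images'):
    # Create the target directory if it does not exist yet
    download_dir = Path(dirname)
    if not download_dir.exists():
        download_dir.mkdir()
    return download_dir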
Code Example #2
def main():
    ts = time()
    # To set the environment variable in the terminal:
    # export IMGUR_CLIENT_ID='my-client-id'
    client_id = os.getenv('IMGUR_CLIENT_ID')

    if not client_id:
        raise Exception('Check your IMGUR_CLIENT_ID environment variable')

    if not os.path.isdir('images'):
        os.makedirs('images')

    links = get_links(client_id)

    queue = Queue()
    for x in range(4):
        worker = DownloadWorker(queue)
        worker.daemon = True  # let the main thread exit
        worker.start()

    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((Path('images'), link))

    queue.join()
    logging.info('Took {}'.format(time() - ts))
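Example #2 and the other threaded snippets below also rely on a DownloadWorker class that is not shown on this page. A minimal sketch of such a worker, assuming the download_link helper sketched above, might be:

from threading import Thread


class DownloadWorker(Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        while True:
            # Fetch a (directory, link) tuple from the queue and download it
            directory, link = self.queue.get()
            try:
                download_link(directory, link)
            finally:
                # Always mark the task as done so queue.join() can return
                self.queue.task_done()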
Code Example #3
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)

    queue = Queue()

    # Creating 3 worker threads
    for x in range(3):
        worker = DownloadWorker(queue)

        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()

    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))

    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)
Code Example #4
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)

    # By placing the executor inside a with block, the executor's shutdown method
    # will be called on exit, cleaning up the threads.
    #
    # By default, the executor uses 5 * cpu_count() workers on Python 3.5-3.7,
    # and min(32, cpu_count() + 4) workers on Python 3.8+.
    with ThreadPoolExecutor() as executor:

        # Create a new partially applied function that stores the directory
        # argument.
        #
        # This allows the download_link function that normally takes two
        # arguments to work with the map function that expects a function of a
        # single argument.
        fn = partial(download_link, download_dir)

        # Executes fn concurrently using threads on the links iterable. The
        # timeout is for the entire process, not a single call, so downloading
        # all images must complete within 30 seconds.
        executor.map(fn, links, timeout=30)
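Note that executor.map returns a lazy iterator: exceptions raised inside download_link, and the 30 second timeout, only surface when the results are consumed, which this snippet never does. To make failures visible, iterate the result inside the with block, for example:

        # Iterating the map result re-raises any download exception and enforces the timeout
        for _ in executor.map(fn, links, timeout=30):
            pass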
Code Example #5
File: process_toutiao.py Project: oencoding/ListenTV
def main():
    ts = time()

    url1 = 'https://item.taobao.com/item.htm?spm=a217l.8087239.620352.3.512Gng&id=536843329282'
    url2 = 'https://item.taobao.com/item.htm?spm=a217l.8087239.620352.4.512Gng&id=44022485238'

    download_dir = setup_download_dir('process_imgs')
    links = list(chain(
        get_links(url1),
        get_links(url2),
    ))

    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Code Example #6
File: single.py Project: gitrookie/codesnippets
def main():
    print("In Main")
    ts = time()
    download_dir = setup_download_dir()
    links = [l for l in get_links('c53645e1e12ad62') if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Code Example #7
async def main():
    client_id = 'f8f603617f590ed'
    download_dir = setup_download_dir()
    # We use a session to take advantage of tcp keep-alive
    # Set a 3 second read and connect timeout. Default is 5 minutes
    async with aiohttp.ClientSession(conn_timeout=3, read_timeout=3) as session:
        tasks = [(async_download_link(session, download_dir, l)) for l in get_links(client_id)]
        # gather aggregates all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)
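The asyncio snippets call an async_download_link coroutine that is likewise defined elsewhere, and its signature varies between examples (with and without a session argument). A rough sketch matching the session-based form used here, assuming aiohttp and aiofiles are available, might be:

import logging
import os

import aiofiles
import aiohttp

logger = logging.getLogger(__name__)


async def async_download_link(session, directory, link):
    # Stream the response body to disk in small chunks
    download_path = directory / os.path.basename(link)
    async with session.get(link) as response:
        async with aiofiles.open(download_path, 'wb') as f:
            while True:
                chunk = await response.content.read(1024)
                if not chunk:
                    break
                await f.write(chunk)
    logger.info('Downloaded %s', link)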
Code Example #8
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    q = Queue(connection=Redis(host='localhost', port=6379))
    for link in links:
        q.enqueue(download_link, download_dir, link)
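The jobs enqueued above are executed by separate RQ worker processes, which must be able to import download_link. A worker listening on the default queue can be started with the rq command-line tool or, roughly equivalently, from Python:

from redis import Redis
from rq import Worker

# Process jobs from the default queue until interrupted.
# The worker process must be able to import download_link.
redis_conn = Redis(host='localhost', port=6379)
worker = Worker(['default'], connection=redis_conn)
worker.work()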
Code Example #10
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception('Couldn\'t find IMGUR_CLIENT_ID environment variable!')
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Code Example #11
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
Code Example #12
def main():
    ts = time()
    client_id = 'bef2d9292d6bcbd'
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
Code Example #13
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # asyncio.async was renamed to asyncio.ensure_future in Python 3.4.4 and no longer parses on Python 3.7+;
    # loop.create_task is an alternative (available in Python >= 3.4.2)
    tasks = [asyncio.ensure_future(async_download_link(download_dir, l)) for l in get_links(client_id)]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    logger.info('Took %s seconds to complete', time() - ts)
Code Example #14
def main():
	ts = time()
	client_id = 'ee43c9d73f7dcc9'
	if not client_id:
		raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")
	download_dir = setup_download_dir()
	loop = asyncio.get_event_loop()
	# asyncio.async was renamed to asyncio.ensure_future in Python 3.4.4 and no longer parses on Python 3.7+;
	# loop.create_task is an alternative (available in Python >= 3.4.2)
	tasks = [asyncio.ensure_future(async_download_link(download_dir, l)) for l in get_links(client_id)]
	loop.run_until_complete(asyncio.wait(tasks))
	loop.close()
	print('Took {}s'.format(time() - ts))
Code Example #15
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
Code Example #16
File: single.py Project: luoaijun/LogLoadDemo
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Code Example #17
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    logging.info('Took %s seconds', time() - ts)
Code Example #18
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    download = partial(download_link, download_dir)
    with Pool(4) as p:
        p.map(download, links)
    logger.info('Took: %s', time() - ts)
Code Example #19
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    logging.info('Took %s seconds', time() - ts)
Code Example #20
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception('Couldn\'t find IMGUR_CLIENT_ID environment variable!')
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    with Pool(num_processes) as p:
        p.map(download, links)
    print('Took {}s'.format(time() - ts))
Code Example #21
def main():
    ts = time()
    try:
        client_id = key.client_id
    except (ImportError, AttributeError):
        logger.error('Cannot import client_id')
        raise
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logger.info(f"Took {time() - ts} seconds")
Code Example #22
def main():
    ts = time()

    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('thread_imgs')
    # Create a queue to communicate with the worker threads
    queue = Queue()

    links = list(
        chain(
            get_links(url1),
            get_links(url2),
            get_links(url3),
            get_links(url4),
            get_links(url5),
        ))

    # Create 16 worker threads
    for x in range(16):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the
        # workers are blocking
        worker.daemon = True
        worker.start()

    # Put the tasks into the queue as a tuple
    for i, link in enumerate(links, 1):
        queue.put((download_dir, link))
        print(i, link)

    # Causes the main thread to wait for the queue to finish processing all
    # the tasks
    queue.join()
    print('Total photos {}'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Code Example #23
File: multithreaded_future.py Project: kemathur/ctci
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    print(links, len(links))
    download = partial(download_link, download_dir)
    with ThreadPoolExecutor() as ex:
        ex.map(download, links)
    logging.info('Took %s seconds', time() - ts)
Code Example #24
File: single.py Project: Samoye/download
def main():
    download_dir = set_download_dir()
    url = ConfigParser.ConfigParser()
    url.readfp(open('api_url.ini', 'r'))
    api_url = url.get('api', 'api_url1')
    links = {'%d' % l['id']: l['thumbUrl'] for l in get_links(api_url)['response']}
    api_url = url.get('api', 'api_url2')
    for l in get_links(api_url)['response']:
        links['%d' % l['id']] = l['thumbUrl']
    api_url = url.get('api', 'api_url3')
    for l in get_links(api_url)['response']:
        links['%d' % l['id']] = l['thumbUrl']
    i = 0
    time1 = time.time()
    for link in links:
        name = link + '.jpg'
        download_links(download_dir, links[link], name)
        i = i + 1
    time2 = time.time()
    print time2 - time1
    print 'the number of photos is %d' % i
Code Example #25
def main():
    ts = time()

    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('single_imgs')
    links = list(
        chain(
            get_links(url1),
            get_links(url2),
            get_links(url3),
            get_links(url4),
            get_links(url5),
        ))
    for link in links:
        download_link(download_dir, link)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Code Example #26
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    q = Queue(connection=Redis(host='localhost', port=6379))
    for link in links:
        # Puts the job in a Redis server that can be on another machine.
        # Running `rqworker` in a terminal window starts a worker listening on the default queue;
        # `rqworker queue_name` will listen on the named queue instead.
        q.enqueue(download_link, download_dir, link)
Code Example #27
async def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    # We use a session to take advantage of tcp keep-alive
    # Set a 3 second read and connect timeout. Default is 5 minutes
    async with aiohttp.ClientSession(conn_timeout=3,
                                     read_timeout=3) as session:
        tasks = [(async_download_link(session, download_dir, l))
                 for l in get_links(client_id)]
        # gather aggregates all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)
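In recent aiohttp releases the conn_timeout and read_timeout keyword arguments are deprecated in favour of a single aiohttp.ClientTimeout object; a roughly equivalent session setup would be:

        # Roughly equivalent to the deprecated conn_timeout=3, read_timeout=3 pair
        timeout = aiohttp.ClientTimeout(total=3, connect=3)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            ...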
Code Example #28
def main():
    ts = time()

    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('process_imgs')
    links = list(
        chain(
            get_links(url1),
            get_links(url2),
            get_links(url3),
            get_links(url4),
            get_links(url5),
        ))

    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Code Example #29
def main():
    ts = time()
    hello()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    print(download_dir)
    testurl = u'http://i.imgur.com/i5QjTPA.jpg'
    download_link(download_dir, testurl)
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Code Example #30
def main():
    ts = time()
    # To set the environment variable in the terminal:
    # export IMGUR_CLIENT_ID='my-client-id'
    client_id = os.getenv('IMGUR_CLIENT_ID')

    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")

    if not os.path.isdir('images'):
        os.makedirs('images')

    links = get_links(client_id)
    for link in links:
        download_link(Path('images'), link)

    logging.info('Took %s seconds', time() - ts)
Code Example #31
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # asyncio.async was renamed to asyncio.ensure_future in Python 3.4.4 and no longer parses on Python 3.7+;
    # loop.create_task is an alternative (available in Python >= 3.4.2)
    tasks = [asyncio.ensure_future(async_download_link(download_dir, l)) for l in get_links(client_id) if l.endswith('.jpg')]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    logger.info('Took %s seconds to complete', time() - ts)


if __name__ == '__main__':
    main()
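On Python 3.7+ the explicit get_event_loop/run_until_complete/close dance can be replaced with asyncio.run. A sketch of the same flow, reusing the helpers assumed earlier on this page and the two-argument async_download_link used in this example, might look like:

import asyncio
import os
from time import time


async def async_main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    # gather schedules all the download coroutines on the running loop
    await asyncio.gather(*(async_download_link(download_dir, l) for l in links))
    print('Took {}s'.format(time() - ts))


if __name__ == '__main__':
    asyncio.run(async_main())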
Code Example #33
File: multithreaded.py Project: kemathur/ctci
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")

    download_dir = setup_download_dir()
    links = get_links(client_id)
    # create a queue to communicate with worker threads
    queue = Queue()
    # Create 8 threads
    for x in range(8):
        worker = DownloadWorker(queue)
        worker.daemon = True
        worker.start()
    # Put tasks into queue as a tuple
    for link in links:
        logger.info('Queuing {}'.format(link))
        queue.put((download_dir, link))
    # Make the main thread wait
    queue.join()
    logger.info('Took %s', time() - ts)
Code Example #34
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads
    for x in range(8):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)
Code Example #35
def main():
    ts = time()
    s = requests.session()
    #download_dir = setup_download_dir()
    # links = []
    login(s)
    # for state in states:
    #     l = get_links(state, s)
    #     links += l
    #     p_to_f(str(l))
    # print(links)

    # logger.info("Total brew and beer data: %s",download_link(links[0],s))
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads, each with its own database connection
    connections = []
    for x in range(8):
        conn = db.establish_connection()
        cur = db.create_cursor(conn)
        connections.append((conn, cur))
        worker = DownloadWorker(queue, cur, conn)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for state in states:
        links = get_links(state, s)
        for link in links:
            # logger.info('Queueing {}'.format(link))
            queue.put((link, s, state))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    # Commit and close every worker connection, not just the last one
    for conn, cur in connections:
        conn.commit()
        db.close_connection(conn, cur)
    logging.info('Took %s', time() - ts)
Code Example #36
def main():
    ts = time()
    client_id = IMGUR_CLIENT_ID
    if not client_id:
        raise Exception("Need a valid IMGUR_CLIENT_ID to use the API!")
    download_dir = setup_download_dir()
    # get only image links from the API
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    # Create a queue to communicate with the worker threads
    queue = Queue()  
    for x in range(8):
        # Create 8 worker threads
        logging.info('Starting thread %s', x)
        worker = DownloadWorker(queue)
        # main thread can exit even though workers are blocked
        worker.daemon = True
        worker.start()
    # Create a task in the queue for each image link
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    print('Execution time: {} seconds.'.format(time() - ts))
Code Example #37
import logging
from functools import partial
from multiprocessing import Pool
from download import get_links, setup_download_dir,\
                     download_link, CLIENT_ID

logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logger = logging.getLogger(__name__)

PROCESSES = 4

if __name__ == '__main__':
    download_dir = setup_download_dir('images')
    download = partial(download_link, download_dir)
    links = get_links(CLIENT_ID)
    with Pool(PROCESSES) as p:
        p.map(download, links)
Code Example #38
import logging
from queue import Queue
from download import get_links, setup_download_dir,\
                     DownloadWorker, CLIENT_ID

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logger = logging.getLogger(__name__)

THREADS = 4

if __name__ == '__main__':
    download_dir = setup_download_dir('images')
    queue = Queue()
    for link in get_links(CLIENT_ID):
        queue.put((download_dir, link))
    for i in range(THREADS):
        t = DownloadWorker(queue)
        t.daemon = True
        t.start()
    queue.join()
Code Example #39
import logging
import os
from time import time

from download import setup_download_dir, get_links, download_link

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s|%(levelname)s|%(threadName)s|%(message)s')
log = logging.getLogger(__name__)

if __name__ == '__main__':
    ts = time()

    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID env variable")

    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)

    log.info('Took %s seconds', time() - ts)