Example 1
def send_emails(modeladmin, request, queryset):
    messages = Queue()
    for user in queryset:
        process = Process(target=send_email, args=(user, messages))
        process.start()
        messages.get().send()
        process.join()
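Note that each child process is joined immediately after its message is fetched, so this admin action effectively runs one user at a time. The send_email worker and the multiprocessing imports are not shown; a minimal sketch, assuming this is a Django admin action and that send_email builds one message per user for the parent process to send (the subject and body are placeholders):

from django.core.mail import EmailMessage

def send_email(user, messages):
    # build the message in the child; the parent calls .send() on it
    email = EmailMessage(
        subject='Hello',
        body='Hi %s' % user.username,
        to=[user.email],
    )
    messages.put(email)

Example 2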
def _get_system_information_threaded(host):
    system_information_functions = [
        collect_win_application_stats, collect_win_bios_stats,
        collect_win_disk_stats, collect_win_local_account_stats,
        collect_win_local_group_stats, collect_win_mem_stats,
        collect_win_network_stats, collect_os_stats,
        collect_win_processes_stats, collect_win_cpu_stats,
        collect_win_services_stats
    ]
    system_information = {}
    queue = Queue()
    list_of_processes = []

    for hardware in system_information_functions:
        process = _Process(target=hardware, args=(
            host,
            1,
            queue,
        ))
        list_of_processes.append(process)
        process.start()

    for process in list_of_processes:
        process.join()
        system_information.update(queue.get())
    return system_information
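Each collect_* function is assumed to receive the host, a timeout-like argument, and the queue, and to put a single {section: data} dict on the queue for the parent to merge with update(). A minimal sketch of one such collector (the payload shape is an assumption):

def collect_os_stats(host, timeout, queue):
    # one dict per collector; the parent merges them via update()
    queue.put({'os': {'host': host}})

One caveat: the loop above joins each process before draining its result. The multiprocessing docs warn that a child with undelivered queue data may not terminate, so calling queue.get() before process.join() is the safer order for large payloads.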
Example 3
def main():
    '''load video, process frames, display to user'''
    tque = Queue()  #(maxsize=120)
    framequeue = Queue()  #(maxsize=120)

    cthread = threading.Thread(target=cvworker, args=(tque, ))
    cthread.daemon = True
    cthread.start()

    tthread = threading.Thread(target=tfworker, args=(tque, framequeue))
    tthread.daemon = True  #terminate testloop when user closes window
    tthread.start()

    start = time.time()

    frame = 0
    videoend = False
    while True:
        cvw = cv2.waitKey(1)
        if cvw & 0xFF == ord('q'): break
        if not videoend:
            print('got', frame, time.time())
            frame += 1
            print('frame:', frame)
            f = framequeue.get()
            if f is None:
                videoend = True  # the worker enqueues None to signal end of video
            else:
                #time.sleep(1/30) #limit to realtime
                cv2.imshow('frame', f)

    print('new took:', time.time() - start)
    cv2.destroyAllWindows()
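The cvworker and tfworker functions are not shown. A hedged sketch of cvworker, assuming it decodes frames with OpenCV, feeds them downstream through tque, and finishes with the None sentinel that the display loop checks for (the input file name is hypothetical):

def cvworker(tque):
    cap = cv2.VideoCapture('input.mp4')  # hypothetical input file
    while True:
        ok, frame = cap.read()
        if not ok:
            tque.put(None)  # end-of-video sentinel
            break
        tque.put(frame)
    cap.release()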
Example 4
class Actor(object):
    def __init__(self):
        # the actor's internal message mailbox queue
        self.__mailbox = Queue()

    def send(self, msg):
        self.__mailbox.put(msg)

    def recv(self):
        return self.__mailbox.get()
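A short usage sketch, assuming Queue is imported from the queue module (or from multiprocessing, as in the other examples on this page):

actor = Actor()
actor.send({'kind': 'greeting', 'body': 'hello'})
print(actor.recv())  # -> {'kind': 'greeting', 'body': 'hello'}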
Example 5
def find_words(start_words,
               center_words=None,
               neg_words=None,
               min_sim=0.6,
               max_sim=1.,
               alpha=0.25):
    if center_words is None and neg_words is None:
        min_sim = max(min_sim, 0.6)
    center_vec, neg_vec = np.zeros([word_size]), np.zeros([word_size])
    if center_words:  # the center vector is the mean of all center seed word vectors
        _ = 0
        for w in center_words:
            if w in word2vec.wv.vocab:
                center_vec += word2vec[w]
                _ += 1
        if _ > 0:
            center_vec /= _
    if neg_words:  # the negative vector is the mean of the negative seed word vectors (unused here)
        _ = 0
        for w in neg_words:
            if w in word2vec.wv.vocab:
                neg_vec += word2vec[w]
                _ += 1
        if _ > 0:
            neg_vec /= _
    queue_count = 1
    task_count = 0
    cluster = []
    queue = Queue()  # create the work queue
    for w in start_words:
        queue.put((0, w))
        if w not in cluster:
            cluster.append(w)
    while not queue.empty():
        idx, word = queue.get()
        queue_count -= 1
        task_count += 1
        sims = most_similar(word, center_vec, neg_vec)
        min_sim_ = min_sim + (max_sim - min_sim) * (1 - np.exp(-alpha * idx))
        if task_count % 10 == 0:
            log = '%s in cluster, %s in queue, %s tasks done, %s min_sim' % (
                len(cluster), queue_count, task_count, min_sim_)
            print(log)
        for i, j in sims:
            if j >= min_sim_:
                if i not in cluster and is_good(i):  # is_good is a hand-written filtering rule
                    queue.put((idx + 1, i))
                    cluster.append(i)
                    queue_count += 1
    return cluster
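The expansion threshold min_sim_ tightens with the BFS depth idx: it starts at min_sim for the seed words and approaches max_sim exponentially at rate alpha, so words found farther from the seeds must be more similar to be accepted. A quick check of the schedule with the defaults above:

import numpy as np

min_sim, max_sim, alpha = 0.6, 1.0, 0.25
for idx in range(5):
    print(idx, round(min_sim + (max_sim - min_sim) * (1 - np.exp(-alpha * idx)), 3))
# 0 0.6, 1 0.688, 2 0.757, 3 0.811, 4 0.853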
Example 6
def _add_doi(metadata, identifier, citekey):
    """Add an entry from a DOI."""
    info_messages = []
    with StatusMessage('Querying DOI metadata...') as message:
        if metadata.doi_exists(identifier):
            raise ZoiaAddException(f'DOI {identifier} already exists.')

        # Query Semantic Scholar to get the corresponding arxiv ID (if there is
        # one) in a separate thread.
        arxiv_queue = ThreadQueue()
        arxiv_process = ThreadProcess(
            target=lambda q, x: q.put(requests.get(x)),
            args=(
                arxiv_queue,
                f'https://api.semanticscholar.org/v1/paper/{identifier}',
            ),
        )
        arxiv_process.start()

        doi_metadata = _get_doi_metadata(identifier)

        metadatum = zoia.backend.metadata.Metadatum.from_dict(doi_metadata)

        if citekey is None:
            citekey = zoia.parse.citekey.create_citekey(metadata, metadatum)

        paper_dir = os.path.join(metadata.config.library_root, citekey)
        os.mkdir(paper_dir)

        message.update(
            'Querying Semantic Scholar for corresponding arXiv ID...')
        arxiv_metadata_response = arxiv_queue.get()
        arxiv_process.join()

        arxiv_metadata = json.loads(arxiv_metadata_response.text)

        if (arxiv_id := arxiv_metadata.get('arxivId')) is not None:
            doi_metadata['arxiv_id'] = arxiv_id
            message.update('Downloading PDF from arXiv...')
            pdf_response = requests.get(
                f'https://arxiv.org/pdf/{arxiv_id}.pdf')

            if pdf_response.status_code == 200:
                with open(os.path.join(paper_dir, 'document.pdf'), 'wb') as fp:
                    fp.write(pdf_response.content)
                doi_metadata['pdf_md5'] = hashlib.md5(
                    pdf_response.content).hexdigest()
            else:
                info_messages.append('Was unable to fetch a PDF')

        metadata[citekey] = doi_metadata
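ThreadQueue and ThreadProcess are assumed here to be thread-based stand-ins that give the worker a multiprocessing-like API, i.e. aliases along the lines of:

from queue import Queue as ThreadQueue
from threading import Thread as ThreadProcess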
Example 7
def run_tasks(tasks, context=None, nb_threads=1, watchdog=None):
    got_keyboard_interrupt = False
    watchdogs = [
        lambda _: _KEYBOARD_INTERRUPT_ERROR_MESSAGE
        if got_keyboard_interrupt else None
    ]
    if watchdog:
        watchdogs.append(watchdog)

    for task in tasks:
        check_task_dependencies(task)

    remaining_tasks = list(tasks)
    completed_tasks = list()

    pool = Pool(nb_threads)
    completed_tasks_queue = Queue()

    try:
        schedule_tasks_to_be_run(
            pop_runnable_tasks(remaining_tasks, completed_tasks, nb_threads),
            watchdogs, context, pool, completed_tasks_queue)

        while len(completed_tasks) != len(tasks):
            # wait for one task to complete
            completed_task = completed_tasks_queue.get()
            completed_tasks.append(completed_task)

            # schedule tasks to be run waiting for task success or simple completion
            tasks_to_be_run = pop_runnable_tasks(remaining_tasks,
                                                 completed_tasks, nb_threads)
            schedule_tasks_to_be_run(tasks_to_be_run, watchdogs, context, pool,
                                     completed_tasks_queue)

    except KeyboardInterrupt:
        got_keyboard_interrupt = True
        skip_all_tasks(tasks, remaining_tasks, completed_tasks, context, pool,
                       completed_tasks_queue,
                       _KEYBOARD_INTERRUPT_ERROR_MESSAGE)

    finally:
        pool.close()

    exceptions = [
        task.result.stacktrace for task in tasks
        if isinstance(task.result, TaskResultException)
    ]
    if exceptions:
        raise TasksExecutionFailure("Caught exceptions:\n%s" %
                                    "\n".join(exceptions))
Example 8
class Recon:
    def __init__(self, model_dir, path_group_dict: Dict[str, int]):
        self.feed = Queue()
        self.mid = Queue()
        self.out = Queue()
        t = MtCNN(self.feed, self.mid)
        t.start()
        t1 = EmbeddingCmp(self.feed, self.mid, self.out, path_group_dict,
                          model_dir)
        t1.start()

    def face_check(self, _im: np.ndarray, group: int):
        self.feed.put((_im, group))
        return self.out.get()
Example 9
class BaseActor(object):
    def __init__(self):
        """__mailbox: the actor's internal mailbox queue."""
        self.__mailbox = Queue()

    def recv(self):
        """Receive a message sent to the actor."""
        msg = self.__mailbox.get()
        if msg is ActorExit:
            # raise the exception (the template method handles it)
            raise ActorExit
        return msg

    def send(self, msg):
        """Send a message to the actor."""
        self.__mailbox.put(msg)

    def close(self):
        """Send the termination sentinel."""
        self.send(ActorExit)

    def start(self):
        self.__terminated_event = Event()  # used by join()
        t = threading.Thread(target=self.__templet)
        t.daemon = True  # run as a daemon thread
        t.start()

    def __templet(self):
        """Template method (run is overridden by subclasses)."""
        try:
            self.run()  # run the subclass's loop
        except ActorExit:
            pass  # let the thread exit cleanly
        finally:
            # signal termination
            self.__terminated_event.set()

    def join(self):
        # wait() returns once the event has been set
        self.__terminated_event.wait()

    def run(self):
        """
        Implemented by subclasses, e.g.:
        while True:
            msg = self.recv()
            print(msg)
        """
        pass
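A usage sketch for the template: a subclass only has to override run().

class PrintActor(BaseActor):
    def run(self):
        while True:
            msg = self.recv()  # raises ActorExit after close()
            print(msg)

actor = PrintActor()
actor.start()
actor.send('hello')
actor.close()
actor.join()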
Example 10
def get_stats():
	print 'Fetching NBA player stats...'
	stats_outfile = RUNDAY+'_nba_stats.csv'
	csvout = open(stats_outfile, 'wb')

	NUM_THREADS = 8

	in_queue = Queue()
	out_queue = Queue()
	queue_players(in_queue)

	while not in_queue.empty():	
		jobs = []

		for i in range(NUM_THREADS):
			if not in_queue.empty():
				thread = Process(target=get_stats_helper, args=(in_queue, out_queue))
				jobs.append(thread)
				thread.start()
		for thread in jobs:
			thread.join()	

		while not out_queue.empty():
			player = out_queue.get()
			del player['SUCCESS']
			try: 
				name = player['NAME']
			except KeyError as e:
				continue
			player['TIME'] = RUNDAY
			fieldnames = [
				'TIME',
				'NAME', 
				'JERSEY',
				'SPORT',
				'TEAM',
				'POSITION',
				'PTS',
				'REB',
				'AST',
				'URL'
			]
		
			csvwriter = csv.DictWriter(csvout, delimiter='|', fieldnames=fieldnames)
			csvwriter.writerow(player)
	csvout.close()

	print 'Finished fetching NBA player stats.'
	print 'Output saved in %s' % stats_outfile
Example 11
def _add_arxiv_id(metadata, identifier, citekey=None):
    info_messages = []
    with StatusMessage('Querying arXiv...') as message:
        if metadata.arxiv_id_exists(identifier):
            raise ZoiaAddException(f'arXiv paper {identifier} already exists.')

        # Downloading the PDF can take a while, so start it early in a separate
        # thread.
        pdf_queue = ThreadQueue()
        pdf_process = ThreadProcess(
            target=lambda q, x: q.put(requests.get(x)),
            args=(pdf_queue, f'https://arxiv.org/pdf/{identifier}.pdf'),
        )
        pdf_process.start()

        arxiv_metadata = _get_arxiv_metadata(identifier)

        if 'doi' in arxiv_metadata:
            message.update('Querying DOI information...')
            arxiv_metadata.update(_get_doi_metadata(arxiv_metadata['doi']))

        if citekey is None:
            metadatum = zoia.backend.metadata.Metadatum.from_dict(
                arxiv_metadata)
            citekey = zoia.parse.citekey.create_citekey(metadata, metadatum)
        paper_dir = os.path.join(metadata.config.library_root, citekey)
        os.mkdir(paper_dir)

        message.update(text='Downloading PDF...')
        pdf = pdf_queue.get()
        pdf_process.join()

        if pdf.status_code == 200:
            with open(os.path.join(paper_dir, 'document.pdf'), 'wb') as fp:
                fp.write(pdf.content)
            md5_hash = hashlib.md5(pdf.content).hexdigest()
            arxiv_metadata['pdf_md5'] = md5_hash
            if metadata.pdf_md5_hash_exists(md5_hash):
                raise ZoiaAddException(
                    f'arXiv paper {identifier} already exists.')
        else:
            info_messages.append('Was unable to fetch a PDF')

        metadata[citekey] = arxiv_metadata

    return citekey, metadatum, info_messages
Example 12
def main(test_cases):
    procs = []
    queue = Queue()
    for case_number, test_case in enumerate(test_cases):
        proc = Process(target=handle_test_case,
                       args=(test_case, case_number, queue))
        procs.append(proc)
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    results = [queue.get() for proc in procs]
    results.sort()
    for r in results:
        print(r[1])
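Joining every worker before reading from the queue can deadlock: the multiprocessing docs warn that a child which still has buffered queue data will not terminate, so join() may block forever on large results. A sketch of the same loop that drains the queue first (handle_test_case as assumed above):

def main(test_cases):
    queue = Queue()
    procs = [Process(target=handle_test_case, args=(tc, i, queue))
             for i, tc in enumerate(test_cases)]
    for proc in procs:
        proc.start()
    results = sorted(queue.get() for _ in procs)  # drain before joining
    for proc in procs:
        proc.join()
    for r in results:
        print(r[1])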
Example 13
def stat_files():
	all_files = []
	for root, dirs, files in os.walk('/home/gzguoyubo/mf/tw2/res/entities/custom_type'):
		ignore = False
		for ig_path in ignore_paths:
			if ig_path in root:
				ignore = True
		if ignore:
			continue
		for fname in files:
			if not fname.endswith('.py'):
				continue
			abs_file_path = join(root, fname)
			all_files.append(abs_file_path)
	
	file_sections = []
	file_total_nums = len(all_files)
	for i in xrange(P_NUM):
		start = i * file_total_nums / P_NUM
		stop = start + file_total_nums / P_NUM
		if i == P_NUM - 1:
			stop = -1
		file_sections.append(all_files[start : stop])

	res_queue = Queue()
	processes = []
	for section in file_sections:
		p = Process(target=stat_file, args=(section, res_queue))
		p.start()
		processes.append(p)
	
	for p in processes:
		p.join()
	
	total_stats = defaultdict(int)
	while not res_queue.empty():
		stat = res_queue.get()
		for author, cnt in stat.iteritems():
			total_stats[author] += cnt
	
	print total_stats
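Example 14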
class TreeCrawler(object):
    def __init__(self, base_path, callback=None):

        if not os.path.isdir(base_path):
            raise IOError("Base path not found: " + base_path)

        self.base_path = base_path
        self.unsearched = Manager().Queue()
        self.dirpath_queue = Queue()
        self.cpu_count = multiprocessing.cpu_count()
        self.pool = Pool(self.cpu_count)
        self.first_level_dirs = ""
        self.callback = callback

    def __explore_path(self):
        directories = []
        dirpath = self.dirpath_queue.get()
        for filename in os.walk(dirpath).next()[1]:
            fullname = os.path.join(dirpath, filename)
            directories.append(fullname)
        return directories

    def run_crawler(self):
        # acquire the list of all paths inside base path
        self.first_level_dirs = next(os.walk(self.base_path))[1]
        for path in self.first_level_dirs:
            self.unsearched.put(self.base_path + "/" + path)
        self.pool.map_async(self.parallel_worker, range(self.cpu_count))
        self.pool.close()
        self.unsearched.join()

    def parallel_worker(self, task_num):
        while True:
            dirpath = self.unsearched.get()
            print "Task: " + str(task_num) + " >>> Explored path: " + dirpath
            self.dirpath_queue.put(dirpath)
            dirs = self.__explore_path()
            for newdir in dirs:
                self.unsearched.put(newdir)
            self.unsearched.task_done()
Example 15
class ExThread(Thread):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__status_queue = Queue()

    def run(self, *args, **kwargs):
        try:
            super().run(*args, **kwargs)
        except Exception as ex:
            #except_type, except_class, tb = sys.exc_info()
            self.__status_queue.put(ex)
        self.__status_queue.put(None)

    def wait_for_exc_info(self):
        return self.__status_queue.get()

    def join(self):
        ex = self.wait_for_exc_info()
        if ex is None:
            return
        else:
            raise ex
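A usage sketch: join() re-raises any exception that escaped run() into the caller's thread.

def boom():
    raise RuntimeError('worker failed')

t = ExThread(target=boom)
t.start()
try:
    t.join()
except RuntimeError as ex:
    print('caught from worker thread:', ex)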
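Example 17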
def downloadFiles(downloadFiles, n_downloadFiles, size_downloadFiles):
    global q, writeDict
    downloadChunks = partitionDownload(downloadFiles, n_downloadFiles, size_downloadFiles)
    n_downloadChunks = len(downloadChunks)
    n_threads = min(n_concurrentConnections, n_downloadChunks)
    if (n_threads < n_concurrentConnections):
        print('Number of connection threads was limited by download size.')

    q = Queue()
    completedDict = {}
    writeDict = {}
    delList = []
    for df in downloadFiles:
        writeDict[df['file_name']] = [0, int(df['file_size']/size_chunk)]
    print('Starting {} connection threads.'.format(n_threads))
    with progress.Bar(expected_size=size_downloadFiles) as bar:
        tp = ThreadPool(n_threads)
        tp.imap_unordered(downloadChunk, downloadChunks)
        current_size = 0
        while current_size < size_downloadFiles:
            if q.qsize() > 0:
                f, chunk_id, content = q.get()
                completedDict[(f['file_name'], chunk_id)] = (content, f['path'])
            for writer in writeDict:
                chunk = completedDict.get((writer, writeDict[writer][0]))
                if chunk is not None:
                    current_size += writeChunk(chunk, writer, writeDict[writer][0])
                    del completedDict[(writer, writeDict[writer][0])]
                    writeDict[writer][0] += 1
                    if writeDict[writer][0] > writeDict[writer][1]:
                        delList.append(writer)
            if len(delList) > 0:
                for item in delList:
                    del writeDict[item]
                delList.clear()
            bar.show(current_size)
        tp.close()
        tp.join()
    print('Download complete.')
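This example assumes the thread-backed pool bundled with multiprocessing and a plain thread-safe queue for the shared q, i.e. imports along the lines of:

from multiprocessing.pool import ThreadPool
from queue import Queue

Example 18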
def _get_hardware_threaded(host):
    hardware_functions = [
        collect_win_bios_stats, collect_win_disk_stats, collect_win_mem_stats,
        collect_win_network_stats, collect_win_cpu_stats
    ]
    hardware_info = {}
    queue = Queue()
    list_of_processes = []

    for hardware in hardware_functions:
        process = _Process(target=hardware, args=(
            host,
            1,
            queue,
        ))
        list_of_processes.append(process)
        process.start()

    for process in list_of_processes:
        process.join()
        hardware_info.update(queue.get())
    return hardware_info
Example 19
def proxy_thr(fun):
    """
    Fetch proxy addresses with a pool of concurrent workers.
    :param fun: url-handler function used to extract the proxies found on one page
    :return: deduplicated list of proxy addresses
    """
    q = Queue()
    pool = Pool(40)

    proxys = []
    i = 1
    for u in proxy_url_list():
        pool.apply_async(fun, (q, u))
    pool.close()
    pool.join()

    while 1:
        if q.empty():
            break
        s = q.get().split("\n")
        proxys += s

    proxys = set(proxys)
    return list(proxys)
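A minimal sketch (an assumption) of a handler compatible with proxy_thr: it fetches one listing page and puts a newline-separated block of ip:port strings on the queue. The regex extraction is illustrative; the real parsing depends on the page layout.

import re
import requests

def parse_page(q, url):
    resp = requests.get(url, timeout=10)
    found = re.findall(r'\d{1,3}(?:\.\d{1,3}){3}:\d{2,5}', resp.text)
    if found:
        q.put('\n'.join(found))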
Example 20
class mmonly:
    def __init__(self):
        self.ua = UserAgent()
        self.headers = {}
        self.q1 = Queue(300)
        self.q2 = Queue(1000)
        self.lock = Lock()
        # self.path = 'D:/IMG/'
        self.main_page_urls = []
        self.subpageurls = []
        conn = sqlite3.connect('mmonly.db')
        conn.isolation_level = None
        try:
            conn.execute(
                '''create table subpageurl(url text primary key not null)''')
            conn.execute(
                '''create table imgurl(url text primary key not null)''')
        except Exception as e:
            print('Creating tables failed: {}'.format(e))
        finally:
            conn.close()
        self.rootpath = os.getcwd().replace('\\', '/')
        self.path = os.path.join(self.rootpath, 'imges/')
        if not os.path.exists(self.path):
            os.mkdir(self.path)

    def get_mainpage_urls(self, inurl):
        # collect all main-page urls
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers, timeout=10)
            req.encoding = 'gbk'
            cont = req.text
            content = pq(cont)
            elem = list(content('div #pageNum').children('a').items())
            for ele in elem:
                if ele.text() == '末页':  # '末页' is the "last page" link text
                    pgnum = int(ele.attr('href').split('_')[-1].split('.')[0])
            spurl = inurl.split('_')
            for i in range(1, pgnum + 1):
                self.main_page_urls.append('{}_{}_{}.html'.format(
                    spurl[0], spurl[1], str(i)))
            print('Main pages computed!!')
        except Exception as e:
            self.lock.acquire()
            print('Error reading main page: {}'.format(e))
            self.lock.release()
            return

    def get_subpage_urls(self, inurl):
        # collect every sub-page url
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers, timeout=10)
            req.encoding = 'gbk'
            cont = req.text
            content = pq(cont)
            elems = list(content('div .ABox').children('a').items())
            for ele in elems:
                url = ele.attr('href')
                self.q1.put(url)
                print('Got sub-page url: {}'.format(url))
        except Exception as e:
            self.lock.acquire()
            print('Error while walking main pages: {}'.format(e))
            self.lock.release()
            return

    def savesuburl(self):
        # store the sub-page urls in the database table subpageurl

        while 1:
            try:
                suburl = self.q1.get(timeout=20)
                self.subpageurls.append(suburl)
                print('Appended sub-page to list: {}'.format(suburl))
            except Exception as e:
                print('Reading sub-page url: {}'.format(e))
                time.sleep(2)
                if self.q1.empty():
                    time.sleep(2)
                    if self.q1.empty():
                        break
        conn = sqlite3.connect('mmonly.db')
        cur = conn.cursor()
        time.sleep(4)
        print('Writing sub-page urls to the database')
        for date in self.subpageurls:
            try:
                cur.execute('insert into subpageurl values(?)', (date, ))
                print('Wrote: {}'.format(date))
            except Exception as er:
                print('Database write error: {}'.format(er))

        conn.commit()
        conn.close()
        print('Write finished!!')

    def get_img_url(self, inurl):
        # get the image addresses
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers, timeout=10)
            time.sleep(0.2)
            req.encoding = 'gbk'
            cont = req.text
            content = pq(cont)
            imgnum = int(content('.totalpage').text())
            urlsp = '.'.join(inurl.split('.')[:-1])
            for n in range(1, imgnum + 1):
                imgpage = '{}_{}.html'.format(urlsp, n)
                self.headers['User-Agent'] = self.ua.random
                try:
                    req = requests.get(imgpage,
                                       headers=self.headers,
                                       timeout=10)
                    time.sleep(0.3)
                    req.encoding = 'gbk'
                    cont = req.text
                    content = pq(cont)
                    imgurl = content('.down-btn').attr('href')
                    self.q2.put(imgurl)
                except Exception as ee:
                    print('Error getting image url: {}'.format(ee))
                print('Got image url: {}'.format(imgurl))
        except Exception as e:
            print('Error getting image page address: {}'.format(e))
            return

    def download(self, inurl):
        # download one image
        # inurl = q.get(timeout=10)
        na = inurl.split('/')
        imgname = '{}{}'.format(na[-2], na[-1])
        imgpath = '{}{}'.format(self.path, imgname)
        statu = os.path.exists(imgpath)
        if not statu:
            self.headers['User-Agent'] = self.ua.random
            try:
                req = requests.get(inurl, headers=self.headers,
                                   timeout=8).content
                with open(imgpath, 'wb') as f:
                    f.write(req)
                self.lock.acquire()
                print('Downloaded image: {}'.format(imgname))
                self.lock.release()
            except Exception as e:
                self.lock.acquire()
                print('Download error: {}'.format(e))
                self.lock.release()
        else:
            self.lock.acquire()
            print('Duplicate image: {}'.format(imgname))
            self.lock.release()

    def run(self, inurl):
        ch = int(
            input('Enter 1 to crawl pages\nEnter 2 to download images\nEnter 3 to quit\nChoice: '))
        if ch == 1:
            self.get_mainpage_urls(inurl)
            time.sleep(4)
            pool1 = Pool(20)
            for mainurl in self.main_page_urls:
                pool1.apply_async(self.get_subpage_urls, (mainurl, ))
            time.sleep(1)
            self.savesuburl()
            pool1.close()
            pool1.join()
            print('Sub-page collection finished!!!')
            self.run('http://www.mmonly.cc/mmtp/list_9_2.html')
        elif ch == 2:
            conn = sqlite3.connect('mmonly.db')
            cur = conn.cursor()

            pool2 = Pool(10)
            pool3 = Pool(30)
            cur.execute('select * from subpageurl')
            suburls = cur.fetchall()

            while 1:
                for nn in range(200):
                    try:
                        for i in suburls:
                            pool2.apply_async(self.get_img_url, i)
                            cur.execute('delete from subpageurl where url=?',
                                        i)

                        while 1:
                            img = self.q2.get(timeout=20)
                            pool3.apply_async(self.download, (img, ))
                    except Exception as e:
                        print('Reading sub-page urls from database: {}'.format(e))
                        time.sleep(2)
                        if self.q2.empty():
                            time.sleep(2)
                            if self.q2.empty():
                                break

                conn.commit()
                conn.close()
                conn = sqlite3.connect('mmonly.db')
                cur = conn.cursor()
                cur.execute('select * from subpageurl')
                suburls = cur.fetchall()
                time.sleep(2)
                if self.q2.empty():
                    time.sleep(2)
                    if self.q2.empty():
                        break
            pool3.close()
            pool2.close()
            pool3.join()
            pool2.join()
        else:
            print('Exiting the program!')
Example 21
class ParallelDownloader(URL_Fetcher):
    'Parallel threaded web page downloader'

    def __init__(self,
                 db_name,
                 proc_count,
                 site_base_url,
                 fUseCache=True,
                 fCacheSearchPages=True,
                 fUseCookies=False,
                 timeout=secHTTP_WAIT_TIMEOUT,
                 search_proc_count=2,
                 proxies=None):

        self.proxies = proxies
        self.queue = Queue()
        self.fSaveSearchPages = fCacheSearchPages
        self.site_base_url = site_base_url
        self.pool = Pool(processes=proc_count)

        self.search_queue = Queue()
        self.url_extract_pool = Pool(processes=search_proc_count)

        URL_Fetcher.__init__(self,
                             db_name,
                             fUseCache,
                             fUseCookies,
                             timeout=timeout,
                             proxies=proxies)

    def process_urls_from_search_queue(self):
        while not self.search_queue.empty():
            search_page_url = self.search_queue.get()
            #			logOut('search pages queue size: %d'%self.search_queue.qsize())
            logDbg('search page: %s' % search_page_url)

            search_page = self.get_page(search_page_url,
                                        fUseCache=self.fSaveSearchPages)
            rel_urls = extract_data_xpath(search_page, self.url_extract_xpath)
            #rel_urls = self.extract_page_xpath(self.url_extract_xpath, search_page_url)
            #			logOut('URLs from %s extracted'%search_page_url)

            logOut('%d urls extracted from [%s]. Queuing...' %
                   (len(rel_urls), search_page_url))
            logDbg('Extracted urls: %s. Queuing to download...' % rel_urls)
            list(map(self.queue.put, self.prefix_site_base_url(rel_urls)))

        self.queue.put(None)
        self.postprocess_search_page_list(rel_urls, search_page)

    def queue_pages(self, url_list):
        list(map(self.queue.put, url_list))

        # sentinel marking the end of the job queue
        self.queue.put(None)

    def postprocess_search_page_list(self, url, page):
        pass

    def prefix_site_base_url(self, rel_urls):
        return [self.site_base_url + url for url in rel_urls]

    def process_pages(self, page_processor, *add_processor_args):
        self.page_processor = page_processor
        self.add_pprocessor_args = add_processor_args
        self.pool.apply(self.process_page)

    def process_page(self):
        while True:
            url = self.queue.get()
            logDbg('Url got from queue: %s' % url)
            if not url:
                break

            page = self.get_page(url)  #, proxies=self.proxies

            #logOut('pp_arg_list: [%s]'%pp_arg_list)
            if page:
                self.page_processor(url, page, *self.add_pprocessor_args)
Example 22
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and manages
    a playback queue.

    """
    def __init__(self):
        self.queue = list()
        self.queue_index = -1
        self.play_queue_order = list()
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Create multiprocess queues
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Workers
        self.workers = list()

    def set_play_mode(self, mode):
        """ Set the playback mode.

        :param mode: current valid values are "NORMAL" and "SHUFFLE"

        """
        self.current_play_mode = getattr(self.play_modes, mode)
        self.__update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Add the audio stream of a YouTube video to the
        playback queue.

        :param arg: a search string

        """
        logging.info('arg : %s', arg)
        try:

            yt_video = pafy.new(arg)
            yt_audio = yt_video.getbestaudio(preftype="webm")
            if not yt_audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))

            yt_info = VideoInfo(ytid=arg, title=yt_audio.title)
            self.add_to_playback_queue(audio=yt_audio,
                                       video=yt_video,
                                       info=yt_info)

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Add all audio streams in a YouTube playlist to the playback queue.

        :param arg: a YouTube playlist id

        """
        logging.info('arg : %s', arg)
        try:
            count = len(self.queue)

            playlist = pafy.get_playlist2(arg)
            if len(playlist) > 0:
                for yt_video in playlist:
                    self.add_to_playback_queue(video=yt_video, \
                                               info=VideoInfo(ytid=yt_video.videoid, \
                                                              title=yt_video.title))

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Search YouTube and add the audio streams to the
        playback queue.

        :param arg: a search string

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            wdata = pafy.call_gdata('search', query)

            wdata2 = wdata
            count = 0
            while True:
                for track_info in get_tracks_from_json(wdata2):
                    self.add_to_playback_queue(info=track_info)
                    count += 1

                if count > 100:
                    break
                if not wdata2.get('nextPageToken'):
                    break
                query['pageToken'] = wdata2['nextPageToken']
                wdata2 = pafy.call_gdata('search', query)

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add all audio
        streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search to find
        alternatives if the original mix cannot be found.

        """
        logging.info('arg : %s', arg)
        yt_video = None
        try:
            count = len(self.queue)

            yt_video = pafy.new(arg)
            playlist = yt_video.mix
            if len(playlist) > 0:
                for yt_video in playlist:
                    video_id = yt_video.videoid
                    video_title = yt_video.title
                    yt_info = VideoInfo(ytid=video_id, title=video_title)
                    self.add_to_playback_queue(video=yt_video, info=yt_info)

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except IndexError:
            if not feelinglucky:
                raise ValueError
            else:
                print_wrn("[YouTube] Could not find a mix for '{0}'. "\
                          "Searching YouTube instead. Feeling lucky?." \
                          .format(arg.encode('utf-8')))
                if yt_video.title:
                    self.enqueue_audio_search(yt_video.title)
                else:
                    self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Obtain a YouTube mix associated to a given textual search and add all the
        audio streams in the mix playlist to the playback queue.

        :param arg: a search string

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            wdata = pafy.call_gdata('search', query)

            wdata2 = wdata
            count = len(self.queue)
            for track_info in get_tracks_from_json(wdata2):
                if track_info and track_info.ytid:
                    try:
                        self.enqueue_audio_mix(track_info.ytid,
                                               feelinglucky=False)
                        break
                    except ValueError:
                        logging.info(
                            'Could not find a mix. Trying another video')

            if count == len(self.queue):
                raise ValueError

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def current_audio_stream_title(self):
        """ Retrieve the current stream's title.

        """
        stream = self.now_playing_stream
        title = ''
        if stream:
            title = to_ascii(stream['a'].title).encode("utf-8")
        return title

    def current_audio_stream_author(self):
        """ Retrieve the current stream's author.

        """
        stream = self.now_playing_stream
        author = ''
        if stream:
            author = to_ascii(stream['v'].author).encode("utf-8")
        return author

    def current_audio_stream_file_size(self):
        """ Retrieve the current stream's file size.

        """
        stream = self.now_playing_stream
        size = 0
        if stream:
            size = stream['a'].get_filesize()
        return size

    def current_audio_stream_duration(self):
        """ Retrieve the current stream's duration.

        """
        stream = self.now_playing_stream
        duration = ''
        if stream:
            duration = to_ascii(stream['v'].duration).encode("utf-8")
        return duration

    def current_audio_stream_bitrate(self):
        """ Retrieve the current stream's bitrate.

        """
        stream = self.now_playing_stream
        bitrate = ''
        if stream:
            bitrate = stream['a'].bitrate
        return bitrate

    def current_audio_stream_view_count(self):
        """ Retrieve the current stream's view count.

        """
        stream = self.now_playing_stream
        viewcount = 0
        if stream:
            viewcount = stream['v'].viewcount
        return viewcount

    def current_audio_stream_description(self):
        """ Retrieve the current stream's description.

        """
        stream = self.now_playing_stream
        description = ''
        if stream:
            description = to_ascii(stream['v'].description).encode("utf-8")
        return description

    def current_audio_stream_file_extension(self):
        """ Retrieve the current stream's file extension.

        """
        stream = self.now_playing_stream
        file_extension = ''
        if stream:
            file_extension = to_ascii(stream['a'].extension).encode("utf-8")
        return file_extension

    def current_audio_stream_video_id(self):
        """ Retrieve the current stream's video id.

        """
        stream = self.now_playing_stream
        video_id = ''
        if stream:
            video_id = to_ascii(stream['i'].ytid).encode("utf-8")
        return video_id

    def current_audio_stream_published(self):
        """ Retrieve the current stream's upload date and time.

        """
        stream = self.now_playing_stream
        if stream:
            published = to_ascii(stream['v'].published).encode("utf-8")
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """ Retrieve index in the queue (starting from 1) of the current stream and the
        length of the playback queue.

        """
        return self.queue_index + 1, len(self.queue)

    def clear_queue(self):
        """ Clears the playback queue.

        """
        self.queue = list()
        self.queue_index = -1

    def remove_current_url(self):
        """Remove the currently active url from the playback queue.

        """
        logging.info("")
        if len(self.queue) and self.queue_index:
            stream = self.queue[self.queue_index]
            print_nfo("[YouTube] [Stream] '{0}' removed." \
                      .format(to_ascii(stream['i'].title).encode("utf-8")))
            del self.queue[self.queue_index]
            self.queue_index -= 1
            if self.queue_index < 0:
                self.queue_index = 0
            self.__update_play_queue_order()

    def next_url(self):
        """ Retrieve the url of the next stream in the playback queue.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index += 1
                if (self.queue_index < len(self.queue)) \
                   and (self.queue_index >= 0):
                    next_stream = self.queue[self.play_queue_order \
                                            [self.queue_index]]
                    return self.__retrieve_stream_url(
                        next_stream, self.queue_index).rstrip()
                else:
                    self.queue_index = -1
                    return self.next_url()
            else:
                return ''
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.next_url()

    def prev_url(self):
        """ Retrieve the url of the previous stream in the playback queue.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index -= 1
                if (self.queue_index < len(self.queue)) \
                   and (self.queue_index >= 0):
                    prev_stream = self.queue[self.play_queue_order \
                                            [self.queue_index]]
                    return self.__retrieve_stream_url(
                        prev_stream, self.queue_index).rstrip()
                else:
                    self.queue_index = len(self.queue)
                    return self.prev_url()
            else:
                return ''
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.prev_url()

    def __update_play_queue_order(self):
        """ Update the queue playback order.

        A sequential order is applied if the current play mode is "NORMAL" or a
        random order if current play mode is "SHUFFLE"

        """
        total_streams = len(self.queue)
        if total_streams:
            if not len(self.play_queue_order):
                # Create a sequential play order, if empty
                self.play_queue_order = list(range(total_streams))
            if self.current_play_mode == self.play_modes.SHUFFLE:
                random.shuffle(self.play_queue_order)
            print_nfo("[YouTube] [Streams in queue] '{0}'." \
                      .format(total_streams))

    def __retrieve_stream_url(self, stream, queue_index):
        """ Retrieve a stream url

        """
        try:
            if not len(self.workers):
                for _ in range(WORKER_PROCESSES):
                    proc = Process(target=obtain_stream,
                                   args=(self.task_queue,
                                         self.done_queue))
                    proc.start()  # keep the Process handle; start() returns None
                    self.workers.append(proc)

            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream['q']] = stream

            stream = self.queue[queue_index]
            if not stream.get('v') or not stream.get('a'):
                logging.info("ytid : %s", stream['i'].ytid)
                video = stream.get('v')
                if not video:
                    video = pafy.new(stream['i'].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({'a': audio, 'v': video})

            # streams = stream.get('v').audiostreams[::-1]
            # pprint.pprint(streams)
            # dump_stream_info(streams)

            self.now_playing_stream = stream
            return stream['a'].url.encode("utf-8")

        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def add_to_playback_queue(self, audio=None, video=None, info=None):
        """ Add to the playback queue. """

        if audio:
            print_nfo("[YouTube] [Stream] '{0}' [{1}]." \
                      .format(to_ascii(audio.title).encode("utf-8"), \
                              to_ascii(audio.extension)))
        if info:
            print_nfo("[YouTube] [Stream] '{0}'." \
                      .format(to_ascii(info.title).encode("utf-8")))
        queue_index = len(self.queue)
        self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index))
        self.queue.append(dict(a=audio, v=video, i=info, q=queue_index))
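A hedged sketch of the obtain_stream worker assumed above: it resolves the pafy video/audio objects for each queued item and hands the enriched dict back through done_queue (the None sentinel is an assumption; the original never appears to shut the workers down explicitly):

def obtain_stream(task_queue, done_queue):
    for stream in iter(task_queue.get, None):
        video = stream.get('v') or pafy.new(stream['i'].ytid)
        audio = stream.get('a') or video.getbestaudio(preftype='webm')
        stream.update({'a': audio, 'v': video})
        done_queue.put(stream)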
Example 23
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and manages
    a playback queue.

    """

    def __init__(self):
        self.queue = list()
        self.queue_index = -1
        self.play_queue_order = list()
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Create multiprocess queues
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Workers
        self.workers = list()

    def set_play_mode(self, mode):
        """ Set the playback mode.

        :param mode: current valid values are "NORMAL" and "SHUFFLE"

        """
        self.current_play_mode = getattr(self.play_modes, mode)
        self.__update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Add the audio stream of a YouTube video to the
        playback queue.

        :param arg: a search string

        """
        logging.info('arg : %s', arg)
        try:

            yt_video = pafy.new(arg)
            yt_audio = yt_video.getbestaudio(preftype="webm")
            if not yt_audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))

            yt_info = VideoInfo(ytid=arg, title=yt_audio.title)
            self.add_to_playback_queue(audio=yt_audio, video=yt_video, info=yt_info)

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Add all audio streams in a YouTube playlist to the playback queue.

        :param arg: a YouTube playlist id

        """
        logging.info('arg : %s', arg)
        try:
            count = len(self.queue)

            playlist = pafy.get_playlist2(arg)
            if len(playlist) > 0:
                for yt_video in playlist:
                    self.add_to_playback_queue(video=yt_video, \
                                               info=VideoInfo(ytid=yt_video.videoid, \
                                                              title=yt_video.title))

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Search YouTube and add the audio streams to the
        playback queue.

        :param arg: a search string

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            wdata = pafy.call_gdata('search', query)

            wdata2 = wdata
            count = 0
            while True:
                for track_info in get_tracks_from_json(wdata2):
                    self.add_to_playback_queue(info=track_info)
                    count += 1

                if count > 100:
                    break
                if not wdata2.get('nextPageToken'):
                    break
                query['pageToken'] = wdata2['nextPageToken']
                wdata2 = pafy.call_gdata('search', query)

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add all audio
        streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search to find
        alternatives if the original mix cannot be found.

        """
        logging.info('arg : %s', arg)
        yt_video = None
        try:
            count = len(self.queue)

            yt_video = pafy.new(arg)
            playlist = yt_video.mix
            if len(playlist) > 0:
                for yt_video in playlist:
                    video_id = yt_video.videoid
                    video_title = yt_video.title
                    yt_info = VideoInfo(ytid=video_id, title=video_title)
                    self.add_to_playback_queue(video=yt_video, info=yt_info)

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except IndexError:
            if not feelinglucky:
                raise ValueError
            else:
                print_wrn("[YouTube] Could not find a mix for '{0}'. "\
                          "Searching YouTube instead. Feeling lucky?." \
                          .format(arg.encode('utf-8')))
                if yt_video.title:
                    self.enqueue_audio_search(yt_video.title)
                else:
                    self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Obtain a YouTube mix associated to a given textual search and add all the
        audio streams in the mix playlist to the playback queue.

        :param arg: a search string

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            wdata = pafy.call_gdata('search', query)

            wdata2 = wdata
            count = len(self.queue)
            for track_info in get_tracks_from_json(wdata2):
                if track_info and track_info.ytid:
                    try:
                        self.enqueue_audio_mix(track_info.ytid, feelinglucky=False)
                        break
                    except ValueError:
                        logging.info('Could not find a mix. Trying another video')

            if count == len(self.queue):
                raise ValueError

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_channel_uploads(self, arg):
        """Add all audio streams in a YouTube channel to the playback queue.

        :param arg: a YouTube channel url

        """
        logging.info('arg : %s', arg)
        try:
            count = len(self.queue)

            channel = pafy.get_channel(arg)
            if channel:
                for yt_video in channel.uploads:
                    self.add_to_playback_queue(video=yt_video, \
                                               info=VideoInfo(ytid=yt_video.videoid, \
                                                              title=yt_video.title))

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % arg))

    def enqueue_audio_channel_playlist(self, channel_name, playlist_name):
        """Search a playlist within a channel and if found, adds all the audio streams
        to the playback queue.

        :param arg: a YouTube playlist id

        """
        logging.info('args : %s - %s', channel_name, playlist_name)
        try:
            count = len(self.queue)
            channel = pafy.get_channel(channel_name)
            if channel:
                pl_dict = dict()
                pl_titles = list()
                pl_name = ''
                playlist = None
                for pl in channel.playlists:
                    print_nfo("[YouTube] [Playlist] '{0}'." \
                              .format(to_ascii(pl.title)))
                    if fuzz.partial_ratio(playlist_name, pl.title) > 50:
                        pl_dict[pl.title] = pl
                        pl_titles.append(pl.title)

                if len(pl_titles) > 1:
                    pl_name = process.extractOne(playlist_name, pl_titles)[0]
                    playlist = pl_dict[pl_name]
                elif len(pl_titles) == 1:
                    pl_name = pl_titles[0]
                    playlist = pl_dict[pl_name]

                if pl_name:
                    if pl_name.lower() != playlist_name.lower():
                        print_wrn("[YouTube] Playlist '{0}' not found. " \
                                  "Playing '{1}' instead." \
                                  .format(to_ascii(playlist_name), \
                                          to_ascii(pl_name)))
                    for yt_video in playlist:
                        self.add_to_playback_queue(video=yt_video, \
                                                   info=VideoInfo(ytid=yt_video.videoid, \
                                                                  title=yt_video.title))

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % channel_name))

    def current_audio_stream_title(self):
        """ Retrieve the current stream's title.

        """
        stream = self.now_playing_stream
        title = ''
        if stream:
            title = to_ascii(stream['a'].title).encode("utf-8")
        return title

    def current_audio_stream_author(self):
        """ Retrieve the current stream's author.

        """
        stream = self.now_playing_stream
        author = ''
        if stream:
            author = to_ascii(stream['v'].author).encode("utf-8")
        return author

    def current_audio_stream_file_size(self):
        """ Retrieve the current stream's file size.

        """
        stream = self.now_playing_stream
        size = 0
        if stream:
            size = stream['a'].get_filesize()
        return size

    def current_audio_stream_duration(self):
        """ Retrieve the current stream's duration.

        """
        stream = self.now_playing_stream
        duration = ''
        if stream:
            duration = to_ascii(stream['v'].duration).encode("utf-8")
        return duration

    def current_audio_stream_bitrate(self):
        """ Retrieve the current stream's bitrate.

        """
        stream = self.now_playing_stream
        bitrate = ''
        if stream:
            bitrate = stream['a'].bitrate
        return bitrate

    def current_audio_stream_view_count(self):
        """ Retrieve the current stream's view count.

        """
        stream = self.now_playing_stream
        viewcount = 0
        if stream:
            viewcount = stream['v'].viewcount
        return viewcount

    def current_audio_stream_description(self):
        """ Retrieve the current stream's description.

        """
        stream = self.now_playing_stream
        description = ''
        if stream:
            description = to_ascii(stream['v'].description).encode("utf-8")
        return description

    def current_audio_stream_file_extension(self):
        """ Retrieve the current stream's file extension.

        """
        stream = self.now_playing_stream
        file_extension = ''
        if stream:
            file_extension = to_ascii(stream['a'].extension).encode("utf-8")
        return file_extension

    def current_audio_stream_video_id(self):
        """ Retrieve the current stream's video id.

        """
        stream = self.now_playing_stream
        video_id = ''
        if stream:
            video_id = to_ascii(stream['i'].ytid).encode("utf-8")
        return video_id

    def current_audio_stream_published(self):
        """ Retrieve the current stream's upload date and time.

        """
        stream = self.now_playing_stream
        published = ''
        if stream:
            published = to_ascii(stream['v'].published).encode("utf-8")
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """ Retrieve index in the queue (starting from 1) of the current stream and the
        length of the playback queue.

        """
        return self.queue_index + 1, len(self.queue)

    def clear_queue(self):
        """ Clears the playback queue.

        """
        self.queue = list()
        self.queue_index = -1

    def remove_current_url(self):
        """Remove the currently active url from the playback queue.

        """
        logging.info("")
        if len(self.queue) and self.queue_index >= 0:
            stream = self.queue[self.queue_index]
            print_nfo("[YouTube] [Stream] '{0}' removed." \
                      .format(to_ascii(stream['i'].title).encode("utf-8")))
            del self.queue[self.queue_index]
            self.queue_index -= 1
            if self.queue_index < 0:
                self.queue_index = 0
            self.__update_play_queue_order()

    def next_url(self):
        """ Retrieve the url of the next stream in the playback queue.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index += 1
                if (self.queue_index < len(self.queue)) \
                   and (self.queue_index >= 0):
                    next_stream = self.queue[self.play_queue_order \
                                            [self.queue_index]]
                    return self.__retrieve_stream_url(next_stream, \
                                                      self.play_queue_order[self.queue_index]).rstrip()
                else:
                    self.queue_index = -1
                    return self.next_url()
            else:
                return ''
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("KeyError, or AttributeError exception")
            return self.next_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def prev_url(self):
        """ Retrieve the url of the previous stream in the playback queue.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index -= 1
                if (self.queue_index < len(self.queue)) \
                   and (self.queue_index >= 0):
                    prev_stream = self.queue[self.play_queue_order \
                                            [self.queue_index]]
                    return self.__retrieve_stream_url(prev_stream, \
                                                      self.play_queue_order[self.queue_index]).rstrip()
                else:
                    self.queue_index = len(self.queue)
                    return self.prev_url()
            else:
                return ''
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.prev_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def __update_play_queue_order(self):
        """ Update the queue playback order.

        A sequential order is applied if the current play mode is "NORMAL" or a
        random order if current play mode is "SHUFFLE"

        """
        total_streams = len(self.queue)
        if total_streams:
            if not len(self.play_queue_order):
                # Create a sequential play order, if empty
                self.play_queue_order = list(range(total_streams))
            if self.current_play_mode == self.play_modes.SHUFFLE:
                random.shuffle(self.play_queue_order)
            print_nfo("[YouTube] [Streams in queue] '{0}'." \
                      .format(total_streams))

    def __retrieve_stream_url(self, stream, queue_index):
        """ Retrieve a stream url

        """
        try:
            if not len(self.workers):
                for _ in range(WORKER_PROCESSES):
                    proc = Process(target=obtain_stream, \
                                   args=(self.task_queue, \
                                         self.done_queue))
                    proc.start()
                    self.workers.append(proc)

            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream['q']] = stream

            stream = self.queue[queue_index]
            if not stream.get('v') or not stream.get('a'):
                logging.info("ytid : %s", stream['i'].ytid)
                video = stream.get('v')
                if not video:
                    video = pafy.new(stream['i'].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({'a': audio, 'v': video})

            # streams = stream.get('v').audiostreams[::-1]
            # pprint.pprint(streams)
            # dump_stream_info(streams)

            self.now_playing_stream = stream
            return stream['a'].url.encode("utf-8")

        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def add_to_playback_queue(self, audio=None, video=None, info=None):
        """ Add to the playback queue. """

        if audio:
            print_nfo("[YouTube] [Stream] '{0}' [{1}]." \
                      .format(to_ascii(audio.title).encode("utf-8"), \
                              to_ascii(audio.extension)))
        if info:
            print_nfo("[YouTube] [Stream] '{0}'." \
                      .format(to_ascii(info.title).encode("utf-8")))
        queue_index = len(self.queue)
        self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index))
        self.queue.append(
            dict(a=audio, v=video, i=info, q=queue_index))
Ejemplo n.º 24
0
class EventEngine():
    '''Event-processing engine.'''

    def __init__(self):
        '''Initialize the engine.'''

        # Create the event queue
        self._queue = Queue()

        # Whether the engine is active
        # NOTE: Process workers copy this flag at fork time, so clearing it in
        # the parent does not stop them directly; stop() relies on the join
        # timeout below.
        self._active = False

        # Worker pool (these are processes, despite the attribute name)
        self._thread = [Process(target=self._run) for _ in range(5)]
        self._workers_n = 5

        # Registered handlers, keyed by event type
        self._handlers = {}

    def _run(self):
        '''Worker loop: drain the queue until the engine is deactivated.'''
        logger.info('Worker started...')
        while self._active:
            try:
                event = self._queue.get(block=True, timeout=0.3)
                logger.debug('About to process event: %s' % event)
                self._process(event)
            except Empty:
                pass

        logger.info('Worker stopped...')

    def _process(self, event):
        """Dispatch an event to every handler registered for its type."""
        # Look up the handlers listening for this event type
        handlers = self._handlers.get(event.type, [])

        for handler in handlers:
            try:
                if callable(handler):
                    ret_events = handler(event)
                    if not isinstance(ret_events, list):
                        ret_events = [ret_events]

                    for ret_event in ret_events:
                        if isinstance(ret_event, Event):
                            self.trigger(ret_event)

            except Exception as err:
                logger.warning('Event: %s handler error: %s' % (event.type, err))

    def start(self):
        '''Start the engine.'''
        # Run the workers in the background
        logger.info('Starting background workers')
        self._active = True

        for thread in self._thread:
            thread.start()

    def stop(self):
        logger.info('Stopping background workers')
        self._active = False

        # Wait for the workers to finish
        for thread in self._thread:
            thread.join(timeout=1)

    def register(self, event_type, handler):
        '''Register a handler for an event type.'''
        logger.info('register event type: %s, handler: %s' % (event_type, handler))
        handlers = self._handlers.pop(event_type, [])
        if handler not in handlers:
            handlers.append(handler)
        self._handlers[event_type] = handlers

    def unregister(self, event_type, handler):
        '''Unregister a handler for an event type.'''
        logger.debug('unregister event type: %s, handler: %s' % (event_type, handler))
        handlers = self._handlers.pop(event_type, [])
        if handler in handlers:
            handlers.remove(handler)

        if handlers:
            self._handlers[event_type] = handlers

    def trigger(self, event):
        '''Post an event onto the queue.'''
        logger.debug('trigger event: %s' % event)
        self._queue.put(event)
        return

    def handle(self, event_type, context=None):
        '''Decorator factory: register the wrapped function for event_type.'''
        def deco(func):
            @wraps(func)
            def event_handler(event):
                # TODO: inspect the parameters and inject the matching data automatically
                return func(event=event, context=context)

            self.register(event_type, event_handler)

            return func

        return deco

    def on_tick(self, context=None):
        return self.handle('on_tick', context=context)

    def on_open(self, context=None):
        return self.handle('on_open', context=context)

    def before_trade(self, context=None):
        return self.handle('before_trade', context=context)

    def pre_close(self, context=None):
        return self.handle('pre_close', context=context)

    def on_close(self, context=None):
        return self.handle('on_close', context=context)

    def after_close(self, context=None):
        return self.handle('after_close', context=context)
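A minimal driver for the engine above, assuming the engine's module defines the Event class checked in _process; a namedtuple stands in for the event message here. Handlers must be registered before start(), because the worker processes fork with a snapshot of _handlers.

import time
from collections import namedtuple

Tick = namedtuple('Tick', ['type'])  # hypothetical stand-in for an event message

engine = EventEngine()

@engine.on_tick()
def log_tick(event, context):
    # Runs inside a worker process for every 'on_tick' event.
    print('tick received:', event)

engine.start()
engine.trigger(Tick(type='on_tick'))
time.sleep(1)   # give a worker time to drain the queue
engine.stop()
for worker in engine._thread:
    worker.terminate()  # the forked workers never see _active flip to False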
Ejemplo n.º 25
0
class mzitu:
    def __init__(self):
        self.starturl = 'http://www.mzitu.com/all/'
        self.ua = UserAgent()
        self.qu = Queue(1000)
        self.qu2 = Queue(1000)
        self.suburls = []
        self.oldmurls = []
        conno = sqlite3.connect('mzituoldu.db')
        try:
            conno.execute('create table oldmurls(url text primary key)')
        except Exception as e:
            print('create table oldmurls: {}'.format(e))
        conno.close()
        conns = sqlite3.connect('mzitusubu.db')
        try:
            conns.execute(
                'create table suburls(url text primary key,status int default 1)'
            )
        except Exception as e:
            print('create table suburls: {}'.format(e))
        conns.close()

    def gethtml(self, url):
        headers = {}
        headers['User-Agent'] = self.ua.random
        headers['Referer'] = url
        try:
            resp = requests.get(url, headers=headers, timeout=8)
            if resp.encoding == 'ISO-8859-1':
                resp.encoding = requests.utils.get_encodings_from_content(
                    resp.text)[0]
            html = pq(resp.text)
            cont = resp.content
            return html, cont
        except Exception as e:
            print('gethtml error: {}'.format(e))
            logging.warning('gethtml error: {}'.format(e))
            return None, None

    def getmurl(self):
        resp = self.gethtml(self.starturl)[0]
        if resp is not None:
            elem = resp('.all')('a').items()
            murls = {}
            for ele in elem:
                murl = ele.attr('href')
                title = ele.text()
                murls[murl] = title
            return murls
        else:
            print('getmurl: failed to fetch page content!')
            logging.warning('getmurl: failed to fetch page content!')
            return None

    def getsuburl(self, url):
        resp = self.gethtml(url)[0]
        if resp is not None:
            elem = resp('.pagenavi').children('a')
            num = int(elem.eq(-2).children('span').text())
            for i in range(1, num + 1):
                suburl = '{}/{}'.format(url, i)
                try:
                    self.qu.put(suburl, timeout=60)
                except Exception as e:
                    print('qu.put error: {}'.format(e))
            self.qu2.put(url)
            print('Finished fetching: {}'.format(url))
        else:
            print('getsuburl: failed to fetch page content!')
            logging.warning('getsuburl: failed to fetch page content!')

    def getimgurl(self, url):

        try:
            resp = self.gethtml(url)[0]
            if resp is not None:
                elem = resp('.main-image')('img').attr('src')
                title = resp('.currentpath').next('h2').text()
                return elem, title
            else:
                print('getimgurl: failed to fetch page content!')
                logging.warning('getimgurl: failed to fetch page content!')
                return None, None
        except Exception as e:
            print('getimgurl error: {}'.format(e))
            logging.warning('getimgurl error: {}'.format(e))
            return None, None

    def mkdir(self, rootpath=os.path.abspath('.'), dir='mzitu'):
        global path
        path = os.path.join(rootpath, dir)
        if not os.path.exists(path):
            os.mkdir(path)

    def download(self, url):
        imgurl = self.getimgurl(url)
        global path
        imgname = '{}{}{}'.format(imgurl[0].split('/')[-3],
                                  imgurl[0].split('/')[-2],
                                  imgurl[0].split('/')[-1])
        imgpath = os.path.join(path, imgname)
        status = os.path.exists(imgpath)
        headers = {}
        if not status:
            for t in range(2):
                headers['User-Agent'] = self.ua.random
                headers['Referer'] = url
                headers['Host'] = 'i.meizitu.net'
                try:
                    resp = requests.get(imgurl[0], headers=headers, timeout=8)
                    with open(imgpath, 'wb') as f:
                        f.write(resp.content)
                    print('Downloaded: {}'.format(imgurl[0]))
                    break
                except Exception as e:
                    print('image download error: {}'.format(e))
                    logging.warning('image download error: {}'.format(e))
                    time.sleep(5)
        else:
            print('Already downloaded: {}'.format(imgname))

    def savesuburl(self):
        i = 0
        conn = sqlite3.connect('mzitusubu.db')
        cur = conn.cursor()
        suburls = []
        while 1:
            try:
                suburl = (self.qu.get(timeout=20), )
                suburls.append(suburl)
                i += 1
                if i >= 2000:
                    cur.executemany(
                        'insert or ignore into suburls(url) values(?)',
                        suburls)
                    conn.commit()
                    suburls = []
                    i = 0
            except Exception as e:
                print('suburls error: {}'.format(e))
                cur.executemany('insert or ignore into suburls(url) values(?)',
                                suburls)
                conn.commit()
                cur.close()
                conn.close()
                break
        print('suburls saved')

    def saveoldmurl(self):
        conn = sqlite3.connect('mzituoldu.db')
        cur = conn.cursor()
        oldmurls = []
        while 1:
            try:
                oldmurl = (self.qu2.get(timeout=20), )
                oldmurls.append(oldmurl)
            except Exception as e:
                print('oldmurl error: {}'.format(e))
                cur.executemany(
                    'insert or ignore into oldmurls(url) values(?)', oldmurls)
                conn.commit()
                oldmurls = []
                cur.close()
                conn.close()
                break
        print('oldmurls saved')

    def run(self):
        self.mkdir(rootpath='H:/')
        murls = self.getmurl()

        pool1 = Pool(50)
        pool1.apply_async(self.savesuburl)
        pool1.apply_async(self.saveoldmurl)

        conno = sqlite3.connect('mzituoldu.db')
        oldurls = conno.execute('select url from oldmurls').fetchall()
        conno.close()
        for murl in murls:
            omurl = (murl, )
            if omurl not in oldurls:
                pool1.apply_async(self.getsuburl, (murl, ))
                # oldurls.append(omurl)
            else:
                print('{} already fetched'.format(murl))
        pool1.close()
        pool1.join()

        pool2 = Pool(20)
        conns = sqlite3.connect('mzitusubu.db')
        cur = conns.cursor()
        while 1:
            surls = cur.execute(
                'select url from suburls where status=1').fetchmany(300)
            if surls == []:
                break
            oldsurls = []
            for surl in surls:
                pool2.apply_async(self.download, (surl[0], ))
                oldsurls.append(surl)
            time.sleep(8)
            cur.executemany('update suburls set status=2 where url=?',
                            oldsurls)
            conns.commit()
        cur.close()
        conns.close()
        pool2.close()
        pool2.join()
        print('Download complete!!!!')
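A minimal, hypothetical entry point for the scraper above. Note that run() hard-codes rootpath='H:/' for the image directory and creates its two sqlite files in the working directory, and the target site's markup may well have changed since this was written.

if __name__ == '__main__':
    spider = mzitu()
    spider.run()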
Ejemplo n.º 26
0
class DouYu:
    """Dou Yu dan mu spider."""
    @staticmethod
    def find_room():
        """Find the most hot 50 rooms of douyu."""
        result = []
        for page in range(0, 400, 30):
            datas = json.loads(
                requests.get("http://capi.douyucdn.cn/api/v1/live?"
                             "limit={}&offset={}".format(page + 30,
                                                         page)).text)["data"]
            for data in datas:
                result.append((data["game_name"], data["room_id"]))

        return list(set(result))

    @staticmethod
    def log(url, err):
        """Log when error happened."""
        with open("dy_log", "a") as f:
            f.write(time.time + "\t" + str(url) + "\t" + str(err))

    def __init__(self):
        """Initailization spider."""
        self.fname = time.strftime("%Y%m%d_%H%M%S")
        self.rooms = self.find_room()
        self.pool_queue = Queue()
        self.msg_queue = Queue()

    def pool_join(self):
        """Processes pool join function."""
        while 1:
            p = self.pool_queue.get()
            if p is None:
                return
            else:
                try:
                    p.join()
                except KeyboardInterrupt:
                    return
                except BaseException:
                    pass

    def record(self):
        """Record dan mu."""
        with open("record/" + self.fname + ".danmu", "w") as f:
            while 1:
                msg = self.msg_queue.get()
                if msg is None:
                    return
                else:
                    try:
                        f.write(msg)
                    except KeyboardInterrupt:
                        return
                    except BaseException:
                        pass

    def _run(self, room, game):
        """Run for multiprocessing."""
        dmc = DanMuClient('http://www.douyu.com/{}'.format(room))
        if not dmc.isValid():
            self.log('http://www.douyu.com/{}'.format(room), "Url not valid")

        @dmc.default
        def danmu_fn(msg):
            """Dan mu function."""
            res = "\t".join(
                (str(room), str(game), str(time.time()), msg["MsgType"],
                 msg["NickName"], msg["Content"], "\n"))

            self.msg_queue.put(res)

        dmc.start(blockThread=False)
        time.sleep(3600)
        dmc.stop()

    def run(self):
        """Run and get datas."""
        print(self.fname, len(self.rooms))

        p_re = Process(target=self.record)
        pool = Process(target=self.pool_join)
        pool.start()
        p_re.start()

        for (game, room) in self.rooms:
            p = Process(target=self._run, args=(room, game))
            p.start()
            self.pool_queue.put(p)

        self.pool_queue.put(None)
        pool.join()
        self.msg_queue.put(None)
        return p_re, self.fname
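A sketch of how the spider above might be driven. run() blocks until every room process has finished and returns the recorder process plus the session file name; the caller is expected to join the recorder so the .danmu file is fully flushed (a record/ directory is assumed to exist).

if __name__ == '__main__':
    spider = DouYu()
    recorder, fname = spider.run()
    recorder.join()   # wait for the dan mu recorder to drain its queue
    print('dan mu saved to record/' + fname + '.danmu')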
Ejemplo n.º 27
0
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and manages
    a playback queue.

    """
    def __init__(self, api_key=API_KEY):
        self.queue = list()
        self.queue_index = -1
        self.play_queue_order = list()
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Create multiprocess queues
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Workers
        self.workers = list()
        self.api_key = api_key if api_key != "" else API_KEY
        pafy.set_api_key(self.api_key)

    def set_play_mode(self, mode):
        """ Set the playback mode.

        :param mode: current valid values are "NORMAL" and "SHUFFLE"

        """
        self.current_play_mode = getattr(self.play_modes, mode)
        self._update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Add the audio stream of a YouTube video to the
        playback queue.

        :param arg: a search string

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio strean] : '{0}'. ".format(arg))

            yt_search = MEMORY.cache(run_youtube_search)
            yt_video = yt_search(arg)
            yt_audio = yt_video.getbestaudio(preftype="webm")
            if not yt_audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))

            yt_info = VideoInfo(ytid=arg, title=yt_audio.title)
            self._add_to_playback_queue(audio=yt_audio,
                                        video=yt_video,
                                        info=yt_info)

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Add all audio streams in a YouTube playlist to the playback queue.

        :param arg: a YouTube playlist id

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio playlist] : '{0}'. ".format(arg))
            count = len(self.queue)

            yt_pl_search = MEMORY.cache(run_youtube_playlist_search)
            playlist = yt_pl_search(arg)

            if len(playlist) > 0:
                for yt_video in playlist:
                    self._add_to_playback_queue(
                        video=yt_video,
                        info=VideoInfo(ytid=yt_video.videoid,
                                       title=yt_video.title),
                    )

            if count == len(self.queue):
                raise ValueError

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Search YouTube and add the audio streams to the
        playback queue.

        :param arg: a search string

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio search] : '{0}'. ".format(arg))
            yt_dt_search = MEMORY.cache(run_youtube_data_search)
            query = generate_search_query(arg, self.api_key)
            wdata = yt_dt_search("search", query)

            wdata2 = wdata
            count = 0
            while True:
                for track_info in get_tracks_from_json(wdata2):
                    self._add_to_playback_queue(info=track_info)
                    count += 1

                if count > 100:
                    break
                if not wdata2.get("nextPageToken"):
                    break
                query["pageToken"] = wdata2["nextPageToken"]
                wdata2 = yt_dt_search("search", query)

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add all audio
        streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search to find
        alternatives if the original mix cannot be found.

        """
        logging.info("arg : %s", arg)
        yt_video = None
        try:
            print_msg("[YouTube] [Audio mix] : '{0}'. ".format(arg))
            count = len(self.queue)

            yt_search = MEMORY.cache(run_youtube_search)
            yt_video = yt_search(arg)
            playlist = yt_video.mix
            if len(playlist) > 0:
                for yt_video in playlist:
                    video_id = yt_video.videoid
                    video_title = yt_video.title
                    yt_info = VideoInfo(ytid=video_id, title=video_title)
                    self._add_to_playback_queue(video=yt_video, info=yt_info)

            if count == len(self.queue):
                raise ValueError

            self._update_play_queue_order()

        except IndexError:
            if not feelinglucky:
                raise ValueError
            else:
                print_adv("[YouTube] Could not find a mix for '{0}'. "
                          "Searching YouTube instead. Feeling lucky?.".format(
                              arg.encode("utf-8")))
                if yt_video.title:
                    self.enqueue_audio_search(yt_video.title)
                else:
                    self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Obtain a YouTube mix associated to a given textual search and add all the
        audio streams in the mix playlist to the playback queue.

        :param arg: a search string

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio mix search] : '{0}'. ".format(arg))
            yt_dt_search = MEMORY.cache(run_youtube_data_search)
            wdata = yt_dt_search("search",
                                 generate_search_query(arg, self.api_key))

            wdata2 = wdata
            count = len(self.queue)
            for track_info in get_tracks_from_json(wdata2):
                if track_info and track_info.ytid:
                    try:
                        self.enqueue_audio_mix(track_info.ytid,
                                               feelinglucky=False)
                        break
                    except ValueError:
                        logging.info(
                            "Could not find a mix. Trying another video")

            if count == len(self.queue):
                raise ValueError

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_channel_uploads(self, arg):
        """Add all audio streams in a YouTube channel to the playback queue.

        :param arg: a YouTube channel url

        """
        logging.info("arg : %s", arg)
        try:
            print_msg(
                "[YouTube] [Audio channel uploads] : '{0}'. ".format(arg))
            count = len(self.queue)

            yt_ch_search = MEMORY.cache(run_youtube_channel_search)
            channel = yt_ch_search(arg)
            if channel:
                for yt_video in channel.uploads:
                    self._add_to_playback_queue(
                        video=yt_video,
                        info=VideoInfo(ytid=yt_video.videoid,
                                       title=yt_video.title),
                    )

            if count == len(self.queue):
                raise ValueError

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % arg))

    def enqueue_audio_channel_playlist(self, channel_name, playlist_name):
        """Search a playlist within a channel and if found, adds all the audio streams
        to the playback queue.

        :param channel_name: the name of a YouTube channel

        :param playlist_name: the name of a playlist within the channel

        """
        logging.info("args : %s - %s", channel_name, playlist_name)
        try:
            print_msg(
                "[YouTube] [Audio channel playlist] : '{0} - {1}'. ".format(
                    channel_name, playlist_name))
            count = len(self.queue)
            yt_ch_search = MEMORY.cache(run_youtube_channel_search)
            channel = yt_ch_search(channel_name)

            if channel:
                pl_dict = dict()
                pl_titles = list()
                pl_name = ""
                playlist = None
                for pl in channel.playlists:
                    print_nfo("[YouTube] [Playlist] '{0}'.".format(
                        to_ascii(pl.title)))
                    if fuzz.partial_ratio(playlist_name, pl.title) > 50:
                        pl_dict[pl.title] = pl
                        pl_titles.append(pl.title)

                if len(pl_titles) > 1:
                    pl_name = process.extractOne(playlist_name, pl_titles)[0]
                    playlist = pl_dict[pl_name]
                elif len(pl_titles) == 1:
                    pl_name = pl_titles[0]
                    playlist = pl_dict[pl_name]

                if pl_name:
                    if pl_name.lower() != playlist_name.lower():
                        print_adv("[YouTube] Playlist '{0}' not found. "
                                  "Playing '{1}' instead.".format(
                                      to_ascii(playlist_name),
                                      to_ascii(pl_name)))
                    for yt_video in playlist:
                        self._add_to_playback_queue(
                            video=yt_video,
                            info=VideoInfo(ytid=yt_video.videoid,
                                           title=yt_video.title),
                        )

            if count == len(self.queue):
                raise ValueError

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % channel_name))

    def current_audio_stream_title(self):
        """ Retrieve the current stream's title.

        """
        stream = self.now_playing_stream
        title = ""
        if stream:
            title = to_ascii(stream["a"].title)
        return title

    def current_audio_stream_author(self):
        """ Retrieve the current stream's author.

        """
        stream = self.now_playing_stream
        author = ""
        if stream:
            author = to_ascii(stream["v"].author)
        return author

    def current_audio_stream_file_size(self):
        """ Retrieve the current stream's file size.

        """
        stream = self.now_playing_stream
        size = 0
        if stream:
            size = stream["a"].get_filesize()
        return size

    def current_audio_stream_duration(self):
        """ Retrieve the current stream's duration.

        """
        stream = self.now_playing_stream
        duration = ""
        if stream:
            duration = to_ascii(stream["v"].duration)
        return duration

    def current_audio_stream_bitrate(self):
        """ Retrieve the current stream's bitrate.

        """
        stream = self.now_playing_stream
        bitrate = ""
        if stream:
            bitrate = stream["a"].bitrate
        return bitrate

    def current_audio_stream_view_count(self):
        """ Retrieve the current stream's view count.

        """
        stream = self.now_playing_stream
        viewcount = 0
        if stream:
            viewcount = stream["v"].viewcount
        return viewcount

    def current_audio_stream_description(self):
        """ Retrieve the current stream's description.

        """
        stream = self.now_playing_stream
        description = ""
        if stream:
            description = to_ascii(stream["v"].description)
        return description

    def current_audio_stream_file_extension(self):
        """ Retrieve the current stream's file extension.

        """
        stream = self.now_playing_stream
        file_extension = ""
        if stream:
            file_extension = to_ascii(stream["a"].extension)
        return file_extension

    def current_audio_stream_video_id(self):
        """ Retrieve the current stream's video id.

        """
        stream = self.now_playing_stream
        video_id = ""
        if stream:
            video_id = to_ascii(stream["i"].ytid)
        return video_id

    def current_audio_stream_published(self):
        """ Retrieve the current stream's upload date and time.

        """
        stream = self.now_playing_stream
        published = ""
        if stream:
            published = to_ascii(stream["v"].published)
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """ Retrieve index in the queue (starting from 1) of the current stream and the
        length of the playback queue.

        """
        return self.play_queue_order[self.queue_index] + 1, len(self.queue)

    def clear_queue(self):
        """ Clears the playback queue.

        """
        self.queue = list()
        self.queue_index = -1

    def remove_current_url(self):
        """Remove the currently active url from the playback queue.

        """
        logging.info("")
        if len(self.queue) and self.queue_index >= 0:
            stream = self.queue[self.queue_index]
            print_nfo("[YouTube] [Stream] '{0}' removed.".format(
                to_ascii(stream["i"].title)))
            del self.queue[self.queue_index]
            self.queue_index -= 1
            if self.queue_index < 0:
                self.queue_index = 0
            self._update_play_queue_order()

    def next_url(self):
        """ Retrieve the url of the next stream in the playback queue.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index += 1
                if (self.queue_index < len(self.queue)) and (self.queue_index
                                                             >= 0):
                    next_stream = self.queue[self.play_queue_order[
                        self.queue_index]]
                    return self._retrieve_stream_url(
                        next_stream,
                        self.play_queue_order[self.queue_index]).rstrip()
                else:
                    self.queue_index = -1
                    return self.next_url()
            else:
                return ""
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("KeyError, or AttributeError exception")
            return self.next_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def prev_url(self):
        """ Retrieve the url of the previous stream in the playback queue.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index -= 1
                if (self.queue_index < len(self.queue)) and (self.queue_index
                                                             >= 0):
                    prev_stream = self.queue[self.play_queue_order[
                        self.queue_index]]
                    return self._retrieve_stream_url(
                        prev_stream,
                        self.play_queue_order[self.queue_index]).rstrip()
                else:
                    self.queue_index = len(self.queue)
                    return self.prev_url()
            else:
                return ""
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.prev_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def _update_play_queue_order(self):
        """ Update the queue playback order.

        A sequential order is applied if the current play mode is "NORMAL" or a
        random order if current play mode is "SHUFFLE"

        """
        total_streams = len(self.queue)
        if total_streams:
            if not len(self.play_queue_order):
                # Create a sequential play order, if empty
                self.play_queue_order = list(range(total_streams))
            if self.current_play_mode == self.play_modes.SHUFFLE:
                random.shuffle(self.play_queue_order)
            print_nfo(
                "[YouTube] [Streams in queue] '{0}'.".format(total_streams))

    def _retrieve_stream_url(self, stream, queue_index):
        """ Retrieve a stream url

        """
        try:
            if not len(self.workers):
                for _ in range(WORKER_PROCESSES):
                    proc = Process(target=obtain_stream,
                                   args=(self.task_queue,
                                         self.done_queue))
                    proc.start()
                    self.workers.append(proc)

            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream["q"]] = stream

            stream = self.queue[queue_index]
            if not stream.get("v") or not stream.get("a"):
                logging.info("ytid : %s", stream["i"].ytid)
                video = stream.get("v")
                if not video:
                    yt_search = MEMORY.cache(run_youtube_search)
                    video = yt_search(stream["i"].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({"a": audio, "v": video})

            # streams = stream.get('v').audiostreams[::-1]
            # pprint.pprint(streams)
            # dump_stream_info(streams)

            self.now_playing_stream = stream
            return stream["a"].url

        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def _add_to_playback_queue(self, audio=None, video=None, info=None):
        """ Add to the playback queue. """

        if audio:
            print_nfo("[YouTube] [Stream] '{0}' [{1}].".format(
                to_ascii(audio.title), to_ascii(audio.extension)))
        if info:
            print_nfo("[YouTube] [Stream] '{0}'.".format(to_ascii(info.title)))
        queue_index = len(self.queue)
        self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index))
        self.queue.append(dict(a=audio, v=video, i=info, q=queue_index))
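A sketch of the intended call sequence for the proxy above, with a placeholder API key; the helper functions it relies on (run_youtube_search and friends) are module-level utilities not shown in this snippet.

proxy = tizyoutubeproxy(api_key="YOUR_API_KEY")  # placeholder key
proxy.set_play_mode("SHUFFLE")
proxy.enqueue_audio_search("lo-fi hip hop")
url = proxy.next_url()   # resolves and returns the first stream url
print(proxy.current_audio_stream_title(), url)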
Ejemplo n.º 28
0
def pipe_dream(layer, logger, args, backward_event, targets_queue, e,
               data_size, trainloader):

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    layer.train()

    # Rank 0 runs the front half of the model: it pushes minibatches forward,
    # keeps at most two activations in flight, and applies the gradients that
    # rank 1 sends back.
    if dist.get_rank() == 0:
        criterion.cuda(0)
        output_queue = ThreadQueue(2)
        data_iter = iter(trainloader)
        batch_idx = 0
        while True:
            try:
                if output_queue.qsize() == 2:
                    backward_event.wait()
                    optimizer.zero_grad()
                    grad = torch.zeros([args.batch_size, 128, 16, 16])
                    dist.recv(tensor=grad, src=1)
                    outputs = output_queue.get()
                    outputs.backward(grad.cuda(0))
                    optimizer.step()
                    backward_event.clear()
                    continue
                else:
                    inputs, targets = next(data_iter)
                    inputs = inputs.cuda(0)
                    targets_queue.put(targets.numpy(), block=False)
                    outputs = layer(inputs)
                    send_opt = dist.isend(tensor=outputs.cpu(), dst=1)
                    send_opt.wait()
                    output_queue.put(outputs)
                    batch_idx += 1
            except StopIteration:
                send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
                send_opt.wait()
                while output_queue.qsize() > 0:
                    #backward_event.wait()
                    optimizer.zero_grad()
                    grad = torch.zeros([args.batch_size, 128, 16, 16])
                    dist.recv(tensor=grad, src=1)
                    outputs = output_queue.get()
                    outputs.backward(grad.cuda(0))
                    optimizer.step()
                    #backward_event.clear()
                break
    # Rank 1 runs the back half: it receives activations from rank 0, computes
    # the loss, steps its own optimizer, and returns the gradient of the
    # received activations.
    elif dist.get_rank() == 1:
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(1)
        while True:
            print("while........................")
            try:
                rec_val = torch.zeros([args.batch_size, 128, 16, 16])
                dist.recv(tensor=rec_val, src=0)
                print("recv.......")
            except RuntimeError as error:
                print("runtime........................")
                #e.wait()
                break
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            optimizer.zero_grad()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(1)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            progress_bar(
                batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (train_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))
            if not backward_event.is_set():
                print("set.....")
                backward_event.set()
            send_opt = dist.isend(tensor=rec_val.grad.cpu(), dst=0)
            print("send.....")
            if batch_idx % 10 == 0:
                logger.error("train:" + str(train_loss / (batch_idx + 1)))

            batch_idx += 1
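pipe_dream assumes torch.distributed is already initialised with two ranks that share the event/queue plumbing. Below is a hedged launch sketch; layers, logger, args, data_size and trainloader are placeholders for objects the snippet does not define.

import torch.distributed as dist
import torch.multiprocessing as mp

def stage(rank, layers, logger, args, backward_event, targets_queue, e,
          data_size, trainloader):
    # Each rank joins the same process group before entering pipe_dream.
    dist.init_process_group(backend='gloo',
                            init_method='tcp://127.0.0.1:23456',
                            rank=rank, world_size=2)
    pipe_dream(layers[rank], logger, args, backward_event, targets_queue, e,
               data_size, trainloader)

if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    backward_event, e = ctx.Event(), ctx.Event()
    targets_queue = ctx.Queue()
    stages = [ctx.Process(target=stage,
                          args=(rank, layers, logger, args, backward_event,
                                targets_queue, e, data_size, trainloader))
              for rank in range(2)]
    for p in stages:
        p.start()
    for p in stages:
        p.join()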
Ejemplo n.º 29
0
class Handler(Process):
    """Multiprocessing adapted handler.  """
    def __init__(self, handler, args=(), **kwargs):
        assert issubclass(handler, logging.Handler)

        self._handler = handler(*args, **kwargs)
        # Patch so that StreamHandler subclasses format with a non-default encoding.
        if issubclass(handler, logging.StreamHandler):

            def _format(record):
                def _encode(i):
                    if isinstance(i, text_type):
                        try:
                            return i.encode(sys.stdout.encoding, 'replace')
                        except:  # pylint: disable=bare-except
                            pass
                    return i

                def _decode(i):
                    if isinstance(i, binary_type):
                        try:
                            return u(i)
                        except:  # pylint: disable=bare-except
                            pass
                    return i

                record.msg = _decode(record.msg)
                record.args = tuple(_decode(i) for i in record.args)
                ret = handler.format(self._handler, record)
                return _encode(ret)

            self._handler.format = _format
        self.queue = Queue(-1)

        super(Handler, self).__init__(name=str(self._handler))

        self.daemon = True
        self.start()

    def __getattr__(self, name):
        return getattr(self._handler, name)

    def run(self):
        while True:
            try:
                record = self.queue.get()
                self._handler.emit(record)
            except (KeyboardInterrupt, SystemExit):
                raise
            except EOFError:
                break
            except:  # pylint:disable=bare-except
                traceback.print_exc(file=sys.stderr)

    def _format_record(self, record):
        # ensure that exc_info and args
        # have been stringified.  Removes any chance of
        # unpickleable things inside and possibly reduces
        # message size sent over the pipe
        if record.args:
            record.msg = record.msg % record.args
            record.args = None
        if record.exc_info:
            self.format(record)
            record.exc_info = None

        return record

    def emit(self, record):
        """(override)logging.handler.emit  """

        try:
            msg = self._format_record(record)
            self.queue.put_nowait(msg)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:  # pylint:disable=bare-except
            self.handleError(record)

    def close(self):
        """(override)logging.handler.close  """

        self._handler.close()
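A minimal usage sketch for the handler above: wrap a standard logging handler so that formatting and emission happen in a dedicated child process while the parent only enqueues records.

import logging
import time

if __name__ == '__main__':
    mp_handler = Handler(logging.StreamHandler)  # any logging.Handler subclass works
    root = logging.getLogger()
    root.addHandler(mp_handler)
    root.setLevel(logging.INFO)
    root.info('emitted from the worker process')
    time.sleep(0.5)  # the emitter is a daemon; give it a moment to drain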