def send_emails(modeladmin, request, queryset):
    # Django admin action: build each email in a child process, then send it
    # from the parent once it arrives on the queue.
    messages = Queue()
    for user in queryset:
        process = Process(target=send_email, args=(user, messages))
        process.start()
        messages.get().send()
        process.join()
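A minimal sketch of the worker the admin action above assumes: the child process composes the message and hands it back over the queue so the parent can call .send(). The EmailMessage fields are illustrative, not taken from the original code.

from django.core.mail import EmailMessage

def send_email(user, messages):
    # Compose the message in the child process and return it via the queue.
    msg = EmailMessage(
        subject='Hello',
        body='Hi %s' % user.username,
        to=[user.email],
    )
    messages.put(msg)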
def _get_system_information_threaded(host):
    system_information_functions = [
        collect_win_application_stats, collect_win_bios_stats,
        collect_win_disk_stats, collect_win_local_account_stats,
        collect_win_local_group_stats, collect_win_mem_stats,
        collect_win_network_stats, collect_os_stats,
        collect_win_processes_stats, collect_win_cpu_stats,
        collect_win_services_stats
    ]
    system_information = {}
    queue = Queue()
    list_of_processes = []
    for collector in system_information_functions:
        process = _Process(target=collector, args=(host, 1, queue))
        list_of_processes.append(process)
        process.start()
    for process in list_of_processes:
        # NOTE: each collector puts exactly one dict; if the payloads ever get
        # large, drain the queue before joining to avoid blocking the children.
        process.join()
        system_information.update(queue.get())
    return system_information
def main():
    '''Load video, process frames, display to user.'''
    tque = Queue()        # (maxsize=120)
    framequeue = Queue()  # (maxsize=120)
    cthread = threading.Thread(target=cvworker, args=(tque, ))
    cthread.daemon = True
    cthread.start()
    tthread = threading.Thread(target=tfworker, args=(tque, framequeue))
    tthread.daemon = True  # daemon, so the worker loop ends when the user closes the window
    tthread.start()
    start = time.time()
    frame = 0
    videoend = False
    while True:
        cvw = cv2.waitKey(1)
        if cvw & 0xFF == ord('q'):
            break
        if not videoend:
            print('got', frame, time.time())
            frame += 1
            print('frame:', frame)
            f = framequeue.get()
            if f is None:
                # tfworker signals end of video by putting None on the queue
                videoend = True
            else:
                # time.sleep(1/30)  # limit to realtime
                cv2.imshow('frame', f)
    print('new took:', time.time() - start)
    cv2.destroyAllWindows()
class Actor(object):
    def __init__(self):
        # The actor's internal message mailbox queue
        self.__mailbox = Queue()

    def send(self, msg):
        self.__mailbox.put(msg)

    def recv(self):
        return self.__mailbox.get()
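A brief usage sketch for the Actor above, assuming the mailbox is a standard queue.Queue and that a separate thread drains it; the consumer function and the message values are illustrative only.

import threading
from queue import Queue

actor = Actor()

def consumer(a):
    # Drain two messages from the actor's mailbox.
    for _ in range(2):
        print('received:', a.recv())

t = threading.Thread(target=consumer, args=(actor,))
t.start()
actor.send('hello')
actor.send('world')
t.join()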
def find_words(start_words, center_words=None, neg_words=None,
               min_sim=0.6, max_sim=1., alpha=0.25):
    if center_words is None and neg_words is None:
        min_sim = max(min_sim, 0.6)
    center_vec, neg_vec = np.zeros([word_size]), np.zeros([word_size])
    if center_words:
        # The center vector is the average of all center seed word vectors.
        n = 0
        for w in center_words:
            if w in word2vec.wv.vocab:
                center_vec += word2vec[w]
                n += 1
        if n > 0:
            center_vec /= n
    if neg_words:
        # The negative vector is the average of all negative seed word vectors
        # (unused here).
        n = 0
        for w in neg_words:
            if w in word2vec.wv.vocab:
                neg_vec += word2vec[w]
                n += 1
        if n > 0:
            neg_vec /= n
    queue_count = 1
    task_count = 0
    cluster = []
    queue = Queue()  # build the work queue
    for w in start_words:
        queue.put((0, w))
        if w not in cluster:
            cluster.append(w)
    while not queue.empty():
        idx, word = queue.get()
        queue_count -= 1
        task_count += 1
        sims = most_similar(word, center_vec, neg_vec)
        min_sim_ = min_sim + (max_sim - min_sim) * (1 - np.exp(-alpha * idx))
        if task_count % 10 == 0:
            log = '%s in cluster, %s in queue, %s tasks done, %s min_sim' % (
                len(cluster), queue_count, task_count, min_sim_)
            print(log)
        for i, j in sims:
            if j >= min_sim_:
                # is_good is a hand-written filtering rule
                if i not in cluster and is_good(i):
                    queue.put((idx + 1, i))
                    cluster.append(i)
                    queue_count += 1
    return cluster
def _add_doi(metadata, identifier, citekey):
    """Add an entry from a DOI."""
    info_messages = []
    with StatusMessage('Querying DOI metadata...') as message:
        if metadata.doi_exists(identifier):
            raise ZoiaAddException(f'DOI {identifier} already exists.')

        # Query Semantic Scholar to get the corresponding arxiv ID (if there is
        # one) in a separate thread.
        arxiv_queue = ThreadQueue()
        arxiv_process = ThreadProcess(
            target=lambda q, x: q.put(requests.get(x)),
            args=(
                arxiv_queue,
                f'https://api.semanticscholar.org/v1/paper/{identifier}',
            ),
        )
        arxiv_process.start()

        doi_metadata = _get_doi_metadata(identifier)
        metadatum = zoia.backend.metadata.Metadatum.from_dict(doi_metadata)

        if citekey is None:
            citekey = zoia.parse.citekey.create_citekey(metadata, metadatum)
        paper_dir = os.path.join(metadata.config.library_root, citekey)
        os.mkdir(paper_dir)

        message.update(
            'Querying Semantic Scholar for corresponding arXiv ID...')
        arxiv_metadata_response = arxiv_queue.get()
        arxiv_process.join()
        arxiv_metadata = json.loads(arxiv_metadata_response.text)

        if (arxiv_id := arxiv_metadata.get('arxivId')) is not None:
            doi_metadata['arxiv_id'] = arxiv_id
            message.update('Downloading PDF from arXiv...')
            pdf_response = requests.get(f'https://arxiv.org/pdf/{arxiv_id}.pdf')

            if pdf_response.status_code == 200:
                with open(os.path.join(paper_dir, 'document.pdf'), 'wb') as fp:
                    fp.write(pdf_response.content)
                doi_metadata['pdf_md5'] = hashlib.md5(
                    pdf_response.content).hexdigest()
            else:
                info_messages.append('Was unable to fetch a PDF')

        metadata[citekey] = doi_metadata
def run_tasks(tasks, context=None, nb_threads=1, watchdog=None):
    got_keyboard_interrupt = False
    watchdogs = [
        lambda _: _KEYBOARD_INTERRUPT_ERROR_MESSAGE
        if got_keyboard_interrupt else None
    ]
    if watchdog:
        watchdogs.append(watchdog)

    for task in tasks:
        check_task_dependencies(task)

    remaining_tasks = list(tasks)
    completed_tasks = list()
    pool = Pool(nb_threads)
    completed_tasks_queue = Queue()

    try:
        schedule_tasks_to_be_run(
            pop_runnable_tasks(remaining_tasks, completed_tasks, nb_threads),
            watchdogs, context, pool, completed_tasks_queue)

        while len(completed_tasks) != len(tasks):
            # wait for one task to complete
            completed_task = completed_tasks_queue.get()
            completed_tasks.append(completed_task)
            # schedule tasks to be run waiting for task success or simple completion
            tasks_to_be_run = pop_runnable_tasks(remaining_tasks,
                                                 completed_tasks, nb_threads)
            schedule_tasks_to_be_run(tasks_to_be_run, watchdogs, context, pool,
                                     completed_tasks_queue)
    except KeyboardInterrupt:
        got_keyboard_interrupt = True
        skip_all_tasks(tasks, remaining_tasks, completed_tasks, context, pool,
                       completed_tasks_queue, _KEYBOARD_INTERRUPT_ERROR_MESSAGE)
    finally:
        pool.close()

    exceptions = [
        task.result.stacktrace for task in tasks
        if isinstance(task.result, TaskResultException)
    ]
    if exceptions:
        raise TasksExecutionFailure("Caught exceptions:\n%s" %
                                    "\n".join(exceptions))
class Recon:
    def __init__(self, model_dir, path_group_dict: Dict[str, int]):
        self.feed = Queue()
        self.mid = Queue()
        self.out = Queue()
        t = MtCNN(self.feed, self.mid)
        t.start()
        t1 = EmbeddingCmp(self.feed, self.mid, self.out, path_group_dict,
                          model_dir)
        t1.start()

    def face_check(self, _im: np.ndarray, group: int):
        self.feed.put((_im, group))
        return self.out.get()
class BaseActor(object):
    def __init__(self):
        """The actor's internal mailbox queue."""
        self.__mailbox = Queue()

    def recv(self):
        """Receive a message."""
        msg = self.__mailbox.get()
        if msg is ActorExit:
            # Raise the sentinel exception (handled by the template method)
            raise ActorExit
        return msg

    def send(self, msg):
        """Send a message to the actor."""
        self.__mailbox.put(msg)

    def close(self):
        """Send the termination sentinel."""
        self.send(ActorExit)

    def start(self):
        self.__terminated_event = Event()  # used by join()
        t = threading.Thread(target=self.__templet)
        t.daemon = True  # run as a daemon thread
        t.start()

    def __templet(self):
        """Template method (run is overridden by subclasses)."""
        try:
            self.run()
        except ActorExit:
            pass  # keep the thread from dying with an unhandled exception
        finally:
            # Signal that the actor has terminated
            self.__terminated_event.set()

    def join(self):
        # wait() returns as soon as the event is set
        self.__terminated_event.wait()

    def run(self):
        """
        Implemented by subclasses, e.g.:
            while True:
                msg = self.recv()
                print(msg)
        """
        pass
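A short usage sketch for BaseActor, assuming the imports shown and that ActorExit is a plain sentinel exception class; the PrintActor subclass is illustrative only.

import threading
from queue import Queue
from threading import Event

class ActorExit(Exception):
    """Sentinel used to shut an actor down."""
    pass

class PrintActor(BaseActor):
    def run(self):
        while True:
            msg = self.recv()
            print('got:', msg)

actor = PrintActor()
actor.start()
actor.send('hello')
actor.close()  # send the termination sentinel
actor.join()   # wait for the actor thread to finish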
def get_stats():
    print 'Fetching NBA player stats...'
    stats_outfile = RUNDAY + '_nba_stats.csv'
    csvout = open(stats_outfile, 'wb')

    NUM_THREADS = 8
    in_queue = Queue()
    out_queue = Queue()
    queue_players(in_queue)

    while not in_queue.empty():
        jobs = []
        for i in range(NUM_THREADS):
            if not in_queue.empty():
                thread = Process(target=get_stats_helper,
                                 args=(in_queue, out_queue))
                jobs.append(thread)
                thread.start()
        for thread in jobs:
            thread.join()

        while not out_queue.empty():
            player = out_queue.get()
            del player['SUCCESS']
            try:
                name = player['NAME']
            except KeyError as e:
                continue
            player['TIME'] = RUNDAY
            fieldnames = [
                'TIME', 'NAME', 'JERSEY', 'SPORT', 'TEAM', 'POSITION', 'PTS',
                'REB', 'AST', 'URL'
            ]
            csvwriter = csv.DictWriter(csvout, delimiter='|',
                                       fieldnames=fieldnames)
            csvwriter.writerow(player)

    csvout.close()

    print 'Finished fetching NBA player stats.'
    print 'Output saved in %s' % stats_outfile
def _add_arxiv_id(metadata, identifier, citekey=None):
    info_messages = []
    with StatusMessage('Querying arXiv...') as message:
        if metadata.arxiv_id_exists(identifier):
            raise ZoiaAddException(f'arXiv paper {identifier} already exists.')

        # Downloading the PDF can take a while, so start it early in a separate
        # thread.
        pdf_queue = ThreadQueue()
        pdf_process = ThreadProcess(
            target=lambda q, x: q.put(requests.get(x)),
            args=(pdf_queue, f'https://arxiv.org/pdf/{identifier}.pdf'),
        )
        pdf_process.start()

        arxiv_metadata = _get_arxiv_metadata(identifier)
        if 'doi' in arxiv_metadata:
            message.update('Querying DOI information...')
            arxiv_metadata.update(_get_doi_metadata(arxiv_metadata['doi']))

        if citekey is None:
            metadatum = zoia.backend.metadata.Metadatum.from_dict(
                arxiv_metadata)
            citekey = zoia.parse.citekey.create_citekey(metadata, metadatum)
        paper_dir = os.path.join(metadata.config.library_root, citekey)
        os.mkdir(paper_dir)

        message.update(text='Downloading PDF...')
        pdf = pdf_queue.get()
        pdf_process.join()

        if pdf.status_code == 200:
            with open(os.path.join(paper_dir, 'document.pdf'), 'wb') as fp:
                fp.write(pdf.content)
            md5_hash = hashlib.md5(pdf.content).hexdigest()
            arxiv_metadata['pdf_md5'] = md5_hash
            if metadata.pdf_md5_hash_exists(md5_hash):
                raise ZoiaAddException(
                    f'arXiv paper {identifier} already exists.')
        else:
            info_messages.append('Was unable to fetch a PDF')

        metadata[citekey] = arxiv_metadata

    return citekey, metadatum, info_messages
def main(test_cases):
    procs = []
    queue = Queue()
    case_number = 0
    for test_case in test_cases:
        proc = Process(target=handle_test_case,
                       args=(test_case, case_number, queue))
        procs.append(proc)
        case_number += 1
    for proc in procs:
        proc.start()
    # Drain the result queue before joining: joining first can deadlock if a
    # child is still blocked writing a large result to the queue.
    results = [queue.get() for proc in procs]
    for proc in procs:
        proc.join()
    results.sort()
    for r in results:
        print(r[1])
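A minimal sketch of the worker this driver assumes: it pushes a (case_number, output) tuple onto the queue, which is why the driver can sort the results back into input order. The body is illustrative only.

def handle_test_case(test_case, case_number, queue):
    # Hypothetical per-case computation; replace with the real logic.
    answer = sum(test_case)
    queue.put((case_number, 'Case #%d: %s' % (case_number + 1, answer)))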
def stat_files():
    all_files = []
    for root, dirs, files in os.walk(
            '/home/gzguoyubo/mf/tw2/res/entities/custom_type'):
        ignore = False
        for ig_path in ignore_paths:
            if ig_path in root:
                ignore = True
        if ignore:
            continue
        for fname in files:
            if not fname.endswith('.py'):
                continue
            abs_file_path = join(root, fname)
            all_files.append(abs_file_path)

    file_sections = []
    file_total_nums = len(all_files)
    for i in xrange(P_NUM):
        start = i * file_total_nums / P_NUM
        stop = start + file_total_nums / P_NUM
        if i == P_NUM - 1:
            # Give the last section everything that remains so no file is dropped.
            stop = file_total_nums
        file_sections.append(all_files[start:stop])

    res_queue = Queue()
    processes = []
    for section in file_sections:
        p = Process(target=stat_file, args=(section, res_queue))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    total_stats = defaultdict(int)
    while not res_queue.empty():
        stat = res_queue.get()
        for author, cnt in stat.iteritems():
            total_stats[author] += cnt
    print total_stats
class TreeCrawler(object):
    def __init__(self, base_path, callback=None):
        if not os.path.isdir(base_path):
            raise IOError("Base path not found: " + base_path)
        self.base_path = base_path
        self.unsearched = Manager().Queue()
        self.dirpath_queue = Queue()
        self.cpu_count = multiprocessing.cpu_count()
        self.pool = Pool(self.cpu_count)
        self.first_level_dirs = ""
        self.callback = callback

    def __explore_path(self):
        directories = []
        dirpath = self.dirpath_queue.get()
        for filename in os.walk(dirpath).next()[1]:
            fullname = os.path.join(dirpath, filename)
            directories.append(fullname)
        return directories

    def run_crawler(self):
        # acquire the list of all paths inside base path
        self.first_level_dirs = next(os.walk(self.base_path))[1]
        for path in self.first_level_dirs:
            self.unsearched.put(self.base_path + "/" + path)
        self.pool.map_async(self.parallel_worker, range(self.cpu_count))
        self.pool.close()
        self.unsearched.join()

    def parallel_worker(self, task_num):
        while True:
            dirpath = self.unsearched.get()
            print "Task: " + str(task_num) + " >>> Explored path: " + dirpath
            self.dirpath_queue.put(dirpath)
            dirs = self.__explore_path()
            for newdir in dirs:
                self.unsearched.put(newdir)
            self.unsearched.task_done()
class ExThread(Thread):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__status_queue = Queue()

    def run(self, *args, **kwargs):
        try:
            super().run(*args, **kwargs)
        except Exception as ex:
            # except_type, except_class, tb = sys.exc_info()
            self.__status_queue.put(ex)
        self.__status_queue.put(None)

    def wait_for_exc_info(self):
        return self.__status_queue.get()

    def join(self):
        ex = self.wait_for_exc_info()
        if ex is None:
            return
        else:
            raise ex
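A short usage sketch for ExThread, assuming the standard-library imports shown; the failing target function is illustrative only.

from queue import Queue
from threading import Thread

def boom():
    raise RuntimeError('worker failed')

t = ExThread(target=boom)
t.start()
try:
    t.join()  # re-raises the exception captured in the worker thread
except RuntimeError as err:
    print('caught from thread:', err)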
def downloadFiles(downloadFiles, n_downloadFiles, size_downloadFiles):
    global q, writeDict
    downloadChunks = partitionDownload(downloadFiles, n_downloadFiles,
                                       size_downloadFiles)
    n_downloadChunks = len(downloadChunks)
    n_threads = min(n_concurrentConnections, n_downloadChunks)
    if n_threads < n_concurrentConnections:
        print('Number of connection threads was limited by download size.')

    q = Queue()
    completedDict = {}
    writeDict = {}
    delList = []
    for df in downloadFiles:
        writeDict[df['file_name']] = [0, int(df['file_size'] / size_chunk)]

    print('Starting {} connection threads.'.format(n_threads))
    with progress.Bar(expected_size=size_downloadFiles) as bar:
        tp = ThreadPool(n_threads)
        tp.imap_unordered(downloadChunk, downloadChunks)

        current_size = 0
        while current_size < size_downloadFiles:
            if q.qsize() > 0:
                f, chunk_id, content = q.get()
                completedDict[(f['file_name'], chunk_id)] = (content, f['path'])
            for writer in writeDict:
                chunk = completedDict.get((writer, writeDict[writer][0]))
                if chunk is not None:
                    current_size += writeChunk(chunk, writer,
                                               writeDict[writer][0])
                    del completedDict[(writer, writeDict[writer][0])]
                    writeDict[writer][0] += 1
                    if writeDict[writer][0] > writeDict[writer][1]:
                        delList.append(writer)
            if len(delList) > 0:
                for item in delList:
                    del writeDict[item]
                delList.clear()
            bar.show(current_size)

        tp.close()
        tp.join()
    print('Download complete.')
def _get_hardware_threaded(host):
    hardware_functions = [
        collect_win_bios_stats, collect_win_disk_stats, collect_win_mem_stats,
        collect_win_network_stats, collect_win_cpu_stats
    ]
    hardware_info = {}
    queue = Queue()
    list_of_processes = []
    for hardware in hardware_functions:
        process = _Process(target=hardware, args=(host, 1, queue))
        list_of_processes.append(process)
        process.start()
    for process in list_of_processes:
        process.join()
        hardware_info.update(queue.get())
    return hardware_info
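A minimal sketch of the collector signature the two helpers above assume: each collector receives the host, a second numeric argument (passed as 1 by the callers, presumably a timeout or retry count), and the shared queue, and puts one dict of results onto the queue. The body and the values are illustrative only, not the real implementation.

def collect_win_bios_stats(host, timeout, queue):
    # Hypothetical collection step; replace with the real WMI/remote query.
    stats = {'bios': {'vendor': 'ExampleVendor', 'version': '1.0'}}
    queue.put(stats)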
def proxy_thr(fun):
    """Fetch proxy addresses with a pool of workers.

    :param fun: a function that processes one url and extracts the proxies
        found on that page
    :return: a de-duplicated list of proxies
    """
    q = Queue()
    pool = Pool(40)
    proxys = []
    for u in proxy_url_list():
        pool.apply_async(fun, (q, u))
    pool.close()
    pool.join()
    while True:
        if q.empty():
            break
        s = q.get().split("\n")
        proxys += s
    proxys = set(proxys)
    return list(proxys)
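A brief sketch of the kind of page handler proxy_thr expects: it is called as fun(q, url) and puts a newline-joined string of proxies onto the queue, matching the split("\n") in the collector loop. The function name, the regex, and the use of requests are illustrative assumptions.

import re
import requests

def extract_proxies(q, url):
    # Fetch the page and push "ip:port" strings, joined by newlines, onto the queue.
    try:
        html = requests.get(url, timeout=10).text
    except requests.RequestException:
        return
    found = re.findall(r'\d{1,3}(?:\.\d{1,3}){3}:\d{2,5}', html)
    if found:
        q.put("\n".join(found))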
class mmonly: def __init__(self): self.ua = UserAgent() self.headers = {} self.q1 = Queue(300) self.q2 = Queue(1000) self.lock = Lock() # self.path = 'D:/IMG/' self.main_page_urls = [] self.subpageurls = [] conn = sqlite3.connect('mmonly.db') conn.isolation_level = None try: conn.execute( '''create table subpageurl(url text primary key not null)''') conn.execute( '''create table imgurl(url text primary key not null)''') except (Exception) as e: print('创建表:{}'.format(e).decode('utf-8').encode(type)) finally: conn.close() self.rootpath = os.getcwd().replace('\\', '/') self.path = os.path.join(self.rootpath, 'imges/') if not os.path.exists(self.path): os.mkdir(self.path) def get_mainpage_urls(self, inurl): # 得到所有主页url self.headers['User-Agent'] = self.ua.random try: req = requests.get(inurl, headers=self.headers, timeout=10) req.encoding = 'gbk' cont = req.text content = pq(cont) elem = list(content('div #pageNum').children('a').items()) for ele in elem: if ele.text() == '末页': pgnum = int(ele.attr('href').split('_')[-1].split('.')[0]) spurl = inurl.split('_') for i in range(1, pgnum + 1): self.main_page_urls.append('{}_{}_{}.html'.format( spurl[0], spurl[1], str(i))) print('主页计算完毕!!'.decode('utf-8').encode(type)) except (Exception) as e: self.lock.acquire() print('主页读取错误:{}'.format(e).decode('utf-8').encode(type)) self.lock.release() return def get_subpage_urls(self, inurl): # 得到所有子页面url self.headers['User-Agent'] = self.ua.random try: req = requests.get(inurl, headers=self.headers, timeout=10) req.encoding = 'gbk' cont = req.text content = pq(cont) elems = list(content('div .ABox').children('a').items()) for ele in elems: url = ele.attr('href') self.q1.put(url) print('取得子页面地址:{}'.format(url).decode('utf-8').encode(type)) except (Exception) as e: self.lock.acquire() print('遍历主页面读取错误:{}'.format(e).decode('utf-8').encode(type)) self.lock.release() return def savesuburl(self): # 将子页面url存入数据库subpageurl表中 while 1: try: suburl = self.q1.get(timeout=20) self.subpageurls.append(suburl) print('列表存入子页面:{}'.format(suburl).decode('utf-8').encode(type)) except (Exception) as e: print('读取子页面url:{}'.format(e).decode('utf-8').encode(type)) time.sleep(2) if self.q1.empty(): time.sleep(2) if self.q1.empty(): break conn = sqlite3.connect('mmonly.db') cur = conn.cursor() time.sleep(4) print('开始将子页面url写入数据库'.decode('utf-8').encode(type)) for date in self.subpageurls: try: cur.execute('insert into subpageurl values(?)', (date, )) print('写入:{}'.format(date).decode('utf-8').encode(type)) except (Exception) as er: print('写入数据库错误:{}'.format(er).decode('utf-8').encode(type)) conn.commit() conn.close() print('写入完毕!!'.decode('utf-8').encode(type)) def get_img_url(self, inurl): # get图片地址 self.headers['User-Agent'] = self.ua.random try: req = requests.get(inurl, headers=self.headers, timeout=10) time.sleep(0.2) req.encoding = 'gbk' cont = req.text content = pq(cont) imgnum = int(content('.totalpage').text()) urlsp = '.'.join(inurl.split('.')[:-1]) for n in range(1, imgnum + 1): imgpage = '{}_{}.html'.format(urlsp, n) self.headers['User-Agent'] = self.ua.random try: req = requests.get(imgpage, headers=self.headers, timeout=10) time.sleep(0.3) req.encoding = 'gbk' cont = req.text content = pq(cont) imgurl = content('.down-btn').attr('href') self.q2.put(imgurl) except (Exception) as ee: print('get图片url错误:{}'.format(ee).decode('utf-8').encode( type)) print( 'get图片url:{}'.format(imgurl).decode('utf-8').encode(type)) except (Exception) as e: print('get图片页面地址错误:{}'.format(e).decode('utf-8').encode(type)) return def download(self, 
inurl): # 下载图片 # inurl = q.get(timeout=10) na = inurl.split('/') imgname = '{}{}'.format(na[-2], na[-1]) imgpath = '{}{}'.format(self.path, imgname) statu = os.path.exists(imgpath) if not statu: self.headers['User-Agent'] = self.ua.random try: req = requests.get(inurl, headers=self.headers, timeout=8).content with open(imgpath, 'wb') as f: f.write(req) self.lock.acquire() print('下载图片:{}'.format(imgname).decode('utf-8').encode(type)) self.lock.release() except (Exception) as e: self.lock.acquire() print('下载错误:{}'.format(e).decode('utf-8').encode(type)) self.lock.release() else: self.lock.acquire() print('重复图片:{}'.format(imgname).decode('utf-8').encode(type)) self.lock.release() def run(self, inurl): ch = eval( input('输入1表示采集页面\n输入2表示下载图片\n输入3退出程序\n输入:'.decode('utf-8').encode( type))) if ch == 1: self.get_mainpage_urls(inurl) time.sleep(4) pool1 = Pool(20) for mainurl in self.main_page_urls: pool1.apply_async(self.get_subpage_urls, (mainurl, )) time.sleep(1) self.savesuburl() pool1.close() pool1.join() print('子页面采集完毕!!!'.decode('utf-8').encode(type)) self.run('http://www.mmonly.cc/mmtp/list_9_2.html') elif ch == 2: conn = sqlite3.connect('mmonly.db') cur = conn.cursor() pool2 = Pool(10) pool3 = Pool(30) cur.execute('select * from subpageurl') suburls = cur.fetchall() while 1: for nn in range(200): try: for i in suburls: pool2.apply_async(self.get_img_url, i) cur.execute('delete from subpageurl where url=?', i) while 1: img = self.q2.get(timeout=20) pool3.apply_async(self.download, (img, )) except (Exception) as e: print('数据库读取子页面url:{}'.format(e).decode( 'utf-8').encode(type)) time.sleep(2) if self.q2.empty(): time.sleep(2) if self.q2.empty(): break conn.commit() conn.close() conn = sqlite3.connect('mmonly.db') cur = conn.cursor() cur.execute('select * from subpageurl') suburls = cur.fetchall() time.sleep(2) if self.q2.empty(): time.sleep(2) if self.q2.empty(): break pool3.close() pool2.close() pool3.join() pool2.join() else: print('结束程序!'.decode('utf-8').encode(type))
class ParallelDownloader(URL_Fetcher):
    'Parallel threaded web page downloader'

    def __init__(self, db_name, proc_count, site_base_url, fUseCache=True,
                 fCacheSearchPages=True, fUseCookies=False,
                 timeout=secHTTP_WAIT_TIMEOUT, search_proc_count=2,
                 proxies=None):
        self.proxies = proxies
        self.queue = Queue()
        self.fSaveSearchPages = fCacheSearchPages
        self.site_base_url = site_base_url
        self.pool = Pool(processes=proc_count)
        self.search_queue = Queue()
        self.url_extract_pool = Pool(processes=search_proc_count)
        URL_Fetcher.__init__(self, db_name, fUseCache, fUseCookies,
                             timeout=timeout, proxies=proxies)

    def process_urls_from_search_queue(self):
        while not self.search_queue.empty():
            search_page_url = self.search_queue.get()
            # logOut('search pages queue size: %d' % self.search_queue.qsize())
            logDbg('search page: %s' % search_page_url)
            search_page = self.get_page(search_page_url,
                                        fUseCache=self.fSaveSearchPages)
            rel_urls = extract_data_xpath(search_page, self.url_extract_xpath)
            # rel_urls = self.extract_page_xpath(self.url_extract_xpath, search_page_url)
            # logOut('URLs from %s extracted' % search_page_url)
            logOut('%d urls extracted from [%s]. Queuing...' %
                   (len(rel_urls), search_page_url))
            logDbg('Extracted urls: %s. Queuing to download...' % rel_urls)
            list(map(self.queue.put, self.prefix_site_base_url(rel_urls)))
            self.queue.put(None)
            self.postprocess_search_page_list(rel_urls, search_page)

    def queue_pages(self, url_list):
        list(map(self.queue.put, url_list))
        # None marks the end of the work queue
        self.queue.put(None)

    def postprocess_search_page_list(self, url, page):
        pass

    def prefix_site_base_url(self, rel_urls):
        return [self.site_base_url + url for url in rel_urls]

    def process_pages(self, page_processor, *add_processor_args):
        self.page_processor = page_processor
        self.add_pprocessor_args = add_processor_args
        self.pool.apply(self.process_page)

    def process_page(self):
        while True:
            url = self.queue.get()
            logDbg('Url got from queue: %s' % url)
            if not url:
                break
            page = self.get_page(url)  # , proxies=self.proxies
            # logOut('pp_arg_list: [%s]' % pp_arg_list)
            if page:
                self.page_processor(url, page, *self.add_pprocessor_args)
class tizyoutubeproxy(object): """A class that accesses YouTube, retrieves stream URLs and creates and manages a playback queue. """ def __init__(self): self.queue = list() self.queue_index = -1 self.play_queue_order = list() self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"]) self.current_play_mode = self.play_modes.NORMAL self.now_playing_stream = None # Create multiprocess queues self.task_queue = Queue() self.done_queue = Queue() # Workers self.workers = list() def set_play_mode(self, mode): """ Set the playback mode. :param mode: current valid values are "NORMAL" and "SHUFFLE" """ self.current_play_mode = getattr(self.play_modes, mode) self.__update_play_queue_order() def enqueue_audio_stream(self, arg): """Add the audio stream of a YouTube video to the playback queue. :param arg: a search string """ logging.info('arg : %s', arg) try: yt_video = pafy.new(arg) yt_audio = yt_video.getbestaudio(preftype="webm") if not yt_audio: raise ValueError(str("No WebM audio stream for : %s" % arg)) yt_info = VideoInfo(ytid=arg, title=yt_audio.title) self.add_to_playback_queue(audio=yt_audio, video=yt_video, info=yt_info) self.__update_play_queue_order() except ValueError: raise ValueError(str("Video not found : %s" % arg)) def enqueue_audio_playlist(self, arg): """Add all audio streams in a YouTube playlist to the playback queue. :param arg: a YouTube playlist id """ logging.info('arg : %s', arg) try: count = len(self.queue) playlist = pafy.get_playlist2(arg) if len(playlist) > 0: for yt_video in playlist: self.add_to_playback_queue(video=yt_video, \ info=VideoInfo(ytid=yt_video.videoid, \ title=yt_video.title)) if count == len(self.queue): raise ValueError self.__update_play_queue_order() except ValueError: raise ValueError(str("Playlist not found : %s" % arg)) def enqueue_audio_search(self, arg): """Search YouTube and add the audio streams to the playback queue. :param arg: a search string """ logging.info('arg : %s', arg) try: query = generate_search_query(arg) wdata = pafy.call_gdata('search', query) wdata2 = wdata count = 0 while True: for track_info in get_tracks_from_json(wdata2): self.add_to_playback_queue(info=track_info) count += 1 if count > 100: break if not wdata2.get('nextPageToken'): break query['pageToken'] = wdata2['nextPageToken'] wdata2 = pafy.call_gdata('search', query) self.__update_play_queue_order() except ValueError: raise ValueError(str("Could not find any mixes : %s" % arg)) def enqueue_audio_mix(self, arg, feelinglucky=True): """Obtain a YouTube mix associated to a given video id or url and add all audio streams in the mix playlist to the playback queue. :param arg: a YouTube video id :param feelinglucky: If True, it will perform another YouTube search to find alternatives if the original mix cannot be found. """ logging.info('arg : %s', arg) yt_video = None try: count = len(self.queue) yt_video = pafy.new(arg) playlist = yt_video.mix if len(playlist) > 0: for yt_video in playlist: video_id = yt_video.videoid video_title = yt_video.title yt_info = VideoInfo(ytid=video_id, title=video_title) self.add_to_playback_queue(video=yt_video, info=yt_info) if count == len(self.queue): raise ValueError self.__update_play_queue_order() except IndexError: if not feelinglucky: raise ValueError else: print_wrn("[YouTube] Could not find a mix for '{0}'. "\ "Searching YouTube instead. Feeling lucky?." 
\ .format(arg.encode('utf-8'))) if yt_video.title: self.enqueue_audio_search(yt_video.title) else: self.enqueue_audio_stream(arg) def enqueue_audio_mix_search(self, arg): """Obtain a YouTube mix associated to a given textual search and add all the audio streams in the mix playlist to the playback queue. :param arg: a search string """ logging.info('arg : %s', arg) try: query = generate_search_query(arg) wdata = pafy.call_gdata('search', query) wdata2 = wdata count = len(self.queue) for track_info in get_tracks_from_json(wdata2): if track_info and track_info.ytid: try: self.enqueue_audio_mix(track_info.ytid, feelinglucky=False) break except ValueError: logging.info( 'Could not find a mix. Trying another video') if count == len(self.queue): raise ValueError except ValueError: raise ValueError(str("Could not find any mixes : %s" % arg)) def current_audio_stream_title(self): """ Retrieve the current stream's title. """ stream = self.now_playing_stream title = '' if stream: title = to_ascii(stream['a'].title).encode("utf-8") return title def current_audio_stream_author(self): """ Retrieve the current stream's author. """ stream = self.now_playing_stream author = '' if stream: author = to_ascii(stream['v'].author).encode("utf-8") return author def current_audio_stream_file_size(self): """ Retrieve the current stream's file size. """ stream = self.now_playing_stream size = 0 if stream: size = stream['a'].get_filesize() return size def current_audio_stream_duration(self): """ Retrieve the current stream's duration. """ stream = self.now_playing_stream duration = '' if stream: duration = to_ascii(stream['v'].duration).encode("utf-8") return duration def current_audio_stream_bitrate(self): """ Retrieve the current stream's bitrate. """ stream = self.now_playing_stream bitrate = '' if stream: bitrate = stream['a'].bitrate return bitrate def current_audio_stream_view_count(self): """ Retrieve the current stream's view count. """ stream = self.now_playing_stream viewcount = 0 if stream: viewcount = stream['v'].viewcount return viewcount def current_audio_stream_description(self): """ Retrieve the current stream's description. """ stream = self.now_playing_stream description = '' if stream: description = to_ascii(stream['v'].description).encode("utf-8") return description def current_audio_stream_file_extension(self): """ Retrieve the current stream's file extension. """ stream = self.now_playing_stream file_extension = '' if stream: file_extension = to_ascii(stream['a'].extension).encode("utf-8") return file_extension def current_audio_stream_video_id(self): """ Retrieve the current stream's video id. """ stream = self.now_playing_stream video_id = '' if stream: video_id = to_ascii(stream['i'].ytid).encode("utf-8") return video_id def current_audio_stream_published(self): """ Retrieve the current stream's upload date and time. """ stream = self.now_playing_stream if stream: published = to_ascii(stream['v'].published).encode("utf-8") return published def current_audio_stream_queue_index_and_queue_length(self): """ Retrieve index in the queue (starting from 1) of the current stream and the length of the playback queue. """ return self.queue_index + 1, len(self.queue) def clear_queue(self): """ Clears the playback queue. """ self.queue = list() self.queue_index = -1 def remove_current_url(self): """Remove the currently active url from the playback queue. """ logging.info("") if len(self.queue) and self.queue_index: stream = self.queue[self.queue_index] print_nfo("[YouTube] [Stream] '{0}' removed." 
\ .format(to_ascii(stream['i'].title).encode("utf-8"))) del self.queue[self.queue_index] self.queue_index -= 1 if self.queue_index < 0: self.queue_index = 0 self.__update_play_queue_order() def next_url(self): """ Retrieve the url of the next stream in the playback queue. """ logging.info("") try: if len(self.queue): self.queue_index += 1 if (self.queue_index < len(self.queue)) \ and (self.queue_index >= 0): next_stream = self.queue[self.play_queue_order \ [self.queue_index]] return self.__retrieve_stream_url( next_stream, self.queue_index).rstrip() else: self.queue_index = -1 return self.next_url() else: return '' except (KeyError, AttributeError): # TODO: We don't remove this for now # del self.queue[self.queue_index] logging.info("exception") return self.next_url() def prev_url(self): """ Retrieve the url of the previous stream in the playback queue. """ logging.info("") try: if len(self.queue): self.queue_index -= 1 if (self.queue_index < len(self.queue)) \ and (self.queue_index >= 0): prev_stream = self.queue[self.play_queue_order \ [self.queue_index]] return self.__retrieve_stream_url( prev_stream, self.queue_index).rstrip() else: self.queue_index = len(self.queue) return self.prev_url() else: return '' except (KeyError, AttributeError): # TODO: We don't remove this for now # del self.queue[self.queue_index] logging.info("exception") return self.prev_url() def __update_play_queue_order(self): """ Update the queue playback order. A sequential order is applied if the current play mode is "NORMAL" or a random order if current play mode is "SHUFFLE" """ total_streams = len(self.queue) if total_streams: if not len(self.play_queue_order): # Create a sequential play order, if empty self.play_queue_order = range(total_streams) if self.current_play_mode == self.play_modes.SHUFFLE: random.shuffle(self.play_queue_order) print_nfo("[YouTube] [Streams in queue] '{0}'." \ .format(total_streams)) def __retrieve_stream_url(self, stream, queue_index): """ Retrieve a stream url """ try: if not len(self.workers): for _ in range(WORKER_PROCESSES): proc = Process(target=obtain_stream, \ args=(self.task_queue, \ self.done_queue)).start() self.workers.append(proc) while not self.done_queue.empty(): stream = self.done_queue.get() self.queue[stream['q']] = stream stream = self.queue[queue_index] if not stream.get('v') or not stream.get('a'): logging.info("ytid : %s", stream['i'].ytid) video = stream.get('v') if not video: video = pafy.new(stream['i'].ytid) audio = video.getbestaudio(preftype="webm") if not audio: logging.info("no suitable audio found") raise AttributeError() stream.update({'a': audio, 'v': video}) # streams = stream.get('v').audiostreams[::-1] # pprint.pprint(streams) # dump_stream_info(streams) self.now_playing_stream = stream return stream['a'].url.encode("utf-8") except AttributeError: logging.info("Could not retrieve the stream url!") raise def add_to_playback_queue(self, audio=None, video=None, info=None): """ Add to the playback queue. """ if audio: print_nfo("[YouTube] [Stream] '{0}' [{1}]." \ .format(to_ascii(audio.title).encode("utf-8"), \ to_ascii(audio.extension))) if info: print_nfo("[YouTube] [Stream] '{0}'." \ .format(to_ascii(info.title).encode("utf-8"))) queue_index = len(self.queue) self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index)) self.queue.append(dict(a=audio, v=video, i=info, q=queue_index))
class tizyoutubeproxy(object): """A class that accesses YouTube, retrieves stream URLs and creates and manages a playback queue. """ def __init__(self): self.queue = list() self.queue_index = -1 self.play_queue_order = list() self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"]) self.current_play_mode = self.play_modes.NORMAL self.now_playing_stream = None # Create multiprocess queues self.task_queue = Queue() self.done_queue = Queue() # Workers self.workers = list() def set_play_mode(self, mode): """ Set the playback mode. :param mode: current valid values are "NORMAL" and "SHUFFLE" """ self.current_play_mode = getattr(self.play_modes, mode) self.__update_play_queue_order() def enqueue_audio_stream(self, arg): """Add the audio stream of a YouTube video to the playback queue. :param arg: a search string """ logging.info('arg : %s', arg) try: yt_video = pafy.new(arg) yt_audio = yt_video.getbestaudio(preftype="webm") if not yt_audio: raise ValueError(str("No WebM audio stream for : %s" % arg)) yt_info = VideoInfo(ytid=arg, title=yt_audio.title) self.add_to_playback_queue(audio=yt_audio, video=yt_video, info=yt_info) self.__update_play_queue_order() except ValueError: raise ValueError(str("Video not found : %s" % arg)) def enqueue_audio_playlist(self, arg): """Add all audio streams in a YouTube playlist to the playback queue. :param arg: a YouTube playlist id """ logging.info('arg : %s', arg) try: count = len(self.queue) playlist = pafy.get_playlist2(arg) if len(playlist) > 0: for yt_video in playlist: self.add_to_playback_queue(video=yt_video, \ info=VideoInfo(ytid=yt_video.videoid, \ title=yt_video.title)) if count == len(self.queue): raise ValueError self.__update_play_queue_order() except ValueError: raise ValueError(str("Playlist not found : %s" % arg)) def enqueue_audio_search(self, arg): """Search YouTube and add the audio streams to the playback queue. :param arg: a search string """ logging.info('arg : %s', arg) try: query = generate_search_query(arg) wdata = pafy.call_gdata('search', query) wdata2 = wdata count = 0 while True: for track_info in get_tracks_from_json(wdata2): self.add_to_playback_queue(info=track_info) count += 1 if count > 100: break if not wdata2.get('nextPageToken'): break query['pageToken'] = wdata2['nextPageToken'] wdata2 = pafy.call_gdata('search', query) self.__update_play_queue_order() except ValueError: raise ValueError(str("Could not find any mixes : %s" % arg)) def enqueue_audio_mix(self, arg, feelinglucky=True): """Obtain a YouTube mix associated to a given video id or url and add all audio streams in the mix playlist to the playback queue. :param arg: a YouTube video id :param feelinglucky: If True, it will perform another YouTube search to find alternatives if the original mix cannot be found. """ logging.info('arg : %s', arg) yt_video = None try: count = len(self.queue) yt_video = pafy.new(arg) playlist = yt_video.mix if len(playlist) > 0: for yt_video in playlist: video_id = yt_video.videoid video_title = yt_video.title yt_info = VideoInfo(ytid=video_id, title=video_title) self.add_to_playback_queue(video=yt_video, info=yt_info) if count == len(self.queue): raise ValueError self.__update_play_queue_order() except IndexError: if not feelinglucky: raise ValueError else: print_wrn("[YouTube] Could not find a mix for '{0}'. "\ "Searching YouTube instead. Feeling lucky?." 
\ .format(arg.encode('utf-8'))) if yt_video.title: self.enqueue_audio_search(yt_video.title) else: self.enqueue_audio_stream(arg) def enqueue_audio_mix_search(self, arg): """Obtain a YouTube mix associated to a given textual search and add all the audio streams in the mix playlist to the playback queue. :param arg: a search string """ logging.info('arg : %s', arg) try: query = generate_search_query(arg) wdata = pafy.call_gdata('search', query) wdata2 = wdata count = len(self.queue) for track_info in get_tracks_from_json(wdata2): if track_info and track_info.ytid: try: self.enqueue_audio_mix(track_info.ytid, feelinglucky=False) break except ValueError: logging.info('Could not find a mix. Trying another video') if count == len(self.queue): raise ValueError except ValueError: raise ValueError(str("Could not find any mixes : %s" % arg)) def enqueue_audio_channel_uploads(self, arg): """Add all audio streams in a YouTube channel to the playback queue. :param arg: a YouTube channel url """ logging.info('arg : %s', arg) try: count = len(self.queue) channel = pafy.get_channel(arg) if channel: for yt_video in channel.uploads: self.add_to_playback_queue(video=yt_video, \ info=VideoInfo(ytid=yt_video.videoid, \ title=yt_video.title)) if count == len(self.queue): raise ValueError self.__update_play_queue_order() except ValueError: raise ValueError(str("Channel not found : %s" % arg)) def enqueue_audio_channel_playlist(self, channel_name, playlist_name): """Search a playlist within a channel and if found, adds all the audio streams to the playback queue. :param arg: a YouTube playlist id """ logging.info('args : %s - %s', channel_name, playlist_name) try: count = len(self.queue) channel = pafy.get_channel(channel_name) if channel: pl_dict = dict() pl_titles = list() pl_name = '' playlist = None for pl in channel.playlists: print_nfo("[YouTube] [Playlist] '{0}'." \ .format(to_ascii(pl.title))) if fuzz.partial_ratio(playlist_name, pl.title) > 50: pl_dict[pl.title] = pl pl_titles.append(pl.title) if len(pl_titles) > 1: pl_name = process.extractOne(playlist_name, pl_titles)[0] playlist = pl_dict[pl_name] elif len(pl_titles) == 1: pl_name = pl_titles[0] playlist = pl_dict[pl_name] if pl_name: if pl_name.lower() != playlist_name.lower(): print_wrn("[YouTube] Playlist '{0}' not found. " \ "Playing '{1}' instead." \ .format(to_ascii(playlist_name), \ to_ascii(pl_name))) for yt_video in playlist: self.add_to_playback_queue(video=yt_video, \ info=VideoInfo(ytid=yt_video.videoid, \ title=yt_video.title)) if count == len(self.queue): raise ValueError self.__update_play_queue_order() except ValueError: raise ValueError(str("Channel not found : %s" % channel_name)) def current_audio_stream_title(self): """ Retrieve the current stream's title. """ stream = self.now_playing_stream title = '' if stream: title = to_ascii(stream['a'].title).encode("utf-8") return title def current_audio_stream_author(self): """ Retrieve the current stream's author. """ stream = self.now_playing_stream author = '' if stream: author = to_ascii(stream['v'].author).encode("utf-8") return author def current_audio_stream_file_size(self): """ Retrieve the current stream's file size. """ stream = self.now_playing_stream size = 0 if stream: size = stream['a'].get_filesize() return size def current_audio_stream_duration(self): """ Retrieve the current stream's duration. 
""" stream = self.now_playing_stream duration = '' if stream: duration = to_ascii(stream['v'].duration).encode("utf-8") return duration def current_audio_stream_bitrate(self): """ Retrieve the current stream's bitrate. """ stream = self.now_playing_stream bitrate = '' if stream: bitrate = stream['a'].bitrate return bitrate def current_audio_stream_view_count(self): """ Retrieve the current stream's view count. """ stream = self.now_playing_stream viewcount = 0 if stream: viewcount = stream['v'].viewcount return viewcount def current_audio_stream_description(self): """ Retrieve the current stream's description. """ stream = self.now_playing_stream description = '' if stream: description = to_ascii(stream['v'].description).encode("utf-8") return description def current_audio_stream_file_extension(self): """ Retrieve the current stream's file extension. """ stream = self.now_playing_stream file_extension = '' if stream: file_extension = to_ascii(stream['a'].extension).encode("utf-8") return file_extension def current_audio_stream_video_id(self): """ Retrieve the current stream's video id. """ stream = self.now_playing_stream video_id = '' if stream: video_id = to_ascii(stream['i'].ytid).encode("utf-8") return video_id def current_audio_stream_published(self): """ Retrieve the current stream's upload date and time. """ stream = self.now_playing_stream if stream: published = to_ascii(stream['v'].published).encode("utf-8") return published def current_audio_stream_queue_index_and_queue_length(self): """ Retrieve index in the queue (starting from 1) of the current stream and the length of the playback queue. """ return self.queue_index + 1, len(self.queue) def clear_queue(self): """ Clears the playback queue. """ self.queue = list() self.queue_index = -1 def remove_current_url(self): """Remove the currently active url from the playback queue. """ logging.info("") if len(self.queue) and self.queue_index: stream = self.queue[self.queue_index] print_nfo("[YouTube] [Stream] '{0}' removed." \ .format(to_ascii(stream['i'].title).encode("utf-8"))) del self.queue[self.queue_index] self.queue_index -= 1 if self.queue_index < 0: self.queue_index = 0 self.__update_play_queue_order() def next_url(self): """ Retrieve the url of the next stream in the playback queue. """ logging.info("") try: if len(self.queue): self.queue_index += 1 if (self.queue_index < len(self.queue)) \ and (self.queue_index >= 0): next_stream = self.queue[self.play_queue_order \ [self.queue_index]] return self.__retrieve_stream_url(next_stream, self.queue_index).rstrip() else: self.queue_index = -1 return self.next_url() else: return '' except (KeyError, AttributeError): # TODO: We don't remove this for now # del self.queue[self.queue_index] logging.info("KeyError, or AttributeError exception") return self.next_url() except (IOError): # Remove this video del self.queue[self.queue_index] logging.info("IOError exception") return self.next_url() def prev_url(self): """ Retrieve the url of the previous stream in the playback queue. 
""" logging.info("") try: if len(self.queue): self.queue_index -= 1 if (self.queue_index < len(self.queue)) \ and (self.queue_index >= 0): prev_stream = self.queue[self.play_queue_order \ [self.queue_index]] return self.__retrieve_stream_url(prev_stream, self.queue_index).rstrip() else: self.queue_index = len(self.queue) return self.prev_url() else: return '' except (KeyError, AttributeError): # TODO: We don't remove this for now # del self.queue[self.queue_index] logging.info("exception") return self.prev_url() except (IOError): # Remove this video del self.queue[self.queue_index] logging.info("IOError exception") return self.next_url() def __update_play_queue_order(self): """ Update the queue playback order. A sequential order is applied if the current play mode is "NORMAL" or a random order if current play mode is "SHUFFLE" """ total_streams = len(self.queue) if total_streams: if not len(self.play_queue_order): # Create a sequential play order, if empty self.play_queue_order = range(total_streams) if self.current_play_mode == self.play_modes.SHUFFLE: random.shuffle(self.play_queue_order) print_nfo("[YouTube] [Streams in queue] '{0}'." \ .format(total_streams)) def __retrieve_stream_url(self, stream, queue_index): """ Retrieve a stream url """ try: if not len(self.workers): for _ in range(WORKER_PROCESSES): proc = Process(target=obtain_stream, \ args=(self.task_queue, \ self.done_queue)).start() self.workers.append(proc) while not self.done_queue.empty(): stream = self.done_queue.get() self.queue[stream['q']] = stream stream = self.queue[queue_index] if not stream.get('v') or not stream.get('a'): logging.info("ytid : %s", stream['i'].ytid) video = stream.get('v') if not video: video = pafy.new(stream['i'].ytid) audio = video.getbestaudio(preftype="webm") if not audio: logging.info("no suitable audio found") raise AttributeError() stream.update({'a': audio, 'v': video}) # streams = stream.get('v').audiostreams[::-1] # pprint.pprint(streams) # dump_stream_info(streams) self.now_playing_stream = stream return stream['a'].url.encode("utf-8") except AttributeError: logging.info("Could not retrieve the stream url!") raise def add_to_playback_queue(self, audio=None, video=None, info=None): """ Add to the playback queue. """ if audio: print_nfo("[YouTube] [Stream] '{0}' [{1}]." \ .format(to_ascii(audio.title).encode("utf-8"), \ to_ascii(audio.extension))) if info: print_nfo("[YouTube] [Stream] '{0}'." \ .format(to_ascii(info.title).encode("utf-8"))) queue_index = len(self.queue) self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index)) self.queue.append( dict(a=audio, v=video, i=info, q=queue_index))
class EventEngine():
    '''Event-processing engine.'''

    def __init__(self):
        '''Initialize the engine.'''
        # Create the event queue
        self._queue = Queue()
        # Whether the engine is running
        self._active = False
        # Worker pool (note: these are Process objects, so _active and
        # _handlers are copied into each child rather than shared)
        self._thread = [Process(target=self._run) for _ in range(5)]
        self._workers_n = 5
        # Registered handlers, keyed by event type
        self._handlers = {}

    def _run(self):
        logger.info('Worker starting...')
        while self._active:
            try:
                event = self._queue.get(block=True, timeout=0.3)
                logger.debug('About to process event: %s' % event)
                self._process(event)
            except Empty:
                pass
        logger.info('Worker stopping...')
        return ''

    def _process(self, event):
        """Process a single event."""
        # Look up the handlers registered for this event type
        handlers = self._handlers.get(event.type, [])
        for handler in handlers:
            try:
                if callable(handler):
                    ret_events = handler(event)
                    if not isinstance(ret_events, list):
                        ret_events = [ret_events]
                    for ret_event in ret_events:
                        if isinstance(ret_event, Event):
                            self.trigger(ret_event)
            except Exception as err:
                logger.warning('Event: %s handler error: %s' %
                               (event.type, err))

    def start(self):
        '''Start the engine and its background workers.'''
        logger.info('Starting background workers')
        self._active = True
        for thread in self._thread:
            thread.start()

    def stop(self):
        logger.info('Stopping background workers')
        self._active = False
        # Wait for the background workers to finish
        for thread in self._thread:
            thread.join(timeout=1)

    def register(self, event_type, handler):
        '''Register a handler for an event type.'''
        logger.info('register event type: %s, handler: %s' %
                    (event_type, handler))
        handlers = self._handlers.pop(event_type, [])
        if handler not in handlers:
            handlers.append(handler)
        self._handlers[event_type] = handlers

    def unregister(self, event_type, handler):
        '''Unregister a handler for an event type.'''
        logger.debug('unregister event type: %s, handler: %s' %
                     (event_type, handler))
        handlers = self._handlers.pop(event_type, [])
        if handler in handlers:
            handlers.remove(handler)
        if handlers:
            self._handlers[event_type] = handlers

    def trigger(self, event):
        '''Put an event onto the queue.'''
        logger.debug('trigger event: %s' % event)
        self._queue.put(event)
        return

    def handle(self, event_type, context=None):
        def deco(func):
            @wraps(func)
            def event_handler(event):
                # TODO check the arguments, then inject the relevant data automatically
                return func(event=event, context=context)

            self.register(event_type, event_handler)
            return func

        return deco

    def on_tick(self, context=None):
        return self.handle('on_tick', context=context)

    def on_open(self, context=None):
        return self.handle('on_open', context=context)

    def before_trade(self, context=None):
        return self.handle('before_trade', context=context)

    def pre_close(self, context=None):
        return self.handle('pre_close', context=context)

    def on_close(self, context=None):
        return self.handle('on_close', context=context)

    def after_close(self, context=None):
        return self.handle('after_close', context=context)
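A small usage sketch for EventEngine, assuming a fork-based start method (the workers are Process objects that only inherit handlers registered before start()), a simple Event type with a `type` attribute, a module-level logger, and the imports the engine itself needs; all of these are assumptions, not part of the original snippet.

import logging
import time
from functools import wraps
from multiprocessing import Process, Queue
from queue import Empty

logger = logging.getLogger(__name__)

class Event:
    def __init__(self, type, data=None):
        self.type = type
        self.data = data

engine = EventEngine()

@engine.on_tick(context={'symbol': 'DEMO'})
def print_tick(event, context):
    print('tick for', context['symbol'], '->', event.data)

engine.start()
engine.trigger(Event('on_tick', data=42))
time.sleep(1)   # give a worker a chance to pick the event up
engine.stop()
# The _active flag is not shared with the child processes, so stop() alone
# does not end them; terminate them explicitly in this sketch.
for p in engine._thread:
    p.terminate()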
class mzitu: def __init__(self): self.starturl = 'http://www.mzitu.com/all/' self.ua = UserAgent() self.qu = Queue(1000) self.qu2 = Queue(1000) self.suburls = [] self.oldmurls = [] conno = sqlite3.connect('mzituoldu.db') try: conno.execute('create table oldmurls(url text primary key)') except (Exception) as e: print('创建表ou:{}'.format(e)) conno.close() conns = sqlite3.connect('mzitusubu.db') try: conns.execute( 'create table suburls(url text primary key,status int default 1)' ) except (Exception) as e: print('创建表su:{}'.format(e)) conns.close() def gethtml(self, url): headers = {} headers['User-Agent'] = self.ua.random headers['Referer'] = url try: resp = requests.get(url, headers=headers, timeout=8) if resp.encoding == 'ISO-8859-1': resp.encoding = requests.utils.get_encodings_from_content( resp.content)[0] html = pq(resp.text) cont = resp.content return html, cont except (Exception) as e: print('gethtml错误:{}'.format(e)) logging.warning('gethtml错误:{}'.format(e)) return None, None def getmurl(self): resp = self.gethtml(self.starturl)[0] if resp != None: elem = resp('.all')('a').items() murls = {} for ele in elem: murl = ele.attr('href') title = ele.text() murls[murl] = title return murls else: print('murl未get到网页内容!') logging.warning('murl未get到网页内容!') return None def getsuburl(self, url): resp = self.gethtml(url)[0] if resp != None: elem = resp('.pagenavi').children('a') num = int(elem.eq(-2).children('span').text()) for i in range(1, num + 1): suburl = '{}/{}'.format(url, i) try: self.qu.put(suburl, timeout=60) except (Exception) as e: print('qu.put Error:{}'.format(e)) self.qu2.put(url) print('已get完:{}'.format(url)) else: print('suburl未get到网页内容!') logging.warning('suburl未get到网页内容!') def getimgurl(self, url): try: resp = self.gethtml(url)[0] if resp != None: elem = resp('.main-image')('img').attr('src') title = resp('.currentpath').next('h2').text() return elem, title else: print('imgurl未get到网页内容!') logging.warning('imgurl未get到网页内容!') return None, None except (Exception) as e: print('getimgurlE:{}'.format(e)) logging.warning('getimgurlE:{}'.format(e)) def mkdir(self, rootpath=os.path.abspath('.'), dir='mzitu'): global path path = os.path.join(rootpath, dir) if not os.path.exists(path): os.mkdir(path) def download( self, url, ): imgurl = self.getimgurl(url) global path imgname = '{}{}{}'.format(imgurl[0].split('/')[-3], imgurl[0].split('/')[-2], imgurl[0].split('/')[-1]) imgpath = os.path.join(path, imgname) statu = os.path.exists(imgpath) headers = {} if not statu: for t in range(2): headers['User-Agent'] = self.ua.random headers['Referer'] = url headers['Host'] = 'i.meizitu.net' try: resp = requests.get(imgurl[0], headers=headers, timeout=8) with open(imgpath, 'wb') as f: f.write(resp.content) print('下载:{}'.format(imgurl[0])) break except (Exception) as e: print('下载图片错误:{}'.format(e)) logging.warning('下载图片错误:{}'.format(e)) time.sleep(5) else: print('已下载:{}'.format(imgname)) def savesuburl(self): i = 0 conn = sqlite3.connect('mzitusubu.db') cur = conn.cursor() suburls = [] while 1: try: suburl = (self.qu.get(timeout=20), ) suburls.append(suburl) i += 1 if i >= 2000: cur.executemany( 'insert or ignore into suburls(url) values(?)', suburls) conn.commit() suburls = [] i = 0 except (Exception) as e: print('suburls Error:{}'.format(e)) cur.executemany('insert or ignore into suburls(url) values(?)', suburls) conn.commit() cur.close() conn.close() break print('保存suburl') def saveoldmurl(self): conn = sqlite3.connect('mzituoldu.db') cur = conn.cursor() oldmurls = [] while 1: try: oldmurl = 
(self.qu2.get(timeout=20), ) oldmurls.append(oldmurl) except (Exception) as e: print('oldmurl Error:{}'.format(e)) cur.executemany( 'insert or ignore into oldmurls(url) values(?)', oldmurls) conn.commit() oldmurls = [] cur.close() conn.close() break print('保存oldmurl') def run(self): self.mkdir(rootpath='H:/') murls = self.getmurl() pool1 = Pool(50) pool1.apply_async(self.savesuburl) pool1.apply_async(self.saveoldmurl) conno = sqlite3.connect('mzituoldu.db') oldurls = conno.execute('select url from oldmurls').fetchall() conno.close() for murl in murls: omurl = (murl, ) if omurl not in oldurls: pool1.apply_async(self.getsuburl(murl, )) # oldurls.append(omurl) else: print('{}已经下过了'.format(murl)) pool1.close() pool1.join() pool2 = Pool(20) conns = sqlite3.connect('mzitusubu.db') cur = conns.cursor() while 1: surls = cur.execute( 'select url from suburls where status=1').fetchmany(300) if surls == []: break oldsurls = [] for surl in surls: pool2.apply_async(self.download, (surl[0], )) oldsurls.append(surl) time.sleep(8) cur.executemany('update suburls set status=2 where url=?', oldsurls) conns.commit() cur.close() conns.close() pool2.close() pool2.join() print('下载完成!!!!')
class DouYu:
    """Dou Yu dan mu spider."""

    @staticmethod
    def find_room():
        """Find the most popular rooms on douyu."""
        result = []
        for page in range(0, 400, 30):
            datas = json.loads(
                requests.get("http://capi.douyucdn.cn/api/v1/live?"
                             "limit={}&offset={}".format(page + 30,
                                                         page)).text)["data"]
            for data in datas:
                result.append((data["game_name"], data["room_id"]))
        return list(set(result))

    @staticmethod
    def log(url, err):
        """Log when error happened."""
        with open("dy_log", "a") as f:
            # time.time is a function; call it and stringify the timestamp
            f.write(str(time.time()) + "\t" + str(url) + "\t" + str(err))

    def __init__(self):
        """Initialize the spider."""
        self.fname = time.strftime("%Y%m%d_%H%M%S")
        self.rooms = self.find_room()
        self.pool_queue = Queue()
        self.msg_queue = Queue()

    def pool_join(self):
        """Join the worker processes as they arrive on the queue."""
        while 1:
            p = self.pool_queue.get()
            if p is None:
                return
            else:
                try:
                    p.join()
                except KeyboardInterrupt:
                    return
                except BaseException:
                    pass

    def record(self):
        """Record dan mu."""
        with open("record/" + self.fname + ".danmu", "w") as f:
            while 1:
                msg = self.msg_queue.get()
                if msg is None:
                    return
                else:
                    try:
                        f.write(msg)
                    except KeyboardInterrupt:
                        return
                    except BaseException:
                        pass

    def _run(self, room, game):
        """Run for multiprocessing."""
        dmc = DanMuClient('http://www.douyu.com/{}'.format(room))
        if not dmc.isValid():
            self.log('http://www.douyu.com/{}'.format(room), "Url not valid")

        @dmc.default
        def danmu_fn(msg):
            """Dan mu callback: push one tab-separated record per message."""
            res = "\t".join(
                (str(room), str(game), str(time.time()), msg["MsgType"],
                 msg["NickName"], msg["Content"], "\n"))
            self.msg_queue.put(res)

        dmc.start(blockThread=False)
        time.sleep(3600)
        dmc.stop()

    def run(self):
        """Run and get datas."""
        print(self.fname, len(self.rooms))
        p_re = Process(target=self.record)
        pool = Process(target=self.pool_join)
        pool.start()
        p_re.start()
        for (game, room) in self.rooms:
            p = Process(target=self._run, args=(room, game))
            p.start()
            self.pool_queue.put(p)
        self.pool_queue.put(None)
        pool.join()
        self.msg_queue.put(None)
        return p_re, self.fname
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and
    manages a playback queue.

    """

    def __init__(self, api_key=API_KEY):
        self.queue = list()
        self.queue_index = -1
        self.play_queue_order = list()
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Create multiprocess queues
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Workers
        self.workers = list()
        self.api_key = api_key if api_key != "" else API_KEY
        pafy.set_api_key(self.api_key)

    def set_play_mode(self, mode):
        """Set the playback mode.

        :param mode: current valid values are "NORMAL" and "SHUFFLE"

        """
        self.current_play_mode = getattr(self.play_modes, mode)
        self._update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Add the audio stream of a YouTube video to the playback queue.

        :param arg: a search string

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio stream] : '{0}'. ".format(arg))
            yt_search = MEMORY.cache(run_youtube_search)
            yt_video = yt_search(arg)
            yt_audio = yt_video.getbestaudio(preftype="webm")
            if not yt_audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))
            yt_info = VideoInfo(ytid=arg, title=yt_audio.title)
            self._add_to_playback_queue(audio=yt_audio, video=yt_video,
                                        info=yt_info)
            self._update_play_queue_order()
        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Add all audio streams in a YouTube playlist to the playback queue.

        :param arg: a YouTube playlist id

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio playlist] : '{0}'. ".format(arg))
            count = len(self.queue)
            yt_pl_search = MEMORY.cache(run_youtube_playlist_search)
            playlist = yt_pl_search(arg)
            if len(playlist) > 0:
                for yt_video in playlist:
                    self._add_to_playback_queue(
                        video=yt_video,
                        info=VideoInfo(ytid=yt_video.videoid,
                                       title=yt_video.title),
                    )
            if count == len(self.queue):
                raise ValueError
            self._update_play_queue_order()
        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Search YouTube and add the audio streams to the playback queue.

        :param arg: a search string

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio search] : '{0}'. ".format(arg))
            yt_dt_search = MEMORY.cache(run_youtube_data_search)
            query = generate_search_query(arg, self.api_key)
            wdata = yt_dt_search("search", query)
            wdata2 = wdata
            count = 0
            while True:
                for track_info in get_tracks_from_json(wdata2):
                    self._add_to_playback_queue(info=track_info)
                    count += 1
                if count > 100:
                    break
                if not wdata2.get("nextPageToken"):
                    break
                query["pageToken"] = wdata2["nextPageToken"]
                wdata2 = yt_dt_search("search", query)
            self._update_play_queue_order()
        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add
        all audio streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search
        to find alternatives if the original mix cannot be found.

        """
        logging.info("arg : %s", arg)
        yt_video = None
        try:
            print_msg("[YouTube] [Audio mix] : '{0}'. ".format(arg))
            count = len(self.queue)
            yt_search = MEMORY.cache(run_youtube_search)
            yt_video = yt_search(arg)
            playlist = yt_video.mix
            if len(playlist) > 0:
                for yt_video in playlist:
                    video_id = yt_video.videoid
                    video_title = yt_video.title
                    yt_info = VideoInfo(ytid=video_id, title=video_title)
                    self._add_to_playback_queue(video=yt_video, info=yt_info)
            if count == len(self.queue):
                raise ValueError
            self._update_play_queue_order()
        except IndexError:
            if not feelinglucky:
                raise ValueError
            else:
                print_adv("[YouTube] Could not find a mix for '{0}'. "
                          "Searching YouTube instead. Feeling lucky?."
                          .format(arg.encode("utf-8")))
                # yt_video may still be None if the search itself failed.
                if yt_video and yt_video.title:
                    self.enqueue_audio_search(yt_video.title)
                else:
                    self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Obtain a YouTube mix associated to a given textual search and add
        all the audio streams in the mix playlist to the playback queue.

        :param arg: a search string

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio mix search] : '{0}'. ".format(arg))
            yt_dt_search = MEMORY.cache(run_youtube_data_search)
            wdata = yt_dt_search("search",
                                 generate_search_query(arg, self.api_key))
            wdata2 = wdata
            count = len(self.queue)
            for track_info in get_tracks_from_json(wdata2):
                if track_info and track_info.ytid:
                    try:
                        self.enqueue_audio_mix(track_info.ytid,
                                               feelinglucky=False)
                        break
                    except ValueError:
                        logging.info(
                            "Could not find a mix. Trying another video")
            if count == len(self.queue):
                raise ValueError
        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_channel_uploads(self, arg):
        """Add all audio streams in a YouTube channel to the playback queue.

        :param arg: a YouTube channel url

        """
        logging.info("arg : %s", arg)
        try:
            print_msg(
                "[YouTube] [Audio channel uploads] : '{0}'. ".format(arg))
            count = len(self.queue)
            yt_ch_search = MEMORY.cache(run_youtube_channel_search)
            channel = yt_ch_search(arg)
            if channel:
                for yt_video in channel.uploads:
                    self._add_to_playback_queue(
                        video=yt_video,
                        info=VideoInfo(ytid=yt_video.videoid,
                                       title=yt_video.title),
                    )
            if count == len(self.queue):
                raise ValueError
            self._update_play_queue_order()
        except ValueError:
            raise ValueError(str("Channel not found : %s" % arg))

    def enqueue_audio_channel_playlist(self, channel_name, playlist_name):
        """Search a playlist within a channel and, if found, add all its audio
        streams to the playback queue.

        :param channel_name: a YouTube channel name

        :param playlist_name: the name of a playlist within the channel

        """
        logging.info("args : %s - %s", channel_name, playlist_name)
        try:
            print_msg(
                "[YouTube] [Audio channel playlist] : '{0} - {1}'. ".format(
                    channel_name, playlist_name))
            count = len(self.queue)
            yt_ch_search = MEMORY.cache(run_youtube_channel_search)
            channel = yt_ch_search(channel_name)
            if channel:
                pl_dict = dict()
                pl_titles = list()
                pl_name = ""
                playlist = None
                for pl in channel.playlists:
                    print_nfo("[YouTube] [Playlist] '{0}'.".format(
                        to_ascii(pl.title)))
                    if fuzz.partial_ratio(playlist_name, pl.title) > 50:
                        pl_dict[pl.title] = pl
                        pl_titles.append(pl.title)
                if len(pl_titles) > 1:
                    pl_name = process.extractOne(playlist_name, pl_titles)[0]
                    playlist = pl_dict[pl_name]
                elif len(pl_titles) == 1:
                    pl_name = pl_titles[0]
                    playlist = pl_dict[pl_name]
                if pl_name:
                    if pl_name.lower() != playlist_name.lower():
                        print_adv("[YouTube] Playlist '{0}' not found. "
                                  "Playing '{1}' instead.".format(
                                      to_ascii(playlist_name),
                                      to_ascii(pl_name)))
                    for yt_video in playlist:
                        self._add_to_playback_queue(
                            video=yt_video,
                            info=VideoInfo(ytid=yt_video.videoid,
                                           title=yt_video.title),
                        )
            if count == len(self.queue):
                raise ValueError
            self._update_play_queue_order()
        except ValueError:
            raise ValueError(str("Channel not found : %s" % channel_name))

    def current_audio_stream_title(self):
        """Retrieve the current stream's title."""
        stream = self.now_playing_stream
        title = ""
        if stream:
            title = to_ascii(stream["a"].title)
        return title

    def current_audio_stream_author(self):
        """Retrieve the current stream's author."""
        stream = self.now_playing_stream
        author = ""
        if stream:
            author = to_ascii(stream["v"].author)
        return author

    def current_audio_stream_file_size(self):
        """Retrieve the current stream's file size."""
        stream = self.now_playing_stream
        size = 0
        if stream:
            size = stream["a"].get_filesize()
        return size

    def current_audio_stream_duration(self):
        """Retrieve the current stream's duration."""
        stream = self.now_playing_stream
        duration = ""
        if stream:
            duration = to_ascii(stream["v"].duration)
        return duration

    def current_audio_stream_bitrate(self):
        """Retrieve the current stream's bitrate."""
        stream = self.now_playing_stream
        bitrate = ""
        if stream:
            bitrate = stream["a"].bitrate
        return bitrate

    def current_audio_stream_view_count(self):
        """Retrieve the current stream's view count."""
        stream = self.now_playing_stream
        viewcount = 0
        if stream:
            viewcount = stream["v"].viewcount
        return viewcount

    def current_audio_stream_description(self):
        """Retrieve the current stream's description."""
        stream = self.now_playing_stream
        description = ""
        if stream:
            description = to_ascii(stream["v"].description)
        return description

    def current_audio_stream_file_extension(self):
        """Retrieve the current stream's file extension."""
        stream = self.now_playing_stream
        file_extension = ""
        if stream:
            file_extension = to_ascii(stream["a"].extension)
        return file_extension

    def current_audio_stream_video_id(self):
        """Retrieve the current stream's video id."""
        stream = self.now_playing_stream
        video_id = ""
        if stream:
            video_id = to_ascii(stream["i"].ytid)
        return video_id

    def current_audio_stream_published(self):
        """Retrieve the current stream's upload date and time."""
        stream = self.now_playing_stream
        # Initialise so the method is safe when nothing is playing.
        published = ""
        if stream:
            published = to_ascii(stream["v"].published)
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """Retrieve the index in the queue (starting from 1) of the current
        stream and the length of the playback queue.

        """
        return self.play_queue_order[self.queue_index] + 1, len(self.queue)

    def clear_queue(self):
        """Clear the playback queue."""
        self.queue = list()
        self.queue_index = -1

    def remove_current_url(self):
        """Remove the currently active url from the playback queue."""
        logging.info("")
        if len(self.queue) and self.queue_index:
            stream = self.queue[self.queue_index]
            print_nfo("[YouTube] [Stream] '{0}' removed.".format(
                to_ascii(stream["i"].title)))
            del self.queue[self.queue_index]
            self.queue_index -= 1
            if self.queue_index < 0:
                self.queue_index = 0
            self._update_play_queue_order()

    def next_url(self):
        """Retrieve the url of the next stream in the playback queue."""
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index += 1
                if (self.queue_index < len(self.queue)) and \
                        (self.queue_index >= 0):
                    next_stream = self.queue[
                        self.play_queue_order[self.queue_index]]
                    return self._retrieve_stream_url(
                        next_stream,
                        self.play_queue_order[self.queue_index]).rstrip()
                else:
                    self.queue_index = -1
                    return self.next_url()
            else:
                return ""
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("KeyError, or AttributeError exception")
            return self.next_url()
        except IOError:
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def prev_url(self):
        """Retrieve the url of the previous stream in the playback queue."""
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index -= 1
                if (self.queue_index < len(self.queue)) and \
                        (self.queue_index >= 0):
                    prev_stream = self.queue[
                        self.play_queue_order[self.queue_index]]
                    return self._retrieve_stream_url(
                        prev_stream,
                        self.play_queue_order[self.queue_index]).rstrip()
                else:
                    self.queue_index = len(self.queue)
                    return self.prev_url()
            else:
                return ""
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.prev_url()
        except IOError:
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def _update_play_queue_order(self):
        """Update the queue playback order.

        A sequential order is applied if the current play mode is "NORMAL",
        or a random order if the current play mode is "SHUFFLE".

        """
        total_streams = len(self.queue)
        if total_streams:
            if not len(self.play_queue_order):
                # Create a sequential play order, if empty
                self.play_queue_order = list(range(total_streams))
            if self.current_play_mode == self.play_modes.SHUFFLE:
                random.shuffle(self.play_queue_order)
            print_nfo(
                "[YouTube] [Streams in queue] '{0}'.".format(total_streams))

    def _retrieve_stream_url(self, stream, queue_index):
        """Retrieve a stream url."""
        try:
            if not len(self.workers):
                for _ in range(WORKER_PROCESSES):
                    # Keep the Process object itself; start() returns None.
                    proc = Process(target=obtain_stream,
                                   args=(self.task_queue, self.done_queue))
                    proc.start()
                    self.workers.append(proc)

            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream["q"]] = stream

            stream = self.queue[queue_index]
            if not stream.get("v") or not stream.get("a"):
                logging.info("ytid : %s", stream["i"].ytid)
                video = stream.get("v")
                if not video:
                    yt_search = MEMORY.cache(run_youtube_search)
                    video = yt_search(stream["i"].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({"a": audio, "v": video})

            # streams = stream.get('v').audiostreams[::-1]
            # pprint.pprint(streams)
            # dump_stream_info(streams)

            self.now_playing_stream = stream
            return stream["a"].url
        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def _add_to_playback_queue(self, audio=None, video=None, info=None):
        """Add to the playback queue."""
        if audio:
            print_nfo("[YouTube] [Stream] '{0}' [{1}].".format(
                to_ascii(audio.title), to_ascii(audio.extension)))
        if info:
            print_nfo("[YouTube] [Stream] '{0}'.".format(to_ascii(info.title)))
        queue_index = len(self.queue)
        self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index))
        self.queue.append(dict(a=audio, v=video, i=info, q=queue_index))
def pipe_dream(layer, logger, args, backward_event, targets_queue, e,
               data_size, trainloader):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    layer.train()

    if dist.get_rank() == 0:
        # Rank 0: forward pass of the first stage; gradients come back from rank 1.
        criterion.cuda(0)
        output_queue = ThreadQueue(2)
        data_iter = iter(trainloader)
        batch_idx = 0
        while True:
            try:
                if output_queue.qsize() == 2:
                    backward_event.wait()
                    optimizer.zero_grad()
                    grad = torch.zeros([args.batch_size, 128, 16, 16])
                    dist.recv(tensor=grad, src=1)
                    outputs = output_queue.get()
                    outputs.backward(grad.cuda(0))
                    optimizer.step()
                    backward_event.clear()
                    continue
                else:
                    inputs, targets = next(data_iter)
                    inputs = inputs.cuda(0)
                    targets_queue.put(targets.numpy(), block=False)
                    outputs = layer(inputs)
                    send_opt = dist.isend(tensor=outputs.cpu(), dst=1)
                    send_opt.wait()
                    output_queue.put(outputs)
                    batch_idx += 1
            except StopIteration as stop_e:
                # Data exhausted: signal rank 1 and drain the pending backward passes.
                send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
                send_opt.wait()
                while output_queue.qsize() > 0:
                    # backward_event.wait()
                    optimizer.zero_grad()
                    grad = torch.zeros([args.batch_size, 128, 16, 16])
                    dist.recv(tensor=grad, src=1)
                    outputs = output_queue.get()
                    outputs.backward(grad.cuda(0))
                    optimizer.step()
                    # backward_event.clear()
                break

    elif dist.get_rank() == 1:
        # Rank 1: second stage; computes the loss and sends the input gradient back.
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(1)
        while True:
            print("while........................")
            try:
                rec_val = torch.zeros([args.batch_size, 128, 16, 16])
                dist.recv(tensor=rec_val, src=0)
                print("recv.......")
            except RuntimeError as error:
                print("runtime........................")
                # e.wait()
                break
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            optimizer.zero_grad()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(1)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            progress_bar(
                batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (train_loss / (batch_idx + 1), 100. * correct / total,
                 correct, total))

            if not backward_event.is_set():
                print("set.....")
                backward_event.set()

            send_opt = dist.isend(tensor=rec_val.grad.cpu(), dst=0)
            print("send.....")
            if batch_idx % 10 == 0:
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
            batch_idx += 1
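# A minimal two-rank launch sketch for pipe_dream (assumptions: two CUDA
# devices, the Gloo backend for the CPU tensors exchanged via dist.isend /
# dist.recv, and hypothetical helpers build_stage(rank) / make_loader(args)
# standing in for the real model split and data loading; batch_size and
# data_size below are placeholder values).
import argparse
import logging
import multiprocessing
import torch.distributed as dist

def _worker(rank, world_size, targets_queue, backward_event, done_event):
    dist.init_process_group("gloo", init_method="tcp://127.0.0.1:29500",
                            rank=rank, world_size=world_size)
    run_args = argparse.Namespace(batch_size=64)             # placeholder
    layer = build_stage(rank).cuda(rank)                     # hypothetical helper
    loader = make_loader(run_args) if rank == 0 else None    # hypothetical helper
    pipe_dream(layer, logging.getLogger("pipe"), run_args, backward_event,
               targets_queue, done_event, data_size=782, trainloader=loader)

if __name__ == "__main__":
    ctx = multiprocessing.get_context("spawn")
    targets_queue, backward_event, done_event = ctx.Queue(), ctx.Event(), ctx.Event()
    workers = [ctx.Process(target=_worker,
                           args=(rank, 2, targets_queue, backward_event, done_event))
               for rank in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()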
class Handler(Process):
    """Multiprocessing-adapted logging handler."""

    def __init__(self, handler, args=(), **kwargs):
        assert issubclass(handler, logging.Handler)
        self._handler = handler(*args, **kwargs)

        # Patch to use a non-default encoding on stream handlers.
        if issubclass(handler, logging.StreamHandler):

            def _format(record):

                def _encode(i):
                    if isinstance(i, text_type):
                        try:
                            return i.encode(sys.stdout.encoding, 'replace')
                        except:  # pylint: disable=bare-except
                            pass
                    return i

                def _decode(i):
                    if isinstance(i, binary_type):
                        try:
                            return u(i)
                        except:  # pylint: disable=bare-except
                            pass
                    return i

                record.msg = _decode(record.msg)
                record.args = tuple(_decode(i) for i in record.args)
                ret = handler.format(self._handler, record)
                return _encode(ret)

            self._handler.format = _format

        self.queue = Queue(-1)
        super(Handler, self).__init__(name=str(self._handler))
        self.daemon = True
        self.start()

    def __getattr__(self, name):
        # Delegate everything else (level, formatter, ...) to the wrapped handler.
        return getattr(self._handler, name)

    def run(self):
        # Child process: drain the queue and let the wrapped handler do the I/O.
        while True:
            try:
                record = self.queue.get()
                self._handler.emit(record)
            except (KeyboardInterrupt, SystemExit):
                raise
            except EOFError:
                break
            except:  # pylint:disable=bare-except
                traceback.print_exc(file=sys.stderr)

    def _format_record(self, record):
        # Ensure that exc_info and args have been stringified. Removes any
        # chance of unpickleable things inside and possibly reduces the
        # message size sent over the pipe.
        if record.args:
            record.msg = record.msg % record.args
            record.args = None
        if record.exc_info:
            self.format(record)
            record.exc_info = None
        return record

    def emit(self, record):
        """(override) logging.Handler.emit"""
        try:
            msg = self._format_record(record)
            self.queue.put_nowait(msg)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:  # pylint:disable=bare-except
            self.handleError(record)

    def close(self):
        """(override) logging.Handler.close"""
        self._handler.close()
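# A minimal usage sketch (assumption: text_type/binary_type/u come from six or
# an equivalent compatibility shim imported elsewhere); the wrapper is handed
# the handler *class*, so emit() in the parent only queues the record and the
# child process performs the actual I/O.
if __name__ == "__main__":
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    root.addHandler(Handler(logging.StreamHandler))
    root.info("queued in the parent, written by the worker process")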