def get_info_from_news(self, news_id: str) -> Dict:
    """Fetch the headers of an article and return them as a dict.

    Requests the header block for ``news_id`` through ``self.NNTP.head`` and
    decodes every raw line with ``self.encoding``. A line of the form
    ``Name: value`` starts a new entry; any other line is treated as the
    continuation of the previous header and appended to its value.

    Args:
        news_id: Message spec passed unchanged to ``self.NNTP.head``.

    Returns:
        Mapping of header name to decoded header value. Continuation text
        that shows up before any ``Name: value`` line is collected under the
        placeholder key ``"NULL"``.
    """
    info = {}
    _, head = self.NNTP.head(news_id)
    last = "NULL"
    for raw_line in head.lines:
        parts = raw_line.decode(self.encoding).split(": ", 1)
        if len(parts) != 2:
            # No "Name: value" separator: append to the previous header.
            # ``get`` keeps a stray first line from raising KeyError on the
            # "NULL" placeholder, which has no entry yet at that point.
            info[last] = info.get(last, "") + nntplib.decode_header(parts[0])
            continue
        last = parts[0]
        info[last] = nntplib.decode_header(parts[1])
    return info
def cmd_overview(msg):
    """Yield one ``Response`` per requested group listing its articles.

    ``msg.kwargs`` must contain ``"host"`` and ``msg.args`` must name at least
    one group; otherwise an ``IMException`` is raised. Every article returned
    by ``read_group(group, **msg.kwargs)`` is rendered as one formatted line.
    Groups without any article produce no response.
    """
    if "host" not in msg.kwargs:
        raise IMException("please give a hostname in keywords")
    if len(msg.args) == 0:
        raise IMException("which group would you overview?")

    line_format = "On {date}, from \x03{0:02d}{X-FromName}\x0F \x02{subject}\x0F: \x0314{message-id}\x0F"

    for group in msg.args:
        lines = []
        for article in read_group(group, **msg.kwargs):
            sender = article["from"] if "from" in article else ""
            name, email = parseaddr(sender)
            article["X-FromName"] = name
            article["X-FromEmail"] = email
            if article["X-FromName"] == '':
                # No display name available: show the address instead.
                article["X-FromName"] = article["X-FromEmail"]

            # Colour index (0-15) derived from the sender address.
            colour = adler32(article["X-FromEmail"].encode()) & 0xf
            decoded = {key: decode_header(value) for key, value in article.items()}
            lines.append(line_format.format(colour, **decoded))

        if len(lines):
            group_colour = adler32(group[0].encode()) & 0xf
            title = "In \x03{0:02d}{1}\x0F".format(group_colour, group)
            yield Response(lines, channel=msg.channel, title=title)
async def print_news(client: discord.Client, news_id: str, group: str,
                     group_manager: NewsGroupManager) -> datetime:
    """Post one news article to every channel subscribed to ``group``.

    The article headers and body are fetched through ``group_manager.NNTP``
    and decoded with ``group_manager.encoding``. Leading ``[tag]`` markers are
    split off the subject, the body is cut into chunks of 5120 characters,
    and the resulting embeds are sent to each channel listed in
    ``group_manager.groups[group]["channels"]``.

    Args:
        client: Client used to resolve channel ids via ``get_channel``.
        news_id: Message spec of the article to fetch.
        group: Key into ``group_manager.groups``.
        group_manager: Provides the NNTP connection, the text encoding and
            the per-group channel configuration.

    Returns:
        The article date parsed from the first 25 characters of the ``Date``
        header (no timezone information is kept).

    Raises:
        KeyError: If the article has no ``From``, ``Subject`` or ``Date``.
        ValueError: If the date does not match ``%a, %d %b %Y %H:%M:%S``.
    """
    nntp = group_manager.NNTP
    encoding = group_manager.encoding

    # --- headers -----------------------------------------------------------
    info = {}
    _, head = nntp.head(news_id)
    last = "NULL"
    for raw_line in head.lines:
        parts = raw_line.decode(encoding).split(": ", 1)
        if len(parts) != 2:
            # Continuation of the previous header. ``get`` avoids a KeyError
            # when such a line appears before any "Name: value" line.
            info[last] = info.get(last, "") + nntplib.decode_header(parts[0])
            continue
        last = parts[0]
        info[last] = nntplib.decode_header(parts[1])

    author = info["From"]
    subject = info["Subject"]

    # Keep only "Day, DD Mon YYYY HH:MM:SS"; a single-digit day leaves one
    # trailing space inside the 25-character window.
    date_text = info["Date"][:25]
    if date_text.endswith(" "):
        date_text = date_text[:-1]
    date = datetime.strptime(date_text, "%a, %d %b %Y %H:%M:%S")

    # --- body --------------------------------------------------------------
    _, body = nntp.body(news_id)
    content = "".join(line.decode(encoding) + "\n" for line in body.lines)

    # --- tags: every "[tag]" prefix of the subject ---------------------------
    tags = []
    pieces = subject.split("]", 1)
    while len(pieces) != 1:
        tags.append(pieces[0][1:])
        pieces = pieces[1].split("]", 1)
    subject = pieces[0]

    # Slice the message into chunks of 5120 characters.
    chunk_size = 5120
    msg = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
    # An empty body yields no chunk at all; fall back to an empty headline
    # text instead of failing on msg[0].
    first_chunk = msg[0] if msg else ""

    group_info = group_manager.groups[group]
    group_name = group_info["name"]
    channels = group_info["channels"]

    embed = EmbedsManager.newsgroup_embed(subject, tags, first_chunk, author,
                                          date, group_name)
    for channel in channels:
        await client.get_channel(channel).send(embed=embed)

    # NOTE(review): the first chunk is sent both in the headline embed above
    # and again as the first filler embed; kept as in the original, confirm
    # whether msg[1:] was intended.
    for chunk in msg:
        embed = EmbedsManager.newsgroup_filler_embed(chunk, author, date,
                                                     group_name)
        for channel in channels:
            await client.get_channel(channel).send(embed=embed)

    return date
def get_items(self):
    """News generator: yield the latest ``self.howmany`` articles.

    Connects to ``self.servername``, selects ``self.group`` and yields one
    ``NewsItem(title, body)`` per article, where ``title`` is the decoded
    subject and ``body`` is the article text followed by a blank line.
    """
    # The NNTP object is used as a context manager so the connection is
    # closed even when the consumer stops iterating early or a command fails.
    with NNTP(self.servername) as server:
        # Newsgroup information: response, count, first, last, name.
        _, _, first, last, _ = server.group(self.group)
        # Never ask for article numbers below the first one in the group.
        start = max(first, last - self.howmany + 1)
        _, overviews = server.over((start, last))
        for number, over in overviews:
            title = decode_header(over['subject'])
            _, info = server.body(number)
            # Bodies are not guaranteed to be UTF-8; substitute undecodable
            # bytes instead of aborting the whole feed.
            body = '\n'.join(
                line.decode(errors='replace') for line in info.lines) + '\n\n'
            yield NewsItem(title, body)
def get_items(self):
    """Yield a ``NewsItem`` for each of the latest ``self.howmany`` articles.

    Connects to ``self.servername``, selects ``self.group`` and, for every
    article in the overview range, yields ``NewsItem(title, body)`` with the
    decoded subject as title and the Latin-1 decoded body text (terminated by
    a blank line) as body.
    """
    # Context manager: the connection is shut down even if the generator is
    # abandoned half-way or a server command raises.
    with NNTP(self.servername) as server:
        _, _, first, last, _ = server.group(self.group)
        # Clamp so a small group never produces a range below ``first``.
        start = max(first, last - self.howmany + 1)
        _, overviews = server.over((start, last))
        for number, over in overviews:
            title = decode_header(over['subject'])
            _, info = server.body(number)
            body = '\n'.join(
                line.decode('latin') for line in info.lines) + '\n\n'
            yield NewsItem(title, body)
def get_items(self):
    """Generate ``NewsItem`` objects for the newest articles of the group.

    At most ``self.howmany`` articles of ``self.group`` on
    ``self.servername`` are fetched. Each item carries the decoded subject
    and the Latin-1 decoded body followed by a blank line.
    """
    # ``with`` guarantees the connection is closed on early exit or error,
    # which a trailing ``server.quit()`` after the loop does not.
    with NNTP(self.servername) as server:
        _, _, first, last, _ = server.group(self.group)
        # Do not request article numbers lower than the group's first one.
        start = max(first, last - self.howmany + 1)
        _, overviews = server.over((start, last))
        for number, over in overviews:
            title = decode_header(over["subject"])
            _, info = server.body(number)
            text = "\n".join(line.decode("latin") for line in info.lines)
            yield NewsItem(title, text + "\n\n")
def get_subject(self, message_id):
    """Get the subject of a message

    Args:
        message_id (int): The NNTP ID of the message

    Returns:
        str: The subject of the article
    """
    mail = self._article_to_email(self._get_article(message_id))
    raw_subject = mail['Subject']
    # The raw header may still contain escaped non-ASCII text; decode it.
    return decode_header(raw_subject)
def get_news(connection):
    """Collect the most recent subjects of every group in ``groups``.

    For each group the overview of the range ``(last - 9, last)`` is read
    through ``connection``. Articles whose decoded subject contains
    ``'NETIQUETTE'`` are dropped.

    Returns:
        A list of ``(group, [(article_number, subject), ...])`` tuples, the
        inner list ordered from the last overview entry to the first.
    """
    news = []
    for grp in groups:
        _, _, _first, last, _name = connection.group(grp)
        _, overview = connection.over((last - 9, last))

        kept = []
        for number, over in overview:
            subject = nntplib.decode_header(over['subject'])
            if 'NETIQUETTE' in subject:
                # Skip replies about the netiquette.
                continue
            kept.append((number, subject))
        # Same ordering as inserting every entry at the front.
        kept.reverse()

        news.append((grp, kept))
    return news
def get_items(self):
    """Yield a ``NewsItem`` for each of the latest ``self.how_many`` articles.

    The group ``self.group`` is read from ``self.servername``. Every item is
    built from the decoded subject and the Latin-1 decoded body lines joined
    with newlines.
    """
    # Using the connection as a context manager closes it on every exit
    # path, including an abandoned generator or a failing command.
    with NNTP(self.servername) as server:
        _, _, first, last, _ = server.group(self.group)
        # Keep the requested range inside the group's article numbers.
        start = max(first, last - self.how_many + 1)
        _, overviews = server.over((start, last))
        for number, over in overviews:
            title = decode_header(over['subject'])
            _, info = server.body(number)
            body = '\n'.join(line.decode('latin') for line in info.lines)
            yield NewsItem(title, body)
def get_items(self):
    """Yield ``NewsItem(title, body)`` for the newest articles of the group.

    Up to ``self.howmany`` articles of ``self.group`` are fetched from
    ``self.servername``; ``title`` is the decoded subject and ``body`` the
    Latin-1 decoded text followed by a blank line.
    """
    # The ``with`` block closes the connection on normal completion, on
    # errors, and when the consumer stops iterating before the end.
    with NNTP(self.servername) as server:
        # group() returns: server response, number of messages in the group,
        # first and last message numbers, and the group name.
        _, _, first, last, _ = server.group(self.group)
        # Start of the article-number range to fetch, clamped to the first
        # article that exists in the group.
        start = max(first, last - self.howmany + 1)
        _, overviews = server.over((start, last))
        for number, over in overviews:
            title = decode_header(over['subject'])
            _, info = server.body(number)
            body = '\n'.join(
                line.decode('latin') for line in info.lines) + '\n\n'
            yield NewsItem(title, body)
def get_header(self, message_spec, group_name=None):
    """Return the headers of one article as a dict with lower-cased keys.

    Args:
        message_spec: Passed to ``self.cli.head``. Unless it is a string
            starting with ``'<'``, a group must already be selected
            (``self._group``) or be given via ``group_name``.
        group_name: Optional group to select with ``self.group`` first.

    Returns:
        Mapping of lower-cased header name to its decoded, stripped value.
        When a header occurs more than once, the last occurrence wins.

    Raises:
        Exception: If ``message_spec`` is not a ``'<'``-prefixed string and
            no group is selected.
    """
    if group_name is not None:
        self.group(group_name)

    is_message_id = isinstance(message_spec, str) and message_spec.startswith('<')
    if not is_message_id and not self._group:
        raise Exception('Article id supplied without group name')

    resp, header = self.cli.head(message_spec)

    raw_values = {}
    current = None
    for line in header.lines:
        text = line.decode(self.cli.encoding, errors=self.cli.errors)
        if current is not None and text[:1] in (' ', '\t'):
            # Folded header: a line starting with whitespace continues the
            # previous value, even when it contains a colon itself.
            raw_values[current] += text
            continue
        name, sep, value = text.partition(':')
        if not sep:
            # Not a "name: value" line and not a continuation; there is no
            # header to attach it to, so it is left out.
            continue
        current = name.lower()
        raw_values[current] = value

    # Decode after unfolding so the whole value is decoded in one piece.
    return {name: nntplib.decode_header(value).strip()
            for name, value in raw_values.items()}
def getNewsItem(self):
    """Yield ``NewsItem`` objects for the newest articles of ``self.group``.

    The group is selected on ``self.server``. When the response code is
    ``211``, one ``NewsItem(title, body, self.NNTP)`` is yielded per article
    of the last ``self.howmany`` articles; otherwise a single ``None`` is
    yielded. The server connection is quit when the generator finishes.
    """
    try:
        resp, count, first, last, name = self.server.group(self.group)
        status = resp.split(' ')[0]
        if status == '211':  # normal response
            # Never request article numbers below the group's first one.
            start = max(first, last - self.howmany + 1)
            resp, overviews = self.server.over((start, last))
            for number, over in overviews:
                title = decode_header(over['subject'])
                resp, info = self.server.body(number)
                # Generator expression joined into a single string.
                body = '\n'.join(
                    line.decode('latin') for line in info.lines) + '\n\n'
                yield NewsItem(title, body, self.NNTP)
        else:
            yield None
    finally:
        # Runs on exhaustion, on errors and when the consumer closes the
        # generator early, so the connection is not left open.
        self.server.quit()
def get_items(self):
    """Yield news items from the first reachable server.

    The servers in ``KNOWN_NNTP_SERVERS`` are tried in order. For the
    selected ``self.group`` the last ``self.howmany`` articles are yielded as
    ``NewsItem(title, body, "NNTP NewsGroup " + self.group)``. When a server
    fails, the next one is tried; iteration stops after the first server
    that completes without error.
    """
    for servername in KNOWN_NNTP_SERVERS:
        try:
            # Context manager: the connection is closed on success, on
            # failure and when the consumer stops iterating early.
            with NNTP(servername) as server:
                _, _, first, last, _ = server.group(self.group)
                # Keep the range inside the group's article numbers.
                start = max(first, last - self.howmany + 1)
                _, overviews = server.over((start, last))
                for number, over in overviews:
                    title = decode_header(over['subject'])
                    _, info = server.body(number)
                    body = '\n'.join(
                        line.decode('latin1') for line in info.lines) + '\n\n'
                    yield NewsItem(title, body, "NNTP NewsGroup " + self.group)
        except Exception:
            # Best effort: fall through to the next known server. This must
            # not be a bare ``except``: that would also swallow the
            # GeneratorExit raised at ``yield`` when the consumer closes the
            # generator, and the following ``yield`` would then fail with
            # "generator ignored GeneratorExit".
            continue
        else:
            break
def format_article(art, **response_args):
    """Build a ``Response`` that presents one article.

    ``art`` is mutated: the headers ``X-FromName`` and ``X-FromEmail`` are set
    from the parsed ``From`` header (the address doubles as the name when no
    display name is present). The response text is the article payload with
    newlines replaced by spaces; the title shows the newsgroups, the sender
    and the subject, plus the date when the article is older than 120 seconds.

    Args:
        art: Article object; presumably an ``email.message.Message`` (it is
            used through ``[]``, ``in``, ``items()`` and ``get_payload()``).
        **response_args: Extra keyword arguments forwarded to ``Response``.

    Returns:
        The ``Response`` for the article.
    """
    sender = art["From"] if "From" in art else ""
    art["X-FromName"], art["X-FromEmail"] = parseaddr(sender)
    if art["X-FromName"] == '':
        art["X-FromName"] = art["X-FromEmail"]

    date = mktime_tz(parsedate_tz(art["Date"]))
    # Field 0 colours the newsgroup, field 1 colours the sender. The sender
    # colour previously reused field 0, leaving the second positional
    # argument (the hash of the sender address) unused.
    if date < time.time() - 120:
        title = "\x0314In \x0F\x03{0:02d}{Newsgroups}\x0F\x0314: on \x0F{Date}\x0314 by \x0F\x03{1:02d}{X-FromName}\x0F \x02{Subject}\x0F"
    else:
        title = "\x0314In \x0F\x03{0:02d}{Newsgroups}\x0F\x0314: by \x0F\x03{1:02d}{X-FromName}\x0F \x02{Subject}\x0F"

    group_colour = adler32(art["Newsgroups"].encode()) & 0xf
    sender_colour = adler32(art["X-FromEmail"].encode()) & 0xf
    headers = {name: decode_header(value) for name, value in art.items()}

    return Response(art.get_payload().replace('\n', ' '),
                    title=title.format(group_colour, sender_colour, **headers),
                    **response_args)
def get_group(self, message_spec, group_name=None):
    """Return the decoded overview headers selected by ``message_spec``.

    Optionally selects ``group_name`` first, then requests the overview of
    ``message_spec`` from ``self.cli`` and decodes every header value.

    Returns:
        A list of ``(article_id, headers)`` tuples, where ``headers`` maps
        each overview field name (leading colons removed) to its decoded
        value.

    Raises:
        Exception: If ``message_spec`` is a tuple or list and no group is
            selected.
    """
    ## because we need to fully decode the header
    ## in python3 land, we're dealing with
    ## loading the entire thing to mem..
    ##
    ## if you need to know the # of articles
    ## get it from the currently selected group
    ##
    ## selects the current group and
    ## returns a list of headers as specified
    ## by the message_spec arg.
    ## per NNTP, message_spec is either, a message_id
    ## or otherwise a (first, last) tuple of
    ## article ids
    if group_name is not None:
        self.group(group_name)

    if isinstance(message_spec, (tuple, list)) and not self._group:
        raise Exception('Article ids supplied without group name')

    resp, overviews = self.cli.over(message_spec)
    log.debug(len(overviews))
    h = []
    for article_id, ovr in overviews:
        d = {}
        log.debug(u'BEFORE %s' % ovr['subject'])
        for k, v in ovr.items():
            ## (some) short headers from grouplists have these colon
            ## prefixes for no apparent reason (they're not in the
            ## raw headers). We do this so that the response of
            ## short and long headers properly intersect (and breaks
            ## the general rule of not touching the data as much as possible)
            k = k.lstrip(':')
            d[k] = nntplib.decode_header(v)
        log.debug('AFTER %s' % d['subject'])
        h.append((article_id, d))

    return h
def backend_req(self, req):
    """Answer a backend request.

    ``b'is_online'`` is acknowledged with ``ok_send(None)``. ``b'get'``
    selects ``self.newsgroup``, reads the overview of the range
    ``(0, last)`` and sends the article envelopes newest first in batches of
    100 through ``ok_send``, followed by a final ``ok_send(None)``. Any other
    request is only logged.
    """
    print("[nntp-plugin]: backend_req = ", req, flush=True, file=sys.stderr)

    if req.data == b'is_online':
        self.ok_send(None)
        return
    if req.data != b'get':
        return

    resp, count, first, last, name = self.conn.group(self.newsgroup)
    print('Group', name, 'has', count, 'articles, range', first, 'to', last,
          flush=True, file=sys.stderr)
    resp, overviews = self.conn.over((0, last))

    newest_first = iter(reversed(overviews))
    for batch in chunks(newest_first, 100):
        ret = []
        for number, over in batch:
            env = {
                "hash": number,
                "subject": nntplib.decode_header(over["subject"]),
                "from": nntplib.decode_header(over["from"]),
                "date": nntplib.decode_header(over["date"]),
                "message_id": nntplib.decode_header(over["message-id"]),
                "references": nntplib.decode_header(over["references"]),
            }
            try:
                env["to"] = nntplib.decode_header(over["to"])
            except KeyError:
                # Overview without a "to" field: use the newsgroup name.
                env["to"] = self.newsgroup
            ret.append(env)
        print("ret len = ", len(ret), flush=True, file=sys.stderr)
        self.ok_send(ret)

    self.ok_send(None)
def cmd_overview(msg):
    """Yield, for each requested group, a ``Response`` listing its articles.

    Raises ``IMException`` when ``msg.kwargs`` has no ``"host"`` entry or when
    ``msg.args`` is empty. Articles come from
    ``read_group(group, **msg.kwargs)``; each one is turned into a formatted
    line. A group that yields no article produces no response.
    """
    if "host" not in msg.kwargs:
        raise IMException("please give a hostname in keywords")
    if len(msg.args) == 0:
        raise IMException("which group would you overview?")

    template = "On {date}, from \x03{0:02d}{X-FromName}\x0F \x02{subject}\x0F: \x0314{message-id}\x0F"

    for group in msg.args:
        entries = []
        for article in read_group(group, **msg.kwargs):
            raw_from = article["from"] if "from" in article else ""
            display_name, address = parseaddr(raw_from)
            article["X-FromName"] = display_name
            article["X-FromEmail"] = address
            if article["X-FromName"] == '':
                # Fall back to the address when there is no display name.
                article["X-FromName"] = article["X-FromEmail"]

            # Colour index in 0..15 computed from the sender address.
            sender_colour = adler32(article["X-FromEmail"].encode()) & 0xf
            fields = {key: decode_header(value) for key, value in article.items()}
            entries.append(template.format(sender_colour, **fields))

        if len(entries):
            title_colour = adler32(group[0].encode()) & 0xf
            yield Response(entries, channel=msg.channel,
                           title="In \x03{0:02d}{1}\x0F".format(title_colour, group))
parsed_encoding = "utf-8" parsed_content_type = None parsed_message_id = None parsed_date = None parsed_subject = None parsed_subject_original = None parsed_ref = None parsed_body_text = "" parsed_body_text_original = None parsed_from = None parsed_from_original = None has_ref = 0 # Get the rest try: parsed_date = nntplib.decode_header(over['date']) except Exception: pass try: parsed_content_type = nntplib.decode_header(over['content-type']) except Exception: pass try: parsed_ref = nntplib.decode_header(over['references']) except Exception: pass try: parsed_subject = nntplib.decode_header(over['subject'])
def update_event(self, inp=-1):
    """Decode the header read from input 0 and write it to output 0.

    ``inp`` is accepted but not used by this method.
    """
    raw_header = self.input(0)
    decoded = nntplib.decode_header(raw_header)
    self.set_output_val(0, decoded)
# group 'Return a tuple (response, count, first, last, name) where count is the (estimated) number of articles in the group, first is the first article number in the group, last is the last article number in the group, and name is the group name. The numbers are returned as strings.' resp,count,first,last,name = s.group(groupname) print('Group', name, 'has', count, 'articles, range', first, 'to', last) # over 'Return a pair (response, overviews). overviews is a list of (article_number, overview) tuples, one for each article selected by message_spec' resp,overviews = s.over((last-1,last)) for num,over in overviews: print(num)# 1-100 #print(over) # print(over.keys()) # ['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject'] print(over.get('date')) print(nntplib.decode_header(over.get('from'))) print(over.get('message-id')) print(over.get('subject')) # stat 'Return a triple (response, number, id) where number is the article number and id is the message id.' resp,num,msg_id = s.stat(last) print(num,msg_id) # article 'Return a tuple (response, info) where info is a namedtuple with three attributes number, message_id and lines (in that order).' print('-'*10) resp,info = s.article(last) print(info.number,info.message_id,len(info.lines)) # head
# NNTP info if nntp_user and nntp_password: return nntplib.NNTP.__init__(self, nntp_host, user=nntp_user, password=nntp_password, port=nntp_port) else: return nntplib.NNTP.__init__(self, nntp_host, port=nntp_port) if __name__ == "__main__": # Albasani's anonymous read-only account nntp = TorNNTP(nntp_host="news.albasani.net", nntp_port=119, nntp_user="******", nntp_password="******") resp, count, first, last, name = nntp.group( 'alt.anonymous.messages') print('Group', name, 'has', count, 'articles, range', first, 'to', last) resp, overviews = nntp.over((last - 9, last)) for id, over in overviews: print(id, nntplib.decode_header(over['subject']), nntplib.decode_header(over['from']))
def gives(a, b):
    """Assert that decoding header ``a`` yields ``b``."""
    decoded = nntplib.decode_header(a)
    self.assertEqual(decoded, b)
def scan(self, group_name, first, last):
    """Scan a range of a group for binary segments, grouped by subject.

    Args:
        group_name: Name of the newsgroup to select.
        first: First article number of the range to fetch.
        last: Last article number of the range to fetch.

    Returns:
        A dict mapping each subject (with the ``(n/m)`` segment marker
        removed) to a message dict holding the decoded subject, posting date,
        poster, group name, xref, total and available segment counts and the
        segments found so far (keyed by segment number as a string).
        Subjects reported as blacklisted by ``pynab.parts.is_blacklisted``
        are removed. ``None`` is returned when the server raises an
        ``nntplib.NNTPError``.
    """
    log.info('{}: Collecting parts {:d} to {:d}...'.format(group_name, first, last))

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()

    try:
        # grab the headers we're after
        self.connection.group(group_name)
        _, overviews = self.connection.over((first, last))
    except nntplib.NNTPError as nntpe:
        log.debug('NNTP Error: {}'.format(nntpe))
        return None

    # Matches "(segment/total)" markers; raw string so the backslashes reach
    # the regex engine without invalid-escape warnings.
    segment_marker = re.compile(r'\((\d+)[\/](\d+)\)')

    messages = {}
    ignored = 0
    received = []
    for article_number, overview in overviews:
        # keep track of which messages we received so we can
        # optionally check for ones we missed later
        received.append(article_number)

        # get the current segment number
        results = segment_marker.findall(overview['subject'])
        if not results:
            # if there's no match at all, it's probably not a binary
            ignored += 1
            continue

        # it might match twice, so just get the last one
        # the first is generally the part number
        segment_number, total_segments = results[-1]

        if not (int(segment_number) > 0 and int(total_segments) > 0):
            ignored += 1
            continue

        # strip the segment number off the subject so
        # we can match binary parts together
        subject = overview['subject'].replace(
            '(' + str(segment_number) + '/' + str(total_segments) + ')', ''
        ).strip()

        # build the segment, make sure segment number and size are ints
        segment = {
            'message_id': overview['message-id'][1:-1],
            'segment': int(segment_number),
            'size': int(overview[':bytes']),
        }

        if subject in messages:
            # we've already got a binary by this name, add this segment
            messages[subject]['segments'][segment_number] = segment
            messages[subject]['available_segments'] += 1
        else:
            # dateutil will parse the date as whatever and convert to UTC
            # some subjects/posters have odd encoding, which will break pymongo
            # so we make sure it doesn't
            messages[subject] = {
                'subject': nntplib.decode_header(subject).encode('utf-8', 'surrogateescape').decode('latin-1'),
                'posted': dateutil.parser.parse(overview['date']),
                'posted_by': nntplib.decode_header(overview['from']).encode('utf-8', 'surrogateescape').decode(
                    'latin-1'),
                'group_name': group_name,
                'xref': overview['xref'],
                'total_segments': int(total_segments),
                'available_segments': 1,
                'segments': {segment_number: segment, },
            }

    # instead of checking every single individual segment, package them first
    # so we typically only end up checking the blacklist for ~150 parts instead of thousands
    blacklist = [k for k in messages if pynab.parts.is_blacklisted(k, group_name)]
    blacklisted_parts = len(blacklist)
    # Counted before removal, so the figure logged below includes the
    # blacklisted parts.
    total_parts = len(messages)
    for k in blacklist:
        del messages[k]

    log.info(
        '{}: Received {:d} articles of {:d}, forming {:d} parts with {:d} ignored and {:d} blacklisted.'
        .format(group_name, len(received), last - first + 1, total_parts, ignored, blacklisted_parts)
    )

    # TODO: implement re-checking of missed messages, or maybe not
    # most parts that get ko'd these days aren't coming back anyway
    # (``received`` holds the article numbers needed for that check)

    end = time.perf_counter()
    log.info('Time elapsed: {:.2f}s'.format(end - start))

    return messages
如果输入速度不够快 把代码放到脚本里, 或着将服务器对象的创建和方法的调用放在同一行内 (以分号隔开) ''' '''-----------------------------------------------------------------------------------------------------------''' # 模块介绍 https://docs.python.org/3/library/nntplib.html import nntplib server = nntplib.NNTP('news.gmane.org') resp, count, frist, last, name = server.group('gmane.comp.python.committers') print('Group--', name, 'has--', count, 'articles, range--', frist, 'to--', last) resp, overviews = server.over((last -9, last)) print(resp, '----', overviews)#返回消息头和概述信息 for id , over in overviews: print(id, nntplib.decode_header(over['subject'])) print(server.body(id)[1])#所有文章信息 print('-----------------', resp)#响应信息 server.quit() """ 当头decode_header()可以包含非ASCII字符时,建议使用该函数: NNTP.over(message_spec,*,file = None ) 在旧服务器上发送OVER命令或XOVER命令。 message_spec可以是表示消息id的字符串,也可以是表示当前组中的文章范围的(第一,最后)元组元组,或者指示从第一个到最后一个文章开始的文章范围的(第一,无)元组 当前组中的文章,或“无”选择当前组中的当前文章。 返回一对(响应,概述)。 概述是(article_number,概述)元组的列表,一个用于每个由message_spec选择的文章。 每个概述都是一个具有相同数量项目的字典,但这个数字取决于服务器。 nntplib.decode_header(header_str ) 解码标头值,解除转义的非ASCII字符。 header_str必须是一个str对象。返回未转义的值。建议使用此功能以人类可读的形式显示一些标题: """
def format_article(art, **response_args):
    """Render one article as a ``Response``.

    Side effect: ``art`` receives the headers ``X-FromName`` and
    ``X-FromEmail`` parsed from its ``From`` header; when there is no display
    name the address is used as the name. The response body is the article
    payload with newlines turned into spaces. The title names the
    newsgroups, the sender and the subject, and includes the date when the
    article is more than 120 seconds old.

    Args:
        art: Article object; presumably an ``email.message.Message`` (it is
            accessed through ``[]``, ``in``, ``items()`` and
            ``get_payload()``).
        **response_args: Keyword arguments passed through to ``Response``.

    Returns:
        The ``Response`` describing the article.
    """
    art["X-FromName"], art["X-FromEmail"] = parseaddr(
        art["From"] if "From" in art else "")
    if art["X-FromName"] == '':
        art["X-FromName"] = art["X-FromEmail"]

    # Positional field 0 is the newsgroup colour and field 1 the sender
    # colour. Both placeholders used to be {0}, so the sender was coloured
    # with the newsgroup hash and the sender hash was never used.
    older_title = "\x0314In \x0F\x03{0:02d}{Newsgroups}\x0F\x0314: on \x0F{Date}\x0314 by \x0F\x03{1:02d}{X-FromName}\x0F \x02{Subject}\x0F"
    recent_title = "\x0314In \x0F\x03{0:02d}{Newsgroups}\x0F\x0314: by \x0F\x03{1:02d}{X-FromName}\x0F \x02{Subject}\x0F"

    date = mktime_tz(parsedate_tz(art["Date"]))
    title = older_title if date < time.time() - 120 else recent_title

    colours = (
        adler32(art["Newsgroups"].encode()) & 0xf,
        adler32(art["X-FromEmail"].encode()) & 0xf,
    )
    decoded_headers = {h: decode_header(i) for h, i in art.items()}

    return Response(art.get_payload().replace('\n', ' '),
                    title=title.format(*colours, **decoded_headers),
                    **response_args)