def download_group(name):
    """Dump every article of newsgroup *name* into a local directory.

    A directory called *name* is created if it does not exist yet.  Each
    article returned by the server's XHDR listing is pretty-printed into a
    file inside that directory, named after the article id.

    Args:
        name: The newsgroup name; also used as the output directory name.
    """
    if not os.path.exists(name):
        os.mkdir(name)
    s = NNTP('news.gmane.org')
    try:
        # Keep the caller's ``name`` for the directory: rebinding it to the
        # name echoed by the server could point at a directory that was
        # never created.
        resp, count, first, last, group_name = s.group(name)
        print('Group %s has %s articles, range %s to %s'
              % (group_name, count, first, last))
        # %-formatting builds the range whether the bounds are str or int.
        resp, subs = s.xhdr('subject', '%s-%s' % (first, last))
        for article_id, _subject in subs:
            print(article_id)
            with open(os.path.join(name, str(article_id)), 'wb') as fp:
                pprint.pprint(s.article(article_id), stream=fp)
    finally:
        # Always release the connection, even if a download fails midway.
        s.quit()
def getItems(self):
    """Yield a NewsItem for every article posted within the time window.

    The window starts ``self.window * day`` seconds before now (``day`` is
    a module-level constant, presumably the number of seconds in a day --
    confirm against its definition).  Articles are fetched lazily, one per
    iteration, from the group ``self.group`` on ``self.servername``.

    Yields:
        NewsItem: built from the article's subject and its payload (the
        first part when the message is multipart).
    """
    start = localtime(time() - self.window * day)
    date = strftime('%y%m%d', start)
    hour = strftime('%H%M%S', start)

    server = NNTP(self.servername)
    try:
        ids = server.newnews(self.group, date, hour)[1]
        for article_id in ids:
            # Index 3 of the ARTICLE reply holds the list of lines.
            lines = server.article(article_id)[3]
            message = message_from_string('\n'.join(lines))

            title = message['subject']
            body = message.get_payload()
            if message.is_multipart():
                body = body[0]

            yield NewsItem(title, body)
    finally:
        # Runs on exhaustion, on error, and when the consumer abandons the
        # generator early, so the connection is never leaked.
        server.quit()
def getItems(self):
    """Yield a NewsItem for every article posted within the time window.

    The window starts ``day * self.window`` seconds before now (``day`` is
    a module-level constant, presumably the number of seconds in a day --
    confirm against its definition).  Articles are fetched lazily, one per
    iteration, from the group ``self.group`` on ``self.servername``.

    Yields:
        NewsItem: built from the article's subject and its payload (the
        first part when the message is multipart).
    """
    start = localtime(time() - day * self.window)
    date = strftime('%y%m%d', start)
    hour = strftime('%H%M%S', start)

    server = NNTP(self.servername)
    try:
        ids = server.newnews(self.group, date, hour)[1]
        for article_id in ids:
            # Index 3 of the ARTICLE reply holds the list of lines.
            lines = server.article(article_id)[3]
            message = message_from_string('\n'.join(lines))

            title = message['subject']
            body = message.get_payload()
            if message.is_multipart():
                body = body[0]

            yield NewsItem(title, body)
    finally:
        # Runs on exhaustion, on error, and when the consumer abandons the
        # generator early, so the connection is never leaked.
        server.quit()
def getItems(self):
    """Yield NewsItem objects for at most the first 10 recent articles.

    "Recent" means posted since ``self.window`` days before today.  The
    articles are read from the group ``self.group`` on the server
    ``self.server_name`` and fetched lazily, one per iteration.

    Yields:
        NewsItem: built from the article's subject (with newlines removed)
        and its payload (the first part when the message is multipart).
    """
    # Start of the time window from which news is collected.
    since = date.today() - timedelta(days=self.window)
    server = NNTP(self.server_name)
    try:
        # Ids of all articles posted since the start of the window.
        ids = server.newnews(self.group, since)[1]
        # Only the first 10 articles are processed.
        for article_id in ids[:10]:
            # reply[1] is the article info tuple; its third field holds
            # the raw lines of the article as bytes.
            article = server.article(article_id)[1][2]
            lines = [line.decode() for line in article]
            # Join the lines and parse them into a message object.
            message = message_from_string('\n'.join(lines))
            # Folded subject headers contain newlines; flatten them.
            title = message['subject'].replace('\n', '')
            body = message.get_payload()
            if message.is_multipart():
                # Use the first part of a multipart message as the body.
                body = body[0]
            yield NewsItem(title, body)
    finally:
        # Close the connection on exhaustion, on error, and when the
        # consumer abandons the generator early.
        server.quit()
class NewsWatcher(MessageWatcher):
    """Poll a set of newsgroups on an NNTP server for new articles.

    After ``start()`` the watcher polls every ``pollInterval`` seconds via
    a gtk timeout.  Each new article (optionally only those containing
    ``tag`` in at least one line) is handed to ``self.parseMessage``.
    """

    def __init__(self, server, groups, user=None, pw=None, port=None,
                 tag=None):
        """Store the connection settings; no connection is opened yet.

        Args:
            server: Host name of the NNTP server.
            groups: Iterable of newsgroup names to watch.
            user: Optional user name for authentication.
            pw: Optional password for authentication.
            port: Optional port; ``NNTP_PORT`` is used when falsy.
            tag: Optional substring; when set, only articles with at least
                one line containing it are processed.
        """
        MessageWatcher.__init__(self)
        self.server = server
        self.groups = groups
        self.nntp = None  # the NNTP connection object
        self.user = user
        self.pw = pw
        self.port = port
        self.tag = tag
        self.last = {}  # group name -> number of the last article seen
        self.timeout = None  # gtk timeout handle while polling is active
        self.pollInterval = 60  # seconds between polls
        self.debug = 0

    def __repr__(self):
        return "<NewsWatcher %s:%s (%s)>" % (self.server, self.port,
                                             ",".join(self.groups))

    def __getstate__(self):
        """Return the state from MessageWatcher with the connection cleared."""
        d = MessageWatcher.__getstate__(self)
        d['nntp'] = None  # just in case
        return d

    def start(self):
        """Connect to the server and schedule periodic polling."""
        port = self.port
        if not port:
            port = NNTP_PORT
        self.nntp = NNTP(self.server, port, self.user, self.pw,
                         readermode=1)
        # only look for messages that appear after we start. Usenet is big.
        if not self.last:  # only do this the first time
            for g in self.groups:
                resp, count, first, last, name = self.nntp.group(g)
                self.last[g] = int(last)
                if self.debug:
                    print("last[%s]: %d" % (g, self.last[g]))
        self.timeout = gtk.timeout_add(self.pollInterval*1000,
                                       self.doTimeout)

    def stop(self):
        """Close the connection (if any) and cancel the polling timer."""
        # Guard against stop() being called before start().
        if self.nntp is not None:
            self.nntp.quit()
            self.nntp = None
        if self.timeout:
            gtk.timeout_remove(self.timeout)
            self.timeout = None

    def doTimeout(self):
        """Timer callback: poll once and ask gtk to keep the timer alive."""
        self.poll()
        return gtk.TRUE  # keep going

    def poll(self):
        """Fetch and dispatch the articles that arrived since the last poll."""
        for g in self.groups:
            resp, count, first, last, name = self.nntp.group(g)
            for num in range(self.last[g]+1, int(last)+1):
                try:
                    resp, num, id, lines = self.nntp.article("%d" % num)
                except NNTPError:
                    # Article unavailable (e.g. cancelled); skip it.
                    continue
                name = "%s:%d" % (g, int(num))
                if self.debug:
                    print("got %s" % name)
                # The filter must use self.tag; a bare ``tag`` is not
                # defined in this scope and raised NameError.
                if self.tag and not any(line.find(self.tag) != -1
                                        for line in lines):
                    continue
                self.parseMessage(name, name, time.time(), lines)
            self.last[g] = int(last)
# Alternative server:
#server = NNTP('news.mozilla.org')
server = NNTP('news.kornet.net')
try:
    print(server.group('comp.lang.python.announce')[0])

    # group(self, name)
    #   Process a GROUP command.  Argument:
    #   - group: the group name
    #   Returns:
    #   - resp: server response if successful
    #   - count: number of articles (string)
    #   - first: first article number (string)
    #   - last: last article number (string)
    #   - name: the group name
    group = server.group('han.test')
    print(repr(group))

    first = group[2]
    last = group[3]
    print("first  %s" % first)
    print("last  %s" % last)

    # Print at most the first 10 articles.  The range is inclusive of
    # ``last`` so the newest article is not silently skipped.
    i = 0
    for id in range(int(first), int(last) + 1):
        i += 1
        print(server.article(str(id)))
        if i == 10:
            break
finally:
    # Close the connection even if one of the requests fails.
    server.quit()
#__author: ZhengNengjin
#__date: 2018/10/25
from nntplib import NNTP

# Fetch one article from comp.lang.python and print it line by line.
n = NNTP('your.nntp.server')
try:
    # Newsgroup names are dot-separated ('comp,lang.python' was a typo).
    rsp, ct, fst, lst, grp = n.group('comp.lang.python')

    reply = n.article('110457')
    rsp = reply[0]
    if len(reply) == 2:
        # Python 3 shape: (response, (number, message_id, lines))
        anum, mid, data = reply[1]
    else:
        # Python 2 shape: (response, number, id, lines)
        anum, mid, data = reply[1:]

    for eachLine in data:
        print(eachLine)
finally:
    # Close the connection even if a request fails.
    n.quit()
class DanskGruppenArchive(object):
    """Class that provides an interface to Dansk-gruppens emails archive on
    gmane
    """

    def __init__(self, article_cache_size=300, cache_file=None):
        """Connect to the news server and load the article cache

        Args:
            article_cache_size (int): Size above which the oldest cached
                article is dropped when a new one is requested
            cache_file (str): Optional path of a pickle file used to keep
                the cache across sessions
        """
        # Connect to news.gmane.org
        self.nntp = NNTP('news.gmane.org')
        # Setting the group returns information, which right now we ignore
        self.nntp.group('gmane.comp.internationalization.dansk')

        # Keep a local cache in an OrderedDict, transferred across session
        # in a pickled version in a file
        self.article_cache_size = article_cache_size
        self.cache_file = cache_file
        if cache_file and path.isfile(cache_file):
            # NOTE(security): unpickling can execute arbitrary code; only
            # point cache_file at files written by this class.
            with open(cache_file, 'rb') as file_:
                self.article_cache = pickle.load(file_)
            logging.info('Loaded %i items from file cache',
                         len(self.article_cache))
        else:
            self.article_cache = OrderedDict()

    def close(self):
        """Quit the NNTP session and save the cache"""
        try:
            self.nntp.quit()
        finally:
            # Save the cache even when the server connection is already
            # broken, otherwise all downloaded articles would be lost.
            if self.cache_file:
                with open(self.cache_file, 'wb') as file_:
                    pickle.dump(self.article_cache, file_)
                logging.info('Wrote %i items to cache file',
                             len(self.article_cache))

    @property
    def last(self):
        """Return the last NNTP ID as an int"""
        return self.nntp.group('gmane.comp.internationalization.dansk')[3]

    def _get_article(self, message_id):
        """Get an article (cached)

        Args:
            message_id (int): The NNTP ID of the message

        Returns:
            namedtuple: The article information as returned by NNTP; its
                ``lines`` attribute is a list of byte strings
        """
        # Clear excess cache
        if len(self.article_cache) > self.article_cache_size:
            self.article_cache.popitem(last=False)

        # Check if article is in cache and if not, put it there
        if message_id not in self.article_cache:
            # nntp.article() returns: response, information
            # pylint: disable=unbalanced-tuple-unpacking
            _, info = self.nntp.article(message_id)
            self.article_cache[message_id] = info
        return self.article_cache[message_id]

    @staticmethod
    def _article_to_email(article):
        """Convert a raw article to an email object

        Args:
            article (namedtuple): An article named tuple as returned by NNTP

        Returns:
            email.message: An email message object
        """
        # article lines are a list of byte strings
        decoded_lines = [line.decode('ascii') for line in article.lines]
        article_string = '\n'.join(decoded_lines)

        # Make an email object
        return email.message_from_string(article_string)

    def get_subject(self, message_id):
        """Get the subject of an message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The subject of the article
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)
        # The subject may be encoded by NNTP, so decode it
        return decode_header(mail['Subject'])

    def get_body(self, message_id):
        """Get the body of a message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The body of the article as a str or None if no body could
                be found or succesfully decoded
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        # Walk parts of the email and look for text/plain content type
        for part in mail.walk():
            if part.get_content_type() != 'text/plain':
                continue
            body = part.get_payload(decode=True)

            # Find the text encoding from lines like:
            # text/plain; charset=UTF-8
            # text/plain; charset=utf-8; format=flowed
            # A part without any Content-Type header still counts as
            # text/plain, in which case the header value is None.
            content_type = part['Content-Type'] or ''
            for type_part in content_type.split(';'):
                type_part = type_part.strip()
                if type_part.startswith('charset='):
                    # Encoding sometimes has "" around it
                    encoding = type_part.replace('charset=', '').strip('"')
                    break
            else:
                message = 'Looking for the character encoding in the '\
                          'string "%s" went wrong'
                logging.warning(message, part['Content-Type'])
                return None

            # Decode and return the body
            try:
                return body.decode(encoding)
            except LookupError:
                message = 'Do not know how to handle a body with '\
                          'charset: %s'
                logging.warning(message, encoding)
            except UnicodeDecodeError:
                message = 'Unable to decode the body with the declared '\
                          'charset: %s'
                logging.warning(message, encoding)
            return None

        # No text/plain part found
        return None

    def get_attachment(self, message_id, filename):
        """Get attachment by filename

        Args:
            message_id (int): The NNTP ID of the the message
            filename (str): The filename for the attachment

        Returns:
            bytes: The binary content of the attachment, or None if the
                message has no attachment with that filename
        """
        return self.get_attachments(message_id).get(filename)

    def get_attachments(self, message_id):
        """Get attachments

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            dict: Dict with attachments where keys are filenames and values
                are their binary content

        Raises:
            Exception: If an attachment part carries no filename
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        attachments = {}
        # Walk parts of the email and collect every part that is marked as
        # an attachment by its Content-Disposition header
        for part in mail.walk():
            content_disp = part['Content-Disposition']
            if not (content_disp and content_disp.startswith('attachment')):
                continue

            # Get the filename from a line like: Content-Disposition:
            # attachment; filename="hitori.master.da.podiff"
            filename = None
            for disp_part in content_disp.split(';'):
                disp_part = disp_part.strip()
                if disp_part.startswith('filename='):
                    filename = disp_part.replace('filename=', '')
                    # Strip " from filename
                    filename = filename.strip('"')

            if filename is None:
                message = 'Unable to extract filename from '\
                          'Content-Disposition: %s'
                logging.warning(message, part['Content-Disposition'])
                raise Exception('Unable to extract filename')

            attachments[filename] = part.get_payload(decode=True)

        return attachments
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from nntplib import NNTP

# Fetch one article from comp.lang.python and print it line by line.
n = NNTP('your.nntp.server')
try:
    rsp, ct, fst, lst, grp = n.group('comp.lang.python')
    rsp, anum, mid, data = n.article('110457')
    for eachLine in data:
        print(eachLine)
finally:
    # Close the connection even if a request fails.
    n.quit()

# Sample output from an interactive session:
#
# From: "Alex Martelli" <alex@...>
# Subject: Re: Rounding Question
# Date: Wed, 21 Feb 2001 17:05:36 +0100
#
# "Remco Gerlich" <remco@...> wrote:
# Jacob Kaplan-Moss <jacob@...> wrote in comp.lang.python:
# So I've got a number between 40 and 130 that I want to round up to
# the nearest 10. That is:
#
# 40 --> 40, 41 --> 50, ..., 49 --> 50, 50 --> 50, 51 --> 60
#
# Rounding like this is the same as adding 5 to the number and then
# rounding down. Rounding down is substracting the remainder if you were
# to divide by 10, for which we use the % operator in Python.
#
# This will work if you use +9 in each case rather than +5 (note that he
# doesn't really want rounding -- he wants 41 to 'round' to 50, for ex).
#
# Alex
#
# >>> n.quit()
# '205 closing connection - goodbye!'
class DanskGruppenArchive(object):
    """Class that provides an interface to Dansk-gruppens emails archive on
    gmane
    """

    def __init__(self, article_cache_size=300, cache_file=None):
        """Connect to the news server and load the article cache

        Args:
            article_cache_size (int): Size above which the oldest cached
                article is dropped when a new one is requested
            cache_file (str): Optional path of a pickle file used to keep
                the cache across sessions
        """
        # Connect to news.gmane.org
        self.nntp = NNTP('news.gmane.org')
        # Setting the group returns information, which right now we ignore
        self.nntp.group('gmane.comp.internationalization.dansk')

        # Keep a local cache in an OrderedDict, transferred across session
        # in a pickled version in a file
        self.article_cache_size = article_cache_size
        self.cache_file = cache_file
        if cache_file and path.isfile(cache_file):
            # NOTE(security): unpickling can execute arbitrary code; only
            # point cache_file at files written by this class.
            with open(cache_file, 'rb') as file_:
                self.article_cache = pickle.load(file_)
            logging.info('Loaded %i items from file cache',
                         len(self.article_cache))
        else:
            self.article_cache = OrderedDict()

    def close(self):
        """Quit the NNTP session and save the cache"""
        try:
            self.nntp.quit()
        finally:
            # Save the cache even when the server connection is already
            # broken, otherwise all downloaded articles would be lost.
            if self.cache_file:
                with open(self.cache_file, 'wb') as file_:
                    pickle.dump(self.article_cache, file_)
                logging.info('Wrote %i items to cache file',
                             len(self.article_cache))

    @property
    def last(self):
        """Return the last NNTP ID as an int"""
        return self.nntp.group('gmane.comp.internationalization.dansk')[3]

    def _get_article(self, message_id):
        """Get an article (cached)

        Args:
            message_id (int): The NNTP ID of the message

        Returns:
            namedtuple: The article information as returned by NNTP; its
                ``lines`` attribute is a list of byte strings
        """
        # Clear excess cache
        if len(self.article_cache) > self.article_cache_size:
            self.article_cache.popitem(last=False)

        # Check if article is in cache and if not, put it there
        if message_id not in self.article_cache:
            # nntp.article() returns: response, information
            # pylint: disable=unbalanced-tuple-unpacking
            _, info = self.nntp.article(message_id)
            self.article_cache[message_id] = info
        return self.article_cache[message_id]

    @staticmethod
    def _article_to_email(article):
        """Convert a raw article to an email object

        Args:
            article (namedtuple): An article named tuple as returned by NNTP

        Returns:
            email.message: An email message object
        """
        # article lines are a list of byte strings
        decoded_lines = [line.decode('ascii') for line in article.lines]
        article_string = '\n'.join(decoded_lines)

        # Make an email object
        return email.message_from_string(article_string)

    def get_subject(self, message_id):
        """Get the subject of an message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The subject of the article
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)
        # The subject may be encoded by NNTP, so decode it
        return decode_header(mail['Subject'])

    def get_body(self, message_id):
        """Get the body of a message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The body of the article as a str or None if no body could
                be found or succesfully decoded
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        # Walk parts of the email and look for text/plain content type
        for part in mail.walk():
            if part.get_content_type() != 'text/plain':
                continue
            body = part.get_payload(decode=True)

            # Find the text encoding from lines like:
            # text/plain; charset=UTF-8
            # text/plain; charset=utf-8; format=flowed
            # A part without any Content-Type header still counts as
            # text/plain, in which case the header value is None.
            content_type = part['Content-Type'] or ''
            for type_part in content_type.split(';'):
                type_part = type_part.strip()
                if type_part.startswith('charset='):
                    # Encoding sometimes has "" around it
                    encoding = type_part.replace('charset=', '').strip('"')
                    break
            else:
                message = 'Looking for the character encoding in the '\
                          'string "%s" went wrong'
                logging.warning(message, part['Content-Type'])
                return None

            # Decode and return the body
            try:
                return body.decode(encoding)
            except LookupError:
                message = 'Do not know how to handle a body with '\
                          'charset: %s'
                logging.warning(message, encoding)
            except UnicodeDecodeError:
                message = 'Unable to decode the body with the declared '\
                          'charset: %s'
                logging.warning(message, encoding)
            return None

        # No text/plain part found
        return None

    def get_attachment(self, message_id, filename):
        """Get attachment by filename

        Args:
            message_id (int): The NNTP ID of the the message
            filename (str): The filename for the attachment

        Returns:
            bytes: The binary content of the attachment, or None if the
                message has no attachment with that filename
        """
        return self.get_attachments(message_id).get(filename)

    def get_attachments(self, message_id):
        """Get attachments

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            dict: Dict with attachments where keys are filenames and values
                are their binary content

        Raises:
            Exception: If an attachment part carries no filename
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        attachments = {}
        # Walk parts of the email and collect every part that is marked as
        # an attachment by its Content-Disposition header
        for part in mail.walk():
            content_disp = part['Content-Disposition']
            if not (content_disp and content_disp.startswith('attachment')):
                continue

            # Get the filename from a line like: Content-Disposition:
            # attachment; filename="hitori.master.da.podiff"
            filename = None
            for disp_part in content_disp.split(';'):
                disp_part = disp_part.strip()
                if disp_part.startswith('filename='):
                    filename = disp_part.replace('filename=', '')
                    # Strip " from filename
                    filename = filename.strip('"')

            if filename is None:
                message = 'Unable to extract filename from '\
                          'Content-Disposition: %s'
                logging.warning(message, part['Content-Disposition'])
                raise Exception('Unable to extract filename')

            attachments[filename] = part.get_payload(decode=True)

        return attachments
# Fields available in the overview dict:
# print(over.keys())
# ['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']
print(over.get('date'))
print(nntplib.decode_header(over.get('from')))
print(over.get('message-id'))
print(over.get('subject'))

# stat
# 'Return a triple (response, number, id) where number is the article
# number and id is the message id.'
resp, num, msg_id = s.stat(last)
print(num, msg_id)

# article
# 'Return a tuple (response, info) where info is a namedtuple with three
# attributes number, message_id and lines (in that order).'
#
# head
# 'Same as article(), but sends a HEAD command. The lines returned (or
# written to file) will only contain the message headers, not the body.'
#
# body
# 'Same as article(), but sends a BODY command. The lines returned (or
# written to file) will only contain the message body, not the headers.'
#
# All three commands share the same reply shape, so they are run in turn,
# each preceded by a separator line.
for fetch in (s.article, s.head, s.body):
    print('-' * 10)
    resp, info = fetch(last)
    print(info.number, info.message_id, len(info.lines))

# newgroups