#coding=utf-8 # from nntplib import NNTP from time import strftime, time, localtime day = 24 * 60 * 60 # Number of seconds in one day yesterday = localtime(time() - day) date = strftime('%y%m%d', yesterday) hour = strftime('%H%M%S', yesterday) servername = 'news.mixmin.net' group = 'talk.euthanasia' server = NNTP(servername) ids = server.newnews(group, date, hour)[1] for id in ids: head = server.head(id)[3] for line in head: if line.lower().startswith('subject:'): subject = line[9:] break body = server.body(id)[3] print subject print '-'*len(subject) print '\n'.join(body) server.quit()
#!/usr/bin/python from nntplib import NNTP from time import time, localtime, strftime day = 24 * 60 * 60 yesterday = localtime(time() - day) date = strftime('%y%m%d', yesterday) t = strftime('%H%M%S', yesterday) s = NNTP('web.aioe.org') g = 'comp.lang.python.announce' ids = s.newnews(g, date, t)[1] for id in ids: head = s.head(id)[3] for line in head: if line.lower().startswith('subject:'): subject = line[9:] break body = s.body(id)[3] print subject print '-' * len(subject) print '\n'.join(body) s.quit()
# Fetch and print all comp.lang.python articles posted since yesterday.
# NOTE(review): this fragment relies on names defined earlier in the file
# (outside this chunk): `day`, `localtime`, `strftime`, `NNTP`, `logging`.
yesterday = localtime(time() - day)
# NEWNEWS wants separate date ('yymmdd') and time ('hhmmss') strings.
date = strftime('%y%m%d', yesterday)
hour = strftime('%H%M%S', yesterday)
servername = 'news.aioe.org'
group = 'comp.lang.python'
server = NNTP(servername)
# Element [1] of the reply is the list of new article ids.
ids = server.newnews(group, date, hour)[1]
logging.info('this is ids')
logging.info(ids)
for id in ids:  # NOTE(review): shadows the builtin `id`
    print 'this is id', id
    head = server.head(id)[3]
    # Pull the subject out of the header lines.
    for line in head:
        if line.lower().startswith('subject:'):
            subject = line[9:]  # strip the 'Subject: ' prefix
            break
    body = server.body(id)[3]
    # NOTE(review): if an article has no Subject: header, `subject` keeps
    # the previous article's value (or is undefined on the first article).
    print subject
    print '-' * len(subject)
    print '\n'.join(body)
server.quit()
# NOTE(review): the matching `try:` for this `except:` is outside this
# chunk; presumably it parses command-line overrides and this clause
# supplies the defaults.
except:
    servername = 'news.gmane.org'
    groupname = 'gmane.comp.python.general'    # cmd line args or defaults
    showcount = 10                             # show last showcount posts

# connect to nntp server
print 'Connecting to', servername, 'for', groupname
from nntplib import NNTP
connection = NNTP(servername)
# group() returns (reply, article count, first number, last number, name).
(reply, count, first, last, name) = connection.group(groupname)
print '%s has %s articles: %s-%s' % (name, count, first, last)

# get request headers only
# Ask XHDR for the subjects of the last `showcount` articles.
fetchfrom = str(int(last) - (showcount-1))
(reply, subjects) = connection.xhdr('subject', (fetchfrom + '-' + last))

# show headers, get message hdr+body
# NOTE(review): `listonly` and `showhdrs` (the header prefixes to display)
# are defined earlier in the file, outside this chunk.
for (id, subj) in subjects:                    # [-showcount:] if fetch all hdrs
    print 'Article %s [%s]' % (id, subj)
    if not listonly and raw_input('=> Display?') in ['y', 'Y']:
        reply, num, tid, list = connection.head(id)
        # Print only the headers whose prefix is in showhdrs, clipped to 80 cols.
        for line in list:
            for prefix in showhdrs:
                if line[:len(prefix)] == prefix:
                    print line[:80]; break
        if raw_input('=> Show body?') in ['y', 'Y']:
            reply, num, tid, list = connection.body(id)
            for line in list:
                print line[:80]
    print
print connection.quit( )
# mcadams.posc.mu.edu: alt.assassination.jfk # news.php.net: php.dev from nntplib import NNTP from time import strftime, time, localtime day = 24 * 60 * 60 # seconds of one day yesterday = localtime(time() - day) date = strftime('%y%m%d', yesterday) hour = strftime('%H%M%S', yesterday) servername = 'mcadams.posc.mu.edu' group = 'alt.assassination.jfk' server = NNTP(servername) ids = server.newnews(group, date, hour)[1] for id in ids: head = server.head(id)[3] # 4th element for line in head: if line.lower().startswith('subject:'): subject = line[9:] break body = server.body(id)[3] print subject print '-'*len(subject) print '\n'.join(body) server.quit()
groupname = 'comp.lang.python' # cmd line args or defaults showcount = 10 # show last showcount posts # connect to nntp server print 'Connecting to', servername, 'for', groupname from nntplib import NNTP connection = NNTP(servername) (reply, count, first, last, name) = connection.group(groupname) print '%s has %s articles: %s-%s' % (name, count, first, last) # get request headers only fetchfrom = str(int(last) - (showcount-1)) (reply, subjects) = connection.xhdr('subject', (fetchfrom + '-' + last)) # show headers, get message hdr+body for (id, subj) in subjects: # [-showcount:] if fetch all hdrs print 'Article %s [%s]' % (id, subj) if not listonly and raw_input('=> Display?') in ['y', 'Y']: reply, num, tid, list = connection.head(id) for line in list: for prefix in showhdrs: if line[:len(prefix)] == prefix: print line[:80]; break if raw_input('=> Show body?') in ['y', 'Y']: reply, num, tid, list = connection.body(id) for line in list: print line[:80] print print connection.quit()
def readnews(I="", A=None, P=None, RESPONSE=None):
    """Display article in HTML.

    Parameters (presumably a Zope-style published method -- TODO confirm):
        I        -- article id/number to fetch
        A        -- NNTP auth username (optional)
        P        -- NNTP auth password (optional)
        RESPONSE -- response object written to with .write()

    NOTE(review): the bare excepts below only write an error message and
    fall through; a failed connect leaves `news` undefined and a failed
    head() leaves `subs` undefined, so later lines would raise NameError.
    """
    article = I
    user = A
    password = P
    RESPONSE.write("""<HTML><HEAD><TITLE>Tokyo PC Users Group</TITLE></HEAD>
<BODY BGCOLOR="#FFFFFF">""")
    try:
        news = NNTP(NEWS_SERVER)
    except:
        RESPONSE.write("Can not connect to server: " + NEWS_SERVER)
    resp = news.shortcmd('MODE READER')
    if user:
        # RFC 2980 AUTHINFO handshake: 381 = password required, 281 = accepted.
        resp = news.shortcmd('authinfo user '+user)
        if resp[:3] == '381':
            if not password:
                RESPONSE.write("<B>Can not fetch article</B><P>")
            else:
                resp = news.shortcmd('authinfo pass '+password)
                if resp[:3] != '281':
                    RESPONSE.write("<B>Can not fetch article</B><P>")
    try:
        resp, nr, id, subs = news.head(article)
    except:
        RESPONSE.write("Article %s not available" % quote(article))
    RESPONSE.write('<TABLE WIDTH="100%" BGCOLOR="#CFCFCF"><TR><TD>')
    # build up the header (so we know Subject: by Newsgroups: output time)
    from_line = ""
    newsgroup_line = ""
    subject_line = ""
    keep_lines = ""
    mail_subject = ""
    # `interesting_headers` is a module-level compiled regex (defined outside
    # this chunk) with named groups classifying each header line.
    for line in subs:
        ihdr = interesting_headers.match(line)
        if ihdr:
            if ihdr.group('from'):
                name, email = parseaddr(line[6:])
                # The literal "%s" argument leaves one unexpanded %s slot in
                # the result so `from_line % mail_subject` can fill it later.
                if name:
                    from_line = 'From: <A HREF="mailto:%s%s">%s</A> <%s><BR>' % (
                        email, "%s", name, email)
                else:
                    from_line = 'From: <A HREF="mailto:%s%s">%s</A><BR>' % (
                        email, "%s", email)
            elif ihdr.group('newsgroups'):
                # NOTE(review): the mailto address below looks redacted or
                # garbled in this copy of the source -- verify the original
                # format string before relying on it.
                newsgroup_line = 'Newsgroups: <A HREF="mailto:tpc-%[email protected]%s">tpc.%s</A>%s<BR>' % (
                    ihdr.group('group'), "%s", ihdr.group('group'),
                    ihdr.group('othergroups'))
            elif ihdr.group('subject'):
                subject_line = 'Subject: <B>%s</B><BR>' % line[9:]
                # Reuse the subject for reply mailto links; add "Re: " only
                # when it is not already a reply.
                if ihdr.group('re'):
                    mail_subject = "?subject="+line[9:]
                else:
                    mail_subject = "?subject=Re: "+line[9:]
            elif ihdr.group('keep'):
                keep_lines = keep_lines+line+"<BR>"
    if from_line:
        RESPONSE.write(from_line % mail_subject)
    if newsgroup_line:
        RESPONSE.write(newsgroup_line % mail_subject)
    RESPONSE.write(subject_line + keep_lines)
    RESPONSE.write('</TD></TR></TABLE><P>')
    try:
        resp, nr, id, subs = news.body(article)
    except:
        RESPONSE.write("Article %s body not available" % article)
    RESPONSE.write("<CODE>")
    # Linkify URLs and mail addresses in each body line via `liven_url`
    # (a callback defined elsewhere in the file); (?x) verbose regex.
    for line in subs:
        RESPONSE.write(re.sub(r'''(?i)(?x)
            (?P<opening>[<(";]?)
            (?P<url>(((?P<web>http:)|(news:)|(mailto:)|(telnet:))(?P<uri>\S*?))
            # a mail address is some non-ws characters followed by @
            # followed by a domain name that has at least one . in it
            |(?P<mailadr>\S+@(\S+\.)+\S+?))
            # either a URL or a mail address will not contain [)">\s]
            # and will not end with punctuation just before the whitespace
            (?P<closing>([)"'>\s]|$|([&.?,:;]\s)+))''', liven_url, line) + "<BR>")
    RESPONSE.write("</CODE>")
    RESPONSE.write("</BODY></HTML>")
    resp = news.quit()
class Archive(object):
    """Fetch patch series from an NNTP (gmane) archive as a git-am mbox."""

    @staticmethod
    def is_diff(body):
        """Return True if any line of `body` starts a diff hunk header."""
        return bool([line for line in body if line.startswith("diff ")])

    def __init__(self, group, server):
        """Connect to `server` and select `group`, recording its number range."""
        self.conn = NNTP(server)
        resp, count, first, last, name = self.conn.group(group)
        self.group = group
        self.server = server
        self.first = int(first)
        self.last = int(last)

    def get_number_from_user(self, msg_id):
        """
        Convert something the user might input into a message id.

        These are:
        # An NNTP message number
        # A gmane link that includes the NNTP message number
        # The original Message-Id header of the message.

        NOTE: gmane's doesn't include the message number in STAT requests
        that involve only the Message-Id (hence the convolution of getting
        all the headers).
        """
        msg_id = re.sub(r".*gmane.org/gmane.comp.version-control.git/([0-9]+).*",
                        r"\1", str(msg_id))
        _, n, id, result = self.conn.head(msg_id)
        for header in result:
            m = re.match(r"Xref: .*:([0-9]+)\s*$", header, re.I)
            if m:
                return int(m.group(1))
        else:
            # for/else: only reached when no header matched.
            raise FatalError("No (or bad) Xref header for message '%s'" % msg_id)

    def get_patch_series(self, user_input, search_limit=100):
        """
        Given an NNTP message number or a Message-Id header return an mbox
        containing the patches introduced by the author of that message.

        This handles the case where the threading is right *and* the patches
        are numbered in a simple scheme:

        [PATCH] this patch has no replies and stands on its own
        [PATCH 0/2] this is an introduction to the series
        |- [PATCH 1/2] the first commit
        |- [PATCH 2/2] the second commit
        [PATCH 1/3] this is the first commit
        |- [PATCH 2/3] and this is the second
        |- [PATCH 3/3] and this is the third

        TODO: it would be nice to make the search more efficient, we can use
        the numbers in [PATCH <foo>/<bar>] to stop early.
        """
        start_id = self.get_number_from_user(user_input)
        messages = limit(self.messages_starting_from(start_id), search_limit)
        try:
            # FIX: next(messages) instead of messages.next() (works on
            # Python 2.6+ and Python 3).
            thread = Thread(next(messages))
        except StopIteration:
            # FIX: the %s placeholder was never interpolated, so the error
            # message contained a literal '%s'.
            raise FatalError("No message at id '%s' using XOVER" % user_input)
        n_since_last = 0
        # Keep appending replies until 5 consecutive unrelated messages.
        for message in messages:
            if n_since_last > 5:
                break
            elif thread.should_include(message):
                n_since_last = 0
                thread.append(message)
            else:
                n_since_last += 1
        else:
            # FIX: interpolate search_limit instead of passing it as a
            # second exception argument (the message kept a literal '%s').
            raise FatalError('did not find end of series within %s messages'
                             % search_limit)
        # Also pick up a few messages just before the starting point, in
        # case the user pointed at the middle of the series.
        for message in self.xover(start_id - 5, start_id - 1):
            if thread.should_include(message):
                thread.append(message)
        return self.mboxify(thread)

    def mboxify(self, thread):
        """
        Convert a thread into an mbox for application via git-am.
        """
        lines = []
        for message in thread.in_order():
            _, number, msg_id, body = self.conn.body(str(message.number))
            # git-am doesn't like empty patches very much, and the 0/X'th patch is
            # often not a patch, we skip it here. (TODO, warn the user about this)
            if re.search(r" 0+/[0-9]+", message.subject) and not self.is_diff(body):
                continue
            poster = parseaddr(message.poster)[0]
            date = ctime(mktime(parsedate(message.date)))
            lines.append("From %s %s" % (poster, date))
            lines.append("From: %s" % message.poster)
            lines.append("Subject: %s" % message.subject)
            lines.append("Date: %s" % message.date)
            lines.append("Message-Id: %s" % message.msg_id)
            lines.append("Xref: %s %s:%s" % (self.server, self.group,
                                             message.number))
            lines.append("References: %s" % "\n\t".join(message.references))
            lines.append("")
            lines += body
            lines.append("")
        return "\n".join(lines)

    def messages_starting_from(self, start_id):
        """
        Generate all message headers starting from the given id and working
        upwards, in batches of 20.
        """
        while start_id < self.last:
            next_id = min(start_id + 20, self.last)
            for message in self.xover(start_id, next_id):
                yield message
            start_id = next_id + 1

    def xover(self, begin, end):
        """
        Get the headers for the messages with numbers between begin and end,
        sorted by message number.
        """
        if begin == end:
            return []
        _, result = self.conn.xover(str(min(begin, end)), str(max(begin, end)))
        result = [Message(int(number), subject, poster, date, msg_id, references)
                  for (number, subject, poster, date, msg_id, references,
                       size, lines) in result]
        return sorted(result, key=lambda x: x.number)
# stat 'Return a triple (response, number, id) where number is the article number and id is the message id.' resp,num,msg_id = s.stat(last) print(num,msg_id) # article 'Return a tuple (response, info) where info is a namedtuple with three attributes number, message_id and lines (in that order).' print('-'*10) resp,info = s.article(last) print(info.number,info.message_id,len(info.lines)) # head 'Same as article(), but sends a HEAD command. The lines returned (or written to file) will only contain the message headers, not the body.' print('-'*10) resp,info = s.head(last) print(info.number,info.message_id,len(info.lines)) # body 'Same as article(), but sends a BODY command. The lines returned (or written to file) will only contain the message body, not the headers.' print('-'*10) resp,info = s.body(last) print(info.number,info.message_id,len(info.lines)) # newgroups 'Return a pair (response, groups) where groups is a list of group names that are new since the given date and time' #resp,groups = s.newgroups(date,time) #print len(groups) #pprint(groups) #newnews
class DownloadSpots(object):
    """Download usenet 'spots' (and their comments/attachments) from an NNTP
    server into a local SQLite database.

    NOTE(review): relies on module-level names defined outside this chunk:
    NEWS_SERVER, NNTP_PORT, NNTP_USERNAME, NNTP_PASSWORD, NR_OF_SPOTS,
    SpotError, Spot, bb2html, etree, sqlite3, socket, shutil, urllib, zlib,
    re, time, defaultdict.
    """

    def __init__(self):
        # Connect to the news server; wrap NNTP/socket failures in SpotError.
        try:
            self.news = NNTP(NEWS_SERVER, NNTP_PORT, NNTP_USERNAME,
                             NNTP_PASSWORD)
        except NNTPTemporaryError as e:
            raise SpotError('NNTP', e)
        except socket.error as e:
            raise SpotError('Connection', e)
        # One SQLite database per news server; rows accessible by column name.
        self.conn = sqlite3.connect(NEWS_SERVER + '.db')
        self.conn.row_factory = sqlite3.Row
        self.cur = self.conn.cursor()
        # Speed-over-durability pragmas: fine for a rebuildable cache db.
        self.cur.executescript('''\
            PRAGMA synchronous = OFF;
            PRAGMA journal_mode = MEMORY;
            PRAGMA temp_store = MEMORY;
            PRAGMA count_changes = OFF;
            ''')

    def __del__(self):
        # Best-effort cleanup; hasattr guards cover a partially-failed __init__.
        print 'quit!'
        if hasattr(self, 'news'):
            self.news.quit()
        if hasattr(self, 'cur'):
            self.cur.close()
        if hasattr(self, 'conn'):
            self.conn.close()

    def make_table(self):
        # Create the spots/comments tables and the full_id index (idempotent).
        sql = '''\
            CREATE TABLE IF NOT EXISTS spots (
                id int PRIMARY KEY, full_id str, cat int, title str,
                poster str, date int, size int, erotiek int, subcats str,
                modulus int, keyid int, c_count int );
            CREATE TABLE IF NOT EXISTS comments (
                id int PRIMARY KEY, full_id str, spot_full_id str );
            CREATE INDEX IF NOT EXISTS spots_full_id_index on spots(full_id);
            '''
        self.cur.executescript(sql)
        self.conn.commit()

    def download_detail(self, id):
        '''Get information about a spot.

        Args:
            id (int/string): the nntp id of the spot.
                Can be:
                - a short id: 123456
                - a long id: '*****@*****.**'

        Returns:
            a dict with the following keys:
            - nzb: a list of nntp id's of the nzb file
            - image: the location of the image file: url or nntp id
            - website: url of the website
            - desc: a description of the spot
            - title2: the title of the spot (is called title2 to avoid a
              conflict)
        '''
        id = str(id)
        self.news.group('free.pt')
        print id
        head = self.news.head(id)[-1]
        # The spot metadata is an XML document spread over 'X-XML: ' headers;
        # item[7:] drops that prefix before reassembly.
        xmltext = ''.join(item[7:] for item in head
                          if item.startswith('X-XML:'))
        xml = etree.XML(xmltext)
        xmldict = defaultdict(list)
        xmldict['nzb'] = [i.text for i in xml.find('.//NZB').iter('Segment')]
        # Image may be a usenet segment id or a plain URL (or absent).
        imgfind = xml.find('.//Image')
        if imgfind is not None:
            if imgfind.find('.//Segment') is not None:
                xmldict['image'] = imgfind.find('.//Segment').text
            else:
                xmldict['image'] = imgfind.text
        else:
            xmldict['image'] = None
        webfind = xml.find('.//Website')
        if webfind is not None:
            xmldict['website'] = webfind.text
        else:
            xmldict['website'] = None
        # Body is BB-code; render to HTML for display.
        xmldict['desc'] = bb2html('\n'.join(self.news.body(id)[-1]))
        xmldict['title2'] = xml.find('.//Title').text
        print xmldict
        return xmldict

    def download_image(self, article, imgnr):
        '''Download and save an image file.

        Args:
            article: Location of the file, can be:
                - None
                - an url: 'http://url.to.image.ext'
                - a nntp id: '*****@*****.**'
            imgnr: filename of the saved image
        '''
        print imgnr
        file = 'temp/%s.picture' % imgnr
        if article is None:
            # No image advertised: fall back to the placeholder.
            shutil.copy('none.png', file)
        elif article.startswith('http'):
            # imgchili thumbnails (t*.imgchili) -> full images (i*.imgchili).
            if 'imgchili.com' in article:
                article = re.sub(r't([0-9]\.imgchili)', r'i\1', article)
            # NOTE(review): bare except -- deliberately best-effort, any
            # download failure falls back to the placeholder image.
            try:
                print urllib.urlretrieve(article, file)
            except:
                print 'Image download error'
                shutil.copy('none.png', file)
        elif '@' in article:
            article = '<%s>' % article
            print article
            try:
                self.news.group('alt.binaries.ftd')
                data = ''.join(self.news.body(article)[-1])
            except NNTPTemporaryError as e:
                shutil.copy('none.png', file)
                raise SpotError('NNTP', e)
            else:
                # Undo the FTD-specific byte escaping used in the posting.
                data = data.replace('=A', '\x00').replace('=B', '\r') \
                           .replace('=C', '\n').replace('=D', '=')
                with open(file, 'wb') as f:
                    f.write(data)
        else:
            shutil.copy('none.png', file)

    def download_nzb(self, articles, title):
        '''Download and save a nzb file.

        Args:
            articles: a list of nntp id's: ['*****@*****.**']
            title (string): the filename of the saved nzb file
                (must be already escaped)
        '''
        print 'download_nzb'
        file = 'temp/%s.nzb' % title
        self.news.group('alt.binaries.ftd')
        print articles
        # Concatenate all segments, undo the FTD escaping, then inflate
        # (raw deflate stream, hence the negative window bits).
        data = ''.join(''.join(self.news.body('<%s>' % article)[-1])
                       for article in articles)
        data = data.replace('=A', '\x00').replace('=B', '\r') \
                   .replace('=C', '\n').replace('=D', '=')
        data = zlib.decompress(data, -zlib.MAX_WBITS)
        with open(file, 'wb') as f:
            f.write(data)

    def update_spots(self):
        '''Download new spots and save them to the database.

        Yields:
            total: total amount of spots to be downloaded
            subtotal: amount of spots already done
        '''
        print 'Opening database...',
        self.make_table()
        self.cur.execute('SELECT count(id) FROM spots')
        oude_lengte = self.cur.fetchone()[0]
        print 'Done'
        print oude_lengte, 'spots in database'
        print 'Connecting...',
        # group() reply element [3] is the highest article number.
        last = int(self.news.group('free.pt')[3])
        print 'Done'
        if oude_lengte:
            # Resume from the newest spot already stored.
            self.cur.execute('SELECT max(id) FROM spots')
            current = self.cur.fetchone()[0]
        else:
            # Fresh database: start NR_OF_SPOTS*100 articles back.
            current = last - (NR_OF_SPOTS * 100)
        delta = 2000  # articles fetched per XHDR round-trip
        total = last - current
        print 'Current:', current
        print 'Last:', last
        print total, 'spots op te halen'
        yield total
        if current >= last:
            print 'Geen nieuwe spots!'
            yield 'end'
            return
        print 'Getting new spots...',
        # Drop the index during bulk insert; recreated afterwards.
        self.cur.execute('DROP INDEX IF EXISTS spots_full_id_index')
        self.conn.commit()
        for i in xrange(0, total, delta):
            # Fetch subject/from/message-id headers for one chunk and walk
            # them in lockstep (same article order for each header).
            for (id, subject), (id, fromline), (id, full_id) in \
                    zip(*(self.news.xhdr(header, '%s-%s' % (current + 1 + i,
                                                            current + delta + i))[-1]
                          for header in ('subject', 'from', 'Message-ID'))):
                subject = re.sub('^[\t]', '', subject).split('\t')[0]
                if subject[:7] == 'DISPOSE':
                    continue
                subject = subject.decode('latin-1')
                fromline = fromline.decode('latin-1')
                subject = re.sub('\s\|\s\w+$', '', subject)
                poster = re.sub('(\t[^\t]+\t)?([^\s]+)\s<.+$', r'\2', fromline)
                # The From address encodes spot metadata as
                # <modulus@cat+subcats.size.?.date...>; skip malformed ones.
                try:
                    info = re.findall(r'<[^>]+>', fromline)[0].split('@')
                    modulus = info[0]
                    info = info[1].split('.')[:4]
                    date = info[3]
                except IndexError:
                    continue
                if info[0][1] == '7':
                    modulus = modulus.split('.')[0][1:]
                else:
                    modulus = 0
                # Subcategory codes are packed in 3-character groups.
                subcats = '|'.join(info[0][2:][x:x + 3]
                                   for x in xrange(0, len(info[0][2:]), 3))
                erotiek = 0
                # Category 1 spots with certain 'd' genres are flagged adult.
                if info[0][0] == '1':
                    for genre in re.findall('d[0-9]{2}', subcats):
                        if ((23 <= int(genre[1:]) <= 26) or
                                (72 <= int(genre[1:]) <= 89)):
                            erotiek = 1
                            break
                sql = ('INSERT INTO spots (id, full_id, cat, title, poster, date, '
                       'size, erotiek, subcats, modulus, keyid, c_count) '
                       'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)')
                self.cur.execute(sql, (id, full_id, info[0][0], subject,
                                       poster, date, info[1], erotiek,
                                       subcats, modulus, info[0][1]))
            self.conn.commit()
            # Progress: id of the last inserted spot relative to the start.
            yield int(id) - current  # subtotal
        self.cur.execute('CREATE INDEX spots_full_id_index on spots(full_id)')
        self.conn.commit()
        print 'Spots klaar!'
        self.cur.execute('SELECT count(id) FROM spots')
        lengte = self.cur.fetchone()[0]
        print lengte - oude_lengte, 'nieuwe spots!'
        self.news.quit()
        yield 'end'

    def update_comments(self):
        '''Download new comments and save them to the database.

        Yields:
            total: total amount of comments to be downloaded
            subtotal: amount of comments already done
        '''
        print 'comments'
        print 'Opening database...',
        self.make_table()
        self.cur.execute('SELECT count(id) FROM comments')
        oude_lengte = self.cur.fetchone()[0]
        print 'Done'
        print oude_lengte, 'comments in database'
        print 'Connecting...',
        last = int(self.news.group('free.usenet')[3])
        print 'Done'
        if oude_lengte:
            self.cur.execute('SELECT max(id) FROM comments')
            current = self.cur.fetchone()[0]
            print current
        else:
            current = last - (NR_OF_SPOTS * 500)
        delta = 2000  # articles fetched per XHDR round-trip
        total = last - current
        print 'Current:', current
        print 'Last:', last
        print total, 'comments op te halen'
        yield total
        if current >= last:
            print 'Geen nieuwe comments!'
            yield 'end'
            return
        print 'begin reacties', time.time()
        for i in xrange(0, total, delta):
            # References header links a comment to the spot it replies to.
            for (id, ref), (id, msgid) in \
                    zip(*(self.news.xhdr(header, '%s-%s' % (current + 1 + i,
                                                            current + delta + i))[-1]
                          for header in ('References', 'Message-ID'))):
                self.cur.execute('INSERT INTO comments VALUES (?, ?, ?)',
                                 (id, msgid, ref))
                # Bump the cached comment count on the referenced spot.
                self.cur.execute('UPDATE spots SET c_count = c_count + 1 WHERE full_id = ?', (ref,))
            yield int(id) - current  # subtotal
            self.conn.commit()
        print 'Reacties klaar'
        self.cur.execute('SELECT count(id) FROM comments')
        lengte = self.cur.fetchone()[0]
        print lengte - oude_lengte, 'nieuwe comments!'
        self.news.quit()
        yield 'end'

    def show(self, search_col, op, search, search_rest='', limit=NR_OF_SPOTS,
             columns='*'):
        '''Perform a sql query.

        Args:
            search_col: the column to use in the where clause
            op: the operator to use in the where clause
            search: the search value to use in the where clause
            search_rest: additional conditions for the where clause
            limit: the number of rows to select
            columns: the columns to select

        Returns:
            a list of tuples with the results of the sql query

        NOTE(review): search_col/op/search_rest/limit/columns are spliced
        into the SQL with %-interpolation -- only the `search` value itself
        is parameterized.  Callers must never pass user-controlled text for
        those arguments (SQL injection risk).
        '''
        self.make_table()
        sql = '''\
            SELECT %s FROM spots
            WHERE %s %s :search %s
            ORDER BY id DESC LIMIT %s
            ''' % (columns, search_col, op, search_rest, limit)
        results = map(Spot, self.cur.execute(sql, (search,)))
        print len(results)
        return results
from datetime import date, timedelta #获取24小时的新闻源需要datetime模块 server = NNTP('web.aioe.org') #实例化NNTP服务器连接对象 yesterday = date.today() - timedelta(days=1) #当前时间减去时间间隔 group = 'comp.lang.python' #新闻组名称 def get_id(): #创建新闻id生成器 ids = server.newnews(group, yesterday)[1] #获取进近4小时新闻内容中的所有新闻id for id in ids: #遍历所有新闻id yield id #生成1个新闻id ids = get_id() #创建新闻id生成器对象 id = next(ids) #获取第一个新闻id head_data = server.head(id)[1][2] #获取新闻头部内容 body_data = server.body(id)[1][2] #获取新闻的主体内容 title = '' #创建标题 body = '' #创建主体 for line in head_data: #遍历头部内容 if line.decode().lower().startswith( 'subject:'): #如果发现新闻标题特征("subject:"开头) startswithk开始于 title = line[9:].decode() #去除特征文字保存标题内容 for line in body_data: #遍历主体内容 if line.decode().endswith('='): #如果行内容以'='结尾 endswith结束于 line = line[:-1] #去除“=” if line.decode().endswith('=20'): #如果行内容以'=20'结尾 line = line[:-3] + b'\n' #去除'=20'并添加换行符 body += line.decode() #将每行内容组织为新的主体内容