Beispiel #1
0
def download_group(name):

    if not os.path.exists(name):
        os.mkdir(name)

    s = NNTP('news.gmane.org')
    resp, count, first, last, name = s.group(name)
    print 'Group', name, 'has', count, 'articles, range', first, 'to', last
    resp, subs = s.xhdr('subject', first + '-' + last)
    for id, sub in subs: 
        print id
        with open(os.path.join(name, str(id)), 'wb') as fp:
            pprint.pprint(s.article(id), stream=fp)
Beispiel #2
0
    def getItems(self):
        start = localtime(time()-self.window*day)
        date = strftime('%y%m%d',start)
        hour = strftime('%H%M%S',start)
        server = NNTP(self.servername)
        ids = server.newnews(self.group,date,hour)[1]

        for id in ids:
            lines = server.article(id)[3]
            message = message_from_string('\n'.join(lines))

            title = memssage['subject']
            body = message.get_payload()
            if message.is_multipart():
                body = body[0]

            yield NewsItem(title,body)
        server.quit()
Beispiel #3
0
	def getItems(self):
		start=localtime(time()-day*self.window)
		date=strftime('%y%m%d',start)
		hour=strftime('%H%M%S',start)

		server=NNTP(self.servername)


		ids=server.newnews(self.group,date,hour)[1]

		for id in ids:
			lines=server.article(id)[3]
			message=message_from_string('\n'.join(lines))

			title=message['subject']
			body=message.get_payload()
			if message.is_multipart():
				body=body[0]

			yield NewsItem(title,body)

		server.quit()
Beispiel #4
0
 def getItems(self):  # 新闻生成器
     yesterday = date.today() - timedelta(days=self.window)  # 计算新闻获取的起始时间
     server = NNTP(self.server_name)  # 创建服务器连接对象
     ids = server.newnews(self.group, yesterday)[1]  # 获取新闻id列表
     count = 0  # 创建计数变量
     for id in ids:  # 循环获取新闻id
         count += 1  # 计数递增
         if count <= 10:  # 如果计数小于10
             article = server.article(id)[1][2]  # 获取指定id的新闻文章
             lines = []  # 创建每行新闻内容的列表
             for line in article:  # 从新闻文章中读取每一行内容
                 lines.append(line.decode())  # 将每行新闻内容解码,添加到新闻内容列表。
             message = message_from_string(
                 '\n'.join(lines))  # 合并新闻列表内容为字符串并转为消息对象
             title = message['subject'].replace('\n', '')  # 从消息对象中获取标题
             body = message.get_payload()  # 从消息对象中获取到新闻主体内容
             if message.is_multipart():  # 如果消息对象包含多个部分
                 body = body[0]  # 获取到的内容中第1个部分获取新闻主体内容
             yield NewsItem(title, body)  # 生成1个新闻内容对象
         else:  # 如果超出10条内容
             break  # 跳出循环
     server.quit()  # 关闭连接
Beispiel #5
0
class NewsWatcher(MessageWatcher):
    def __init__(self, server, groups,
                 user=None, pw=None, port=None, tag=None):
        MessageWatcher.__init__(self)
        self.server = server
        self.groups = groups
        self.nntp = None  # the NNTP connection object
        self.user = user
        self.pw = pw
        self.port = port
        self.tag = tag
        self.last = {}
        self.timeout = None
        self.pollInterval = 60
        self.debug = 0

    def __repr__(self):
        return "<NewsWatcher %s:%s (%s)>" % (self.server, self.port,
                                             ",".join(self.groups))
    def __getstate__(self):
        d = MessageWatcher.__getstate__(self)
        d['nntp'] = None # just in case
        return d

    def start(self):
        port = self.port
        if not port:
            port = NNTP_PORT
        self.nntp = NNTP(self.server, port, self.user, self.pw,
                         readermode=1)
        # only look for messages that appear after we start. Usenet is big.
        if not self.last: # only do this the first time
            for g in self.groups:
                resp, count, first, last, name = self.nntp.group(g)
                self.last[g] = int(last)
                if self.debug: print "last[%s]: %d" % (g, self.last[g])
        self.timeout = gtk.timeout_add(self.pollInterval*1000,
                                       self.doTimeout)

    def stop(self):
        self.nntp.quit()
        self.nntp = None
        if self.timeout:
            gtk.timeout_remove(self.timeout)
            self.timeout = None

    def doTimeout(self):
        self.poll()
        return gtk.TRUE # keep going

    def poll(self):
        #print "polling", self
        for g in self.groups:
            resp, count, first, last, name = self.nntp.group(g)
            for num in range(self.last[g]+1, int(last)+1):
                try:
                    resp, num, id, lines = self.nntp.article("%d" % num)
                except NNTPError:
                    continue
                name = "%s:%d" % (g, int(num))
                if self.debug: print "got", name
                if self.tag:
                    if not filter(lambda line, tag=tag: line.find(tag) != -1,
                                  lines):
                        continue
                self.parseMessage(name, name, time.time(), lines)
            self.last[g] = int(last)
Beispiel #6
0
#server = NNTP('news.mozilla.org')
server = NNTP('news.kornet.net')
print server.group('comp.lang.python.announce')[0]
"""
 |  group(self, name)
 |      Process a GROUP command.  Argument:
 |      - group: the group name
 |      Returns:
 |      - resp: server response if successful
 |      - count: number of articles (string)
 |      - first: first article number (string)
 |      - last: last article number (string)
 |      - name: the group name
"""
group = server.group('han.test')
print repr(group)

first = group[2]
last = group[3]

print "first ", first
print "last ", last

i = 0
for id in range(int(first), int(last)):
    i += 1
    print(server.article(str(id)))
    if i == 10: break;

server.quit()
#__author: ZhengNengjin
#__date: 2018/10/25
from nntplib import NNTP
n = NNTP('your.nntp.server')
rsp, ct, fst, lst, grp = n.group('comp,lang.python')
rsp, anum, mid, data = n.article('110457')
for eachLine in data:
	print(eachLine)
n.quit()
Beispiel #8
0
class DanskGruppenArchive(object):
    """Class that provides an interface to Dansk-gruppens emails archive on
    gmane
    """
    def __init__(self, article_cache_size=300, cache_file=None):
        """Initialize local variables"""
        # Connect to news.gmane.org
        self.nntp = NNTP('news.gmane.org')
        # Setting the group returns information, which right now we ignore
        self.nntp.group('gmane.comp.internationalization.dansk')

        # Keep a local cache in an OrderedDict, transferred across session
        # in a pickled version in a file
        self.article_cache_size = article_cache_size
        self.cache_file = cache_file
        if cache_file and path.isfile(cache_file):
            with open(cache_file, 'rb') as file_:
                self.article_cache = pickle.load(file_)
            logging.info('Loaded %i items from file cache',
                         len(self.article_cache))
        else:
            self.article_cache = OrderedDict()

    def close(self):
        """Quit the NNTP session and save the cache"""
        self.nntp.quit()
        if self.cache_file:
            with open(self.cache_file, 'wb') as file_:
                pickle.dump(self.article_cache, file_)
                logging.info('Wrote %i items to cache file',
                             len(self.article_cache))

    @property
    def last(self):
        """Return the last NNTP ID as an int"""
        return self.nntp.group('gmane.comp.internationalization.dansk')[3]

    def _get_article(self, message_id):
        """Get an article (cached)

        Args:
            message_id (int): The NNTP ID of the message

        Returns:
            list: List of byte strings in the message
        """
        # Clear excess cache
        if len(self.article_cache) > self.article_cache_size:
            self.article_cache.popitem(last=False)

        # Check if article is in cache and if not, put it there
        if message_id not in self.article_cache:
            # nntp.article() returns: response, information
            # pylint: disable=unbalanced-tuple-unpacking
            _, info = self.nntp.article(message_id)
            self.article_cache[message_id] = info
        return self.article_cache[message_id]

    @staticmethod
    def _article_to_email(article):
        """Convert a raw article to an email object

        Args:
            article (namedtuple): An article named tuple as returned by NNTP

        Returns:
            email.message: An email message object
        """
        # article lines are a list of byte strings
        decoded_lines = [line.decode('ascii') for line in article.lines]
        article_string = '\n'.join(decoded_lines)
        # Make an email object
        return email.message_from_string(article_string)

    def get_subject(self, message_id):
        """Get the subject of an message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The subject of the article
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)
        # The subject may be encoded by NNTP, so decode it
        return decode_header(mail['Subject'])

    def get_body(self, message_id):
        """Get the body of a message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The body of the article as a str or None if no body could be
                found or succesfully decoded
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        # Walk parts of the email and look for text/plain content type
        for part in mail.walk():
            if part.get_content_type() == 'text/plain':
                body = part.get_payload(decode=True)
                # Find the text encoding from lines like:
                # text/plain; charset=UTF-8
                # text/plain; charset=utf-8; format=flowed
                # Encoding sometimes has "" around it, decode is OK with that
                for type_part in part['Content-Type'].split(';'):
                    if type_part.strip().startswith('charset='):
                        encoding = type_part.replace('charset=', '')
                        break
                else:
                    message = 'Looking for the character encoding in the '\
                        'string "%s" went wrong'
                    logging.warning(message, part['Content-Type'])
                    return None

                # Decode and return the body
                try:
                    body = body.decode(encoding)
                except LookupError:
                    message = 'Do not know how to handle a body with '\
                        'charset: %s'
                    logging.warning(message, encoding)
                    return None

                return body

    def get_attachment(self, message_id, filename):
        """Get attachment by filename

        Args:
            message_id (int):  The NNTP ID of the the message
            filename (str): The filename for the attachment

        Returns:
            bytes: The binary content of the attachment
        """
        return self.get_attachments(message_id).get(filename)

    def get_attachments(self, message_id):
        """Get attachments

        Args:
            message_id (int):  The NNTP ID of the the message

        Returns:
            dict: Dict with attachments where keys are filenames and values are
                their binary content
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        attachments = {}
        # Walk parts of the email and look for application/octet-stream
        # content type
        for part in mail.walk():
            content_disp = part['Content-Disposition']
            if not (content_disp and content_disp.startswith('attachment')):
                continue

            # Get the filename from a line like: Content-Disposition:
            # attachment; filename="hitori.master.da.podiff"
            filename = None
            for disp_part in content_disp.split(';'):
                if disp_part.strip().startswith('filename='):
                    filename = disp_part.strip().replace('filename=', '')
                    # Strip " from filename
                    filename = filename.strip('"')

            if filename is None:
                message = 'Unable to extract filename from '\
                  'Content-Disposition: %s'
                logging.warning(message, part['Content-Disposition'])
                raise Exception('Unable to extract filename')

            attachments[filename] = part.get_payload(decode=True)

        return attachments
Beispiel #9
0
#!/usr/bin/env python
# -*- coding: UTF-8 *-*


from nntplib import NNTP
n = NNTP('your.nntp.server')
rsp, ct, fst, lst, grp = n.group('comp.lang.python')
rsp, anum, mid, data = n.article('110457')
for eachLine in data:
    print eachLine
From: "Alex Martelli" <alex@...> Subject: Re: Rounding Question
Date: Wed, 21 Feb 2001 17:05:36 +0100
"Remco Gerlich" <remco@...> wrote:
Jacob Kaplan-Moss <jacob@...> wrote in comp.lang.python:
So I've got a number between 40 and 130 that I want to round up to
the nearest 10. That is:

40 --> 40, 41 --> 50, ..., 49 --> 50, 50 --> 50, 51 --> 60
Rounding like this is the same as adding 5 to the number and then
rounding down. Rounding down is substracting the remainder if you were
to divide by 10, for which we use the % operator in Python.
This will work if you use +9 in each case rather than +5 (note that he doesn't
really want rounding -- he wants 41 to 'round' to 50, for ex).
Alex
>>> n.quit()
'205 closing connection - goodbye!'




class DanskGruppenArchive(object):
    """Class that provides an interface to Dansk-gruppens emails archive on
    gmane
    """

    def __init__(self, article_cache_size=300, cache_file=None):
        """Initialize local variables"""
        # Connect to news.gmane.org
        self.nntp = NNTP('news.gmane.org')
        # Setting the group returns information, which right now we ignore
        self.nntp.group('gmane.comp.internationalization.dansk')

        # Keep a local cache in an OrderedDict, transferred across session
        # in a pickled version in a file
        self.article_cache_size = article_cache_size
        self.cache_file = cache_file
        if cache_file and path.isfile(cache_file):
            with open(cache_file, 'rb') as file_:
                self.article_cache = pickle.load(file_)
            logging.info('Loaded %i items from file cache',
                         len(self.article_cache))
        else:
            self.article_cache = OrderedDict()

    def close(self):
        """Quit the NNTP session and save the cache"""
        self.nntp.quit()
        if self.cache_file:
            with open(self.cache_file, 'wb') as file_:
                pickle.dump(self.article_cache, file_)
                logging.info('Wrote %i items to cache file',
                             len(self.article_cache))

    @property
    def last(self):
        """Return the last NNTP ID as an int"""
        return self.nntp.group('gmane.comp.internationalization.dansk')[3]

    def _get_article(self, message_id):
        """Get an article (cached)

        Args:
            message_id (int): The NNTP ID of the message

        Returns:
            list: List of byte strings in the message
        """
        # Clear excess cache
        if len(self.article_cache) > self.article_cache_size:
            self.article_cache.popitem(last=False)

        # Check if article is in cache and if not, put it there
        if message_id not in self.article_cache:
            # nntp.article() returns: response, information
            # pylint: disable=unbalanced-tuple-unpacking
            _, info = self.nntp.article(message_id)
            self.article_cache[message_id] = info
        return self.article_cache[message_id]

    @staticmethod
    def _article_to_email(article):
        """Convert a raw article to an email object

        Args:
            article (namedtuple): An article named tuple as returned by NNTP

        Returns:
            email.message: An email message object
        """
        # article lines are a list of byte strings
        decoded_lines = [line.decode('ascii') for line in article.lines]
        article_string = '\n'.join(decoded_lines)
        # Make an email object
        return email.message_from_string(article_string)

    def get_subject(self, message_id):
        """Get the subject of an message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The subject of the article
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)
        # The subject may be encoded by NNTP, so decode it
        return decode_header(mail['Subject'])

    def get_body(self, message_id):
        """Get the body of a message

        Args:
            message_id (int): The NNTP ID of the the message

        Returns:
            str: The body of the article as a str or None if no body could be
                found or succesfully decoded
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        # Walk parts of the email and look for text/plain content type
        for part in mail.walk():
            if part.get_content_type() == 'text/plain':
                body = part.get_payload(decode=True)
                # Find the text encoding from lines like:
                # text/plain; charset=UTF-8
                # text/plain; charset=utf-8; format=flowed
                # Encoding sometimes has "" around it, decode is OK with that
                for type_part in part['Content-Type'].split(';'):
                    if type_part.strip().startswith('charset='):
                        encoding = type_part.replace('charset=', '')
                        break
                else:
                    message = 'Looking for the character encoding in the '\
                        'string "%s" went wrong'
                    logging.warning(message, part['Content-Type'])
                    return None

                # Decode and return the body
                try:
                    body = body.decode(encoding)
                except LookupError:
                    message = 'Do not know how to handle a body with '\
                        'charset: %s'
                    logging.warning(message, encoding)
                    return None

                return body

    def get_attachment(self, message_id, filename):
        """Get attachment by filename

        Args:
            message_id (int):  The NNTP ID of the the message
            filename (str): The filename for the attachment

        Returns:
            bytes: The binary content of the attachment
        """
        return self.get_attachments(message_id).get(filename)

    def get_attachments(self, message_id):
        """Get attachments

        Args:
            message_id (int):  The NNTP ID of the the message

        Returns:
            dict: Dict with attachments where keys are filenames and values are
                their binary content
        """
        article = self._get_article(message_id)
        mail = self._article_to_email(article)

        attachments = {}
        # Walk parts of the email and look for application/octet-stream
        # content type
        for part in mail.walk():
            content_disp = part['Content-Disposition']
            if not (content_disp and content_disp.startswith('attachment')):
                continue

            # Get the filename from a line like: Content-Disposition:
            # attachment; filename="hitori.master.da.podiff"
            filename = None
            for disp_part in content_disp.split(';'):
                if disp_part.strip().startswith('filename='):
                    filename = disp_part.strip().replace('filename=', '')
                    # Strip " from filename
                    filename = filename.strip('"')

            if filename is None:
                message = 'Unable to extract filename from '\
                  'Content-Disposition: %s'
                logging.warning(message, part['Content-Disposition'])
                raise Exception('Unable to extract filename')

            attachments[filename] = part.get_payload(decode=True)

        return attachments
Beispiel #11
0
	# print(over.keys())
	# ['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']
	print(over.get('date'))
	print(nntplib.decode_header(over.get('from')))
	print(over.get('message-id'))
	print(over.get('subject'))

# stat
'Return a triple (response, number, id) where number is the article number and id is the message id.'
resp,num,msg_id = s.stat(last)
print(num,msg_id)

# article
'Return a tuple (response, info) where info is a namedtuple with three attributes number, message_id and lines (in that order).'
print('-'*10)
resp,info = s.article(last)
print(info.number,info.message_id,len(info.lines))

# head
'Same as article(), but sends a HEAD command. The lines returned (or written to file) will only contain the message headers, not the body.'
print('-'*10)
resp,info = s.head(last)
print(info.number,info.message_id,len(info.lines))

# body
'Same as article(), but sends a BODY command. The lines returned (or written to file) will only contain the message body, not the headers.'
print('-'*10)
resp,info = s.body(last)
print(info.number,info.message_id,len(info.lines))

# newgroups