def test_posting_content(self):
    """
    Tests preparing and splitting content for posting
    """
    # Duplicate groups are removed automatically
    article = NNTPArticle(
        subject='woo-hoo',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # First we create a 512K file
    tmp_file = join(
        self.tmp_dir, 'NNTPArticle_Test.posting', 'file.tmp')

    # File should not already exist
    assert(isfile(tmp_file) is False)

    # Create a random file
    assert(self.touch(tmp_file, size='512K', random=True) is True)

    # File should exist now
    assert(isfile(tmp_file) is True)

    # Now we want to load it into a NNTPContent object
    content = NNTPBinaryContent(filepath=tmp_file, work_dir=self.tmp_dir)
    assert(article.add(content) is True)

    # Now we want to split the file up
    results = article.split('128K')

    # Tests that our results are expected
    assert(isinstance(results, sortedset) is True)
    assert(len(results) == 4)
def test_nzbfile_generation(self):
    """
    Tests the creation of NZB Files
    """
    nzbfile = join(self.tmp_dir, 'test.nzbfile.nzb')
    payload = join(self.var_dir, 'uudecoded.tax.jpg')
    assert isfile(nzbfile) is False

    # Create our NZB Object
    nzbobj = NNTPnzb()

    # create a fake article
    segpost = NNTPSegmentedPost(basename(payload))
    content = NNTPBinaryContent(payload)

    article = NNTPArticle('testfile', groups='newsreap.is.awesome')

    # Note that our nzb object segment tracker is not marked as being
    # complete. This flag gets toggled when we add segments manually to
    # our nzb object or if we parse an NZB-File
    assert(nzbobj._segments_loaded is None)

    # Add our Content to the article
    article.add(content)

    # now add our article to the NZBFile
    segpost.add(article)

    # now add our Segmented Post to the NZBFile
    nzbobj.add(segpost)

    # Since .add() was called, this will be set to True now
    assert(nzbobj._segments_loaded is True)

    # Store our file
    assert nzbobj.save(nzbfile) is True
    assert isfile(nzbfile) is True
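As a hedged follow-on sketch (not part of the original test), the saved NZB-File could be read back by iterating an NNTPnzb object, which, per the next() implementation further down, yields one NNTPSegmentedPost per file entry; the constructor arguments shown are assumptions based on usage elsewhere in this document.

# Hedged sketch: re-read the NZB-File we just saved. Iterating the
# NNTPnzb parser yields NNTPSegmentedPost objects, and iterating each
# of those yields NNTPArticle objects (see test_general_features).
nzbobj_in = NNTPnzb(nzbfile, work_dir=self.tmp_dir)
for segpost in nzbobj_in:
    for article in segpost:
        # illustrative only; just show what came back
        print('%s: %s' % (article.id, article.subject))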
def test_post_iter(self):
    """
    Tests that we can correctly iterate over our content for posting
    purposes.
    """
    # Prepare Article
    article = NNTPArticle(
        subject='',
        poster='',
        body='hello world',
        work_dir=self.tmp_dir,
    )

    # we fail because our subject and poster are blank;
    # we also fail because we have no groups defined
    assert(article.post_iter() is None)

    article.groups.add('alt.binaries.test')
    assert(article.post_iter() is None)

    article.subject = 'Subject'
    assert(article.post_iter() is None)

    article.poster = '*****@*****.**'

    # Now we're good to go
    it = article.post_iter()
    assert(it is not None)

    for entry in it:
        assert(isinstance(entry, basestring) is True)
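A hedged usage sketch: once an article has a subject, poster, and at least one group, post_iter() yields string chunks (as the test above verifies). Streaming those chunks to a file here is purely illustrative; the output path is arbitrary.

# Hedged sketch: stream a fully prepared article to disk. post_iter()
# yields basestring chunks; writing them to a local file is just for
# illustration (a real post would send them to an NNTP server).
article = NNTPArticle(
    subject='Subject',
    poster='*****@*****.**',
    groups='alt.binaries.test',
    body='hello world',
    work_dir=self.tmp_dir,
)
with open(join(self.tmp_dir, 'post.out'), 'w') as f:
    for chunk in article.post_iter():
        f.write(chunk)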
def test_general_features(self):
    """
    NNTPSegmentedPost manages a list of NNTPArticles

    Test the basic functionality of the object
    """
    # create an object
    segobj = NNTPSegmentedPost('mytestfile')

    # Not valid because there are no entries
    assert segobj.is_valid() is False

    article = NNTPArticle(work_dir=self.tmp_dir)
    assert(segobj.add(article) is True)
    assert(len(segobj) == 1)

    # Not valid because the entry added is not loaded or retrieved
    assert(segobj.is_valid() is False)

    # Duplicates are ignored (we can't add the same file twice)
    assert(segobj.add(article) is False)
    assert(len(segobj) == 1)

    # We can't add other types
    assert(segobj.add(None) is False)
    assert(segobj.add("bad bad") is False)
    assert(segobj.add(1) is False)
    assert(len(segobj) == 1)

    # Test iterations
    for a in segobj:
        assert isinstance(a, NNTPArticle)
def test_article_copy(self):
    """
    The copy() function built into the article allows you to create a
    duplicate of the original article without disturbing the content
    within it.
    """
    tmp_dir = join(self.tmp_dir, 'NNTPArticle_Test.test_article_copy')

    # First we create two 512K files
    tmp_file_01 = join(tmp_dir, 'file01.tmp')
    tmp_file_02 = join(tmp_dir, 'file02.tmp')

    # Allow our files to exist
    assert(self.touch(tmp_file_01, size='512K', random=True) is True)
    assert(self.touch(tmp_file_02, size='512K', random=True) is True)

    # Duplicate groups are removed automatically
    article = NNTPArticle(
        subject='woo-hoo',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Store some content
    content = NNTPBinaryContent(
        filepath=tmp_file_01, part=1, work_dir=self.tmp_dir)
    assert(article.add(content) is True)
    content = NNTPBinaryContent(
        filepath=tmp_file_02, part=2, work_dir=self.tmp_dir)
    assert(article.add(content) is True)

    # Detect our 2 attachments
    assert(len(article) == 2)

    # Set a few header entries
    article.header['Test'] = 'test'
    article.header['Another-Entry'] = 'test2'

    # Create a copy of our object
    article_copy = article.copy()

    assert(len(article_copy) == len(article))
    assert(len(article_copy.header) == len(article.header))

    # Make sure that altering one object doesn't affect the other
    # (the copy must not just be a pointer to the same location in
    # memory)
    article.header['Yet-Another-Entry'] = 'test3'
    assert(len(article_copy.header) + 1 == len(article.header))
def test_msgid(self):
    """
    Tests that we can generate Message-IDs when we need to
    """
    # Prepare Article
    article = NNTPArticle(work_dir=self.tmp_dir)

    # We equal a blank
    assert(article.id == '')

    # Store our new identifier (our Message-ID)
    new_id = article.msgid()

    # We now have a set id
    assert(article.id == new_id)

    # Consecutive calls do not change the value
    assert(article.msgid() == new_id)

    # However the value does change if we pass in a reset
    another_id = article.msgid(reset=True)

    # We're no longer using the old ID
    assert(article.id != new_id)
    assert(another_id != new_id)

    # We are using the new id
    assert(article.msgid() == another_id)

    # This is also what we're set to now
    assert(article.id == another_id)
def test_yenc_v1_3_NNTPArticle_encode_01(self):
    """
    Test the encoding of data; this is necessary prior to a post
    """
    # A simple test for ensuring that the yEnc
    # library exists; otherwise we want this test
    # to fail; the below line will handle this for
    # us; we'll let the test fail on an import error
    import yenc

    # First we take a binary file
    binary_filepath = join(self.var_dir, 'joystick.jpg')
    assert isfile(binary_filepath)

    # Initialize Codec
    encoder = CodecYenc(work_dir=self.test_dir)

    # Create an NNTPArticle Object
    article = NNTPArticle()

    # Add our file
    article.add(binary_filepath)

    # Encode our article by object
    new_article_a = article.encode(encoder)

    # We should have gotten an NNTPArticle Object
    assert isinstance(new_article_a, NNTPArticle) is True

    # We should actually have content associated with our data
    assert len(new_article_a) > 0

    # Encode our article by type
    new_article_b = article.encode(CodecYenc)

    # We should have gotten an NNTPArticle Object
    assert isinstance(new_article_b, NNTPArticle) is True

    # We should actually have content associated with our data
    assert len(new_article_b) > 0

    # Our article should be the same when it was generated by both
    # methods
    assert new_article_a[0].md5() == new_article_b[0].md5()

    # Chain our encodings
    new_article = article.encode(
        [CodecYenc, CodecYenc(work_dir=self.test_dir)],
    )

    # We should have gotten an NNTPArticle Object
    assert isinstance(new_article, NNTPArticle) is True

    # We should actually have content associated with our data
    assert len(new_article) > 0
def test_NNTPArticle_UU_encode_01(self):
    """
    Test the encoding of data; this is necessary prior to a post
    """
    # First we take a binary file
    binary_filepath = join(self.var_dir, 'joystick.jpg')
    assert isfile(binary_filepath)

    # Initialize Codec
    encoder = CodecUU(work_dir=self.test_dir)

    # Create an NNTPArticle Object
    article = NNTPArticle()

    # Add our file
    article.add(binary_filepath)

    # Encode our article by object
    new_article_a = article.encode(encoder)

    # We should have gotten an NNTPArticle Object
    assert isinstance(new_article_a, NNTPArticle) is True

    # We should actually have content associated with our data
    assert len(new_article_a) > 0

    # Encode our article by type
    new_article_b = article.encode(CodecUU)

    # We should have gotten an NNTPArticle Object
    assert isinstance(new_article_b, NNTPArticle) is True

    # We should actually have content associated with our data
    assert len(new_article_b) > 0

    # Our article should be the same when it was generated by both
    # methods
    assert new_article_a[0].md5() == new_article_b[0].md5()

    # Chain our encodings
    new_article = article.encode(
        [CodecUU, CodecUU(work_dir=self.test_dir)],
    )

    # We should have gotten an NNTPArticle Object
    assert isinstance(new_article, NNTPArticle) is True

    # We should actually have content associated with our data
    assert len(new_article) > 0
def test_loading_response(self):
    """
    Tests the load() function of the article
    """
    # Prepare a Response
    response = NNTPResponse(200, 'Great Data')
    response.decoded.add(NNTPBinaryContent(work_dir=self.tmp_dir))

    # Prepare Article
    article = NNTPArticle(id='random-id', work_dir=self.tmp_dir)

    # There is no data so our article can't be valid
    assert(article.is_valid() is False)

    # Load and Check
    assert(article.load(response) is True)
    assert(article.header is None)
    assert(len(article.decoded) == 1)
    assert(len(article.decoded) == len(article.files()))
    assert(str(article) == 'random-id')
    assert(unicode(article) == u'random-id')
    assert(article.size() == 0)

    # Now there is data, but it's an empty Object so it can't be valid
    assert(article.is_valid() is False)

    result = re.search(' Message-ID=\"(?P<id>[^\"]+)\"', repr(article))
    assert(result is not None)
    assert(result.group('id') == str(article))

    result = re.search(' attachments=\"(?P<no>[^\"]+)\"', repr(article))
    assert(result is not None)
    assert(int(result.group('no')) == len(article))

    # Prepare Articles
    article_a = NNTPArticle(id='a', work_dir=self.tmp_dir)
    article_b = NNTPArticle(id='b', work_dir=self.tmp_dir)
    assert((article_a < article_b) is True)

    # playing with the sort order however alters things
    article_a.no += 1
    assert((article_a < article_b) is False)

    # Prepare a Response (with a Header)
    response = NNTPResponse(200, 'Great Data')
    response.decoded.add(NNTPHeader(work_dir=self.tmp_dir))
    response.decoded.add(NNTPBinaryContent(work_dir=self.tmp_dir))

    # Prepare Article
    article = NNTPArticle(id='random-id', work_dir=self.tmp_dir)

    # Load and Check
    assert(article.load(response) is True)
    assert(isinstance(article.header, NNTPHeader))
    assert(len(article.decoded) == 1)

    for no, decoded in enumerate(article.decoded):
        # Test equality
        assert(article[no] == decoded)

    # We can also load another article on top of an existing one.
    # This is used when associating downloaded articles with ones
    # found in NZB-Files
    new_article = NNTPArticle(
        msgid='brand-new-id',
        no=article.no + 1,
        groups='a.b.c,d.e.f',
        work_dir=self.tmp_dir,
    )
    new_article.subject = 'test-subject-l2g'
    new_article.poster = 'test-poster-l2g'
    new_article.header = 'test-header-l2g'

    assert(article.load(new_article) is True)
    assert(article.id == new_article.id)
    assert(article.no == new_article.no)
    assert(article.groups == new_article.groups)
    assert(article.poster == new_article.poster)
    assert(article.subject == new_article.subject)
    assert(article.header == new_article.header)
    assert(article.body == new_article.body)
    assert(article.decoded == new_article.decoded)
    assert(article.groups == new_article.groups)
def test_get_paths(self):
    """
    Test that we fail under certain conditions
    """
    # Generate temporary folder to work with
    work_dir = join(self.tmp_dir, 'CodecFile_Test', 'work')

    # Initialize Codec (leaving volume_size unset disables it)
    cr = CodecFile(work_dir=work_dir)

    # create some dummy file entries
    tmp_files = set()
    for i in range(0, 10):
        # Create some temporary files to work with in our source
        # directory
        tmp_file = join(work_dir, 'DSC_IMG%.3d.jpeg' % i)
        self.touch(tmp_file, size='120K', random=True)
        # Add the file to our tmp_files list
        tmp_files.add(tmp_file)

    # Non-existent file reference
    invalid_file = join(self.tmp_dir, 'non-existant-file')
    assert isfile(invalid_file) is False

    content = NNTPContent(
        join(work_dir, 'testfile'),
        work_dir=self.tmp_dir,
    )
    content.write('test data')

    # Empty NNTPArticle() can not be added
    article = NNTPArticle(work_dir=self.tmp_dir)

    # New Empty NNTPContent() can not be added
    article_content = NNTPContent(
        join(work_dir, 'testfile2'),
        work_dir=self.tmp_dir,
    )

    # Store some new data
    article_content.write('some more test data')

    # We'll add our new content to our article
    assert article.add(content) is True

    # save path
    sub_dir = join(work_dir, 'subdir')
    assert mkdir(sub_dir) is True
    assert isdir(sub_dir) is True

    # string work
    assert len(cr.get_paths(self.tmp_dir)) == 1
    assert cr.get_paths(self.tmp_dir).pop() == self.tmp_dir

    # Sub-directories that exist within a root directory already
    # included are removed
    assert len(cr.get_paths([self.tmp_dir, sub_dir])) == 1
    assert cr.get_paths([self.tmp_dir, sub_dir]).pop() == self.tmp_dir

    # Invalid files/dirs are not found
    assert len(cr.get_paths(invalid_file)) == 0

    # Create a list of many assorted types of items
    __set = set([
        work_dir,
        sub_dir,
        article_content,
        content,
        invalid_file,
    ]) | set(tmp_files)

    # At the end of the day, the work_dir includes all of the
    # sub-content, and the invalid_file is simply tossed. However,
    # because our NNTPContent() and NNTPArticle() files are stored
    # outside of our work_dir, they will also be included in the
    # results
    results = cr.get_paths(__set)
    assert len(results) == 3
    assert work_dir in results
    assert content.filepath in results
    assert article_content.filepath in results

    # Now if we did the same test but without the work_dir directory,
    # then we'd have a much larger list; we'll work with lists this
    # time to show that we support them too
    __list = [
        sub_dir,
        article_content,
        content,
        invalid_file,
    ]
    __list.extend(tmp_files)

    results = cr.get_paths(__list)
    # +1 for content
    # +1 for sub_dir
    assert len(results) == (len(tmp_files) + len(article) + 2)
    for f in tmp_files:
        # Each file in our tmp_files will be in our results
        assert f in results
    assert content.filepath in results
    assert article_content.filepath in results
    assert sub_dir in results
def test_rar_errors(self):
    """
    Test that we fail under certain conditions
    """
    # Generate temporary folder to work with
    work_dir = join(self.tmp_dir, 'CodecRar_Test.rar.fail', 'work')

    # Now we want to prepare a folder filled with temporary content.
    # Note: this directory is a horrible choice because it's 'within'
    # our work_dir; as a result, adding content should not succeed
    source_dir = join(work_dir, 'test')

    # Initialize Codec (leaving volume_size unset disables it)
    cr = CodecRar(work_dir=work_dir)

    # No files
    assert len(cr) == 0

    tmp_file = join(source_dir, 'temp_file_non-existant')
    assert isfile(tmp_file) is False

    # We can't add content that does not exist
    assert cr.add(tmp_file) is False

    # Still no files
    assert len(cr) == 0

    # However, directories can not cross into our work directory
    tmp_dir = dirname(work_dir)

    # We intentionally pick a directory that has the work_dir
    # as a child within it
    assert isdir(tmp_dir)

    # Denied adding the file because it would include the work_dir
    # if we did
    assert cr.add(tmp_file) is False

    # Temporary file (within the directory denied in the previous
    # check)
    tmp_file = join(tmp_dir, 'temp_file')
    assert isfile(tmp_file) is False

    # Create our temporary file now
    self.touch(tmp_file, size='120K', random=True)
    assert isfile(tmp_file) is True

    # This file sits beside our work_dir, but we're still okay because
    # we're referencing the file explicitly, and it's a file, not a
    # directory
    assert cr.add(tmp_file) is True

    # Now we'll have 1 entry in our list
    assert len(cr) == 1

    # You can't add duplicates
    assert cr.add(tmp_file) is False

    # We still have 1 entry
    assert len(cr) == 1

    # Empty NNTPContent() can not be added
    content = NNTPContent(unique=True, work_dir=self.tmp_dir)

    # Can't do it
    assert cr.add(content) is False

    # Store some data
    content.write('some data\r\n')

    # Now we can add it because it has data in it
    assert cr.add(content) is True

    # We now have 2 entries
    assert len(cr) == 2

    # We can't add duplicates
    assert cr.add(content) is False

    # We still have 2 entries
    assert len(cr) == 2

    # Empty NNTPArticle() can not be added
    article = NNTPArticle(work_dir=self.tmp_dir)

    # Can't do it
    assert cr.add(article) is False

    # If we add content that's already been added, nothing
    # new will happen either
    assert article.add(content) is True

    # Still can't do it (only because it was already added)
    assert cr.add(article) is False

    # We still have 2 entries
    assert len(cr) == 2

    # New Empty NNTPContent() can not be added
    content = NNTPContent(unique=True, work_dir=self.tmp_dir)

    # We'll add our new content to our article
    assert article.add(content) is True

    # Our new content has no data associated with it, so this should
    # still fail
    assert cr.add(article) is False

    # We still have 2 entries
    assert len(cr) == 2

    # Store some new data
    content.write('some new data\r\n')

    # Our new content within our article now has data, so this will
    # work
    assert cr.add(article) is True

    # We now have 3 entries
    assert len(cr) == 3
def next(self):
    """
    Python 2 support
    Support stream type functions and iterations
    """
    # We track our iterator since we move along if our mode tells us
    # to do so.
    _iter = None

    if self.xml_root is not None:
        # clear our unused memory
        self.xml_root.clear()

    if self._segment_iter:
        while 1:
            _iter = self._segment_iter.next()
            if self._valid_by_mode(_iter):
                return _iter

    # get the root element
    try:
        _, self.xml_root = self.xml_iter.next()

        # Increment our iterator
        self.xml_itr_count += 1

    except StopIteration:
        # let this pass through
        self.xml_root = None
        self.xml_itr_count = 0

    except IOError:
        logger.warning('NZB-File is missing: %s' % self.filepath)
        self.xml_root = None
        self.xml_itr_count = 0
        # Mark situation
        self._lazy_is_valid = False

    except XMLSyntaxError as e:
        if e[0] is not None:
            # We have corruption
            logger.error("NZB-File '%s' is corrupt" % self.filepath)
            logger.debug(
                'NZB-File XMLSyntaxError Exception %s' % str(e))
            # Mark situation
            self._lazy_is_valid = False
        # else: this is a bug with lxml in earlier versions
        #   https://bugs.launchpad.net/lxml/+bug/1185701
        # It occurs when the end of the file is reached and lxml
        # simply just doesn't handle the closure properly.
        # It was fixed here:
        #   https://github.com/lxml/lxml/commit\
        #       /19f0a477c935b402c93395f8c0cb561646f4bdc3
        # So we can relax and return ok results here
        self.xml_root = None
        self.xml_itr_count = 0

    except Exception as e:
        logger.error("NZB-File '%s' is corrupt" % self.filepath)
        logger.debug('NZB-File Exception %s' % str(e))
        # Mark situation
        self._lazy_is_valid = False

    if self.xml_root is None or len(self.xml_root) == 0:
        self.xml_iter = None
        self.xml_root = None
        self.xml_itr_count = 0
        raise StopIteration()

    if self.meta is None:
        # Attempt to populate meta information
        self.meta = {}

        for meta in self.xml_root.xpath(
                '/ns:nzb/ns:head[1]/ns:meta',
                namespaces=NZB_LXML_NAMESPACES):
            # Store the Meta Information Detected
            self.meta[meta.attrib['type'].decode(self.encoding)] = \
                self.unescape_xml(meta.text.strip())

    # Acquire the Segments Groups
    groups = [
        group.text.strip().decode(self.encoding)
        for group in self.xml_root.xpath(
            'ns:groups/ns:group',
            namespaces=NZB_LXML_NAMESPACES,
        )
    ]

    # The detected filename
    _filename = ''

    # The name from the meta tag
    _name = self.meta.get('name', '').decode(self.encoding).strip()

    if not _name and self.filepath is not None:
        # Let's try to generate a name from our NZB-File
        tmpfname = basename(self.filepath)

        # Strip our extension off the end (if present)
        result = NZB_EXTENSION_RE.search(tmpfname)
        if result and result.group('fname'):
            # Store our new filename as our name
            _name = result.group('fname')

    # Subject
    _subject = self.unescape_xml(self.xml_root.attrib.get(
        'subject', '')).decode(self.encoding)

    # Poster
    _poster = self.unescape_xml(self.xml_root.attrib.get(
        'poster', '')).decode(self.encoding)

    # Use our Codec(s) to extract our yEnc Subject
    matched = None
    for c in self._codecs:
        # for each entry, parse our article
        matched = c.parse_article(
            subject=_subject,
            poster=_poster,
        )
        if matched:
            # We matched
            break

    if matched:
        # We successfully got a filename from our subject line
        _filename = matched.get('fname', '').strip()

    if _filename and _name:
        # always allow the name to override the detected filename if
        # we actually have a real name we can associate with it
        _ext = self._mime.extension_from_filename(_filename)
        if _ext:
            _filename = '{0}{1}'.format(_name, _ext)

    # Initialize an NNTPSegmentedPost Object using the data we read
    _file = NNTPSegmentedPost(
        _filename,
        poster=_poster,
        epoch=self.xml_root.attrib.get('date', '0'),
        subject=_subject,
        groups=groups,
        work_dir=self.work_dir,
        sort_no=self.xml_itr_count,
    )

    # index tracker
    _last_index = 0

    # Now append our segments
    for segment in self.xml_root.xpath(
            'ns:segments/ns:segment',
            namespaces=NZB_LXML_NAMESPACES):

        _cur_index = int(segment.attrib.get('number', _last_index + 1))
        try:
            _size = int(segment.attrib.get('bytes'))
            if _size < 0:
                _size = 0

        except (TypeError, ValueError):
            _size = 0

        article = NNTPArticle(
            subject=_file.subject,
            poster=_file.poster,
            id=self.unescape_xml(segment.text),
            no=_cur_index,
            work_dir=self.work_dir,
            codecs=self._codecs,
        )

        # Store our empty content Placeholder
        article.add(
            NNTPEmptyContent(
                filepath=_filename,
                part=self.xml_itr_count,
                total_size=_size,
                work_dir=self.work_dir,
            ))

        # Add article
        _file.add(article)

        # Track our index
        _last_index = _cur_index

    if not self._valid_by_mode(_file):
        # Not used; recursively move along
        return self.next()

    # Return our object
    return _file
def test_7z_errors(self):
    """
    Test that we fail under certain conditions
    """
    # Generate temporary folder to work with
    work_dir = join(self.tmp_dir, 'Codec7Zip_Test.7z.fail', 'work')

    # Now we want to prepare a folder filled with temporary content.
    # Note: this directory is a horrible choice because it's 'within'
    # our work_dir; as a result, adding content should not succeed
    source_dir = join(work_dir, 'test')

    # Initialize Codec (leaving volume_size unset disables it)
    cr = Codec7Zip(work_dir=work_dir)

    # No files
    assert len(cr) == 0

    tmp_file = join(source_dir, 'temp_file_non-existant')
    assert isfile(tmp_file) is False

    # We can't add content that does not exist
    assert cr.add(tmp_file) is False

    # Still no files
    assert len(cr) == 0

    # However, directories can not cross into our work directory
    tmp_dir = dirname(work_dir)

    # We intentionally pick a directory that has the work_dir
    # as a child within it
    assert isdir(tmp_dir)

    # Denied adding the file because it would include the work_dir
    # if we did
    assert cr.add(tmp_file) is False

    # Temporary file (within the directory denied in the previous
    # check)
    tmp_file = join(tmp_dir, 'temp_file')
    assert isfile(tmp_file) is False

    # Create our temporary file now
    self.touch(tmp_file, size='120K', random=True)
    assert isfile(tmp_file) is True

    # This file sits beside our work_dir, but we're still okay because
    # we're referencing the file explicitly, and it's a file, not a
    # directory
    assert cr.add(tmp_file) is True

    # Now we'll have 1 entry in our list
    assert len(cr) == 1

    # You can't add duplicates
    assert cr.add(tmp_file) is False

    # We still have 1 entry
    assert len(cr) == 1

    # Empty NNTPContent() can not be added
    content = NNTPContent(unique=True, work_dir=self.tmp_dir)

    # Can't do it
    assert cr.add(content) is False

    # Store some data
    content.write('some data\r\n')

    # Now we can add it because it has data in it
    assert cr.add(content) is True

    # We now have 2 entries
    assert len(cr) == 2

    # We can't add duplicates
    assert cr.add(content) is False

    # We still have 2 entries
    assert len(cr) == 2

    # Empty NNTPArticle() can not be added
    article = NNTPArticle(work_dir=self.tmp_dir)

    # Can't do it
    assert cr.add(article) is False

    # If we add content that's already been added, nothing
    # new will happen either
    assert article.add(content) is True

    # Still can't do it (only because it was already added)
    assert cr.add(article) is False

    # We still have 2 entries
    assert len(cr) == 2

    # New Empty NNTPContent() can not be added
    content = NNTPContent(unique=True, work_dir=self.tmp_dir)

    # We'll add our new content to our article
    assert article.add(content) is True

    # Our new content has no data associated with it, so this should
    # still fail
    assert cr.add(article) is False

    # We still have 2 entries
    assert len(cr) == 2

    # Store some new data
    content.write('some new data\r\n')

    # Our new content within our article now has data, so this will
    # work
    assert cr.add(article) is True

    # We now have 3 entries
    assert len(cr) == 3
def search(ctx, group, keywords, minscore, maxscore, case_insensitive, nzb):
    """
    Searches cached groups for articles. Specified keywords stack on
    one another; each keyword specified must match somewhere in the
    subject line or else the result is filtered. Keywords can also be
    prefixed with special characters to help identify what is being
    scanned.

    1. Example 1: A search that should ignore any text with 'Test' in
       it but include text with 'Jack' in it. Unless you include the
       case-insensitive switch (inspired by grep), the search will be
       case sensitive:

            -Test +Jack

       The + (plus) is always implied. Its primary use is to eliminate
       ambiguity (and allow for the minus to exist). It is also
       necessary if you intend to search for something with a plus in
       it; hence the following would search for the string
       '+++AWESOME+++':

            +++++AWESOME+++

       The extra plus symbol is stripped off and the search works as
       intended.

    2. Example 2: Search by Poster. Since all keywords imply that
       you're searching for a subject keyword, the token that changes
       this is '%p', whereas the subject is implicitly identified by
       '%s'. Hence the following would look for me:

            %pChris %pl2g

       This can also be written like this:

            %p+Chris %p+l2g

       You should not be confused here; the tokens at the front will
       be stripped off and the search will run as normal. These tokens
       are very important because they allow you to mix and match
       searches against both the subject and poster:

            %p+Chris %p+l2g AWESOME

       The above implies that AWESOME will have a +%s in front of it.
       Make sense?

    The final thing worth noting is doing a search for text that
    contains dash/minus (-) signs. Click (the awesome CLI wrapper this
    script uses) can pick the - up as an actual switch, thinking
    you're trying to pass it into this function. You can easily
    disable this behaviour by adding a double dash/minus sign (--)
    like so:

        nr search -- -keyword +keyword2
    """
    session = ctx['NNTPSettings'].session()
    if not session:
        logger.error("The database is not correctly configured.")
        exit(1)

    if not group:
        logger.error("You must specify a group/alias.")
        exit(1)

    # Simplify Alias
    groups = get_groups(session, group)
    if not groups:
        logger.error("You must specify a group/alias.")
        exit(1)

    for name, _id in groups.iteritems():
        db_path = join(ctx['NNTPSettings'].work_dir, 'cache', 'search')
        db_file = '%s%s' % (
            join(db_path, name),
            SQLITE_DATABASE_EXTENSION,
        )

        if not isfile(db_file):
            logger.warning(
                "There is no cached content for '%s'." % db_file)
            continue

        reset = not exists(db_file)
        engine = 'sqlite:///%s' % db_file
        db = NNTPGroupDatabase(engine=engine, reset=reset)
        group_session = db.session()
        if not group_session:
            logger.warning(
                "The database %s could not be accessed." % db_file)
            continue

        gt = group_session.query(Article)

        # Parse our keywords
        parsed_keywords = parse_search_keyword(keywords)

        for _op, _cat, keyword in parsed_keywords:
            if _cat == SearchCategory.SUBJECT:
                if _op == SearchOperation.INCLUDE:
                    if case_insensitive:
                        logger.debug(
                            'Scanning -and- (case-insensitive) subject: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            Article.subject.ilike('%%%s%%' % keyword))
                    else:
                        logger.debug(
                            'Scanning -and- (case-sensitive) subject: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            Article.subject.like('%%%s%%' % keyword))

                else:  # _op == SearchOperation.EXCLUDE
                    if case_insensitive:
                        logger.debug(
                            'Scanning -not- (case-insensitive) subject: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            not_(Article.subject.ilike('%%%s%%' % keyword)))
                    else:
                        logger.debug(
                            'Scanning -and not- (case-sensitive) subject: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            not_(Article.subject.like('%%%s%%' % keyword)))

            elif _cat == SearchCategory.POSTER:
                if _op == SearchOperation.INCLUDE:
                    if case_insensitive:
                        logger.debug(
                            'Scanning -and- (case-insensitive) poster: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            Article.poster.ilike('%%%s%%' % keyword))
                    else:
                        logger.debug(
                            'Scanning -and- (case-sensitive) poster: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            Article.poster.like('%%%s%%' % keyword))

                else:  # _op == SearchOperation.EXCLUDE
                    if case_insensitive:
                        logger.debug(
                            'Scanning -and not- (case-insensitive) poster: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            not_(Article.poster.ilike('%%%s%%' % keyword)))
                    else:
                        logger.debug(
                            'Scanning -and not- (case-sensitive) poster: '
                            '"%s"' % (keyword))
                        gt = gt.filter(
                            not_(Article.poster.like('%%%s%%' % keyword)))

        # Handle Scores
        if maxscore == minscore:
            logger.debug('Scanning -score == %d-' % (maxscore))
            gt = gt.filter(Article.score == maxscore)
        else:
            logger.debug(
                'Scanning -score >= %d and score <= %d-' % (
                    minscore, maxscore))
            gt = gt.filter(Article.score <= maxscore)\
                   .filter(Article.score >= minscore)

        gt = gt.order_by(Article.score.desc())

        group_session.close()
        db.close()

        if nzb:
            # make an NZB-File from our results
            nzb_file = '_'.join(keywords) + '.nzb'
            nzb = NNTPnzb(
                nzb_file,
                work_dir='/tmp/',
            )

            # Iterate through our list
            for entry in gt:
                # create a segment/article for each result
                segment = NNTPSegmentedPost(
                    '',
                    poster=entry.poster,
                    subject=entry.subject,
                    utc=entry.posted_date,
                    groups=name,
                )
                article = NNTPArticle(str(entry.message_id))

                # add an empty content placeholder to the article
                article.add(NNTPEmptyContent('', total_size=entry.size))

                # Add Article to Segment to NZB
                segment.add(article)
                nzb.add(segment)

            # save the NZB-File
            nzb.save()

        else:
            # Iterate through our list
            print("%s:" % (name))
            for entry in gt:
                print(" [%s] %.4d %s" % (
                    entry.message_id, entry.score,
                    (entry.subject).encode('ascii', 'ignore')))

    return
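A few hedged invocation examples tying the grammar above together. The double-dash behaviour comes straight from the docstring; the spelling of the group argument and the --nzb switch are assumptions based on this function's signature, not confirmed CLI flags.

# Hedged usage examples; option spellings other than '--' are
# assumptions derived from the search() signature above:
#
#   nr search alt.binaries.test -- +Jack -Test
#       subject must contain 'Jack' and must not contain 'Test'
#
#   nr search alt.binaries.test -- %p+l2g +AWESOME
#       poster must contain 'l2g', subject must contain 'AWESOME'
#
#   nr search --nzb alt.binaries.test -- +keyword
#       write the matching articles out as an NZB-File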
def test_group(self):
    """
    Tests the group variations
    """
    # Test with no groups defined
    article = NNTPArticle(
        id='random-id',
        work_dir=self.tmp_dir,
    )
    assert(isinstance(article.groups, set))
    assert(len(article.groups) == 0)

    # Test String
    article = NNTPArticle(
        id='random-id',
        groups='convert.lead.2.gold',
        work_dir=self.tmp_dir,
    )
    assert(isinstance(article.groups, set))
    assert(len(article.groups) == 1)
    assert('convert.lead.2.gold' in article.groups)

    # Support Tuples
    article = NNTPArticle(
        id='random-id',
        groups=(
            'convert.lead.2.gold',
            'convert.lead.2.gold.again',
        ),
        work_dir=self.tmp_dir,
    )
    assert(isinstance(article.groups, set))
    assert(len(article.groups) == 2)
    assert('convert.lead.2.gold' in article.groups)
    assert('convert.lead.2.gold.again' in article.groups)

    # Support Lists
    article = NNTPArticle(
        id='random-id',
        groups=[
            'convert.lead.2.gold',
            'convert.lead.2.gold.again',
        ],
        work_dir=self.tmp_dir,
    )
    assert(isinstance(article.groups, set))
    assert(len(article.groups) == 2)
    assert('convert.lead.2.gold' in article.groups)
    assert('convert.lead.2.gold.again' in article.groups)

    # Support Sets
    article = NNTPArticle(
        id='random-id',
        groups=set([
            'convert.lead.2.gold',
            'convert.lead.2.gold.again',
        ]),
        work_dir=self.tmp_dir,
    )
    assert(isinstance(article.groups, set))
    assert(len(article.groups) == 2)
    assert('convert.lead.2.gold' in article.groups)
    assert('convert.lead.2.gold.again' in article.groups)

    # Don't expect invalid groups to stick
    article = NNTPArticle(
        id='random-id',
        groups=4,
        work_dir=self.tmp_dir,
    )
    assert(len(article.groups) == 0)

    # Duplicate groups are removed automatically
    article = NNTPArticle(
        id='random-id',
        groups=[
            'convert.lead.2.gold.again',
            'ConVert.lead.2.gold',
            'convert.lead.2.gold',
            'convert.lead.2.gold.again',
        ],
        work_dir=self.tmp_dir,
    )
    assert(isinstance(article.groups, set))
    assert(len(article.groups) == 2)
    assert('convert.lead.2.gold' in article.groups)
    assert('convert.lead.2.gold.again' in article.groups)
def test_deobsfucation(self):
    """
    Tests deobsfucation functionality
    """
    tmp_dir = join(self.tmp_dir, 'NNTPArticle_Test.deobsfucation')

    # First we create a couple of 512K files
    tmp_file = join(tmp_dir, 'file.tmp')
    rar_file = join(tmp_dir, 'file.rar')

    # Allow our files to exist
    assert(self.touch(tmp_file, size='512K', random=True) is True)
    assert(self.touch(rar_file, size='512K', random=True) is True)

    # Create an article that we'll store our rar file into; we
    # intentionally give our rar file a different name than what is
    # defined above
    article = NNTPArticle(
        subject='"my test file" - testfile.rar yEnc (1/1)',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Add our Rar File
    article.add(rar_file)

    # the attachment name takes priority over the detected article
    # name
    assert(article.deobsfucate() == 'file.rar')

    # filebase allows us to enforce what the filename will be once we
    # figure out the extension
    assert(article.deobsfucate(filebase="mytest") == 'mytest.rar')

    # Adding a second file adds ambiguity; this will fail
    article.add(tmp_file)
    assert(article.deobsfucate() is None)

    # Create another article; but this time we'll associate our
    # temporary file with it. Since our temporary file has a useless
    # extension, we test that the article parsing takes over a bigger
    # role in the detection process.
    article = NNTPArticle(
        subject='"my test file" - testfile.jpeg yEnc (1/1)',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Add our temporary file with a bad extension (.tmp is useless to
    # us)
    article.add(tmp_file)

    # the article takes priority over the detected attachment
    assert(article.deobsfucate() == 'testfile.jpeg')

    # None is a perfectly accepted argument and won't cause any issues
    assert(article.deobsfucate(filebase=None) == 'testfile.jpeg')

    # If codecs are set to None, then the default codecs are used
    assert(article.deobsfucate(codecs=None) == 'testfile.jpeg')

    # If codecs are set to an empty list, then you're effectively
    # telling the tool 'not' to parse the article at all, so our
    # attachment is used instead
    assert(article.deobsfucate(codecs=[]) == 'file.tmp')

    # a file base with codecs disabled still allows our base to
    # prevail
    assert(article.deobsfucate(filebase="abcd", codecs=[]) == 'abcd.tmp')

    # filebase allows us to enforce what the filename will be once we
    # figure out the extension. Our article extension takes over
    assert(article.deobsfucate(filebase="mytest") == 'mytest.jpeg')

    # Now another thing that can happen is that our Article is not
    # parseable but our decoded file is:

    # Create an article that we'll store our rar file into; we
    # intentionally give our rar file a different name than what is
    # defined above
    article = NNTPArticle(
        subject='"a garbage unparseable subject',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Add our Rar File
    article.add(rar_file)

    # the attachment name takes priority
    assert(article.deobsfucate() == 'file.rar')

    # Another thing that can happen is that neither the attachment nor
    # the article is parseable
    article = NNTPArticle(
        subject='"a garbage unparseable subject',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Add our garbage .tmp file
    article.add(tmp_file)

    # unparseable everything just returns our attachment filename
    assert(article.deobsfucate() == 'file.tmp')

    # Another thing that can happen is that the subject identifies one
    # type of file while our attachment identifies another.
    article = NNTPArticle(
        subject='"my greatest picture" - l2g.png yEnc (1/1)',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Add our Rar File (even though we're looking for a picture)
    article.add(rar_file)

    # the attachment name takes priority over the detected article
    # name when 2 mime types collide
    assert(article.deobsfucate() == 'file.rar')
def test_article_splitting(self):
    """
    Tests that articles can be split
    """
    # Duplicate groups are removed automatically
    article = NNTPArticle(
        work_dir=self.tmp_dir,
        subject='split-test',
        poster='<*****@*****.**>',
        groups='alt.binaries.l2g',
    )

    # Nothing to split gives an error
    assert(article.split() is None)

    tmp_file = join(self.tmp_dir, 'NNTPArticle_Test.chunk', '1MB.rar')

    # The file doesn't exist at first
    assert(isfile(tmp_file) is False)

    # Create it
    assert(self.touch(tmp_file, size='1MB', random=True) is True)

    # Now it does
    assert(isfile(tmp_file) is True)

    # Now we want to load it into a NNTPContent object
    content = NNTPBinaryContent(filepath=tmp_file, work_dir=self.tmp_dir)

    # Add our object to our article
    assert(article.add(content) is True)

    # No size to split on gives an error
    assert(article.split(size=0) is None)
    assert(article.split(size=-1) is None)
    assert(article.split(size=None) is None)
    assert(article.split(size='bad_string') is None)

    # Invalid Memory Limit
    assert(article.split(mem_buf=0) is None)
    assert(article.split(mem_buf=-1) is None)
    assert(article.split(mem_buf=None) is None)
    assert(article.split(mem_buf='bad_string') is None)

    # We'll split it in 2
    results = article.split(strsize_to_bytes('512K'))

    # Tests that our results are expected
    assert(isinstance(results, sortedset) is True)
    assert(len(results) == 2)

    # Test that the parts were assigned correctly
    for i, article in enumerate(results):
        # We should only have one content object
        assert(isinstance(article, NNTPArticle) is True)
        assert(len(article) == 1)

        # Our content object should have its part and total part
        # values populated correctly
        assert(article[0].part == (i + 1))
        assert(article[0].total_parts == len(results))
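As a hedged follow-on (not part of the original test), the split results could be packaged into an NZB-File by reusing the NNTPSegmentedPost/NNTPnzb pattern from test_nzbfile_generation; the '1MB.rar' base name and output path are illustrative only.

# Hedged sketch: wrap the split parts into an NZB-File, following the
# same pattern as test_nzbfile_generation above.
nzbobj = NNTPnzb()
segpost = NNTPSegmentedPost('1MB.rar')
for part in results:
    # each part is an NNTPArticle carrying one content chunk
    assert(segpost.add(part) is True)
nzbobj.add(segpost)
assert(nzbobj.save(join(self.tmp_dir, 'split-test.nzb')) is True)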
def test_article_append(self):
    """
    Test article append()

    Appending effectively takes another article and appends its
    content to the end of the article doing the appending. Consider:
        - test.rar.000 (ArticleA)
        - test.rar.001 (ArticleB)
        - test.rar.002 (ArticleC)

    # The following would assemble the entire article:
    ArticleA.append(ArticleB)
    ArticleA.append(ArticleC)
    """
    # Create a temporary file we can use
    tmp_file = join(self.tmp_dir, 'NNTPArticle_Test.append', '1MB.rar')

    # The file doesn't exist at first
    assert(not isfile(tmp_file))
    # Create it
    assert(self.touch(tmp_file, size='1MB', random=True))
    # Now it does
    assert(isfile(tmp_file))

    # Duplicate groups are removed automatically
    article_a = NNTPArticle(
        work_dir=self.tmp_dir,
        subject='split-test-a',
        poster='<*****@*****.**>',
        groups='alt.binaries.l2g',
    )

    # No size at this point
    assert(article_a.size() == 0)

    # Add our file to our article
    assert(article_a.add(tmp_file) is True)

    # We should be equal to the size we created our content with
    assert(article_a.size() == strsize_to_bytes('1M'))

    # We'll split it in 2
    results = article_a.split(strsize_to_bytes('512K'))

    # Size doesn't change even though we're split
    assert(article_a.size() == strsize_to_bytes('1M'))

    # Tests that our results are expected
    assert(isinstance(results, sortedset) is True)
    assert(len(results) == 2)

    # We'll create another article
    article_b = NNTPArticle(
        subject='split-test-b',
        poster='<*****@*****.**>',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Now we'll join the contents using append
    assert(article_b.size() == 0)
    for article in results:
        assert(isinstance(article, NNTPArticle) is True)
        assert(article_b.append(article) is True)

    assert(article_b.size() == article_a.size())
    assert(article_b[0].md5() == article_a[0].md5())