def yDecode(dataList):
    """Decode the yEnc encoded body lines of an article.

    ``=ybegin`` / ``=ypart`` header lines found within the first six lines are
    dropped, collection stops at the first ``=yend`` line, and the remaining
    lines are joined and decoded.

    Returns the result of ``_yenc.decode_string`` when
    ``Hellanzb.HAVE_C_YENC`` is set, otherwise the string produced by the
    pure-python fallback below.
    """
    buffer = []
    # enumerate() supplies the line position; without a running index the
    # 'index <= 5' guard would always be true and header-looking lines would
    # be dropped anywhere in the body.
    for index, line in enumerate(dataList):
        if index <= 5 and (line[:7] == '=ybegin' or line[:6] == '=ypart'):
            continue
        elif line[:5] == '=yend':
            break
        buffer.append(line)

    data = ''.join(buffer)

    if Hellanzb.HAVE_C_YENC:
        return _yenc.decode_string(data)

    # Unescape NUL, TAB, LF, CR, ESC, ' ', '.', '='
    # NOTE: The yencode standard dictates these characters as 'critical' and
    # requires them to be escaped, EXCEPT for the ESCAPE CHAR. It is included
    # here because it has been seen to be escaped by some yencoders. The
    # standard also says that ydecoders should be able to handle decoding ANY
    # escaped character.
    # FIXME: We aren't strictly 'correct' in that we only unescape the
    # characters we know about (this is faster). This is as good as it gets
    # for the python yDecoder; the next step is the C implementation -pjenvey
    for i in (0, 9, 10, 13, 27, 32, 46, 61):
        j = '=%c' % (i + 64)
        data = data.replace(j, chr(i))
    return data.translate(YDEC_TRANS)
def decode(data):
    """
    Decode a yEnc encoded post and return the decoded payload.

    Returns None when either the ybegin or the yend header information
    handed back by yCheck() is empty. A CRC mismatch is only logged; the
    decoded data is returned regardless.
    """
    (ybegin, ypart, yend), data = yCheck(data)

    # TODO: deal with non-yencoded posts
    if not (ybegin and yend):
        return None

    # Deal with yenc encoded posts
    if "name" in ybegin:
        filename = name_fixer(ybegin["name"])
    _type = "yenc"

    # Decode data; the CRC handed back by _yenc is inverted and masked to
    # 32 bits before it is formatted
    payload = "".join(data)
    decoded_data, crc = _yenc.decode_string(payload)[:2]
    partcrc = "%08X" % ((crc ^ -1) & 0xFFFFFFFF)

    # Multi-part posts carry the checksum of the part, not of the whole file
    crcname = "pcrc32" if ypart else "crc32"

    if crcname in yend:
        # Left-pad to 8 hex digits, upper-cased like partcrc
        _partcrc = yend[crcname].rjust(8, "0").upper()
    else:
        # Corrupt header...
        _partcrc = None

    if _partcrc != partcrc:
        log.error("CRC Error")

    return decoded_data
def feed(self, data):
    """
    Decode a chunk of data and append the result to the internal buffer.

    The running CRC and escape state kept on the instance are passed to
    and updated from _yenc.decode_string(). Returns the length of the
    decoded chunk.
    """
    result = _yenc.decode_string(data, self._crc, self._escape)
    decoded, self._crc, self._escape = result
    written = len(decoded)
    self._decoded += written
    self._buffer.write(decoded)
    return written
def feed(self, data):
    """
    Decode a chunk of data and append the result to the internal buffer.

    Raises IOError when the decoder is no longer feedable. Returns the
    length of the decoded chunk.
    """
    if not self._feedable:
        raise IOError("Decoding already terminated")

    # The running CRC and escape state are carried across calls
    result = _yenc.decode_string(data, self._crc, self._escape)
    decoded, self._crc, self._escape = result
    written = len(decoded)
    self._decoded += written
    self._buffer.write(decoded)
    return written
def feed(self, data):
    """
    Decode a chunk of data and append the result to the internal buffer.

    Raises IOError when the decoder is no longer feedable. Returns the
    length of the decoded chunk.
    """
    if not self._feedable:
        raise IOError("Decoding already terminated")

    # The running CRC and escape state are carried across calls
    chunk, new_crc, new_escape = _yenc.decode_string(
        data, self._crc, self._escape)
    self._crc = new_crc
    self._escape = new_escape

    size = len(chunk)
    self._decoded += size
    self._buffer.write(chunk)
    return size
new_lines = "".join(data_chunks).split("\r\n") for i in range(len(new_lines)): if new_lines[i][:2] == "..": new_lines[i] = new_lines[i][1:] if new_lines[-1] == ".": new_lines = new_lines[1:-1] data.extend(new_lines) # Filter out empty ones data = [_f for _f in data if _f] yenc, data = tests.testsupport.parse_yenc_data(data) ybegin, ypart, yend = yenc # Different from homemade flat_data = "".join(data) decoded_data, crc = _yenc.decode_string(flat_data)[:2] partcrc = "%08X" % ((crc ^ -1) & 2**32 - 1) if ypart: crcname = "pcrc32" else: crcname = "crc32" if crcname in yend: _partcrc = "0" * (8 - len(yend[crcname])) + yend[crcname].upper() else: _partcrc = None if output_buffer != decoded_data: # Discrepancy between _yenc and sabyenc3 failed_checks += 1
def decode(article, data):
    """
    Decode the lines of one article and return the decoded payload.

    Returns None when no non-empty lines are left, '' when a UUencoded
    post is detected (the job is paused and a warning logged), otherwise
    the yEnc decoded data. Raises BadYenc for posts that are neither, and
    CrcError when the computed checksum differs from the one in =yend.
    """
    # Filter out empty ones
    data = filter(None, data)

    # No point in continuing if we don't have any data left
    if not data:
        return None

    nzf = article.nzf
    (ybegin, ypart, yend), data = yCheck(data)

    # Deal with non-yencoded posts
    if not ybegin:
        uu_found = False
        try:
            for idx in xrange(min(40, len(data))):
                if not data[idx].startswith('begin '):
                    continue
                nzf.type = 'uu'
                uu_found = True
                # Pause the job and show warning
                if nzf.nzo.status != Status.PAUSED:
                    nzf.nzo.pause()
                    msg = T('UUencode detected, only yEnc encoding is supported [%s]') % nzf.nzo.final_name
                    logging.warning(msg)
                break
        except IndexError:
            raise BadYenc()

        if not uu_found:
            raise BadYenc()
        return ''

    # A =ybegin without a matching =yend cannot be decoded
    if not yend:
        raise BadYenc()

    # Deal with yenc encoded posts
    if 'name' in ybegin:
        nzf.filename = yenc_name_fixer(ybegin['name'])
    else:
        logging.debug("Possible corrupt header detected => ybegin: %s", ybegin)
    nzf.type = 'yenc'

    # Decode data
    joined = ''.join(data)
    if HAVE_YENC:
        decoded_data, crc = _yenc.decode_string(joined)[:2]
        partcrc = '%08X' % ((crc ^ -1) & 0xFFFFFFFF)
    else:
        # Pure-python fallback: unescape the known escaped characters,
        # then apply the translation table
        for code in (0, 9, 10, 13, 27, 32, 46, 61):
            joined = joined.replace('=%c' % (code + 64), chr(code))
        decoded_data = joined.translate(YDEC_TRANS)
        partcrc = '%08X' % (binascii.crc32(decoded_data) & 0xFFFFFFFF)

    # Multi-part posts carry the checksum of the part
    crcname = 'pcrc32' if ypart else 'crc32'

    if crcname in yend:
        # Left-pad to 8 hex digits, upper-cased like partcrc
        _partcrc = yend[crcname].rjust(8, '0').upper()
    else:
        _partcrc = None
        logging.debug("Corrupt header detected => yend: %s", yend)

    if _partcrc != partcrc:
        raise CrcError(_partcrc, partcrc, decoded_data)

    return decoded_data
if new_lines[i][:2] == '..': new_lines[i] = new_lines[i][1:] if new_lines[-1] == '.': new_lines = new_lines[1:-1] data.extend(new_lines) # Filter out empty ones data = filter(None, data) yenc, data = yCheck(data) ybegin, ypart, yend = yenc timet += time.clock()-time2 # Different from homemade decoded_data, crc = _yenc.decode_string(''.join(data))[:2] partcrc = '%08X' % ((crc ^ -1) & 2 ** 32L - 1) if ypart: crcname = 'pcrc32' else: crcname = 'crc32' if crcname in yend: _partcrc = '0' * (8 - len(yend[crcname])) + yend[crcname].upper() #print _partcrc else: _partcrc = None print "Corrupt header detected => yend: %s" % yend
def decode(article, data):
    """
    Decode the lines of one article and return the decoded payload.

    Returns None when strip() leaves no data, or when the post has no
    =ybegin header and no uuencode 'begin ' line is found within the first
    ten lines. For a uuencoded post the lines from the 'begin ' line
    onwards (minus a trailing 'end' / '`' pair) are returned joined with
    CRLF. For a yEnc post the decoded data is returned.

    Raises BadYenc when a =ybegin header has no matching =yend, and
    CrcError when the computed checksum differs from the one in =yend.
    """
    data = strip(data)

    # No point in continuing if we don't have any data left
    if not data:
        return None

    nzf = article.nzf
    yenc, data = yCheck(data)
    ybegin, ypart, yend = yenc
    decoded_data = None

    # Deal with non-yencoded posts
    if not ybegin:
        begin_at = None
        # Only look at lines that exist: short posts must not raise
        # IndexError while scanning for the uuencode header
        for i in xrange(min(10, len(data))):
            if data[i].startswith('begin '):
                begin_at = i
                break

        if begin_at is not None:
            nzf.filename = name_fixer(data[begin_at].split(None, 2)[2])
            nzf.type = 'uu'

            # Drop everything in front of the 'begin ' line in one go
            del data[:begin_at]

            # Strip the uuencode trailer; guard against running out of lines
            if data and data[-1] == 'end':
                data.pop()
                if data and data[-1] == '`':
                    data.pop()

            decoded_data = '\r\n'.join(data)

    # Deal with yenc encoded posts
    elif ybegin and yend:
        if 'name' in ybegin:
            nzf.filename = name_fixer(ybegin['name'])
        else:
            logging.debug("Possible corrupt header detected "
                          "=> ybegin: %s", ybegin)
        nzf.type = 'yenc'

        # Decode data
        if HAVE_YENC:
            decoded_data, crc = _yenc.decode_string(''.join(data))[:2]
            # _yenc hands back the CRC inverted; flip and mask to 32 bits
            partcrc = '%08X' % ((crc ^ -1) & 0xFFFFFFFF)
        else:
            data = ''.join(data)
            # Pure-python fallback: unescape the known escaped characters,
            # then apply the translation table
            for i in (0, 9, 10, 13, 27, 32, 46, 61):
                j = '=%c' % (i + 64)
                data = data.replace(j, chr(i))
            decoded_data = data.translate(YDEC_TRANS)
            crc = binascii.crc32(decoded_data)
            partcrc = '%08X' % (crc & 0xFFFFFFFF)

        # Multi-part posts carry the checksum of the part
        if ypart:
            crcname = 'pcrc32'
        else:
            crcname = 'crc32'

        if crcname in yend:
            # Left-pad to 8 hex digits so it compares against partcrc
            _partcrc = '0' * (8 - len(yend[crcname])) + yend[crcname].upper()
        else:
            _partcrc = None
            logging.debug("Corrupt header detected "
                          "=> yend: %s", yend)

        if not (_partcrc == partcrc):
            raise CrcError(_partcrc, partcrc, decoded_data)
    else:
        raise BadYenc()

    return decoded_data
def testDecode(self):
    """Check the decoded text and CRC of a known encoded byte string."""
    encoded = b'r\x8f\x96\x96\x99J\xa1\x99\x9c\x96\x8eK'
    decoded, crc, _escape = _yenc.decode_string(encoded)
    self.assertEqual(decoded, b'Hello world!')
    self.assertEqual(crc, 3833259626)
def decode(self, article, data, raw_data):
    """
    Decode one article and return the decoded payload.

    With SABYenc enabled the raw chunks are handed to
    sabyenc.decode_usenet_chunks(); otherwise the line list in `data` is
    decoded through _yenc or the pure-python fallback. Returns None when
    no non-empty lines are left and '' when a UUencoded post is detected.
    Raises BadYenc for undecodable posts and CrcError on a checksum
    mismatch.
    """
    # Do we have SABYenc? Let it do all the work
    if sabnzbd.decoder.SABYENC_ENABLED:
        result = sabyenc.decode_usenet_chunks(raw_data, article.bytes)
        decoded_data, output_filename, crc, crc_expected, crc_correct = result

        # Assume it is yenc
        article.nzf.type = 'yenc'

        # Only set the name if it was found and not obfuscated
        self.verify_filename(article, decoded_data, output_filename)

        # CRC check
        if not crc_correct:
            raise CrcError(crc_expected, crc, decoded_data)

        return decoded_data

    # Continue for _yenc or Python-yEnc
    # Filter out empty ones
    data = filter(None, data)

    # No point in continuing if we don't have any data left
    if not data:
        return None

    nzf = article.nzf
    (ybegin, ypart, yend), data = yCheck(data)

    # Deal with non-yencoded posts
    if not ybegin:
        uu_found = False
        try:
            for idx in xrange(min(40, len(data))):
                if not data[idx].startswith('begin '):
                    continue
                nzf.type = 'uu'
                uu_found = True
                # Pause the job and show warning
                if nzf.nzo.status != Status.PAUSED:
                    nzf.nzo.pause()
                    msg = T('UUencode detected, only yEnc encoding is supported [%s]') % nzf.nzo.final_name
                    logging.warning(msg)
                break
        except IndexError:
            raise BadYenc()

        if not uu_found:
            raise BadYenc()
        # Nothing was decoded, so there is no filename to verify
        return ''

    # A =ybegin without a matching =yend cannot be decoded
    if not yend:
        raise BadYenc()

    # Deal with yenc encoded posts
    if 'name' in ybegin:
        output_filename = yenc_name_fixer(ybegin['name'])
    else:
        output_filename = None
        logging.debug("Possible corrupt header detected => ybegin: %s", ybegin)
    nzf.type = 'yenc'

    # Decode data
    joined = ''.join(data)
    if HAVE_YENC:
        decoded_data, crc = _yenc.decode_string(joined)[:2]
        partcrc = '%08X' % ((crc ^ -1) & 0xFFFFFFFF)
    else:
        # Pure-python fallback: unescape the known escaped characters,
        # then apply the translation table
        for code in (0, 9, 10, 13, 27, 32, 46, 61):
            joined = joined.replace('=%c' % (code + 64), chr(code))
        decoded_data = joined.translate(YDEC_TRANS)
        partcrc = '%08X' % (binascii.crc32(decoded_data) & 0xFFFFFFFF)

    # Multi-part posts carry the checksum of the part
    crcname = 'pcrc32' if ypart else 'crc32'

    if crcname in yend:
        # Left-pad to 8 hex digits, upper-cased like partcrc
        _partcrc = yenc_name_fixer(yend[crcname].rjust(8, '0').upper())
    else:
        _partcrc = None
        logging.debug("Corrupt header detected => yend: %s", yend)

    if _partcrc != partcrc:
        raise CrcError(_partcrc, partcrc, decoded_data)

    # Parse filename if there was data
    if decoded_data:
        # Only set the name if it was found and not obfuscated
        self.verify_filename(article, decoded_data, output_filename)

    return decoded_data
def decode(self, stream):
    """
    Decode yEnc content read line by line from the stream.

    Lines are consumed while self.decode_loop() holds. Keyword lines
    (begin, part, end) update self._meta; all other lines are decoded and
    written to self.decoded once a begin or part keyword has been seen.

    Returns True when the stream runs out of data before the loop ends.
    Otherwise the meta and part tracking are reset, self.decoded is
    closed (when set) and returned.
    """
    # We need to parse the content until we either reach
    # the end of the file or get to an 'end' tag
    while self.decode_loop():
        # Remember where this line starts so it can be handed back
        ptr = stream.tell()

        # Read in our data
        data = stream.readline()
        if not data:
            # We're done for now
            return True

        # Total Line Tracking
        self._total_lines += 1

        # Detect a yEnc line
        _meta = self.detect(data, relative=False)
        if _meta is not None:
            #
            # We just read a yEnc keyword token such as
            # begin, part, or end
            #
            if _meta['key'] in self._meta:
                # We already processed this key; uh oh
                # Rewind the stream to the start of this line
                stream.seek(ptr, SEEK_SET)

                # Fix our line count
                self._total_lines -= 1

                # We're done
                break

            if _meta['key'] == 'end' and \
               len(set(('begin', 'part')) - set(self._meta)) == 2:
                # Why did we get an end before a begin or part?
                # Just ignore it and keep going
                continue

            # store our key
            self._meta[_meta['key']] = _meta

            if 'end' in self._meta:
                # Mark the binary as being valid
                self.decoded._is_valid = True

                # We're done!
                break

            elif _meta['key'] == 'begin':
                # Depending on the version of yEnc we're using binary
                # content starts now; therefore we create our binary
                # instance now
                if 'name' not in _meta:
                    # Why did we get a begin before a part
                    # Just ignore it and keep going
                    continue

                # Save part no globally if present (for sorting)
                self._part = _meta.get('part', 1)

                # Create our binary instance
                self.decoded = NNTPBinaryContent(
                    filepath=_meta['name'],
                    part=self._part,
                    work_dir=self.work_dir,
                )

            elif _meta['key'] == 'part':
                if 'begin' not in self._meta:
                    # we must have a begin if we have a part
                    # This is a messed up message; treat this
                    # as junk and keep going
                    continue

                # Save part no globally if present (for sorting)
                self._part = _meta.get('part', self._part)

                # Update our Binary File if necessary
                self.decoded.part = self._part

            # Keyword lines carry no payload
            continue

        if len(set(('begin', 'part')) - set(self._meta)) == 2:
            # We haven't found the start yet which means we should just
            # keep going until we find it
            continue

        if FAST_YENC_SUPPORT:
            try:
                # The CRC and escape state are carried from line to line
                decoded, self._crc, self._escape = \
                    decode_string(data, self._crc, self._escape)

            except YencError:
                logger.warning(
                    "Yenc corruption detected on line %d." % self._lines,
                )

                # Line Tracking
                self._lines += 1

                # The corrupt line is skipped; keep storing our data
                continue

        else:
            # The slow and painful way, the below looks complicated
            # but it really isn't at the end of the day; yEnc is
            # pretty basic;
            #  - first we need to translate the special keyword tokens
            #    that are used by the yEnc language. We also want to
            #    ignore any trailing white space or new lines. This
            #    occurs by applying our DECODE_SPECIAL_MAP to the line
            #    being processed.
            #
            #  - finally we translate the remaining characters by taking
            #    away 42 from their value.
            #
            decoded = YENC_DECODE_SPECIAL_RE.sub(
                lambda x: YENC_DECODE_SPECIAL_MAP[x.group()], data,
            ).translate(YENC42)

            # CRC Calculations
            self._calc_crc(decoded)

        # Line Tracking
        self._lines += 1

        # Track the number of bytes decoded
        self._decoded += len(decoded)

        # Write data to out stream
        self.decoded.write(decoded)

        if self._max_bytes > 0 and self._decoded >= self._max_bytes:
            # If we specified a limit and hit it then we're done at
            # this point. Before we do so; advance to the end of our
            # stream
            stream.seek(0, SEEK_END)

            # We're done
            break

    # Reset our meta tracking
    self._meta = {}

    # Reset part information
    self._part = 1

    if self.decoded:
        # close article when complete
        self.decoded.close()

    # Return what we do have
    return self.decoded
def yenc_decode(self, seg):
    """
    Decode the yEnc lines held in seg.data and store the result on seg.

    The =ybegin header supplies seg.decoded_filename and seg.decoded_number,
    the =yend trailer supplies seg.decoded_crc. On success seg.decoded_data
    and seg.decoded_size are set and True is returned.

    Unless seg.lastTry() is true, False is returned when the =yend line is
    missing, the CRC does not match, the part number differs from
    seg.number, or no filename was decoded. On the last try these checks
    are skipped and seg.decoded_number is forced to seg.number.
    """
    ignore_errors = seg.lastTry()
    buffer = []
    in_body = False
    end_found = False

    for line in seg.data:
        if line[:7] == '=ybegin':
            # name= is the last keyword of the header and the filename may
            # itself contain spaces or '=', so split it off first and only
            # parse the keywords in front of it.
            header, name_found, name = line.partition(" name=")
            if name_found:
                seg.decoded_filename = name
            for arg in header.split(" "):
                if arg.startswith("part="):
                    seg.decoded_number = int(arg.split("=")[1])
            # Single-part posts have no =ypart line: the body starts here
            in_body = True
            continue
        elif line[:6] == '=ypart':
            in_body = True
            continue
        elif line[:5] == '=yend':
            for arg in line.split(" "):
                if arg.startswith("pcrc32=") or arg.startswith("crc32="):
                    # strip() drops a trailing line ending so the padded
                    # value compares against the computed CRC
                    c = arg.split("=")[1].strip()
                    seg.decoded_crc = '0' * (8 - len(c)) + c
            end_found = True
            break

        if in_body:
            buffer.append(line)

    # no ending found, article must have been cut off in transmit.
    if (not end_found) and (not ignore_errors):
        mt.log.debug("Article decode error: =yend not found.")
        return False

    # join the data together and decode it.
    data = ''.join(buffer)
    crc = ""
    if yenc_found:
        decoded_data, _yenc_crc, something = _yenc.decode_string(data)
        # _yenc hands back the CRC inverted; flip and mask to 32 bits
        crc = '%08X' % ((_yenc_crc ^ -1) & 0xFFFFFFFF)
    else:
        # stolen from hellanzb.
        for i in (0, 9, 10, 13, 27, 32, 46, 61):
            j = '=%c' % (i + 64)
            data = data.replace(j, chr(i))
        decoded_data = data.translate(self.YDEC_TRANS)
        crc = '%08X' % (crc32(decoded_data) & 0xFFFFFFFF)

    # if the article has failed multiple times we'll ignore errors and take
    # whatever we can get from it.
    if not ignore_errors:
        # If a CRC was included, check it.
        if (seg.decoded_crc != "") and (crc != ""):
            if seg.decoded_crc.upper() != crc:
                mt.log.debug("CRC does not match. A: " + seg.decoded_crc.upper() + " B: " + crc)
                return False

        # check partnum
        if seg.decoded_number != seg.number:
            mt.log.debug("Part number does not match: " + seg.msgid)
            return False

        # ensure we decoded a filename.
        if seg.decoded_filename == "":
            mt.log.debug(seg.msgid + " does not have a filename.")
            return False
    else:
        if seg.decoded_number != seg.number:
            seg.decoded_number = seg.number

    seg.decoded_size = len(decoded_data)
    seg.decoded_data = decoded_data
    return True
def decode(article, data):
    """
    Decode the lines of one article and return the decoded payload.

    Returns None when no non-empty lines are left, '' when a UUencoded
    post is detected (the job is paused and a warning logged), otherwise
    the yEnc decoded data. Raises BadYenc for posts that are neither, and
    CrcError when the computed checksum differs from the one in =yend.
    """
    # Filter out empty ones
    data = filter(None, data)

    # No point in continuing if we don't have any data left
    if not data:
        return None

    nzf = article.nzf
    header_info, data = yCheck(data)
    ybegin, ypart, yend = header_info

    # Deal with non-yencoded posts
    if not ybegin:
        saw_uu = False
        try:
            for pos in xrange(min(40, len(data))):
                if not data[pos].startswith('begin '):
                    continue
                nzf.type = 'uu'
                saw_uu = True
                # Pause the job and show warning
                if nzf.nzo.status != Status.PAUSED:
                    nzf.nzo.pause()
                    msg = T('UUencode detected, only yEnc encoding is supported [%s]') % nzf.nzo.final_name
                    logging.warning(msg)
                break
        except IndexError:
            raise BadYenc()

        if not saw_uu:
            raise BadYenc()
        return ''

    # A =ybegin without a matching =yend cannot be decoded
    if not yend:
        raise BadYenc()

    # Deal with yenc encoded posts
    if 'name' in ybegin:
        nzf.filename = yenc_name_fixer(ybegin['name'])
    else:
        logging.debug("Possible corrupt header detected => ybegin: %s", ybegin)
    nzf.type = 'yenc'

    # Decode data
    body = ''.join(data)
    if HAVE_YENC:
        decoded_data, crc = _yenc.decode_string(body)[:2]
        partcrc = '%08X' % ((crc ^ -1) & 0xFFFFFFFF)
    else:
        # Pure-python fallback: unescape the known escaped characters,
        # then apply the translation table
        for code in (0, 9, 10, 13, 27, 32, 46, 61):
            body = body.replace('=%c' % (code + 64), chr(code))
        decoded_data = body.translate(YDEC_TRANS)
        partcrc = '%08X' % (binascii.crc32(decoded_data) & 0xFFFFFFFF)

    # Multi-part posts carry the checksum of the part
    crcname = 'pcrc32' if ypart else 'crc32'

    if crcname in yend:
        # Left-pad to 8 hex digits, upper-cased like partcrc
        _partcrc = yend[crcname].rjust(8, '0').upper()
    else:
        _partcrc = None
        logging.debug("Corrupt header detected => yend: %s", yend)

    if _partcrc != partcrc:
        raise CrcError(_partcrc, partcrc, decoded_data)

    return decoded_data
def decode(self, article, data, raw_data):
    """
    Decode one article and return the decoded payload.

    With SABYenc enabled the raw chunks are handed to
    sabyenc.decode_usenet_chunks(); otherwise the line list in `data` is
    decoded through _yenc or the pure-python fallback. Returns None when
    no non-empty lines are left and '' when a UUencoded post is detected.
    Raises BadYenc for undecodable posts and CrcError on a checksum
    mismatch.
    """
    # Do we have SABYenc? Let it do all the work
    if sabnzbd.decoder.SABYENC_ENABLED:
        sab_result = sabyenc.decode_usenet_chunks(raw_data, article.bytes)
        decoded_data, output_filename, crc, crc_expected, crc_correct = sab_result

        # Assume it is yenc
        article.nzf.type = 'yenc'

        # Only set the name if it was found and not obfuscated
        self.verify_filename(article, decoded_data, output_filename)

        # CRC check
        if not crc_correct:
            raise CrcError(crc_expected, crc, decoded_data)

        return decoded_data

    # Continue for _yenc or Python-yEnc
    # Filter out empty ones
    data = filter(None, data)

    # No point in continuing if we don't have any data left
    if not data:
        return None

    nzf = article.nzf
    header_info, data = yCheck(data)
    ybegin, ypart, yend = header_info

    # Deal with non-yencoded posts
    if not ybegin:
        saw_uu = False
        try:
            for pos in xrange(min(40, len(data))):
                if not data[pos].startswith('begin '):
                    continue
                nzf.type = 'uu'
                saw_uu = True
                # Pause the job and show warning
                if nzf.nzo.status != Status.PAUSED:
                    nzf.nzo.pause()
                    msg = T(
                        'UUencode detected, only yEnc encoding is supported [%s]'
                    ) % nzf.nzo.final_name
                    logging.warning(msg)
                break
        except IndexError:
            raise BadYenc()

        if not saw_uu:
            raise BadYenc()
        # Nothing was decoded, so there is no filename to verify
        return ''

    # A =ybegin without a matching =yend cannot be decoded
    if not yend:
        raise BadYenc()

    # Deal with yenc encoded posts
    if 'name' in ybegin:
        output_filename = yenc_name_fixer(ybegin['name'])
    else:
        output_filename = None
        logging.debug(
            "Possible corrupt header detected => ybegin: %s", ybegin)
    nzf.type = 'yenc'

    # Decode data
    body = ''.join(data)
    if HAVE_YENC:
        decoded_data, crc = _yenc.decode_string(body)[:2]
        partcrc = '%08X' % ((crc ^ -1) & 0xFFFFFFFF)
    else:
        # Pure-python fallback: unescape the known escaped characters,
        # then apply the translation table
        for code in (0, 9, 10, 13, 27, 32, 46, 61):
            body = body.replace('=%c' % (code + 64), chr(code))
        decoded_data = body.translate(YDEC_TRANS)
        partcrc = '%08X' % (binascii.crc32(decoded_data) & 0xFFFFFFFF)

    # Multi-part posts carry the checksum of the part
    crcname = 'pcrc32' if ypart else 'crc32'

    if crcname in yend:
        # Left-pad to 8 hex digits, upper-cased like partcrc
        _partcrc = yenc_name_fixer(yend[crcname].rjust(8, '0').upper())
    else:
        _partcrc = None
        logging.debug("Corrupt header detected => yend: %s", yend)

    if _partcrc != partcrc:
        raise CrcError(_partcrc, partcrc, decoded_data)

    # Parse filename if there was data
    if decoded_data:
        # Only set the name if it was found and not obfuscated
        self.verify_filename(article, decoded_data, output_filename)

    return decoded_data
def decode(data, seg_part=False, ignore_crc=False):
    """
    Decode the lines of one segment and describe the decoded part.

    data is passed through strip() and yCheck() before decoding. When
    seg_part is true a missing =yend is tolerated and no CRC is computed
    or checked; ignore_crc skips only the comparison against =yend.

    Returns a dict with the keys 'data', 'part_number', 'part_begin',
    'part_end', 'part_size', 'file_size' and 'file_name'.

    Raises YencException when there is no data or when =yend is missing
    (and seg_part is false), and CrcError on a checksum mismatch.
    """
    data = strip(data)
    # No point in continuing if we don't have any data left
    if data:
        yenc, data = yCheck(data)
        ybegin, ypart, yend = yenc
        decoded_data = None
        # Deal with non-yencoded posts
        if not ybegin:
            print("Non yEnc encoded data found!")
            found = False
            # NOTE(review): indexes the first 10 lines unconditionally; a
            # shorter input without a 'begin ' line would raise IndexError
            # - confirm callers guarantee the length
            for i in range(10):
                if data[i].startswith(b'begin '):
                    found = True
                    break
            if found:
                # Drop the lines in front of the 'begin ' line
                for _ in range(i):
                    data.pop(0)
                # Drop the trailing 'end' / '`' lines when present
                if data[-1] == b'end':
                    data.pop()
                    if data[-1] == b'`':
                        data.pop()
                decoded_data = b'\r\n'.join(data)
                # Synthesize a header so the result dict below can be built
                ybegin = {b'size': len(decoded_data), b'name': "UNKNOWN"}
        # Deal with yenc encoded posts
        elif ybegin and (yend or (not yend and seg_part)):
            if not b'name' in ybegin:
                logging.debug(
                    "Possible corrupt header detected "
                    "=> ybegin: %s", ybegin)
            # Decode data
            if HAVE_YENC:
                decoded_data, crc = _yenc.decode_string(
                    b''.join(data))[:2]  # @UndefinedVariable
                # The CRC from _yenc is inverted and masked to 32 bits
                partcrc = (crc ^ -1) & 0xFFFFFFFF
            else:
                data = b''.join(data)
                # Pure-python fallback: unescape the known escaped bytes,
                # then apply the translation table
                for i in (0, 9, 10, 13, 27, 32, 46, 61):
                    j = b'=' + bytearray((i + 64, ))
                    i = bytearray((i, ))
                    data = data.replace(j, i)
                decoded_data = data.translate(YDEC_TRANS)
                if not seg_part:
                    crc = crc32(decoded_data)
                    partcrc = crc & 0xFFFFFFFF
            # we don't need to check all the CRC stuff if it isn't there
            if not seg_part and not ignore_crc:
                if ypart:
                    crcname = b'pcrc32'
                else:
                    crcname = b'crc32'
                try:
                    _partcrc = int(yend[crcname], 16)
                except (LookupError, ValueError):
                    _partcrc = None
                    logging.debug("Corrupt header detected "
                                  "=> yend: %s", yend)
                if not (_partcrc == partcrc):
                    raise CrcError(_partcrc, partcrc, decoded_data)
        else:
            # print(yenc)
            # ({'total': '15', 'line': '128', 'part': '15',
            #   'name': 'mdj.104-diff.r00', 'size': '15000000'},
            #  {'begin': '14000001', 'end': '15000000'},
            #  None)
            raise YencException("No =yend: segment data is not all there")
        # '=ypart begin=400001 end=500000' line can be omitted
        if not ypart:
            # fill it in ourselves
            ypart = {b'begin': 1, b'end': ybegin[b'size']}
            # in this case 2 =ybegin parameters are 'missing' too:
            # =ybegin line=128 size=3566 name=k-9-vrs.sfv
            # part= and total= (but these are optional according to the spec)
            ybegin.setdefault(b'part', 1)
        # print(ybegin, ypart, yend)
        # ({'line': '128', 'part': '1', 'name':
        #   'Fringe S01E07 X264 720p 2Audio (2009-12-06).nfo', 'size': '3554'},
        #  {'begin': '1', 'end': '3554'},
        #  {'part': '1', 'pcrc32': '13fca903', 'size': '3554'})
        return {
            'data': decoded_data,
            # KeyError: 'part'
            'part_number': int(ybegin[b'part']),
            'part_begin': int(ypart[b'begin']),  # in the joined file
            'part_end': int(ypart[b'end']),  # counts from 1 onwards
            'part_size': int(ypart[b'end']) - int(ypart[b'begin']) + 1,
            'file_size': int(ybegin[b'size']),
            'file_name': ybegin[b'name'],
        }
    else:
        raise YencException("No data available to decode.")