def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core import config as hachoir_config

    hachoir_config.quiet = True

    if ek(os.path.isfile, filename):
        base_filename = ek(os.path.basename, filename)
        bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

        for byte in sickbeard.helpers.readFileBuffered(filename):
            try:
                file_metadata = extractMetadata(guessParser(StringInputStream(byte)))
                for metadata in chain([file_metadata], file_metadata.iterGroups()):
                    height = metadata.get('height', 0)
                    if height > 1000:
                        return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                    elif 680 < height < 800:
                        return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                    elif height < 680:
                        return (Quality.SDTV, Quality.SDDVD)[re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
            except Exception:
                # Keep scanning subsequent buffers if one chunk fails to parse.
                continue

    return Quality.UNKNOWN
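
# A minimal, standalone sketch of the guessParser/extractMetadata pipeline the
# function above relies on, assuming the same Python 2 hachoir packages. The
# helper name is ours, not part of any of the projects quoted in this file.
def video_height_from_bytes(data):
    """Return the 'height' metadata of a media buffer, or None."""
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata

    parser = guessParser(StringInputStream(data))
    if not parser:
        return None
    metadata = extractMetadata(parser)
    if metadata and metadata.has('height'):
        return metadata.get('height')
    return None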
def mangleData(self, data, index):
    self.sha1_offset = 208
    self.md5_offset = 256
    self.header_offset = 360
    self.filedata_offset = 3170
    data = MangleFile.mangleData(self, data, index)
    if USE_HACHOIR:
        #data.tofile(open('/tmp/oops', 'wb'))
        hachoir_config.quiet = True
        data_str = data.tostring()
        parser = guessParser(StringInputStream(data_str))
        if parser:
            self.useHachoirParser(parser)

    # Recompute the embedded checksums so the mangled file still looks
    # internally consistent: MD5 (16 raw bytes) over everything after the header ...
    summary_data = data[self.header_offset:].tostring()
    checksum = md5(summary_data).digest()
    data[self.md5_offset:self.md5_offset + 16] = array('B', checksum)

    # ... and SHA-1 (40-char hex digest) over the summary section only.
    summary_data = data[self.header_offset:self.filedata_offset].tostring()
    checksum = sha(summary_data).hexdigest()
    data[self.sha1_offset:self.sha1_offset + 40] = array('B', checksum)
    return data
def which_type(self, path):
    """
    Analyzes the image provided and attempts to determine whether it is a poster,
    banner or fanart.

    :param path: full path to the image
    :return: POSTER, BANNER or FANART if it concluded one of them, or None if the
             image was none of these (or didn't exist)
    """
    if not os.path.isfile(path):
        sickrage.srCore.srLogger.warning("Couldn't check the type of " + str(path) + " because it doesn't exist")
        return None

    with io.open(path, 'rb') as fh:
        img_metadata = extractMetadata(guessParser(StringInputStream(fh.read())))

    if not img_metadata:
        sickrage.srCore.srLogger.debug("Unable to get metadata from " + str(path) + ", not using your existing image")
        return None

    # Guard against a missing or zero height, which would divide by zero.
    img_height = float(img_metadata.get('height', 0))
    if not img_height:
        sickrage.srCore.srLogger.debug("Unable to get image height from " + str(path))
        return None

    img_ratio = float(img_metadata.get('width', 0)) / img_height

    # most posters are around 0.68 width/height ratio (eg. 680/1000)
    if 0.55 < img_ratio < 0.8:
        return self.POSTER
    # most banners are around 5.4 width/height ratio (eg. 758/140)
    elif 5 < img_ratio < 6:
        return self.BANNER
    # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
    elif 1.7 < img_ratio < 1.8:
        return self.FANART
    else:
        sickrage.srCore.srLogger.warning("Image has size ratio of " + str(img_ratio) + ", unknown type")
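
# Hedged usage sketch for the ratio test above, outside the ImageCache class.
# The helper name is ours; imports assume the same hachoir packages and the io
# module already used by this file.
def image_ratio(path):
    """Return width/height for an image file, or None if it can't be read."""
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata

    with io.open(path, 'rb') as fh:
        meta = extractMetadata(guessParser(StringInputStream(fh.read())))
    if not meta:
        return None
    width = float(meta.get('width', 0))
    height = float(meta.get('height', 0))
    return (width / height) if height else None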
def main(argv):
    os.unlink(DATA_FILE)
    createFile(int(argv[0]))
    # Read only the first 100 bytes (the SQLite header), in binary mode.
    stream = StringInputStream(open(DATA_FILE, 'rb').read(100))
    root = SQLite(stream)
    for field in root:
        print "%s) %s=%s" % (field.address, field.name, field.display)
def createInputStream(cis, source=None, **args):
    stream = cis(source=source)
    header = StringInputStream("FWS" + self.stream.readBytes(3 * 8, 5))
    args.setdefault("tags", []).append(("class", SwfFile))
    return ConcatStream((header, stream), source=stream.source, **args)
def from_string(self, data):
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata

    stream = StringInputStream(data)
    parser = guessParser(stream)
    ret = extractMetadata(parser)
    #formated = md.exportPlaintext(line_prefix=u"")
    return ret
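
# A hedged companion sketch to from_string() above: render the returned
# metadata as plain text. exportPlaintext() is the standard hachoir_metadata
# call for this; the helper name and the "- " prefix are our choices.
def metadata_to_text(data):
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata

    parser = guessParser(StringInputStream(data))
    if not parser:
        return u""
    meta = extractMetadata(parser)
    if not meta:
        return u""
    return u"\n".join(meta.exportPlaintext(line_prefix=u"- ") or [])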
def _sync_file(self, manager, file_path, node, upload_pool):
    if self._aborting:
        return
    with manager.start_task(1, '* Syncing file "%s"...' % file_path):
        file_name = file_path.split(os.sep)[-1].strip()
        with open(file_path, 'rb') as f:
            file_content = f.read()

        remote_file = node.get_child(file_name)

        # Load the sidecar JSON if present, otherwise fall back to defaults.
        # noinspection PyBroadException
        try:
            with open(file_path + ".json", 'rb') as json_file:
                json_file_content = json.load(json_file)
        except Exception:
            json_file_content = {"Keywords": "auto"}

        if remote_file:
            if remote_file['Format'].lower() in VIDEO_EXT:
                # Video files are modified by SmugMug server side, so we cannot use
                # the MD5 to check if the file needs a re-sync. Use the last
                # modification time instead.
                remote_time = datetime.datetime.strptime(
                    remote_file.get('ImageMetadata')['DateTimeModified'],
                    '%Y-%m-%dT%H:%M:%S')
                try:
                    parser = guessParser(StringInputStream(file_content))
                    metadata = extractMetadata(parser)
                    file_time = max(metadata.getValues('last_modification') +
                                    metadata.getValues('creation_date'))
                except Exception:
                    print('Failed extracting metadata for file "%s".' % file_path)
                    file_time = datetime.datetime.fromtimestamp(
                        os.path.getmtime(file_path))

                time_delta = abs(remote_time - file_time)
                same_file = (time_delta <= datetime.timedelta(seconds=1))
            else:
                remote_md5 = remote_file['ArchivedMD5']
                file_md5 = hashlib.md5(file_content).hexdigest()
                same_file = (remote_md5 == file_md5)

            if same_file:
                # File already exists on SmugMug; just refresh its metadata.
                remote_file.patch('Image', json=json_file_content)
                return

        if self._aborting:
            return
        # Iterate over a copy of the keys: the dict must not change size mid-loop.
        for key in list(json_file_content):
            json_file_content["X-Smug-" + key] = json_file_content.pop(key)
        upload_pool.add(self._upload_media, manager, node, remote_file,
                        file_path, file_name, file_content, json_file_content)
def _sync_file(self, manager, file_path, node, upload_pool):
    if self._aborting:
        return
    with manager.start_task(1, '* Syncing file "%s"...' % file_path):
        file_name = file_path.split(os.sep)[-1].strip()
        with open(file_path, 'rb') as f:
            file_content = f.read()

        file_root, file_extension = os.path.splitext(file_name)
        if file_extension.lower() == '.heic':
            # SmugMug converts HEIC files to JPEG and renames them in the process.
            renamed_file = file_root + '_' + file_extension[1:] + '.JPG'
            remote_file = node.get_child(renamed_file)
        else:
            remote_file = node.get_child(file_name)

        if remote_file:
            if remote_file['Format'].lower() in VIDEO_EXT:
                # Video files are modified by SmugMug server side, so we cannot use
                # the MD5 to check if the file needs a re-sync. Use the last
                # modification time instead.
                remote_time = datetime.datetime.strptime(
                    remote_file.get('ImageMetadata')['DateTimeModified'],
                    '%Y-%m-%dT%H:%M:%S')
                try:
                    parser = guessParser(StringInputStream(file_content))
                    metadata = extractMetadata(parser)
                    file_time = max(metadata.getValues('last_modification') +
                                    metadata.getValues('creation_date'))
                except Exception:
                    print('Failed extracting metadata for file "%s".' % file_path)
                    file_time = datetime.datetime.fromtimestamp(
                        os.path.getmtime(file_path))

                time_delta = abs(remote_time - file_time)
                same_file = (time_delta <= datetime.timedelta(seconds=1))
            elif file_extension.lower() == '.heic':
                # HEIC files are recoded to JPEGs server side by SmugMug, so we cannot
                # use MD5 to check if the file needs a re-sync. Moreover, no image
                # metadata (e.g. the time-taken timestamp) is kept in SmugMug that
                # would allow us to tell if the file is the same. Hence, for now we
                # just assume HEIC files never change and never re-upload them.
                same_file = True
            else:
                remote_md5 = remote_file['ArchivedMD5']
                file_md5 = hashlib.md5(file_content).hexdigest()
                same_file = (remote_md5 == file_md5)

            if same_file:
                return  # File already exists on SmugMug.

        if self._aborting:
            return
        upload_pool.add(self._upload_media, manager, node, remote_file,
                        file_path, file_name, file_content)
def createInputStream(self):
    # FIXME: Use lazy stream creation
    data = []
    for item in self.items:
        if 'rawdata' in item:
            data.append(item["rawdata"].value)
    data = "".join(data)

    # FIXME: Use smarter code to send arguments
    tags = {"class": self.parser, "args": self.args}
    tags = tags.iteritems()
    return StringInputStream(data, "<fragment group>", tags=tags)
def createInputStream(cis, **args):
    tags = args.setdefault("tags", [])
    if parser_class:
        tags.append(("class", parser_class))
    if parser is not None:
        tags.append(("id", parser.PARSER_TAGS["id"]))
    if mime_type:
        tags.append(("mime", mime_type))
    if filename:
        tags.append(("filename", filename))
    return StringInputStream(decompressor(self.value), **args)
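
# The tags set above are hints that guessParser() reads back from stream.tags
# ("class", "id", "mime", "filename") when choosing a parser. A hedged
# illustration, assuming 'data' holds a gzip-compressed buffer:
#
#     stream = StringInputStream(data, "<buffer>", tags=[("id", "gzip")])
#     parser = guessParser(stream)  # the "id" tag steers selection to gzip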
def testRandom(seed=0, tests=(1, 8)):
    random.seed(seed)
    a = array('L')
    parser_list = HachoirParserList()
    n = max(tests) * max(parser.getParserTags()["min_size"] for parser in parser_list)
    k = 8 * a.itemsize
    for i in xrange((n - 1) // k + 1):
        a.append(random.getrandbits(k))
    a = StringInputStream(a.tostring(), source="<random data>")
    ok = True
    for parser in parser_list:
        size = parser.getParserTags()["min_size"]
        for test in tests:
            a._size = a._current_size = size * test
            try:
                parser(a, validate=True)
                error("[%s] Parser didn't reject random data" % parser.__name__)
            except ValidateError:
                continue
            except HACHOIR_ERRORS as err:
                error(u"[%s] %s" % (parser.__name__, err))
            ok = False
    # Report whether every parser rejected the random data.
    return ok
def createInputStream(self):
    # FIXME: Use lazy stream creation
    data = []
    for item in self.items:
        data.append(item["rawdata"].value)
    data = "".join(data)

    # FIXME: Use smarter code to send arguments
    self.args["compr_level"] = \
        self.items[0].parent.parent.folder["compr_level"].value
    tags = {"class": self.parser, "args": self.args}
    tags = tags.iteritems()
    return StringInputStream(data, "<fragment group>", tags=tags)
def createInputStream(cis, source=None, **args):
    stream = cis(source=source)
    tags = args.setdefault("tags", [])
    tags.extend(stream.tags)
    tags.append(("class", FolderParser))
    tags.append(("args", {'files': files}))
    for unused in self:
        pass
    if folder["compr_method"].value == 3:  # LZX
        self.uncompressed_data = lzx_decompress(
            self["block[0]/data"].getSubIStream(),
            folder["compr_level"].value)
    return StringInputStream(self.uncompressed_data, source=source, **args)
def EXTRACT_EMBEDDED(s, buff):
    EXTRACT_FILES = {}
    CHILD_BUFF = {}

    stream = StringInputStream(buff)
    subfile = SearchSubfile(stream)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.stats = {}
    subfile.next_offset = None
    counter = 0
    last_start = 0
    last_end = 0

    while subfile.current_offset < subfile.size:
        subfile.datarate.update(subfile.current_offset)
        for offset, parser in subfile.findMagic(subfile.current_offset):
            # Don't care about extracting the base file, just what's within it.
            # False positives often report sizes exceeding the size of the file;
            # they also may not even possess a content size at all, so weed them out.
            if offset != 0 and parser.content_size != subfile.size \
                    and parser.content_size < subfile.size and parser.content_size:
                start = offset // 8
                end = start + parser.content_size // 8
                # Make sure we aren't pulling sub-files out of ones we are already
                # extracting; we will be doing that later anyway when the module is
                # run again on the 'Buffer' returned key value.
                if start >= last_end:
                    EXTRACT_FILES['Object_%s' % counter] = OrderedDict([
                        ('Start', '%s bytes' % start),
                        ('End', '%s bytes' % end),
                        ('Description', parser.description),
                        ('Buffer', buff[start:end]),
                    ])
                    counter += 1
                    last_start = start
                    last_end = end
        subfile.current_offset += subfile.slice_size
        if subfile.next_offset:
            subfile.current_offset = max(subfile.current_offset, subfile.next_offset)
        subfile.current_offset = min(subfile.current_offset, subfile.size)

    return EXTRACT_FILES
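
# Hedged usage sketch for EXTRACT_EMBEDDED() above. The first parameter is
# unused by the function body, so we pass None; the helper name and path
# argument are ours.
def report_embedded(path):
    with open(path, 'rb') as fh:
        found = EXTRACT_EMBEDDED(None, fh.read())
    for name, info in sorted(found.items()):
        print("%s: %s (%s -> %s)" % (name, info['Description'],
                                     info['Start'], info['End']))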
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core.log import log

    log.use_print = False

    if ek(os.path.isfile, filename):
        base_filename = ek(os.path.basename, filename)
        bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

        try:
            with ek(io.open, filename, "rb") as file:
                file_metadata = extractMetadata(guessParser(StringInputStream(file.read())))
                if file_metadata:
                    for metadata in chain([file_metadata], file_metadata.iterGroups()):
                        height = metadata.get('height', None)
                        if height and height > 1000:
                            return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                        elif height and 680 < height < 800:
                            return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                        elif height and height < 680:
                            return (Quality.SDTV, Quality.SDDVD)[re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
        except Exception as e:
            sickbeard.logger.log(ex(e))

    return Quality.UNKNOWN
def _verify_download(self, file_name=None):
    """
    Checks the saved file to see if it was actually valid; if not, consider the
    download a failure.
    """
    # Primitive verification of torrents: just make sure we didn't get a text
    # file or something.
    if file_name.endswith('torrent'):
        try:
            with open(file_name, 'rb') as file:
                mime_type = guessParser(StringInputStream(file.read()))._getMimeType()
                if mime_type == 'application/x-bittorrent':
                    return True
        except Exception as e:
            sickrage.srCore.srLogger.debug("Failed to validate torrent file: {}".format(e.message))

        sickrage.srCore.srLogger.debug("Result is not a valid torrent file")
        return False

    return True
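
# A hedged alternative to the private _getMimeType() call above, using the
# public mime_type property that hachoir parsers expose; the helper name is
# ours. It also avoids crashing when guessParser() returns None.
def looks_like_torrent(path):
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser

    with open(path, 'rb') as fh:
        parser = guessParser(StringInputStream(fh.read()))
    return bool(parser) and parser.mime_type == 'application/x-bittorrent'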
def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from the
    "streams" key. The parser for the main stream ((None, None, filename) in
    data['streams']) is cached for efficiency reasons in data['parser_cache'].
    """
    # Must remake the parser EVERY TIME because parsers can't be pickled
    # (they contain generators, which are currently not pickleable).
    # The best I can do here is cache the parser class, so at least we're
    # not taking time to re-guess the parser...
    if streamdata[0] is None:
        # original file
        stream = FileInputStream(data['filename'],
                                 real_filename=unicode(tmp_dir + sessid + '.file'))
        if 'parser_cache' in data:
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        prevstream, prevparser = get_parser(data, streamdata[0], sessid)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser