def get_creation_date(_path): """ Simple function to retrieve the creation date from the file's metdata Args: _path the full path to the file. """ # Initialise result _creation_date = None # Using the hachoir metadata library retrieve file metadata hachoir_config.quiet = True try: parser = createParser(unicodeFilename(_path), _path) if parser: metadata = extractMetadata(parser) if metadata: _creation_date = metadata.get("creation_date") except Exception: pass # Validate and use ctime if not available if not _creation_date: _ctime = os.path.getctime(_path) _creation_date = datetime.datetime.fromtimestamp(_ctime) # Return result return _creation_date
def lnkparse(reflectPath, filename):
    """Return the target filename from a MS-Windows link (URL format).

    Returns a (status, path) tuple. The URL-quoted relative target path is
    only returned when the resolved absolute target stays inside
    ``reflectPath``; otherwise a status code with an empty path.
    """
    filename = unicodeFilename(filename)
    try:
        parser = createParser(filename)
        if parser is not None and isinstance(parser, LnkFile):
            # It is a "MS-Windows" link file
            try:
                for field in parser:
                    pass  # trigger parsing
                lnkpath = parser.getField('relative_path').value
                # mount the complet target path, analyses if inside BasePath
                if lnkpath.startswith('.\\'):
                    lnkpath = lnkpath[2:]
                lnkpath = lnkpath.replace('\\','/')
                filenamePath = os.path.dirname(filename)
                allLnkpath = os.path.join(reflectPath, filenamePath, lnkpath)
                allLnkpath = os.path.abspath(allLnkpath)  # remove all ..\
                if allLnkpath.startswith(reflectPath):
                    lnkpath = quote(lnkpath.encode('utf-8'))
                    return 'OK', lnkpath
                else:
                    return 'ERROR_OUTREFLECTPATH', ''
            except MissingField:
                # example: link to a network file
                return 'ERROR_RELPATH', ''
        else:
            return 'NOT_LNKFILE', ''
    except InputStreamError:
        return 'NOT_PARSED', ''
def _verify_download(self, file_name=None): """ Checks the saved file to see if it was actually valid, if not then consider the download a failure. """ # primitive verification of torrents, just make sure we didn't get a text file or something if file_name.endswith(GenericProvider.TORRENT): try: parser = createParser(file_name) if parser: # pylint: disable=protected-access # Access to a protected member of a client class mime_type = parser._getMimeType() try: parser.stream._input.close() except Exception: pass if mime_type == 'application/x-bittorrent': return True except Exception as e: logger.log(u"Failed to validate torrent file: " + ex(e), logger.DEBUG) logger.log(u"Result is not a valid torrent file", logger.DEBUG) return False return True
def _extractMetadata(self):
    """ Extract metadata from file on client or server using hachoir-metadata.

    On success populates ``self.metadata`` with a {description: joined text}
    dict; on any parse/extract failure sets it to None.
    """
    try:
        parser = createParser(unicode(self.path), str(self.path))
        if parser is None:
            # treat an unparseable file the same as a hachoir failure
            raise HachoirError
        extractor = extractMetadata(parser)
        if extractor is None:
            raise HachoirError
        self.metadata = dict()
        for data in sorted(extractor):
            if not data.values:
                continue  # skip metadata entries with no values
            key = data.description
            value = ', '.join([item.text for item in data.values])
            self.metadata[key] = value
    except HachoirError:
        self.metadata = None
def lnkparse(reflectPath, filename):
    """Return the target filename from a MS-Windows link (URL format).

    Returns a (status, path) tuple. The URL-quoted relative target path is
    only returned when the resolved absolute target stays inside
    ``reflectPath``; otherwise a status code with an empty path.
    """
    filename = unicodeFilename(filename)
    try:
        parser = createParser(filename)
        if parser is not None and isinstance(parser, LnkFile):
            # It is a "MS-Windows" link file
            try:
                for field in parser:
                    pass  # trigger parsing
                lnkpath = parser.getField('relative_path').value
                # mount the complet target path, analyses if inside BasePath
                if lnkpath.startswith('.\\'):
                    lnkpath = lnkpath[2:]
                lnkpath = lnkpath.replace('\\', '/')
                filenamePath = os.path.dirname(filename)
                allLnkpath = os.path.join(reflectPath, filenamePath, lnkpath)
                allLnkpath = os.path.abspath(allLnkpath)  # remove all ..\
                if allLnkpath.startswith(reflectPath):
                    lnkpath = quote(lnkpath.encode('utf-8'))
                    return 'OK', lnkpath
                else:
                    return 'ERROR_OUTREFLECTPATH', ''
            except MissingField:
                # example: link to a network file
                return 'ERROR_RELPATH', ''
        else:
            return 'NOT_LNKFILE', ''
    except InputStreamError:
        return 'NOT_PARSED', ''
def parse_metadata(path):
    """Extract hachoir metadata for *path* into a plain dict.

    Walks hachoir's plaintext export line by line: non-dashed lines are
    group headers (key with no value yet), "- key: value" lines are
    entries, and repeated keys accumulate their values into a list.
    Returns None when the file cannot be parsed or has no metadata.
    """
    try:
        parser = createParser(unicode(path))
    except InputStreamError:
        return
    if not parser:
        return
    try:
        metadata = extractMetadata(parser, appsettings.INFO_QUALITY)
    except HachoirError:
        return
    if not metadata:
        return
    data = {}
    text = metadata.exportPlaintext(priority=None, human=False)
    for line in text:
        if not line.strip().startswith('-'):
            # group header: remember the key, empty value placeholder
            key = line.strip().lower().split(':')[0]
            value = []
        else:
            # entry line of the form "- key: value"
            key = line.strip().split('- ')[1].split(': ')[0]
            value = line.split(key)[1][2:]
        if key in data:
            # repeated key: collect all values into a list
            if hasattr(data[key],'__iter__'):
                value = data[key] + [value]
            else:
                value = [data[key],value]
        if value:
            data[key] = value
    return data
def extract_metadata_from_file(filename):
    """Parse *filename* with hachoir and return its extracted metadata.

    Raises:
        ValueError: when hachoir cannot create a parser for the file.
    """
    file_parser = createParser(filename)
    if not file_parser:
        raise ValueError("Could not parse %s" % filename)
    return extractMetadata(file_parser)
def extract_metadata(uuid):
    """Look up the OdaFile for *uuid* and return its hachoir metadata."""
    record = OdaFile.objects.get(uuid=uuid)
    path = record.file_handle().name
    parser = createParser(path, real_filename=path, tags=None)
    return extractMetadata(parser, 1.0)
def classify(path,rootdir):
    """Set up default classification fields and extract hachoir metadata
    for *path*.

    NOTE(review): the defaults below are unused within this visible span —
    presumably consumed by code continuing past this chunk; confirm.
    """
    # add an extra argument here to take the root dir :)
    print 'path given: ', path,' RootDir: ',rootdir
    foo = path.rsplit('/', 1)
    fname = foo[1]
    # defaults audio, video:
    artist = album = genre = 'unknown'
    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >>stderr, "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def get_file_date(root, file): date = "" try: filename = "{}/{}".format(root,file) filename, realname = unicodeFilename(filename), filename parser = createParser(filename, realname) if not parser: print >>stderr, "Unable to parse file {}".format(filename) try: actualstderr = sys.stderr sys.stderr = open(os.devnull,'w') metadata = extractMetadata(parser) sys.stderr = actualstderr except HachoirError, err: print "Metadata extraction error: %s" % unicode(err) metadata = None if not metadata: print "Unable to extract metadata, {}".format(filename) text = metadata.exportPlaintext() date = "" # Tracer()() for line in text: if line[0:10] == "- Creation": match = re.search('(\d+-\d+-\d+ \d+:\d+:\d+)', line) if match: date = time.strptime(match.groups()[0], '%Y-%m-%d %H:%M:%S') return date
def _verify_download(self, file_name=None): """ Checks the saved file to see if it was actually valid, if not then consider the download a failure. """ # primitive verification of torrents, just make sure we didn't get a text file or something if file_name.endswith(GenericProvider.TORRENT): try: parser = createParser(file_name) if parser: # pylint: disable=W0212 # Access to a protected member of a client class mime_type = parser._getMimeType() try: parser.stream._input.close() except Exception: pass if mime_type == 'application/x-bittorrent': return True except Exception as e: logging.debug("Failed to validate torrent file: {}".format( ex(e))) logging.debug("Result is not a valid torrent file") return False return True
def get_metadata(file_names):
    """Extract hachoir metadata for each supported file and append it to
    results.txt.

    Args:
        file_names: iterable of file paths to analyze.

    Returns:
        The plaintext metadata lines of the last supported file processed,
        or None when no supported file was seen.
    """
    print ("- Analyzing files metadata.." + "\n")
    file_extensions = [".3do", ".3ds", ".7z", ".a", ".ace", ".aif", ".aifc", ".aiff", ".ani", ".apm", ".asf", ".au", ".avi", ".bin", ".bmp", ".bz2", ".cab", ".cda", ".chm", ".class", ".cur", ".deb", ".der", ".dll", ".doc", ".dot", ".emf", ".exe", ".flv", ".gif", ".gz", ".ico", ".jar", ".jpeg", ".jpg", ".laf", ".lnk", ".m4a", ".m4b", ".m4p", ".m4v", ".mar", ".mid", ".midi", ".mka", ".mkv", ".mod", ".mov", ".mp1", ".mp2", ".mp3", ".mp4", ".mpa", ".mpe", ".mpeg", ".mpg", ".msi", ".nst", ".oct", ".ocx", ".odb", ".odc", ".odf", ".odg", ".odi", ".odm", ".odp", ".ods", ".odt", ".ogg", ".ogm", ".otg", ".otp", ".ots", ".ott", ".pcf", ".pcx", ".pdf", ".png", ".pot", ".pps", ".ppt", ".ppz", ".psd", ".ptm", ".pyo", ".qt", ".ra", ".rar", ".rm", ".rpm", ".s3m", ".sd0", ".snd", ".so", ".stc", ".std", ".sti", ".stw", ".swf", ".sxc", ".sxd", ".sxg", ".sxi", ".sxm", ".sxw", ".tar", ".tga", ".tif", ".tiff", ".torrent", ".ts", ".ttf", ".vob", ".wav", ".wma", ".wmf", ".wmv", ".wow", ".xcf", ".xla", ".xls", ".xm", ".zip", ".zs1", ".zs2", ".zs3", ".zs4", ".zs5", ".zs6", ".zs7", ".zs8", ".zs9", ".zst"]
    text = None  # FIX: was unbound (NameError) when no supported file appeared
    file_ = open('results.txt', 'w')
    try:
        for filename in file_names:
            print ("- Extracting file metadata: " + filename + "\n")
            extension = os.path.splitext(filename)
            if extension[1] in file_extensions:
                print (" * The file extension is: " + extension[1] + "\n")
                filename, realname = unicodeFilename(filename), filename
                file_.write('Name: ')
                file_.write(filename)
                file_.write('\n')
                parser = createParser(filename, realname)
                if not parser:
                    print >>stderr, "Error, parsing file"
                    exit(1)
                try:
                    metadata = extractMetadata(parser)
                except Exception as e:
                    print ("Error extracting file metadata: " + str(e))
                    metadata = None
                if not metadata:
                    print ("Metadata can not be extracted")
                    exit(1)
                text = metadata.exportPlaintext()
                for line in text:
                    file_.write(line)
                    file_.write('\n')
                print (" * Successfull metadata extraction" + "\n" + "\n")
            if not extension[1] in file_extensions:
                print (" * File extension is unknown or not supported" + "\n" + "\n")
    finally:
        # FIX: file_.close() used to sit after `return`, so it never ran
        # and the results.txt handle leaked
        file_.close()
    return text
def _guess_from_metadata(self):
    """Guess episode info from each file's embedded metadata.

    Scans hachoir's plaintext export for 'title'/'comment' entries and
    runs guessit over their values.

    Returns:
        list of guessit results, one per matching metadata entry.
    """
    # split a "key:value" metadata line into its parts
    parse = lambda s: s.split(":")
    guesses = []
    for filename in self.files:
        filename = get_filename(filename)
        if not isinstance(filename, unicode):
            filename, realname = unicodeFilename(filename), filename
        else:
            realname = filename
        parser = createParser(filename, realname)
        if parser:
            try:
                metadata = extractMetadata(parser)
            except HachoirError:
                continue  # unreadable metadata: skip this file
            for line in metadata.exportPlaintext():
                entries = dict((parse(normalize(l)) for l in line if 'comment' in l or 'title' in l))
                entries = dict(((k, guessit.guess_episode_info(v)) for (k, v) in entries.items()))
                # prefer the title tag over the comment tag
                if 'title' in entries:
                    guesses.append(entries['title'])
                elif 'comment' in entries:
                    guesses.append(entries['comment'])
    return guesses
def Downloadfile(url): infoMeta = [] file_name = url.split('/')[-1] infoMeta.append(file_name) u = urllib2.urlopen(url) meta = u.info() infoMeta.append(meta.headers) doc = u.read() f = open(file_name, 'wb') f.write(doc) with open(file_name, 'rb') as p: # Slurp the whole file and efficiently convert it to hex all at once hexdata = binascii.hexlify(p.read()) # use hachoir to add the standard metadata filename = './' + file_name print filename filename, realname = unicodeFilename(filename), filename parser = createParser(filename) try: metalist = metadata.extractMetadata(parser).exportPlaintext() infoMeta.append(metalist[1:4]) except Exception: infoMeta.append(["none", "none", "none"]) p.close() # print "Done", file_name, " Info is ", infoMeta return file_name, hexdata
def get_creation_date(file_path):
    """Return the file's metadata creation date formatted 'YYYY-MM-DD'.

    Returns:
        str date, or None when the file cannot be parsed or carries no
        creation date.
    """
    try:
        parser = createParser(file_path)
        metadata = extractMetadata(parser, 0.5)
        return metadata['creation_date'].strftime('%Y-%m-%d')
    except Exception:
        # FIX: narrowed from a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt
        return None
def classify(path, rootdir):
    """Set up default classification fields and extract hachoir metadata
    for *path*.

    NOTE(review): the defaults below are unused within this visible span —
    presumably consumed by code continuing past this chunk; confirm.
    """
    # add an extra argument here to take the root dir :)
    print 'path given: ', path, ' RootDir: ', rootdir
    foo = path.rsplit('/', 1)
    fname = foo[1]
    # defaults audio, video:
    artist = album = genre = 'unknown'
    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >> stderr, "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def hachm(filename):
    """Extract hachoir metadata for *filename* into a defaultdict.

    Prints every raw key/value while scanning, then re-extracts and parses
    the plaintext export into {snake_case_key: value}.

    Returns:
        defaultdict of tags ({} when the file yields no metadata).
    """
    # using this example http://archive.org/details/WorkToFishtestwmv
    try:
        filename, realname = unicodeFilename(filename), filename
    except TypeError:
        # already-unicode input: keep as-is
        filename,realname=filename,filename
    parser = createParser(filename)
    # See what keys you can extract
    tmp = metadata.extractMetadata(parser)
    if tmp is None:
        return {}
    else:
        # NOTE(review): reaches into hachoir's name-mangled private store
        tmp = tmp._Metadata__data.iteritems()
        for k,v in tmp:
            if v.values:
                print v.key, v.values[0].value
    # Turn the tags into a defaultdict
    metalist = metadata.extractMetadata(parser).exportPlaintext()
    meta = defaultdict(defaultdict)
    if not metalist:
        return meta
    for item in metalist[1:]:
        # strip the leading "- " and normalise "Key: value" to key/value
        item = [x.strip() for x in item.split('-') if x.strip()][0]
        item = [ x.strip().lower().replace(' ','_') for x in item.split(':') ]
        k,v = item.pop(0),':'.join(item)
        meta[k]=v
    return meta
def ident(filename) :
    """Classify *filename* by guessed mime type and dispatch to the
    matching media parser.

    Returns:
        The parser's result list with a category string appended, or None
        for unrecognised/unknown types.
    """
    mimetypes.init()
    filetype = mimetypes.guess_type(filename)[0]
    unifile = hachoir_core.cmd_line.unicodeFilename(filename)
    parser = hachoir_parser.createParser(unifile)
    meta = MetadataFilter(parser)
    #if meta:
    #    print meta
    generalinfo = GeneralInfo(filename)
    results = None
    if filetype is None:
        # FIX: guess_type returns None for unknown extensions; previously
        # this crashed on filetype.startswith(...) below
        return results
    #print filetype
    if filetype.startswith("video"):
        results = VideoParse(filename, filetype) + ["video"]
        if not results[0] == "Fail":
            results[0] = filetype
    elif filetype.startswith("audio"):
        results = MusicParse(filename) + ["audio"]
    elif filetype.startswith("image"):
        results = ImageParse(filename) + ["image"]
    #print generalinfo
    return results
def checkFile(filename):
    """Try to open a hachoir parser for *filename*.

    Returns "streamerror" when the input stream cannot be read; otherwise
    returns None (parser result is discarded in this visible span).
    """
    sys.stdout.write(addonname + ":Checking File: " + filename)
    sys.stdout.flush()
    try:
        parser = createParser(filename)
    except InputStreamError, err:
        return ("streamerror")
def _verify_download(self, file_name=None):
    """
    Checks the saved file to see if it was actually valid, if not then consider the download a failure.

    Returns True for valid torrents (or non-torrent providers), False otherwise.
    """
    # primitive verification of torrents, just make sure we didn't get a text file or something
    if self.providerType == GenericProvider.TORRENT:
        try:
            parser = createParser(file_name)
            if parser:
                mime_type = parser._getMimeType()
                try:
                    # hachoir leaves the input stream open; close it explicitly
                    parser.stream._input.close()
                except Exception:
                    # FIX: was a bare `except:`, which also swallowed
                    # SystemExit/KeyboardInterrupt
                    pass
                if mime_type == 'application/x-bittorrent':
                    return True
        except Exception as e:
            logger.log(u"Failed to validate torrent file: " + ex(e), logger.DEBUG)
        logger.log(u"Result is not a valid torrent file", logger.WARNING)
        return False
    return True
def parse_metadata(cls, full_path, db_conn):
    """Extract tags from *full_path* and persist them as system tags.

    Media/archive files: pulls author/album/music-genre values out of the
    hachoir plaintext export. doc/docx/txt files: delegates to
    MiscFunctions.handleTextFiles.
    """
    file_ext = os.path.splitext(full_path)[1][1:].lower()
    if(file_ext in ['mp3','bzip2','gzip','zip','tar','wav','midi','bmp','gif','jpeg','jpg','png','tiff','exe','wmv','mkv','mov']):
        # full_path = self._full_path(orig_path)
        # print(full_path)
        parser = createParser(full_path)
        metalist = metadata.extractMetadata(parser).exportPlaintext()
        for item in metalist:
            x = item.split(':')[0]
            # entry lines look like "- Key: value"; [2:] drops the "- "
            if item.split(':')[0][2:].lower() in ["author","album","music genre"]:
                # print(item.split(':')[1][1:])
                item1 = item.split(':')[1][1:]
                new_item = str(item1.decode('utf-8'))
                print new_item
                # normalise separators so the value splits into tag names
                new_item = string.replace(new_item, ";", ",")
                new_item = string.replace(new_item, "|", ",")
                tag_name = new_item.split(',')
                print(tag_name)
                for names in tag_name:
                    # inode = os.stat(full_path)[ST_INO
                    tagname = names.strip()
                    MiscFunctions.storeTagInDB(full_path, tagname, db_conn, is_system_tag=1)
                    print("Database storage successful")
    elif file_ext in ["docx", "doc", "txt"]:
        tags = MiscFunctions.handleTextFiles(full_path)
        for tagname in tags:
            print "txt file tag: %s" % tagname
            MiscFunctions.storeTagInDB(full_path, tagname, db_conn, is_system_tag=1)
def get_meta(self, file_path):
    """ Get the meta information.

    Returns:
        (mime_type, json_info). When hachoir cannot parse the file, a
        hard-coded mime type is returned for a few known extensions.

    NOTE(review): when the parser is None AND the extension matches none
    of the hard-coded cases, control falls through to extractMetadata(None)
    below — confirm whether that path can occur for real inputs.
    """
    self.check_extension(file_path)
    filename, realname = unicodeFilename(file_path), file_path
    parser = createParser(filename, realname)
    if parser is None:
        # extension-based fallback for unparseable files
        if file_path.lower().endswith('.mov'):
            return 'video/quicktime', 'null'
        if file_path.lower().endswith('.mpg'):
            return 'video/mpeg', 'null'
        if file_path.lower().endswith('.jpg'):
            return 'image/jpeg', 'null'
        if file_path.lower().endswith('.bup'):
            return 'video/dvd', 'null'
        if file_path.lower().endswith('.vob'):
            return 'video/dvd', 'null'
        if file_path.lower().endswith('.ifo'):
            return 'video/dvd', 'null'
    metadata = extractMetadata(parser)
    mime_type = parser.mime_type
    info = {}
    for data in sorted(metadata or ()):
        if not data.values:
            continue  # skip empty metadata entries
        info[data.key] = [item.text for item in data.values]
    return mime_type, json.dumps(info)
def get_file_metadata(path):
    """Return a {key: value} metadata dict for *path*, plus its size.

    Best-effort: parse failures are logged and at minimum a size entry is
    returned (0 for null streams).

    Returns:
        dict (possibly empty when *path* is not a regular file).
    """
    rdata = {}
    if os.path.isfile(path):
        # FIX: initialise before the try so the `finally` block cannot hit a
        # NameError when createParser itself raises
        parser = None
        try:
            parser = createParser(unicodeFilename(path), path)
            rdata["size"] = os.stat(path).st_size
            if parser:
                try:
                    metadata = extractMetadata(parser)
                    if metadata:
                        # single-valued keys map to the value; multi-valued
                        # keys map to a list of values
                        rdata.update(
                            (md.key,
                             md.values[0].value if len(md.values) == 1
                             else [value.value for value in md.values])
                            for md in metadata if md.values
                        )
                except HachoirError as e:
                    logging.exception(e)
        except NullStreamError:
            rdata["size"] = 0
        except BaseException as e:
            logging.exception(e)
        finally:
            # hachoir does not close its input stream; do it ourselves
            if parser and parser.stream and parser.stream._input and not parser.stream._input.closed:
                parser.stream._input.close()
    return rdata
def extract(self, fname, quality=0.5, decoder=None):
    """this code comes from processFile in hachoir-metadata

    Builds a hachoir parser for *fname*, optionally forcing a specific
    decoder via parser tags.

    NOTE(review): failure paths return different types ({} for a bad
    filename, False for a stream error) — confirm callers handle both.
    """
    fname = safe_unicode(fname)
    if not fname:
        print('UNICODE FAILED: %s' % fname)
        return {}
    filename, real_filename = fname, fname
    (f, ext) = os.path.splitext(fname)
    ext = ext.lower()[1:]
    # Create parser
    try:
        if decoder:
            # force the named decoder via a parser "id" tag
            tags = None
            tags = [ ("id", decoder), None ]
        else:
            tags = None
        parser = None
        parser = hachoir_parser.createParser(fname, real_filename=real_filename, tags=tags)
    except hachoir_core.stream.InputStreamError, err:
        print('Failed to create parser for %s' % fname)
        print(err)
        return False
def googlesearch():
    """Google-dork the target domain for doc/docx/ppt/xls files, download
    each hit, extract its hachoir metadata, and insert rows into the
    `metadata` table.

    NOTE(review): relies on module-level globals (domain, sid, executor,
    connection) — confirm they are initialised before calling.
    """
    print "Searching google for files..."
    # set up browser
    browse = mechanize.Browser()
    cookiejar = cookielib.LWPCookieJar()
    browse.set_cookiejar(cookiejar)
    browse.set_handle_equiv(True)
    browse.set_handle_redirect(True)
    browse.set_handle_referer(True)
    browse.set_handle_robots(False)
    browse.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browse.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]
    # response = browse.open("https://www.google.com/#q=filetype: %s + %s" % (filetype, domain))
    for filetype in ["doc", "docx", "ppt", "xls"]:
        response = browse.open("https://www.google.com")
        browse.select_form(nr=0)
        browse.form["q"] = "filetype:%s site:%s" % (filetype, domain)
        browse.submit()
        results = browse.response().read()
        soup = BeautifulSoup(results, "lxml")
        sidlist = []
        namelist = []
        typelist = []
        metalist = []
        counter = 1
        for link in soup.find_all("a", href=re.compile("/url")):
            link = link.get("href")
            if link.startswith("/url?q="):
                # strip google's redirect prefix and anything after the extension
                link = link[len("/url?q=") :]
                link = link.split("." + filetype)[0]
                # print str(link + ".pdf")
                filename = "%s%s.%s" % (domain, counter, filetype)
                try:
                    downfile = browse.retrieve(str(link + "." + filetype), filename)[0]
                    filename = downfile
                    filename, realname = unicodeFilename(filename), filename
                    parser = createParser(filename, realname)
                    metadata = extractMetadata(parser)
                    text = metadata.exportPlaintext()
                    charset = getTerminalCharset()
                    sidlist.append(sid)
                    typelist.append(str(filetype))
                    namelist.append(str(filename))
                    metalist.append(str(text))
                    counter += 1
                except:
                    # best-effort: skip any download/parse failure
                    pass
        for meta in zip(sidlist, typelist, namelist, metalist):
            executor.execute("INSERT INTO metadata VALUES (?,?,?,?)", meta)
            # for line in text:
            #     print makePrintable(line, charset)
        connection.commit()
def __init__(self, path):
    """Create a hachoir parser for *path* and reset modification tracking.

    Raises:
        Exception: when hachoir cannot parse the file.
    """
    super(HachoirParsable, self).__init__()
    from hachoir_parser import createParser
    parser = createParser(unicode(path))
    if not parser:
        raise Exception("Could not parse: %s" % path)
    self.parser = parser
    self._metadata_paths = {}
    self._field_modifications = 0
def testListBinaries(self):
    """ListBinaries() should report root/foo-file, using mocked hachoir,
    libmagic and os calls."""
    # expect hachoir to parse the candidate binary's ELF header
    self.mox.StubOutWithMock(hachoir_parser, 'createParser', use_mock_anything=True)
    hachoir_parser_mock = self.mox.CreateMockAnything()
    hachoir_parser.createParser(
        u'/fake/path/CSWfoo/root/foo-file').AndReturn(hachoir_parser_mock)
    self.mox.StubOutWithMock(os, 'access')
    os.access(u'/fake/path/CSWfoo/root/foo-file', os.R_OK).AndReturn(True)
    machine_mock = self.mox.CreateMockAnything()
    machine_mock.value = 2
    hachoir_parser_mock.__getitem__('/header/machine').AndReturn(
        machine_mock)
    endian_mock = self.mox.CreateMockAnything()
    endian_mock.display = 'fake-endian'
    hachoir_parser_mock.__getitem__('/header/endian').AndReturn(
        endian_mock)
    # expect libmagic to classify the file as an executable
    magic_cookie_mock = self.mox.CreateMockAnything()
    self.mox.StubOutWithMock(magic, 'open')
    magic.open(0).AndReturn(magic_cookie_mock)
    magic_cookie_mock.load()
    # the mime flag name differs across python-magic versions
    if "MAGIC_MIME" in dir(magic):
        flag = magic.MAGIC_MIME
    elif "MIME" in dir(magic):
        flag = magic.MIME
    magic_cookie_mock.setflags(flag)
    magic_cookie_mock.file(u'/fake/path/CSWfoo/root/foo-file').AndReturn(
        "application/x-executable")
    # fake filesystem layout: root/ exists, reloc/ does not
    self.mox.StubOutWithMock(os.path, 'isdir')
    self.mox.StubOutWithMock(os.path, 'exists')
    self.mox.StubOutWithMock(os, 'walk')
    # self.mox.StubOutWithMock(__builtins__, 'open')
    os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
    os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
    os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
    os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
    os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
    os.walk("/fake/path/CSWfoo/root").AndReturn([
        ("/fake/path/CSWfoo/root", [], ["foo-file"]),
    ])
    self.mox.ReplayAll()
    ip = inspective_package.InspectivePackage("/fake/path/CSWfoo")
    ip.pkginfo_dict = {
        "BASEDIR": "",
    }
    self.assertEqual([u'foo-file'], ip.ListBinaries())
def extract_title(filename):
    """Return the 'title' metadata entry for *filename*.

    Returns:
        The title value, or None when the metadata has no title.
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    metadata = extractMetadata(parser)
    # FIX: dropped an unused `text = metadata.exportPlaintext()` that did
    # extra work for no reason
    return metadata.get('title')
def testListBinaries(self):
    """ListBinaries() should report root/foo-file, using mocked hachoir,
    libmagic and os calls."""
    # expect hachoir to parse the candidate binary's ELF header
    self.mox.StubOutWithMock(hachoir_parser, 'createParser', use_mock_anything=True)
    hachoir_parser_mock = self.mox.CreateMockAnything()
    hachoir_parser.createParser(
        u'/fake/path/CSWfoo/root/foo-file').AndReturn(hachoir_parser_mock)
    self.mox.StubOutWithMock(os, 'access')
    os.access(u'/fake/path/CSWfoo/root/foo-file', os.R_OK).AndReturn(True)
    machine_mock = self.mox.CreateMockAnything()
    machine_mock.value = 2
    hachoir_parser_mock.__getitem__('/header/machine').AndReturn(machine_mock)
    endian_mock = self.mox.CreateMockAnything()
    endian_mock.display = 'fake-endian'
    hachoir_parser_mock.__getitem__('/header/endian').AndReturn(endian_mock)
    # expect libmagic to classify the file as an executable
    magic_cookie_mock = self.mox.CreateMockAnything()
    self.mox.StubOutWithMock(magic, 'open')
    magic.open(0).AndReturn(magic_cookie_mock)
    magic_cookie_mock.load()
    # the mime flag name differs across python-magic versions
    if "MAGIC_MIME" in dir(magic):
        flag = magic.MAGIC_MIME
    elif "MIME" in dir(magic):
        flag = magic.MIME
    magic_cookie_mock.setflags(flag)
    magic_cookie_mock.file(
        u'/fake/path/CSWfoo/root/foo-file').AndReturn(
            "application/x-executable")
    # fake filesystem layout: root/ exists, reloc/ does not
    self.mox.StubOutWithMock(os.path, 'isdir')
    self.mox.StubOutWithMock(os.path, 'exists')
    self.mox.StubOutWithMock(os, 'walk')
    # self.mox.StubOutWithMock(__builtins__, 'open')
    os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
    os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
    os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
    os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
    os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
    os.walk("/fake/path/CSWfoo/root").AndReturn(
        [
            ("/fake/path/CSWfoo/root", [], ["foo-file"]),
        ]
    )
    self.mox.ReplayAll()
    ip = inspective_package.InspectivePackage("/fake/path/CSWfoo")
    ip.pkginfo_dict = {
        "BASEDIR": "",
    }
    self.assertEqual([u'foo-file'], ip.ListBinaries())
def getData(self):
    """Extract hachoir metadata for self.filename.

    NOTE(review): `metadata` is a local here and is not stored or returned
    in this visible span — presumably used by code past this chunk.
    """
    filename, realname = unicodeFilename(self.filename), self.filename
    parser = createParser(filename, realname)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def checkFile(filename, check_metadata, quality=1.0):
    """Create a hachoir parser for *filename*; exits the process on a
    stream error.

    NOTE(review): `check_metadata` and `quality` are unused in this
    visible span — presumably consumed by code past this chunk.
    """
    sys.stdout.write(" - Create parser: ")
    sys.stdout.flush()
    try:
        parser = createParser(filename)
    except InputStreamError, err:
        sys.stdout.write("stream error! %s\n" % unicode(err))
        sys.exit(1)
def checkFile(filename, check_parser):
    """Create a hachoir parser for *filename*; exits the process on a
    stream error.

    NOTE(review): `check_parser` is unused in this visible span —
    presumably consumed by code past this chunk.
    """
    sys.stdout.write(" - Create parser: ")
    sys.stdout.flush()
    try:
        parser = createParser(filename)
    except InputStreamError, err:
        sys.stdout.write("stream error! %s\n" % unicode(err))
        sys.exit(1)
def which_type(self, image_path):
    """
    Analyzes the image provided and attempts to determine whether it is a poster or banner.

    :param image_path: full path to the image
    :return: BANNER, POSTER if it concluded one or the other, or None if the image was neither (or didn't exist)
    """
    if not ek(os.path.isfile, image_path):
        logger.log(
            u"Couldn't check the type of {image_path} cause it doesn't exist"
            .format(image_path=image_path), logger.WARNING)
        return None
    if try_int(ek(os.path.getsize, image_path)) == 0:
        logger.log(
            u'Image has 0 bytes size. Deleting it: {image_path}'.format(
                image_path=image_path), logger.WARNING)
        try:
            ek(os.remove, image_path)
        except OSError as e:
            logger.log(
                u"Could't delete file: '{image_path}'. Please manually delete it. Error: {error_msg}"
                .format(image_path=image_path, error_msg=e), logger.WARNING)
        return
    # use hachoir to parse the image for us
    img_parser = createParser(image_path)
    # NOTE(review): no None-check on img_parser before use — confirm
    # createParser cannot return None for the images reaching this point
    img_metadata = extractMetadata(img_parser)
    if not img_metadata:
        logger.log(
            u"Unable to get metadata from {image_path}, not using your existing image"
            .format(image_path=image_path), logger.DEBUG)
        return None
    img_ratio = float(img_metadata.get('width')) / float(
        img_metadata.get('height'))
    # hachoir does not close the underlying file itself
    img_parser.stream._input.close()
    # most posters are around 0.68 width/height ratio (eg. 680/1000)
    if 0.55 < img_ratio < 0.8:
        return self.POSTER
    # most banners are around 5.4 width/height ratio (eg. 758/140)
    elif 5 < img_ratio < 6:
        return self.BANNER
    # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
    elif 1.7 < img_ratio < 1.8:
        return self.FANART
    else:
        logger.log(
            u"Image has size ratio of {img_ratio}, unknown type".format(
                img_ratio=img_ratio), logger.WARNING)
        return
def qualityFromFileMeta(filename):  # pylint: disable=too-many-branches
    """
    Get quality file file metadata

    :param filename: Filename to analyse
    :return: Quality prefix

    Uses the video height from hachoir metadata plus filename hints
    (bluray/web-dl markers) to choose a Quality constant.
    """
    log.use_print = False
    try:
        parser = createParser(filename)
    except Exception:  # pylint: disable=broad-except
        parser = None
    if not parser:
        return Quality.UNKNOWN
    try:
        metadata = extractMetadata(parser)
    except Exception:  # pylint: disable=broad-except
        metadata = None
    try:
        # hachoir leaves the input stream open; close it explicitly
        parser.stream._input.close()  # pylint: disable=protected-access
    except Exception:  # pylint: disable=broad-except
        pass
    if not metadata:
        return Quality.UNKNOWN
    height = 0
    if metadata.has('height'):
        height = int(metadata.get('height') or 0)
    else:
        # some containers only expose height on a sub-group (e.g. video track)
        test = getattr(metadata, "iterGroups", None)
        if callable(test):
            for metagroup in metadata.iterGroups():
                if metagroup.has('height'):
                    height = int(metagroup.get('height') or 0)
    if not height:
        return Quality.UNKNOWN
    base_filename = ek(path.basename, filename)
    # source hints from the filename
    bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
    webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None
    ret = Quality.UNKNOWN
    if height > 1000:
        ret = ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
    elif 680 < height < 800:
        ret = ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
    elif height < 680:
        ret = (Quality.SDTV, Quality.SDDVD)[re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
    return ret
def getMetadata(filename):
    """Return hachoir metadata for *filename*.

    NOTE(review): exits the whole process when the file cannot be parsed —
    consider raising instead if this is used as a library helper.
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    metadata = extractMetadata(parser)
    return metadata
def __init__(self, file_path, data=None):
    """Wrap *file_path*; populate from *data* when given, else parse the file.

    NOTE(review): self._parser is only assigned on the parse branch —
    confirm nothing reads it after populate(data).
    """
    self._file_data = {}
    self._file_path = file_path
    if data is not None:
        self.populate(data)
    else:
        self._parser = createParser(unicode(self._file_path, "utf-8"), self._file_path)
        if not self._parser:
            stderr("Unable to parse file: " + self._file_path)
def GetFilesMetadata(self):
    """Returns a data structure with all the files plus their metadata.

    [
      {
        "path": ...,
        "mime_type": ...,
      },
    ]

    Results are cached in self.files_metadata after the first call.
    Binaries additionally get hachoir-derived ELF header fields.
    """
    if not self.files_metadata:
        self.CheckPkgpathExists()
        self.files_metadata = []
        files_root = self.GetFilesDir()
        all_files = self.GetAllFilePaths()
        def StripRe(x, strip_re):
            # drop a leading reloc/ or root/ component from the path
            return re.sub(strip_re, "", x)
        root_re = re.compile(r"^(reloc|root)/")
        file_magic = FileMagic()
        basedir = self.GetBasedir()
        for file_path in all_files:
            full_path = unicode(self.MakeAbsolutePath(file_path))
            file_info = {
                "path": StripRe(file_path, root_re),
                "mime_type": file_magic.GetFileMimeType(full_path)
            }
            if basedir:
                file_info["path"] = os.path.join(basedir, file_info["path"])
            if not file_info["mime_type"]:
                logging.error("Could not establish the mime type of %s", full_path)
                # We really don't want that, as it misses binaries.
                msg = (
                    "It was not possible to establish the mime type of %s. "
                    "It's a known problem which occurs when indexing a large "
                    "number of packages in a single run. "
                    "It's probably caused by a bug in libmagic, or a bug in "
                    "libmagic Python bindings. "
                    "Currently, there is no fix for it. "
                    "You have to restart your process - it "
                    "will probably finish successfully when do you that."
                    % full_path)
                raise package.PackageError(msg)
            if sharedlib_utils.IsBinary(file_info):
                parser = hp.createParser(full_path)
                if not parser:
                    logging.warning("Can't parse file %s", file_path)
                else:
                    # record ELF header facts extracted by hachoir
                    file_info["mime_type_by_hachoir"] = parser.mime_type
                    machine_id = parser["/header/machine"].value
                    file_info["machine_id"] = machine_id
                    file_info["endian"] = parser["/header/endian"].display
            self.files_metadata.append(file_info)
    return self.files_metadata
def get_metadata(self,fullurl):
    """Return hachoir metadata for *fullurl*.

    Returns the sentinel string 'not' when parsing/extraction fails.
    """
    metadata = 'not'
    try:
        filename, realname = unicode(fullurl), fullurl
        parser = createParser(filename, realname)
        metadata = extractMetadata(parser)
    except Exception as e:
        print ("Error getting metadata ",e.args)
    return metadata
def __get_hd_tag__(self, video):
    """Map the video's pixel width to an HD tag code.

    1280 wide -> 404 (720p), 1920 wide -> 1604 (1080p), anything else -> 104.
    """
    meta = extractMetadata(createParser(unicodeFilename(video)))
    width = meta.get('width')
    if width == 1280:
        return 404
    if width == 1920:
        return 1604
    return 104
def getinfo(rootdir, extensions=(".avi", ".mp4" , ".mov")):
    """Yield (path, hachoir metadata) for every matching file under
    *rootdir*, walking directories and files in sorted order."""
    if not isinstance(rootdir, unicode):
        # hachoir wants unicode paths; decode byte strings first
        rootdir = rootdir.decode(sys.getfilesystemencoding())
    for dirpath, dirs, files in os.walk(rootdir):
        dirs.sort()  # traverse directories in sorted order
        files.sort()
        for filename in files:
            if filename.endswith(extensions):
                path = os.path.join(dirpath, filename)
                yield path, extractMetadata(createParser(path))
def getinfo(rootdir, extensions=(".avi", ".mp4", ".mov")):
    """Yield (path, hachoir metadata) for every matching file under
    *rootdir*, walking directories and files in sorted order."""
    if not isinstance(rootdir, unicode):
        # hachoir wants unicode paths; decode byte strings first
        rootdir = rootdir.decode(sys.getfilesystemencoding())
    for dirpath, dirs, files in os.walk(rootdir):
        dirs.sort()  # traverse directories in sorted order
        files.sort()
        for filename in files:
            if filename.endswith(extensions):
                path = os.path.join(dirpath, filename)
                yield path, extractMetadata(createParser(path))
def file_mimetype(filename):
    """Return mimetype information for *filename*.

    NOTE(review): every non-empty filename returns from the Cigma branch,
    so the hachoir fallback below only runs for falsy/empty names — and
    the two branches return different shapes (str-or-None vs dict).
    Confirm which contract callers expect.
    """
    if filename and filename != "":
        result = Cigma().identify(filename=filename)
        return result["match"]["mimetype"] if result["match"] else None
    parser = createParser(unicodeFilename(filename), filename)
    return { "mimetype": str(parser.mime_type) } if parser else { "mimetype": "text/plain" }
def _get_hachoir_metadata(blob_path): parser = createParser(blob_path) if not parser: print "Unable to parse file" exit(1) try: metadata = extractMetadata(parser, quality=metadata_item.QUALITY_BEST) except HachoirError, err: print "Metadata extraction error: {}".format(err) metadata = None
def __init__(self, input_filename, **kw):
    """Mount *input_filename* as a hachoir-backed FUSE filesystem."""
    Fuse.__init__(self, **kw)
    log.setFilename("/home/haypo/fuse_log")
    self.hachoir = createParser(input_filename)
    if True:
        # editable mode; NOTE(review): the `if True:` constant looks like a
        # leftover debug toggle between editable/read-only — confirm intent
        self.hachoir = createEditor(self.hachoir)
        self.readonly = False
    else:
        self.readonly = True
    self.fs_charset = "ASCII"
def get_video_creation_date_metadata(fname): """ Returns the "Creation date" entry from the metadata of a file The return string will have the format '- Creation date: YYYY-MM-DD HH:MM:SS' or if no metadata is found or the file is not valid or doesn't exist, an exception will be thrown :param fname: Name of file to read the metadata from :returns: creation data metadata in specified format :Example: >>> import fileops >>> print fileops.get_video_creation_date_metadata("IMG_1234.JPG") '- Creation date: 2013-09-30 15:21:42' """ # suppress errors from hachoir calls, use our own logging hachoir_core.config.quiet = True # try to access tags associated with video files using # hachoir parser try: fname, realname = hachoir_core.cmd_line.unicodeFilename( fname), fname parser = hachoir_parser.createParser(fname, realname) except: raise VideoMetadataError, "Unable to parse video file" if not parser: raise VideoMetadataError, "Unable to parse video file" try: metadata = hachoir_metadata.extractMetadata(parser) except HachoirError: raise VideoMetadataError, "Error extracting metadata " finally: # hachoir doesn't close the file associated with # the parser object, hence need to do this parser.stream._input.close() if not metadata: raise VideoMetadataError, "No metadata found" text = metadata.exportPlaintext() for line in text: printable = hachoir_core.tools.makePrintable(line, hachoir_core.i18n.getTerminalCharset()) if "Creation date" in printable: return printable raise VideoMetadataError, "No 'Creation date' found in metadata"
def metadata_for_video(filename): filename, realname = unicodeFilename(filename), filename parser = createParser(filename, realname) if not parser: print "Unable to parse file" exit(1) try: metadata = extractMetadata(parser) except HachoirError, err: print "Metadata extraction error: %s" % unicode(err) metadata = None
def metadata_map(filename):
    """Return hachoir's internal metadata dict for *filename*, or None.

    Hidden files (basename starting with '.') are skipped, and None is
    returned when the file cannot be parsed.
    """
    basename = filename.split('/')[-1]
    # Robustness: the original indexed basename[0] directly and raised
    # IndexError for a path ending in '/'; it also compared against both
    # u'.' and '.' which is redundant.
    if not basename or basename.startswith('.'):
        return None
    parser = createParser(filename)
    if parser:
        # NOTE(review): reaches into hachoir's name-mangled private
        # attribute (_Metadata__data) — fragile across hachoir versions.
        return metadata.extractMetadata(parser)._Metadata__data
    return None
def parse(self): filename, realname = unicodeFilename(self.filename), self.filename parser = hachoir_parser.createParser(filename, realname) if not parser: sys.stderr.write("Unable to parse file %s/n" % self.filename) return try: ## TODO Essa chamada da um warning quando nao ha gps data metadata = hachoir_metadata.extractMetadata(parser) except HachoirError, err: print "Metadata extraction error: %s" % unicode(err) metadata = None
def getMetadata(filename):
    """Return the metadata of *filename* as exported plaintext lines.

    Returns None when the file cannot be parsed or metadata extraction
    fails (matching the original silent-failure contract).
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if parser is None:
        # Explicit guard: previously a None parser surfaced as an
        # exception that the bare except silently swallowed.
        return None
    try:
        metadata = extractMetadata(parser)
    except Exception:
        # Narrowed from a bare `except:` which also caught
        # SystemExit/KeyboardInterrupt.
        return None
    if metadata is not None:
        return metadata.exportPlaintext()
    return None
def get_video_creation_date_metadata(fname): """ Returns the "Creation date" entry from the metadata of a file The return string will have the format '- Creation date: YYYY-MM-DD HH:MM:SS' or if no metadata is found or the file is not valid or doesn't exist, an exception will be thrown :param fname: Name of file to read the metadata from :returns: creation data metadata in specified format :Example: >>> import fileops >>> print fileops.get_video_creation_date_metadata("IMG_1234.JPG") '- Creation date: 2013-09-30 15:21:42' """ # suppress errors from hachoir calls, use our own logging hachoir_core.config.quiet = True # try to access tags associated with video files using # hachoir parser try: fname, realname = hachoir_core.cmd_line.unicodeFilename(fname), fname parser = hachoir_parser.createParser(fname, realname) except: raise VideoMetadataError, "Unable to parse video file" if not parser: raise VideoMetadataError, "Unable to parse video file" try: metadata = hachoir_metadata.extractMetadata(parser) except HachoirError: raise VideoMetadataError, "Error extracting metadata " finally: # hachoir doesn't close the file associated with # the parser object, hence need to do this parser.stream._input.close() if not metadata: raise VideoMetadataError, "No metadata found" text = metadata.exportPlaintext() for line in text: printable = hachoir_core.tools.makePrintable( line, hachoir_core.i18n.getTerminalCharset()) if "Creation date" in printable: return printable raise VideoMetadataError, "No 'Creation date' found in metadata"
def processFile(self, filename): filename, realname = unicodeFilename(filename), filename print u"[%s] Process file %s..." % (self.total, filename) parser = createParser(filename, realname) if not parser: print >> stderr, "Unable to parse file" return None try: metadata = extractMetadata(parser) except HachoirError, err: print >> stderr, "Metadata extraction error: %s" % unicode(err) return None
def GetSongName(self,songDir): parser = createParser(songDir) print parser meta="" # See what keys you can extract for k,v in metadata.extractMetadata(parser)._Metadata__data.iteritems(): if v.values: print v.key, v.values[0].value if v.key=="title": return v.values[0].value