Ejemplo n.º 1
2
def get_creation_date(_path):
    """
    Return the creation date of a file, preferring its embedded metadata.

    The hachoir metadata library is asked for the file's "creation_date"
    entry. Any exception raised while parsing is ignored, and the
    filesystem ctime is used whenever no truthy date was obtained.

    Args:
        _path the full path to the file.

    Returns:
        The "creation_date" value reported by hachoir when it is truthy,
        otherwise a datetime built from os.path.getctime(_path).
    """
    # Put hachoir into quiet mode before touching the file
    hachoir_config.quiet = True

    _creation_date = None
    try:
        parser = createParser(unicodeFilename(_path), _path)
        metadata = extractMetadata(parser) if parser else None
        if metadata:
            _creation_date = metadata.get("creation_date")
    except Exception:
        # Best effort only: the ctime fallback below takes over
        pass

    if _creation_date:
        return _creation_date

    # No usable metadata date: fall back to the filesystem ctime
    return datetime.datetime.fromtimestamp(os.path.getctime(_path))
Ejemplo n.º 2
0
    def lnkparse(reflectPath, filename):
        """ Return the target filename from a MS-Windows link (URL format)

        Args:
            reflectPath: base directory the link target must stay inside.
            filename: path of the candidate link file.

        Returns:
            A ``(status, path)`` tuple. ``status`` is one of ``'OK'``,
            ``'ERROR_OUTREFLECTPATH'``, ``'ERROR_RELPATH'``,
            ``'NOT_LNKFILE'`` or ``'NOT_PARSED'``. ``path`` is the
            URL-quoted relative target for ``'OK'`` and ``''`` otherwise.
        """
        filename = unicodeFilename(filename)
        try:
            parser = createParser(filename)
            if parser is not None and isinstance(parser, LnkFile):
                # It is a "MS-Windows" link file
                try:
                    for field in parser:
                        pass  # trigger parsing
                    lnkpath = parser.getField('relative_path').value
                    # Build the complete target path and check that it
                    # stays inside the base path
                    if lnkpath.startswith('.\\'):
                        lnkpath = lnkpath[2:]
                    lnkpath = lnkpath.replace('\\', '/')
                    filenamePath = os.path.dirname(filename)
                    allLnkpath = os.path.join(reflectPath, filenamePath, lnkpath)
                    allLnkpath = os.path.abspath(allLnkpath)  # remove all ..\

                    # Compare on a path-component boundary: a bare string
                    # prefix test would accept "/base-other/x" for the
                    # base "/base". os.path.join(base, '') appends exactly
                    # one separator (and leaves a root directory intact).
                    basePath = os.path.abspath(reflectPath)
                    insideBase = (
                        allLnkpath == basePath or
                        allLnkpath.startswith(os.path.join(basePath, ''))
                    )
                    if insideBase:
                        lnkpath = quote(lnkpath.encode('utf-8'))
                        return 'OK', lnkpath
                    else:
                        return 'ERROR_OUTREFLECTPATH', ''
                except MissingField:
                    # example: link to a network file
                    return 'ERROR_RELPATH', ''
            else:
                return 'NOT_LNKFILE', ''
        except InputStreamError:
            return 'NOT_PARSED', ''
Ejemplo n.º 3
0
    def _verify_download(self, file_name=None):
        """
        Decide whether a saved download looks valid.

        Only files whose name ends with GenericProvider.TORRENT are
        inspected: they pass when the parser reports the mime type
        'application/x-bittorrent'. Every other file is accepted as is.

        :param file_name: path of the saved file
        :return: True if the file is accepted, False for a torrent that
                 could not be validated
        """
        # Anything that is not a torrent is accepted without inspection
        if not file_name.endswith(GenericProvider.TORRENT):
            return True

        # primitive check: make sure we didn't get a text file or something
        try:
            torrent_parser = createParser(file_name)
            if torrent_parser:
                # pylint: disable=protected-access
                # Access to a protected member of a client class
                detected_mime = torrent_parser._getMimeType()
                try:
                    # Release the underlying file handle; failures are ignored
                    torrent_parser.stream._input.close()
                except Exception:
                    pass
                if detected_mime == 'application/x-bittorrent':
                    return True
        except Exception as e:
            logger.log(u"Failed to validate torrent file: " + ex(e), logger.DEBUG)

        logger.log(u"Result is not a valid torrent file", logger.DEBUG)
        return False
Ejemplo n.º 4
0
    def _extractMetadata(self):
        """
        Populate ``self.metadata`` from the file at ``self.path`` using
        hachoir-metadata.

        On success ``self.metadata`` maps each metadata item's description
        to the text of its values joined with ", "; items without values
        are left out. When no parser or no metadata can be obtained, or a
        HachoirError is raised, ``self.metadata`` is set to None.
        """
        try:
            parser = createParser(unicode(self.path), str(self.path))
            extractor = extractMetadata(parser) if parser is not None else None

            # A missing parser or extractor is routed through the same
            # handler as a genuine hachoir failure
            if extractor is None:
                raise HachoirError

            self.metadata = {}
            for entry in sorted(extractor):
                if entry.values:
                    label = entry.description
                    texts = [item.text for item in entry.values]
                    self.metadata[label] = ', '.join(texts)

        except HachoirError:
            self.metadata = None
Ejemplo n.º 5
0
    def lnkparse(reflectPath, filename):
        """ Return the target filename from a MS-Windows link (URL format)

        Args:
            reflectPath: base directory the link target must stay inside.
            filename: path of the candidate link file.

        Returns:
            A ``(status, path)`` tuple. ``status`` is one of ``'OK'``,
            ``'ERROR_OUTREFLECTPATH'``, ``'ERROR_RELPATH'``,
            ``'NOT_LNKFILE'`` or ``'NOT_PARSED'``. ``path`` is the
            URL-quoted relative target for ``'OK'`` and ``''`` otherwise.
        """
        filename = unicodeFilename(filename)
        try:
            parser = createParser(filename)
            if parser is not None and isinstance(parser, LnkFile):
                # It is a "MS-Windows" link file
                try:
                    for field in parser:
                        pass  # trigger parsing
                    lnkpath = parser.getField('relative_path').value
                    # Build the complete target path and check that it
                    # stays inside the base path
                    if lnkpath.startswith('.\\'):
                        lnkpath = lnkpath[2:]
                    lnkpath = lnkpath.replace('\\', '/')
                    filenamePath = os.path.dirname(filename)
                    allLnkpath = os.path.join(reflectPath, filenamePath,
                                              lnkpath)
                    allLnkpath = os.path.abspath(allLnkpath)  # remove all ..\

                    # Compare on a path-component boundary: a bare string
                    # prefix test would accept "/base-other/x" for the
                    # base "/base". os.path.join(base, '') appends exactly
                    # one separator (and leaves a root directory intact).
                    basePath = os.path.abspath(reflectPath)
                    insideBase = (
                        allLnkpath == basePath or
                        allLnkpath.startswith(os.path.join(basePath, ''))
                    )
                    if insideBase:
                        lnkpath = quote(lnkpath.encode('utf-8'))
                        return 'OK', lnkpath
                    else:
                        return 'ERROR_OUTREFLECTPATH', ''
                except MissingField:
                    # example: link to a network file
                    return 'ERROR_RELPATH', ''
            else:
                return 'NOT_LNKFILE', ''
        except InputStreamError:
            return 'NOT_PARSED', ''
Ejemplo n.º 6
0
def parse_metadata(path):
    """Parse a file with hachoir and return its metadata as a dict.

    Only plaintext lines that start with "-" after stripping contribute an
    entry; every other line is ignored. A key seen more than once collects
    its values in a list.

    Returns None when the parser cannot be created, when extraction raises
    HachoirError, or when no metadata is found.
    """
    try:
        parser = createParser(unicode(path))
    except InputStreamError:
        return
    if not parser:
        return

    try:
        metadata = extractMetadata(parser, appsettings.INFO_QUALITY)
    except HachoirError:
        return
    if not metadata:
        return

    data = {}
    for line in metadata.exportPlaintext(priority=None, human=False):
        stripped = line.strip()
        if not stripped.startswith('-'):
            # Non-item lines never produce a stored value
            continue

        key = stripped.split('- ')[1].split(': ')[0]
        # Everything after the key, minus the ": " separator
        value = line.split(key)[1][2:]
        if key in data:
            previous = data[key]
            if hasattr(previous, '__iter__'):
                value = previous + [value]
            else:
                value = [previous, value]
        if value:
            data[key] = value
    return data
Ejemplo n.º 7
0
def extract_metadata_from_file(filename):
    """Return the hachoir metadata extracted from ``filename``.

    Raises:
        ValueError: if no parser could be created for the file.
    """
    parser = createParser(filename)
    if parser:
        return extractMetadata(parser)
    raise ValueError("Could not parse %s" % filename)
Ejemplo n.º 8
0
def extract_metadata(uuid):
    """Return hachoir metadata (quality 1.0) for the OdaFile with ``uuid``.

    The name of the object's file handle is used both as the display name
    and as the real filename passed to the parser.
    """
    filename = OdaFile.objects.get(uuid=uuid).file_handle().name
    parser = createParser(filename, real_filename=filename, tags=None)
    return extractMetadata(parser, 1.0)
Ejemplo n.º 9
0
def classify(path,rootdir): # add an extra argument here to take the root dir  :)
    """Parse ``path`` with hachoir and extract its metadata.

    The visible part of this function prints its arguments, sets default
    tag values, creates a parser (exiting with status 1 when that fails)
    and extracts the metadata, falling back to None on HachoirError.

    NOTE(review): the snippet appears truncated here; the defaults set
    below are presumably consumed by code that is not shown.
    """

    print 'path given: ', path,' RootDir: ',rootdir
    # Split off the last path component; raises IndexError when path has no '/'
    foo = path.rsplit('/', 1)
    fname = foo[1]

    # defaults audio, video:
    artist = album = genre = 'unknown'

    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        # No parser available: report on stderr and terminate the process
        print >>stderr, "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
Ejemplo n.º 10
0
def get_file_date(root, file):
    """Return the creation time found in the hachoir metadata of root/file.

    The plaintext metadata is scanned for the first line that starts with
    "- Creation" and contains a "YYYY-MM-DD HH:MM:SS" timestamp; that
    timestamp is returned as a time.struct_time.

    NOTE(review): the outer ``try`` has no visible handler, so this
    snippet appears truncated; what happens when no line matches is not
    shown here.
    """
    date = ""
    try:
        filename = "{}/{}".format(root,file)
        filename, realname = unicodeFilename(filename), filename
        parser = createParser(filename, realname)
        if not parser:
            # NOTE(review): only reported; execution continues with a falsy parser
            print >>stderr, "Unable to parse file {}".format(filename)
        try:
            # Temporarily send stderr to the null device while extracting.
            # NOTE(review): stderr is not restored if extractMetadata raises,
            # and the devnull handle is never closed.
            actualstderr = sys.stderr
            sys.stderr = open(os.devnull,'w')
            metadata = extractMetadata(parser)
            sys.stderr = actualstderr
        except HachoirError, err:
            print "Metadata extraction error: %s" % unicode(err)
            metadata = None
        if not metadata:
            # NOTE(review): only reported; metadata is still dereferenced below
            print "Unable to extract metadata, {}".format(filename)

        text = metadata.exportPlaintext()
        date = ""
        # Tracer()()
        for line in text:
            if line[0:10] == "- Creation":
                
                match = re.search('(\d+-\d+-\d+ \d+:\d+:\d+)', line)
                if match:
                    date = time.strptime(match.groups()[0], '%Y-%m-%d %H:%M:%S')
                    return date
Ejemplo n.º 11
0
    def _verify_download(self, file_name=None):
        """
        Decide whether a saved download looks valid.

        Only files whose name ends with GenericProvider.TORRENT are
        inspected: they pass when the parser reports the mime type
        'application/x-bittorrent'. Every other file is accepted as is.

        :param file_name: path of the saved file
        :return: True if the file is accepted, False for a torrent that
                 could not be validated
        """
        # Anything that is not a torrent is accepted without inspection
        if not file_name.endswith(GenericProvider.TORRENT):
            return True

        # primitive check: make sure we didn't get a text file or something
        try:
            torrent_parser = createParser(file_name)
            if torrent_parser:
                # pylint: disable=W0212
                # Access to a protected member of a client class
                detected_mime = torrent_parser._getMimeType()
                try:
                    # Release the underlying file handle; failures are ignored
                    torrent_parser.stream._input.close()
                except Exception:
                    pass
                if detected_mime == 'application/x-bittorrent':
                    return True
        except Exception as e:
            logging.debug("Failed to validate torrent file: {}".format(
                ex(e)))

        logging.debug("Result is not a valid torrent file")
        return False
Ejemplo n.º 12
0
def get_metadata(file_names):
    """Extract hachoir metadata for every supported file and log it.

    For each name in ``file_names`` whose extension is in the supported
    set, the file name and its plaintext metadata lines are written to
    'results.txt' in the current directory. Files with other extensions
    are reported and skipped.

    The process exits with status 1 when a supported file cannot be parsed
    or yields no metadata.

    Args:
        file_names: iterable of file paths to analyse.

    Returns:
        The plaintext metadata lines of the last supported file, or an
        empty list when no file had a supported extension.
    """
    print ("- Analyzing files metadata.." + "\n")
    # A frozenset gives constant-time membership tests for each file
    file_extensions = frozenset([
        ".3do", ".3ds", ".7z", ".a", ".ace", ".aif", ".aifc", ".aiff",
        ".ani", ".apm", ".asf", ".au", ".avi", ".bin", ".bmp", ".bz2",
        ".cab", ".cda", ".chm", ".class", ".cur", ".deb", ".der", ".dll",
        ".doc", ".dot", ".emf", ".exe", ".flv", ".gif", ".gz", ".ico",
        ".jar", ".jpeg", ".jpg", ".laf", ".lnk", ".m4a", ".m4b", ".m4p",
        ".m4v", ".mar", ".mid", ".midi", ".mka", ".mkv", ".mod", ".mov",
        ".mp1", ".mp2", ".mp3", ".mp4", ".mpa", ".mpe", ".mpeg", ".mpg",
        ".msi", ".nst", ".oct", ".ocx", ".odb", ".odc", ".odf", ".odg",
        ".odi", ".odm", ".odp", ".ods", ".odt", ".ogg", ".ogm", ".otg",
        ".otp", ".ots", ".ott", ".pcf", ".pcx", ".pdf", ".png", ".pot",
        ".pps", ".ppt", ".ppz", ".psd", ".ptm", ".pyo", ".qt", ".ra",
        ".rar", ".rm", ".rpm", ".s3m", ".sd0", ".snd", ".so", ".stc",
        ".std", ".sti", ".stw", ".swf", ".sxc", ".sxd", ".sxg", ".sxi",
        ".sxm", ".sxw", ".tar", ".tga", ".tif", ".tiff", ".torrent",
        ".ts", ".ttf", ".vob", ".wav", ".wma", ".wmf", ".wmv", ".wow",
        ".xcf", ".xla", ".xls", ".xm", ".zip", ".zs1", ".zs2", ".zs3",
        ".zs4", ".zs5", ".zs6", ".zs7", ".zs8", ".zs9", ".zst",
    ])
    # Stays empty when no file has a supported extension, so the final
    # return never touches an unbound name
    text = []
    # The context manager closes results.txt on every exit path,
    # including the exit(1) calls below
    with open('results.txt', 'w') as file_:
        for filename in file_names:
            print ("- Extracting file metadata: " + filename + "\n")
            extension = os.path.splitext(filename)[1]
            if extension not in file_extensions:
                print ("    * File extension is unknown or not supported" + "\n" + "\n")
                continue

            print ("    * The file extension is: " + extension + "\n")
            filename, realname = unicodeFilename(filename), filename
            file_.write('Name: ')
            file_.write(filename)
            file_.write('\n')
            parser = createParser(filename, realname)
            if not parser:
                stderr.write("Error, parsing file\n")
                exit(1)
            try:
                metadata = extractMetadata(parser)
            except Exception as e:
                print ("Error extracting file metadata: " + str(e))
                metadata = None
            if not metadata:
                print ("Metadata can not be extracted")
                exit(1)
            text = metadata.exportPlaintext()
            for line in text:
                file_.write(line)
                file_.write('\n')
            print ("    * Successfull metadata extraction" + "\n" + "\n")
    return text
Ejemplo n.º 13
0
    def _guess_from_metadata(self):
        """Collect guessit guesses from the metadata of ``self.files``.

        For every file that hachoir can parse, the plaintext metadata is
        scanned for entries containing 'title' or 'comment'. Each value
        found is passed to guessit.guess_episode_info, and the 'title'
        guess is preferred over the 'comment' guess.

        Returns:
            list of guesses, one per metadata line that yielded a title
            or comment entry.
        """
        guesses = []
        for filename in self.files:
            filename = get_filename(filename)
            if isinstance(filename, unicode):
                realname = filename
            else:
                filename, realname = unicodeFilename(filename), filename

            parser = createParser(filename, realname)
            if not parser:
                continue

            try:
                metadata = extractMetadata(parser)
            except HachoirError:
                continue

            for line in metadata.exportPlaintext():
                # Each kept element must split into exactly one key and
                # one value around ":"
                raw_entries = dict(
                    normalize(l).split(":")
                    for l in line
                    if 'comment' in l or 'title' in l
                )
                entries = {
                    k: guessit.guess_episode_info(v)
                    for k, v in raw_entries.items()
                }
                if 'title' in entries:
                    guesses.append(entries['title'])
                elif 'comment' in entries:
                    guesses.append(entries['comment'])
        return guesses
Ejemplo n.º 14
0
def Downloadfile(url):
    """Download ``url`` into the current directory and hex-encode it.

    The file is saved under the last path component of ``url``, read back
    and converted to a hexadecimal string. Hachoir metadata is gathered
    into a local list on a best-effort basis; that list is not returned.

    Args:
        url: address of the file to download.

    Returns:
        A ``(file_name, hexdata)`` tuple: the local file name and the
        hex-encoded content of the saved file.
    """
    infoMeta = []
    file_name = url.split('/')[-1]
    infoMeta.append(file_name)
    u = urllib2.urlopen(url)
    try:
        meta = u.info()
        infoMeta.append(meta.headers)
        doc = u.read()
    finally:
        # Release the connection even when reading fails
        u.close()

    # Close (and thereby flush) the file before reading it back; otherwise
    # buffered bytes may be missing from the hex dump below
    with open(file_name, 'wb') as f:
        f.write(doc)

    with open(file_name, 'rb') as p:
        # Slurp the whole file and efficiently convert it to hex all at once
        hexdata = binascii.hexlify(p.read())

    # use hachoir to add the standard metadata
    filename = './' + file_name
    print(filename)
    filename = unicodeFilename(filename)
    parser = createParser(filename)
    try:
        metalist = metadata.extractMetadata(parser).exportPlaintext()
        infoMeta.append(metalist[1:4])
    except Exception:
        infoMeta.append(["none", "none", "none"])

    # print "Done", file_name, " Info is ", infoMeta
    return file_name, hexdata
Ejemplo n.º 15
0
def get_creation_date(file_path):
    """Return the file's creation date as 'YYYY-MM-DD', or None on failure.

    The date is read from the 'creation_date' entry of the hachoir
    metadata (extracted with quality 0.5). Any ordinary error while
    parsing, extracting or formatting yields None.
    """
    try:
        parser = createParser(file_path)
        metadata = extractMetadata(parser, 0.5)
        return metadata['creation_date'].strftime('%Y-%m-%d')
    except Exception:
        # A bare "except:" would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors mean "no date available"
        return None
Ejemplo n.º 16
0
def classify(path,
             rootdir):  # add an extra argument here to take the root dir  :)
    """Parse ``path`` with hachoir and extract its metadata.

    The visible part of this function prints its arguments, sets default
    tag values, creates a parser (exiting with status 1 when that fails)
    and extracts the metadata, falling back to None on HachoirError.

    NOTE(review): the snippet appears truncated here; the defaults set
    below are presumably consumed by code that is not shown.
    """

    print 'path given: ', path, ' RootDir: ', rootdir
    # Split off the last path component; raises IndexError when path has no '/'
    foo = path.rsplit('/', 1)
    fname = foo[1]

    # defaults audio, video:
    artist = album = genre = 'unknown'

    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        # No parser available: report on stderr and terminate the process
        print >> stderr, "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
Ejemplo n.º 17
0
def hachm(filename):
    # using this example http://archive.org/details/WorkToFishtestwmv
    try:
        filename, realname = unicodeFilename(filename), filename
    except TypeError:
        filename,realname=filename,filename
    parser = createParser(filename)
    # See what keys you can extract
    tmp = metadata.extractMetadata(parser)
    if tmp is None: return {}
    else: tmp = tmp._Metadata__data.iteritems()
    for k,v in tmp:
        if v.values:
            print v.key, v.values[0].value
    # Turn the tags into a defaultdict
    metalist = metadata.extractMetadata(parser).exportPlaintext()
    meta = defaultdict(defaultdict)
    if not metalist:
        return meta
    for item in metalist[1:]:
        item = [x.strip() for x in item.split('-') if x.strip()][0]
        item = [ x.strip().lower().replace(' ','_') for x in item.split(':') ]

        k,v = item.pop(0),':'.join(item)
        meta[k]=v
    return meta
Ejemplo n.º 18
0
def ident(filename):
    """Identify a media file and return the result of the matching parser.

    The mime type is guessed from the file name. Video, audio and image
    files are handed to VideoParse, MusicParse and ImageParse
    respectively, and the media kind is appended to their result list.

    Args:
        filename: path of the file to identify.

    Returns:
        The parser result list with "video", "audio" or "image" appended,
        or None when the mime type is unknown or is none of those kinds.
    """
    mimetypes.init()
    filetype = mimetypes.guess_type(filename)[0]

    unifile = hachoir_core.cmd_line.unicodeFilename(filename)
    parser = hachoir_parser.createParser(unifile)
    meta = MetadataFilter(parser)

    generalinfo = GeneralInfo(filename)

    # guess_type() returns None for unknown extensions; calling
    # startswith() on it would raise AttributeError
    if filetype is None:
        return None

    results = None
    if filetype.startswith("video"):
        results = VideoParse(filename, filetype) + ["video"]
        # Unless parsing failed, report the guessed mime type first
        if results[0] != "Fail":
            results[0] = filetype

    elif filetype.startswith("audio"):
        results = MusicParse(filename) + ["audio"]

    elif filetype.startswith("image"):
        results = ImageParse(filename) + ["image"]

    return results
Ejemplo n.º 19
0
def checkFile(filename):
    """Announce the file on stdout and try to create a hachoir parser for it.

    Returns the string "streamerror" when createParser raises
    InputStreamError.

    NOTE(review): the snippet appears truncated; the success path is not
    shown here.
    """
    sys.stdout.write(addonname + ":Checking File: " + filename)
    sys.stdout.flush()
    try:
        parser = createParser(filename)
    except InputStreamError, err:
        # The parentheses do not make a tuple: a plain string is returned
        return ("streamerror")
Ejemplo n.º 20
0
    def _verify_download(self, file_name=None):
        """
        Decide whether a saved download looks valid.

        Only providers whose ``providerType`` is GenericProvider.TORRENT
        are inspected: the file passes when the parser reports the mime
        type 'application/x-bittorrent'. Other providers are accepted.

        :param file_name: path of the saved file
        :return: True if the file is accepted, False for a torrent that
                 could not be validated
        """
        # Anything that is not a torrent is accepted without inspection
        if self.providerType != GenericProvider.TORRENT:
            return True

        # primitive check: make sure we didn't get a text file or something
        try:
            parser = createParser(file_name)
            if parser:
                mime_type = parser._getMimeType()
                try:
                    parser.stream._input.close()
                except Exception:
                    # Closing is best effort, but a bare "except:" would
                    # also swallow KeyboardInterrupt and SystemExit
                    pass
                if mime_type == 'application/x-bittorrent':
                    return True
        except Exception as e:
            logger.log(u"Failed to validate torrent file: " + ex(e),
                       logger.DEBUG)

        logger.log(u"Result is not a valid torrent file", logger.WARNING)
        return False
Ejemplo n.º 21
0
    def parse_metadata(cls, full_path, db_conn):
        file_ext = os.path.splitext(full_path)[1][1:].lower()
        if(file_ext in ['mp3','bzip2','gzip','zip','tar','wav','midi','bmp','gif','jpeg','jpg','png','tiff','exe','wmv','mkv','mov']):
            # full_path = self._full_path(orig_path)
            # print(full_path)
            parser = createParser(full_path)
            metalist = metadata.extractMetadata(parser).exportPlaintext()
            for item in metalist:
                x = item.split(':')[0] 
                if item.split(':')[0][2:].lower() in ["author","album","music genre"]:
                    # print(item.split(':')[1][1:])
                    item1 = item.split(':')[1][1:]
                    new_item = str(item1.decode('utf-8'))
                    print new_item
                    new_item = string.replace(new_item, ";", ",")
                    new_item = string.replace(new_item, "|", ",")
                    tag_name = new_item.split(',')
                    print(tag_name)
                    for names in tag_name:
                        # inode = os.stat(full_path)[ST_INO
                        tagname = names.strip()
                        MiscFunctions.storeTagInDB(full_path, tagname, db_conn, is_system_tag=1)

            print("Database storage successful")
        elif file_ext in ["docx", "doc", "txt"]:
            tags = MiscFunctions.handleTextFiles(full_path)
            for tagname in tags:
                print "txt file tag: %s" % tagname
                MiscFunctions.storeTagInDB(full_path, tagname, db_conn, is_system_tag=1)
Ejemplo n.º 22
0
 def get_meta(self, file_path):
     """ Return the mime type and JSON-encoded metadata of a file.

     When hachoir cannot create a parser, a fixed mime type is returned
     for a few known extensions, together with the string 'null'.
     Otherwise the result is ``(parser.mime_type, json_text)`` where
     ``json_text`` maps each metadata key to the list of its value texts.
     """
     self.check_extension(file_path)
     filename, realname = unicodeFilename(file_path), file_path
     parser = createParser(filename, realname)
     if parser is None:
         lowered = file_path.lower()
         fallback_types = (
             ('.mov', 'video/quicktime'),
             ('.mpg', 'video/mpeg'),
             ('.jpg', 'image/jpeg'),
             ('.bup', 'video/dvd'),
             ('.vob', 'video/dvd'),
             ('.ifo', 'video/dvd'),
         )
         for suffix, fallback_mime in fallback_types:
             if lowered.endswith(suffix):
                 return fallback_mime, 'null'
         # NOTE(review): with no matching suffix, execution continues
         # below with parser still None
     metadata = extractMetadata(parser)
     mime_type = parser.mime_type
     info = {}
     for data in sorted(metadata or ()):
         if data.values:
             info[data.key] = [item.text for item in data.values]
     return mime_type, json.dumps(info)
Ejemplo n.º 23
0
    def _guess_from_metadata(self):
        """Collect guessit guesses from the metadata of ``self.files``.

        For every file that hachoir can parse, the plaintext metadata is
        scanned for entries containing 'title' or 'comment'. Each value
        found is passed to guessit.guess_episode_info, and the 'title'
        guess is preferred over the 'comment' guess.

        Returns:
            list of guesses, one per metadata line that yielded a title
            or comment entry.
        """
        guesses = []
        for filename in self.files:
            filename = get_filename(filename)
            if isinstance(filename, unicode):
                realname = filename
            else:
                filename, realname = unicodeFilename(filename), filename

            parser = createParser(filename, realname)
            if not parser:
                continue

            try:
                metadata = extractMetadata(parser)
            except HachoirError:
                continue

            for line in metadata.exportPlaintext():
                # Each kept element must split into exactly one key and
                # one value around ":"
                raw_entries = dict(
                    normalize(l).split(":")
                    for l in line
                    if 'comment' in l or 'title' in l
                )
                entries = {
                    k: guessit.guess_episode_info(v)
                    for k, v in raw_entries.items()
                }
                if 'title' in entries:
                    guesses.append(entries['title'])
                elif 'comment' in entries:
                    guesses.append(entries['comment'])
        return guesses
Ejemplo n.º 24
0
def get_file_metadata(path):
    """Return the size and hachoir metadata of the file at ``path``.

    Args:
        path: path of the file to inspect.

    Returns:
        A dict that is empty when ``path`` is not a regular file.
        Otherwise it holds "size" (0 when hachoir reports a null stream)
        plus one entry per metadata item that has values: the single
        value itself, or a list when the item has several values.
        Errors are logged and the data collected so far is returned.
    """
    rdata = {}
    if not os.path.isfile(path):
        return rdata

    # Bound before the try so the cleanup in "finally" can test it even
    # when createParser itself raises
    parser = None
    try:
        parser = createParser(unicodeFilename(path), path)
        rdata["size"] = os.stat(path).st_size
        if parser:
            try:
                metadata = extractMetadata(parser)
                if metadata:
                    rdata.update(
                        (md.key,
                            md.values[0].value
                            if len(md.values) == 1 else
                            [value.value for value in md.values]
                            )
                        for md in metadata if md.values
                        )
            except HachoirError as e:
                logging.exception(e)
    except NullStreamError:
        rdata["size"] = 0
    # NOTE(review): BaseException also swallows KeyboardInterrupt and
    # SystemExit; kept as in the original, consider narrowing to Exception
    except BaseException as e:
        logging.exception(e)
    finally:
        # Release the parser's underlying file handle if it is still open
        if parser and parser.stream and parser.stream._input and not parser.stream._input.closed:
            parser.stream._input.close()
    return rdata
Ejemplo n.º 25
0
    def extract(self, fname, quality=0.5, decoder=None):
        """Create a hachoir parser for ``fname`` (adapted from processFile in
        hachoir-metadata).

        Returns {} when ``safe_unicode`` yields a falsy name and False when
        the parser cannot be created because of an InputStreamError.

        NOTE(review): the snippet appears truncated; ``quality`` and the
        code that uses the parser are not shown here.
        """
        fname = safe_unicode(fname)
        if not fname:
            print('UNICODE FAILED: %s' % fname)
            return {}

        filename, real_filename = fname, fname

        # Lower-cased extension without the leading dot
        (f, ext) = os.path.splitext(fname)
        ext = ext.lower()[1:]

        # Create parser
        try:
            if decoder:
                # The first assignment is immediately overwritten below
                tags = None
                tags = [ ("id", decoder), None ]
            else:
                tags = None
            parser = None
            parser = hachoir_parser.createParser(fname, real_filename=real_filename, tags=tags)
        except hachoir_core.stream.InputStreamError, err:
            print('Failed to create parser for %s' % fname)
            print(err)
            return False
Ejemplo n.º 26
0
def googlesearch():
    """Search Google for office documents on ``domain`` and store their metadata.

    For each of the doc, docx, ppt and xls file types, the result links
    are downloaded, parsed with hachoir, and one row of
    (sid, file type, file name, metadata text) is inserted per
    successfully processed file. Downloads or parses that fail are
    skipped. The transaction is committed once at the end.

    Relies on the module-level names ``domain``, ``sid``, ``executor``
    and ``connection``.
    """
    print("Searching google for files...")
    # set up browser
    browse = mechanize.Browser()
    cookiejar = cookielib.LWPCookieJar()
    browse.set_cookiejar(cookiejar)
    browse.set_handle_equiv(True)
    browse.set_handle_redirect(True)
    browse.set_handle_referer(True)
    browse.set_handle_robots(False)
    browse.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browse.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    for filetype in ["doc", "docx", "ppt", "xls"]:
        browse.open("https://www.google.com")
        browse.select_form(nr=0)
        browse.form["q"] = "filetype:%s site:%s" % (filetype, domain)
        browse.submit()
        results = browse.response().read()
        soup = BeautifulSoup(results, "lxml")
        counter = 1
        for link in soup.find_all("a", href=re.compile("/url")):
            link = link.get("href")
            if not link.startswith("/url?q="):
                continue
            link = link[len("/url?q="):]
            link = link.split("." + filetype)[0]
            filename = "%s%s.%s" % (domain, counter, filetype)
            try:
                downfile = browse.retrieve(str(link + "." + filetype), filename)[0]
                filename, realname = unicodeFilename(downfile), downfile
                parser = createParser(filename, realname)
                metadata = extractMetadata(parser)
                text = metadata.exportPlaintext()
                row = (sid, str(filetype), str(filename), str(text))
            except Exception:
                # Best effort: skip files that cannot be fetched or parsed.
                # A bare "except:" would also swallow KeyboardInterrupt.
                continue
            # Insert each row exactly once. Previously the accumulated
            # rows were re-inserted on every link iteration, which
            # duplicated earlier results.
            executor.execute("INSERT INTO metadata VALUES (?,?,?,?)", row)
            counter += 1

    connection.commit()
Ejemplo n.º 27
0
 def __init__(self, path):
     """Create a hachoir parser for ``path`` and reset the bookkeeping state.

     Raises:
         Exception: if hachoir cannot create a parser for ``path``.
     """
     super(HachoirParsable, self).__init__()
     # Imported at construction time rather than at module import
     from hachoir_parser import createParser
     unicode_path = unicode(path)
     self.parser = createParser(unicode_path)
     if not self.parser:
         raise Exception("Could not parse: %s" % path)
     self._field_modifications = 0
     self._metadata_paths = {}
Ejemplo n.º 28
0
 def testListBinaries(self):
     """Check that ListBinaries() returns ['foo-file'] for a mocked package.

     hachoir_parser, magic and the os functions used to walk the package
     directory are replaced by mox mocks, so no real files are touched.
     The order of the expectations recorded below must not be changed.
     """
     # Expected hachoir parser creation for the single file in the package
     self.mox.StubOutWithMock(hachoir_parser,
                              'createParser',
                              use_mock_anything=True)
     hachoir_parser_mock = self.mox.CreateMockAnything()
     hachoir_parser.createParser(
         u'/fake/path/CSWfoo/root/foo-file').AndReturn(hachoir_parser_mock)
     self.mox.StubOutWithMock(os, 'access')
     os.access(u'/fake/path/CSWfoo/root/foo-file', os.R_OK).AndReturn(True)
     # Header fields the mocked parser is expected to serve
     machine_mock = self.mox.CreateMockAnything()
     machine_mock.value = 2
     hachoir_parser_mock.__getitem__('/header/machine').AndReturn(
         machine_mock)
     endian_mock = self.mox.CreateMockAnything()
     endian_mock.display = 'fake-endian'
     hachoir_parser_mock.__getitem__('/header/endian').AndReturn(
         endian_mock)
     # libmagic is mocked to report an executable mime type
     magic_cookie_mock = self.mox.CreateMockAnything()
     self.mox.StubOutWithMock(magic, 'open')
     magic.open(0).AndReturn(magic_cookie_mock)
     magic_cookie_mock.load()
     # The flag name differs between magic bindings.
     # NOTE(review): flag stays unbound if neither attribute exists
     if "MAGIC_MIME" in dir(magic):
         flag = magic.MAGIC_MIME
     elif "MIME" in dir(magic):
         flag = magic.MIME
     magic_cookie_mock.setflags(flag)
     magic_cookie_mock.file(u'/fake/path/CSWfoo/root/foo-file').AndReturn(
         "application/x-executable")
     # Filesystem probes expected while the package directory is inspected
     self.mox.StubOutWithMock(os.path, 'isdir')
     self.mox.StubOutWithMock(os.path, 'exists')
     self.mox.StubOutWithMock(os, 'walk')
     # self.mox.StubOutWithMock(__builtins__, 'open')
     os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
     os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
     os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
     os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
     os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
     os.walk("/fake/path/CSWfoo/root").AndReturn([
         ("/fake/path/CSWfoo/root", [], ["foo-file"]),
     ])
     # Switch from recording expectations to replaying them
     self.mox.ReplayAll()
     ip = inspective_package.InspectivePackage("/fake/path/CSWfoo")
     ip.pkginfo_dict = {
         "BASEDIR": "",
     }
     self.assertEqual([u'foo-file'], ip.ListBinaries())
Ejemplo n.º 29
0
def extract_title(filename):
	"""Return the 'title' entry of the hachoir metadata of ``filename``."""
	filename, realname = unicodeFilename(filename), filename
	parser = createParser(filename, realname)
	metadata = extractMetadata(parser)

	# The plaintext export that used to be computed here was never used,
	# so it has been dropped
	return metadata.get('title')
Ejemplo n.º 30
0
 def testListBinaries(self):
   """Check that ListBinaries() returns ['foo-file'] for a mocked package.

   hachoir_parser, magic and the os functions used to walk the package
   directory are replaced by mox mocks, so no real files are touched.
   The order of the expectations recorded below must not be changed.
   """
   # Expected hachoir parser creation for the single file in the package
   self.mox.StubOutWithMock(hachoir_parser, 'createParser',
       use_mock_anything=True)
   hachoir_parser_mock = self.mox.CreateMockAnything()
   hachoir_parser.createParser(
       u'/fake/path/CSWfoo/root/foo-file').AndReturn(hachoir_parser_mock)
   self.mox.StubOutWithMock(os, 'access')
   os.access(u'/fake/path/CSWfoo/root/foo-file', os.R_OK).AndReturn(True)
   # Header fields the mocked parser is expected to serve
   machine_mock = self.mox.CreateMockAnything()
   machine_mock.value = 2
   hachoir_parser_mock.__getitem__('/header/machine').AndReturn(machine_mock)
   endian_mock = self.mox.CreateMockAnything()
   endian_mock.display = 'fake-endian'
   hachoir_parser_mock.__getitem__('/header/endian').AndReturn(endian_mock)
   # libmagic is mocked to report an executable mime type
   magic_cookie_mock = self.mox.CreateMockAnything()
   self.mox.StubOutWithMock(magic, 'open')
   magic.open(0).AndReturn(magic_cookie_mock)
   magic_cookie_mock.load()
   # The flag name differs between magic bindings.
   # NOTE(review): flag stays unbound if neither attribute exists
   if "MAGIC_MIME" in dir(magic):
     flag = magic.MAGIC_MIME
   elif "MIME" in dir(magic):
     flag = magic.MIME
   magic_cookie_mock.setflags(flag)
   magic_cookie_mock.file(
       u'/fake/path/CSWfoo/root/foo-file').AndReturn(
           "application/x-executable")
   # Filesystem probes expected while the package directory is inspected
   self.mox.StubOutWithMock(os.path, 'isdir')
   self.mox.StubOutWithMock(os.path, 'exists')
   self.mox.StubOutWithMock(os, 'walk')
   # self.mox.StubOutWithMock(__builtins__, 'open')
   os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
   os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
   os.path.isdir("/fake/path/CSWfoo").AndReturn(True)
   os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
   os.path.exists("/fake/path/CSWfoo/reloc").AndReturn(False)
   os.walk("/fake/path/CSWfoo/root").AndReturn(
       [
         ("/fake/path/CSWfoo/root", [], ["foo-file"]),
       ]
   )
   # Switch from recording expectations to replaying them
   self.mox.ReplayAll()
   ip = inspective_package.InspectivePackage("/fake/path/CSWfoo")
   ip.pkginfo_dict = {
       "BASEDIR": "",
   }
   self.assertEqual([u'foo-file'], ip.ListBinaries())
Ejemplo n.º 31
0
	def getData(self):
		"""Create a hachoir parser for ``self.filename`` and extract its metadata.

		On HachoirError the error is printed and the metadata is set to None.

		NOTE(review): the snippet appears truncated; what is done with the
		metadata is not shown here.
		"""
		filename, realname = unicodeFilename(self.filename), self.filename
		parser = createParser(filename, realname)
		try:
			metadata = extractMetadata(parser)
		except HachoirError, err:
			print "Metadata extraction error: %s" % unicode(err)
			metadata = None
Ejemplo n.º 32
0
def checkFile(filename, check_metadata, quality=1.0):
    """Announce on stdout that a parser is being created, then create it.

    When ``createParser`` raises ``InputStreamError`` the error is written to
    stdout and the process exits with status 1.  ``check_metadata`` and
    ``quality`` are not used by the code shown here.
    """
    out = sys.stdout
    out.write("  - Create parser: ")
    out.flush()  # make the progress text visible before the parser is created
    try:
        parser = createParser(filename)
    except InputStreamError as err:
        out.write("stream error! %s\n" % unicode(err))
        sys.exit(1)
Ejemplo n.º 33
0
def checkFile(filename, check_parser):
    """Write a progress message to stdout and create a parser for *filename*.

    An ``InputStreamError`` from ``createParser`` is reported on stdout and
    terminates the process with exit status 1.  ``check_parser`` is not used
    by the code shown here.
    """
    stream = sys.stdout
    stream.write("  - Create parser: ")
    stream.flush()
    try:
        parser = createParser(filename)
    except InputStreamError as err:
        message = "stream error! %s\n" % unicode(err)
        stream.write(message)
        sys.exit(1)
 def getData(self):
     """Parse ``self.filename`` with hachoir and try to extract its metadata.

     On ``HachoirError`` the message is printed to stdout and the local
     ``metadata`` falls back to ``None``.
     """
     realname = self.filename
     filename = unicodeFilename(realname)
     parser = createParser(filename, realname)
     try:
         metadata = extractMetadata(parser)
     except HachoirError as err:
         print("Metadata extraction error: %s" % unicode(err))
         metadata = None
Ejemplo n.º 35
0
    def which_type(self, image_path):
        """
        Analyzes the image provided and attempts to determine whether it is a poster, banner or fanart.

        The decision is based on the width/height ratio reported by hachoir.

        :param image_path: full path to the image
        :return: POSTER, BANNER or FANART if it concluded one of those, or None if the image was none of them
                 (or didn't exist, was empty, or had no readable metadata)
        """

        if not ek(os.path.isfile, image_path):
            logger.log(
                u"Couldn't check the type of {image_path} cause it doesn't exist"
                .format(image_path=image_path), logger.WARNING)
            return None

        if try_int(ek(os.path.getsize, image_path)) == 0:
            logger.log(
                u'Image has 0 bytes size. Deleting it: {image_path}'.format(
                    image_path=image_path), logger.WARNING)
            try:
                ek(os.remove, image_path)
            except OSError as e:
                logger.log(
                    u"Could't delete file: '{image_path}'. Please manually delete it. Error: {error_msg}"
                    .format(image_path=image_path,
                            error_msg=e), logger.WARNING)
            return

        # use hachoir to parse the image for us
        img_parser = createParser(image_path)
        try:
            img_metadata = extractMetadata(img_parser)

            if not img_metadata:
                logger.log(
                    u"Unable to get metadata from {image_path}, not using your existing image"
                    .format(image_path=image_path), logger.DEBUG)
                return None

            img_ratio = float(img_metadata.get('width')) / float(
                img_metadata.get('height'))
        finally:
            # Release the parser's input on every exit path (missing metadata,
            # failed ratio computation, success) so the file handle is not leaked.
            if img_parser is not None:
                img_parser.stream._input.close()

        # most posters are around 0.68 width/height ratio (eg. 680/1000)
        if 0.55 < img_ratio < 0.8:
            return self.POSTER

        # most banners are around 5.4 width/height ratio (eg. 758/140)
        elif 5 < img_ratio < 6:
            return self.BANNER

        # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
        elif 1.7 < img_ratio < 1.8:
            return self.FANART
        else:
            logger.log(
                u"Image has size ratio of {img_ratio}, unknown type".format(
                    img_ratio=img_ratio), logger.WARNING)
            return
Ejemplo n.º 36
0
    def qualityFromFileMeta(filename):  # pylint: disable=too-many-branches
        """
        Get quality from file metadata

        The video height comes from the hachoir metadata; the source
        (bluray / web-dl / dvd) is guessed from the file's base name.

        :param filename: Filename to analyse
        :return: Quality prefix
        """

        log.use_print = False

        try:
            parser = createParser(filename)
        except Exception:  # pylint: disable=broad-except
            parser = None

        if not parser:
            return Quality.UNKNOWN

        try:
            metadata = extractMetadata(parser)
        except Exception:  # pylint: disable=broad-except
            metadata = None

        # Closing the parser input is best-effort; a failure here is ignored.
        try:
            parser.stream._input.close()  # pylint: disable=protected-access
        except Exception:  # pylint: disable=broad-except
            pass

        if not metadata:
            return Quality.UNKNOWN

        video_height = 0
        if metadata.has('height'):
            video_height = int(metadata.get('height') or 0)
        elif callable(getattr(metadata, "iterGroups", None)):
            # No top-level height: look through the metadata groups; the last
            # group that reports a height wins.
            for group in metadata.iterGroups():
                if group.has('height'):
                    video_height = int(group.get('height') or 0)

        if not video_height:
            return Quality.UNKNOWN

        base_filename = ek(path.basename, filename)
        is_bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        is_webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

        # A web-dl match takes precedence over a bluray match.
        if video_height > 1000:
            if is_webdl:
                return Quality.FULLHDWEBDL
            return Quality.FULLHDBLURAY if is_bluray else Quality.FULLHDTV

        if 680 < video_height < 800:
            if is_webdl:
                return Quality.HDWEBDL
            return Quality.HDBLURAY if is_bluray else Quality.HDTV

        if video_height < 680:
            is_disc_rip = re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None
            return Quality.SDDVD if is_disc_rip else Quality.SDTV

        # Heights of exactly 680 or in the 800..1000 range are not classified.
        return Quality.UNKNOWN
Ejemplo n.º 37
0
def getMetadata(filename):
	"""Return the hachoir metadata extracted from *filename*.

	Prints "Unable to parse file" and exits with status 1 when no parser
	can be created for the file.
	"""
	realname = filename
	parser = createParser(unicodeFilename(realname), realname)
	if not parser:
		print("Unable to parse file")
		exit(1)
	return extractMetadata(parser)
Ejemplo n.º 38
0
 def __init__(self, file_path, data=None):
     """Set up the instance from supplied *data* or by parsing *file_path*.

     When *data* is given it is handed to ``self.populate``; otherwise a
     hachoir parser is created for the file and stored on ``self._parser``,
     with a message sent to ``stderr`` if that fails.
     """
     self._file_data = {}
     self._file_path = file_path
     if data is not None:
         self.populate(data)
         return
     # No data supplied: fall back to parsing the file itself.
     self._parser = createParser(unicode(file_path, "utf-8"), file_path)
     if not self._parser:
         stderr("Unable to parse file: " + file_path)
Ejemplo n.º 39
0
    def GetFilesMetadata(self):
        """Returns a data structure with all the files plus their metadata.

        The list is built on first use and cached in ``self.files_metadata``:

        [
          {
            "path": ...,
            "mime_type": ...,
          },
        ]

        Entries accepted by ``sharedlib_utils.IsBinary`` for which a hachoir
        parser could be created also carry "mime_type_by_hachoir",
        "machine_id" and "endian".

        Raises:
          package.PackageError: if the mime type of a file cannot be established.
        """
        if self.files_metadata:
            return self.files_metadata

        self.CheckPkgpathExists()
        self.files_metadata = []
        files_root = self.GetFilesDir()
        all_files = self.GetAllFilePaths()
        # Leading "reloc/" or "root/" is removed from the recorded paths.
        root_re = re.compile(r"^(reloc|root)/")
        file_magic = FileMagic()
        basedir = self.GetBasedir()
        for file_path in all_files:
            full_path = unicode(self.MakeAbsolutePath(file_path))
            stripped_path = root_re.sub("", file_path)
            mime_type = file_magic.GetFileMimeType(full_path)
            file_info = {"path": stripped_path, "mime_type": mime_type}
            if basedir:
                file_info["path"] = os.path.join(basedir, stripped_path)
            if not mime_type:
                logging.error("Could not establish the mime type of %s",
                              full_path)
                # We really don't want that, as it misses binaries.
                msg = (
                    "It was not possible to establish the mime type of %s.  "
                    "It's a known problem which occurs when indexing a large "
                    "number of packages in a single run.  "
                    "It's probably caused by a bug in libmagic, or a bug in "
                    "libmagic Python bindings. "
                    "Currently, there is no fix for it.  "
                    "You have to restart your process - it "
                    "will probably finish successfully when do you that." %
                    full_path)
                raise package.PackageError(msg)
            if sharedlib_utils.IsBinary(file_info):
                parser = hp.createParser(full_path)
                if parser:
                    file_info["mime_type_by_hachoir"] = parser.mime_type
                    file_info["machine_id"] = parser["/header/machine"].value
                    file_info["endian"] = parser["/header/endian"].display
                else:
                    logging.warning("Can't parse file %s", file_path)
            self.files_metadata.append(file_info)
        return self.files_metadata
Ejemplo n.º 40
0
 def get_metadata(self,fullurl):
     """Return the hachoir metadata for *fullurl*.

     Any exception raised while parsing or extracting is printed and the
     placeholder string 'not' is returned instead.
     """
     try:
         parser = createParser(unicode(fullurl), fullurl)
         return extractMetadata(parser)
     except Exception as e:
         print ("Error getting metadata ",e.args)
     # Only reached after a failure above.
     return 'not'
Ejemplo n.º 41
0
    def qualityFromFileMeta(filename):  # pylint: disable=too-many-branches
        """
        Get quality from file metadata

        Height is read from the hachoir metadata and the release source is
        inferred from the base name of the file.

        :param filename: Filename to analyse
        :return: Quality prefix
        """

        log.use_print = False

        try:
            parser = createParser(filename)
        except Exception:  # pylint: disable=broad-except
            parser = None

        if not parser:
            return Quality.UNKNOWN

        try:
            metadata = extractMetadata(parser)
        except Exception:  # pylint: disable=broad-except
            metadata = None

        # Best-effort close of the parser input; errors are deliberately ignored.
        try:
            parser.stream._input.close()  # pylint: disable=protected-access
        except Exception:  # pylint: disable=broad-except
            pass

        if not metadata:
            return Quality.UNKNOWN

        frame_height = 0
        if metadata.has('height'):
            frame_height = int(metadata.get('height') or 0)
        elif callable(getattr(metadata, "iterGroups", None)):
            # Fall back to the metadata groups; a later group overrides an earlier one.
            for metagroup in metadata.iterGroups():
                if metagroup.has('height'):
                    frame_height = int(metagroup.get('height') or 0)

        if not frame_height:
            return Quality.UNKNOWN

        base_filename = ek(path.basename, filename)
        from_bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        from_web = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

        # Web sources win over bluray when both patterns match.
        if frame_height > 1000:
            if from_web:
                return Quality.FULLHDWEBDL
            if from_bluray:
                return Quality.FULLHDBLURAY
            return Quality.FULLHDTV

        if 680 < frame_height < 800:
            if from_web:
                return Quality.HDWEBDL
            if from_bluray:
                return Quality.HDBLURAY
            return Quality.HDTV

        if frame_height < 680:
            if re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None:
                return Quality.SDDVD
            return Quality.SDTV

        # Exactly 680, or anything from 800 up to 1000, stays unclassified.
        return Quality.UNKNOWN
Ejemplo n.º 42
0
    def __get_hd_tag__(self, video):
        """Return the tag code matching the frame width of *video*.

        :param video: path of the video file to inspect
        :return: 404 for a width of 1280, 1604 for a width of 1920 and 104
                 otherwise, including when the file cannot be parsed or
                 yields no metadata
        """
        result = 104

        parser = createParser(unicodeFilename(video))
        file_metadata = extractMetadata(parser) if parser is not None else None
        if file_metadata is None:
            # Nothing to inspect: keep the default instead of failing with
            # AttributeError on None.
            return result

        width = file_metadata.get('width')  # looked up once, compared twice
        if width == 1280:
            result = 404
        elif width == 1920:
            result = 1604

        return result
Ejemplo n.º 43
0
def getinfo(rootdir, extensions=(".avi", ".mp4" , ".mov")):
    """Yield ``(path, metadata)`` for each file under *rootdir* matching *extensions*.

    A non-unicode *rootdir* is decoded with the filesystem encoding first.
    Directories and files are visited in sorted order; the metadata comes
    from ``extractMetadata(createParser(path))``.
    """
    if not isinstance(rootdir, unicode):
        rootdir = rootdir.decode(sys.getfilesystemencoding())
    for dirpath, dirs, files in os.walk(rootdir):
        dirs.sort()  # in place, so os.walk descends in sorted order
        files.sort()
        matching = (name for name in files if name.endswith(extensions))
        for name in matching:
            path = os.path.join(dirpath, name)
            yield path, extractMetadata(createParser(path))
Ejemplo n.º 44
0
def getinfo(rootdir, extensions=(".avi", ".mp4", ".mov")):
    """Walk *rootdir* and yield ``(path, metadata)`` for files ending in *extensions*.

    *rootdir* is decoded with the filesystem encoding when it is not already
    unicode.  Both directories and file names are processed in sorted order.
    """
    if not isinstance(rootdir, unicode):
        rootdir = rootdir.decode(sys.getfilesystemencoding())
    for dirpath, dirs, files in os.walk(rootdir):
        dirs.sort()  # sorting in place steers the traversal order of os.walk
        files.sort()
        for filename in files:
            if not filename.endswith(extensions):
                continue
            path = os.path.join(dirpath, filename)
            metadata = extractMetadata(createParser(path))
            yield path, metadata
Ejemplo n.º 45
0
def file_mimetype(filename):
    """Return the MIME type that Cigma identifies for *filename*.

    Returns ``None`` when *filename* is empty or ``None``, or when Cigma
    reports no match.
    """
    if not filename:
        return None
    match = Cigma().identify(filename=filename)["match"]
    return match["mimetype"] if match else None
Ejemplo n.º 46
0
Archivo: utils.py Proyecto: nseidl/Ripe
def _get_hachoir_metadata(blob_path):
    """Parse *blob_path* with hachoir and extract metadata at best quality.

    Prints a message and exits with status 1 when no parser can be created.
    A ``HachoirError`` during extraction is printed and the local
    ``metadata`` is set to ``None``.
    """
    parser = createParser(blob_path)
    if not parser:
        print("Unable to parse file")
        exit(1)
    try:
        metadata = extractMetadata(parser, quality=metadata_item.QUALITY_BEST)
    except HachoirError as err:
        print("Metadata extraction error: {}".format(err))
        metadata = None
Ejemplo n.º 47
0
 def __init__(self, input_filename, **kw):
     """Initialise the Fuse base class and open *input_filename* with hachoir.

     NOTE(review): the hachoir log file path is hard-coded to a developer's
     home directory.
     """
     Fuse.__init__(self, **kw)
     log.setFilename("/home/haypo/fuse_log")
     self.hachoir = createParser(input_filename)
     # Hard-wired switch: True wraps the parser in an editor (read-write),
     # False would keep the bare parser (read-only).
     editable = True
     if editable:
         self.hachoir = createEditor(self.hachoir)
     self.readonly = not editable
     self.fs_charset = "ASCII"
 def __init__(self, input_filename, **kw):
     """Set up the Fuse filesystem on top of a hachoir view of *input_filename*.

     NOTE(review): hachoir logging goes to a hard-coded path under
     /home/haypo.
     """
     Fuse.__init__(self, **kw)
     log.setFilename("/home/haypo/fuse_log")
     parser = createParser(input_filename)
     self.hachoir = parser
     # Constant toggle: the editor wrapper is always enabled, which makes the
     # filesystem writable.
     use_editor = True
     if use_editor:
         self.hachoir = createEditor(parser)
     self.readonly = not use_editor
     self.fs_charset = "ASCII"
Ejemplo n.º 49
0
def get_video_creation_date_metadata(fname):

    """
    Returns the "Creation date" entry from the metadata of a file

    The return string will have the format
    '- Creation date: YYYY-MM-DD HH:MM:SS' or if no metadata is found
    or the file is not valid or doesn't exist, an exception will be thrown

    :param fname:   Name of file to read the metadata from
    :returns:       creation data metadata in specified format
    :raises VideoMetadataError: if the file cannot be parsed, metadata
                    extraction fails, no metadata is found, or the metadata
                    has no 'Creation date' line

    :Example:

        >>> import fileops
        >>> print fileops.get_video_creation_date_metadata("IMG_1234.JPG")
        '- Creation date: 2013-09-30 15:21:42'
    """

    # suppress errors from hachoir calls, use our own logging
    hachoir_core.config.quiet = True

    # try to access tags associated with video files using
    # hachoir parser
    try:
        fname, realname = hachoir_core.cmd_line.unicodeFilename(
                fname), fname
        parser = hachoir_parser.createParser(fname, realname)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # still propagate.
        raise VideoMetadataError("Unable to parse video file")

    if not parser:
        raise VideoMetadataError("Unable to parse video file")

    try:
        metadata = hachoir_metadata.extractMetadata(parser)
    except HachoirError:
        raise VideoMetadataError("Error extracting metadata ")
    finally:
        # hachoir doesn't close the file associated with
        # the parser object, hence need to do this
        parser.stream._input.close()

    if not metadata:
        raise VideoMetadataError("No metadata found")

    # The terminal charset does not change between lines: look it up once.
    charset = hachoir_core.i18n.getTerminalCharset()
    for line in metadata.exportPlaintext():
        printable = hachoir_core.tools.makePrintable(line, charset)
        if "Creation date" in printable:
            return printable

    raise VideoMetadataError("No 'Creation date' found in metadata")
Ejemplo n.º 50
0
def metadata_for_video(filename):
    """Create a hachoir parser for *filename* and extract its metadata.

    Prints a message and exits with status 1 when the file cannot be parsed.
    A ``HachoirError`` during extraction is printed and the local
    ``metadata`` becomes ``None``.
    """
    realname = filename
    parser = createParser(unicodeFilename(realname), realname)
    if not parser:
        print("Unable to parse file")
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError as err:
        print("Metadata extraction error: %s" % unicode(err))
        metadata = None
Ejemplo n.º 51
0
def metadata_map(filename):
  """Return hachoir's raw metadata mapping for *filename*, or None.

  None is returned for hidden files (base name starting with '.'), for
  paths with an empty base name (empty string or trailing '/'), when no
  parser can be created and when no metadata can be extracted.
  """
  basename = filename.split('/')[-1]
  # An empty base name names no file at all; indexing its first character
  # would raise IndexError, so it is rejected together with dot-files.
  if not basename or basename.startswith('.'):
    return None
  parser = createParser(filename)
  if not parser:
    return None

  extracted = metadata.extractMetadata(parser)
  if extracted is None:
    return None
  # NOTE(review): this reads the name-mangled private attribute of the
  # Metadata object; there is no public accessor used here.
  return extracted._Metadata__data
Ejemplo n.º 52
0
 def parse(self):
     """Create a hachoir parser for ``self.filename`` and extract its metadata.

     Writes an error line to stderr and returns ``None`` when the file
     cannot be parsed.  A ``HachoirError`` during extraction is printed to
     stdout and the local ``metadata`` is set to ``None``.
     """
     filename, realname = unicodeFilename(self.filename), self.filename
     parser = hachoir_parser.createParser(filename, realname)
     if not parser:
         # Terminate the message with a real newline (was the literal "/n").
         sys.stderr.write("Unable to parse file %s\n" % self.filename)
         return
     try:
         ## TODO This call gives a warning when there is no GPS data
         metadata = hachoir_metadata.extractMetadata(parser)
     except HachoirError as err:
         print("Metadata extraction error: %s" % unicode(err))
         metadata = None
Ejemplo n.º 53
0
def getMetadata(filename):
    """Return the plain-text metadata export for *filename*, or None.

    None is returned when metadata extraction raises or yields nothing.
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    try:
        metadata = extractMetadata(parser)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # are not swallowed.
        return None

    if metadata is None:
        return None
    return metadata.exportPlaintext()
Ejemplo n.º 54
0
def get_video_creation_date_metadata(fname):
    """
    Returns the "Creation date" entry from the metadata of a file

    The return string will have the format
    '- Creation date: YYYY-MM-DD HH:MM:SS' or if no metadata is found
    or the file is not valid or doesn't exist, an exception will be thrown

    :param fname:   Name of file to read the metadata from
    :returns:       creation data metadata in specified format
    :raises VideoMetadataError: if the file cannot be parsed, metadata
                    extraction fails, no metadata is found, or the metadata
                    has no 'Creation date' line

    :Example:

        >>> import fileops
        >>> print fileops.get_video_creation_date_metadata("IMG_1234.JPG")
        '- Creation date: 2013-09-30 15:21:42'
    """

    # suppress errors from hachoir calls, use our own logging
    hachoir_core.config.quiet = True

    # try to access tags associated with video files using
    # hachoir parser
    try:
        fname, realname = hachoir_core.cmd_line.unicodeFilename(fname), fname
        parser = hachoir_parser.createParser(fname, realname)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt / SystemExit
        # are not converted into VideoMetadataError.
        raise VideoMetadataError("Unable to parse video file")

    if not parser:
        raise VideoMetadataError("Unable to parse video file")

    try:
        metadata = hachoir_metadata.extractMetadata(parser)
    except HachoirError:
        raise VideoMetadataError("Error extracting metadata ")
    finally:
        # hachoir doesn't close the file associated with
        # the parser object, hence need to do this
        parser.stream._input.close()

    if not metadata:
        raise VideoMetadataError("No metadata found")

    # Loop-invariant: resolve the terminal charset once.
    terminal_charset = hachoir_core.i18n.getTerminalCharset()
    for line in metadata.exportPlaintext():
        printable = hachoir_core.tools.makePrintable(line, terminal_charset)
        if "Creation date" in printable:
            return printable

    raise VideoMetadataError("No 'Creation date' found in metadata")
 def processFile(self, filename):
     """Announce, parse and extract metadata from *filename*.

     Returns ``None`` after writing a message to ``stderr`` when the file
     cannot be parsed or when extraction raises ``HachoirError``.
     """
     realname = filename
     filename = unicodeFilename(realname)
     print(u"[%s] Process file %s..." % (self.total, filename))
     parser = createParser(filename, realname)
     if not parser:
         # Python 2 print-chevron form, kept so the output is unchanged.
         print >> stderr, "Unable to parse file"
         return None
     try:
         metadata = extractMetadata(parser)
     except HachoirError as err:
         print >> stderr, "Metadata extraction error: %s" % unicode(err)
         return None
Ejemplo n.º 56
0
    def GetSongName(self,songDir):
        parser = createParser(songDir)
        print parser

        meta=""

        # See what keys you can extract
        for k,v in metadata.extractMetadata(parser)._Metadata__data.iteritems():

            if v.values:
                print v.key, v.values[0].value
                if v.key=="title":
                    return v.values[0].value