Beispiel #1
0
def extract_metadata(audio):
    """Guess a hachoir parser for raw audio data and return its metadata.

    :param audio: raw content that is wrapped in a ``StringInputStream``
    :return: the result of ``extractMetadata`` for the guessed parser
    :raises ValueError: when ``guessParser`` does not find a parser
    """
    stream = StringInputStream(audio)
    parser = guessParser(stream)
    if parser:
        return extractMetadata(parser)

    raise ValueError("Could not parse the stream")
Beispiel #2
0
 def getFragment(self, frag):
     """Return a parser for the fragment's sub-stream, or the fragment itself.

     When ``guessParser`` cannot handle the sub-stream a warning is emitted
     and the untouched ``frag`` is handed back instead.
     """
     parser = guessParser(frag.getSubIStream())
     if parser:
         return parser
     warning("Unable to create the OLE2 mini stream parser!")
     return frag
Beispiel #3
0
    def which_type(self, path):
        """
        Analyzes the image provided and classifies it by its width/height ratio.

        :param path: full path to the image
        :return: self.POSTER, self.BANNER or self.FANART when the ratio falls into the
                 matching range; None if the file does not exist, no metadata could be
                 extracted, the height is missing/zero, or the ratio is unknown
        """

        if not os.path.isfile(path):
            sickrage.app.log.warning("Couldn't check the type of " + str(path) + " cause it doesn't exist")
            return None

        with io.open(path, 'rb') as fh:
            img_metadata = extractMetadata(guessParser(StringInputStream(fh.read())))

        if not img_metadata:
            sickrage.app.log.debug(
                "Unable to get metadata from " + str(path) + ", not using your existing image")
            return None

        width = float(img_metadata.get('width', 0))
        height = float(img_metadata.get('height', 0))
        if not height:
            # A missing or zero height would otherwise raise ZeroDivisionError below.
            sickrage.app.log.debug(
                "Unable to get the image height from " + str(path) + ", not using your existing image")
            return None

        img_ratio = width / height

        # most posters are around 0.68 width/height ratio (eg. 680/1000)
        if 0.55 < img_ratio < 0.8:
            return self.POSTER

        # most banners are around 5.4 width/height ratio (eg. 758/140)
        if 5 < img_ratio < 6:
            return self.BANNER

        # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
        if 1.7 < img_ratio < 1.8:
            return self.FANART

        sickrage.app.log.warning("Image has size ratio of " + str(img_ratio) + ", unknown type")
        return None
Beispiel #4
0
    def to_python(self, data):
        """Validate that the uploaded data is an MP3 file.

        Delegates to the parent field first; when that yields ``None`` the
        result is ``None``. Otherwise the content is handed to hachoir and
        must validate with the mime type ``audio/mpeg``.

        :param data: uploaded file (object with ``temporary_file_path`` or
            ``read``) or a mapping with a ``'content'`` entry
        :return: the value produced by the parent ``to_python``, rewound to
            position 0 when it supports ``seek``
        :raises forms.ValidationError: when the content is not an MP3
        """
        f = super(AudioField, self).to_python(data)
        if f is None:
            return None

        if hasattr(data, 'temporary_file_path'):
            source = open(data.temporary_file_path(), 'rb')
        elif hasattr(data, 'read'):
            source = BytesIO(data.read())
        else:
            source = BytesIO(data['content'])

        try:
            parser = guessParser(InputIOStream(source))

            # guessParser yields None for unknown content; treat it like any
            # other non-MP3 input instead of relying on an AttributeError.
            if parser is None or not (parser.validate() and parser.mime_type == u'audio/mpeg'):
                raise Exception
        except ImportError:
            raise
        except Exception:  # not an mp3
            raise forms.ValidationError(self.error_messages['invalid_format'])
        finally:
            # The handle was opened only for validation; do not leak it.
            source.close()

        if hasattr(f, 'seek') and callable(f.seek):
            f.seek(0)
        return f
Beispiel #5
0
def getOLE2Parser(ole2, path):
    """Guess a parser for the first fragment (``path + "[0]"``) of an OLE2 entry.

    The fragment is looked up in ``ole2`` itself when present there,
    otherwise in the object returned by ``getRootParser(ole2)``.
    """
    name = path + "[0]"
    container = ole2 if name in ole2 else getRootParser(ole2)
    return guessParser(container[name].getSubIStream())
Beispiel #6
0
 def useRoot(self, root):
     """Extract from the parser guessed for ``root``'s sub-stream.

     Emits a warning and does nothing else when no parser can be created.
     """
     parser = guessParser(root.getSubIStream())
     if parser:
         self._extract(parser, main_document=False)
     else:
         warning("Unable to create the OLE2 mini stream parser!")
Beispiel #7
0
    def qualityFromFileMeta(filename):
        """
        Get quality from file metadata

        The file name decides between TV / BluRay / WEB-DL flavours, the
        'height' found in the hachoir metadata (root entry first, then its
        groups) decides the resolution class.

        :param filename: Filename to analyse
        :return: Quality prefix, Quality.UNKNOWN when nothing could be determined
        """

        from hachoir_core.stream import StringInputStream
        from hachoir_parser import guessParser
        from hachoir_metadata import extractMetadata
        from hachoir_core.log import log
        log.use_print = False

        if ek(os.path.isfile, filename):
            base_filename = ek(os.path.basename, filename)
            bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
            webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

            try:
                with ek(io.open, filename, "rb") as media:
                    file_metadata = extractMetadata(guessParser(StringInputStream(media.read())))
                    if file_metadata:
                        for metadata in chain([file_metadata], file_metadata.iterGroups()):
                            # Use a non-None default: hachoir's Metadata.get() raises
                            # ValueError for a missing key when the default is None,
                            # which would abort the scan before the groups are tried.
                            height = metadata.get('height', 0)
                            if not height:
                                continue
                            if height > 1000:
                                return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                            elif 680 < height < 800:
                                return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                            elif height < 680:
                                return (Quality.SDTV, Quality.SDDVD)[re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
            except Exception as e:
                sickbeard.logger.log(ex(e))

        return Quality.UNKNOWN
Beispiel #8
0
    def qualityFromFileMeta(filename):
        """
        Get quality from file metadata

        The file is read in chunks via ``readFileBuffered``; every chunk is
        handed to hachoir until one of them yields a usable 'height'.

        :param filename: Filename to analyse
        :return: Quality prefix, Quality.UNKNOWN when nothing could be determined
        """

        from hachoir_core.stream import StringInputStream
        from hachoir_parser import guessParser
        from hachoir_metadata import extractMetadata
        from hachoir_core import config as hachoir_config
        hachoir_config.quiet = True

        if os.path.isfile(filename):
            base_filename = os.path.basename(filename)
            bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
            webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

            for byte in readFileBuffered(filename):
                try:
                    file_metadata = extractMetadata(guessParser(StringInputStream(byte)))
                    if not file_metadata:
                        continue
                    for metadata in chain([file_metadata], file_metadata.iterGroups()):
                        height = metadata.get('height', 0)
                        if not height:
                            # No height in this entry: an absent value must not be
                            # classified as SD just because the default 0 is < 680.
                            continue
                        if height > 1000:
                            return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                        elif 680 < height < 800:
                            return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                        elif height < 680:
                            return (Quality.SDTV, Quality.SDDVD)[
                                re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
                except Exception:
                    # Best effort: a chunk hachoir cannot digest is skipped, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    continue

        return Quality.UNKNOWN
Beispiel #9
0
def media_from_file(infile, batch, user, manual=False):
    """Creates an instance of correct Media class from an open file.

    :param infile: open file object; its ``name`` is used for type detection
    :param batch: batch the new media object is attached to
    :param user: owner of the new media object
    :param manual: when true the file is stored through the model's file
        field ``save()`` after instantiation instead of being passed to the
        constructor
    :return: the saved model instance, or None when no media class matches
    """
    stream = InputIOStream(infile)
    parser = hachoir_parser.guessParser(stream)
    metadata = hachoir_metadata.extractMetadata(parser)
    model_class = klass_from_metadata(metadata, infile.name)

    if not model_class:
        # TODO: need to test different errors
        log.warn('no media found for: %s', infile.name)
        return None

    mediatype = model_class.mediatype()
    cursor = connection.cursor()
    cursor.execute("SELECT nextval ('gallery_mediabase_id_seq')")
    slugid = cursor.fetchone()[0]

    slug = '%s.%d' % (user.username, slugid)
    args = {'owner': user,
            'slug': slug,
            'status': 'uploaded',
            'textheight' : 50,
            'batch': batch}

    # 'IKOptions' marks some type of image object
    is_image = hasattr(model_class, 'IKOptions')

    if not manual:
        if is_image:
            args['image'] = infile
        else:
            args['filefield'] = infile

    # extractMetadata() yields None when hachoir has no extractor for the
    # file; a media class may still have been chosen, so guard every
    # metadata access instead of failing with AttributeError.
    if metadata is not None:
        for dimension in ('width', 'height'):
            dimvalue = metadata.get(dimension, False)
            if dimvalue:
                args[dimension] = dimvalue

        if metadata.has('creation_date'):
            created = metadata.get('creation_date', None)
            if created:
                args['year'] = created.year

    if mediatype == 'video' and not infile.name.endswith('flv'):
        args['encode'] = True

    instance = model_class(**args)
    if manual:
        fn = os.path.basename(infile.name)
        fileobj = File(infile)
        log.debug('manual creation of %s: %s', mediatype, fn)
        if is_image:
            instance.image.save(fn, fileobj)
        else:
            instance.filefield.save(fn, fileobj)

    instance.save()
    log.debug('Saved %s: %s' % (mediatype, instance.get_fname()))
    return instance
Beispiel #10
0
def get_duration(fn):
    """Return the value of the first 'duration' metadata item of a file.

    Yields None when no metadata is available; a falsy 'duration' item is
    returned as-is.
    """
    # Only the beginning of the file is handed over, otherwise hachoir
    # might try to read the whole file.
    with open(fn, 'rb') as handle:
        head = StringIO(handle.read(1024 * 64))
    stream = InputIOStream(head, filename=unicode(fn), tags=[])
    metadata = extractMetadata(guessParser(stream))
    if not metadata:
        return None
    item = metadata.getItem('duration', 0)
    return item and item.value
Beispiel #11
0
 def from_string(self, data):
     """Return the hachoir metadata extracted from an in-memory string."""
     from hachoir_core.stream import StringInputStream
     from hachoir_metadata import extractMetadata
     from hachoir_parser import guessParser

     return extractMetadata(guessParser(StringInputStream(data)))
Beispiel #12
0
 def extract_metadata(self, file):
     """Extract hachoir metadata from an open file object.

     Sets ``config.MAX_STR_LENGTH`` to infinity before parsing.

     :param file: open file object with a ``name`` attribute
     :raises MetadataException: wraps any ``HachoirError`` or ``TypeError``
     """
     config.MAX_STR_LENGTH = float("inf")
     try:
         name = file.name
         filename = name if isinstance(name, unicode) else unicodeFilename(name)
         parser = guessParser(
             InputIOStream(file, source="file:%s" % filename, tags=[], filename=filename))
         return extractMetadata(parser)
     except (HachoirError, TypeError) as e:
         raise MetadataException(e)
Beispiel #13
0
    def extract(self):
        """Wrap ``self.data`` in a hachoir input stream and guess a parser.

        Increments ``self.nb_extract`` and resets ``self.prefix``. When
        ``guessParser`` raises ``InputStreamError`` the parser is set to None.

        NOTE(review): the excerpt ends right after the parser is created;
        ``start`` and ``parser`` are presumably consumed by code that is not
        shown here -- confirm against the full source.
        """
        self.nb_extract += 1
        self.prefix = ""

        # Serialize the buffer and expose it to hachoir as a file-like stream
        data = self.data.tostring()
        stream = InputIOStream(StringIO(data), filename=self.filename)

        # Create parser
        start = time()  # timestamp taken before parsing; not used in the visible part
        try:
            parser = guessParser(stream)
        except InputStreamError, err:
            # Stream could not be read: continue without a parser
            parser = None
Beispiel #14
0
 def getField(self, fieldset, main_document, name):
     """Look up ``name`` in ``fieldset``, optionally as a parsed sub-stream.

     Returns None when the field does not exist. For the main document the
     field's sub-stream is parsed and the parser is returned (None plus a
     warning when no parser can be created); otherwise the raw field is
     returned.
     """
     if name not in fieldset:
         return None
     # _feedAll() is needed to make sure that we get all fragments
     # eg. summary[0], summary[1], ..., summary[n]
     fieldset._feedAll()
     field = fieldset[name]
     if not main_document:
         return field
     parser = guessParser(field.getSubIStream())
     if parser:
         return parser
     warning("Unable to create the OLE2 parser for %s!" % name)
     return None
Beispiel #15
0
def metadata_for_filelike(filelike):
  """Return the raw hachoir metadata mapping for a file-like object.

  The object is rewound first. None is returned when it cannot be rewound,
  no parser is found, metadata extraction raises ``HachoirError``, or
  hachoir produces no metadata object for the parser.
  """
  try:
    filelike.seek(0)
  except (AttributeError, IOError):
    return None
  stream = InputIOStream(filelike, None, tags=[])
  parser = guessParser(stream)
  if not parser:
    return None
  try:
    metadata = extractMetadata(parser)
  except HachoirError:
    return None
  if metadata is None:
    # extractMetadata() returns None when no extractor exists for the
    # parser; avoid an AttributeError on the attribute access below.
    return None
  return metadata._Metadata__data
Beispiel #16
0
def _parse_file(filename):
    """Extract metadata from the first 64 KiB of a file."""
    # Workaround to fix unicode path problem on different OSs:
    # plain open() on Windows, File() everywhere else.
    handle = open(filename, 'rb') if sys.platform == 'win32' else File(filename)
    try:
        head = handle.read(1024 * 64)
        stream = InputIOStream(StringIO(head), filename=unicode(filename), tags=[])
        return extractMetadata(guessParser(stream))
    finally:
        handle.close()
Beispiel #17
0
 def _duration(self):
     """
     File duration in sec

     The value is cached in ``self._duration_cache`` after the first
     successful lookup.

     :raises Exception: 'Not an audio file' when hachoir finds no parser,
         no metadata or no (non-zero) duration for this file
     """
     cached = getattr(self, '_duration_cache', None)
     if cached:
         return cached

     parser = guessParser(InputIOStream(self))
     metadata = extractMetadata(parser) if parser is not None else None
     duration = None
     if metadata is not None:
         try:
             duration = metadata.get('duration')
         except ValueError:
             # hachoir raises ValueError for a missing key without default
             duration = None

     # Unknown formats and files without duration now report the intended
     # error instead of an unrelated AttributeError/ValueError.
     if not duration:
         raise Exception(u'Not an audio file')

     self._duration_cache = duration.seconds
     return self._duration_cache
 def run():
     """Process UI events: window resizes, key presses and tab requests.

     NOTE(review): this reads like an excerpt of a nested function -- it
     relies on names from an enclosing scope (``args``, ``ui``, ``top``,
     ``body``, ``log``, ``logger``, ``input``, ...) and the visible loop never
     fetches new events, so part of the original body is presumably missing.
     """
     # msg, _resize and retry are initialised here but not used in the visible part
     msg = _resize = retry = 0
     # Start with a synthetic resize event so the terminal size is read first
     events = ( "window resize", )
     profile_display = args.profile_display
     while True:
         for e in events:
             try:
                 if e == "window resize":
                     # Re-read the terminal size and request a relayout
                     size = ui.get_cols_rows()
                     resize = log.height
                 else:
                     # Let the top widget handle the key; it returns the key
                     # back when unhandled, or None when it was consumed
                     e = top.keypress(size, e)
                     if e is None:
                         pass
                     elif e in ('f1', '?'):
                         # Show the help tab, creating it on first use
                         try:
                             body.select(body.tabs.index(help))
                         except ValueError:
                             body.append(help)
                             resize = log.height
                     elif e in ('esc', 'ctrl w'):
                         # Close the current tab; leave when none is left
                         body.close()
                         if body.box_widget is None:
                             return
                         resize = log.height
                     elif e == '+':
                         # Shrink the log area by one (only while its height is non-zero)
                         if log.height:
                             resize = log.height - 1
                     elif e == '-':
                         # Grow the log area by one
                         resize = log.height + 1
                     elif e == 'q':
                         return
             #except AssertionError:
             #    hachoir_log.error(getBacktrace())
             except NewTab_Stream, e:
                 # A widget asked to open a field's sub-stream in a new tab
                 stream = e.field.getSubIStream()
                 # From here on `e` is the tab label "<active tab>/<field address>"
                 logger.objects[stream] = e = "%u/%s" % (body.active, e.field.absolute_address)
                 parser = guessParser(stream)
                 if not parser:
                     hachoir_log.error(_("No parser found for %s") % stream.source)
                 else:
                     logger.objects[parser] = e
                     body.append((e, TreeBox(charset, Node(parser, None), preload_fields, None, options)))
                     resize = log.height
             except NeedInput, e:
                 # A widget requested user input; forward its arguments
                 input.do(*e.args)
             if profile_display:
                 # Profiling mode: drop the handled event and restart the outer loop
                 events = events[1:]
                 break
 def extract(self, myfile):
     """Extract hachoir metadata from ``myfile``.

     Does nothing when ``self.available()`` is false. A ``HachoirError``
     during extraction is printed and leaves ``metadata`` set to None.

     NOTE(review): the excerpt ends after the extraction; ``realname`` and
     ``metadata`` are presumably used by code that is not shown here.
     """
     if not self.available():
         return
     # NOTE(review): opened with mode "r" -- confirm that myfile.open()
     # yields binary data, which hachoir parsers expect
     dataIO = myfile.open("r")
     filename, realname = unicodeFilename(myfile.name), myfile.name
     source = "file:%s" % filename 
     args = {"tags" : [], "filename" : filename}
     stream = InputIOStream(dataIO, source=source, **args)
     parser = guessParser(stream)
     try:
         metadata = extractMetadata(parser)
     except HachoirError, err:
         print "Metadata extraction error: %s" % unicode(err)
         metadata = None
Beispiel #20
0
  def get_track(self, track):
    """Build a Track from the first URL of ``track`` that hachoir can parse.

    URLs that cannot be downloaded or opened are logged and skipped; URLs
    without a parser or without metadata are skipped silently.

    :param track: object with ``urls`` and ``reference_key``
    :return: a new ``models.Track`` or None when no URL was usable
    """
    for url in track.urls:
      try:
        f = self.download_url(url)
        parser = guessParser(InputIOStream(f))
      except Exception:
        LOG.exception('Unable to handle url: %s' % url)
        continue

      if not parser:
        continue

      metadata = extractMetadata(parser)
      if metadata is None:
        # Recognised format without a metadata extractor: try the next URL
        # instead of failing with AttributeError.
        continue

      new_track = models.Track(reference_key = track.reference_key, url = url)
      new_track.title    = metadata.get('title')
      new_track.artist   = metadata.get('author')
      # Look the timedelta up once; days are folded into the seconds total.
      duration = metadata.get('duration')
      new_track.duration = 24 * 60 * 60 * duration.days + duration.seconds
      return new_track

    return None
Beispiel #21
0
def getMetadata(vidFile):
    """Return the hachoir metadata of a seekable file object, or None.

    None is returned when the object cannot be rewound, when no parser is
    found, or when extraction raises ``HachoirError``.
    """
    try:
        vidFile.seek(0)
    except (AttributeError, IOError):
        return None

    parser = guessParser(InputIOStream(vidFile, None, tags=[]))
    if parser:
        try:
            return extractMetadata(parser)
        except HachoirError:
            pass

    return None
Beispiel #22
0
  def attributes(self, node):
    """Collect the hachoir metadata of ``node`` into a VMap.

    Every metadata entry with at least one truthy value is stored under its
    key, multiple values joined with "; ". Problems are reported through
    the "info" entry instead of an exception.

    :param node: object whose ``open()`` returns a readable, closable file
    :return: the populated VMap
    """
    attr = VMap()
    attr.thisown = False
    handle = node.open()
    try:
      parser = guessParser(StringInputStream(handle.read()))
    finally:
      # Close the node even when reading or guessing fails
      handle.close()
    if not parser:
      attr["info"] = Variant("unable to read metadata")
      return attr

    try:
      metadata = extractMetadata(parser)
      if metadata is None:
        # No extractor for this parser: nothing to iterate over
        attr["info"] = Variant("unable to read metadata")
        return attr
      for data in metadata:
        if not any(data.values):
          continue
        attr[data.key] = Variant("; ".join([str(val.value) for val in data.values]))
    except HachoirError:
      attr["info"] = Variant("error while reading metadata")
    # The early-exit path returns the map, so the regular path must as well
    return attr
Beispiel #23
0
    def useSummary(self, summary):
        """Read the OS name and the properties of the first summary section.

        The summary's sub-stream is parsed with ``guessParser``; when no
        parser can be created a message is printed and nothing is extracted.
        """
        # FIXME: Remove this hack
        # Problem: there is no method to get all fragments from a file
        summary.parent._feedAll()
        # ---

        stream = summary.getSubIStream()
        summary = guessParser(stream)
        if not summary:
            print("Unable to create summary parser")
            # Without a parser the lookups below would fail on None
            return

        if "os" in summary:
            self.os = summary["os"].display
        if "section[0]" not in summary:
            return
        section = summary["section[0]"]
        for prop in section.array("property_index"):
            self.useProperty(section, prop)
Beispiel #24
0
    def _verify_download(self, file_name=None):
        """
        Checks the saved file to see if it was actually valid, if not then consider the download a failure.

        Only files whose name ends with 'torrent' are inspected; everything
        else is accepted as-is.

        :param file_name: path of the downloaded file
        :return: True when the file is accepted, False for an invalid torrent
        """

        # primitive verification of torrents, just make sure we didn't get a text file or something
        if file_name.endswith('torrent'):
            try:
                with open(file_name, 'rb') as torrent:
                    parser = guessParser(StringInputStream(torrent.read()))
                # guessParser yields None for unknown content
                if parser is not None and parser.mime_type == 'application/x-bittorrent':
                    return True
            except Exception as e:
                # format the exception itself: `e.message` does not exist on Python 3
                sickrage.srCore.srLogger.debug("Failed to validate torrent file: {}".format(e))

            sickrage.srCore.srLogger.debug("Result is not a valid torrent file")
            return False

        return True
Beispiel #25
0
    def _verify_download(self, file_name=None):
        """
        Checks the saved file to see if it was actually valid, if not then consider the download a failure.

        Only torrent providers are verified; other provider types always
        pass.

        :param file_name: path of the downloaded file
        :return: True when the file is accepted, False otherwise
        """
        result = True
        # primitive verification of torrents, just make sure we didn't get a text file or something
        if GenericProvider.TORRENT == self.providerType:
            parser = stream = None
            try:
                stream = FileInputStream(file_name)
                parser = guessParser(stream)
            except Exception:
                # Best effort: an unreadable or unparsable file simply fails
                # the check below (parser stays None).
                pass
            # Always hand back a real boolean, never None or a parser object
            result = bool(parser and 'application/x-bittorrent' == parser.mime_type)

            if stream is not None:
                try:
                    stream._input.close()
                except Exception:
                    # Closing is cleanup only; it must not affect the verdict
                    pass

        return result
Beispiel #26
0
    def _verify_download(self, file_name=None):
        """
        Checks the saved file to see if it was actually valid, if not then consider the download a failure.

        Only files whose name ends with ``GenericProvider.TORRENT`` are
        inspected; everything else is accepted as-is.

        :param file_name: path of the downloaded file
        :return: True when the file is accepted, False for an invalid torrent
        """

        # primitive verification of torrents, just make sure we didn't get a text file or something
        if file_name.endswith(GenericProvider.TORRENT):
            try:
                for byte in readFileBuffered(file_name):
                    mime_type = guessParser(StringInputStream(byte))._getMimeType()
                    if mime_type == "application/x-bittorrent":
                        return True
            except Exception as e:
                # format the exception itself: `e.message` does not exist on Python 3
                sickrage.srLogger.debug("Failed to validate torrent file: {}".format(e))

            sickrage.srLogger.debug("Result is not a valid torrent file")
            return False

        return True
Beispiel #27
0
def metadata_for_filelike(filelike):
    """Return ``{key: first value}`` for the hachoir metadata of a file-like object.

    The object is rewound first. None is returned when it cannot be rewound,
    no parser is found, extraction raises ``HachoirError`` or hachoir yields
    no metadata object. Entries without values are left out.
    """
    try:
        filelike.seek(0)
    except (AttributeError, IOError):
        return None

    stream = InputIOStream(filelike, None, tags=[])
    parser = guessParser(stream)

    if not parser:
        return None

    try:
        metadata = extractMetadata(parser)
    except HachoirError:
        return None

    if metadata is None:
        # No extractor for this parser
        return None

    # Iterate the metadata object itself rather than its private
    # name-mangled dict with the Python-2-only iteritems().
    metas = {}
    for item in metadata:
        if item.values:
            metas[item.key] = item.values[0].value
    return metas
Beispiel #28
0
def META_OLECF(s, buff):
   """Return the hachoir metadata of an in-memory buffer as a dict.

   Each key maps to the text of its value, or to a list of texts when the
   entry has several values. An empty dict is returned when parsing fails
   or no metadata is available. ``s`` is not used by this function.
   """

   META_DICT = { }

   try:
      stream = InputIOStream(StringIO(buff))
      parser = guessParser(stream)
      meta = extractMetadata(parser)
   except Exception:
      # Best effort: any parsing problem yields an empty result, but
      # KeyboardInterrupt/SystemExit are no longer swallowed.
      return META_DICT

   if meta is None:
      # No extractor for this parser; sorted(None) would raise TypeError
      return META_DICT

   for data in sorted(meta):
      if not data.values:
         continue
      texts = [value.text for value in data.values]
      META_DICT['%s' % data.key] = texts[0] if len(texts) == 1 else texts

   return META_DICT
Beispiel #29
0
    def main(self):
        """Extract JPEG pictures and sounds from the SWF file named in argv[1].

        Compressed files (signature "CWS") are parsed through their inflated
        sub-stream. Exits with status 1 on wrong usage or when the file
        cannot be parsed.
        """
        if len(argv) != 2:
            stderr.write("usage: %s document.swf\n" % argv[0])
            exit(1)

        realname = argv[1]
        filename = unicodeFilename(realname)
        parser = createParser(filename, real_filename=realname)
        if parser is None:
            # createParser() gives None when the file cannot be parsed
            stderr.write("Unable to parse file: %s\n" % realname)
            exit(1)

        if parser["signature"].value == "CWS":
            deflate_swf = parser["compressed_data"].getSubIStream()
            parser = guessParser(deflate_swf)
            if parser is None:
                stderr.write("Unable to parse compressed data of: %s\n" % realname)
                exit(1)

        if "jpg_table/data" in parser:
            # JPEG pictures with common header
            jpeg_header = parser["jpg_table/data"].value[:-2]
            for field in parser.array("def_bits"):
                jpeg_content = field["image"].value[2:]
                if self.verbose:
                    print("Extract JPEG from %s" % field.path)
                self.storeJPEG(jpeg_header + jpeg_content)

        # JPEG in format 2/3
        for field in parser.array("def_bits_jpeg2"):
            self.extractFormat2(field)
        for field in parser.array("def_bits_jpeg3"):
            self.extractFormat2(field)

        # Extract sound
        #self.extractSound(parser)
        self.extractSound2(parser)

        # Does it extract anything?
        if self.jpg_index == 1:
            print("No JPEG picture found.")
        if self.snd_index == 1:
            print("No sound found.")
Beispiel #30
0
    usage = "usage: %prog <file_name>"
    op = OptionParser(usage)

    (options, args) = op.parse_args()
    if len(args) != 1:
        op.print_help()
        sys.exit(1)

    inputFileName = unicode(args[0])
    try:
        stream = FileInputStream(inputFileName)
    except InputStreamError, err:
        exit("Unable to open file: %s" % err)

    try:
        data = guessParser(stream)
        if not data:
            exit("Unable to parse file: %s" % inputFileName)

        for struct in data.allFeatures():
            print "%08X: %s = %s" % (
                (struct.address) / 8, struct.path, struct.display)

        return 1

        for struct in data:
            print "%08X: %s = %s" % (
                (struct.address) / 8, struct.path, struct.display)
            try:
                iter_exists = getattr(struct, "__iter__", None)
            except AttributeError:
Beispiel #31
0
def upload(request):
    """View that displays the upload form and processes upload form
    submissions.

    On a valid POST one model instance is created per submitted file, all
    instances are saved and the user is redirected to the edit page of the
    last one (with batch information when several files were uploaded).
    In every other case the upload form is rendered.

    Raises PermissionDenied when the user may not upload, tries to upload
    video without review permission, or submits ten or more files.
    """
    # Django's 'permission_required' decorator redirects to the login
    # form even if the user is already logged in.  That sucks, so we
    # don't use it, and we do the permission check in the code
    # instead.
    if not request.user.has_perm('gallery.can_upload'):
        raise PermissionDenied
    if request.method == 'POST':
        mediatype_form = MediaTypeForm(request.POST)
        upload_formset = UploadFormSet(request.POST, request.FILES)
        if 'cancel' in request.POST:
            request.notifications.add(_('Upload canceled.'))
            return HttpResponseRedirect(reverse('bm.gallery.views.index'))
        # .get(): a POST without the field must fail form validation below
        # instead of raising KeyError here.
        if (mediatype_form.data.get('mediatype') == 'video' and
            not request.user.has_perm('gallery.can_review')):
            raise PermissionDenied
        if mediatype_form.is_valid() and upload_formset.is_valid():
            instances = []
            if len([u for u in upload_formset.cleaned_data if u]) >= 10:
                raise PermissionDenied
            for cleaned_data in upload_formset.cleaned_data:
                if not cleaned_data:
                    continue
                file_ = cleaned_data['file_']
                # extract media metadata
                stream = InputIOStream(file_)
                parser = hachoir_parser.guessParser(stream)
                metadata = hachoir_metadata.extractMetadata(parser)
                # create model instance
                mediatype = mediatype_form.cleaned_data['mediatype']
                model_class = models.mediatype_map[mediatype]['klass']

                cursor = connection.cursor()
                cursor.execute("SELECT nextval ('gallery_mediabase_id_seq')")
                id_ = cursor.fetchone()[0]
                slug = '%s.%d' % (request.user.username, id_)

                model_args = {'id': id_, 'owner': request.user, 'slug': slug,
                              'status': 'uploaded'}
                if hasattr(model_class, 'IKOptions'):
                    # we're some type of image object
                    model_args['image'] = file_
                else:
                    model_args['filefield'] = file_
                # extractMetadata() yields None for files hachoir cannot
                # describe; such uploads simply get no dimensions/year.
                if metadata is not None:
                    for dimension in ('width', 'height'):
                        dimvalue = metadata.get(dimension, False)
                        if dimvalue:
                            model_args[dimension] = dimvalue
                    try:
                        year = metadata.get('creation_date').year
                        model_args['year'] = year
                    except ValueError:
                        # no creation date in metadata
                        pass
                if mediatype == 'video' and not file_.name.endswith('flv'):
                    model_args['encode'] = True
                instance = model_class(**model_args)
                instances.append(instance)
            # With no submitted file there is nothing to save or redirect
            # to; fall through and show the form again.
            if instances:
                # we're not using Django's transaction middleware, so we
                # fake our own transaction behavior here.  should probably
                # switch to using the middleware...
                try:
                    for instance in instances:
                        instance.save()
                except:
                    for instance in instances:
                        instance.delete()
                    raise
                request.notifications.add(_('Resources uploaded.'))
                url = '%s/edit' % instances[-1].get_absolute_url()
                batch_length = len(instances)
                if batch_length > 1:
                    ids = [str(i.id) for i in instances]
                    url = '%s?batch_length=%d&ids=%s' % (url, batch_length,
                                                         ','.join(ids))
                return HttpResponseRedirect(url)
    else:
        mediatype_form = MediaTypeForm()
        upload_formset = UploadFormSet()
    # only moderators can upload video
    if not request.user.has_perm('gallery.can_review'):
        mediatype_field = mediatype_form.fields['mediatype']
        choices = mediatype_field.choices
        mediatype_field.choices = [choice for choice in choices
                                   if choice[0] != 'video']
    return render_to_response('gallery/upload.html',
                              {'mediatype_form': mediatype_form,
                               'upload_formset': upload_formset},
                              context_instance=RequestContext(request))
Beispiel #32
0
    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a file-like object using Hachoir.

        Every metadata line exported by Hachoir is stored as an attribute;
        for every datetime value (and a parsable 'LastPrinted' value) one
        event carrying all attributes is produced.

        Args:
          parser_mediator: A parser context object (instance of ParserContext).
          file_object: A file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        file_name = parser_mediator.GetDisplayName()

        def _unable_to_parse(reason):
            # One place to build the error. `!s` is used for the reason
            # because it may be an exception object, which does not accept
            # the `s` format specification.
            return errors.UnableToParseFile(
                u'[{0:s}] unable to parse file {1:s}: {2!s}'.format(
                    self.NAME, file_name, reason))

        try:
            fstream = hachoir_core.stream.InputIOStream(file_object,
                                                        None,
                                                        tags=[])
        except hachoir_core.error.HachoirError as exception:
            raise _unable_to_parse(exception)

        if not fstream:
            raise _unable_to_parse('Not fstream')

        try:
            doc_parser = hachoir_parser.guessParser(fstream)
        except hachoir_core.error.HachoirError as exception:
            raise _unable_to_parse(exception)

        if not doc_parser:
            raise _unable_to_parse('Not parser')

        try:
            metadata = hachoir_metadata.extractMetadata(doc_parser)
        except (AssertionError, AttributeError) as exception:
            raise _unable_to_parse(exception)

        try:
            metatext = metadata.exportPlaintext(human=False)
        except AttributeError as exception:
            raise _unable_to_parse(exception)

        if not metatext:
            raise _unable_to_parse('No metadata')

        attributes = {}
        extracted_events = []
        for meta in metatext:
            # Only lines of the form "- key: value" carry data
            if not meta.startswith('-'):
                continue

            if len(meta) < 3:
                continue

            key, _, value = meta[2:].partition(': ')

            # The value itself may be a nested "name: value" pair
            key2, _, value2 = value.partition(': ')
            if key2 == 'LastPrinted' and value2 != 'False':
                date_object = timelib.Timestamp.FromTimeString(
                    value2, timezone=parser_mediator.timezone)
                if isinstance(date_object, datetime.datetime):
                    extracted_events.append((date_object, key2))

            try:
                date = metadata.get(key)
                if isinstance(date, datetime.datetime):
                    extracted_events.append((date, key))
            except ValueError:
                pass

            # Repeated keys are collected into a list
            if key in attributes:
                existing = attributes[key]
                if isinstance(existing, list):
                    existing.append(value)
                else:
                    attributes[key] = [existing, value]
            else:
                attributes[key] = value

        if not extracted_events:
            raise _unable_to_parse('No events discovered')

        for date, key in extracted_events:
            event_object = HachoirEvent(date, key, attributes)
            parser_mediator.ProduceEvent(event_object)
Beispiel #33
0
  def Parse(self, file_entry):
    """Extract data from a file using Hachoir.

    Every metadata line exported by Hachoir is stored as an attribute; for
    every datetime value (and a parsable 'LastPrinted' value) one event
    carrying all attributes is yielded. As this is a generator, parsing and
    any error below only happen once iteration starts.

    Args:
      file_entry: A file entry object.

    Yields:
      An event object (instance of EventObject) that contains the parsed
      attributes.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_object = file_entry.GetFileObject()

    def _unable_to_parse(reason):
      # One place to build the error. `!s` is used for the reason because it
      # may be an exception object, which does not accept the `s` format
      # specification.
      return errors.UnableToParseFile(
          u'[{0:s}] unable to parse file {1:s}: {2!s}'.format(
              self.parser_name, file_entry.name, reason))

    try:
      fstream = hachoir_core.stream.InputIOStream(file_object, None, tags=[])
    except hachoir_core.error.HachoirError as exception:
      raise _unable_to_parse(exception)

    if not fstream:
      raise _unable_to_parse('Not fstream')

    try:
      doc_parser = hachoir_parser.guessParser(fstream)
    except hachoir_core.error.HachoirError as exception:
      raise _unable_to_parse(exception)

    if not doc_parser:
      raise _unable_to_parse('Not parser')

    try:
      metadata = hachoir_metadata.extractMetadata(doc_parser)
    except (AssertionError, AttributeError) as exception:
      raise _unable_to_parse(exception)

    try:
      metatext = metadata.exportPlaintext(human=False)
    except AttributeError as exception:
      raise _unable_to_parse(exception)

    if not metatext:
      raise _unable_to_parse('No metadata')

    attributes = {}
    extracted_events = []
    for meta in metatext:
      # Only lines of the form "- key: value" carry data
      if not meta.startswith('-'):
        continue

      if len(meta) < 3:
        continue

      key, _, value = meta[2:].partition(': ')

      # The value itself may be a nested "name: value" pair
      key2, _, value2 = value.partition(': ')
      if key2 == 'LastPrinted' and value2 != 'False':
        date_object = timelib.StringToDatetime(
            value2, timezone=self._pre_obj.zone)
        if isinstance(date_object, datetime.datetime):
          extracted_events.append((date_object, key2))

      try:
        date = metadata.get(key)
        if isinstance(date, datetime.datetime):
          extracted_events.append((date, key))
      except ValueError:
        pass

      # Repeated keys are collected into a list
      if key in attributes:
        existing = attributes[key]
        if isinstance(existing, list):
          existing.append(value)
        else:
          attributes[key] = [existing, value]
      else:
        attributes[key] = value

    if not extracted_events:
      raise _unable_to_parse('No events discovered')

    for date, key in extracted_events:
      yield HachoirEvent(date, key, attributes)