def gzip_handler(self, entity):
    """ Handles .gz files

    Decompresses ``entity[u'filename']`` with ``gunzip``, registers the
    decompressed file as a child entity of ``entity`` (re-using an existing
    entity returned by ``Entity.by_name`` when there is one) and passes it
    to ``call_magic_handler``.

    :param entity: entity for the archive; ``entity[u'filename']`` is the
        path handed to ``gunzip`` and ``entity[u'format']`` is set to
        ``u'gz'``.
    :returns: ``None``.  Decompression failures are logged, not raised.
    """
    self.log.debug("gzip_handler(%s)" % entity)
    entity[u'format'] = u'gz'
    filename = entity[u'filename']
    dirname = os.path.dirname(filename)

    # Use an argument list instead of a shell string so that quotes, spaces
    # or shell metacharacters in the filename cannot alter the command.
    # The archive is passed as an absolute path because the child runs in
    # ``dirname``; an empty dirname means "stay in the current directory".
    p = subprocess.Popen(['gunzip', os.path.abspath(filename)],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         cwd=dirname or None, universal_newlines=True)
    out, err = p.communicate()
    if err:
        self.log.error("Error unzipping: %s" % err)
        return

    # gunzip replaces the archive with a file named after it minus the
    # compression suffix (.tgz/.taz become .tar).
    root, ext = os.path.splitext(filename)
    if ext.lower() in ('.tgz', '.taz'):
        extracted = root + '.tar'
    else:
        extracted = root
    if not os.path.exists(extracted):
        self.log.error("Unable to find gunzip output for %s" % filename)
        return
    self.log.debug("extracted " + extracted)
    magic = utils.get_magic(extracted)

    # Create a new child Entity for the extracted file, mirroring what
    # zip_exe_handler does for each file it extracts.
    extracted = to_unicode(extracted)
    child = Entity.by_name(extracted)
    if not child:
        child = Entity(name=os.path.basename(extracted))
        child[u'filename'] = extracted
        DBSession.add(child)
        child.parent = entity
        child[u'magic'] = to_unicode(magic)
        self.log.debug("Created %s" % child)
    else:
        child.parent = entity
    DBSession.flush()
    self.call_magic_handler(extracted, child)
def call_magic_handler(self, filename, entity):
    """ Dispatch ``entity`` to the handler registered for a file's magic

    The magic of ``filename`` is obtained from ``utils.get_magic`` and
    looked up in ``self.magic_types``: first as an exact key, then against
    every key that is a compiled regular expression (first match wins).
    The chosen handler is called as ``handler(self, entity)``.  When
    nothing matches, an error is logged.

    :param filename: path whose magic is determined.
    :param entity: object handed to the selected handler.
    :returns: the magic value reported by ``utils.get_magic``.
    """
    detected = utils.get_magic(filename)
    handlers = self.magic_types

    # Exact key match: dispatch and finish.
    if detected in handlers:
        exact = handlers[detected]
        self.log.info('Calling %r for %s magic' % (exact, detected))
        exact(self, entity)
        return detected

    # Otherwise try the keys that are compiled regex patterns.
    regex_type = type(re.compile(r'foo'))
    for key in handlers:
        if not isinstance(key, regex_type):
            continue
        if not re.match(key, detected):
            continue
        self.log.info('Calling %r for %s magic' % (
            handlers[key], detected))
        handlers[key](self, entity)
        break
    else:
        # Loop finished without a break: no pattern matched.
        self.log.error('No handler for magic: %s' % detected)
    return detected
def zip_exe_handler(self, entity):
    """ Handles self-extracting zip files

    Runs ``unzip -o`` on ``entity[u'filename']`` inside the archive's
    directory.  Every file that unzip reports as "inflating" is registered
    as a child entity of ``entity`` (re-using an existing entity returned
    by ``Entity.by_name`` when there is one) and passed to
    ``call_magic_handler``.  The archive is deleted only when unzip wrote
    nothing to stderr.

    :param entity: entity for the archive; ``entity[u'format']`` is set to
        ``u'zip'``.
    :returns: ``None``.  unzip errors are logged, not raised.
    """
    self.log.debug("zip_exe_handler(%s)" % entity)
    entity[u'format'] = u'zip'
    filename = entity[u'filename']
    dirname = os.path.dirname(filename)

    # Use an argument list instead of a shell string so that quotes, spaces
    # or shell metacharacters in the filename cannot alter the command.
    # The archive is passed as an absolute path because the child runs in
    # ``dirname``; an empty dirname means "stay in the current directory".
    p = subprocess.Popen(['unzip', '-o', os.path.abspath(filename)],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         cwd=dirname or None, universal_newlines=True)
    out, err = p.communicate()
    if err:
        self.log.error("Error unzipping: %s" % err)
    else:
        # Delete compressed data after extracting
        os.unlink(filename)

    # stderr output is not treated as fatal: whatever unzip reports as
    # inflated is still processed.
    # NOTE(review): presumably needed because unzip warns about the
    # executable stub of self-extracting archives -- confirm.
    for line in out.splitlines():
        line = line.strip()
        if not line.startswith('inflating'):
            continue
        # The line looks like "inflating: <path>"; keep everything after
        # the first colon so that paths containing spaces are not cut off.
        name = line.partition(':')[2].strip()
        if not name:
            continue
        extracted = os.path.join(dirname, name)
        self.log.debug("extracted " + extracted)
        magic = utils.get_magic(extracted)

        # Create a new child Entity for each extracted file
        extracted = to_unicode(extracted)
        child = Entity.by_name(extracted)
        if not child:
            child = Entity(name=os.path.basename(extracted))
            child[u'filename'] = extracted
            DBSession.add(child)
            child.parent = entity
            child[u'magic'] = to_unicode(magic)
            self.log.debug("Created %s" % child)
        else:
            child.parent = entity
        DBSession.flush()
        self.call_magic_handler(extracted, child)