Example #1
0
    def check_if_truncated(self):
        """
        Decide whether the compressed file looks truncated, caching the verdict.

        A truthy value already stored in self.is_truncated is returned
        as is.  Otherwise a checker command is picked from the file
        extension ("bz2", "gz" or "7z") and run; the file is flagged as
        truncated when that command does not exit successfully.  For any
        other extension nothing is run and the stored value is returned
        unchanged.

        Raises BackupError if the configured bz2 footer checker is not found.
        """
        if self.is_truncated:
            return self.is_truncated

        extension = self.file_obj.file_ext
        config = self._wiki.config

        # choose the checker command according to the file extension
        if extension == "bz2":
            footer_checker = config.checkforbz2footer
            if not exists(footer_checker):
                raise BackupError("checkforbz2footer command %s not found" %
                                  footer_checker)
            checker = [footer_checker, self.filename]
        elif extension == 'gz':
            checker = [config.gzip, "-dc", self.filename, ">", "/dev/null"]
        elif extension == '7z':
            # Note that 7z does return 0, if archive contains
            # garbage /after/ the archive end
            checker = [config.sevenzip, "e", "-so", self.filename,
                       ">", "/dev/null"]
        else:
            # we don't know how to handle this type of file.
            return self.is_truncated

        # run the prepared single-command pipeline
        proc = CommandPipeline([checker], quiet=True)
        proc.run_pipeline_get_output()
        self.is_truncated = not proc.exited_successfully()

        return self.is_truncated
Example #2
0
 def get_lineno_last_page(self, fileobj, runner):
     """
     Look up the line number of the last line containing "<page>" in a dump file.

     The file content is produced by the DumpFile's uncompression
     command and piped through "grep -n <page>" and "tail -1",
     without a shell.

     Returns the line number as a string of digits, or None when the
     file object has no filename, the file does not exist, the
     pipeline failed, or the output is not of the form "<digits>:...".

     Raises BackupError if the configured grep or tail command is not found.
     """
     if not fileobj.filename:
         return None
     if not exists(runner.dump_dir.filename_public_path(fileobj)):
         return None

     dumpfile = DumpFile(self.wiki,
                         runner.dump_dir.filename_public_path(fileobj, self.wiki.date),
                         fileobj, self.verbose)
     pipeline = dumpfile.setup_uncompression_command()

     grep = self.wiki.config.grep
     if not exists(grep):
         raise BackupError("grep command %s not found" % grep)
     pipeline.append([grep, "-n", "<page>"])

     tail = self.wiki.config.tail
     if not exists(tail):
         raise BackupError("tail command %s not found" % tail)
     pipeline.append([tail, "-1"])

     # without shell
     proc = CommandPipeline(pipeline, quiet=True)
     proc.run_pipeline_get_output()

     usable = proc.exited_successfully()
     if not usable:
         # a run also counts as usable when the only failure reported is
         # the first command of the pipeline with a SIGPIPE return code
         first_command = pipeline[0]
         usable = (proc.get_failed_cmds_with_retcode() ==
                   [[-signal.SIGPIPE, first_command]] or
                   proc.get_failed_cmds_with_retcode() ==
                   [[signal.SIGPIPE + 128, first_command]])
     if not usable:
         return None

     output = proc.output()
     # expected shape of the output:
     # 339915646:  <page>
     if ':' not in output:
         return None
     lineno = output.split(':')[0]
     if not lineno.isdigit():
         return None
     return lineno
Example #3
0
    def check_if_empty(self):
        """
        Report whether the file has no content, caching the answer.

        A truthy self.is_empty is returned straight away.  Otherwise a
        shell command limited to the first five lines of the (where
        needed, uncompressed) content is picked from the file extension
        and run; the file counts as empty when that command produces no
        output.  Extensions other than "bz2", "gz", "7z", "", "txt" and
        "html" are not checked and the stored value is returned unchanged.
        """
        if self.is_empty:
            return self.is_empty

        extension = self.file_obj.file_ext
        if extension in ('', 'txt', 'html'):
            # plain text, no uncompression needed
            shell_command = "head -5 %s" % self.filename
        elif extension == "bz2":
            shell_command = "%s -dc  %s | head -5" % (self._wiki.config.bzip2, self.filename)
        elif extension == "gz":
            shell_command = "%s -dc %s | head -5" % (self._wiki.config.gzip, self.filename)
        elif extension == '7z':
            shell_command = "%s e -so %s | head -5" % (self._wiki.config.sevenzip, self.filename)
        else:
            # we don't know how to handle this type of file.
            return self.is_empty

        # the command is one string containing a pipe, hence shell=True
        proc = CommandPipeline([[shell_command]], quiet=True, shell=True)
        proc.run_pipeline_get_output()
        self.is_empty = len(proc.output()) == 0

        return self.is_empty
Example #4
0
 def run_sql_and_get_output(self, query):
     """
     Run the command built for the given query and hand back its output.

     The command comes from self.build_sql_command(query).  Returns the
     pipeline output when the pipeline exited successfully and the
     output is non-empty; returns None in every other case.
     """
     proc = CommandPipeline(self.build_sql_command(query), quiet=True)
     proc.run_pipeline_get_output()
     # fixme best to put the return code someplace along with any errors....
     if not proc.exited_successfully():
         return None
     if not proc.output():
         return None
     return proc.output()
Example #5
0
    def get_last_lines_from_n(self, fileobj, runner, count):
        """
        Return the content of a dump file from line number `count` to the end.

        The file content is produced by the DumpFile's uncompression
        command and piped through "tail -n +<count>", without a shell.

        Returns the pipeline output, or None when the file object has
        no filename, the file does not exist, or the pipeline failed.

        Raises BackupError if the configured tail command is not found.
        """
        if not fileobj.filename or not exists(runner.dump_dir.filename_public_path(fileobj)):
            return None

        dumpfile = DumpFile(self.wiki,
                            runner.dump_dir.filename_public_path(fileobj, self.wiki.date),
                            fileobj, self.verbose)
        pipeline = dumpfile.setup_uncompression_command()

        tail = self.wiki.config.tail
        if not exists(tail):
            raise BackupError("tail command %s not found" % tail)
        pipeline.append([tail, "-n", "+%s" % count])
        # without shell
        proc = CommandPipeline(pipeline, quiet=True)
        proc.run_pipeline_get_output()

        if not proc.exited_successfully():
            # a run is still accepted when the only failure reported is
            # the first command of the pipeline with a SIGPIPE return code
            failed = proc.get_failed_cmds_with_retcode()
            if (failed != [[-signal.SIGPIPE, pipeline[0]]] and
                    failed != [[signal.SIGPIPE + 128, pipeline[0]]]):
                # previously this path hit an unassigned local variable
                # and raised UnboundLocalError instead of reporting failure
                return None
        return proc.output()
Example #6
0
    def get_first_500_lines(self):
        """
        Return the first 500 lines of the file's content, caching the result.

        A truthy self.first_lines is returned straight away.  Otherwise
        the content is produced by self.setup_uncompression_command()
        and piped through "head -500", without a shell; the output is
        stored in self.first_lines only when the pipeline succeeded.

        Returns self.first_lines (left unchanged if the pipeline
        failed), or None when there is no filename or the file does not
        exist.

        Raises BackupError if the configured head command is not found.
        """
        if self.first_lines:
            return self.first_lines

        if not self.filename or not exists(self.filename):
            return None

        pipeline = self.setup_uncompression_command()

        head = self._wiki.config.head
        if not exists(head):
            raise BackupError("head command %s not found" % head)
        # the command runs without a shell, so no shell escaping is needed
        pipeline.append([head, "-500"])
        proc = CommandPipeline(pipeline, quiet=True)
        proc.run_pipeline_get_output()

        succeeded = proc.exited_successfully()
        if not succeeded:
            # a run is still accepted when the only failure reported is
            # the first command of the pipeline with a SIGPIPE return code
            failed = proc.get_failed_cmds_with_retcode()
            succeeded = (failed == [[-signal.SIGPIPE, pipeline[0]]] or
                         failed == [[signal.SIGPIPE + 128, pipeline[0]]])
        if succeeded:
            self.first_lines = proc.output()
        return self.first_lines
Example #7
0
    def build_recombine_command_string(self, runner, files, output_file, compression_command,
                                       uncompression_command, end_header_marker="</siteinfo>"):
        """
        Build a shell command string that recombines several dump files into one.

        For each input file a pipeline (uncompress | head -2000 | grep -n
        <end_header_marker>) is run, without a shell, to find the line
        number where the header ends.  The returned string uncompresses
        every file in turn, trims it with tail/head and feeds the
        concatenated result to the compression command:

            (cmd1;cmd2;...)|<compression_command> <output path>

        Trimming rules per file:
          - every file but the first has its header dropped
            ("tail -n +<line after the end-of-header marker>")
          - every file but the last has its final line dropped
            ("head -n -1")
        so a single input file is passed through untrimmed.

        Args:
            runner: provides dump_dir (path lookup) and wiki.config
                    (head, tail, grep command paths)
            files: file objects to recombine, in order
            output_file: file object describing the combined output
            compression_command: interpolated with "%s" in front of the
                    output path
            uncompression_command: list of command words; the input
                    path is appended to a copy of it
            end_header_marker: text marking the last line of the header

        Returns:
            the command string

        Raises:
            BackupError: head, tail or grep is not found, `files` is
                    empty, or the end-of-header marker cannot be found
                    in one of the files
        """
        output_filename = runner.dump_dir.filename_public_path(output_file)
        if not exists(runner.wiki.config.head):
            raise BackupError("head command %s not found" % runner.wiki.config.head)
        head = runner.wiki.config.head
        if not exists(runner.wiki.config.tail):
            raise BackupError("tail command %s not found" % runner.wiki.config.tail)
        tail = runner.wiki.config.tail
        if not exists(runner.wiki.config.grep):
            raise BackupError("grep command %s not found" % runner.wiki.config.grep)
        grep = runner.wiki.config.grep

        if not files:
            raise BackupError("No files for the recombine step found in %s." % self.name())

        # we assume the result is always going to be run in a subshell.
        # much quicker than this script trying to read output
        # and pass it to a subprocess
        # NOTE(review): paths and command names are interpolated into the
        # command string without shell escaping (FIXME carried over from
        # the earlier code, whose escaping attempts had no effect)
        recombines = []
        file_count = len(files)
        for partnum, file_obj in enumerate(files, 1):
            fpath = runner.dump_dir.filename_public_path(file_obj)
            uncompress_this_file = uncompression_command[:]
            uncompress_this_file.append(fpath)
            # warning: we figure any header (<siteinfo>...</siteinfo>)
            # is going to be less than 2000 lines!
            pipeline = [uncompress_this_file,
                        [head, "-2000"],
                        [grep, "-n", end_header_marker]]
            # without shell
            proc = CommandPipeline(pipeline, quiet=True)
            proc.run_pipeline_get_output()

            output = proc.output()
            succeeded = False
            if output:
                succeeded = proc.exited_successfully()
                if not succeeded:
                    # still accepted when the only failure reported is the
                    # uncompression command with a SIGPIPE return code
                    failed = proc.get_failed_cmds_with_retcode()
                    succeeded = (failed == [[-signal.SIGPIPE, uncompress_this_file]] or
                                 failed == [[signal.SIGPIPE + 128, uncompress_this_file]])
            if not succeeded:
                raise BackupError("Could not find 'end of header' marker for %s" % fpath)

            # grep -n output looks like "<lineno>:<matching line>";
            # the content starts on the line after the marker
            (marker_lineno, junk_unused) = output.split(":", 1)
            header_end_num = int(marker_lineno) + 1

            recombine = " ".join(uncompress_this_file)
            if partnum != 1:
                # not the first file: skip the header
                recombine = recombine + (" | %s -n +%s" % (tail, header_end_num))
            if partnum != file_count:
                # not the last file: drop the final (footer) line
                recombine = recombine + " | %s -n -1 " % head
            recombines.append(recombine)

        recombine_command_string = ("(" + ";".join(recombines) + ")" + "|" +
                                    "%s %s" % (compression_command, output_filename))
        return recombine_command_string