def check_if_truncated(self):
    """
    Report whether the file appears to be truncated.

    A checker command is picked from the file extension and run; the
    file counts as truncated when that command does not exit
    successfully. The result is stored in self.is_truncated, and a
    stored true value is returned directly on later calls without
    re-running anything.

    Extensions handled: "bz2" (the configured checkforbz2footer command),
    "gz" (gzip -dc) and "7z" (7z e -so). For any other extension no check
    is run and the current value of self.is_truncated is returned.

    Raises BackupError if the checkforbz2footer command is needed but
    does not exist.
    """
    if self.is_truncated:
        return self.is_truncated

    # Set up the pipeline depending on the file extension
    extension = self.file_obj.file_ext
    # NOTE(review): for gz and 7z, ">" and "/dev/null" are handed to
    # CommandPipeline as plain list items; presumably it interprets them
    # as an output redirection -- confirm against CommandPipeline.
    if extension == "bz2":
        footer_checker = self._wiki.config.checkforbz2footer
        if not exists(footer_checker):
            raise BackupError("checkforbz2footer command %s not found" % footer_checker)
        pipeline = [[footer_checker, self.filename]]
    elif extension == 'gz':
        pipeline = [[self._wiki.config.gzip, "-dc", self.filename, ">", "/dev/null"]]
    elif extension == '7z':
        # Note that 7z does return 0, if archive contains
        # garbage /after/ the archive end
        pipeline = [[self._wiki.config.sevenzip, "e", "-so", self.filename, ">", "/dev/null"]]
    else:
        # we don't know how to handle this type of file.
        return self.is_truncated

    # Run the prepared pipeline
    checker = CommandPipeline(pipeline, quiet=True)
    checker.run_pipeline_get_output()
    self.is_truncated = not checker.exited_successfully()
    return self.is_truncated
def get_lineno_last_page(self, fileobj, runner):
    """
    Find the line number of the last "<page>" line in a dump file.

    The file is uncompressed and piped through `grep -n "<page>"` and
    `tail -1`; the number in front of the first ':' of the resulting
    line is returned.

    Args:
        fileobj: object describing the dump file; its filename
            attribute must be set
        runner: object whose dump_dir.filename_public_path() gives the
            path of the file

    Returns:
        The line number as a string of digits, or None if the file has
        no name, does not exist, the pipeline failed (other than the
        first command being ended by SIGPIPE), or the output does not
        start with a number followed by ':'.

    Raises BackupError if the configured grep or tail command does not
    exist.
    """
    if not fileobj.filename:
        return None
    if not exists(runner.dump_dir.filename_public_path(fileobj)):
        return None

    dump_path = runner.dump_dir.filename_public_path(fileobj, self.wiki.date)
    dumpfile = DumpFile(self.wiki, dump_path, fileobj, self.verbose)
    pipeline = dumpfile.setup_uncompression_command()

    grep = self.wiki.config.grep
    if not exists(grep):
        raise BackupError("grep command %s not found" % grep)
    pipeline.append([grep, "-n", "<page>"])

    tail = self.wiki.config.tail
    if not exists(tail):
        raise BackupError("tail command %s not found" % tail)
    pipeline.append([tail, "-1"])

    # without shell
    proc = CommandPipeline(pipeline, quiet=True)
    proc.run_pipeline_get_output()

    if not proc.exited_successfully():
        # the only failure tolerated is the first command in the
        # pipeline being ended by SIGPIPE, reported either as a negative
        # signal number or as 128 + signal number
        failures = proc.get_failed_cmds_with_retcode()
        killed_by_sigpipe = (failures == [[-signal.SIGPIPE, pipeline[0]]] or
                             failures == [[signal.SIGPIPE + 128, pipeline[0]]])
        if not killed_by_sigpipe:
            return None

    output = proc.output()
    # 339915646:      <page>
    if ':' not in output:
        return None
    line_number = output.split(':')[0]
    if line_number.isdigit():
        return line_number
    return None
def check_if_empty(self):
    """
    Report whether the file produces no content at all.

    A shell command that emits the first five lines of the
    (uncompressed, where applicable) file is run; the file counts as
    empty when that command produces no output. The result is stored
    in self.is_empty, and a stored true value is returned directly on
    later calls without re-running anything.

    Extensions handled: "bz2", "gz", "7z", and the uncompressed "",
    "txt" and "html". For any other extension no check is run and the
    current value of self.is_empty is returned.
    """
    if self.is_empty:
        return self.is_empty

    extension = self.file_obj.file_ext
    if extension == "bz2":
        shell_command = "%s -dc %s | head -5" % (self._wiki.config.bzip2, self.filename)
    elif extension == "gz":
        shell_command = "%s -dc %s | head -5" % (self._wiki.config.gzip, self.filename)
    elif extension == '7z':
        shell_command = "%s e -so %s | head -5" % (self._wiki.config.sevenzip, self.filename)
    elif extension in ('', 'txt', 'html'):
        shell_command = "head -5 %s" % self.filename
    else:
        # we don't know how to handle this type of file.
        return self.is_empty

    # the whole command line is one string, run via the shell
    proc = CommandPipeline([[shell_command]], quiet=True, shell=True)
    proc.run_pipeline_get_output()
    self.is_empty = len(proc.output()) == 0
    return self.is_empty
def run_sql_and_get_output(self, query):
    """
    Run an SQL query via the command built by build_sql_command()
    and return what it printed.

    Returns the output of the command pipeline, or None if the
    pipeline did not exit successfully or produced no output.
    """
    proc = CommandPipeline(self.build_sql_command(query), quiet=True)
    proc.run_pipeline_get_output()
    # fixme best to put the return code someplace along with any errors....
    if not proc.exited_successfully() or not proc.output():
        return None
    return proc.output()
def get_last_lines_from_n(self, fileobj, runner, count):
    """
    Return the content of a dump file from line number `count` onward.

    The file is uncompressed and piped through `tail -n +<count>`.

    Args:
        fileobj: object describing the dump file; its filename
            attribute must be set
        runner: object whose dump_dir.filename_public_path() gives the
            path of the file
        count: line number at which the returned output starts

    Returns:
        The output of the pipeline, or None if the file has no name,
        does not exist, or the pipeline failed (other than the first
        command being ended by SIGPIPE).

    Raises BackupError if the configured tail command does not exist.
    """
    if not fileobj.filename or not exists(runner.dump_dir.filename_public_path(fileobj)):
        return None

    dumpfile = DumpFile(self.wiki,
                        runner.dump_dir.filename_public_path(fileobj, self.wiki.date),
                        fileobj, self.verbose)
    pipeline = dumpfile.setup_uncompression_command()

    tail = self.wiki.config.tail
    if not exists(tail):
        raise BackupError("tail command %s not found" % tail)
    pipeline.append([tail, "-n", "+%s" % count])

    # without shell
    proc = CommandPipeline(pipeline, quiet=True)
    proc.run_pipeline_get_output()

    # bound up front so that a failed pipeline yields None instead of
    # leaving the name undefined at the return statement
    last_lines = None
    # the first command being ended by SIGPIPE is tolerated; it is
    # reported either as a negative signal number or as 128 + signal
    if (proc.exited_successfully() or
            (proc.get_failed_cmds_with_retcode() ==
             [[-signal.SIGPIPE, pipeline[0]]]) or
            (proc.get_failed_cmds_with_retcode() ==
             [[signal.SIGPIPE + 128, pipeline[0]]])):
        last_lines = proc.output()
    return last_lines
def get_first_500_lines(self):
    """
    Return the first 500 lines of the (uncompressed) file.

    The uncompression command for the file is piped through
    `head -500`. On success the output is stored in self.first_lines,
    and a stored non-empty value is returned directly on later calls
    without re-running anything.

    Returns:
        The stored or freshly read lines; None if self.filename is
        unset or the file does not exist. If the pipeline fails (other
        than the first command being ended by SIGPIPE), the unchanged
        value of self.first_lines is returned.

    Raises BackupError if the configured head command does not exist.
    """
    if self.first_lines:
        return self.first_lines

    if not self.filename or not exists(self.filename):
        return None

    pipeline = self.setup_uncompression_command()

    head = self._wiki.config.head
    if not exists(head):
        raise BackupError("head command %s not found" % head)
    pipeline.append([head, "-500"])

    # without shell
    proc = CommandPipeline(pipeline, quiet=True)
    proc.run_pipeline_get_output()

    # head stops reading after 500 lines, so the first command being
    # ended by SIGPIPE is tolerated; it is reported either as a negative
    # signal number or as 128 + signal
    if (proc.exited_successfully() or
            (proc.get_failed_cmds_with_retcode() ==
             [[-signal.SIGPIPE, pipeline[0]]]) or
            (proc.get_failed_cmds_with_retcode() ==
             [[signal.SIGPIPE + 128, pipeline[0]]])):
        self.first_lines = proc.output()
    return self.first_lines
def build_recombine_command_string(self, runner, files, output_file, compression_command,
                                   uncompression_command, end_header_marker="</siteinfo>"):
    """
    Build a shell command string that recombines several compressed
    files into one compressed output file.

    For each input file the line number of end_header_marker is looked
    up by actually running the uncompression command piped through
    `head -2000` and `grep -n`. The returned string then uncompresses
    every file in turn, drops the header (everything up to and
    including the marker line) from all files but the first, drops the
    final line from all files but the last, and pipes the result into
    the compression command.

    Args:
        runner: provides wiki.config (head, tail, grep commands) and
            dump_dir.filename_public_path()
        files: sequence of objects describing the input files, in the
            order they are to be combined
        output_file: object describing the output file
        compression_command: text placed directly in front of the
            output file path in the returned string
        uncompression_command: list of command and arguments; each
            input file path is appended to a copy of it
        end_header_marker: text that marks the last header line

    Returns:
        The command as one string, meant to be run by a shell.

    Raises BackupError if the head, tail or grep command does not
    exist, if files is empty, or if the marker cannot be found in one
    of the files.
    """
    output_filename = runner.dump_dir.filename_public_path(output_file)

    head = runner.wiki.config.head
    if not exists(head):
        raise BackupError("head command %s not found" % head)
    tail = runner.wiki.config.tail
    if not exists(tail):
        raise BackupError("tail command %s not found" % tail)
    grep = runner.wiki.config.grep
    if not exists(grep):
        raise BackupError("grep command %s not found" % grep)

    if not files:
        raise BackupError("No files for the recombine step found in %s." % self.name())

    # we assume the result is always going to be run in a subshell.
    # much quicker than this script trying to read output
    # and pass it to a subprocess
    #
    # FIXME: nothing that goes into the returned string is shell-escaped
    # (commands, input paths, output path), so paths containing shell
    # metacharacters will break the command.
    total = len(files)
    recombines = []
    for partnum, file_obj in enumerate(files, start=1):
        fpath = runner.dump_dir.filename_public_path(file_obj)
        uncompress_this_file = uncompression_command[:]
        uncompress_this_file.append(fpath)

        # warning: we figure any header (<siteinfo>...</siteinfo>)
        # is going to be less than 2000 lines!
        pipeline = [uncompress_this_file,
                    [head, "-2000"],
                    [grep, "-n", end_header_marker]]
        # without shell
        proc = CommandPipeline(pipeline, quiet=True)
        proc.run_pipeline_get_output()

        # head stops reading after 2000 lines, so the uncompression
        # command being ended by SIGPIPE is tolerated; it is reported
        # either as a negative signal number or as 128 + signal
        found_marker = (
            proc.output() and
            (proc.exited_successfully() or
             proc.get_failed_cmds_with_retcode() ==
             [[-signal.SIGPIPE, uncompress_this_file]] or
             proc.get_failed_cmds_with_retcode() ==
             [[signal.SIGPIPE + 128, uncompress_this_file]]))
        if not found_marker:
            raise BackupError("Could not find 'end of header' marker for %s" % fpath)

        # grep -n output is "<line number>:<line>"; the content starts
        # on the line after the marker
        (marker_line, _rest) = proc.output().split(":", 1)
        header_end_num = int(marker_line) + 1

        recombine = " ".join(uncompress_this_file)
        if partnum != 1:
            # not the first file: skip the header
            recombine = recombine + (" | %s -n +%s" % (tail, header_end_num))
        if partnum != total:
            # not the last file: skip the final line (the footer).
            # A sole file is both first and last and is passed through
            # untouched, so that its footer is kept.
            recombine = recombine + " | %s -n -1 " % head
        recombines.append(recombine)

    recombine_command_string = ("(" + ";".join(recombines) + ")" + "|" +
                                "%s %s" % (compression_command, output_filename))
    return recombine_command_string