def execute(self, inp): archive, archiveid = self.vfs.pull_remote_file(inp) args = [self.map_exec, str(self.vfs.master_id), str(self.vfs.worker_id), str(self.num_reducer), archive, self.output_path, str(self.limit_size)] self.info("Processing archive ID=%d name=%s" % (archiveid, archive)) self.info("Executing %s" % str(' '.join(args))) totsize = 0 start = time.time() process = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) results = [] filenames = [] for line in process.stdout.readlines(): self.debug(">> %s" % line.strip()) if not line.startswith("=> "): continue fname, rid, fsize = line[3:].split(' ', 2) rid = int(rid) fsize = int(fsize) totsize += fsize filenames.append(fname) results.append((rid, get_id(fname), fsize)) self.info("Map finished. Result is %s" % str(results)) for fname in filenames: self.vfs.push_local_file(fname, True) return ((totsize, time.time() - start), results)