def word_count(REDUCES=4):
    """Run the word-count streaming job and save its console output.

    Steps performed:
      1. Copy everything under ``INPUT_PATH`` into the ``input`` directory
         via ``{HDFS_EXECUTABLE} dfs -put``.
      2. Mark the mapper and reducer scripts as executable.
      3. Run ``{MAPRED_EXECUTABLE} streaming`` reading ``input`` and writing
         ``output``.
      4. Write the job's stdout, followed by its stderr, to
         ``MAPRED_OUTPUT_FILE_PATH``.

    Args:
        REDUCES: Value passed as ``mapreduce.job.reduces``.
    """
    os.system(f"{HDFS_EXECUTABLE} dfs -put {INPUT_PATH}/* input")

    log.debug(f"Adding executable permissions to mapper file:  {MAPPER_PATH}")
    os.system(f"chmod +x {MAPPER_PATH}")
    log.debug(
        f"Adding executable permissions to reducer file:  {REDUCER_PATH}")
    os.system(f"chmod +x {REDUCER_PATH}")

    # Adjacent literals are concatenated by the parser; the resulting
    # command string (including the double spaces) is unchanged.
    word_count_command = (
        f"{MAPRED_EXECUTABLE} streaming  "
        f"-D mapreduce.job.reduces={REDUCES}  "
        "-input input  "
        "-output output  "
        f"-mapper {MAPPER_PATH}  "
        f"-reducer  {REDUCER_PATH}"
    )

    log.debug('Executing Word Count Map Reduce command.')
    # With shell=True the command is given as a single string rather than
    # wrapped in a one-element list.
    result = subprocess.run(word_count_command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)
    stderr = result.stderr.decode()
    stdout = result.stdout.decode()

    # Only echo the streams to the log when stderr has visible content.
    # (Truthiness test instead of the identity comparison ``is not ''``.)
    if stderr.strip():
        log.info(f"stdout:\n{stdout}")
        log.info(f"stderr:\n{stderr}")

    writeToFile(stdout, MAPRED_OUTPUT_FILE_PATH)
    appendToFile(stderr, MAPRED_OUTPUT_FILE_PATH)
    log.info(f"Output of mapreduce step saved to {MAPRED_OUTPUT_FILE_PATH}.")
Example #2
0
    def writeOutputs(self):
        """Write the captured command output to the configured files.

        ``self.results["stdOut"]`` is written to ``self.standardOutputFile``
        and ``self.results["stdError"]`` to ``self.standardErrorFile``, each
        only if ``self.validateOutputFile`` accepts the target; a rejected
        target gets status ``-1``. A truthy status is treated as a failure
        and, when the corresponding file is set, recorded in
        ``self.stdError``.

        Returns:
            ``self.results['exitCode']`` when it is truthy, otherwise the sum
            of the stdout and stderr write statuses.
        """

        def _path_text(path):
            # Encode only when the path is not already a native ``str``:
            # concatenating encoded bytes with ``str`` raises TypeError on
            # Python 3, which would mask the write failure being reported.
            return path if isinstance(path, str) else path.encode('utf-8')

        if self.outputLock is not None:
            self.outputLock.acquire()
        try:
            if self.validateOutputFile(self.standardOutputFile):
                stdoutStatus = writeToFile(self.results["stdOut"],
                                           self.standardOutputFile)
            else:
                stdoutStatus = -1
            if self.validateOutputFile(self.standardErrorFile):
                stderrStatus = writeToFile(self.results["stdError"],
                                           self.standardErrorFile)
            else:
                stderrStatus = -1
        finally:
            # Release even when validation or writing raises, so the lock is
            # never left held.
            if self.outputLock is not None:
                self.outputLock.release()

        if stdoutStatus and self.standardOutputFile is not None:
            self.stdError = "Failed to write to file{" + _path_text(
                self.standardOutputFile) + "}\r\n" + self.results["stdOut"]
        if stderrStatus and self.standardErrorFile is not None:
            self.stdError = "Failed to write to file{" + _path_text(
                self.standardErrorFile) + "}\r\n" + self.results["stdError"]
        if self.results['exitCode']:
            return self.results['exitCode']
        return stdoutStatus + stderrStatus
Example #3
0
    def writeOutputs(self):
        """Write the captured command output to the configured files.

        The stdout/stderr target paths are first passed through
        ``self.writeOutputsValidateOutputFile``; ``self.results["stdOut"]``
        and ``self.results["stdError"]`` are then written to the returned
        paths. A truthy status from ``writeToFile`` is treated as a failure
        and recorded in ``self.stdError``.

        Returns:
            ``self.results['exitCode']`` when it is truthy, otherwise the sum
            of the stdout and stderr write statuses.
        """
        if self.outputLock is not None:
            self.outputLock.acquire()
        try:
            standardOut = self.writeOutputsValidateOutputFile(self.standardOutputFile)
            standardError = self.writeOutputsValidateOutputFile(self.standardErrorFile)

            # writeToFile takes (output, filename).
            stdoutStatus = writeToFile(self.results["stdOut"], standardOut)
            stderrStatus = writeToFile(self.results["stdError"], standardError)
        finally:
            # Release even when validation or writing raises, so the lock is
            # never left held.
            if self.outputLock is not None:
                self.outputLock.release()

        if stdoutStatus:
            self.stdError = "Failed to write to file{" + standardOut + "}\r\n" + self.results["stdOut"]
        if stderrStatus:
            self.stdError = "Failed to write to file{" + standardError + "}\r\n" + self.results["stdError"]
        if self.results['exitCode']:
            return self.results['exitCode']
        return stdoutStatus + stderrStatus
Example #4
0
    def write_output(self):
        """
        Persist each task's captured stdout/stderror to its configured files.

        For every task in ``self.groupTasks`` the task's ``outputLock`` is
        held while writing. A stream whose target file attribute is falsy is
        skipped. Any exception raised while writing is logged and then
        ignored, so the remaining streams and tasks are still processed.
        """
        # (key in task.results, name of the task attribute holding the path)
        streams = (
            ('stdout', 'standardOutputFile'),
            ('stderror', 'standardErrorFile'),
        )
        for task in self.groupTasks:
            with task.outputLock:
                for result_key, file_attr in streams:
                    destination = getattr(task, file_attr)
                    if not destination:
                        continue
                    try:
                        # The results lookup stays inside the try so that a
                        # missing key is logged like any other write failure.
                        writeToFile(task.results[result_key], destination)
                    except Exception as err:
                        LOGGER.warning("Unable to write to: %s: %s",
                                       destination, str(err))
                        LOGGER.exception(err)