Esempio n. 1
0
class PigProxy(object):
    """Functionality for interacting with the pig interpreter"""
    
    orig_pig_code = None
    args = None
    arg_files = None
    alias_overrides = None    
    _temp_pig_script = None
    
    def __init__(self, pig_code, args = None, arg_files = None):
        """
        pig_code: The text of the Pig script to test with no substitution or change
        args: The list of arguments of the script.
        arg_files: The list of file arguments of the script.
        """
        self.orig_pig_code = pig_code
        self.args = args or []
        self.arg_files = arg_files or []
        self.alias_overrides = {
            "STORE" : "",
            "DUMP" : "",
            }
        if (System.getProperties().containsKey("pigunit.exectype.cluster")):
            self.pig = PigServer(ExecType.MAPREDUCE)
        else:
            self.pig = PigServer(ExecType.LOCAL)

    def from_file(cls, pig_script, args = None, arg_files = None):
        f = open(pig_script, 'r')
        try:
            pig_code = f.read()
        finally:
            f.close()
        return cls(pig_code, args, arg_files)
    from_file = classmethod(from_file)

    def register_script(self):
        """
        Registers a pig scripts with its variables substituted.
        raises: IOException If a temp file containing the pig script could not be created.
        raises: ParseException The pig script could not have all its variables substituted.

        todo: Refactor this processes that result in calling this method.  This method gets
        called twice for a single assert as every method that needs the data assumes no one
        else has called it (even methods that call other methods that call it (assertOutput()
        calls get_alias() which both call this method).  
        """
        pigIStream = BufferedReader(StringReader(self.orig_pig_code))
        pigOStream =  StringWriter()

        ps = ParameterSubstitutionPreprocessor(50) # Where does 50 come from?
        ps.genSubstitutedFile(pigIStream, pigOStream, self.args, self.arg_files)

        substitutedPig = pigOStream.toString()
        f = File.createTempFile("tmp", "pigunit")
        pw = PrintWriter(f)
        pw.println(substitutedPig)
        pw.close()

        pigSubstitutedFile = f.getCanonicalPath()
        self._temp_pig_script = pigSubstitutedFile

        self.pig.registerScript(pigSubstitutedFile, self.alias_overrides)        

    def pig_script(self):
        """
        Returns the path to the pig script containing all modifications
        due to overrides.
        """
        return self._temp_pig_script

    def run_script(self):
        self.register_script()

    def get_alias(self, alias):
        self.register_script()
        return iter(self.pig.openIterator(alias))

    def last_stored_alias_name(self):
        """
        Returns the name of the relation that was last stored
        in the pig script
        """
        self.register_script()
        return self.alias_overrides["LAST_STORE_ALIAS"]

    def override(self, alias, query):
        """
        Replaces the query of an aliases by another query.

        For example:
            B = FILTER A BY count > 5;
        overridden with:
            <B, B = FILTER A BY name == 'Pig';>
        becomes
            B = FILTER A BY name == 'Pig';

        alias: The alias to override.
        query: The new value of the alias.
        """
        self.alias_overrides[alias] = query

    def unoverride(self, alias):
        """Remove an override placed on an alias"""
        if alias in self.alias_overrides:
            del self.alias_overrides[alias]        

    def schemaFor(self, alias):
        """
        Returns string containing the schema of the specified alias
        """
        self.register_script()
        sb = StringBuilder()
        Schema.stringifySchema(sb, self.pig.dumpSchema(alias), DataType.TUPLE)
        return sb.toString()

    def override_to_data(self, alias, input_data):
        """
        Override a statement so that the alias results in having the
        specified set of data
        """
        schema = self.schemaFor(alias)
        destination = mktemp()
        cluster = Cluster(self.pig.getPigContext())
        cluster.copyContentFromLocalFile(input_data, destination, True)
        self.override(alias, "%s = LOAD '%s' AS %s;" % (alias, destination, schema))
Esempio n. 2
0
class PigProxy(object):
    """Functionality for interacting with the pig interpreter"""

    orig_pig_code = None
    args = None
    arg_files = None
    alias_overrides = None
    _temp_pig_script = None

    def __init__(self, pig_code, args=None, arg_files=None):
        """
        pig_code: The text of the Pig script to test with no substitution or change
        args: The list of arguments of the script.
        arg_files: The list of file arguments of the script.
        """
        self.orig_pig_code = pig_code
        self.args = args or []
        self.arg_files = arg_files or []
        self.alias_overrides = {
            "STORE": "",
            "DUMP": "",
        }
        if (System.getProperties().containsKey("pigunit.exectype.cluster")):
            self.pig = PigServer(ExecType.MAPREDUCE)
        else:
            self.pig = PigServer(ExecType.LOCAL)

    def from_file(cls, pig_script, args=None, arg_files=None):
        f = open(pig_script, 'r')
        try:
            pig_code = f.read()
        finally:
            f.close()
        return cls(pig_code, args, arg_files)

    from_file = classmethod(from_file)

    def register_script(self):
        """
        Registers a pig scripts with its variables substituted.
        raises: IOException If a temp file containing the pig script could not be created.
        raises: ParseException The pig script could not have all its variables substituted.

        todo: Refactor this processes that result in calling this method.  This method gets
        called twice for a single assert as every method that needs the data assumes no one
        else has called it (even methods that call other methods that call it (assertOutput()
        calls get_alias() which both call this method).  
        """
        pigIStream = BufferedReader(StringReader(self.orig_pig_code))
        pigOStream = StringWriter()

        ps = ParameterSubstitutionPreprocessor(50)  # Where does 50 come from?
        ps.genSubstitutedFile(pigIStream, pigOStream, self.args,
                              self.arg_files)

        substitutedPig = pigOStream.toString()
        f = File.createTempFile("tmp", "pigunit")
        pw = PrintWriter(f)
        pw.println(substitutedPig)
        pw.close()

        pigSubstitutedFile = f.getCanonicalPath()
        self._temp_pig_script = pigSubstitutedFile

        self.pig.registerScript(pigSubstitutedFile, self.alias_overrides)

    def pig_script(self):
        """
        Returns the path to the pig script containing all modifications
        due to overrides.
        """
        return self._temp_pig_script

    def run_script(self):
        self.register_script()

    def get_alias(self, alias):
        self.register_script()
        return iter(self.pig.openIterator(alias))

    def last_stored_alias_name(self):
        """
        Returns the name of the relation that was last stored
        in the pig script
        """
        self.register_script()
        return self.alias_overrides["LAST_STORE_ALIAS"]

    def override(self, alias, query):
        """
        Replaces the query of an aliases by another query.

        For example:
            B = FILTER A BY count > 5;
        overridden with:
            <B, B = FILTER A BY name == 'Pig';>
        becomes
            B = FILTER A BY name == 'Pig';

        alias: The alias to override.
        query: The new value of the alias.
        """
        self.alias_overrides[alias] = query

    def unoverride(self, alias):
        """Remove an override placed on an alias"""
        if alias in self.alias_overrides:
            del self.alias_overrides[alias]

    def schemaFor(self, alias):
        """
        Returns string containing the schema of the specified alias
        """
        self.register_script()
        sb = StringBuilder()
        Schema.stringifySchema(sb, self.pig.dumpSchema(alias), DataType.TUPLE)
        return sb.toString()

    def override_to_data(self, alias, input_data):
        """
        Override a statement so that the alias results in having the
        specified set of data
        """
        schema = self.schemaFor(alias)
        destination = mktemp()
        cluster = Cluster(self.pig.getPigContext())
        cluster.copyContentFromLocalFile(input_data, destination, True)
        self.override(alias,
                      "%s = LOAD '%s' AS %s;" % (alias, destination, schema))