Esempio n. 1
0
 def meta_source(self, input_path):
     """Create a source whose scheme is recovered from stored meta data.

     Builds an Hfs tap over input_path, which should be a (HDFS) folder,
     using the source scheme that MetaScheme.getSourceScheme reports for
     that folder, so the data is read with the scheme it was stored with.

     Arguments:
     input_path -- the folder to read the data from; it is passed through
         expand_path_with_home before being used

     Returns the result of self.source() applied to the new tap.
     """
     resolved_path = expand_path_with_home(input_path)
     # The scheme is looked up from the folder itself, not supplied by
     # the caller.
     scheme = MetaScheme.getSourceScheme(resolved_path)
     tap = cascading.tap.Hfs(scheme, resolved_path)
     return self.source(tap)
Esempio n. 2
0
    def meta_source(self, input_path):
        """Define a source tap that picks its scheme from the data folder.

        The files under input_path (expected to be a (HDFS) folder) are
        read through an Hfs tap. The scheme is not given by the caller:
        it is obtained from MetaScheme.getSourceScheme for the same folder.

        Arguments:
        input_path -- the folder holding the data to read; expanded with
            expand_path_with_home first

        Returns whatever self.source() returns for the constructed tap.
        """
        folder = expand_path_with_home(input_path)
        stored_scheme = MetaScheme.getSourceScheme(folder)
        hfs_tap = cascading.tap.Hfs(stored_scheme, folder)
        return self.source(hfs_tap)
Esempio n. 3
0
 def meta_sink(self, cascading_scheme, output_path):
     """Create a sink that records the scheme alongside the data.

     The given scheme is wrapped via MetaScheme.getSinkScheme so that,
     besides the data, information about the scheme is written to the
     output folder, together with the human-readable .pycascading_header
     and .pycascading_types files (field names and field types,
     respectively).

     Arguments:
     cascading_scheme -- the Cascading Scheme used to store data
     output_path -- the folder that receives the output tuples; expanded
         with expand_path_with_home first. The tap is opened with
         SinkMode.REPLACE, so an existing folder is erased and replaced!

     Returns the result of self.sink() applied to the new tap.
     """
     resolved_path = expand_path_with_home(output_path)
     wrapped_scheme = MetaScheme.getSinkScheme(cascading_scheme, resolved_path)
     tap = cascading.tap.Hfs(
         wrapped_scheme,
         resolved_path,
         cascading.tap.SinkMode.REPLACE,
     )
     return self.sink(tap)
Esempio n. 4
0
 def meta_sink(self, cascading_scheme, output_path):
     """Define a sink tap that stores meta information with the data.

     Along with the tuples, the sink scheme obtained from
     MetaScheme.getSinkScheme keeps a record of the scheme used for
     storage, plus human-readable descriptions in the
     .pycascading_header (field names) and .pycascading_types (field
     types) files.

     Arguments:
     cascading_scheme -- the Cascading Scheme used to store data
     output_path -- the destination folder for the output tuples, run
         through expand_path_with_home first. Because the tap uses
         SinkMode.REPLACE, a pre-existing folder is erased and replaced!

     Returns whatever self.sink() returns for the constructed tap.
     """
     target_folder = expand_path_with_home(output_path)
     meta_scheme = MetaScheme.getSinkScheme(cascading_scheme, target_folder)
     hfs_tap = cascading.tap.Hfs(meta_scheme, target_folder,
                                 cascading.tap.SinkMode.REPLACE)
     return self.sink(hfs_tap)