Exemple #1
0
 def __or__(self, pipe):
     """Attach this cache to the pipeline on the left-hand side of '|'.

     If a refresh is requested or the cache folder does not exist, the
     incoming pipe is forked: one branch is connected to a binary sink on
     the cache folder, and the other branch is returned for further
     processing. Otherwise the incoming pipe is not used at all and a meta
     source over the cache folder is returned in its place.

     Arguments:
     pipe -- the pipeline whose output should be cached
     """
     if self.__refresh or not self.__hdfs_folder_exists:
         # Fork the stream: the first branch stores the data in the cache
         # folder, the second one carries on with the processing
         storing_branch = pipe | Pipe(random_pipe_name("cache"))
         storing_branch | self.__taps.binary_sink(self.__cache_folder)
         processing_branch = pipe | Pipe(random_pipe_name("no_cache"))
         return processing_branch
     # The cache stands in for every source feeding the pipe; leaving those
     # sources in would make Cascading complain about unused source taps
     return self.__taps.meta_source(self.__cache_folder)
Exemple #2
0
 def __or__(self, pipe):
     """Return either the cached data or a fork of the given pipe.

     The cache is used only when no refresh was requested and the cache
     folder exists; in that case a meta source over the cache folder is
     returned and the given pipe is ignored. In every other case the pipe
     is split into a branch written to a binary sink on the cache folder
     and a branch that is returned to the caller.

     Arguments:
     pipe -- the pipeline whose output should be cached
     """
     cache_is_usable = not self.__refresh and self.__hdfs_folder_exists
     if cache_is_usable:
         # All sources replaced by this cache are dropped, otherwise
         # Cascading complains about unused source taps
         return self.__taps.meta_source(self.__cache_folder)

     # Storing pipeline: its result is deliberately discarded, connecting
     # it to the sink is all that is needed
     cache_branch = pipe | Pipe(random_pipe_name('cache'))
     cache_branch | self.__taps.binary_sink(self.__cache_folder)

     # Processing pipeline: handed back to the caller
     no_cache_branch = pipe | Pipe(random_pipe_name('no_cache'))
     return no_cache_branch
Exemple #3
0
 def _create_with_parent(self, parent):
     """Terminate the parent pipeline with a freshly named tail for this sink.

     The tail's assembly name is mapped to this object's Cascading tap in
     the sink map of the taps object, and the tail itself is appended to
     its list of tails. Always returns None.

     Arguments:
     parent -- the pipeline that is piped into this sink
     """
     # Every tail gets a different name so that Cascading can assign a tail
     # map to all sinks.
     # TODO: revise this after I name every pipe part separately
     tail = parent | Pipe(name=random_pipe_name("sink"))
     tap = self.__cascading_tap
     sink_map = self.__taps.sink_map
     sink_map[tail.get_assembly().getName()] = tap
     self.__taps.tails.append(tail)
     return None
Exemple #4
0
 def _create_with_parent(self, parent):
     """Register the parent pipeline as a tail that writes into this sink.

     A new, randomly named Pipe is appended to the parent. Its assembly
     name becomes the key under which this object's Cascading tap is stored
     in the sink map, and the new pipe is added to the list of tails.
     Returns None.

     Arguments:
     parent -- the pipeline that is piped into this sink
     """
     # Tails must all be named differently so that Cascading can assign a
     # tail map to all sinks.
     # TODO: revise this after I name every pipe part separately
     named_tail = parent | Pipe(name=random_pipe_name('sink'))
     sink_tap = self.__cascading_tap
     taps = self.__taps
     taps.sink_map[named_tail.get_assembly().getName()] = sink_tap
     taps.tails.append(named_tail)
     return None
Exemple #5
0
 def _create_with_parent(self, parent):
     """Wrap the parent's assembly in an Each and return it under a new Pipe.

     The Each is constructed with, in this order: the argument selector
     coerced to fields (only if one is set), the function, and the output
     selector coerced to fields (only if one is set).

     Arguments:
     parent -- the pipeline whose assembly the Each is applied to
     """
     if self.__argument_selector:
         leading = [coerce_to_fields(self.__argument_selector)]
     else:
         leading = []
     function = [self.__function]
     if self.__output_selector:
         trailing = [coerce_to_fields(self.__output_selector)]
     else:
         trailing = []
     each_args = leading + function + trailing

     # The Each is followed by another Pipe, since otherwise joins may not
     # work: the names of pipes apparently have to be different for
     # Cascading.
     applied = cascading.pipe.Each(parent.get_assembly(), *each_args)
     return cascading.pipe.Pipe(random_pipe_name('each'), applied)
Exemple #6
0
 def _create_with_parent(self, parent):
     """Apply the stored function to the parent through a Cascading Each.

     Optional selectors are passed through coerce_to_fields and placed
     around the function: the argument selector before it, the output
     selector after it. The resulting Each is returned wrapped in a
     randomly named Pipe.

     Arguments:
     parent -- the pipeline whose assembly the Each is applied to
     """
     each_args = (
         [coerce_to_fields(self.__argument_selector)]
         if self.__argument_selector else []
     )
     each_args.append(self.__function)
     each_args.extend(
         [coerce_to_fields(self.__output_selector)]
         if self.__output_selector else []
     )
     wrapped = cascading.pipe.Each(parent.get_assembly(), *each_args)
     # A further Pipe goes after the Each, since otherwise joins may not
     # work as the names of pipes apparently have to be different for
     # Cascading.
     return cascading.pipe.Pipe(random_pipe_name('each'), wrapped)
Exemple #7
0
 def source(self, cascading_tap):
     """Create the head of a pipeline that is fed by a Cascading tap.

     A randomly named Pipe is created, tagged with the hash of the tap,
     given its own assembly name as context, and connected to the tap
     under that same name.

     Arguments:
     cascading_tap -- the Cascading tap to connect as the source

     Returns the newly created head Pipe.
     """
     # The source tap can be set up right away; the Pipe only serves to
     # give the head of this pipeline a name
     head = Pipe(name=random_pipe_name('source'))
     head.hash = hash(cascading_tap)
     context = [head.get_assembly().getName()]
     head.add_context(context)
     source_name = head.get_assembly().getName()
     self._connect_source(source_name, cascading_tap)
     return head
Exemple #8
0
    def source(self, cascading_tap):
        """Build a named pipeline head and connect it to a Cascading tap.

        Arguments:
        cascading_tap -- the Cascading tap to connect as the source

        Returns the head Pipe. Its 'hash' attribute is set to the hash of
        the tap, and its context holds the name of its own assembly.
        """
        # The Pipe names the head of this pipeline; the source tap itself
        # can be connected immediately
        head_pipe = Pipe(name=random_pipe_name('source'))
        head_pipe.hash = hash(cascading_tap)
        head_pipe.add_context([head_pipe.get_assembly().getName()])
        head_name = head_pipe.get_assembly().getName()
        self._connect_source(head_name, cascading_tap)
        return head_pipe