Exemple #1
0
def count_by(*args):
    """Create a SubAssembly wrapping ``assembly.CountBy``.

    The first two positional arguments, when present, are converted with
    ``coerce_to_fields``; any further arguments are forwarded unchanged.
    """
    converted = [
        coerce_to_fields(value) if position < 2 else value
        for position, value in enumerate(args)
    ]
    return SubAssembly(assembly.CountBy, *converted)
Exemple #2
0
def count_by(*args):
    """Return a SubAssembly built around ``assembly.CountBy``.

    Up to two leading positional arguments are passed through
    ``coerce_to_fields``; everything after them is handed on as given.
    """
    params = list(args)
    # Only the first two slots are field selectors that need conversion.
    for index in range(min(len(params), 2)):
        params[index] = coerce_to_fields(params[index])
    return SubAssembly(assembly.CountBy, *params)
Exemple #3
0
def average_by(*args):
    """Create a SubAssembly wrapping ``assembly.AverageBy``.

    The first three positional arguments, when present, are converted with
    ``coerce_to_fields``; any further arguments are forwarded unchanged.
    """
    converted = [
        coerce_to_fields(value) if position < 3 else value
        for position, value in enumerate(args)
    ]
    return SubAssembly(assembly.AverageBy, *converted)
Exemple #4
0
def average_by(*args):
    """Return a SubAssembly built around ``assembly.AverageBy``.

    Up to three leading positional arguments are passed through
    ``coerce_to_fields``; everything after them is handed on as given.
    """
    params = list(args)
    # Only the first three slots are field selectors that need conversion.
    for index in range(min(len(params), 3)):
        params[index] = coerce_to_fields(params[index])
    return SubAssembly(assembly.AverageBy, *params)
Exemple #5
0
 def _create_with_parent(self, parent):
     """Build the Each pipe for this operation on top of ``parent``.

     The Each is constructed from the parent's assembly followed by, in
     order: the argument selector (only if set), the function, and the
     output selector (only if set). Both selectors are converted with
     ``coerce_to_fields``. The result is wrapped in a newly named Pipe.
     """
     selector_in = self.__argument_selector
     if selector_in:
         each_args = [coerce_to_fields(selector_in)]
     else:
         each_args = []
     each_args.append(self.__function)
     selector_out = self.__output_selector
     if selector_out:
         each_args.append(coerce_to_fields(selector_out))
     each_pipe = cascading.pipe.Each(parent.get_assembly(), *each_args)
     # The Each is followed by an extra, randomly named Pipe: per the
     # original author's note, joins may otherwise not work because
     # Cascading apparently requires pipe names to be different.
     return cascading.pipe.Pipe(random_pipe_name('each'), each_pipe)
Exemple #6
0
 def _create_with_parent(self, parent):
     """Create the Each for this operation, attached to ``parent``.

     Arguments handed to ``cascading.pipe.Each`` after the parent assembly:
     the coerced argument selector (if truthy), the function itself, and
     the coerced output selector (if truthy). The Each is then wrapped in
     a Pipe whose name comes from ``random_pipe_name('each')``.
     """
     operation_args = []
     if self.__argument_selector:
         operation_args += [coerce_to_fields(self.__argument_selector)]
     operation_args += [self.__function]
     if self.__output_selector:
         operation_args += [coerce_to_fields(self.__output_selector)]
     # An additional uniquely named Pipe is placed after the Each; the
     # original note says joins may not work otherwise, since pipe names
     # apparently have to differ for Cascading.
     wrapped = cascading.pipe.Each(parent.get_assembly(), *operation_args)
     return cascading.pipe.Pipe(random_pipe_name('each'), wrapped)
Exemple #7
0
def rename(*args):
    """Rename fields in the tuple stream.

    With exactly one argument (a list of names), every field is renamed:
    the source selector is ``Fields.ALL`` and the argument supplies the new
    names. Otherwise the first argument selects the fields to rename and
    the second gives their new names. Both selectors are converted with
    ``coerce_to_fields`` before being handed to the Rename sub-assembly.
    """
    if len(args) != 1:
        source_fields, target_fields = args[0], args[1]
    else:
        source_fields, target_fields = Fields.ALL, args[0]
    return SubAssembly(
        cascading.pipe.assembly.Rename,
        coerce_to_fields(source_fields),
        coerce_to_fields(target_fields)
    )
Exemple #8
0
def rename(*args):
    """Give fields new names.

    A single argument is taken as the list of new names for all fields
    (the fields to rename default to ``Fields.ALL``). With more than one
    argument, the first is the set of fields to rename and the second is
    the list of replacement names. Both are passed through
    ``coerce_to_fields``.
    """
    rename_everything = len(args) == 1
    if rename_everything:
        old_names = Fields.ALL
        new_names = args[0]
    else:
        old_names = args[0]
        new_names = args[1]
    return SubAssembly(cascading.pipe.assembly.Rename,
                       coerce_to_fields(old_names),
                       coerce_to_fields(new_names))
Exemple #9
0
def SelectFields(fields):
    """Keep only the given fields in the tuple stream.

    Arguments:
    fields -- a list of fields to keep, or a Cascading Fields wildcard;
              converted with coerce_to_fields before use
    """
    selector_class = com.twitter.pycascading.SelectFields
    return selector_class(coerce_to_fields(fields))
Exemple #10
0
def SelectFields(fields):
    """Restrict the tuple stream to a subset of its fields.

    Arguments:
    fields -- the fields to retain (a list of fields or a Cascading Fields
              wildcard); passed through coerce_to_fields
    """
    # Resolve the class of the same name from com.twitter.pycascading first,
    # then construct it with the coerced selector.
    java_select_fields = com.twitter.pycascading.SelectFields
    return java_select_fields(coerce_to_fields(fields))
Exemple #11
0
def un_group(*args):
    """Create a ``function.UnGroup`` from the given arguments.

    Conversion rules applied before the call:
    - argument 0 is always converted with ``coerce_to_fields``;
    - argument 1 is converted element-wise if it is a list or tuple,
      otherwise converted as a whole;
    - argument 2 is converted element-wise only if it is a list or tuple,
      and is otherwise left untouched (presumably so a plain number can
      be passed -- confirm against the UnGroup constructors).
    Further arguments are forwarded unchanged.
    """
    params = list(args)
    if params:
        params[0] = coerce_to_fields(params[0])
    if len(params) > 1:
        second = params[1]
        if isinstance(second, (list, tuple)):
            params[1] = [coerce_to_fields(item) for item in second]
        else:
            params[1] = coerce_to_fields(second)
    if len(params) > 2 and isinstance(params[2], (list, tuple)):
        params[2] = [coerce_to_fields(item) for item in params[2]]
    return function.UnGroup(*params)
Exemple #12
0
def un_group(*args):
    """Build a ``function.UnGroup`` after coercing its field arguments.

    The first argument is coerced with ``coerce_to_fields``. The second is
    coerced item by item when it is a list/tuple and as a single value
    otherwise. The third is coerced item by item when it is a list/tuple;
    any other third value is passed through as given. Remaining arguments
    are not modified.
    """
    sequence_types = (list, tuple)
    converted = []
    for position, value in enumerate(args):
        is_sequence = isinstance(value, sequence_types)
        if position == 0:
            value = coerce_to_fields(value)
        elif position in (1, 2) and is_sequence:
            value = [coerce_to_fields(item) for item in value]
        elif position == 1:
            value = coerce_to_fields(value)
        converted.append(value)
    return function.UnGroup(*converted)
Exemple #13
0
    def __create_args(self,
                      pipe=None,
                      aggregator=None,
                      output_selector=None,
                      assertion_level=None,
                      assertion=None,
                      buffer=None,
                      argument_selector=None):
        """Assemble the positional argument list from the keyword values.

        If an unnamed argument was stored in ``self.__args``, its first
        element overrides ``aggregator`` (when it is a
        ``cascading.operation.Aggregator``) or ``buffer`` (anything else).

        Defaults: ``argument_selector`` falls back to ``Fields.ALL``;
        ``output_selector`` falls back to ``Fields.ALL`` when an aggregator
        is used and to ``Fields.RESULTS`` otherwise.

        The returned list holds, in order: the pipe's assembly, the coerced
        argument selector, then (if present) the wrapped aggregator with
        the coerced output selector, the assertion level with the
        assertion, and the wrapped buffer with the coerced output selector.
        """
        if self.__args:
            # An unnamed argument was given: decide its role from its type.
            unnamed = self.__args[0]
            if isinstance(unnamed, cascading.operation.Aggregator):
                aggregator = unnamed
            else:
                buffer = unnamed

        # Fill in the defaults.
        if argument_selector is None:
            argument_selector = cascading.tuple.Fields.ALL
        if output_selector is None:
            if aggregator is None:
                output_selector = cascading.tuple.Fields.RESULTS
            else:
                # For aggregators both the groupings and the results
                # should be returned.
                output_selector = cascading.tuple.Fields.ALL

        call_args = [pipe.get_assembly()]
        if argument_selector is not None:
            call_args.append(coerce_to_fields(argument_selector))
        if aggregator is not None:
            # For now this is assumed to be a plain Cascading aggregator.
            call_args.append(
                wrap_function(aggregator, CascadingAggregatorWrapper))
            if output_selector:
                call_args.append(coerce_to_fields(output_selector))
        if assertion_level is not None:
            call_args.append(assertion_level)
            call_args.append(assertion)
        if buffer is not None:
            call_args.append(wrap_function(buffer, CascadingBufferWrapper))
            if output_selector:
                call_args.append(coerce_to_fields(output_selector))
        return call_args
Exemple #14
0
    def __create_args(self,
                      pipe=None,
                      aggregator=None,
                      output_selector=None,
                      assertion_level=None,
                      assertion=None,
                      buffer=None,
                      argument_selector=None):
        """Turn the keyword values into an ordered positional argument list.

        A value stored in ``self.__args`` takes precedence: its first
        element becomes the aggregator if it is an instance of
        ``cascading.operation.Aggregator`` and the buffer otherwise.

        A missing ``argument_selector`` defaults to ``Fields.ALL``. A
        missing ``output_selector`` defaults to ``Fields.ALL`` with an
        aggregator and to ``Fields.RESULTS`` without one.

        Returns a list starting with ``pipe.get_assembly()`` and the coerced
        argument selector, followed by whichever of the aggregator,
        assertion and buffer groups were supplied.
        """
        if self.__args:
            # Classify the unnamed argument by its type.
            first_unnamed = self.__args[0]
            is_aggregator = isinstance(first_unnamed,
                                       cascading.operation.Aggregator)
            if is_aggregator:
                aggregator = first_unnamed
            else:
                buffer = first_unnamed

        # Defaults for the two selectors.
        if argument_selector is None:
            argument_selector = cascading.tuple.Fields.ALL
        if output_selector is None:
            if aggregator is not None:
                # Aggregators should return the groupings as well as the
                # results.
                output_selector = cascading.tuple.Fields.ALL
            else:
                output_selector = cascading.tuple.Fields.RESULTS

        result = []
        result.append(pipe.get_assembly())
        if argument_selector is not None:
            result.append(coerce_to_fields(argument_selector))

        if aggregator is not None:
            # Assumed, for now, to be a Cascading aggregator used directly.
            wrapped = wrap_function(aggregator, CascadingAggregatorWrapper)
            result.append(wrapped)
            if output_selector:
                result.append(coerce_to_fields(output_selector))

        if assertion_level is not None:
            result.extend([assertion_level, assertion])

        if buffer is not None:
            wrapped = wrap_function(buffer, CascadingBufferWrapper)
            result.append(wrapped)
            if output_selector:
                result.append(coerce_to_fields(output_selector))
        return result
Exemple #15
0
 def __create_args(self,
                   group_name=None,
                   pipes=None,
                   group_fields=None,
                   sort_fields=None,
                   reverse_order=None,
                   pipe=None,
                   lhs_pipe=None,
                   rhs_pipe=None):
     """Assemble the positional argument list for the grouping.

     An unnamed argument stored in ``self.__args`` is only ever used as
     ``group_fields`` (after ``coerce_to_fields``) and overrides the
     keyword value.

     The list starts with ``group_name`` if given, then follows exactly
     one of three shapes, chosen in this order of precedence:
     - ``pipes``: list of assemblies, then group fields, sort fields and
       reverse order, each one only if the previous one was present;
     - ``pipe``: the assembly, then group fields; sort fields and reverse
       order are each added independently once group fields are present;
     - ``lhs_pipe``: both assemblies followed by the coerced group fields.
     """
     if self.__args:
         group_fields = coerce_to_fields(self.__args[0])

     call_args = []
     if group_name:
         call_args.append(group_name)

     if pipes:
         call_args.append([p.get_assembly() for p in pipes])
         if group_fields:
             call_args.append(coerce_to_fields(group_fields))
             if sort_fields:
                 call_args.append(coerce_to_fields(sort_fields))
                 if reverse_order:
                     call_args.append(reverse_order)
         return call_args

     if pipe:
         call_args.append(pipe.get_assembly())
         if group_fields:
             call_args.append(coerce_to_fields(group_fields))
             if sort_fields:
                 call_args.append(coerce_to_fields(sort_fields))
             # Unlike the multi-pipe case, reverse_order here does not
             # depend on sort_fields being given.
             if reverse_order:
                 call_args.append(reverse_order)
         return call_args

     if lhs_pipe:
         call_args.append(lhs_pipe.get_assembly())
         call_args.append(rhs_pipe.get_assembly())
         call_args.append(coerce_to_fields(group_fields))
     return call_args
Exemple #16
0
 def __create_args(self,
                   group_name=None,
                   pipes=None, group_fields=None, sort_fields=None,
                   reverse_order=None,
                   pipe=None,
                   lhs_pipe=None, rhs_pipe=None):
     """Build the ordered argument list from the keyword values.

     ``self.__args``, when non-empty, supplies ``group_fields`` (its first
     element, coerced with ``coerce_to_fields``); an unnamed parameter can
     only be used for the group fields.

     ``group_name`` comes first when set. After it, the first truthy one
     of ``pipes``, ``pipe`` and ``lhs_pipe`` decides what follows:
     - ``pipes``: assemblies list, [group fields, [sort fields,
       [reverse order]]];
     - ``pipe``: assembly, [group fields, [sort fields], [reverse order]];
     - ``lhs_pipe``: left assembly, right assembly, coerced group fields.
     """
     if self.__args:
         group_fields = coerce_to_fields(self.__args[0])

     result = []
     if group_name:
         result += [group_name]

     if pipes:
         result += [[p.get_assembly() for p in pipes]]
         if group_fields:
             result += [coerce_to_fields(group_fields)]
             if sort_fields:
                 result += [coerce_to_fields(sort_fields)]
                 if reverse_order:
                     result += [reverse_order]
     elif pipe:
         result += [pipe.get_assembly()]
         if group_fields:
             result += [coerce_to_fields(group_fields)]
             if sort_fields:
                 result += [coerce_to_fields(sort_fields)]
             # reverse_order is appended here whether or not sort_fields
             # was supplied.
             if reverse_order:
                 result += [reverse_order]
     elif lhs_pipe:
         result += [lhs_pipe.get_assembly()]
         result += [rhs_pipe.get_assembly()]
         result += [coerce_to_fields(group_fields)]
     return result
Exemple #17
0
def average(*args):
    """Create an ``aggregator.Average``.

    The first positional argument, if any, is converted with
    ``coerce_to_fields``; the remaining arguments are forwarded unchanged.
    """
    if not args:
        return aggregator.Average()
    leading = coerce_to_fields(args[0])
    return aggregator.Average(leading, *args[1:])
Exemple #18
0
def unique(*args):
    """Create a SubAssembly wrapping ``assembly.Unique``.

    The first positional argument is required and is converted with
    ``coerce_to_fields``; the remaining arguments are forwarded unchanged.
    Calling this without arguments raises IndexError.
    """
    params = list(args)
    leading, remaining = params[0], params[1:]
    return SubAssembly(assembly.Unique, coerce_to_fields(leading),
                       *remaining)
Exemple #19
0
def sum_by(*args):
    """Create a SubAssembly wrapping ``assembly.SumBy``.

    The first three positional arguments are required and are each
    converted with ``coerce_to_fields``; further arguments are forwarded
    unchanged. Fewer than three arguments raises IndexError.
    """
    params = list(args)
    # SumBy has at least 3 parameters, so all three slots are indexed
    # unconditionally.
    for index in (0, 1, 2):
        params[index] = coerce_to_fields(params[index])
    return SubAssembly(assembly.SumBy, *params)
Exemple #20
0
def min(*args):
    """Create an ``aggregator.Min``.

    The first positional argument, if any, is converted with
    ``coerce_to_fields``; the remaining arguments are forwarded unchanged.

    Note: this name shadows the ``min`` builtin in the defining module.
    """
    if not args:
        return aggregator.Min()
    leading = coerce_to_fields(args[0])
    return aggregator.Min(leading, *args[1:])
Exemple #21
0
def sum(*args):
    """Create an ``aggregator.Sum``.

    The first positional argument, if any, is converted with
    ``coerce_to_fields``; the remaining arguments are forwarded unchanged.

    Note: this name shadows the ``sum`` builtin in the defining module.
    """
    if not args:
        return aggregator.Sum()
    leading = coerce_to_fields(args[0])
    return aggregator.Sum(leading, *args[1:])
Exemple #22
0
def average(*args):
    """Return an ``aggregator.Average`` built from ``args``.

    Only the first argument (when there is one) is passed through
    ``coerce_to_fields``; all others are handed on as given.
    """
    # args[:1] is empty when no arguments were supplied, so nothing is
    # coerced in that case.
    converted = [coerce_to_fields(value) for value in args[:1]]
    converted.extend(args[1:])
    return aggregator.Average(*converted)
Exemple #23
0
 def __create_args(self,
                   group_name=None,
                   pipes=None,
                   group_fields=None,
                   declared_fields=None,
                   result_group_fields=None,
                   joiner=None,
                   pipe=None,
                   num_self_joins=None,
                   lhs=None,
                   lhs_group_fields=None,
                   rhs=None,
                   rhs_group_fields=None):
     """Assemble the positional argument list for the co-grouping.

     An unnamed argument stored in ``self.__args`` is only ever used as
     ``group_fields``: each element of its first item is converted with
     ``coerce_to_fields``, overriding the keyword value.

     The list starts with ``str(group_name)`` if a name is given, then
     follows the first matching shape:
     - ``lhs``: left assembly and group fields, right assembly and group
       fields, optional declared fields (plus result group fields), and
       the joiner if one is given;
     - ``pipes``: list of assemblies and, only when group fields are
       present, the per-pipe group fields, the declared fields (or None),
       optional result group fields, and the joiner (an InnerJoin when
       none is given);
     - ``pipe``: the assembly, group fields, ``int(num_self_joins)``,
       optional declared fields (plus result group fields), and the
       joiner if one is given.
     """
     if self.__args:
         group_fields = [coerce_to_fields(f) for f in self.__args[0]]

     call_args = []
     if group_name:
         call_args.append(str(group_name))

     if lhs:
         call_args.append(lhs.get_assembly())
         call_args.append(coerce_to_fields(lhs_group_fields))
         call_args.append(rhs.get_assembly())
         call_args.append(coerce_to_fields(rhs_group_fields))
         if declared_fields:
             call_args.append(coerce_to_fields(declared_fields))
             if result_group_fields:
                 call_args.append(coerce_to_fields(result_group_fields))
         if joiner:
             call_args.append(joiner)
         return call_args

     if pipes:
         call_args.append([p.get_assembly() for p in pipes])
         if not group_fields:
             return call_args
         call_args.append([coerce_to_fields(f) for f in group_fields])
         if declared_fields:
             call_args.append(coerce_to_fields(declared_fields))
             if result_group_fields:
                 call_args.append(coerce_to_fields(result_group_fields))
         else:
             # The declared-fields slot is filled with None so the joiner
             # stays in the same position.
             call_args.append(None)
         if joiner is None:
             joiner = cascading.pipe.cogroup.InnerJoin()
         call_args.append(joiner)
         return call_args

     if pipe:
         call_args.append(pipe.get_assembly())
         call_args.append(coerce_to_fields(group_fields))
         call_args.append(int(num_self_joins))
         if declared_fields:
             call_args.append(coerce_to_fields(declared_fields))
             if result_group_fields:
                 call_args.append(coerce_to_fields(result_group_fields))
         if joiner:
             call_args.append(joiner)
     return call_args
Exemple #24
0
 def __create_args(self,
                   group_name=None,
                   pipes=None, group_fields=None, declared_fields=None,
                   result_group_fields=None, joiner=None,
                   pipe=None, num_self_joins=None,
                   lhs=None, lhs_group_fields=None,
                   rhs=None, rhs_group_fields=None):
     """Build the ordered argument list from the keyword values.

     ``self.__args``, when non-empty, supplies ``group_fields``: every
     element of its first item is coerced with ``coerce_to_fields``. An
     unnamed parameter can only be used for the group fields.

     ``str(group_name)`` comes first when a name is set. After it, the
     first truthy one of ``lhs``, ``pipes`` and ``pipe`` decides the rest:
     - ``lhs``: both sides' assemblies with their coerced group fields,
       then declared/result fields and the joiner when supplied;
     - ``pipes``: the assemblies list; if group fields exist, also the
       coerced group fields, declared fields or None, result group fields
       when supplied, and the joiner (defaulting to an InnerJoin);
     - ``pipe``: assembly, coerced group fields, ``int(num_self_joins)``,
       then declared/result fields and the joiner when supplied.
     """
     if self.__args:
         group_fields = [coerce_to_fields(f) for f in self.__args[0]]

     result = []
     if group_name:
         result += [str(group_name)]

     if lhs:
         result += [lhs.get_assembly()]
         result += [coerce_to_fields(lhs_group_fields)]
         result += [rhs.get_assembly()]
         result += [coerce_to_fields(rhs_group_fields)]
         if declared_fields:
             result += [coerce_to_fields(declared_fields)]
             if result_group_fields:
                 result += [coerce_to_fields(result_group_fields)]
         if joiner:
             result += [joiner]
     elif pipes:
         result += [[p.get_assembly() for p in pipes]]
         if group_fields:
             result += [[coerce_to_fields(f) for f in group_fields]]
             if not declared_fields:
                 # Keep the declared-fields position occupied.
                 result += [None]
             else:
                 result += [coerce_to_fields(declared_fields)]
                 if result_group_fields:
                     result += [coerce_to_fields(result_group_fields)]
             if joiner is None:
                 joiner = cascading.pipe.cogroup.InnerJoin()
             result += [joiner]
     elif pipe:
         result += [pipe.get_assembly()]
         result += [coerce_to_fields(group_fields)]
         result += [int(num_self_joins)]
         if declared_fields:
             result += [coerce_to_fields(declared_fields)]
             if result_group_fields:
                 result += [coerce_to_fields(result_group_fields)]
         if joiner:
             result += [joiner]
     return result
Exemple #25
0
def sum(*args):
    """Return an ``aggregator.Sum`` built from ``args``.

    Only the first argument (when there is one) is passed through
    ``coerce_to_fields``; all others are handed on as given.

    Note: this name shadows the ``sum`` builtin in the defining module.
    """
    # args[:1] is empty when no arguments were supplied, so nothing is
    # coerced in that case.
    converted = [coerce_to_fields(value) for value in args[:1]]
    converted.extend(args[1:])
    return aggregator.Sum(*converted)
Exemple #26
0
def last(*args):
    """Create an ``aggregator.Last``.

    The first positional argument, if any, is converted with
    ``coerce_to_fields``; the remaining arguments are forwarded unchanged.
    """
    if not args:
        return aggregator.Last()
    leading = coerce_to_fields(args[0])
    return aggregator.Last(leading, *args[1:])
Exemple #27
0
def min(*args):
    """Return an ``aggregator.Min`` built from ``args``.

    Only the first argument (when there is one) is passed through
    ``coerce_to_fields``; all others are handed on as given.

    Note: this name shadows the ``min`` builtin in the defining module.
    """
    # args[:1] is empty when no arguments were supplied, so nothing is
    # coerced in that case.
    converted = [coerce_to_fields(value) for value in args[:1]]
    converted.extend(args[1:])
    return aggregator.Min(*converted)
Exemple #28
0
def unique(*args):
    """Return a SubAssembly built around ``assembly.Unique``.

    The first argument is mandatory: it is passed through
    ``coerce_to_fields``, and omitting it raises IndexError. All other
    arguments are handed on as given.
    """
    params = list(args)
    # Index unconditionally so that a missing first argument fails here.
    first_converted = coerce_to_fields(params[0])
    params[0] = first_converted
    return SubAssembly(assembly.Unique, *params)
Exemple #29
0
def sum_by(*args):
    """Return a SubAssembly built around ``assembly.SumBy``.

    The three leading arguments are mandatory and are each passed through
    ``coerce_to_fields``; supplying fewer raises IndexError. Any later
    arguments are handed on as given.
    """
    params = list(args)
    # SumBy has at least 3 parameters; index each one explicitly so that a
    # missing argument raises instead of being skipped.
    params[:3] = [coerce_to_fields(params[index]) for index in (0, 1, 2)]
    return SubAssembly(assembly.SumBy, *params)
Exemple #30
0
def last(*args):
    """Return an ``aggregator.Last`` built from ``args``.

    Only the first argument (when there is one) is passed through
    ``coerce_to_fields``; all others are handed on as given.
    """
    # args[:1] is empty when no arguments were supplied, so nothing is
    # coerced in that case.
    converted = [coerce_to_fields(value) for value in args[:1]]
    converted.extend(args[1:])
    return aggregator.Last(*converted)