コード例 #1
0
ファイル: native.py プロジェクト: twitter/pycascading
def count_by(*args):
    """Wrap assembly.CountBy in a SubAssembly.

    The first two positional arguments, when present, are passed through
    coerce_to_fields; any further arguments are forwarded unchanged.
    """
    coerced = [coerce_to_fields(arg) for arg in args[:2]]
    remaining = list(args[2:])
    return SubAssembly(assembly.CountBy, *(coerced + remaining))
コード例 #2
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def count_by(*args):
    """Wrap assembly.CountBy in a SubAssembly.

    The first two positional arguments, when present, are passed through
    coerce_to_fields; any further arguments are forwarded unchanged.
    """
    coerced = [coerce_to_fields(arg) for arg in args[:2]]
    remaining = list(args[2:])
    return SubAssembly(assembly.CountBy, *(coerced + remaining))
コード例 #3
0
ファイル: native.py プロジェクト: twitter/pycascading
def average_by(*args):
    """Wrap assembly.AverageBy in a SubAssembly.

    Up to the first three positional arguments are passed through
    coerce_to_fields; any further arguments are forwarded unchanged.
    """
    coerced = [coerce_to_fields(arg) for arg in args[:3]]
    remaining = list(args[3:])
    return SubAssembly(assembly.AverageBy, *(coerced + remaining))
コード例 #4
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def average_by(*args):
    """Wrap assembly.AverageBy in a SubAssembly.

    Up to the first three positional arguments are passed through
    coerce_to_fields; any further arguments are forwarded unchanged.
    """
    coerced = [coerce_to_fields(arg) for arg in args[:3]]
    remaining = list(args[3:])
    return SubAssembly(assembly.AverageBy, *(coerced + remaining))
コード例 #5
0
ファイル: each.py プロジェクト: ArturFis/pycascading
 def _create_with_parent(self, parent):
     """Create the Each pipe on top of the parent's assembly.

     The Each receives, in order: the coerced argument selector (if set),
     the function, and the coerced output selector (if set). The result
     is wrapped in a freshly named Pipe.
     """
     each_args = []
     if self.__argument_selector:
         selector = coerce_to_fields(self.__argument_selector)
         each_args.append(selector)
     each_args.append(self.__function)
     if self.__output_selector:
         selector = coerce_to_fields(self.__output_selector)
         each_args.append(selector)
     each = cascading.pipe.Each(parent.get_assembly(), *each_args)
     # An extra Pipe is placed after the Each because joins may otherwise
     # not work: Cascading apparently requires pipe names to be different.
     return cascading.pipe.Pipe(random_pipe_name('each'), each)
コード例 #6
0
ファイル: each.py プロジェクト: twitter/pycascading
 def _create_with_parent(self, parent):
     """Create the Each pipe on top of the parent's assembly.

     The Each receives, in order: the coerced argument selector (if set),
     the function, and the coerced output selector (if set). The result
     is wrapped in a freshly named Pipe.
     """
     each_args = []
     if self.__argument_selector:
         selector = coerce_to_fields(self.__argument_selector)
         each_args.append(selector)
     each_args.append(self.__function)
     if self.__output_selector:
         selector = coerce_to_fields(self.__output_selector)
         each_args.append(selector)
     each = cascading.pipe.Each(parent.get_assembly(), *each_args)
     # An extra Pipe is placed after the Each because joins may otherwise
     # not work: Cascading apparently requires pipe names to be different.
     return cascading.pipe.Pipe(random_pipe_name('each'), each)
コード例 #7
0
ファイル: operators.py プロジェクト: ArturFis/pycascading
def rename(*args):
    """Rename fields in the tuple stream.

    Called with a single argument (a list of names), every field is
    renamed (Fields.ALL is used as the source). Otherwise the first
    argument is the set of fields to rename and the second argument is
    the list of new names.
    """
    rename_all = len(args) == 1
    source = Fields.ALL if rename_all else args[0]
    target = args[0] if rename_all else args[1]
    return SubAssembly(cascading.pipe.assembly.Rename,
                       coerce_to_fields(source),
                       coerce_to_fields(target))
コード例 #8
0
ファイル: operators.py プロジェクト: twitter/pycascading
def rename(*args):
    """Rename fields in the tuple stream.

    Called with a single argument (a list of names), every field is
    renamed (Fields.ALL is used as the source). Otherwise the first
    argument is the set of fields to rename and the second argument is
    the list of new names.
    """
    rename_all = len(args) == 1
    source = Fields.ALL if rename_all else args[0]
    target = args[0] if rename_all else args[1]
    return SubAssembly(cascading.pipe.assembly.Rename,
                       coerce_to_fields(source),
                       coerce_to_fields(target))
コード例 #9
0
ファイル: helpers.py プロジェクト: pfig/pycascading
def SelectFields(fields):
    """Keep only the selected fields of the tuple stream.

    Arguments:
    fields -- a list of fields to keep, or a Cascading Fields wildcard
    """
    factory = com.twitter.pycascading.SelectFields
    return factory(coerce_to_fields(fields))
コード例 #10
0
ファイル: helpers.py プロジェクト: seanjensengrey/pycascading
def SelectFields(fields):
    """Keep only the selected fields of the tuple stream.

    Arguments:
    fields -- a list of fields to keep, or a Cascading Fields wildcard
    """
    factory = com.twitter.pycascading.SelectFields
    return factory(coerce_to_fields(fields))
コード例 #11
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def un_group(*args):
    """Create a function.UnGroup with field-like arguments coerced.

    The first argument is always coerced. The second is coerced either
    element by element (when it is a list or tuple) or as a whole. The
    third is coerced element by element only when it is a list or tuple;
    any other value is forwarded untouched.
    """
    args = list(args)
    if len(args) >= 1:
        args[0] = coerce_to_fields(args[0])
    if len(args) >= 2:
        second = args[1]
        if isinstance(second, (list, tuple)):
            args[1] = [coerce_to_fields(item) for item in second]
        else:
            args[1] = coerce_to_fields(second)
    if len(args) >= 3 and isinstance(args[2], (list, tuple)):
        args[2] = [coerce_to_fields(item) for item in args[2]]
    return function.UnGroup(*args)
コード例 #12
0
ファイル: native.py プロジェクト: twitter/pycascading
def un_group(*args):
    """Create a function.UnGroup with field-like arguments coerced.

    The first argument is always coerced. The second is coerced either
    element by element (when it is a list or tuple) or as a whole. The
    third is coerced element by element only when it is a list or tuple;
    any other value is forwarded untouched.
    """
    args = list(args)
    if len(args) >= 1:
        args[0] = coerce_to_fields(args[0])
    if len(args) >= 2:
        second = args[1]
        if isinstance(second, (list, tuple)):
            args[1] = [coerce_to_fields(item) for item in second]
        else:
            args[1] = coerce_to_fields(second)
    if len(args) >= 3 and isinstance(args[2], (list, tuple)):
        args[2] = [coerce_to_fields(item) for item in args[2]]
    return function.UnGroup(*args)
コード例 #13
0
    def __create_args(self, pipe=None, aggregator=None, output_selector=None,
                      assertion_level=None, assertion=None, buffer=None,
                      argument_selector=None):
        """Build the positional argument list from the keyword values.

        The list starts with pipe.get_assembly(), followed by the coerced
        argument selector, then whichever of the aggregator, assertion and
        buffer parts were supplied. Presumably the list feeds a Cascading
        Every constructor -- confirm against the caller.
        """
        unnamed = self.__args
        if unnamed:
            # An unnamed argument is classified by its type: a Cascading
            # Aggregator becomes the aggregator, anything else the buffer
            operation = unnamed[0]
            if isinstance(operation, cascading.operation.Aggregator):
                aggregator = operation
            else:
                buffer = operation

        # Fill in defaults for the selectors that were not given
        if argument_selector is None:
            argument_selector = cascading.tuple.Fields.ALL
        if output_selector is None:
            # With an aggregator both the groupings and the results are
            # returned; otherwise only the results
            if aggregator is None:
                output_selector = cascading.tuple.Fields.RESULTS
            else:
                output_selector = cascading.tuple.Fields.ALL

        built = [pipe.get_assembly()]
        if argument_selector is not None:
            built.append(coerce_to_fields(argument_selector))
        if aggregator is not None:
            # For now the aggregator is assumed to be a plain Cascading one
            built.append(wrap_function(aggregator, CascadingAggregatorWrapper))
            if output_selector:
                built.append(coerce_to_fields(output_selector))
        if assertion_level is not None:
            built.extend([assertion_level, assertion])
        if buffer is not None:
            built.append(wrap_function(buffer, CascadingBufferWrapper))
            if output_selector:
                built.append(coerce_to_fields(output_selector))
        return built
コード例 #14
0
ファイル: every.py プロジェクト: ArturFis/pycascading
    def __create_args(self,
                      pipe=None,
                      aggregator=None,
                      output_selector=None,
                      assertion_level=None,
                      assertion=None,
                      buffer=None,
                      argument_selector=None):
        """Build the positional argument list from the keyword values.

        The list starts with pipe.get_assembly(), followed by the coerced
        argument selector, then whichever of the aggregator, assertion and
        buffer parts were supplied. Presumably the list feeds a Cascading
        Every constructor -- confirm against the caller.
        """
        unnamed = self.__args
        if unnamed:
            # An unnamed argument is classified by its type: a Cascading
            # Aggregator becomes the aggregator, anything else the buffer
            operation = unnamed[0]
            if isinstance(operation, cascading.operation.Aggregator):
                aggregator = operation
            else:
                buffer = operation

        # Fill in defaults for the selectors that were not given
        if argument_selector is None:
            argument_selector = cascading.tuple.Fields.ALL
        if output_selector is None:
            # With an aggregator both the groupings and the results are
            # returned; otherwise only the results
            if aggregator is None:
                output_selector = cascading.tuple.Fields.RESULTS
            else:
                output_selector = cascading.tuple.Fields.ALL

        built = [pipe.get_assembly()]
        if argument_selector is not None:
            built.append(coerce_to_fields(argument_selector))
        if aggregator is not None:
            # For now the aggregator is assumed to be a plain Cascading one
            built.append(wrap_function(aggregator, CascadingAggregatorWrapper))
            if output_selector:
                built.append(coerce_to_fields(output_selector))
        if assertion_level is not None:
            built.extend([assertion_level, assertion])
        if buffer is not None:
            built.append(wrap_function(buffer, CascadingBufferWrapper))
            if output_selector:
                built.append(coerce_to_fields(output_selector))
        return built
コード例 #15
0
ファイル: every.py プロジェクト: ArturFis/pycascading
 def __create_args(self,
                   group_name=None,
                   pipes=None,
                   group_fields=None,
                   sort_fields=None,
                   reverse_order=None,
                   pipe=None,
                   lhs_pipe=None,
                   rhs_pipe=None):
     """Build the positional argument list from the keyword values.

     An optional group name comes first, followed by the arguments of
     exactly one of three forms, tried in this order: a list of pipes,
     a single pipe, or a lhs/rhs pipe pair. Presumably the list feeds a
     Cascading GroupBy constructor -- confirm against the caller.
     """
     # Only group_fields may be given as an unnamed parameter
     if self.__args:
         group_fields = coerce_to_fields(self.__args[0])

     built = []
     if group_name:
         built.append(group_name)

     if pipes:
         built.append([member.get_assembly() for member in pipes])
         if group_fields:
             built.append(coerce_to_fields(group_fields))
             if sort_fields:
                 built.append(coerce_to_fields(sort_fields))
                 # Here reverse_order is only emitted after sort fields
                 if reverse_order:
                     built.append(reverse_order)
         return built

     if pipe:
         built.append(pipe.get_assembly())
         if group_fields:
             built.append(coerce_to_fields(group_fields))
             if sort_fields:
                 built.append(coerce_to_fields(sort_fields))
             # Unlike the pipes form, reverse_order does not depend on
             # sort_fields being present here
             if reverse_order:
                 built.append(reverse_order)
         return built

     if lhs_pipe:
         built.append(lhs_pipe.get_assembly())
         built.append(rhs_pipe.get_assembly())
         built.append(coerce_to_fields(group_fields))
     return built
コード例 #16
0
 def __create_args(self, group_name=None, pipes=None, group_fields=None,
                   sort_fields=None, reverse_order=None, pipe=None,
                   lhs_pipe=None, rhs_pipe=None):
     """Build the positional argument list from the keyword values.

     An optional group name comes first, followed by the arguments of
     exactly one of three forms, tried in this order: a list of pipes,
     a single pipe, or a lhs/rhs pipe pair. Presumably the list feeds a
     Cascading GroupBy constructor -- confirm against the caller.
     """
     # Only group_fields may be given as an unnamed parameter
     if self.__args:
         group_fields = coerce_to_fields(self.__args[0])

     built = []
     if group_name:
         built.append(group_name)

     if pipes:
         built.append([member.get_assembly() for member in pipes])
         if group_fields:
             built.append(coerce_to_fields(group_fields))
             if sort_fields:
                 built.append(coerce_to_fields(sort_fields))
                 # Here reverse_order is only emitted after sort fields
                 if reverse_order:
                     built.append(reverse_order)
         return built

     if pipe:
         built.append(pipe.get_assembly())
         if group_fields:
             built.append(coerce_to_fields(group_fields))
             if sort_fields:
                 built.append(coerce_to_fields(sort_fields))
             # Unlike the pipes form, reverse_order does not depend on
             # sort_fields being present here
             if reverse_order:
                 built.append(reverse_order)
         return built

     if lhs_pipe:
         built.append(lhs_pipe.get_assembly())
         built.append(rhs_pipe.get_assembly())
         built.append(coerce_to_fields(group_fields))
     return built
コード例 #17
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def average(*args):
    """Create an aggregator.Average, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Average()
    fields = coerce_to_fields(args[0])
    return aggregator.Average(fields, *args[1:])
コード例 #18
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def unique(*args):
    """Wrap assembly.Unique in a SubAssembly.

    The first argument is required (IndexError otherwise) and is passed
    through coerce_to_fields; the rest are forwarded unchanged.
    """
    fields = coerce_to_fields(args[0])
    return SubAssembly(assembly.Unique, fields, *args[1:])
コード例 #19
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def sum_by(*args):
    """Wrap assembly.SumBy in a SubAssembly.

    The first three arguments are required (IndexError otherwise) and are
    each passed through coerce_to_fields; the rest are forwarded unchanged.
    """
    # SumBy has at least 3 parameters
    coerced = [coerce_to_fields(args[position]) for position in (0, 1, 2)]
    return SubAssembly(assembly.SumBy, *(coerced + list(args[3:])))
コード例 #20
0
ファイル: native.py プロジェクト: twitter/pycascading
def min(*args):
    """Create an aggregator.Min, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Min()
    fields = coerce_to_fields(args[0])
    return aggregator.Min(fields, *args[1:])
コード例 #21
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def sum(*args):
    """Create an aggregator.Sum, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Sum()
    fields = coerce_to_fields(args[0])
    return aggregator.Sum(fields, *args[1:])
コード例 #22
0
ファイル: native.py プロジェクト: twitter/pycascading
def average(*args):
    """Create an aggregator.Average, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Average()
    fields = coerce_to_fields(args[0])
    return aggregator.Average(fields, *args[1:])
コード例 #23
0
ファイル: cogroup.py プロジェクト: twitter/pycascading
 def __create_args(self, group_name=None, pipes=None, group_fields=None,
                   declared_fields=None, result_group_fields=None,
                   joiner=None, pipe=None, num_self_joins=None,
                   lhs=None, lhs_group_fields=None,
                   rhs=None, rhs_group_fields=None):
     """Build the positional argument list from the keyword values.

     An optional group name (converted with str) comes first, followed
     by the arguments of exactly one of three forms, tried in this
     order: a lhs/rhs pair, a list of pipes, or a single pipe with a
     self-join count. Presumably the list feeds a Cascading CoGroup
     constructor -- confirm against the caller.
     """
     # Only group_fields may be given as an unnamed parameter
     if self.__args:
         group_fields = [coerce_to_fields(f) for f in self.__args[0]]

     def declared_and_result():
         # Coerced declared_fields, plus result_group_fields when given
         extra = [coerce_to_fields(declared_fields)]
         if result_group_fields:
             extra.append(coerce_to_fields(result_group_fields))
         return extra

     built = []
     if group_name:
         built.append(str(group_name))

     if lhs:
         built.append(lhs.get_assembly())
         built.append(coerce_to_fields(lhs_group_fields))
         built.append(rhs.get_assembly())
         built.append(coerce_to_fields(rhs_group_fields))
         if declared_fields:
             built.extend(declared_and_result())
         if joiner:
             built.append(joiner)
     elif pipes:
         built.append([member.get_assembly() for member in pipes])
         if group_fields:
             built.append([coerce_to_fields(f) for f in group_fields])
             if declared_fields:
                 built.extend(declared_and_result())
             else:
                 # This form always carries a declared-fields slot
                 built.append(None)
             # This form always carries a joiner; InnerJoin is the default
             if joiner is None:
                 joiner = cascading.pipe.cogroup.InnerJoin()
             built.append(joiner)
     elif pipe:
         built.append(pipe.get_assembly())
         built.append(coerce_to_fields(group_fields))
         built.append(int(num_self_joins))
         if declared_fields:
             built.extend(declared_and_result())
         if joiner:
             built.append(joiner)
     return built
コード例 #24
0
ファイル: cogroup.py プロジェクト: ArturFis/pycascading
 def __create_args(self,
                   group_name=None,
                   pipes=None,
                   group_fields=None,
                   declared_fields=None,
                   result_group_fields=None,
                   joiner=None,
                   pipe=None,
                   num_self_joins=None,
                   lhs=None,
                   lhs_group_fields=None,
                   rhs=None,
                   rhs_group_fields=None):
     """Build the positional argument list from the keyword values.

     An optional group name (converted with str) comes first, followed
     by the arguments of exactly one of three forms, tried in this
     order: a lhs/rhs pair, a list of pipes, or a single pipe with a
     self-join count. Presumably the list feeds a Cascading CoGroup
     constructor -- confirm against the caller.
     """
     # Only group_fields may be given as an unnamed parameter
     if self.__args:
         group_fields = [coerce_to_fields(f) for f in self.__args[0]]

     def declared_and_result():
         # Coerced declared_fields, plus result_group_fields when given
         extra = [coerce_to_fields(declared_fields)]
         if result_group_fields:
             extra.append(coerce_to_fields(result_group_fields))
         return extra

     built = []
     if group_name:
         built.append(str(group_name))

     if lhs:
         built.append(lhs.get_assembly())
         built.append(coerce_to_fields(lhs_group_fields))
         built.append(rhs.get_assembly())
         built.append(coerce_to_fields(rhs_group_fields))
         if declared_fields:
             built.extend(declared_and_result())
         if joiner:
             built.append(joiner)
     elif pipes:
         built.append([member.get_assembly() for member in pipes])
         if group_fields:
             built.append([coerce_to_fields(f) for f in group_fields])
             if declared_fields:
                 built.extend(declared_and_result())
             else:
                 # This form always carries a declared-fields slot
                 built.append(None)
             # This form always carries a joiner; InnerJoin is the default
             if joiner is None:
                 joiner = cascading.pipe.cogroup.InnerJoin()
             built.append(joiner)
     elif pipe:
         built.append(pipe.get_assembly())
         built.append(coerce_to_fields(group_fields))
         built.append(int(num_self_joins))
         if declared_fields:
             built.extend(declared_and_result())
         if joiner:
             built.append(joiner)
     return built
コード例 #25
0
ファイル: native.py プロジェクト: twitter/pycascading
def sum(*args):
    """Create an aggregator.Sum, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Sum()
    fields = coerce_to_fields(args[0])
    return aggregator.Sum(fields, *args[1:])
コード例 #26
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def last(*args):
    """Create an aggregator.Last, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Last()
    fields = coerce_to_fields(args[0])
    return aggregator.Last(fields, *args[1:])
コード例 #27
0
ファイル: native.py プロジェクト: agarnitin86/pycascading
def min(*args):
    """Create an aggregator.Min, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Min()
    fields = coerce_to_fields(args[0])
    return aggregator.Min(fields, *args[1:])
コード例 #28
0
ファイル: native.py プロジェクト: twitter/pycascading
def unique(*args):
    """Wrap assembly.Unique in a SubAssembly.

    The first argument is required (IndexError otherwise) and is passed
    through coerce_to_fields; the rest are forwarded unchanged.
    """
    fields = coerce_to_fields(args[0])
    return SubAssembly(assembly.Unique, fields, *args[1:])
コード例 #29
0
ファイル: native.py プロジェクト: twitter/pycascading
def sum_by(*args):
    """Wrap assembly.SumBy in a SubAssembly.

    The first three arguments are required (IndexError otherwise) and are
    each passed through coerce_to_fields; the rest are forwarded unchanged.
    """
    # SumBy has at least 3 parameters
    coerced = [coerce_to_fields(args[position]) for position in (0, 1, 2)]
    return SubAssembly(assembly.SumBy, *(coerced + list(args[3:])))
コード例 #30
0
ファイル: native.py プロジェクト: twitter/pycascading
def last(*args):
    """Create an aggregator.Last, coercing the first argument to fields.

    Without arguments the aggregator is created with no parameters; any
    arguments after the first are forwarded unchanged.
    """
    if not args:
        return aggregator.Last()
    fields = coerce_to_fields(args[0])
    return aggregator.Last(fields, *args[1:])