Example #1
0
def plan_sort(descriptor):
    """Build a source -> sort -> sink plan over ``../test/words.txt``.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)
    words = builder.source("../test/words.txt")
    # The sort key is each element's lower-cased form.
    ordered = words.sort(lambda word: word.lower())
    return ordered.sink("../test/output.txt", end="")
Example #2
0
def plan_tpch_q1(descriptor):
    """Build the (still partial) TPC-H Q1-style plan over ``../test/lineitem.txt``.

    The pipeline currently:
      1. splits every input line on ``|``;
      2. keeps rows whose field 10, parsed as ``%Y-%m-%d``, is on or before
         1998-09-02 (presumably the ship date -- confirm against the schema);
      3. projects each row to a 10-element list (see the legend below);
      4. writes the result to ``../test/output.txt``.

    Grouping and reduction are not wired in yet (see the TODO).

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    # TODO create reduce by
    plan = DataQuantaBuilder(descriptor)

    def reducer(obj1, obj2):
        # Placeholder for the future reduce-by step; not used by the
        # pipeline below yet.
        return obj1[0]

    # Column indices used below (from the split lineitem row):
    #   L_RETURNFLAG 8, L_LINESTATUS 9, L_QUANTITY 4, L_EXTENDEDPRICE 5,
    #   discount 6, tax 7
    sink = plan.source("../test/lineitem.txt") \
        .map(lambda elem: elem.split("|")) \
        .filter(lambda elem: datetime.datetime.strptime(elem[10], '%Y-%m-%d') <= datetime.datetime.strptime("1998-09-02", '%Y-%m-%d')) \
        .map(lambda elem:
           [elem[8], elem[9], elem[4], elem[5],
            float(elem[5]) * (1 - float(elem[6])),
            float(elem[5]) * (1 - float(elem[6])) * (1 + float(elem[7])),
            elem[4], elem[5],
            elem[6], 1]) \
        .sink("../test/output.txt", end="")
    # Steps still to be added once reduce-by exists:
    # .group_by(lambda elem: elem) \
    # .reduce_by(reducer) \
    # .flatmap(lambda elem: elem.split("|"))
    # .map(lambda elem: (elem, elem.split("|"))) \

    # Return the pipeline built above. The previous code returned
    # ``dq_source_b``, a name that does not exist in this function, so every
    # call raised NameError.
    return sink
Example #3
0
def plan_basic(descriptor):
    """Build the minimal plan: read ``../test/lines.txt`` and sink it unchanged.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)
    lines = builder.source("../test/lines.txt")
    return lines.sink("../test/output.txt", end="")
Example #4
0
def plan_sort_filter(descriptor):
    """Build a source -> sort -> filter -> sink plan over ``../test/words.txt``.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)
    words = builder.source("../test/words.txt")
    # Sort first (keyed on the lower-cased element), then keep only elements
    # whose string form starts with "f".
    ordered = words.sort(lambda word: word.lower())
    f_words = ordered.filter(lambda word: str(word).startswith("f"))
    return f_words.sink("../test/output.txt", end="")
Example #5
0
def plan_filter(descriptor):
    """Build a source -> filter -> sink plan over ``../test/numbers.txt``.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)
    numbers = builder.source("../test/numbers.txt")
    # Keep elements whose integer value is not divisible by 2.
    odd_numbers = numbers.filter(lambda number: int(number) % 2 != 0)
    return odd_numbers.sink("../test/output.txt", end="")
Example #6
0
def plan_wordcount(descriptor):
    """Build a source -> filter -> flatmap -> sink plan over ``../test/lineitem.txt``.

    Despite the name, no counting step is part of this plan: it only filters
    lines and splits them into ``|``-separated fields.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)
    lines = builder.source("../test/lineitem.txt")
    # Keep lines whose first "|"-separated field is shorter than 4 characters.
    short_keyed = lines.filter(lambda line: len(str(line).split("|")[0]) < 4)
    # Split each remaining line on "|" in the flatmap step.
    fields = short_keyed.flatmap(lambda line: str(line).split("|"))
    return fields.sink("../test/output.txt", end="")
Example #7
0
def plan_full_java(descriptor):
    """Build a plan that unions two text sources and sinks the result.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)

    # Sources are created in the same order as before: lines, then morelines.
    lines = builder.source("../test/lines.txt")
    more_lines = builder.source("../test/morelines.txt")

    combined = lines.union(more_lines)
    return combined.sink("../test/output.txt", end="")
Example #8
0
def plan_java_junction(descriptor):
    """Build a union -> filter -> sort -> sink plan over two text sources.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the final ``.sink(...)`` call returns.
    """
    builder = DataQuantaBuilder(descriptor)

    lines = builder.source("../test/lines.txt")
    more_lines = builder.source("../test/morelines.txt")

    combined = lines.union(more_lines)
    # Keep elements whose string form starts with "I", then sort keyed on the
    # lower-cased element.
    starts_with_i = combined.filter(lambda line: str(line).startswith("I"))
    ordered = starts_with_i.sort(lambda line: line.lower())
    return ordered.sink("../test/output.txt", end="")
Example #9
0
def plan_tpch_q1(descriptor):
    """Build the currently enabled part of the TPC-H Q1-style plan.

    Only source -> split-on-``|`` -> sink is active. The filter/projection
    stages are parked in an inert string literal below, and the reduce-by-key
    step is still a TODO.

    Args:
        descriptor: Passed unchanged to ``DataQuantaBuilder``.

    Returns:
        Whatever the ``.sink(...)`` call returns.
    """
    # TODO create reduce by
    builder = DataQuantaBuilder(descriptor)

    def reducer(left, right):
        # Keep the first two fields of ``left`` and add fields 2..9 pairwise.
        # Not used yet; intended for the pending reduce_by_key step.
        return (
            left[0],
            left[1],
            *(left[i] + right[i] for i in range(2, 10)),
        )

    lineitems = builder.source("../test/lineitem.txt")
    split_rows = lineitems.map(lambda line: line.split("|"))
    sink = split_rows.sink("../test/output.txt", end="")
    # Disabled remainder of the pipeline, kept verbatim as a no-op string.
    """
        .filter(lambda elem: datetime.datetime.strptime(elem[10], '%Y-%m-%d') <= datetime.datetime.strptime('1998-09-02', '%Y-%m-%d')) \
        .map(lambda elem:
             [elem[8], elem[9], elem[4], elem[5],
              float(elem[5]) * (1 - float(elem[6])),
              float(elem[5]) * (1 - float(elem[6])) * (1 + float(elem[7])),
              elem[4], elem[5],
              elem[6], 1]) \
        .sink("../test/output.txt", end="")"""
    # .reduce_by_key([0, 1], reducer) \

    return sink