Example #1
 def test_left_outer_join(self):
     sc = SparkContext(master='', conf=SparkConf())
     rdd1 = sc.parallelize([('A', [1, 2, 3]), ('B', [2,3,4])])
     rdd2 = sc.parallelize([('A', [1, 2, 3]), ('B', [2,3,4]), ('B', [4,5,6])])
     out = rdd1.leftOuterJoin(rdd2).collect()
     print(out)
     self.assertEqual(len(out), 2)
Example #2
    def test_word_count_3(self):

        lines = [
            'apple',
            'apple banana',
            'apple banana',
            'apple banana grape',
            'banana grape',
            'banana'
        ]

        expected_output = [
            ('apple', 4),
            ('banana', 5),
            ('grape', 2),
        ]

        sc = SparkContext(master='', conf=SparkConf())

        rdd = sc.parallelize(lines)
        rdd = rdd.flatMap(lambda x: x.split(' '))
        rdd = rdd.map(lambda word: (word, 1))
        rdd = rdd.reduceByKey(lambda a, b: a + b)

        output = rdd.collect()
        self.assertEquals(sorted(output), sorted(expected_output))
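A plain-Python trace of the same pipeline over its first two lines, added to show what each stage produces; the variable names below are illustrative and not part of the original test.

lines = ['apple', 'apple banana']
words = [w for line in lines for w in line.split(' ')]   # flatMap -> ['apple', 'apple', 'banana']
pairs = [(w, 1) for w in words]                          # map -> [('apple', 1), ('apple', 1), ('banana', 1)]
counts = {}
for w, n in pairs:                                       # reduceByKey(lambda a, b: a + b)
    counts[w] = counts.get(w, 0) + n
assert sorted(counts.items()) == [('apple', 2), ('banana', 1)]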
Example #4
 def test_empty_RDD(self):
     ctx = SparkContext()
     rdd = ctx.emptyRDD()
     self.assertEquals(type(rdd), RDD)
     l = rdd.collect()
     self.assertEqual(type(l), list)
     self.assertEquals(len(l), 0)
Example #6
    def test_combineByKey(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([
            ('A', 1),
            ('B', 2),
            ('B', 3),
            ('C', 4),
            ('C', 5),
            ('A', 6),
        ])

        def create_combiner(a):
            return [a]

        def merge_value(a, b):
            a.append(b)
            return a

        def merge_combiners(a, b):
            a.extend(b)
            return a

        rdd = rdd.combineByKey(create_combiner, merge_value, merge_combiners)
        self.assertListEqual(
            rdd.collect(),
            [('A', [1, 6]), ('B', [2, 3]), ('C', [4, 5])],
        )
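For comparison, a hedged sketch of the same per-key grouping using groupByKey() plus mapValues(list), a pattern that also appears in the Wavelet example later in this listing; the result is sorted so the check does not rely on dummy_spark's key ordering.

from dummy_spark import SparkConf, SparkContext

sc = SparkContext(master='', conf=SparkConf())
rdd = sc.parallelize([('A', 1), ('B', 2), ('B', 3), ('C', 4), ('C', 5), ('A', 6)])
grouped = rdd.groupByKey().mapValues(list)
# value order assumed to be encounter order, as the combineByKey assertion above implies
assert sorted(grouped.collect()) == [('A', [1, 6]), ('B', [2, 3]), ('C', [4, 5])]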
Example #7
def test_minion_perform_deliver_success():
    workflow_id = '6666'
    app_id = '1000'
    job_id = '1'
    out_queue = 'queue_2000'
    sconf = SparkConf()
    sc = SparkContext(master='', conf=sconf)

    rdd = sc.parallelize(get_records())

    df0 = DataFrame(rdd=rdd)
    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        redis_conn = mocked_redis()
        state_control = StateControlRedis(redis_conn)

        data = {
            'workflow_id': workflow_id,
            'app_id': app_id,
            'job_id': job_id,
            'type': 'deliver',
            'task_id': '033f-284ab-28987e',
            'port': 'port0',
            'output': out_queue,
            'workflow': ''
        }
        state_control.push_app_queue(app_id, json.dumps(data))
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event
        minion._state = {
            data['task_id']: {
                'port0': {
                    'output': df0,
                    'sample': []
                },
                'time': 35.92
            }
        }
        minion._process_message()

        # Discard first status message
        state_control.pop_app_output_queue(app_id, False)

        msg = json.loads(state_control.pop_app_output_queue(app_id, False))
        assert msg['status'] == 'SUCCESS', 'Invalid status'
        assert msg['code'] == minion.MNN002[0], 'Invalid code'

        # CSV data
        csv_records = '\n'.join(
            map(dataframe_util.convert_to_csv, get_records()))

        result = json.loads(state_control.pop_queue(out_queue, False))
        assert result['sample'] == csv_records, 'Wrong CSV generated'
Example #8
 def test_text_file(self):
     ctx = SparkContext()
     for start, stop, step in self.TEST_RANGES:
         with NamedTemporaryFile(mode='w') as f:
             l = ['{}\n'.format(x) for x in range(start, stop, step)]
             for x in l:
                 f.write(x)
             f.flush()
             f.seek(0)
             rdd = ctx.textFile(f.name)
             self.assertEquals(l, rdd.collect())
Example #10
def spark_ctx():
    """A simple spark context."""

    if IF_DUMMY_SPARK:
        from dummy_spark import SparkConf, SparkContext
        conf = SparkConf()
        ctx = SparkContext(master='', conf=conf)
    else:
        from pyspark import SparkConf, SparkContext
        conf = SparkConf().setMaster('local[2]').setAppName('drudge-unittest')
        ctx = SparkContext(conf=conf)

    return ctx
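A hedged sketch of wiring a factory like this into pytest as a fixture; the fixture body below is simplified to the dummy backend only, and test_parallelize_roundtrip is an illustrative name rather than a test from the original project.

import pytest

from dummy_spark import SparkConf, SparkContext


@pytest.fixture
def spark_ctx():
    # same shape as the factory above, restricted to dummy_spark for brevity
    return SparkContext(master='', conf=SparkConf())


def test_parallelize_roundtrip(spark_ctx):
    rdd = spark_ctx.parallelize([1, 2, 3])
    assert sorted(rdd.collect()) == [1, 2, 3]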
Example #11
def test_nonzero_by_cartan():

    # For pairing algebra, N_p * P_p or Pdag_p * N_p should be ZERO
    #   That is what we test here

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p = names.A_dumms[0]

    # Operators
    N_p = names.N_[p]
    Pdag_p = names.P_dag[p]
    P_p = names.P_[p]

    # expressions
    expr1 = dr.simplify(Pdag_p * N_p)
    expr2 = dr.simplify(N_p * P_p)

    # assertions
    assert expr1 == 0
    assert expr2 == 0
Example #12
def test_fermi_anti_comm_rules():

    # Test anti-commutation relations for the fermionic algebra

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q = names.A_dumms[:2]

    # fermion operators
    cdag_p_up = names.c_dag[p, UP]
    cdag_p_dn = names.c_dag[p, DN]
    c_q_up = names.c_[q, UP]
    c_q_dn = names.c_[q, DN]

    # Anti-commutation relations
    expr1 = dr.simplify(cdag_p_up * c_q_dn + c_q_dn * cdag_p_up)
    expr2 = dr.simplify(cdag_p_dn * c_q_up + c_q_up * cdag_p_dn)
    expr3 = dr.simplify(cdag_p_up * c_q_up + c_q_up * cdag_p_up)
    expr4 = dr.simplify(cdag_p_dn * c_q_dn + c_q_dn * cdag_p_dn)

    # Assertions
    assert expr1 == 0
    assert expr2 == 0
    assert dr.simplify(expr3 - delK(p, q)) == 0
    assert dr.simplify(expr4 - delK(p, q)) == 0
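For reference, these four assertions encode the canonical fermionic anti-commutation relation

    \{ c^\dagger_{p\sigma},\, c_{q\tau} \} = \delta_{pq}\,\delta_{\sigma\tau}

so the mixed-spin anti-commutators vanish and the equal-spin ones reduce to delK(p, q).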
Example #13
def test_spinflip_su2_comm_rules():

    # Test commutation relations for the spin-flip SU2 algebra

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q = names.A_dumms[:2]

    # BCS Operators
    Jp_p = names.J_p[p]
    Jm_q = names.J_m[q]
    Jz_p = names.J_z[p]
    Jz_q = names.J_z[q]

    # Commutation relations
    expr1 = dr.simplify(Jp_p * Jm_q - Jm_q * Jp_p)
    expr2 = dr.simplify(Jz_q * Jp_p - Jp_p * Jz_q)
    expr3 = dr.simplify(Jz_p * Jm_q - Jm_q * Jz_p)

    # Assertions
    assert dr.simplify(expr1 - delK(p, q) * 2 * Jz_p) == 0
    assert dr.simplify(expr2 - delK(p, q) * Jp_p) == 0
    assert dr.simplify(expr3 + delK(p, q) * Jm_q) == 0
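Restated for reference, these are the standard SU(2) commutation relations for the spin-flip operators:

    [J^+_p, J^-_q] = 2\,\delta_{pq}\,J^z_p, \qquad [J^z_q, J^+_p] = \delta_{pq}\,J^+_p, \qquad [J^z_p, J^-_q] = -\delta_{pq}\,J^-_q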
Example #14
def test_unique_indices_functionality():

    # Test for unique indices functionality

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q, r, s = names.A_dumms[:4]

    # list of unique indices should be empty
    assert dr.unique_del_lists == []

    # declare r and s to be unique indices
    dr.unique_indices([r, s])

    # check unique indices list now
    # Basically, the unique-index list is a list of index sets
    assert dr.unique_del_lists[0] == {r, s}

    # Expression evaluation
    e_pq = names.e_[p, q]
    expr = dr.simplify((delK(r, s) + delK(p, r)) * e_pq)
    expr2 = dr.simplify(delK(p, r) * e_pq)

    # assertion
    assert dr.simplify(expr - expr2) == 0
Example #15
 def test_sortByKey_descending(self):
     sc = SparkContext(master='', conf=SparkConf())
     rdd = (sc.parallelize([
         ('a', 1),
         ('b', 2),
         ('c', 3),
         ('d', 4),
         ('e', 5),
     ]).sortByKey(ascending=False))
     self.assertListEqual(
         rdd.collect(),
         [
             ('e', 5),
             ('d', 4),
             ('c', 3),
             ('b', 2),
             ('a', 1),
         ],
     )
Example #16
def test_canonical_ordering():

    # Test the canonical ordering functionality

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q, r, s = names.A_dumms[:4]

    # Operators
    cdag_p_up = names.c_dag[p, UP]
    cdag_p_dn = names.c_dag[p, DN]
    c_q_up = names.c_[q, UP]
    c_q_dn = names.c_[q, DN]

    Pdag_p = names.P_dag[p]
    N_q = names.N_[q]
    P_r = names.P_[r]

    Jp_p = names.J_p[p]
    Jz_q = names.J_z[q]
    Jm_r = names.J_m[r]

    # Let all the indices be unique - so no commutation terms arise
    dr.unique_indices([p, q, r, s])

    # expressions for intra-algebra ordering
    expr1 = dr.simplify(c_q_up * c_q_dn * cdag_p_up * cdag_p_dn)
    expr2 = dr.simplify(P_r * N_q * Pdag_p)
    expr3 = dr.simplify(Jm_r * Jz_q * Jp_p)

    # assertions
    assert dr.simplify(expr1 + cdag_p_up * cdag_p_dn * c_q_dn * c_q_up) == 0
    assert dr.simplify(expr2 - Pdag_p * N_q * P_r) == 0
    assert dr.simplify(expr3 - Jp_p * Jz_q * Jm_r) == 0

    # expressions for inter-algebra ordering
    Pdag_r = names.P_dag[r]
    N_r = names.N_[r]
    expr1a = dr.simplify(cdag_p_up * cdag_p_dn * Pdag_r * N_r * P_r)

    Jp_q = names.J_p[q]
    Jm_q = names.J_m[q]
    expr2a = dr.simplify(cdag_p_up * cdag_p_dn * Pdag_r * Jp_q * Jz_q * Jm_q)

    # assertions
    assert dr.simplify(expr1a -
                       Pdag_r * N_r * P_r * cdag_p_up * cdag_p_dn) == 0
    assert dr.simplify(expr2a - Pdag_r * Jp_q * Jz_q * Jm_q * cdag_p_up *
                       cdag_p_dn) == 0
Example #17
def test_nilpotency_of_operators():

    # Test the nilpotency of fermi and Pairing-SU2 operators

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q, r = names.A_dumms[:3]

    # Operators
    cdag_p = names.c_dag[p, UP]
    c_p = names.c_[p, UP]

    N_q = names.N_[q]
    Pdag_q = names.P_dag[q]
    P_q = names.P_[q]

    Jp_r = names.J_p[r]
    Jm_r = names.J_m[r]
    Jz_r = names.J_z[r]

    # Expressions
    expr1a = dr.simplify(cdag_p * cdag_p)
    expr1b = dr.simplify(c_p * c_p)

    expr2a = dr.simplify(Pdag_q * Pdag_q)
    expr2b = dr.simplify(P_q * P_q)

    expr3a = dr.simplify(Jp_r * Jp_r)
    expr3b = dr.simplify(Jm_r * Jm_r)

    # assertions
    assert expr1a == 0
    assert expr1b == 0

    assert expr2a == 0
    assert expr2b == 0

    assert expr3a == 0
    assert expr3b == 0
Example #18
class Wavelet:
    def __init__(self, context, file, sample_size):
        self.sc = SparkContext(context, 'Wavelet')
        self.file_size = self.sc.textFile(file).count()
        self.sample_size = sample_size
        self.graph_size = int(self.file_size / self.sample_size)
        self.file = file

    def wavelet(self, column, name):
        sample_size = self.sample_size
        sc = self.sc
        link = self.file
        length = self.file_size

        tab = []
        for i in range(0, length):
            tab.append(length - i)

        def get_key(iterator, size):
            key = int(iterator/size)
            iterator += 1
            return key

        rdd = sc\
            .textFile(link)\
            .filter(lambda line: name not in line)\
            .map(lambda line: (get_key(tab.pop(), sample_size), re.split(r';', line)[column]))\
            .groupByKey().mapValues(list)\
            .map(lambda line: (line[0], pywt.dwt(line[1], 'db1')[1]))

        def get_previous_line(line):
            iterator = line[0]
            if iterator == 0:
                prev = rdd.filter(lambda my_line: my_line[0] == iterator).collect()[0][1]
            else:
                prev = rdd.filter(lambda my_line: my_line[0] == iterator - 1).collect()[0][1]
            d = distance.euclidean(line[1], prev)
            return d

        return rdd\
            .map(lambda line: get_previous_line(line))\
            .collect()
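A hedged usage sketch for the class above; the file name, column index, and header keyword are placeholders, not values from the original project, and the input is assumed to be a semicolon-separated numeric file whose header lines contain the keyword.

# split the file into sample_size-line chunks, wavelet-transform column 2 of each
# chunk, and return the Euclidean distance between consecutive chunks' detail coefficients
w = Wavelet('', 'measurements.csv', sample_size=64)
distances = w.wavelet(column=2, name='timestamp')
print(distances)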
Example #19
def test_pairing_comm_rules():

    # Test commutation relations for the pairing SU2 algebra

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q = names.A_dumms[:2]

    # BCS Operators
    Pdag_p = names.P_dag[p]
    P_q = names.P_[q]
    N_p = names.N[p]
    Nup_p = dr.N_up[p]
    Ndn_p = dr.N_dn[p]
    N_q = names.N[q]
    Nup_q = dr.N_up[q]
    Ndn_q = dr.N_dn[q]

    # Commutation relations
    expr1 = dr.simplify(Pdag_p * P_q - P_q * Pdag_p)
    expr2 = dr.simplify(N_q * Pdag_p - Pdag_p * N_q)
    expr2a = dr.simplify(Nup_q * Pdag_p - Pdag_p * Nup_q)
    expr2b = dr.simplify(Ndn_q * Pdag_p - Pdag_p * Ndn_q)
    expr3 = dr.simplify(N_p * P_q - P_q * N_p)
    expr3a = dr.simplify(Nup_p * P_q - P_q * Nup_p)
    expr3b = dr.simplify(Ndn_p * P_q - P_q * Ndn_p)

    # Assertions
    assert dr.simplify(expr1 - delK(p, q) * (names.N[p] - 1)) == 0
    assert dr.simplify(expr2 - 2 * delK(p, q) * Pdag_p) == 0
    assert dr.simplify(expr2a - delK(p, q) * Pdag_p) == 0
    assert dr.simplify(expr2b - delK(p, q) * Pdag_p) == 0
    assert dr.simplify(expr3 + 2 * delK(p, q) * P_q) == 0
    assert dr.simplify(expr3a + delK(p, q) * P_q) == 0
    assert dr.simplify(expr3b + delK(p, q) * P_q) == 0
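Restated for reference, the pairing-SU(2) relations checked by these assertions are

    [P^\dagger_p, P_q] = \delta_{pq}\,(N_p - 1), \qquad [N_q, P^\dagger_p] = 2\,\delta_{pq}\,P^\dagger_p, \qquad [N_p, P_q] = -2\,\delta_{pq}\,P_q

with the spin-resolved operators N_up and N_dn each carrying half of the number-operator coefficient.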
Example #20
def test_get_seniority_zero():

    # Get seniority-zero expressions for some results we already know;
    #   this indirectly also tests extract_su2

    # Initialise drudge
    ctx = SparkContext()
    dr = AGPFermi(ctx)

    # namespace
    names = dr.names

    # Indices
    p, q, r, s = names.A_dumms[:4]

    # Operators
    cdag_p_up = names.c_dag[p, UP]
    cdag_p_dn = names.c_dag[p, DN]
    c_p_up = names.c_[p, UP]
    c_p_dn = names.c_[p, DN]

    # expression1: should simplify to N_p * N_p / 4
    expr1a = dr.simplify(cdag_p_up * cdag_p_dn * c_p_dn * c_p_up)
    expr1 = dr.get_seniority_zero(expr1a)
    res1 = dr.simplify(names.N_[p] * names.N_[p] / 4)

    # expression2: should simplify to 2 * Pdag_p * P_q (when p != q)
    e_pq = names.e_[p, q]
    dr.unique_indices([p, q])
    expr2a = dr.simplify(e_pq * e_pq)
    expr2 = dr.get_seniority_zero(expr2a)
    res2 = dr.simplify(names.P_dag[p] * names.P_[q] * 2)

    # assertions
    assert dr.simplify(expr1 - res1) == 0
    assert dr.simplify(expr2 - res2) == 0
Example #21
 def test_values(self):
     sc = SparkContext(master='', conf=SparkConf())
     rdd = sc.parallelize([('A', 1), ('B', 2), ('C', 3)])
     self.assertListEqual(rdd.values().collect(), [1, 2, 3])
Example #22
 def __init__(self, context, file, sample_size):
     self.sc = SparkContext(context, 'Wavelet')
     self.file_size = self.sc.textFile(file).count()
     self.sample_size = sample_size
     self.graph_size = int(self.file_size / self.sample_size)
     self.file = file
Example #23
    def test_not_implemented_methods(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([])

        with self.assertRaises(NotImplementedError):
            rdd._pickled()

        with self.assertRaises(NotImplementedError):
            rdd.mapPartitionsWithIndex(None, None,)

        with self.assertRaises(NotImplementedError):
            rdd._computeFractionForSampleSize(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.pipe(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.reduce(None)

        with self.assertRaises(NotImplementedError):
            rdd.treeReduce(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.fold(None, None,)

        with self.assertRaises(NotImplementedError):
            rdd.aggregate(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.treeAggregate(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.stats()

        with self.assertRaises(NotImplementedError):
            rdd.histogram(None)

        with self.assertRaises(NotImplementedError):
            rdd.variance()

        with self.assertRaises(NotImplementedError):
            rdd.stdev()

        with self.assertRaises(NotImplementedError):
            rdd.sampleStdev()

        with self.assertRaises(NotImplementedError):
            rdd.sampleVariance()

        with self.assertRaises(NotImplementedError):
            rdd.countByValue()

        with self.assertRaises(NotImplementedError):
            rdd.top(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.takeOrdered(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsNewAPIHadoopDataset(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsNewAPIHadoopFile(None, None, None, None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsHadoopDataset(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsHadoopFile(None, None, None, None, None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsSequenceFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsPickleFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsTextFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.collectAsMap()

        with self.assertRaises(NotImplementedError):
            rdd.keys()

        with self.assertRaises(NotImplementedError):
            rdd.values()

        with self.assertRaises(NotImplementedError):
            rdd.reduceByKeyLocally(None)

        with self.assertRaises(NotImplementedError):
            rdd.countByKey()

        with self.assertRaises(NotImplementedError):
            rdd.join(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.leftOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.rightOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.fullOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.partitionBy(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.combineByKey(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.aggregateByKey(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.foldByKey(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd._can_spill()

        with self.assertRaises(NotImplementedError):
            rdd._memory_limit()

        with self.assertRaises(NotImplementedError):
            rdd.groupWith(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.sampleByKey(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.subtractByKey(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.subtract(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.keyBy(None)

        with self.assertRaises(NotImplementedError):
            rdd.repartition(None)

        with self.assertRaises(NotImplementedError):
            rdd.coalesce(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.zipWithUniqueId()

        with self.assertRaises(NotImplementedError):
            rdd.toDebugString()

        with self.assertRaises(NotImplementedError):
            rdd.getStorageLevel()

        with self.assertRaises(NotImplementedError):
            rdd._to_java_object_rdd()
Example #24
"""Configures a simple drudge for reduced BCS model."""

from dummy_spark import SparkContext
#from pyspark import SparkContext

from sympy import Symbol, collect, Add, Mul, Integer, symbols, factor, diff, IndexedBase
from bcs import ReducedBCSDrudge
from drudge import InvariantIndexable, Perm, IDENT, NEG

ctx = SparkContext()
raise_ = ReducedBCSDrudge.DEFAULT_RAISE
lower = ReducedBCSDrudge.DEFAULT_LOWER
dr = ReducedBCSDrudge(ctx,
                      interact=InvariantIndexable(Symbol('G')),
                      specials={(raise_, lower): 2 * raise_ * lower - 1})

#====================================
# AGP expected values:
#====================================
# case: z00
Z00 = IndexedBase('Z00')

# case: z02
Z02 = IndexedBase('Z02')
dr.set_symm(Z02, Perm([1, 0], IDENT), valence=2)

# case: z04
Z04 = IndexedBase('Z04')
dr.set_symm(Z04,
            Perm([1, 0, 2, 3], IDENT),
            Perm([0, 1, 3, 2], IDENT),
Example #25
    def test_not_implemented_methods(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([])

        with self.assertRaises(NotImplementedError):
            rdd._pickled()

        with self.assertRaises(NotImplementedError):
            rdd.mapPartitionsWithIndex(
                None,
                None,
            )

        with self.assertRaises(NotImplementedError):
            rdd._computeFractionForSampleSize(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.pipe(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.reduce(None)

        with self.assertRaises(NotImplementedError):
            rdd.treeReduce(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.fold(
                None,
                None,
            )

        with self.assertRaises(NotImplementedError):
            rdd.aggregate(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.treeAggregate(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.stats()

        with self.assertRaises(NotImplementedError):
            rdd.histogram(None)

        with self.assertRaises(NotImplementedError):
            rdd.variance()

        with self.assertRaises(NotImplementedError):
            rdd.stdev()

        with self.assertRaises(NotImplementedError):
            rdd.sampleStdev()

        with self.assertRaises(NotImplementedError):
            rdd.sampleVariance()

        with self.assertRaises(NotImplementedError):
            rdd.countByValue()

        with self.assertRaises(NotImplementedError):
            rdd.top(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.takeOrdered(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsNewAPIHadoopDataset(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsNewAPIHadoopFile(None, None, None, None, None, None,
                                       None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsHadoopDataset(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsHadoopFile(None, None, None, None, None, None, None,
                                 None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsSequenceFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsPickleFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsTextFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.collectAsMap()

        with self.assertRaises(NotImplementedError):
            rdd.reduceByKeyLocally(None)

        with self.assertRaises(NotImplementedError):
            rdd.countByKey()

        with self.assertRaises(NotImplementedError):
            rdd.join(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.rightOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.fullOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.foldByKey(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd._can_spill()

        with self.assertRaises(NotImplementedError):
            rdd._memory_limit()

        with self.assertRaises(NotImplementedError):
            rdd.groupWith(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.sampleByKey(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.subtract(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.coalesce(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.toDebugString()

        with self.assertRaises(NotImplementedError):
            rdd.getStorageLevel()

        with self.assertRaises(NotImplementedError):
            rdd._to_java_object_rdd()
Example #26
 def test_sortBy_descending(self):
     sc = SparkContext(master='', conf=SparkConf())
     rdd = (sc.parallelize([1, 2, 3, 4, 5]).sortBy(lambda x: x,
                                                   ascending=False))
     self.assertListEqual(rdd.collect(), [5, 4, 3, 2, 1])
Example #27
 def test_with_block(self):
     with SparkContext():
         pass
     self.assertTrue(True)
Example #28
 def test_version(self):
     ctx = SparkContext()
     self.assertEquals(ctx.version, SparkContext.DUMMY_VERSION)
Example #29
import os
import random

from dummy_spark import SparkContext, SparkConf
from dummy_spark.sql import SQLContext

__author__ = 'willmcginnis'

# make a spark conf
sconf = SparkConf()

# set some property (won't do anything)
sconf.set('spark.executor.extraClassPath', 'foo')

# use the spark conf to make a spark context
sc = SparkContext(master='', conf=sconf)

# set the log level (also doesn't do anything)
sc.setLogLevel('INFO')

# maybe make a useless sqlcontext (nothing implemented here yet)
sqlctx = SQLContext(sc)

# addPyFile just appends to sys.path
sc.addPyFile(os.path.dirname(__file__))

# do some hadoop configuration into the ether
sc._jsc.hadoopConfiguration().set('foo', 'bar')

# maybe make some data
rdd = sc.parallelize([1, 2, 3, 4, 5])
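Continuing the walkthrough with one more hedged step: a transformation followed by collect(), mirroring the map/collect pattern used throughout these tests; `squared` is an illustrative name.

# run a transformation and pull the result back as a plain Python list
squared = rdd.map(lambda x: x * x)
print(squared.collect())  # expected: [1, 4, 9, 16, 25]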
Example #30
 def test_parallelize_set(self):
     ctx = SparkContext()
     for start, stop, step in self.TEST_RANGES:
         l = list(range(start, stop, step))
         rdd = ctx.parallelize(set(l))
         self.assertEquals(sorted(l), sorted(rdd.collect()))
Example #31
 def test_range(self):
     ctx = SparkContext()
     for start, stop, step in self.TEST_RANGES:
         l = list(range(start, stop, step))
         rdd = ctx.range(start, stop, step)
         self.assertEquals(l, rdd.collect())
Example #33
 def test_add_py_file(self):
     with SparkContext() as ctx:
         ctx.addPyFile(__file__)
     self.assertTrue(True)
Example #34
 def test_hadoop_config(self):
     ctx = SparkContext()
     jvm = ctx._jsc
     hc = jvm.hadoopConfiguration()
     hc.set('key', 'value')
     self.assertEquals(hc.get('key'), 'value')
Example #35
 def test_subtractByKey(self):
     sc = SparkContext(master='', conf=SparkConf())
     rdd1 = sc.parallelize([('A', 1), ('B', 2), ('C', 3)])
     rdd2 = sc.parallelize([('A', None), ('C', None)])
     self.assertListEqual(rdd1.subtractByKey(rdd2).collect(), [('B', 2)])
Example #36
    def test_not_implemented_methods(self):
        ctx = SparkContext()

        with self.assertRaises(NotImplementedError):
            ctx._checkpointFile(None, None)

        with self.assertRaises(NotImplementedError):
            ctx._dictToJavaMap(None)

        with self.assertRaises(NotImplementedError):
            ctx._getJavaStorageLevel(None)

        with self.assertRaises(NotImplementedError):
            ctx.accumulator(None)

        with self.assertRaises(NotImplementedError):
            ctx.addFile(None)

        with self.assertRaises(NotImplementedError):
            ctx.binaryFiles(None)

        with self.assertRaises(NotImplementedError):
            ctx.binaryRecords(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.broadcast(None)

        with self.assertRaises(NotImplementedError):
            ctx.cancelAllJobs()

        with self.assertRaises(NotImplementedError):
            ctx.cancelJobGroup(None)

        with self.assertRaises(NotImplementedError):
            ctx.clearFiles()

        with self.assertRaises(NotImplementedError):
            ctx.dump_profiles(None)

        with self.assertRaises(NotImplementedError):
            ctx.getLocalProperty(None)

        with self.assertRaises(NotImplementedError):
            ctx.hadoopFile(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.hadoopRDD(None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.newAPIHadoopFile(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.pickleFile(None)

        with self.assertRaises(NotImplementedError):
            ctx.runJob(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.sequenceFile(None)

        with self.assertRaises(NotImplementedError):
            ctx.setCheckpointDir(None)

        with self.assertRaises(NotImplementedError):
            ctx.setJobGroup(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.setLocalProperty(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.show_profiles()

        with self.assertRaises(NotImplementedError):
            ctx.sparkUser()

        with self.assertRaises(NotImplementedError):
            ctx.statusTracker()

        with self.assertRaises(NotImplementedError):
            ctx.union(None)

        with self.assertRaises(NotImplementedError):
            ctx.wholeTextFiles(None)
Example #37
class RDDTests(unittest.TestCase):

    SPARK_CONTEXT = SparkContext(master='', conf=SparkConf())
    TEST_RANGES = [
        (0, 0, 1),
        (0, 10, 1),
        (0, 10, 2),
        (0, 100, 13),
        (0, 1000, 17),
        (0, 10000, 31),
    ]
    SAMPLE_FRACTION = 0.10
    SAMPLE_SEED = 1234

    def test_init(self):
        for start, stop, step in self.TEST_RANGES:
            l = list(range(start, stop, step))
            rdd = RDD(l, self.SPARK_CONTEXT)
            self.assertEquals(l, rdd.collect())

            s = set(range(100))
            rdd = RDD(s, self.SPARK_CONTEXT)
            self.assertEquals(sorted(list(s)), sorted(rdd.collect()))

        t = (1, 2, 3)
        with self.assertRaises(AttributeError):
            RDD(t, self.SPARK_CONTEXT)

        with self.assertRaises(AttributeError):
            RDD('', self.SPARK_CONTEXT)

    def test_ctx(self):
        rdd = RDD([], self.SPARK_CONTEXT)
        self.assertEquals(rdd.ctx, self.SPARK_CONTEXT)

    @staticmethod
    def square(x):
        return x**2

    def test_map(self):
        for start, stop, step in self.TEST_RANGES:
            l1 = range(start, stop, step)
            l2 = map(RDDTests.square, l1)
            rdd = RDD(list(l1), self.SPARK_CONTEXT)
            rdd = rdd.map(RDDTests.square)
            self.assertEquals(rdd.collect(), list(l2))

    @staticmethod
    def triplicate(x):
        return [x, x, x]

    def test_flat_map(self):
        for start, stop, step in self.TEST_RANGES:
            l1 = range(start, stop, step)
            l2 = map(RDDTests.triplicate, l1)
            l3 = []
            for sl in l2:
                l3.extend(sl)
            rdd = RDD(list(l1), self.SPARK_CONTEXT)
            rdd = rdd.flatMap(RDDTests.triplicate)
            self.assertEquals(rdd.collect(), list(l3))

    @staticmethod
    def is_square(x):
        return x == x**2

    def test_filter(self):
        for start, stop, step in self.TEST_RANGES:
            l1 = range(start, stop, step)
            l2 = filter(RDDTests.is_square, l1)
            rdd = RDD(list(l1), self.SPARK_CONTEXT)
            rdd = rdd.filter(RDDTests.is_square)
            self.assertEquals(rdd.collect(), list(l2))

    @staticmethod
    def return_one(x):
        return x - x + 1

    def test_distinct(self):
        for start, stop, step in self.TEST_RANGES:
            l = range(start, stop, step)
            rdd = RDD(list(l), self.SPARK_CONTEXT)
            rdd = rdd.map(RDDTests.return_one)
            rdd = rdd.distinct()
            if len(l) > 0:
                self.assertEquals(rdd.collect(), [1])
            else:
                self.assertEquals(rdd.collect(), [])

    def test_sample_with_replacement(self):
        for start, stop, step in self.TEST_RANGES:
            l = range(start, stop, step)
            rdd = RDD(list(l), self.SPARK_CONTEXT)
            sample = rdd.sample(True, self.SAMPLE_FRACTION).collect()
            self.assertEquals(len(sample), int(len(l) * self.SAMPLE_FRACTION))
            for item in sample:
                self.assertTrue(item in l)

    def test_sample_with_replacement_with_seed(self):
        for start, stop, step in self.TEST_RANGES:
            l = range(start, stop, step)
            rdd = RDD(list(l), self.SPARK_CONTEXT)
            sample1 = rdd.sample(True, self.SAMPLE_FRACTION,
                                 self.SAMPLE_SEED).collect()
            sample2 = rdd.sample(True, self.SAMPLE_FRACTION,
                                 self.SAMPLE_SEED).collect()
            self.assertEquals(sorted(sample1), sorted(sample2))
            sample = sample1
            self.assertEquals(len(sample), int(len(l) * self.SAMPLE_FRACTION))
            for item in sample:
                self.assertTrue(item in l)

    def test_sample_without_replacement(self):
        for start, stop, step in self.TEST_RANGES:
            l = range(start, stop, step)
            rdd = RDD(list(l), self.SPARK_CONTEXT)
            sample = rdd.sample(False, self.SAMPLE_FRACTION).collect()
            self.assertEquals(len(sample), int(len(l) * self.SAMPLE_FRACTION))
            self.assertEquals(sorted(l), sorted(set(l)))
            for item in sample:
                self.assertTrue(item in l)

    def test_sample_without_replacement_with_seed(self):
        for start, stop, step in self.TEST_RANGES:
            l = range(start, stop, step)
            rdd = RDD(list(l), self.SPARK_CONTEXT)
            sample1 = rdd.sample(False, self.SAMPLE_FRACTION,
                                 self.SAMPLE_SEED).collect()
            sample2 = rdd.sample(False, self.SAMPLE_FRACTION,
                                 self.SAMPLE_SEED).collect()
            self.assertEquals(sorted(sample1), sorted(sample2))
            sample = sample1
            self.assertEquals(len(sample), int(len(l) * self.SAMPLE_FRACTION))
            self.assertEquals(sorted(l), sorted(set(l)))
            for item in sample:
                self.assertTrue(item in l)

    def test_union(self):
        for start1, stop1, step1 in self.TEST_RANGES:
            for start2, stop2, step2 in self.TEST_RANGES:
                l1 = range(start1, stop1, step1)
                l2 = range(start2, stop2, step2)
                rdd1 = RDD(list(l1), self.SPARK_CONTEXT)
                rdd2 = RDD(list(l2), self.SPARK_CONTEXT)
                rdd = rdd1.union(rdd2)
                self.assertEquals(sorted(rdd.collect()),
                                  sorted(list(l1) + list(l2)))

    def test_intersection(self):
        for start1, stop1, step1 in self.TEST_RANGES:
            for start2, stop2, step2 in self.TEST_RANGES:
                l1 = range(start1, stop1, step1)
                l2 = range(start2, stop2, step2)
                rdd1 = RDD(list(l1), self.SPARK_CONTEXT)
                rdd2 = RDD(list(l2), self.SPARK_CONTEXT)
                rdd = rdd1.intersection(rdd2)
                self.assertEquals(sorted(rdd.collect()),
                                  sorted([x for x in l1 if x in l2]))

    def test_group_by_key(self):
        l = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
        rdd = RDD(l, self.SPARK_CONTEXT)
        rdd = rdd.groupByKey()
        r = rdd.collect()
        r = [(kv[0], list(kv[1])) for kv in r]
        self.assertEquals(sorted(r),
                          sorted([(1, [1]), (2, [1, 2]), (3, [1, 2, 3])]))

    def test_reduce_by_key(self):
        l = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
        rdd = RDD(l, self.SPARK_CONTEXT)
        rdd = rdd.reduceByKey(lambda a, b: a + b)

        print(rdd)

        self.assertEquals(sorted(rdd.collect()),
                          sorted([(1, 1), (2, 3), (3, 6)]))

    def test_cartesian(self):
        for start1, stop1, step1 in self.TEST_RANGES:
            for start2, stop2, step2 in self.TEST_RANGES:
                l1 = range(start1, stop1, step1)
                l2 = range(start2, stop2, step2)
                rdd1 = RDD(list(l1), self.SPARK_CONTEXT)
                rdd2 = RDD(list(l2), self.SPARK_CONTEXT)
                rdd = rdd1.cartesian(rdd2)
                r = rdd.collect()
                self.assertEquals(len(r), len(l1) * len(l2))
                for t, u in r:
                    self.assertTrue(t in l1)
                    self.assertTrue(u in l2)

    def test_cogroup(self):
        l1 = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]
        l2 = [(2, 10), (2, 20), (3, 10), (3, 20), (3, 30), (4, 40)]
        rdd1 = RDD(l1, self.SPARK_CONTEXT)
        rdd2 = RDD(l2, self.SPARK_CONTEXT)
        rdd = rdd1.cogroup(rdd2)
        l = rdd.collect()
        self.assertEquals(
            sorted(l),
            sorted([(1, [1], []), (2, [1, 2], [10, 20]),
                    (3, [1, 2, 3], [10, 20, 30]), (4, [], [40])]))

    def test_word_count_1(self):

        lines = [
            'grape banana apple',
        ]

        expected_output = [
            ('apple', 1),
            ('banana', 1),
            ('grape', 1),
        ]

        sc = SparkContext(master='', conf=SparkConf())

        rdd = sc.parallelize(lines)
        rdd = rdd.flatMap(lambda x: x.split(' '))
        rdd = rdd.map(lambda word: (word, 1))
        rdd = rdd.reduceByKey(lambda a, b: a + b)

        output = rdd.collect()
        self.assertEquals(sorted(output), sorted(expected_output))

    def test_word_count_2(self):

        lines = [
            'apple',
            'apple banana',
            'apple banana',
            'apple banana grape',
        ]

        expected_output = [
            ('apple', 4),
            ('banana', 3),
            ('grape', 1),
        ]

        sc = SparkContext(master='', conf=SparkConf())

        rdd = sc.parallelize(lines)
        rdd = rdd.flatMap(lambda x: x.split(' '))
        rdd = rdd.map(lambda word: (word, 1))
        rdd = rdd.reduceByKey(lambda a, b: a + b)

        output = rdd.collect()
        self.assertEquals(sorted(output), sorted(expected_output))

    def test_word_count_3(self):

        lines = [
            'apple', 'apple banana', 'apple banana', 'apple banana grape',
            'banana grape', 'banana'
        ]

        expected_output = [
            ('apple', 4),
            ('banana', 5),
            ('grape', 2),
        ]

        sc = SparkContext(master='', conf=SparkConf())

        rdd = sc.parallelize(lines)
        rdd = rdd.flatMap(lambda x: x.split(' '))
        rdd = rdd.map(lambda word: (word, 1))
        rdd = rdd.reduceByKey(lambda a, b: a + b)

        output = rdd.collect()
        self.assertEquals(sorted(output), sorted(expected_output))

    def test_left_outer_join(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd1 = sc.parallelize([('A', [1, 2, 3]), ('B', [2, 3, 4])])
        rdd2 = sc.parallelize([('A', [1, 2, 3]), ('B', [2, 3, 4]),
                               ('B', [4, 5, 6])])
        out = rdd1.leftOuterJoin(rdd2).collect()
        print(out)
        self.assertEqual(len(out), 2)

    def test_keys(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([('A', 1), ('B', 2), ('C', 3)])
        self.assertListEqual(rdd.keys().collect(), ['A', 'B', 'C'])

    def test_values(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([('A', 1), ('B', 2), ('C', 3)])
        self.assertListEqual(rdd.values().collect(), [1, 2, 3])

    def test_combineByKey(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([
            ('A', 1),
            ('B', 2),
            ('B', 3),
            ('C', 4),
            ('C', 5),
            ('A', 6),
        ])

        def create_combiner(a):
            return [a]

        def merge_value(a, b):
            a.append(b)
            return a

        def merge_combiners(a, b):
            a.extend(b)
            return a

        rdd = rdd.combineByKey(create_combiner, merge_value, merge_combiners)
        self.assertListEqual(
            rdd.collect(),
            [('A', [1, 6]), ('B', [2, 3]), ('C', [4, 5])],
        )

    def test_sortByKey_ascending(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = (sc.parallelize([
            ('e', 5),
            ('d', 4),
            ('c', 3),
            ('b', 2),
            ('a', 1),
        ]).sortByKey(ascending=True))
        self.assertListEqual(
            rdd.collect(),
            [
                ('a', 1),
                ('b', 2),
                ('c', 3),
                ('d', 4),
                ('e', 5),
            ],
        )

    def test_sortByKey_descending(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = (sc.parallelize([
            ('a', 1),
            ('b', 2),
            ('c', 3),
            ('d', 4),
            ('e', 5),
        ]).sortByKey(ascending=False))
        self.assertListEqual(
            rdd.collect(),
            [
                ('e', 5),
                ('d', 4),
                ('c', 3),
                ('b', 2),
                ('a', 1),
            ],
        )

    def test_sortBy_ascending(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = (sc.parallelize([5, 4, 3, 2, 1]).sortBy(lambda x: x,
                                                      ascending=True))
        self.assertListEqual(rdd.collect(), [1, 2, 3, 4, 5])

    def test_sortBy_descending(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = (sc.parallelize([1, 2, 3, 4, 5]).sortBy(lambda x: x,
                                                      ascending=False))
        self.assertListEqual(rdd.collect(), [5, 4, 3, 2, 1])

    def test_subtractByKey(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd1 = sc.parallelize([('A', 1), ('B', 2), ('C', 3)])
        rdd2 = sc.parallelize([('A', None), ('C', None)])
        self.assertListEqual(rdd1.subtractByKey(rdd2).collect(), [('B', 2)])

    def test_not_implemented_methods(self):
        sc = SparkContext(master='', conf=SparkConf())
        rdd = sc.parallelize([])

        with self.assertRaises(NotImplementedError):
            rdd._pickled()

        with self.assertRaises(NotImplementedError):
            rdd.mapPartitionsWithIndex(
                None,
                None,
            )

        with self.assertRaises(NotImplementedError):
            rdd._computeFractionForSampleSize(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.pipe(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.reduce(None)

        with self.assertRaises(NotImplementedError):
            rdd.treeReduce(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.fold(
                None,
                None,
            )

        with self.assertRaises(NotImplementedError):
            rdd.aggregate(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.treeAggregate(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.stats()

        with self.assertRaises(NotImplementedError):
            rdd.histogram(None)

        with self.assertRaises(NotImplementedError):
            rdd.variance()

        with self.assertRaises(NotImplementedError):
            rdd.stdev()

        with self.assertRaises(NotImplementedError):
            rdd.sampleStdev()

        with self.assertRaises(NotImplementedError):
            rdd.sampleVariance()

        with self.assertRaises(NotImplementedError):
            rdd.countByValue()

        with self.assertRaises(NotImplementedError):
            rdd.top(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.takeOrdered(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsNewAPIHadoopDataset(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsNewAPIHadoopFile(None, None, None, None, None, None,
                                       None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsHadoopDataset(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsHadoopFile(None, None, None, None, None, None, None,
                                 None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsSequenceFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsPickleFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.saveAsTextFile(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.collectAsMap()

        with self.assertRaises(NotImplementedError):
            rdd.reduceByKeyLocally(None)

        with self.assertRaises(NotImplementedError):
            rdd.countByKey()

        with self.assertRaises(NotImplementedError):
            rdd.join(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.rightOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.fullOuterJoin(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.foldByKey(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd._can_spill()

        with self.assertRaises(NotImplementedError):
            rdd._memory_limit()

        with self.assertRaises(NotImplementedError):
            rdd.groupWith(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.sampleByKey(None, None, None)

        with self.assertRaises(NotImplementedError):
            rdd.subtract(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.coalesce(None, None)

        with self.assertRaises(NotImplementedError):
            rdd.toDebugString()

        with self.assertRaises(NotImplementedError):
            rdd.getStorageLevel()

        with self.assertRaises(NotImplementedError):
            rdd._to_java_object_rdd()
Example #38
"""
Configuration file for the SU(4) Lipkin Model.
Author: Gaurav Harsha
Date: July 29, 2019
"""

import collections
import functools

from dummy_spark import SparkContext
# from pyspark import SparkContext
from sympy import Symbol, collect, Add, Mul, Integer, symbols, factor, diff
from su4 import *

ctx = SparkContext('local[*]', 'su4')

dr = SU4LatticeDrudge(ctx)
nams = dr.names

DRUDGE = dr
Example #40
    def test_not_implemented_methods(self):
        ctx = SparkContext()

        with self.assertRaises(NotImplementedError):
            ctx._checkpointFile(None, None)

        with self.assertRaises(NotImplementedError):
            ctx._dictToJavaMap(None)

        with self.assertRaises(NotImplementedError):
            ctx._getJavaStorageLevel(None)

        with self.assertRaises(NotImplementedError):
            ctx.accumulator(None)

        with self.assertRaises(NotImplementedError):
            ctx.addFile(None)

        with self.assertRaises(NotImplementedError):
            ctx.binaryFiles(None)

        with self.assertRaises(NotImplementedError):
            ctx.binaryRecords(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.broadcast(None)

        with self.assertRaises(NotImplementedError):
            ctx.cancelAllJobs()

        with self.assertRaises(NotImplementedError):
            ctx.cancelJobGroup(None)

        with self.assertRaises(NotImplementedError):
            ctx.clearFiles()

        with self.assertRaises(NotImplementedError):
            ctx.dump_profiles(None)

        with self.assertRaises(NotImplementedError):
            ctx.getLocalProperty(None)

        with self.assertRaises(NotImplementedError):
            ctx.hadoopFile(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.hadoopRDD(None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.newAPIHadoopFile(None, None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.newAPIHadoopRDD(None, None, None)

        with self.assertRaises(NotImplementedError):
            ctx.pickleFile(None)

        with self.assertRaises(NotImplementedError):
            ctx.runJob(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.sequenceFile(None)

        with self.assertRaises(NotImplementedError):
            ctx.setCheckpointDir(None)

        with self.assertRaises(NotImplementedError):
            ctx.setJobGroup(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.setLocalProperty(None, None)

        with self.assertRaises(NotImplementedError):
            ctx.show_profiles()

        with self.assertRaises(NotImplementedError):
            ctx.sparkUser()

        with self.assertRaises(NotImplementedError):
            ctx.statusTracker()

        with self.assertRaises(NotImplementedError):
            ctx.union(None)

        with self.assertRaises(NotImplementedError):
            ctx.wholeTextFiles(None)