Example #1
0
def test_join(transform):
    """Check ``b.join`` against the reference ``join`` for a transformed input.

    ``transform`` wraps the plain list ``[1, 2, 3]`` before it is handed to
    ``b.join``.  ``b``, ``join``, ``isodd``, ``iseven`` and ``assert_eq`` come
    from the enclosing module.
    """
    other = transform([1, 2, 3])

    # Distinct key functions for each side of the join.
    c = b.join(other, on_self=isodd, on_other=iseven)
    expected_mixed = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert_eq(c, expected_mixed)

    # A single positional key function; compared against a reference join
    # that uses ``isodd`` on both sides.
    joined_same_key = b.join(other, isodd)
    expected_same_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert_eq(joined_same_key, expected_same_key)

    # Building the same join a second time must give the same ``name``.
    rebuilt = b.join(other, on_self=isodd, on_other=iseven)
    assert c.name == rebuilt.name
Example #2
0
def test_join():
    """Check ``b.join`` with a plain list against the reference ``join``.

    ``b``, ``join``, ``isodd`` and ``iseven`` come from the enclosing module.
    """
    # Distinct key functions for each side of the join.
    c = b.join([1, 2, 3], on_self=isodd, on_other=iseven)
    actual_mixed = list(c)
    expected_mixed = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert actual_mixed == expected_mixed

    # A single positional key function; compared against a reference join
    # that uses ``isodd`` on both sides.
    actual_same_key = list(b.join([1, 2, 3], isodd))
    expected_same_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert actual_same_key == expected_same_key

    # Building the same join a second time must give the same ``name``.
    rebuilt = b.join([1, 2, 3], on_self=isodd, on_other=iseven)
    assert c.name == rebuilt.name
Example #3
0
def test_join(transform):
    """Verify ``b.join`` on ``transform([1, 2, 3])`` matches the reference join.

    Relies on module-level ``b``, ``join``, ``isodd``, ``iseven`` and
    ``assert_eq``.
    """
    other = transform([1, 2, 3])
    join_kwargs = dict(on_self=isodd, on_other=iseven)

    # Separate key functions per side.
    c = b.join(other, **join_kwargs)
    reference = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert_eq(c, reference)

    # One positional key function, checked against ``isodd`` on both sides.
    single_key_result = b.join(other, isodd)
    single_key_reference = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert_eq(single_key_result, single_key_reference)

    # An identical second join must carry the same ``name``.
    second = b.join(other, **join_kwargs)
    assert c.name == second.name
Example #4
0
def compute_up(t, lhs, rhs, **kwargs):
    """ Join Operation for Python Streaming Backend

    Note that a pure streaming Join is challenging/impossible because any row
    in one seq might connect to any row in the other, requiring simultaneous
    complete access.

    As a result this approach compromises and fully realizes the LEFT sequence
    while allowing the RIGHT sequence to stream.

    Always put your bigger collection on the RIGHT side of the Join.

    Parameters
    ----------
    t
        The join expression.  Only ``t.lhs.fields``, ``t.rhs.fields``,
        ``t.on_left``, ``t.on_right`` and ``t.how`` are read here; ``t`` is
        also handed to ``pair_assemble``.
    lhs, rhs
        Iterables of rows for the left and right side of the join.
    **kwargs
        Accepted for signature compatibility and otherwise unused.

    Returns
    -------
    The result of ``map(assemble, pairs)``: one assembled row per matched
    (or default-filled) pair produced by ``toolz.join``.
    """
    # Use identity, not equality: the point is to detect the *same* iterable
    # being passed for both sides so that consuming one side cannot exhaust
    # the other.  ``==`` may walk two large sequences element by element, or
    # raise for objects whose ``__eq__`` is not a plain boolean.
    if lhs is rhs:
        lhs, rhs = itertools.tee(lhs, 2)

    # Translate the join column names into positional indices of each side.
    on_left = [t.lhs.fields.index(col) for col in listpack(t.on_left)]
    on_right = [t.rhs.fields.index(col) for col in listpack(t.on_right)]

    # A side is filled with ``None`` when rows of the *other* side must be
    # kept even without a match; otherwise no default is supplied.
    left_default = (None if t.how in ('right', 'outer')
                    else toolz.itertoolz.no_default)
    right_default = (None if t.how in ('left', 'outer')
                     else toolz.itertoolz.no_default)

    pairs = toolz.join(on_left, lhs,
                       on_right, rhs,
                       left_default=left_default,
                       right_default=right_default)

    assemble = pair_assemble(t)

    return map(assemble, pairs)
Example #5
0
def compute_up(t, lhs, rhs, **kwargs):
    """ Join Operation for Python Streaming Backend

    Note that a pure streaming Join is challenging/impossible because any row
    in one seq might connect to any row in the other, requiring simultaneous
    complete access.

    As a result this approach compromises and fully realizes the LEFT sequence
    while allowing the RIGHT sequence to stream.

    Always put your bigger collection on the RIGHT side of the Join.

    Parameters
    ----------
    t
        The join expression.  Only ``t.lhs.fields``, ``t.rhs.fields``,
        ``t.on_left``, ``t.on_right`` and ``t.how`` are read here; ``t`` is
        also handed to ``pair_assemble``.
    lhs, rhs
        Iterables of rows for the left and right side of the join.
    **kwargs
        Accepted for signature compatibility and otherwise unused.

    Returns
    -------
    The result of ``map(assemble, pairs)``: one assembled row per matched
    (or default-filled) pair produced by ``toolz.join``.
    """
    # Use identity, not equality: the point is to detect the *same* iterable
    # being passed for both sides so that consuming one side cannot exhaust
    # the other.  ``==`` may walk two large sequences element by element, or
    # raise for objects whose ``__eq__`` is not a plain boolean.
    if lhs is rhs:
        lhs, rhs = itertools.tee(lhs, 2)

    # Translate the join column names into positional indices of each side.
    on_left = [t.lhs.fields.index(col) for col in listpack(t.on_left)]
    on_right = [t.rhs.fields.index(col) for col in listpack(t.on_right)]

    # A side is filled with ``None`` when rows of the *other* side must be
    # kept even without a match; otherwise no default is supplied.
    left_default = (None if t.how in ('right', 'outer')
                    else toolz.itertoolz.no_default)
    right_default = (None if t.how in ('left', 'outer')
                     else toolz.itertoolz.no_default)

    pairs = toolz.join(on_left,
                       lhs,
                       on_right,
                       rhs,
                       left_default=left_default,
                       right_default=right_default)

    # The key indices are passed along so the assembler knows which
    # positions were joined on.
    assemble = pair_assemble(t, on_left, on_right)

    return map(assemble, pairs)
Example #6
0
 def generate2(buckets1, buckets2):
     """Pair up the items of two bucket streams and yield 3-tuples.

     Items of ``generate1(buckets1)`` and ``generate1(buckets2)`` are joined
     with ``first`` as the key on both sides and ``None`` as the default for
     either side (presumably a toolz-style outer join -- confirm).  Each
     yielded tuple is ``(item[0], left item[1], right item[1])`` with ``0``
     standing in for the side that is ``None``.
     """
     joined = join(first, generate1(buckets1), first, generate1(buckets2),
                   left_default=None, right_default=None)
     for left_item, right_item in joined:
         if left_item is not None and right_item is not None:
             # Present on both sides.
             yield left_item[0], left_item[1], right_item[1]
         elif left_item is None:
             # Only the right side has this item.
             yield right_item[0], 0, right_item[1]
         else:
             # Only the left side has this item.
             yield left_item[0], left_item[1], 0
Example #7
0
 def generate2(buckets1, buckets2):
     """Yield ``(item[0], left item[1], right item[1])`` for two bucket streams.

     ``generate1(buckets1)`` and ``generate1(buckets2)`` are joined using
     ``first`` as the key for both sides, with ``None`` supplied as the
     default on each side (presumably a toolz-style outer join -- confirm).
     A side that comes back as ``None`` contributes ``0`` to the tuple.
     """
     left_stream = generate1(buckets1)
     right_stream = generate1(buckets2)
     for lhs_item, rhs_item in join(first, left_stream, first, right_stream,
                                    left_default=None, right_default=None):
         if lhs_item is None:
             # Nothing on the left for this item.
             row = (rhs_item[0], 0, rhs_item[1])
         elif rhs_item is None:
             # Nothing on the right for this item.
             row = (lhs_item[0], lhs_item[1], 0)
         else:
             row = (lhs_item[0], lhs_item[1], rhs_item[1])
         yield row
    def spacy_spanpostokenize(self,
                              message: str,
                              stopwords=None,
                              pos_tag_type="simple"):
        """Combine the span tokens of ``message`` with their POS tags.

        Args:
            message: Text to tokenize.
            stopwords: Passed through to ``self.span_tokenize``.  ``None``
                (the default) is treated as an empty list; a fresh list is
                used on every call so state can never leak between calls
                through a shared default.
            pos_tag_type: Passed through to ``self.spacy_postokenize``.

        Returns:
            A list of unique ``(a, b, d)`` tuples, where ``(a, b)`` is a span
            token and ``d`` is the second element of the POS token it was
            joined with.  The result goes through a ``set``, so its order is
            not defined.
        """
        if stopwords is None:
            stopwords = []

        postokens = self.spacy_postokenize(message, pos_tag_type)
        spantokens = self.span_tokenize(message, stopwords)

        # Both token streams are joined on key ``0``.
        # NOTE(review): no left/right default is given, so if ``t`` is toolz
        # this is an inner join -- span tokens without a matching POS token
        # are dropped rather than kept as in a left join.  Confirm intent.
        merged = t.join(0, spantokens, 0, postokens)

        # Each joined pair is ((a, b), (c, d)); the POS token's first element
        # duplicates the join key and is discarded.
        finaltokens = {(a, b, d) for (a, b), (_c, d) in merged}
        return list(finaltokens)
Example #9
0
def test_join():
    """Check ``b.join`` with a plain list against the reference ``join``.

    ``b``, ``join``, ``isodd`` and ``iseven`` come from the enclosing module.
    """
    # Distinct key functions for each side of the join.
    actual_mixed = list(b.join([1, 2, 3], on_self=isodd, on_other=iseven))
    expected_mixed = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert actual_mixed == expected_mixed

    # A single positional key function; compared against a reference join
    # that uses ``isodd`` on both sides.
    actual_same_key = list(b.join([1, 2, 3], isodd))
    expected_same_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert actual_same_key == expected_same_key
Example #10
0
def test_join():
    """Compare ``b.join`` results with the reference ``join`` implementation.

    Uses module-level ``b``, ``join``, ``isodd`` and ``iseven``.
    """
    # Case 1: a different key function on each side.
    got = list(b.join([1, 2, 3], on_self=isodd, on_other=iseven))
    want = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert got == want

    # Case 2: one positional key function, checked against ``isodd`` on
    # both sides of the reference join.
    got = list(b.join([1, 2, 3], isodd))
    want = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert got == want
Example #11
0
def test_join():
    """Verify ``b.join`` results and that its ``name`` is reproducible.

    Uses module-level ``b``, ``join``, ``isodd`` and ``iseven``.
    """
    join_kwargs = dict(on_self=isodd, on_other=iseven)

    # A different key function on each side.
    c = b.join([1, 2, 3], **join_kwargs)
    reference = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert list(c) == reference

    # One positional key function, checked against ``isodd`` on both sides.
    single_key_result = list(b.join([1, 2, 3], isodd))
    single_key_reference = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert single_key_result == single_key_reference

    # An identical second join must carry the same ``name``.
    second = b.join([1, 2, 3], **join_kwargs)
    assert c.name == second.name