Beispiel #1
0
 def test_len(self):
     """len(ls) and ls.__len__() must both report the number of items."""
     for size in TestLazySorted.test_lengths:
         lazy = LazySorted(range(size))
         # Exercise the builtin and the dunder method separately.
         self.assertEqual(len(lazy), size)
         self.assertEqual(lazy.__len__(), size)
Beispiel #2
0
 def test_len(self):
     """len(ls) and ls.__len__() must both report the number of items."""
     for size in TestLazySorted.test_lengths:
         lazy = LazySorted(range(size))
         # Exercise the builtin and the dunder method separately.
         self.assertEqual(len(lazy), size)
         self.assertEqual(lazy.__len__(), size)
Beispiel #3
0
 def test_keys(self):
     """Using keys should work fine, with or without reverse.

     Random pairs are ordered by their first and by their second component,
     in both ascending and descending direction, and the result of
     LazySorted is compared against the builtin sorted().
     """
     for rep in xrange(100):
         items = [(random.random(), random.random()) for _ in xrange(256)]
         random.shuffle(items)
         for reverse in [True, False]:
             # `reverse` has to be forwarded to both sides; otherwise the
             # descending case named in the docstring is never exercised.
             # The keys are random floats, so ties (where the relative
             # order of equal keys could differ from sorted()) are not a
             # practical concern.
             self.assertEqual(
                 list(LazySorted(items, key=lambda x: x[0],
                                 reverse=reverse)),
                 sorted(items, key=lambda x: x[0], reverse=reverse))
             self.assertEqual(
                 list(LazySorted(items, key=lambda x: x[1],
                                 reverse=reverse)),
                 sorted(items, key=lambda x: x[1], reverse=reverse))
Beispiel #4
0
    def test_index_valueerror(self):
        """index() must raise ValueError for values absent from the data."""
        for size in TestLazySorted.test_lengths:
            values = range(size)
            for _ in xrange(5):
                random.shuffle(values)
                lazy = LazySorted(values)

                # One value below the data, one just past its end, and a
                # float that never occurs among the integers.
                for absent in (-1, size, 5.5):
                    self.assertRaises(ValueError, lazy.index, absent)
Beispiel #5
0
    def test_index_valueerror(self):
        """index() must raise ValueError for values absent from the data."""
        for size in TestLazySorted.test_lengths:
            values = range(size)
            for _ in xrange(5):
                random.shuffle(values)
                lazy = LazySorted(values)

                # One value below the data, one just past its end, and a
                # float that never occurs among the integers.
                for absent in (-1, size, 5.5):
                    self.assertRaises(ValueError, lazy.index, absent)
Beispiel #6
0
    def test_simple_index(self):
        """index(v) must equal v when the data is a shuffle of 0..n-1."""
        for size in TestLazySorted.test_lengths:
            data = range(size)
            probes = range(size)
            for _ in xrange(5):
                # Shuffle the data and the query order independently.
                random.shuffle(data)
                random.shuffle(probes)
                lazy = LazySorted(data)

                for probe in probes:
                    position = lazy.index(probe)
                    self.assertEqual(position, probe)
Beispiel #7
0
    def test_simple_index(self):
        """index(v) must equal v when the data is a shuffle of 0..n-1."""
        for size in TestLazySorted.test_lengths:
            data = range(size)
            probes = range(size)
            for _ in xrange(5):
                # Shuffle the data and the query order independently.
                random.shuffle(data)
                random.shuffle(probes)
                lazy = LazySorted(data)

                for probe in probes:
                    position = lazy.index(probe)
                    self.assertEqual(position, probe)
Beispiel #8
0
 def test_count_simple(self):
     """count() must be 1 for ints inside [0, n) and 0 for anything else."""
     for size in TestLazySorted.test_lengths:
         data = range(size)
         # Every fifth member plus values around both ends and one float.
         probes = range(0, size, 5) + [
             -4, -3, -2, -1, 0, size, size + 1, size + 2, 3.3]
         for _ in xrange(5):
             random.shuffle(data)
             random.shuffle(probes)
             lazy = LazySorted(data)
             for probe in probes:
                 found = lazy.count(probe)
                 if isinstance(probe, int) and 0 <= probe < size:
                     expected = 1
                 else:
                     expected = 0
                 self.assertEqual(found, expected)
Beispiel #9
0
 def test_count_simple(self):
     """count() must be 1 for ints inside [0, n) and 0 for anything else."""
     for size in TestLazySorted.test_lengths:
         data = range(size)
         # Every fifth member plus values around both ends and one float.
         probes = range(0, size, 5) + [
             -4, -3, -2, -1, 0, size, size + 1, size + 2, 3.3]
         for _ in xrange(5):
             random.shuffle(data)
             random.shuffle(probes)
             lazy = LazySorted(data)
             for probe in probes:
                 found = lazy.count(probe)
                 if isinstance(probe, int) and 0 <= probe < size:
                     expected = 1
                 else:
                     expected = 0
                 self.assertEqual(found, expected)
Beispiel #10
0
    def test_count_manynonunique(self):
        """count() must stay correct when values repeat many times."""
        for _ in xrange(2000):
            values = range(random.randint(1, 50))
            random.shuffle(values)
            # multiplicity[v] is how often the value v appears (0..16).
            multiplicity = [random.randint(0, 16) for _unused in values]

            flattened = []
            for value in values:
                flattened.extend([value] * multiplicity[value])

            lazy = LazySorted(flattened)
            for value in values:
                self.assertEqual(lazy.count(value), multiplicity[value])

        # A list made of a single repeated value.
        for size in TestLazySorted.test_lengths:
            zeros = LazySorted([0] * size)
            self.assertEqual(zeros.count(0), size)
Beispiel #11
0
 def test_reverse(self):
     """Iterating with reverse=True must yield the items in descending order."""
     for size in TestLazySorted.test_lengths:
         shuffled = range(size)
         random.shuffle(shuffled)
         produced = list(LazySorted(shuffled, reverse=True))
         self.assertEqual(produced, range(size - 1, -1, -1))
Beispiel #12
0
    def test_between(self):
        """between(a, b) must hold the same items as the sorted slice [a:b]."""
        for size in TestLazySorted.test_lengths:
            data = range(size)
            reference = range(size)
            for _ in xrange(100):
                lo = random.randrange(-size, size + 1)
                hi = random.randrange(-size, size + 1)

                random.shuffle(data)
                lazy = LazySorted(data)
                got = lazy.between(lo, hi)

                # between() makes no ordering promise here, so only the
                # size and the membership are compared.
                note = "n = %d; called ls.between(%d, %d)" % (size, lo, hi)
                self.assertEqual(len(got), len(reference[lo:hi]), msg=note)
                self.assertEqual(set(got), set(reference[lo:hi]), msg=note)
Beispiel #13
0
    def test_contains(self):
        """`in` and __contains__ must agree with a plain list's membership."""
        for size in TestLazySorted.test_lengths:
            data = range(size)
            # Every fifth member plus values around both ends and one float.
            probes = range(0, size, 5) + [
                -4, -3, -2, -1, 0, size, size + 1, size + 2, 3.3]
            for _ in xrange(10):
                random.shuffle(data)
                random.shuffle(probes)
                # Neither list changes below, so the message is built once.
                context = "ys = %s; xs = %s" % (probes, data)

                # First pass: the `in` operator.
                lazy = LazySorted(data)
                for probe in probes:
                    self.assertEqual(probe in data, probe in lazy,
                                     msg=context)

                # Second pass on a fresh object: the dunder method itself.
                lazy = LazySorted(data)
                for probe in probes:
                    self.assertEqual(data.__contains__(probe),
                                     lazy.__contains__(probe),
                                     msg=context)
Beispiel #14
0
    def test_count_nonunique(self):
        """count() must be right for two repeated values, in either query order."""
        for a in xrange(1, 32):
            for b in xrange(1, 32):
                pool = a * ["a"] + b * ["b"]
                expected = {"a": a, "b": b}
                # Query "b" before "a" first, then the other way round.
                for query_order in (("b", "a"), ("a", "b")):
                    for _ in xrange(3):
                        random.shuffle(pool)
                        lazy = LazySorted(pool)

                        for letter in query_order:
                            self.assertEqual(lazy.count(letter),
                                             expected[letter])
Beispiel #15
0
 def test_random_select(self):
     """A single index lookup on a fresh object must return the k-th item."""
     for size in TestLazySorted.test_lengths:
         data = range(size)
         for rank in xrange(1, size):
             for _ in xrange(10):
                 random.shuffle(data)
                 # A new LazySorted each time: exactly one selection per object.
                 picked = LazySorted(data)[rank]
                 note = "xs = %s; k = %d" % (data, rank)
                 self.assertEqual(picked, rank, msg=note)
Beispiel #16
0
def get_closest_images(rgb_color, num) -> list:
    """Return the ids of the ``num`` images closest in colour to ``rgb_color``.

    Each image row is ranked by the smallest Euclidean distance between
    ``rgb_color`` and any entry of the row's ``"colors"`` value. Rows are
    read from ``Image.objects`` with null ``colors`` excluded.

    Args:
        rgb_color: Sequence of numeric colour components; it is zipped
            against each stored colour, so extra components on either side
            are ignored.
        num: Upper bound of the slice taken from the ranked rows.

    Returns:
        A list of the ``"id"`` values of the best-matching rows, each
        converted with ``str``, closest first.
    """
    def distance(v1):
        # Euclidean distance between one stored colour and the target.
        return sum((a - b) ** 2 for a, b in zip(v1, rgb_color)) ** 0.5

    def key(x):
        distances = [distance(color) for color in x["colors"]]
        # A row whose colour list is empty (non-null but with no entries)
        # would make min() raise ValueError and abort the whole query;
        # rank such rows last instead.
        return min(distances) if distances else float("inf")

    colors = Image.objects.all().exclude(colors__isnull=True).values()
    return [str(x["id"]) for x in LazySorted(colors, key=key)[0:num]]
Beispiel #17
0
 def test_creation(self):
     """Constructing a LazySorted must succeed for several kinds of iterable."""
     sources = [
         [],                                       # empty list
         [1, 2, 3, 4],                             # list
         (x for x in range(100) if x % 3 == 0),    # generator expression
         (3, -2, 5),                               # tuple
         xrange(100),                              # xrange
         xrange(0),                                # empty xrange
         {"foo": 10, "bar": 3, "baz": 9},          # dict
     ]
     for source in sources:
         # Only construction is under test; the object is thrown away.
         LazySorted(source)
Beispiel #18
0
 def test_interupted_iter(self):
     """An iterator must keep its place when other calls happen in between."""
     for _ in xrange(100):
         shuffled = range(512)
         random.shuffle(shuffled)
         lazy = LazySorted(shuffled)
         cursor = iter(lazy)

         first_chunk = list(islice(cursor, 30))
         self.assertEqual(first_chunk, range(0, 30))

         # Disturb the object with an index lookup and a membership test;
         # both results are deliberately discarded.
         lazy[random.randrange(512)]
         random.randrange(-100, 600) in lazy

         second_chunk = list(islice(cursor, 30))
         self.assertEqual(second_chunk, range(30, 60))
Beispiel #19
0
    def test_contains(self):
        """`in` and __contains__ must agree with a plain list's membership."""
        for size in TestLazySorted.test_lengths:
            data = range(size)
            # Every fifth member plus values around both ends and one float.
            probes = range(0, size, 5) + [
                -4, -3, -2, -1, 0, size, size + 1, size + 2, 3.3]
            for _ in xrange(10):
                random.shuffle(data)
                random.shuffle(probes)
                # Neither list changes below, so the message is built once.
                context = "ys = %s; xs = %s" % (probes, data)

                # First pass: the `in` operator.
                lazy = LazySorted(data)
                for probe in probes:
                    self.assertEqual(probe in data, probe in lazy,
                                     msg=context)

                # Second pass on a fresh object: the dunder method itself.
                lazy = LazySorted(data)
                for probe in probes:
                    self.assertEqual(data.__contains__(probe),
                                     lazy.__contains__(probe),
                                     msg=context)
Beispiel #20
0
 def test_step(self):
     """Slices that give only a step must match the same slice of a sorted list."""
     strides = [-64, -16, -2, -1, 1, 2, 16, 64]
     for size in TestLazySorted.test_lengths:
         data = range(size)
         reference = range(size)
         for _ in xrange(5):
             random.shuffle(data)
             lazy = LazySorted(data)
             # Apply the strides in a random order to the same object.
             random.shuffle(strides)
             for stride in strides:
                 self.assertEqual(lazy[::stride], reference[::stride])
Beispiel #21
0
    def test_between(self):
        """between(a, b) must hold the same items as the sorted slice [a:b]."""
        for size in TestLazySorted.test_lengths:
            data = range(size)
            reference = range(size)
            for _ in xrange(100):
                lo = random.randrange(-size, size + 1)
                hi = random.randrange(-size, size + 1)

                random.shuffle(data)
                lazy = LazySorted(data)
                got = lazy.between(lo, hi)

                # between() makes no ordering promise here, so only the
                # size and the membership are compared.
                note = "n = %d; called ls.between(%d, %d)" % (size, lo, hi)
                self.assertEqual(len(got), len(reference[lo:hi]), msg=note)
                self.assertEqual(set(got), set(reference[lo:hi]), msg=note)
Beispiel #22
0
 def test_new_init(self):
     """An object built with __new__ alone must behave like an empty LazySorted."""
     ls = LazySorted.__new__(LazySorted, [])

     # Operations that have to fail on an empty container.
     failing = [
         (IndexError, lambda: ls[4]),
         (TypeError, lambda: ls["foo"]),
         (ValueError, lambda: ls.index(4)),
         (ValueError, lambda: ls.index("foo")),
     ]
     for error, attempt in failing:
         self.assertRaises(error, attempt)

     # Nothing is contained in it.
     for candidate in ("foo", 4):
         self.assertFalse(candidate in ls)

     # Counting, slicing, length and iteration all report emptiness.
     self.assertEqual(ls.count("foo"), 0)
     self.assertEqual(ls[0:5], [])
     self.assertEqual(len(ls), 0)
     self.assertEqual(list(ls), [])
Beispiel #23
0
 def test_new_init(self):
     """An object built with __new__ alone must behave like an empty LazySorted."""
     ls = LazySorted.__new__(LazySorted, [])

     # Operations that have to fail on an empty container.
     failing = [
         (IndexError, lambda: ls[4]),
         (TypeError, lambda: ls["foo"]),
         (ValueError, lambda: ls.index(4)),
         (ValueError, lambda: ls.index("foo")),
     ]
     for error, attempt in failing:
         self.assertRaises(error, attempt)

     # Nothing is contained in it.
     for candidate in ("foo", 4):
         self.assertFalse(candidate in ls)

     # Counting, slicing, length and iteration all report emptiness.
     self.assertEqual(ls.count("foo"), 0)
     self.assertEqual(ls[0:5], [])
     self.assertEqual(len(ls), 0)
     self.assertEqual(list(ls), [])
Beispiel #24
0
 def test_multiple_select(self):
     """Repeated index lookups on one object must each return the k-th item."""
     for size in TestLazySorted.test_lengths:
         data = range(size)
         # Every index appears twice so that repeated access is covered.
         ranks = 2 * range(size)
         for _ in xrange(10):
             random.shuffle(data)
             random.shuffle(ranks)
             lazy = LazySorted(data)
             for rank in ranks:
                 note = "xs = %s; ks = %s; k = %d" % (data, ranks, rank)
                 self.assertEqual(lazy[rank], rank, msg=note)
Beispiel #25
0
 def test_select_range(self):
     """Slicing ls[a:b] with a <= b must return the sorted items a..b-1."""
     for size in TestLazySorted.test_lengths:
         data = range(size)
         for _ in xrange(5):
             random.shuffle(data)
             lazy = LazySorted(data)
             for attempt in xrange(128):
                 # Draw two bounds and put them in ascending order.
                 lo, hi = sorted((random.randrange(size + 1),
                                  random.randrange(size + 1)))
                 note = ("xs = %s; (a, b) = (%d, %d); select_rep = %d"
                         % (data, lo, hi, attempt))
                 self.assertEqual(lazy[lo:hi], range(lo, hi), msg=note)
Beispiel #26
0
 def test_full_range(self):
     """Slices with start, stop and step must match a sorted list's slices."""
     for size in TestLazySorted.test_lengths:
         data = range(size)
         reference = range(size)
         for _ in xrange(5):
             random.shuffle(data)
             lazy = LazySorted(data)
             for _attempt in xrange(16):
                 start = random.randrange(-size, size + 1)
                 stop = random.randrange(-size, size + 1)
                 # Non-zero step of either sign.
                 magnitude = random.randrange(1, size + 3)
                 step = magnitude * random.choice([-1, 1])
                 note = ("xs = %s; called ls[%d:%d:%d]"
                         % (data, start, stop, step))
                 self.assertEqual(lazy[start:stop:step],
                                  reference[start:stop:step],
                                  msg=note)
Beispiel #27
0
    def test_count_manynonunique(self):
        """count() must stay correct when values repeat many times."""
        for _ in xrange(2000):
            values = range(random.randint(1, 50))
            random.shuffle(values)
            # multiplicity[v] is how often the value v appears (0..16).
            multiplicity = [random.randint(0, 16) for _unused in values]

            flattened = []
            for value in values:
                flattened.extend([value] * multiplicity[value])

            lazy = LazySorted(flattened)
            for value in values:
                self.assertEqual(lazy.count(value), multiplicity[value])

        # A list made of a single repeated value.
        for size in TestLazySorted.test_lengths:
            zeros = LazySorted([0] * size)
            self.assertEqual(zeros.count(0), size)
Beispiel #28
0
def similar_sentences(request, sentence_id):
    """Respond with a sentence and the sentences whose vectors score highest against it.

    The sentence with id ``sentence_id`` is loaded (404 if missing), every
    other sentence is scored by the product of the two ``wordvec`` arrays,
    and the serialized top-scoring ones are returned together with the
    serialized reference sentence.
    """
    limit = 100

    sentence = get_object_or_404(Sentence.objects.all(), id=sentence_id)
    vec = np.array(sentence.wordvec)

    # Very hot code here, optimization required
    candidates = Sentence.objects.exclude(id=sentence_id)
    scored = (
        (np.linalg.multi_dot((vec, np.array(candidate.wordvec))), candidate)
        for candidate in candidates
    )
    # Negating the score puts the largest products first.
    ranked = LazySorted(scored, key=lambda pair: -pair[0])
    best = ranked[:limit]

    payload = dict(
        sentence=SingleSentenceSerializer(sentence).data,
        similar_sentences=[
            SingleSentenceSerializer(match).data for (_score, match) in best
        ],
    )
    return Response(data=payload)
Beispiel #29
0
    def test_count_nonunique(self):
        """count() must be right for two repeated values, in either query order."""
        for a in xrange(1, 32):
            for b in xrange(1, 32):
                pool = a * ["a"] + b * ["b"]
                expected = {"a": a, "b": b}
                # Query "b" before "a" first, then the other way round.
                for query_order in (("b", "a"), ("a", "b")):
                    for _ in xrange(3):
                        random.shuffle(pool)
                        lazy = LazySorted(pool)

                        for letter in query_order:
                            self.assertEqual(lazy.count(letter),
                                             expected[letter])
Beispiel #30
0
    def get_aspects(self, query):
        """Apply the CoFiH algorithm to find documents expressing a concept.

        The rows of ``self.mat`` selected by ``query`` are clustered with
        k-means; for each non-empty cluster one result is yielded.

        Parameters
        ----------
        query : numpy.ndarray or list or tuple
            Mask or indices of the documents assumed to contain the
            concept of interest (typically because they contain the
            word associated with said concept). Containing the word is
            used as the first way of expressing the concept.
            NOTE(review): the body indexes ``query`` with a boolean array
            (``query[km.labels_ == cidx]``), which plain lists and tuples
            do not support -- confirm callers always pass an ndarray.

        Yields
        ------
        topics
            Row indices meant to represent the extension of a topic.
            For a cluster with exactly one member this is the last array
            returned by ``np.where`` on that member's ``query`` entry;
            otherwise it is a list of the row indices of the reduced
            matrix for which ``within_interval`` is true.

        Notes
        -----
        Stores the fitted cluster labels in ``self.kmeans_labels``.
        """

        # Create partial matrix containing only query vectors: the rows of
        # self.mat selected by query (per the original note, self.mat is a
        # CSR matrix -- TODO confirm).
        qmat = self.mat[
            query]

        # Remove empty attributes: keep only the columns whose sum is > 0
        qmat = qmat.T[np.squeeze(np.asarray(qmat.sum(0) > 0))].T

        # Get best k: the position of the smallest value produced by fKTest
        # (defined elsewhere).
        # NOTE(review): this is a 0-based position, so k can be 0 when the
        # first value is the smallest -- confirm fKTest's output accounts
        # for that.
        k = min(enumerate(fKTest(qmat)), key=itemgetter(1))[0]
        # Get partition
        km = KMeans(k)
        km.fit(qmat)

        # Keep the labels on the instance so they can be read after the call
        self.kmeans_labels = km.labels_
        n_docs, n_terms = self.mat.shape

        for cidx in range(k):
            if sum(km.labels_ == cidx) == 1:
                # Cluster with a single member: yield it directly, without
                # the covariance-based step below.

                cg_index = query[km.labels_ == cidx]
                yield np.where(np.squeeze(cg_index))[-1]
                continue
            elif sum(km.labels_ == cidx) == 0:
                # Empty cluster: nothing to report
                continue

            # Association function looked up by name among this module's
            # globals
            assocfn = globals()[self.assoc_function]

            # Get top associated: the indices of the topn entries with the
            # highest association value (sorted lazily, largest first)
            topn = int(np.round(self.alpha * n_terms))
            topterms = map(
                itemgetter(0),
                islice(
                    LazySorted(enumerate(assocfn(self.mat, km.labels_, cidx)),
                               key=itemgetter(1),
                               reverse=True), topn))

            # Get reduced space: self.mat restricted to the top-associated
            # columns (all rows are kept)
            cmat = self.mat.T[list(
                topterms
            )].T

            # Get cluster vectors' global indices
            cg_indices = query[km.labels_ == cidx]

            # Get what's needed to construct confidence intervals
            mu = cmat[cg_indices].mean(0)  # mean of the cluster

            # Covariance of the cluster's rows in the reduced space
            Sigma = np.cov(cmat[cg_indices].todense().T)

            # np.linalg.inv raises LinAlgError if Sigma is singular
            invcov = np.linalg.inv(Sigma)

            # Yield the indices of all rows of the reduced matrix that
            # within_interval accepts for this cluster's mean and inverse
            # covariance

            yield [ i for i, x in enumerate(cmat) \
                if within_interval(x, mu, invcov, self.chi22p) ]
Beispiel #31
0
 def test_sorting(self):
     """Iterating a LazySorted must yield the items in ascending order."""
     for size in TestLazySorted.test_lengths:
         shuffled = range(size)
         random.shuffle(shuffled)
         produced = list(LazySorted(shuffled))
         self.assertEqual(produced, range(size))
Beispiel #32
0
    def test_API(self):
        """The constructor must follow the sorted(...) API, except for cmp."""
        xs = range(10)

        # Keyword values of the wrong type must be rejected.
        bad_keywords = [
            dict(reverse="foo"),
            dict(key="foo"),
            dict(reverse=True, key="foo"),
            dict(key=5),
            dict(reverse="foo", key=lambda x: x),
            dict(reverse=True, key=5),
        ]
        for keywords in bad_keywords:
            self.assertRaises(TypeError, LazySorted, xs, **keywords)

        # NB: LazySorted(xs, reverse=1.5) will succeed in python2.6 and down,
        # even though it should really fail. This was fixed in python2.7 and
        # up. See issue 5080 for details: http://bugs.python.org/issue5080

        # Named keywords may come in any order; positional ones may not.
        LazySorted(xs, key=lambda x: x, reverse=False)
        LazySorted(xs, reverse=False, key=lambda x: x)
        LazySorted(xs, lambda x: x, False)
        self.assertRaises(TypeError, LazySorted, xs, 0, lambda x: x)

        # Calling the constructor with no arguments is an error.
        self.assertRaises(TypeError, LazySorted)

        # A key taking the wrong number of arguments fails once an item is
        # actually requested.
        wrong_arity_keys = [lambda: "foo", lambda x, y: x + y]
        for key in wrong_arity_keys:
            def select():
                return LazySorted(xs, key=key)[3]

            self.assertRaises(TypeError, select)
            self.assertRaises(TypeError, select)