Exemple #1
0
class MongoZipEmitter(Emitter):
    """Emitter that writes every emitted record to the ``zipcodes`` collection.

    The collection lives in the database named by ``settings.MONGO_DATABASE``
    and is dropped each time an emitter is constructed.
    """

    def __init__(self):
        """Initialise the base emitter, then open and drop the collection."""
        super(MongoZipEmitter, self).__init__()
        database = Connection()[settings.MONGO_DATABASE]
        self._zipcodes = database['zipcodes']
        # Start from an empty collection on every construction.
        self._zipcodes.drop()

    def emit_record(self, record):
        """Insert ``record`` into the ``zipcodes`` collection."""
        self._zipcodes.insert(record)
Exemple #2
0
def _read_stripped_lines(path):
    """Return the lines of the file at ``path`` without their trailing newline."""
    with open(path) as handle:
        return [line.rstrip('\n') for line in handle]


def construct_test_user():
    """Rebuild the ``jd.test_users`` collection from the raw review data files.

    The collection is dropped first.  ``test_user_review.data`` is then read:
    its first line holds the number of users, followed by three lines per
    user (the uid, a space-separated list of product ids, and a line that is
    passed to ``count_mentions``).  One document is inserted per user with the
    keys ``_id``, ``products``, ``mentions`` and ``profile``.

    * ``products`` keeps only ids that are also in the set returned by
      ``get_all_ids_from_file('product')``.
    * ``profile`` defaults to ``[0, 0]`` for every field.  ``gender``, ``age``
      and ``location`` are copied from the linked ``jd.weibo_users`` document
      when the uid appears in ``linked_uids.data`` and that document exists.
    * ``kids`` is ``[0, 1]`` for uids listed in ``uids_with_kids.data`` and
      ``[1, 0]`` for uids listed in ``uids_without_kids.data``; for those
      users the words in ``prone_words`` are removed from ``mentions``.
    """
    all_products = get_all_ids_from_file('product')
    database = Connection().jd
    collection = database.test_users
    collection.drop()
    linked_users = database.weibo_users
    fname = RAW_DATA_DIR + 'test_user_review.data'

    # Sets give O(1) membership tests inside the per-user loop below.
    uids_with_kids = set(
        _read_stripped_lines(RAW_DATA_DIR + 'uids_with_kids.data'))
    uids_without_kids = set(
        _read_stripped_lines(RAW_DATA_DIR + 'uids_without_kids.data'))

    # Each line is "<linked id> <uid>"; map the uid (second field) to the
    # linked id (first field).
    linked_uids = {}
    for line in _read_stripped_lines(RAW_DATA_DIR + 'linked_uids.data'):
        fields = line.split(' ')
        linked_uids[fields[1]] = fields[0]

    prone_words = ['宝宝', '女儿', '儿子', '男朋友', '女朋友']

    with open(fname) as review_file:
        count = int(review_file.readline().rstrip('\n'))
        bar = progress_bar(count)
        for i in xrange(count):
            uid = review_file.readline().rstrip('\n')
            products = review_file.readline().rstrip('\n').split(' ')
            products = list(set(products) & all_products)
            mentions = count_mentions(review_file.readline())
            profile = {
                'gender': [0] * 2,
                'age': [0] * 2,
                'location': [0] * 2,
                'kids': [0] * 2,
            }
            if uid in linked_uids:
                user = linked_users.find_one({'_id': linked_uids[uid]})
                if user is not None:
                    linked_profile = user['profile']
                    profile['gender'] = linked_profile['gender']
                    profile['age'] = linked_profile['age']
                    profile['location'] = linked_profile['location']
            if uid in uids_with_kids:
                profile['kids'] = [0, 1]
            if uid in uids_without_kids:
                profile['kids'] = [1, 0]
            if uid in uids_without_kids or uid in uids_with_kids:
                # NOTE(review): presumably these words would give away the
                # 'kids' label, hence their removal -- confirm intent.
                for w in prone_words:
                    if w in mentions:
                        mentions.pop(w)
            collection.insert({
                '_id': uid,
                'products': products,
                'mentions': mentions,
                'profile': profile
            })
            bar.draw(i + 1)
Exemple #3
0
class CombineTest(unittest2.TestCase):
    """Checks of ``anglecombine`` against graphs built in ``algolab-test``."""

    def setUp(self):
        """Open the ``rg0`` and ``rg1`` collections and drop their contents."""
        for attr, name in (("col0", "rg0"), ("col1", "rg1")):
            collection = Connection("127.0.0.1", 27017)["algolab-test"][name]
            setattr(self, attr, collection)
        self.col0.drop()
        self.col1.drop()

    def create_rg_for(self, datasets, col=None):
        """Call ``create_rg`` for each ``npoints`` dataset index in ``datasets``.

        Uses ``col`` when it is truthy, otherwise ``self.col0``.
        """
        destination = col or self.col0
        for index in datasets:
            create_rg(npoints[index], destination, distance_function=edist)

    def test_simple_combine(self):
        # Datasets 12 and 13 combined with the argument 20.
        self.create_rg_for([12, 13])
        anglecombine(self.col0, 20)
        self.assertEqual(self.col0.count(), 7)

        expected = {
            0: [1, 1],
            1: [2, 1],
            2: [3.0, 1.15],
            3: [4.0, 1.15],
            4: [5.0, 1.15],
            5: [6, 1],
            6: [7, 1],
        }
        actual = dict((node["_id"], node["loc"]) for node in self.col0.find())
        self.assertDictContainsSubset(expected, actual)

    def test_combine_2switches(self):
        # Same as the simple case plus dataset 15.
        self.create_rg_for([12, 13, 15])
        anglecombine(self.col0, 20)
        self.assertEqual(self.col0.count(), 8)

        expected = {
            0: [1, 1],
            1: [2, 1],
            2: [3.0, 1.15],
            3: [4.0, 1.15],
            4: [5.0, 1.15],
            5: [6, 1],
            12: [4, 4],
            6: [7, 1],
        }
        actual = dict((node["_id"], node["loc"]) for node in self.col0.find())
        self.assertDictContainsSubset(expected, actual)
Exemple #4
0
class AllTest(unittest2.TestCase):
    """Runs ``rdp`` and ``anglereduce`` over segmented graphs and counts the result."""

    def setUp(self):
        """Reset ``rg0``..``rg2`` and populate ``rg0`` and ``rg1`` from ``npoints``."""
        for attr, name in (("col0", "rg0"), ("col1", "rg1"), ("col2", "rg2")):
            collection = Connection("127.0.0.1", 27017)["algolab-test"][name]
            setattr(self, attr, collection)
        for collection in (self.col0, self.col1, self.col2):
            collection.drop()

        # col0 receives datasets 2 and 5; col1 receives 2, 5, 3 and 4.
        for index in (2, 5):
            create_rg(npoints[index], self.col0, distance_function=edist)
        for index in (2, 5, 3, 4):
            create_rg(npoints[index], self.col1, distance_function=edist)

    def _simplify_into_col2(self, source, simplify):
        """Apply ``simplify`` to each segment of ``source`` and store it in ``col2``."""
        for segment in S(source).segments:
            locations = locs_for(segment, source)
            create_rg(simplify(locations), self.col2)

    def test_rdp(self):
        self._simplify_into_col2(self.col0, lambda locs: rdp(locs, 0))
        self.assertEqual(self.col2.count(), 8)

    def test_rdp2(self):
        self._simplify_into_col2(self.col1, lambda locs: rdp(locs, 0))
        self.assertEqual(self.col2.count(), 11)

    def test_rdp3(self):
        self._simplify_into_col2(self.col1, lambda locs: rdp(locs, 100000))
        self.assertEqual(self.col2.count(), 8)

    def test_anglered(self):
        self._simplify_into_col2(self.col1, lambda locs: anglereduce(locs, 1))
        self.assertEqual(self.col2.count(), 8)

    def test_anglered2(self):
        self._simplify_into_col2(self.col1, lambda locs: anglereduce(locs, 180))
        self.assertEqual(self.col2.count(), 11)
Exemple #5
0
class SegmentTest(unittest2.TestCase):
    """Checks the segment ids and successors produced for ``npoints`` datasets."""

    def create_rg_for(self, datasets, col=None):
        """Call ``create_rg`` for each ``npoints`` dataset index in ``datasets``.

        Uses ``col`` when it is truthy, otherwise ``self.col0``.
        """
        destination = col or self.col0
        for index in datasets:
            create_rg(npoints[index], destination, distance_function=edist)

    def setUp(self):
        """Reset ``rg0``..``rg2`` and load dataset 2 into ``rg0``."""
        for attr, name in (("col0", "rg0"), ("col1", "rg1"), ("col2", "rg2")):
            collection = Connection("127.0.0.1", 27017)["algolab-test"][name]
            setattr(self, attr, collection)
        for collection in (self.col0, self.col1, self.col2):
            collection.drop()
        create_rg(npoints[2], self.col0, distance_function=edist)

    def test_already_segmented(self):
        first_segment = list(S(self.col0).segment_ids)[0]
        self.assertEqual(first_segment, range(len(npoints[2])))

    def test_already_segmented2(self):
        create_rg(npoints[3], self.col1)
        first_segment = list(S(self.col1).segment_ids)[0]
        self.assertItemsEqual(first_segment, [4, 2, 5])

    def test_already_segmented3(self):
        create_rg(npoints[4], self.col1)
        first_segment = list(S(self.col1).segment_ids)[0]
        self.assertEqual(first_segment, [6, 2])

    def test_switch_segment(self):
        create_rg(npoints[3], self.col0, distance_function=edist)

        intersect = npoints[3][1]
        node = self.col0.find_one(intersect[2])
        successors = node["successors"]

        self.assertEqual(len(successors), 4)
        # (distance, id) pairs that must each appear among the successors.
        for distance, node_id in ((1, 1), (1, 3), (1, 4), (4, 5)):
            self.assertTrue({"distance": distance, "id": node_id} in successors)

    def test_switch_segment2(self):
        self.create_rg_for([2, 3, 4, 5])
        print(list(S(self.col0).segment_ids))
        expected = [
            [0, 1, 2], [3, 2], [4, 2], [2, 5],
            [2, 6], [2, 8, 7], [2, 9, 10, 11],
        ]
        self.assertItemsEqual(list(S(self.col0).segment_ids), expected)

    def test_switch_segment3(self):
        self.create_rg_for([2, 3, 4, 5, 6, 7])
        expected = [
            [0, 1, 2], [3, 2], [4, 2], [5, 2],
            [6, 2], [7, 8, 2], [11, 10, 9, 2],
            [12, 2], [13, 15], [13, 16], [2, 13], [13, 14],
        ]
        self.assertItemsEqual(list(S(self.col0).segment_ids), expected)
        self.assertEqual(len(list(S(self.col0).segment_ids)), 12)

    def test_swith2_segment(self):
        empty(self.col0)
        self.create_rg_for([8, 9, 10, 11])
        segmenter = S(self.col0)

        triplets = list(segmenter.segments_as_triplets)

        for triplet in triplets:
            print(triplet)

        self.assertEqual(len(triplets), 4)
Exemple #6
0
#!/usr/bin/env python
# coding:utf-8

import threading, time
from pymongo import Connection

# Synchronisation primitives shared by every worker thread.
lock = threading.RLock()
cond = threading.Condition()

# Reset the `tablex` collection of the `test` database so that it holds
# exactly one document, with x == 0.
table = Connection()['test']['tablex']
table.drop()
table.insert(dict(x=0))


def target():
    """Wait on ``cond``, then try to claim the ``{'x': 0}`` document.

    While holding ``cond`` and ``lock`` the function looks up a document
    with ``x == 0`` and, if one is found, sets its ``x`` to 1.  After both
    are released it prints a line naming the current thread when a document
    was found.

    ``cond`` and ``lock`` are held through ``with`` blocks so that they are
    released even if the database call raises.
    """
    with cond:
        # NOTE(review): no notify call is visible in this excerpt; the thread
        # blocks here until something else notifies `cond`.
        cond.wait()
        with lock:
            doc = table.find_one({'x': 0})
            if doc:
                # found it
                table.update({'_id': doc['_id']}, {'$set': {'x': 1}})
    if doc:
        print('\n%s got i' % threading.current_thread())


# Start 20 worker threads, each running target().
# NOTE(review): nothing in the visible code notifies `cond`, so the workers
# appear to stay blocked in cond.wait() -- confirm whether a notify step is
# missing from this script.
for i in range(20):
    t = threading.Thread(target=target)
    t.start()
Exemple #7
0
#!/usr/bin/env python
# coding:utf-8

import threading, time
from pymongo import Connection


# Synchronisation primitives shared by every worker thread.
lock = threading.RLock()
cond = threading.Condition()

# Reset the `tablex` collection of the `test` database so that it holds
# exactly one document, with x == 0.
table = Connection()['test']['tablex']
table.drop()
table.insert(dict(x=0))

def target():
    """Wait on ``cond``, then try to claim the ``{'x': 0}`` document.

    While holding ``cond`` and ``lock`` the function looks up a document
    with ``x == 0`` and, if one is found, sets its ``x`` to 1.  After both
    are released it prints a line naming the current thread when a document
    was found.

    ``cond`` and ``lock`` are held through ``with`` blocks so that they are
    released even if the database call raises.
    """
    with cond:
        # NOTE(review): no notify call is visible in this excerpt; the thread
        # blocks here until something else notifies `cond`.
        cond.wait()
        with lock:
            doc = table.find_one({'x': 0})
            if doc:
                # found it
                table.update({'_id': doc['_id']}, {'$set': {'x': 1}})
    if doc:
        print('\n%s got i' % threading.current_thread())

# Start 20 worker threads, each running target().
for i in range(20):
	t = threading.Thread(target=target)
	t.start()

# Pause the main thread for one second.
# NOTE(review): presumably this gives the workers time to reach cond.wait();
# no notify call is visible in this excerpt -- confirm what follows.
time.sleep(1)