class MongoZipEmitter(Emitter):
    """Emitter that writes every record into the Mongo ``zipcodes`` collection.

    The collection is looked up in the database named by
    ``settings.MONGO_DATABASE`` and is dropped each time an emitter is
    constructed, so every instance starts from an empty collection.
    """

    def __init__(self):
        super(MongoZipEmitter, self).__init__()
        database = Connection()[settings.MONGO_DATABASE]
        self._zipcodes = database['zipcodes']
        # Start from an empty collection.
        self._zipcodes.drop()

    def emit_record(self, record):
        """Insert ``record`` into the zipcodes collection."""
        self._zipcodes.insert(record)
def _read_stripped_lines(path):
    """Return the lines of ``path`` without their trailing newline."""
    with open(path) as handle:
        # rstrip('\n') instead of [:-1]: a final line that lacks a newline
        # must not lose its last real character.
        return [line.rstrip('\n') for line in handle]


def construct_test_user():
    """Rebuild the ``jd.test_users`` collection from the raw data files.

    Reads ``test_user_review.data`` under ``RAW_DATA_DIR``. Its first line is
    the number of users; each user then occupies three lines: the uid, a
    space-separated list of product ids, and a line that is handed to
    ``count_mentions``. Product ids not present in
    ``get_all_ids_from_file('product')`` are discarded.

    For every user a profile is assembled:

    * ``gender`` / ``age`` / ``location`` are copied from the matching
      ``jd.weibo_users`` document when ``linked_uids.data`` maps the uid to a
      linked id (second field -> first field of each line) and that document
      exists; otherwise they stay ``[0, 0]``.
    * ``kids`` is ``[0, 1]`` for uids listed in ``uids_with_kids.data`` and
      ``[1, 0]`` for uids listed in ``uids_without_kids.data`` (the latter
      wins if a uid appears in both).

    The target collection is dropped first, then one document per user is
    inserted. Returns ``None``.
    """
    all_products = get_all_ids_from_file('product')

    jd = Connection().jd
    collection = jd.test_users
    collection.drop()
    linked_users = jd.weibo_users

    # Sets: these are only ever used for membership tests inside the loop.
    uids_with_kids = set(
        _read_stripped_lines(RAW_DATA_DIR + 'uids_with_kids.data'))
    uids_without_kids = set(
        _read_stripped_lines(RAW_DATA_DIR + 'uids_without_kids.data'))

    # Each line is "<linked id> <uid>"; index by uid.
    linked_uids = {}
    for line in _read_stripped_lines(RAW_DATA_DIR + 'linked_uids.data'):
        fields = line.split(' ')
        linked_uids[fields[1]] = fields[0]

    prone_words = ['宝宝', '女儿', '儿子', '男朋友', '女朋友']

    fname = RAW_DATA_DIR + 'test_user_review.data'
    with open(fname) as f:
        count = int(f.readline().rstrip('\n'))
        bar = progress_bar(count)
        for i in xrange(count):
            uid = f.readline().rstrip('\n')
            products = f.readline().rstrip('\n').split(' ')
            products = list(set(products) & all_products)
            # The mentions line is passed through untouched (newline included).
            mentions = count_mentions(f.readline())

            profile = {
                'gender': [0] * 2,
                'age': [0] * 2,
                'location': [0] * 2,
                'kids': [0] * 2,
            }

            if uid in linked_uids:
                user = linked_users.find_one({'_id': linked_uids[uid]})
                if user is not None:
                    linked_profile = user['profile']
                    profile['gender'] = linked_profile['gender']
                    profile['age'] = linked_profile['age']
                    profile['location'] = linked_profile['location']

            has_kids = uid in uids_with_kids
            has_no_kids = uid in uids_without_kids
            if has_kids:
                profile['kids'] = [0, 1]
            if has_no_kids:
                profile['kids'] = [1, 0]
            if has_kids or has_no_kids:
                # Users with a known kids label get the prone words removed
                # from their mentions (presumably so the label cannot be read
                # straight off those words -- confirm with the author).
                for w in prone_words:
                    if w in mentions:
                        mentions.pop(w)

            collection.insert({
                '_id': uid,
                'products': products,
                'mentions': mentions,
                'profile': profile,
            })
            bar.draw(i + 1)
class CombineTest(unittest2.TestCase):
    """Checks ``anglecombine`` against scratch collections in ``algolab-test``."""

    def setUp(self):
        """Bind ``rg0`` and ``rg1`` on the local Mongo server and drop both."""
        first = Connection("127.0.0.1", 27017)["algolab-test"]["rg0"]
        self.col0 = first
        second = Connection("127.0.0.1", 27017)["algolab-test"]["rg1"]
        self.col1 = second
        first.drop()
        second.drop()

    def create_rg_for(self, datasets, col=None):
        """Call ``create_rg`` for each ``npoints`` key in ``datasets``.

        Nodes go into ``col`` when it is given (and truthy), else ``self.col0``.
        """
        for key in datasets:
            create_rg(npoints[key], col or self.col0,
                      distance_function=edist)

    def test_simple_combine(self):
        self.create_rg_for([12, 13])
        anglecombine(self.col0, 20)

        self.assertEqual(self.col0.count(), 7)
        expected = {
            0: [1, 1],
            1: [2, 1],
            2: [3.0, 1.15],
            3: [4.0, 1.15],
            4: [5.0, 1.15],
            5: [6, 1],
            6: [7, 1],
        }
        stored = dict((node["_id"], node["loc"]) for node in self.col0.find())
        self.assertDictContainsSubset(expected, stored)

    def test_combine_2switches(self):
        self.create_rg_for([12, 13, 15])
        anglecombine(self.col0, 20)

        self.assertEqual(self.col0.count(), 8)
        expected = {
            0: [1, 1],
            1: [2, 1],
            2: [3.0, 1.15],
            3: [4.0, 1.15],
            4: [5.0, 1.15],
            5: [6, 1],
            12: [4, 4],
            6: [7, 1],
        }
        stored = dict((node["_id"], node["loc"]) for node in self.col0.find())
        self.assertDictContainsSubset(expected, stored)
class AllTest(unittest2.TestCase):
    """Runs ``rdp`` and ``anglereduce`` over segmented graphs and counts the result.

    ``col0`` and ``col1`` hold the input graphs built in ``setUp``; every test
    writes its reduced segments into ``col2`` and checks how many nodes end up
    there.
    """

    def setUp(self):
        """Drop the three scratch collections and build the two input graphs."""
        self.col0 = Connection("127.0.0.1", 27017)["algolab-test"]["rg0"]
        self.col1 = Connection("127.0.0.1", 27017)["algolab-test"]["rg1"]
        self.col2 = Connection("127.0.0.1", 27017)["algolab-test"]["rg2"]
        for collection in (self.col0, self.col1, self.col2):
            collection.drop()

        for key in (2, 5):
            create_rg(npoints[key], self.col0, distance_function=edist)
        for key in (2, 5, 3, 4):
            create_rg(npoints[key], self.col1, distance_function=edist)

    def _reduce_into_col2(self, source, reducer):
        """Apply ``reducer`` to each segment of ``source`` and store it in ``col2``."""
        for segment in S(source).segments:
            locations = locs_for(segment, source)
            create_rg(reducer(locations), self.col2)

    def test_rdp(self):
        self._reduce_into_col2(self.col0, lambda locs: rdp(locs, 0))
        self.assertEqual(self.col2.count(), 8)

    def test_rdp2(self):
        self._reduce_into_col2(self.col1, lambda locs: rdp(locs, 0))
        self.assertEqual(self.col2.count(), 11)

    def test_rdp3(self):
        self._reduce_into_col2(self.col1, lambda locs: rdp(locs, 100000))
        self.assertEqual(self.col2.count(), 8)

    def test_anglered(self):
        self._reduce_into_col2(self.col1, lambda locs: anglereduce(locs, 1))
        self.assertEqual(self.col2.count(), 8)

    def test_anglered2(self):
        self._reduce_into_col2(self.col1, lambda locs: anglereduce(locs, 180))
        self.assertEqual(self.col2.count(), 11)
class SegmentTest(unittest2.TestCase):
    """Checks the segments that ``S`` reports for graphs built with ``create_rg``."""

    def create_rg_for(self, datasets, col=None):
        """Call ``create_rg`` for each ``npoints`` key in ``datasets``.

        Nodes go into ``col`` when it is given (and truthy), else ``self.col0``.
        """
        for key in datasets:
            create_rg(npoints[key], col or self.col0,
                      distance_function=edist)

    def setUp(self):
        """Drop the three scratch collections and load ``npoints[2]`` into ``col0``."""
        self.col0 = Connection("127.0.0.1", 27017)["algolab-test"]["rg0"]
        self.col1 = Connection("127.0.0.1", 27017)["algolab-test"]["rg1"]
        self.col2 = Connection("127.0.0.1", 27017)["algolab-test"]["rg2"]
        for collection in (self.col0, self.col1, self.col2):
            collection.drop()
        create_rg(npoints[2], self.col0, distance_function=edist)

    def test_already_segmented(self):
        first_segment = list(S(self.col0).segment_ids)[0]
        self.assertEqual(first_segment, range(0, len(npoints[2])))

    def test_already_segmented2(self):
        create_rg(npoints[3], self.col1)
        first_segment = list(S(self.col1).segment_ids)[0]
        self.assertItemsEqual(first_segment, [4, 2, 5])

    def test_already_segmented3(self):
        create_rg(npoints[4], self.col1)
        first_segment = list(S(self.col1).segment_ids)[0]
        self.assertEqual(first_segment, [6, 2])

    def test_switch_segment(self):
        create_rg(npoints[3], self.col0, distance_function=edist)
        intersect = npoints[3][1]
        node = self.col0.find_one(intersect[2])

        self.assertEqual(len(node["successors"]), 4)
        expected_successors = (
            {"distance": 1, "id": 1},
            {"distance": 1, "id": 3},
            {"distance": 1, "id": 4},
            {"distance": 4, "id": 5},
        )
        for successor in expected_successors:
            self.assertTrue(successor in node["successors"])

    def test_switch_segment2(self):
        self.create_rg_for([2, 3, 4, 5])
        print(list(S(self.col0).segment_ids))

        expected = [
            [0, 1, 2],
            [3, 2],
            [4, 2],
            [2, 5],
            [2, 6],
            [2, 8, 7],
            [2, 9, 10, 11],
        ]
        self.assertItemsEqual(list(S(self.col0).segment_ids), expected)

    def test_switch_segment3(self):
        self.create_rg_for([2, 3, 4, 5, 6, 7])

        expected = [
            [0, 1, 2],
            [3, 2],
            [4, 2],
            [5, 2],
            [6, 2],
            [7, 8, 2],
            [11, 10, 9, 2],
            [12, 2],
            [13, 15],
            [13, 16],
            [2, 13],
            [13, 14],
        ]
        self.assertItemsEqual(list(S(self.col0).segment_ids), expected)
        self.assertEqual(len(list(S(self.col0).segment_ids)), 12)

    def test_swith2_segment(self):
        empty(self.col0)
        self.create_rg_for([8, 9, 10, 11])

        triplets = list(S(self.col0).segments_as_triplets)
        for triplet in triplets:
            print(triplet)
        self.assertEqual(len(triplets), 4)
#!/usr/bin/env python
# coding:utf-8
# Starts 20 threads that each wait on a shared condition and then try to flip
# the single {'x': 0} document in test.tablex to {'x': 1}.
import threading
import time

from pymongo import Connection

lock = threading.RLock()
cond = threading.Condition()

# Reset the collection so that exactly one {'x': 0} document exists.
table = Connection().test.tablex
table.drop()
table.insert({'x': 0})


def target():
    """Wait on ``cond``, then claim the ``{'x': 0}`` document if it still exists.

    The find-and-update pair runs while holding ``lock``. A thread that finds
    the document sets its ``x`` to 1 and prints a message naming itself.
    """
    # ``with`` releases the condition and the lock even if a Mongo call
    # raises; the manual acquire/release pairs would have left them held.
    with cond:
        # NOTE(review): nothing in this script calls cond.notify()/notify_all(),
        # so as written every worker stays blocked here -- confirm intent.
        cond.wait()
        with lock:
            doc = table.find_one({'x': 0})
            if doc:  # found it
                table.update({'_id': doc['_id']}, {'$set': {'x': 1}})
    if doc:
        print('\n%s got i' % threading.current_thread())


for i in range(20):
    t = threading.Thread(target=target)
    t.start()
#!/usr/bin/env python
# coding:utf-8
# Starts 20 threads that each wait on a shared condition and then try to flip
# the single {'x': 0} document in test.tablex to {'x': 1}.
import threading
import time

from pymongo import Connection

lock = threading.RLock()
cond = threading.Condition()

# Reset the collection so that exactly one {'x': 0} document exists.
table = Connection().test.tablex
table.drop()
table.insert({'x': 0})


def target():
    """Wait on ``cond``, then claim the ``{'x': 0}`` document if it still exists.

    The find-and-update pair runs while holding ``lock``. A thread that finds
    the document sets its ``x`` to 1 and prints a message naming itself.
    """
    # ``with`` releases the condition and the lock even if a Mongo call
    # raises; the manual acquire/release pairs would have left them held.
    with cond:
        # NOTE(review): no cond.notify()/notify_all() is visible in this part
        # of the file; the workers block here until something notifies them.
        cond.wait()
        with lock:
            doc = table.find_one({'x': 0})
            if doc:  # found it
                table.update({'_id': doc['_id']}, {'$set': {'x': 1}})
    if doc:
        print('\n%s got i' % threading.current_thread())


for i in range(20):
    t = threading.Thread(target=target)
    t.start()

# Pause for a second after all the workers have been started.
time.sleep(1)