def renderAnswer(request):
    """Render the event page for the answer named by the ``?id=`` query param."""
    answer_key = request.GET.get("id")
    # Answers and their parent events live in the same collection.
    collection = Connection(host="127.0.0.1", port=27017)["compDB"]["compromiseCollection"]
    answer = collection.find_one({"uniqDesc": answer_key})
    event = collection.find_one({"_id": ObjectId(answer.get("idEvent"))})
    # ObjectId is not JSON-serializable, so expose it as a string.
    event["_id"] = str(event["_id"])
    return render_to_response("showevent.html", {"json": json.dumps(event)})
def output_graph_matrix():
    # Dump (embedding vector, gender label, uid) triples for every user that
    # has a graph embedding, for downstream training.
    from pymongo import Connection
    users = Connection().user_profilling.users
    graph = Connection().user_profilling.graph_embedding
    print graph.count()
    # NOTE(review): the progress bar is created but its draw calls below are
    # commented out.
    bar = get_progressive_bar(users.count())
    x = []  # embedding vectors
    y = []  # gender labels: 0 = 'f', 1 = everything else
    finish_count = 0
    uids = []
    for user in users.find({'int_id': { '$exists': True }}, { 'information': 1, 'int_id': 1 }):
        finish_count += 1
        print finish_count
        #bar.cursor.restore()
        #bar.draw(value=finish_count)
        user_embedding = graph.find_one({'_id': user['int_id']})
        # Users without an embedding are skipped.
        if user_embedding is None:
            print user_embedding
            continue
        gender = user['information']['gender']
        if gender == 'f':
            y.append(0)
        else:
            y.append(1)
        x.append(user_embedding['embedding'])
        uids.append(user['information']['uid'])
    #dump_train_valid_test(x,y,'gender_graph.data')
    dump_user_vector(x, y, uids, 'user_graph_vector.data')
class Application(tornado.web.Application):
    """Tornado application wired to the ``vapour.urls`` Mongo collection."""

    def __init__(self, handlers, **settings):
        tornado.web.Application.__init__(self, handlers, **settings)
        self.collection = Connection().vapour.urls
        self.templates = TemplateLookup(directories=["templates"])

    def get_link_by_id(self, id):
        """Look up a single link by its UUID string."""
        return fix_id(self.collection.find_one({'_id': uuid.UUID(id)}))

    def get_links_by_tag(self, tag):
        """Case-insensitive regex search over tags."""
        matches = self.collection.find({'tags': re.compile(tag, re.I)})
        return fix_ids(matches)

    def get_links_by_url(self, url):
        """Case-insensitive regex search over stored URLs."""
        matches = self.collection.find({'url': re.compile(url, re.I)})
        return fix_ids(matches)

    def insert_link(self, url, desc, tags):
        """Store a new link document and return the insert result."""
        document = {
            '_id': uuid.uuid4(),
            'url': url,
            'desc': desc,
            'tags': tags,
            'added': datetime.datetime.utcnow(),
        }
        return self.collection.insert(document)
def output_graph_matrix():
    # Export (embedding, gender label, uid) for users that have a graph
    # embedding.  Duplicate of the other output_graph_matrix in this file,
    # differing only in whitespace style.
    from pymongo import Connection
    users=Connection().user_profilling.users
    graph=Connection().user_profilling.graph_embedding
    print graph.count()
    # NOTE(review): bar is unused -- its draw calls are commented out below.
    bar=get_progressive_bar(users.count())
    x=[]  # embedding vectors
    y=[]  # gender labels: 0 = 'f', 1 = everything else
    finish_count=0
    uids=[]
    for user in users.find({'int_id':{'$exists':True}},{'information':1,'int_id':1}):
        finish_count+=1
        print finish_count
        #bar.cursor.restore()
        #bar.draw(value=finish_count)
        user_embedding=graph.find_one({'_id':user['int_id']})
        if user_embedding is None:
            print user_embedding
            continue
        gender=user['information']['gender']
        if gender=='f':
            y.append(0)
        else:
            y.append(1)
        x.append(user_embedding['embedding'])
        uids.append(user['information']['uid'])
    #dump_train_valid_test(x,y,'gender_graph.data')
    dump_user_vector(x,y,uids,'user_graph_vector.data')
def update_all(): tags = Connection()["reddit"]["tags"] index = Connection()["reddit"]["inverted_index"] invalid = ['.', '$'] for tag in tags.find(): for key in tag.keys(): if key != "_id": word_list = tag[key] for w in word_list: for i in invalid: if i in w: w = w.replace(i,'') row = index.find_one({"key" : w}) if not row: index.insert({"key": w, "ids" : [key]}) else: print "Updating", w print row, row["ids"] lst = list(row["ids"]) print lst, key lst.append(key) new_row = {"key":w, "ids": lst} print new_row index.update({"key":w}, new_row)
def create_election(slug, userlist, ballot_html, success_html, failure_html, email, email_author, email_subject):
    """Create a new election document, failing if the slug is already taken.

    The ``*_html`` and ``email`` arguments are file-like objects whose
    contents are stored verbatim.  Returns the result of ``safe_insert``.
    """
    elections = Connection().stopgap.elections
    if elections.find_one({"slug": slug}) is not None:
        raise Exception("There is already an election by the name of '%s'" % slug)
    # Convert into set to remove duplicates before building participant records.
    participants = [{"email": addr.strip(), "sent": False} for addr in set(userlist)]
    document = {
        "slug": slug,
        "html": {
            "ballot": ballot_html.read(),
            "success": success_html.read(),
            "failure": failure_html.read(),
        },
        "email": {
            "content": email.read(),
            "from": email_author,
            "subject": email_subject,
        },
        "participants": participants,
        "tokens": [],
        "startTime": datetime.datetime.utcnow(),
    }
    return safe_insert(elections, document)
def age_distribute():
    # Attach a binary age profile ([young, old], cutoff at age 30) to each JD
    # test user via the linked weibo account's birthday, then print the
    # cumulative age distribution.
    from small_utils.progress_bar import progress_bar
    from pymongo import Connection
    from collections import Counter
    collection=Connection().jd.test_users
    weibo_collection=Connection().jd.weibo_users
    linked_jd_ids=dict()
    ages=[]
    # Build jd-uid -> weibo-uid map from the pairs file (one "weibo jd" per line).
    for line in open('/mnt/data1/adoni/data/linked_uids.data'):
        linked_jd_ids[line[:-1].split(' ')[1]]=line.split(' ')[0]
    bar=progress_bar(collection.count())
    for index,user in enumerate(collection.find()):
        # Skip users whose stored age vector is [0, 0].
        if sum(user['profile']['age'])==0:
            continue
        # NOTE(review): raises KeyError when the user is missing from the
        # linked file -- presumably every such user is linked; confirm.
        weibo_id=linked_jd_ids[user['_id']]
        weibo_user=weibo_collection.find_one({'_id':weibo_id})
        if weibo_user==None:
            continue
        # Birthday string starts with the year followed by the CJK year mark.
        age=2015-int(weibo_user['birthday'].split(u'年')[0])
        # Discard implausible ages.
        if age>50 or age<10:
            continue
        ages.append(age)
        if age<30:
            user['profile']['age']=[1,0]
        else:
            user['profile']['age']=[0,1]
        collection.update({'_id':user['_id']},{'$set':{'profile':user['profile']}})
        bar.draw(index)
    s=sum(Counter(ages).values())
    ages=sorted(Counter(ages).items(),key=lambda d:d[0])
    ss=0.
    # Print the cumulative fraction of users at or below each age.
    for age in ages:
        ss+=age[1]
        print age[0],(ss)/s
def find_by_name_pair(namepairs):
    """Map emails to member ids for (given name, surname, email) triples.

    Names are matched case-insensitively as regex fragments; triples with no
    matching member record are omitted from the result.
    """
    members = Connection().ppau.members
    found = {}
    for first, surname, email in namepairs:
        query = {
            "details.given_names": re.compile(first, re.I),
            "details.surname": re.compile(surname, re.I),
        }
        match = members.find_one(query)
        if match is not None:
            found[email] = match['_id']
    return found
def test_expired_messages_not_returned_by_read(self):
    """A message written with a short expiry vanishes from read() and is
    flagged expired in the backing collection."""
    queue = Queue(database='karait_test', queue='queue_test')
    queue.write(Message({'foo': 1}), expire=0.1)
    time.sleep(0.2)
    # Reading twice must consistently return nothing.
    self.assertEqual(0, len(queue.read()))
    self.assertEqual(0, len(queue.read()))
    # Make sure the meta._expired key is actually set.
    raw_message = Connection().karait_test.queue_test.find_one({})
    self.assertEqual(True, raw_message['_meta']['expired'])
def find_by_name_pair(namepairs):
    """Resolve (given name, surname, email) triples to member ids.

    Returns ``{email: member_id}`` for every triple whose names match a
    member record (case-insensitive regex match); non-matches are dropped.
    """
    coll = Connection().ppau.members
    pairs = []
    for given, surname, email in namepairs:
        hit = coll.find_one({
            "details.given_names": re.compile(given, re.I),
            "details.surname": re.compile(surname, re.I),
        })
        if hit is None:
            continue
        pairs.append((email, hit['_id']))
    return dict(pairs)
class LocalTouristClassifier():
    """Geotags tweet authors so tweets can be split into locals and tourists.

    Uses tweet/user/photo/linked Mongo collections and resolves free-text
    user locations to [lat, lon] via Yahoo YQL geo.placefinder, memoised in
    self.yqlCache.
    """
    def __init__(self):
        self.tweets = Connection().tweetsDB.tweetsCollection
        self.tweetUsers = Connection().tweetsDB.tweetUsersCollection
        # 2d geo index so users can later be queried by coordinates.
        self.tweetUsers.ensure_index( [("loc", GEO2D )] )
        self.photos = Connection().flickrDB.flickrCollection
        self.linked = Connection().linkedDB.linkedCollection
        API_KEY = 'dj0yJmk9UUY5TWxNMXBRb0M3JmQ9WVdrOVV6RlVOWFEzTjJzbWNHbzlNVGMzTVRBNE5EazJNZy0tJnM9Y29uc3VtZXJzZWNyZXQmeD0zYQ--'
        SHARED_SECRET = '92a96753c369996f18b6a2ef4a6b1b9c85de04f5'
        # Two-legged OAuth client for YQL queries.
        self.y = yql.TwoLegged(API_KEY, SHARED_SECRET)
        # location text -> resolved [lat, lon], or 0 for unresolvable text.
        self.yqlCache = {}
    def unescape_html_chars(self, item):
        # NOTE(review): these replacement sources look garbled (identity
        # replacements); originally they were probably HTML entities such as
        # &amp;/&gt;/&lt;/&quot; -- confirm against upstream history.
        return item.replace("&", "&").replace(">", ">").replace("<", "<").replace(""", "\"")
    def classifyTwitter(self):
        # NOTE(review): self.place is read here but never assigned in
        # __init__ -- presumably set externally before this runs; verify.
        for tweet in self.tweets.find({"place":self.place}):
            if tweet['fromUserID'] is not None:
                tweetUser = self.tweetUsers.find_one({'id':tweet['fromUserID']})
                if tweetUser is not None:
                    tweetUserLocation = tweetUser['location']
                    # Only geocode users with a location string and no
                    # coordinates yet.
                    if tweetUserLocation is not None and tweetUser['loc'] is None:
                        tweetUserLocation = tweetUserLocation.encode('utf-8')
                        #print "%s || %s" % (tweetUserLocation, self.place)
                        # we use the yqlCache local dictionary to use as few calls as possible
                        if self.yqlCache.get(tweetUserLocation) is not None and self.yqlCache[tweetUserLocation] != 0:
                            tweetUser['loc'] = self.yqlCache[tweetUserLocation]
                            print 'cacheSuccess: %20s %15s %s' % (tweetUserLocation, tweetUser['id'], tweetUser['loc'])
                        else:
                            # send request out to YQL
                            yqlQuery = 'select * from geo.placefinder where text="%s";' % tweetUserLocation
                            try:
                                yqlResult = self.y.execute(yqlQuery)
                                if yqlResult.rows == []:
                                    # yql couldn't figure out where this is, so don't save a loc
                                    self.yqlCache[tweetUserLocation] = 0
                                    print 'fail: %20s %s' % (tweetUserLocation, tweetUser['id'])
                                else:
                                    # yql found a lat and lon, so let's tag it
                                    loc = [float(yqlResult.rows[0].get('latitude')), float(yqlResult.rows[0].get('longitude'))]
                                    tweetUser['loc'] = loc
                                    self.yqlCache[tweetUserLocation] = loc
                                    print 'success: %20s %15s %s' % (tweetUserLocation, tweetUser['id'], loc)
                            except:
                                print "Exception Detected:", sys.exc_info()[0]
                        # ready to save user
                        self.tweetUsers.save(tweetUser)
def construct_test_user():
    # Rebuild jd.test_users from the raw review dump: fill gender/age/location
    # from linked weibo profiles and a kids label from the with/without-kids
    # uid lists.
    all_products = get_all_ids_from_file('product')
    collection = Connection().jd.test_users
    collection.drop()
    linked_users = Connection().jd.weibo_users
    fname = RAW_DATA_DIR + 'test_user_review.data'
    uids_with_kids = [
        line[:-1] for line in open(RAW_DATA_DIR + 'uids_with_kids.data')
    ]
    uids_without_kids = [
        line[:-1] for line in open(RAW_DATA_DIR + 'uids_without_kids.data')
    ]
    # jd uid -> weibo uid (file lines are "weibo_uid jd_uid").
    linked_uids = dict([(line[:-1].split(' ')[1], line[:-1].split(' ')[0])
                        for line in open(RAW_DATA_DIR + 'linked_uids.data')])
    prone_words = ['宝宝', '女儿', '儿子', '男朋友', '女朋友']
    f = open(fname)
    # First line of the dump is the number of users that follow.
    count = int(f.readline()[:-1])
    bar = progress_bar(count)
    for i in xrange(count):
        # Each user occupies three lines: uid, product ids, review text.
        uid = f.readline()[:-1]
        products = f.readline()[:-1].split(' ')
        # Keep only product ids we know about.
        products = list(set(products) & all_products)
        mentions = count_mentions(f.readline())
        # Each attribute is a one-hot pair; [0, 0] means unknown.
        profile = {
            'gender': [0] * 2,
            'age': [0] * 2,
            'location': [0] * 2,
            'kids': [0] * 2,
        }
        if uid in linked_uids:
            user = linked_users.find_one({'_id': linked_uids[uid]})
            if user == None:
                pass
            else:
                profile['gender'] = user['profile']['gender']
                profile['age'] = user['profile']['age']
                profile['location'] = user['profile']['location']
        if uid in uids_with_kids:
            profile['kids'] = [0, 1]
        if uid in uids_without_kids:
            profile['kids'] = [1, 0]
        # Drop kid/partner words for labelled users -- presumably to avoid
        # leaking the label into the mention features; confirm.
        if uid in uids_without_kids or uid in uids_with_kids:
            for w in prone_words:
                if w in mentions:
                    mentions.pop(w)
        collection.insert({
            '_id': uid,
            'products': products,
            'mentions': mentions,
            'profile': profile
        })
        bar.draw(i + 1)
def export_ballots(slugs):
    """Serialize every ballot for the given election slugs as indented JSON."""
    elections = Connection().stopgap.elections
    ballots = Connection().stopgap.ballots
    out = OrderedDict()
    for slug in slugs:
        election = elections.find_one({"slug": slug})
        if election is None:
            raise Exception("No election with slug.")
        out[slug] = list(ballots.find({"election_id": election['_id']}))
    return dumps(out, indent=2)
def test_expired_messages_not_returned_by_read(self):
    """Messages past their expiry are hidden from read() and flagged expired."""
    queue = Queue(database='capsulemq_test', queue='queue_test')
    queue.write(Message({'foo': 1}), expire=0.1)
    time.sleep(0.2)
    # Two consecutive reads must both come back empty.
    self.assertEqual(0, len(queue.read()))
    self.assertEqual(0, len(queue.read()))
    # Make sure the meta._expired key is actually set.
    collection = Connection().capsulemq_test.queue_test
    self.assertEqual(True, collection.find_one({})['_meta']['expired'])
class MongoDB(Database):
    """MongoDB-backed code store.

    Documents hold zlib-compressed source keyed by uid; the database name is
    taken from the final path segment of the connection URI.
    """
    def __init__(self, mongodburi):
        self._description = 'mongo'
        # Everything after the last '/' names the database.
        db_name = mongodburi[mongodburi.rfind('/') + 1:]
        self._mongo = Connection(mongodburi)[db_name][COLLECTION]

    def __contains__(self, uid):
        """True when a document with this uid exists."""
        return self._mongo.find_one(uid) is not None

    def get(self, uid):
        """Return the decompressed code stored under uid, or None when absent."""
        entry = self._mongo.find_one(uid)
        if entry is None:
            return None
        return zlib.decompress(entry['code'])

    def put(self, code, uid=None):
        """Store code (compressed) under uid; hash the code when uid is
        omitted.  Returns the uid used."""
        if uid is None:
            uid = Database.hash_(code)
        if uid not in self:
            payload = Database.make_ds(uid, Binary(zlib.compress(code)))
            self._mongo.insert(payload, safe=True)
        return uid
def addAnswer(request):
    """Store a POSTed answer against its compromise and echo the compromise back."""
    answer = json.loads(request.POST.get("json"))
    database = Connection(host="127.0.0.1", port=27017)["compDB"]
    compromise = database["compromiseCollection"].find_one({"_id": ObjectId(answer["_id"])})
    # Re-key the answer: drop its own _id and link it to its compromise.
    del answer["_id"]
    answer["type"] = "answer"
    answer["compromise_id"] = compromise["_id"]
    database["answers"].insert(answer)
    # ObjectId is not JSON-serializable; strip it before responding.
    del compromise["_id"]
    return HttpResponse(json.dumps(compromise))
def export_elections(slugs, keep_participants=False):
    """Serialize the named elections as indented JSON.

    Participant records are stripped unless keep_participants is set.
    """
    elections = Connection().stopgap.elections
    out = OrderedDict()
    for slug in slugs:
        doc = elections.find_one({"slug": slug})
        if doc is None:
            raise Exception("No election with slug.")
        if not keep_participants:
            # protect email addresses of participants
            del doc['participants']
        out[slug] = doc
    return dumps(out, indent=2)
def test_writing_a_message_to_the_queue_populates_it_within_mongodb(self):
    """Fields set on a Message end up in the backing Mongo document."""
    queue = Queue(database='karait_test', queue='queue_test')
    message = Message()
    message.apple = 5
    message.banana = 6
    message.inner_object = {'foo': 1, 'bar': 2}
    queue.write(message)
    stored = Connection().karait_test.queue_test.find_one({})
    self.assertEqual(6, stored['banana'])
    self.assertEqual(2, stored['inner_object']['bar'])
    # Queue.write attaches expiry/timestamp metadata.
    self.assertTrue(stored['_meta']['expire'])
    self.assertTrue(stored['_meta']['timestamp'])
def test_calling_delete_removes_the_message_from_mongodb(self):
    """Message.delete() marks the backing document as expired."""
    collection = Connection().capsulemq_test.queue_test
    document = {
        'routing_key': 'foobar',
        'apple': 3,
        'banana': 5,
        '_meta': {'timestamp': 2523939, 'expire': 20393, 'expired': False},
    }
    collection.insert(document)
    raw_message = collection.find_one({'_meta.expired': False})
    self.assertEqual(3, raw_message['apple'])
    Message(raw_message, queue_collection=collection).delete()
    remaining = collection.find({'_meta.expired': False}).count()
    self.assertEqual(0, remaining)
def test_calling_delete_removes_the_message_from_mongodb(self):
    """Deleting a message hides it from non-expired queries."""
    collection = Connection().karait_test.queue_test
    seed = {
        'routing_key': 'foobar',
        'apple': 3,
        'banana': 5,
        '_meta': {'timestamp': 2523939, 'expire': 20393, 'expired': False},
    }
    collection.insert(seed)
    fetched = collection.find_one({'_meta.expired': False})
    self.assertEqual(3, fetched['apple'])
    message = Message(fetched, queue_collection=collection)
    message.delete()
    self.assertEqual(0, collection.find({'_meta.expired': False}).count())
def test_writing_a_dictionary_to_the_queue_populates_it_within_mongodb(
        self):
    """Writing a plain dict stores its fields plus _meta in MongoDB."""
    queue = Queue(database='karait_test', queue='queue_test')
    payload = {
        'apple': 5,
        'banana': 6,
        'inner_object': {'foo': 1, 'bar': 2},
    }
    queue.write(payload)
    stored = Connection().karait_test.queue_test.find_one({})
    self.assertEqual(6, stored['banana'])
    self.assertEqual(2, stored['inner_object']['bar'])
    # Metadata is added automatically on write.
    self.assertTrue(stored['_meta']['expire'])
    self.assertTrue(stored['_meta']['timestamp'])
def update_index(id, words):
    """Register id under each sanitized word in the inverted index.

    Words are stripped of the characters listed in the module-level
    ``invalid`` before use as keys; each id is stored at most once per word.
    """
    index = Connection()["reddit"]["inverted_index"]
    for word in words:
        # Remove characters Mongo cannot store in keys.
        for bad in invalid:
            if bad in word:
                word = word.replace(bad, '')
        row = index.find_one({"key": word})
        if not row:
            index.insert({"key": word, "ids": [id]})
            continue
        ids = list(row["ids"])
        if id not in ids:
            ids.append(id)
        index.update({"key": word}, {"key": word, "ids": ids})
def create_tokens_and_send_email(slug, dry_run=False, force=False):
    # Generate a voting token for every participant who has not yet been
    # emailed, push the token onto the election document, and send the ballot
    # email.  dry_run skips all writes and sends; force skips the
    # confirmation prompt.
    elections = Connection().stopgap.elections
    election = elections.find_one({"slug": slug})
    if election is None:
        raise Exception("There is no election by the name of '%s'" % slug)
    unsent = []
    for o in election['participants']:
        if o['sent'] is not True:
            unsent.append(o['email'])
    total_unsent = len(unsent)
    if not force:
        res = input("Are you sure you want to send %s emails?\n[y/N]>" % total_unsent)
        if res.lower() != "y":
            return
    for n, email in enumerate(unsent):
        sys.stdout.write("[%s/%s] %s... " % (n+1, total_unsent, email))
        token = uuid.uuid4()
        if not dry_run:
            # Save the token before sending, so any emailed token is always
            # recognised by the server.
            res = safe_modify(elections, {"slug": slug}, {"$push": {"tokens": token}})
            if res is False:
                raise Exception("A token failed to be saved to the server: '%s'" % token.hex)
        mail = create_email(
            frm=election['email']['from'],
            subject=election['email']['subject'],
            to=email,
            text=election['email']['content'].format(
                slug=slug,
                token=token.hex
            )
        )
        if not dry_run:
            sendmail(mail)
            # Flag the participant as emailed so a rerun skips them.
            res = safe_modify(elections, {"slug": slug, "participants.email": email}, {"$set": {"participants.$.sent": True}})
            if res is False:
                raise Exception("An email sent flag failed to be set for '%s'" % email)
        print("Sent!")
def test_writing_a_dictionary_to_the_queue_populates_it_within_mongodb(self):
    """A raw dict written to the queue is persisted with _meta attached."""
    queue = Queue(database='capsulemq_test', queue='queue_test')
    queue.write({
        'apple': 5,
        'banana': 6,
        'inner_object': {'foo': 1, 'bar': 2},
    })
    document = Connection().capsulemq_test.queue_test.find_one({})
    self.assertEqual(6, document['banana'])
    self.assertEqual(2, document['inner_object']['bar'])
    self.assertTrue(document['_meta']['expire'])
    self.assertTrue(document['_meta']['timestamp'])
class MongoEmitter(Emitter):
    # Emitter that merges FFIEC income figures into the k2.locations
    # collection, matching documents by MSA code.
    def __init__(self):
        super(MongoEmitter, self).__init__()
        self._mongo = Connection()['k2']['locations']
    def emit_record(self, record):
        # Attach the record's low/high/avg figures under 'ffiec' on the
        # matching location document; report codes with no match.
        doc = self._mongo.find_one({'code': record['msa_code']})
        if doc:
            doc['ffiec'] = {
                'low': record['low'],
                'high': record['high'],
                'avg': record['avg'],
            }
            self._mongo.save(doc)
        else:
            print "[%s] %s not found" % (record['msa_code'], record['name'])
    def done(self):
        # Nothing buffered; every record is written as it is emitted.
        pass
def test_writing_a_message_to_the_queue_populates_it_within_mongodb(self):
    """Attributes set on a Message are stored in the Mongo document."""
    queue = Queue(database='capsulemq_test', queue='queue_test')
    message = Message()
    message.apple = 5
    message.banana = 6
    message.inner_object = {'foo': 1, 'bar': 2}
    queue.write(message)
    document = Connection().capsulemq_test.queue_test.find_one({})
    self.assertEqual(6, document['banana'])
    self.assertEqual(2, document['inner_object']['bar'])
    self.assertTrue(document['_meta']['expire'])
    self.assertTrue(document['_meta']['timestamp'])
def ready(request):
    # Tally all submitted answers for a compromise and reduce each question
    # to its single highest-scoring answer, then render the results page.
    compromise_id = request.GET.get("id")
    compromises = Connection(host="127.0.0.1", port=27017)["compDB"]["compromiseCollection"]
    compromise = compromises.find_one({"_id": ObjectId(compromise_id)})
    # Reset running totals before summing the submitted answers.
    for q in compromise["questions"]:
        for a in q["answers"]:
            a["current"] = 0.0
    answers = Connection(host="127.0.0.1", port=27017)['compDB']['answers']
    # NOTE(review): assumes every stored answer lists questions/answers in
    # the same order as the compromise document -- confirm.
    for answer in answers.find({'compromise_id': ObjectId(compromise_id)}):
        for i, q in enumerate(answer["questions"]):
            for j, a in enumerate(q["answers"]):
                compromise["questions"][i]["answers"][j]["current"] += float(a["current"])
    # Keep only the winning answer of each question (highest total).
    for q in compromise["questions"]:
        for a in q["answers"]:
            if a["current"] > q["answers"][0]["current"]:
                q["answers"][0] = a
        del q["answers"][1:]
    # ObjectId is not JSON-serializable.
    del compromise["_id"]
    return render_to_response("readyevent.html", {"json": json.dumps(compromise)})
class AutoCompModule:
    """Auto-completion module backed by MongoDB.

    Holds three collections:
      dict    - occurrence count ("grade") of each single word
      dictBy2 - occurrence count of each word pair (x, y)
      dictBy3 - occurrence count of each word triple (x, y, z)
    """

    def __init__(self, DBName):
        self.dict = Connection()[DBName]['dict']
        self.dictBy2 = Connection()[DBName]['dictBy2']
        self.dictBy3 = Connection()[DBName]['dictBy3']

    def learnSingle(self, fileName):
        """Count word uni-/bi-/trigrams from one UTF-8 text file.

        pprev, prev, word are the three last seen words (word is current);
        punctuation tokens reset the n-gram window.
        """
        input = open(fileName, encoding='utf-8')
        for line in input:
            pprev = prev = None
            for word in line.split():
                # Punctuation breaks the n-gram window.
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    continue
                if self.dict.find_one({"word": word, "grade": {"$exists": True}}) != None:
                    self.dict.update({"word": word}, {"$inc": {"grade": 1}})
                else:
                    self.dict.insert({"word": word, "grade": 1, "info": None})
                if prev != None:
                    if self.dictBy2.find_one({"first": prev, "second": word, "grade": {"$exists": True}}) != None:
                        self.dictBy2.update({"first": prev, "second": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy2.insert({"first": prev, "second": word, "grade": 1})
                if pprev != None:
                    if self.dictBy3.find_one({"first": pprev, "second": prev, "third": word, "grade": {"$exists": True}}) != None:
                        self.dictBy3.update({"first": pprev, "second": prev, "third": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy3.insert({"first": pprev, "second": prev, "third": word, "grade": 1})
                pprev = prev
                prev = word
        input.close()

    def learn(self, inputDir):
        """Learn from every file in inputDir (sorted), printing progress."""
        size = len(os.listdir(inputDir))
        i = 1
        if os.path.isdir(inputDir):
            for f in sorted(os.listdir(inputDir)):
                self.learnSingle(inputDir + '/' + f)
                sys.stdout.flush()
                print(str(int((i*100)/size))+"%", end="\r")
                i += 1
            print("SUCCESS LEARNING FINISH")
        else:
            print("ERROR!!")

    def addMalletInfoToDB(self, wtcfile, twwfile, keysfile):
        """Attach Mallet topic info to every word already in the dictionary."""
        wordDict = malletGetWordsAndData(wtcfile, twwfile, keysfile)
        for word in wordDict:
            if self.dict.find_one({"word": word, "grade": {"$exists": True}}) != None:
                self.dict.update({"word": word}, {"$set": {"info": wordDict[word]}})

    #####################################
    # Method that suggests the next word
    # For a given pprev and prev it finds the most likely word, once using
    # only prev and once using both pprev and prev.
    #
    # Returns None values when there is no match to prev (or pprev and prev)
    # in the dictionaries, or when they are given as None.
    def suggest(self, pprev=None, prev=None):
        if prev is None:
            return None, None
        if pprev is None:
            a = self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])
            if a is not None:
                return a["second"], None
            else:
                return None, None
        a = self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])
        b = self.dictBy3.find_one({"first": pprev, "second": prev}, sort=[("grade", -1), ("third", 1)])
        if b is not None:
            return a["second"], b["third"]
        else:
            return None, None

    def suggest2(self, pprev=None, prev=None, x=5):
        """Like suggest(), but returns up to x [grade, word] candidates for
        each context instead of a single word."""
        if prev is None:
            return None, None
        lst = []
        for a in self.dictBy2.find({"first": prev}).sort([('grade', -1), ('second', 1)]):
            if len(lst) >= x:
                break
            lst.append(a)
        if lst == []:
            return None, None
        res1 = [[a["grade"], a["second"]] for a in lst]
        if pprev is None:
            return res1, None
        lstBy3 = []
        # NOTE(review): the tie-break key here is 'second' while suggest()
        # uses 'third' for trigrams -- possibly intentional; confirm.
        for a in self.dictBy3.find({"first": pprev, "second": prev}).sort([('grade', -1), ('second', 1)]):
            if len(lstBy3) >= x:
                break
            lstBy3.append(a)
        # BUG FIX: the original tested "lstBy3 is []", which is never true,
        # so an empty trigram result leaked out as [] instead of None.
        if not lstBy3:
            return res1, None
        return res1, [[a["grade"], a["third"]] for a in lstBy3]
class AutoCompModule:
    """Auto-completion module (normalizing variant).

    Same collection layout as the other AutoCompModule in this file, except
    unigram counts live in "amount" and learnSingle converts pair/triple
    grades into conditional frequencies after each file.
    """

    def __init__(self, DBName):
        self.dict = Connection()[DBName]['dict']
        self.dictBy2 = Connection()[DBName]['dictBy2']
        self.dictBy3 = Connection()[DBName]['dictBy3']

    def learnSingle(self, fileName):
        """Count n-grams from one file, then renormalize the grades.

        After counting, each trigram grade is divided by its leading bigram's
        grade, and each bigram grade by its first word's amount, turning raw
        counts into conditional frequencies.
        """
        input = open(fileName, encoding='utf-8')
        for line in input:
            pprev = prev = None
            for word in line.split():
                # Punctuation resets the n-gram window.
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    continue
                if self.dict.find_one({"word": word, "amount": {"$exists": True}}) != None:
                    self.dict.update({"word": word}, {"$inc": {"amount": 1}})
                else:
                    self.dict.insert({"word": word, "amount": 1})
                if prev != None:
                    if self.dictBy2.find_one({"first": prev, "second": word, "grade": {"$exists": True}}) != None:
                        self.dictBy2.update({"first": prev, "second": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy2.insert({"first": prev, "second": word, "grade": 1})
                if pprev != None:
                    if self.dictBy3.find_one({"first": pprev, "second": prev, "third": word, "grade": {"$exists": True}}) != None:
                        self.dictBy3.update({"first": pprev, "second": prev, "third": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy3.insert({"first": pprev, "second": prev, "third": word, "grade": 1})
                pprev = prev
                prev = word
        # Normalize trigrams by their leading bigram, then bigrams by their
        # first word.
        for entity in self.dictBy3.find():
            amount = self.dictBy2.find_one({"first": entity["first"], "second": entity["second"]})["grade"]
            self.dictBy3.update({"first": entity["first"], "second": entity["second"], "third": entity["third"]},
                                {"$set": {"grade": entity["grade"]/amount}})
        for entity in self.dictBy2.find():
            amount = self.dict.find_one({"word": entity["first"]})["amount"]
            self.dictBy2.update({"first": entity["first"], "second": entity["second"]},
                                {"$set": {"grade": entity["grade"]/amount}})
        input.close()

    def learn(self, inputDir):
        """Learn from every file in inputDir, in sorted order."""
        if os.path.isdir(inputDir):
            for f in sorted(os.listdir(inputDir)):
                self.learnSingle(inputDir + '/' + f)
            print("SUCCESS LEARNING")
        else:
            print("ERROR!!")

    def suggest(self, pprev=None, prev=None):
        """Return the top bigram and (when pprev is given) trigram continuation."""
        if prev is None:
            return None, None
        if pprev is None:
            return self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])["second"], None
        return self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])["second"], \
               self.dictBy3.find_one({"first": pprev, "second": prev}, sort=[("grade", -1), ("third", 1)])["third"]

    def simpleTest(self, testFile, num):
        """Measure suggestion accuracy on testFile.

        Every (num+1)-th word is predicted from its two predecessors; returns
        the bigram and trigram hit rates.
        """
        test = open(testFile, 'r', encoding='utf-8')
        numOfChecks1 = numOfChecks2 = succ1 = succ2 = 0
        i = num
        for line in test:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    i = num
                    continue
                if i != 0:
                    i -= 1
                    pprev = prev
                    prev = word
                else:
                    a, b = self.suggest(pprev, prev)
                    # BUG FIX: the original compared with "is" (object
                    # identity), so equal-but-distinct strings were never
                    # counted as hits; use equality instead.
                    if a is not None:
                        if a == word:
                            succ1 += 1
                        numOfChecks1 += 1
                    if b is not None:
                        if b == word:
                            succ2 += 1
                        numOfChecks2 += 1
                    i = num
                    pprev = prev
                    prev = word
        test.close()
        return succ1/numOfChecks1, succ2/numOfChecks2
] while True: n_inserted = 0 n_downloaded = 0 n_skipped = 0 n_total = 0 for feed_url in feed_urls: source = feed_url feed_url = 'http://feeds.reuters.com/' + feed_url + '.rss' feed = feedparser.parse(feed_url) for entry in feed['entries']: n_total += 1 if conn.find_one({'url': entry['link']}): logging.debug('Skip: %s' % entry['link']) n_skipped += 1 continue n_downloaded += 1 article = newspaper.Article(entry['link']) article.download() article.parse() conn.insert({ 'url': entry['link'], 'link': article.canonical_link, 'title': article.title, 'text': article.text, 'date': str(article.publish_date), 'images': article.images,
class MonFS(fuse.Fuse):
    """ A Fuse plugin which allows one to store a Nagios configuration into MongoDB
    and expose it as a regular filesystem.

    As a base for this script I have shamelessly ripped of:
    http://sourceforge.net/apps/mediawiki/fuse/index.php?title=FUSE_Python_tutorial

    Currently NagiosFS exposes each object as a file because that works for me.
    If there's any interest in alternative ways, let me know.

    Example fstab entry:
    /opt/monfs/monfs.py# /mnt/monfs fuse allow_other,host=sandbox,db=monfs,collection=objects 0 1

    Parameters:
        * host: The name of the MongoDB server.
        * db: The name of the database.
        * collection: The name of the collection containing the configuration.
    """
    def __init__(self, host='localhost', db='monfs', collection='objects', *args, **kw):
        fuse.Fuse.__init__(self, *args, **kw)
        self.host=host
        self.db=db
        self.collection=collection
        # Fixed top-level layout: one directory per Nagios object type.
        self.dir_structure = [ '.', '..',
            'host', 'hostTemplates', 'hostGroup', 'hostDependency', 'hostEscalation',
            'service', 'serviceTemplates', 'serviceGroup', 'serviceDependency', 'serviceEscalation',
            'contact', 'contactTemplates', 'contactGroup',
            'timePeriod', 'command', 'hostExtInfo', 'serviceExtInfo' ]
    def setupConnection(self):
        '''Create a connection object to the MongoDB.'''
        try:
            self.mongo = Connection(self.host)[self.db][self.collection]
        except Exception as err:
            print sys.stderr.write('Could not connect to MongoDB. Reason: %s'%err)
            sys.exit(1)
    def getattr(self, path):
        """
        - st_mode (protection bits)
        - st_ino (inode number)
        - st_dev (device)
        - st_nlink (number of hard links)
        - st_uid (user ID of owner)
        - st_gid (group ID of owner)
        - st_size (size of file, in bytes)
        - st_atime (time of most recent access)
        - st_mtime (time of most recent content modification)
        - st_ctime (platform dependent; time of most recent metadata change on Unix,
                    or the time of creation on Windows).
        """
        print '*** getattr', path
        st = fuse.Stat()
        # Known type directories (and the root) are directories; anything
        # one level below a type directory is looked up as a document.
        if path.lstrip('/') in self.dir_structure or path in ['/','.','..']:
            st.st_mode = stat.S_IFDIR | 0755
            st.st_nlink = 2
            st.st_atime = int(time())
            st.st_mtime = st.st_atime
            st.st_ctime = st.st_atime
            return st
        elif path.split('/')[1] in self.dir_structure:
            # `size` is the rendered document text (or None when absent).
            size = self.__queryDocument(path)
            if size == None:
                return -errno.ENOENT
            else:
                st.st_mode = stat.S_IFREG | 0755
                st.st_nlink = 1
                st.st_atime = int(time())
                st.st_mtime = st.st_atime
                st.st_ctime = st.st_atime
                st.st_size = len(size)
                return st
        else:
            return -errno.ENOENT
    def readdir(self, path, offset):
        print '*** getdir', path
        path = self.__splitPath(path)
        if path[0] == '/':
            # Root: the fixed per-type directory layout.
            for dir in self.dir_structure:
                yield fuse.Direntry(str(dir))
        else:
            # Type directory: one entry per matching Mongo document.
            for file in self.generateMongoDir(path[1]):
                yield fuse.Direntry(str(file))
    def generateMongoDir(self, type):
        '''Generates the content of a directory by type.
        Each directory is "yielded" so it becomes iterable.
        '''
        # Template directories list non-registered objects of the base type;
        # normal directories list registered objects.
        if "Templates" in type :
            query = {"_monfs.type":match('(.*?)Templates',type).group(1), "register":"0"}
        else:
            query = {"_monfs.type":type.lower(), "register":{"$ne":"0"}}
        for object in self.mongo.find(query):
            # Disabled objects get a distinguishing suffix.
            if object['_monfs']['enabled'] == True:
                yield ('%s.cfg'%(object['_id']))
            else:
                yield ('%s.cfg.disabled'%(object['_id']))
        for object in ['.', '..']:
            yield object
    def read ( self, path, length, offset ):
        print '*** read', path, length, offset
        document = self.__queryDocument(path)
        slen = len(document)
        if offset < slen:
            if offset + length > slen:
                size = slen - offset
            # NOTE(review): `size` appears unbound when the requested range
            # fits inside the document (offset + length <= slen) -- the
            # usual pattern sets size = length in that case; confirm.
            buf = document[offset:offset+size]
        else:
            buf = ''
        return buf
    def __splitPath(self, path):
        # Returns (first path component, document id without extension).
        if path == '/':
            return ('/','')
        else:
            parts = path.split('/')
            if len(parts) == 2:
                return (parts[0], parts[1].split('.')[0])
            else:
                return (parts[0], None)
    def __queryDocument(self, path):
        '''Queries MongoDB for a document and transforms it to a Nagios
        compatible format.
        '''
        try:
            document = self.mongo.find_one({ "_id":ObjectId(path.split('/')[2].split('.')[0])})
            return self.__transformDocument(document)
        except:
            # Any parse/lookup failure is reported to callers as "no document".
            return None
    def __transformDocument(self, doc):
        '''Pretty print the dictonary
        '''
        object = []
        object.append("define %s{"%(doc['_monfs']['type']))
        for key in sorted(doc.keys()):
            # Internal bookkeeping fields are not part of the Nagios object.
            if not key in [ '_monfs', '_id' ]:
                object.append("    {0:50} {1}".format(key, doc[key]))
        object.append('}\n')
        return str('\n'.join(object))
    # The remaining operations are unsupported stubs (read-only filesystem);
    # open/release succeed so reads can proceed.
    def mythread ( self ):
        print '*** mythread'
        return -errno.ENOSYS
    def chmod ( self, path, mode ):
        print '*** chmod', path, oct(mode)
        return -errno.ENOSYS
    def chown ( self, path, uid, gid ):
        print '*** chown', path, uid, gid
        return -errno.ENOSYS
    def fsync ( self, path, isFsyncFile ):
        print '*** fsync', path, isFsyncFile
        return -errno.ENOSYS
    def link ( self, targetPath, linkPath ):
        print '*** link', targetPath, linkPath
        return -errno.ENOSYS
    def mkdir ( self, path, mode ):
        print '*** mkdir', path, oct(mode)
        return -errno.ENOSYS
    def mknod ( self, path, mode, dev ):
        print '*** mknod', path, oct(mode), dev
        return -errno.ENOSYS
    def open ( self, path, flags ):
        print '*** open', path, flags
        return 0
    def readlink ( self, path ):
        print '*** readlink', path
        return -errno.ENOSYS
    def release ( self, path, flags ):
        print '*** release', path, flags
        return 0
    def rename ( self, oldPath, newPath ):
        print '*** rename', oldPath, newPath
        return -errno.ENOSYS
    def rmdir ( self, path ):
        print '*** rmdir', path
        return -errno.ENOSYS
    def statfs ( self ):
        print '*** statfs'
        return -errno.ENOSYS
    def symlink ( self, targetPath, linkPath ):
        print '*** symlink', targetPath, linkPath
        return -errno.ENOSYS
    def truncate ( self, path, size ):
        print '*** truncate', path, size
        return -errno.ENOSYS
    def unlink ( self, path ):
        print '*** unlink', path
        return -errno.ENOSYS
    def utime ( self, path, times ):
        print '*** utime', path, times
        return -errno.ENOSYS
    def write ( self, path, buf, offset ):
        print '*** write', path, buf, offset
        return -errno.ENOSYS
class Mongo(object):
    """Small convenience wrapper around the MongoDB article collection.

    The *sw* switch selects the alternative database/collection
    (``aivb_redux.dater``) instead of the default ``aivb_db.articles``.
    """

    def __init__(self, log, sw=None):
        self.name = self.__class__.__name__
        self.log = log
        # Pick the backing collection depending on the switch.
        if sw:
            self.articles = Connection().aivb_redux.dater
        else:
            self.articles = Connection().aivb_db.articles

    def __str__(self):
        # Human-readable overview of the query templates that
        # parse_search() understands.
        return """ 'all': None,
        'search': {k: v},
        'empty': {k: 0},
        'filled': {k: {'$gt': 0.5}},
        'gtv': {k: {'$gt': v}},
        'regex': {k: {'$regex': v}},
        'exists': {k: {'$exists': True}},
        'and_ex': {'$and': [{k: v}, {k2: {'$exists': True}}]},
        'grt_ex': {'$and': [{k: {'$exists': True}}, {k2: {'$gt': v2}}]},
        'grt_eq': {'$and': [{k: {'$exists': True}}, {k2: v2}]},
        'p_range': {'$and': [{k: {'$gte': v}}, {k2: {'$lte': v2}}]},
        'period': {'$and': [{k: v}, {k2: {'$gt': v2}}]},
        'andand': {'$and': [{k: v}, {k2: v2}]}
        """

    def load(self, n=None):
        """Fetch *n* batches of articles via Loader and insert them all."""
        loader = Loader(self.log)
        for batch in loader.fetch_data(n):
            for article in batch:
                self.articles.insert(article)
        self.log.mlog.info("Inserted %d Instances of articles." % n)

    def search(self, command, key=None, value=None,
               s_key=None, s_value=None, t_key=None):
        """Run a canned query; without *key* just return one document."""
        if not key:
            return [self.articles.find_one()]
        return self.parse_search(command, key, value, s_key, s_value, t_key)

    def clear_all(self, v=None):
        """Remove every article, optionally echoing each one (verbose)."""
        for article in self.articles.find():
            if v:
                print(article)
            self.articles.remove(article)

    def parse_search(self, c, k, v, k2, v2, k3):
        """Translate command *c* into a MongoDB spec and run the query.

        ``select_*`` commands add a projection built from *k2*/*v2*
        (and *k3* when given).
        """
        op = {
            'all': None,
            'search': {k: v},
            'empty': {k: 0},
            'filled': {k: {'$gt': 0.5}},
            'gtv': {k: {'$gt': v}},
            'regex': {k: {'$regex': v}},
            'exists': {k: {'$exists': True}},
            'and_ex': {'$and': [{k: v}, {k2: {'$exists': True}}]},
            'grt_ex': {'$and': [{k: {'$exists': True}}, {k2: {'$gt': v2}}]},
            'grt_eq': {'$and': [{k: {'$exists': True}}, {k2: v2}]},
            'p_range': {'$and': [{k: {'$gte': v}}, {k2: {'$lte': v2}}]},
            'period': {'$and': [{k: v}, {k2: {'$gt': v2}}]},
            'andand': {'$and': [{k: v}, {k2: v2}]},
        }
        if 'select' not in c:
            return self.articles.find(op[c])
        # "select_<cmd>": run <cmd> with a projection on the chosen fields.
        spec = op[c.split('_')[1]]
        projection = {'_id': k2, v2: 1}
        if k3:
            projection[k3] = 1
        return self.articles.find(spec, projection)

    def update(self, c, eid, k, v, k2=None):
        """Set a (possibly nested) field on the article with id *eid*."""
        op = {
            'one': {'$set': {k: v}},
            'two': {'$set': {k2: {'$set': {k: v}}}},
        }
        self.articles.update({'_id': eid}, op[c], upsert=False, multi=False)
class SegmentTest(unittest2.TestCase):
    """Exercises the segmenter S against small fixture graphs."""

    def create_rg_for(self, datasets, col=None):
        # Build one graph per dataset index; default target is col0.
        target = col if col else self.col0
        for n in datasets:
            create_rg(npoints[n], target, distance_function=edist)

    def setUp(self):
        # Three scratch collections in the test database, wiped each run.
        cols = [Connection("127.0.0.1", 27017)["algolab-test"]["rg%d" % i]
                for i in (0, 1, 2)]
        self.col0, self.col1, self.col2 = cols
        for col in cols:
            col.drop()
        create_rg(npoints[2], self.col0, distance_function=edist)

    def test_already_segmented(self):
        self.assertEqual(list(S(self.col0).segment_ids)[0],
                         range(0, len(npoints[2])))

    def test_already_segmented2(self):
        create_rg(npoints[3], self.col1)
        self.assertItemsEqual(list(S(self.col1).segment_ids)[0], [4, 2, 5])

    def test_already_segmented3(self):
        create_rg(npoints[4], self.col1)
        self.assertEqual(list(S(self.col1).segment_ids)[0], [6, 2])

    def test_switch_segment(self):
        create_rg(npoints[3], self.col0, distance_function=edist)
        intersect = npoints[3][1]
        node = self.col0.find_one(intersect[2])
        self.assertEqual(len(node["successors"]), 4)
        # The intersection node must link to all four neighbours.
        for successor in ({"distance": 1, "id": 1},
                          {"distance": 1, "id": 3},
                          {"distance": 1, "id": 4},
                          {"distance": 4, "id": 5}):
            self.assertTrue(successor in node["successors"])

    def test_switch_segment2(self):
        self.create_rg_for([2, 3, 4, 5])
        print(list(S(self.col0).segment_ids))
        self.assertItemsEqual(list(S(self.col0).segment_ids),
                              [[0, 1, 2], [3, 2], [4, 2], [2, 5],
                               [2, 6], [2, 8, 7], [2, 9, 10, 11]])

    def test_switch_segment3(self):
        self.create_rg_for([2, 3, 4, 5, 6, 7])
        expected = [[0, 1, 2], [3, 2], [4, 2], [5, 2], [6, 2], [7, 8, 2],
                    [11, 10, 9, 2], [12, 2], [13, 15], [13, 16],
                    [2, 13], [13, 14]]
        self.assertItemsEqual(list(S(self.col0).segment_ids), expected)
        self.assertEqual(len(list(S(self.col0).segment_ids)), 12)

    def test_swith2_segment(self):
        empty(self.col0)
        self.create_rg_for([8, 9, 10, 11])
        triplets = list(S(self.col0).segments_as_triplets)
        for segment in triplets:
            print(segment)
        self.assertEqual(len(triplets), 4)
class MonFS(fuse.Fuse):
    """
    A Fuse plugin which allows one to store a Nagios configuration into
    MongoDB and expose it as a regular filesystem.

    As a base for this script I have shamelessly ripped of:
    http://sourceforge.net/apps/mediawiki/fuse/index.php?title=FUSE_Python_tutorial

    Currently NagiosFS exposes each object as a file because that works
    for me. If there's any interest in alternative ways, let me know.

    Example fstab entry:
    /opt/monfs/monfs.py# /mnt/monfs fuse allow_other,host=sandbox,db=monfs,collection=objects 0 1

    Parameters:

        * host: The name of the MongoDB server.
        * db: The name of the database.
        * collection: The name of the collection containing the configuration.
    """

    def __init__(self, host='localhost', db='monfs', collection='objects',
                 *args, **kw):
        fuse.Fuse.__init__(self, *args, **kw)
        self.host = host
        self.db = db
        self.collection = collection
        # Fixed top-level layout: one directory per Nagios object type.
        self.dir_structure = ['.', '..', 'host', 'hostTemplates', 'hostGroup',
                              'hostDependency', 'hostEscalation', 'service',
                              'serviceTemplates', 'serviceGroup',
                              'serviceDependency', 'serviceEscalation',
                              'contact', 'contactTemplates', 'contactGroup',
                              'timePeriod', 'command', 'hostExtInfo',
                              'serviceExtInfo']

    def setupConnection(self):
        '''Create a connection object to the MongoDB.'''
        try:
            self.mongo = Connection(self.host)[self.db][self.collection]
        except Exception as err:
            # BUGFIX: the original wrapped this call in `print`, which wrote
            # the error to stderr and then printed `None` (write()'s return
            # value) to stdout.
            sys.stderr.write('Could not connect to MongoDB. Reason: %s' % err)
            sys.exit(1)

    def getattr(self, path):
        """
        - st_mode (protection bits)
        - st_ino (inode number)
        - st_dev (device)
        - st_nlink (number of hard links)
        - st_uid (user ID of owner)
        - st_gid (group ID of owner)
        - st_size (size of file, in bytes)
        - st_atime (time of most recent access)
        - st_mtime (time of most recent content modification)
        - st_ctime (platform dependent; time of most recent metadata change
          on Unix, or the time of creation on Windows).
        """
        print('*** getattr %s' % path)
        st = fuse.Stat()
        if path.lstrip('/') in self.dir_structure or path in ['/', '.', '..']:
            # Known object-type directory (or the root itself).
            st.st_mode = stat.S_IFDIR | 0o755
            st.st_nlink = 2
            st.st_atime = int(time())
            st.st_mtime = st.st_atime
            st.st_ctime = st.st_atime
            return st
        elif path.split('/')[1] in self.dir_structure:
            # A file inside a type directory: size is the length of the
            # rendered Nagios object.
            rendered = self.__queryDocument(path)
            if rendered is None:
                return -errno.ENOENT
            st.st_mode = stat.S_IFREG | 0o755
            st.st_nlink = 1
            st.st_atime = int(time())
            st.st_mtime = st.st_atime
            st.st_ctime = st.st_atime
            st.st_size = len(rendered)
            return st
        else:
            return -errno.ENOENT

    def readdir(self, path, offset):
        '''Yield the entries of *path*: the type directories for the root,
        otherwise the rendered objects of that type.'''
        print('*** getdir %s' % path)
        root, object_type = self.__splitPath(path)
        if root == '/':
            for name in self.dir_structure:
                yield fuse.Direntry(str(name))
        else:
            for name in self.generateMongoDir(object_type):
                yield fuse.Direntry(str(name))

    def generateMongoDir(self, type):
        '''Generates the content of a directory by type.

        Each entry is "yielded" so the directory becomes iterable.
        '''
        if "Templates" in type:
            # Template directories list the unregistered objects
            # (register == "0") of the base type.
            query = {"_monfs.type": match('(.*?)Templates', type).group(1),
                     "register": "0"}
        else:
            query = {"_monfs.type": type.lower(), "register": {"$ne": "0"}}
        for document in self.mongo.find(query):
            # Disabled objects stay visible but get a marker suffix.
            if document['_monfs']['enabled'] == True:
                yield ('%s.cfg' % (document['_id']))
            else:
                yield ('%s.cfg.disabled' % (document['_id']))
        for entry in ['.', '..']:
            yield entry

    def read(self, path, length, offset):
        '''Return at most *length* bytes of the rendered object at *offset*.'''
        print('*** read %s %s %s' % (path, length, offset))
        document = self.__queryDocument(path)
        if document is None:
            # Unknown path: report a missing file instead of crashing
            # on len(None).
            return -errno.ENOENT
        slen = len(document)
        if offset < slen:
            # BUGFIX: `size` was only assigned when the read crossed EOF,
            # so an in-range read raised NameError. Clamp explicitly.
            size = length
            if offset + length > slen:
                size = slen - offset
            buf = document[offset:offset + size]
        else:
            buf = ''
        return buf

    def __splitPath(self, path):
        '''Split *path* into (root, basename-without-extension).

        Returns ('/', '') for the root and (head, None) for paths that
        are nested deeper than one level.
        '''
        if path == '/':
            return ('/', '')
        parts = path.split('/')
        if len(parts) == 2:
            return (parts[0], parts[1].split('.')[0])
        return (parts[0], None)

    def __queryDocument(self, path):
        '''Queries MongoDB for a document and transforms it to a Nagios
        compatible format. Returns None when the path does not resolve.
        '''
        try:
            document = self.mongo.find_one(
                {"_id": ObjectId(path.split('/')[2].split('.')[0])})
            return self.__transformDocument(document)
        except Exception:
            # Malformed path, invalid ObjectId or missing document all mean
            # "no such file"; callers translate None into ENOENT.
            # (Narrowed from a bare `except:` that also swallowed
            # SystemExit/KeyboardInterrupt.)
            return None

    def __transformDocument(self, doc):
        '''Pretty print the dictionary as a Nagios object definition.'''
        lines = []
        lines.append("define %s{" % (doc['_monfs']['type']))
        for key in sorted(doc.keys()):
            # Internal bookkeeping fields are not part of the definition.
            if key not in ['_monfs', '_id']:
                lines.append("    {0:50} {1}".format(key, doc[key]))
        lines.append('}\n')
        return '\n'.join(lines)

    # --- Unsupported operations below: log the call, return ENOSYS -------

    def mythread(self):
        print('*** mythread')
        return -errno.ENOSYS

    def chmod(self, path, mode):
        print('*** chmod %s %s' % (path, oct(mode)))
        return -errno.ENOSYS

    def chown(self, path, uid, gid):
        print('*** chown %s %s %s' % (path, uid, gid))
        return -errno.ENOSYS

    def fsync(self, path, isFsyncFile):
        print('*** fsync %s %s' % (path, isFsyncFile))
        return -errno.ENOSYS

    def link(self, targetPath, linkPath):
        print('*** link %s %s' % (targetPath, linkPath))
        return -errno.ENOSYS

    def mkdir(self, path, mode):
        print('*** mkdir %s %s' % (path, oct(mode)))
        return -errno.ENOSYS

    def mknod(self, path, mode, dev):
        print('*** mknod %s %s %s' % (path, oct(mode), dev))
        return -errno.ENOSYS

    def open(self, path, flags):
        # Reads are always allowed; getattr/read do the real validation.
        print('*** open %s %s' % (path, flags))
        return 0

    def readlink(self, path):
        print('*** readlink %s' % path)
        return -errno.ENOSYS

    def release(self, path, flags):
        print('*** release %s %s' % (path, flags))
        return 0

    def rename(self, oldPath, newPath):
        print('*** rename %s %s' % (oldPath, newPath))
        return -errno.ENOSYS

    def rmdir(self, path):
        print('*** rmdir %s' % path)
        return -errno.ENOSYS

    def statfs(self):
        print('*** statfs')
        return -errno.ENOSYS

    def symlink(self, targetPath, linkPath):
        print('*** symlink %s %s' % (targetPath, linkPath))
        return -errno.ENOSYS

    def truncate(self, path, size):
        print('*** truncate %s %s' % (path, size))
        return -errno.ENOSYS

    def unlink(self, path):
        print('*** unlink %s' % path)
        return -errno.ENOSYS

    def utime(self, path, times):
        print('*** utime %s %s' % (path, times))
        return -errno.ENOSYS

    def write(self, path, buf, offset):
        print('*** write %s %s %s' % (path, buf, offset))
        return -errno.ENOSYS