class DaoSunPosition():

    def __init__(self):
        self.col = Connection()['rdam']['sunpos']
        self.bulk = []

    def create_datetime_index(self):
        self.col.create_index('datetime')

    def persist(self, sunpos):
        self.bulk.append(sunpos)

    def flush(self):
        # Guard against inserting an empty batch, which pymongo rejects
        if self.bulk:
            self.col.insert(self.bulk)
            self.bulk = []

    def find_within_time(self, start_date, end_date):
        result = []
        for bson in self.col.find({'$and': [
                {'datetime': {'$gt': start_date}},
                {'datetime': {'$lt': end_date}}]}):
            sunpos = SunPosition(bson['az'], bson['el'], bson['datetime'])
            result.append(sunpos)
        return result
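# Hypothetical usage sketch for DaoSunPosition, not from the original source.
# It assumes persist() receives BSON-ready dicts carrying 'az', 'el' and
# 'datetime' keys, matching what find_within_time() reads back, and that a
# SunPosition class is defined elsewhere in this module.
if __name__ == '__main__':
    from datetime import datetime

    dao = DaoSunPosition()
    dao.create_datetime_index()
    dao.persist({'az': 180.0, 'el': 45.0, 'datetime': datetime(2012, 6, 21, 12, 0)})
    dao.flush()
    positions = dao.find_within_time(datetime(2012, 6, 21), datetime(2012, 6, 22))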
def save(nmap, **kwargs):
    logger = save.get_logger()
    nmapTask = Connection().phoenorama.nmapTask
    nmapTask.insert(nmap.toJSON())
    logger.info("Nmap Task was successfully saved")
def saveCompromise(request):
    try:
        #user = "******"
        #send_mail(EMAIL_SUBJECT_CREATE, (EMAIL_TEXT_CREATE % "http://ya.ru/"), EMAIL_HOST_USER, [user])
        currentCompromise = request.POST.get("json")
        currentCompromise = json.loads(currentCompromise)
        mongoConnection = Connection(host="127.0.0.1", port=27017)["compDB"]["compromiseCollection"]
        users = currentCompromise.get("users", [])
        if not users:
            users = ['*****@*****.**', '*****@*****.**', "*****@*****.**"]
        currentCompromise["type"] = "event"
        recordId = mongoConnection.insert(currentCompromise)
        for user in users:
            uniqDesc = md5(user + str(recordId)).hexdigest()
            uniqUrl = ANSWER_URL + uniqDesc
            mongoConnection.insert({"uniqDesc": uniqDesc, "idEvent": str(recordId), 'mail': user})
            send_mail(EMAIL_SUBJECT_CREATE, (EMAIL_TEXT_CREATE % uniqUrl), EMAIL_HOST_USER, [user])
        # Quote the URL so the response is valid JSON (uniqUrl holds the
        # last user's URL after the loop)
        return HttpResponse('{"status": "ok", "url": "%s"}' % uniqUrl)
    except TypeError:
        return HttpResponse("bad json")
def run(nmap, **kwargs):
    '''Start nmap task'''
    logger = run.get_logger()
    # Update nmap task status
    __updateNmap(nmap, {'status': "RUNNING"})
    start_task = "%s" % (nmap.target)
    cmd = shlex.split(TOOL_PATH + start_task)
    report_xml = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    #TODO: Validate start_task status

    # Save report
    report = parse(StringIO(report_xml))
    nmapReport = Connection().phoenorama.nmapReport
    nmapReport.insert(report.toJSON())

    # Update nmap task status and report_uuid
    __updateNmap(nmap, {'status': "DONE", 'report_uuid': report.report_uuid})
    logger.info("Report id: %s was successfully generated and saved to DB" % report.report_uuid)
    return report.report_uuid
def save(openvas, **kwargs):
    logger = save.get_logger()
    openvasTask = Connection().phoenorama.openvasTask
    openvasTask.insert(openvas.toJSON())
    logger.info("Openvas Task was successfully saved")
class CustomStreamListener(tweepy.StreamListener):

    def __init__(self):
        self.tweets = Connection().mytimeline.tweets
        super(CustomStreamListener, self).__init__()

    def on_data(self, data):
        self.tweets.insert(json.loads(data))
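# Hedged usage sketch for CustomStreamListener, assuming the pre-4.0 tweepy
# streaming API; the OAuth credentials and track term are placeholders.
auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_SECRET')
stream = tweepy.Stream(auth, CustomStreamListener())
stream.filter(track=['python'])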
def update_min_max_sum(entity):
    '''Used to insert min, max and sum of each mention feature.'''
    collection = Connection().jd['train_%s_mentions' % entity]
    collection_user = Connection().jd['train_%ss' % entity]
    mentions = [line[:-1].decode('utf8')
                for line in open('../features/mention.feature')]
    min_d = dict()
    max_d = dict()
    sum_d = dict()
    sum_u_d = dict()
    for m in mentions:
        min_d[m] = float('inf')
        max_d[m] = -1
        sum_d[m] = 0
        sum_u_d[m] = 0
    for user in collection_user.find():
        for m in user['mentions']:
            v = user['mentions'][m]
            if v < min_d[m]:
                min_d[m] = v
            if v > max_d[m]:
                max_d[m] = v
            sum_d[m] += v
            sum_u_d[m] += 1
    for m in mentions:
        collection.insert({
            '_id': m,
            'distribute': [min_d[m], max_d[m], sum_d[m], sum_u_d[m]]
        })
def update_all():
    tags = Connection()["reddit"]["tags"]
    index = Connection()["reddit"]["inverted_index"]
    invalid = ['.', '$']
    for tag in tags.find():
        for key in tag.keys():
            if key != "_id":
                word_list = tag[key]
                for w in word_list:
                    # Strip characters MongoDB does not allow in keys
                    for i in invalid:
                        if i in w:
                            w = w.replace(i, '')
                    row = index.find_one({"key": w})
                    if not row:
                        index.insert({"key": w, "ids": [key]})
                    else:
                        print "Updating", w
                        print row, row["ids"]
                        lst = list(row["ids"])
                        print lst, key
                        lst.append(key)
                        new_row = {"key": w, "ids": lst}
                        print new_row
                        index.update({"key": w}, new_row)
class MongoZipEmitter(Emitter):

    def __init__(self):
        super(MongoZipEmitter, self).__init__()
        self._zipcodes = Connection()[settings.MONGO_DATABASE]['zipcodes']
        self._zipcodes.drop()

    def emit_record(self, record):
        self._zipcodes.insert(record)
def __saveReport(report_uuid):
    getReport_task = "--get-report %s" % (report_uuid)
    cmd = shlex.split(TOOL_PATH + getReport_task)
    report_xml = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    #@TODO: check if output result is valid
    report = parse(StringIO(report_xml))
    openvasReport = Connection().phoenorama.openvasReport
    openvasReport.insert(report.toJSON())
    return "Report was successfully generated and saved to DB"
def construct_test_user():
    all_products = get_all_ids_from_file('product')
    collection = Connection().jd.test_users
    collection.drop()
    linked_users = Connection().jd.weibo_users
    fname = RAW_DATA_DIR + 'test_user_review.data'
    uids_with_kids = [line[:-1]
                      for line in open(RAW_DATA_DIR + 'uids_with_kids.data')]
    uids_without_kids = [line[:-1]
                         for line in open(RAW_DATA_DIR + 'uids_without_kids.data')]
    linked_uids = dict([(line[:-1].split(' ')[1], line[:-1].split(' ')[0])
                        for line in open(RAW_DATA_DIR + 'linked_uids.data')])
    # Words that leak the label: baby, daughter, son, boyfriend, girlfriend
    prone_words = ['宝宝', '女儿', '儿子', '男朋友', '女朋友']
    f = open(fname)
    count = int(f.readline()[:-1])
    bar = progress_bar(count)
    for i in xrange(count):
        uid = f.readline()[:-1]
        products = f.readline()[:-1].split(' ')
        products = list(set(products) & all_products)
        mentions = count_mentions(f.readline())
        profile = {
            'gender': [0] * 2,
            'age': [0] * 2,
            'location': [0] * 2,
            'kids': [0] * 2,
        }
        if uid in linked_uids:
            user = linked_users.find_one({'_id': linked_uids[uid]})
            if user is not None:
                profile['gender'] = user['profile']['gender']
                profile['age'] = user['profile']['age']
                profile['location'] = user['profile']['location']
        if uid in uids_with_kids:
            profile['kids'] = [0, 1]
        if uid in uids_without_kids:
            profile['kids'] = [1, 0]
        if uid in uids_without_kids or uid in uids_with_kids:
            for w in prone_words:
                if w in mentions:
                    mentions.pop(w)
        collection.insert({
            '_id': uid,
            'products': products,
            'mentions': mentions,
            'profile': profile
        })
        bar.draw(i + 1)
def test_queue_object_can_attach_to_a_collection_that_already_exists(self):
    collection = Connection().karait_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'message': {
            'apple': 3,
            'banana': 5
        },
        'timestamp': 2523939,
        'expire': 20393
    })
    queue = Queue(database='karait_test', queue='queue_test')
    self.assertEqual(1, collection.find({}).count())
class GameHandler(BaseHandler):
    allowed_methods = ('GET', 'POST')

    def __init__(self):
        self.games = Connection().dnb.games

    def read(self, request, id):
        return list(self.games.find())

    def create(self, request, id):
        if request.data:
            request.data['time'] = time()
            self.games.insert(request.data)
            return rc.CREATED
def construct_train_product():
    all_users = get_all_ids_from_file('user') | get_all_ids_from_file('test_user')
    collection = Connection().jd.train_products
    fname = RAW_DATA_DIR + 'product_review.data'
    f = open(fname)
    count = int(f.readline()[:-1])
    bar = progress_bar(count)
    for i in xrange(count):
        pid = f.readline()[:-1]
        users = f.readline()[:-1].split(' ')
        users = list(set(users) & all_users)
        mentions = count_mentions(f.readline())
        collection.insert({'_id': pid, 'users': users, 'mentions': mentions})
        bar.draw(i + 1)
def addAnswer(request):
    curAnswer = request.POST.get("json")
    curAnswer = json.loads(curAnswer)
    compromises = Connection(host="127.0.0.1", port=27017)["compDB"]["compromiseCollection"]
    curRecord = compromises.find_one({"_id": ObjectId(curAnswer["_id"])})
    del curAnswer["_id"]
    curAnswer["type"] = "answer"
    curAnswer["compromise_id"] = curRecord["_id"]
    answers = Connection(host="127.0.0.1", port=27017)["compDB"]["answers"]
    answers.insert(curAnswer)
    # Drop the ObjectId so the record is JSON-serializable
    del curRecord["_id"]
    return HttpResponse(json.dumps(curRecord))
def test_queue_object_can_attach_to_a_collection_that_already_exists(self):
    collection = Connection().capsulemq_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'message': {
            'apple': 3,
            'banana': 5
        },
        'timestamp': 2523939,
        'expire': 20393
    })
    queue = Queue(database='capsulemq_test', queue='queue_test')
    self.assertEqual(1, collection.find({}).count())
class Application(tornado.web.Application):

    def __init__(self, handlers, **settings):
        tornado.web.Application.__init__(self, handlers, **settings)
        self.collection = Connection().vapour.urls
        self.templates = TemplateLookup(directories=["templates"])

    def get_link_by_id(self, id):
        record = self.collection.find_one({'_id': uuid.UUID(id)})
        return fix_id(record)

    def get_links_by_tag(self, tag):
        records = self.collection.find({'tags': re.compile(tag, re.I)})
        return fix_ids(records)

    def get_links_by_url(self, url):
        records = self.collection.find({'url': re.compile(url, re.I)})
        return fix_ids(records)

    def insert_link(self, url, desc, tags):
        return self.collection.insert({
            '_id': uuid.uuid4(),
            'url': url,
            'desc': desc,
            'tags': tags,
            'added': datetime.datetime.utcnow()
        })
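# Minimal sketch of wiring the Application into a Tornado server; the empty
# handler list, example link, and port are illustrative, not from the
# original source.
if __name__ == '__main__':
    import tornado.ioloop

    app = Application(handlers=[])  # real handlers are defined elsewhere
    app.insert_link('http://example.com', 'an example link', ['example'])
    app.listen(8888)
    tornado.ioloop.IOLoop.instance().start()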
def extract(data):
    collection = Connection()['conf']['b']
    for post in data:
        logger = {}
        try:
            confID = getID(post["message"])
        except KeyError as k:
            print 'Error', k
            continue
        logger["confID"] = confID
        logger["message"] = post["message"]
        cursor = collection.find({'confID': str(confID)})
        if cursor.count() != 0:
            continue
        collection.insert(logger, safe=True)
        print 'Inserted', confID
def split(self):
    newc = Connection().aivb_redux.dater
    for x in self.search('period', 'date_range', True, 'subject_count', 1):
        newc.insert(x)
    self.log.mlog.info("New database was created!")
def test_calling_delete_removes_the_message_from_mongodb(self):
    collection = Connection().karait_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'apple': 3,
        'banana': 5,
        '_meta': {
            'timestamp': 2523939,
            'expire': 20393,
            'expired': False
        }
    })
    raw_message = collection.find_one({'_meta.expired': False})
    self.assertEqual(3, raw_message['apple'])
    message = Message(raw_message, queue_collection=collection)
    message.delete()
    self.assertEqual(0, collection.find({'_meta.expired': False}).count())
def test_calling_delete_removes_the_message_from_mongodb(self):
    collection = Connection().capsulemq_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'apple': 3,
        'banana': 5,
        '_meta': {
            'timestamp': 2523939,
            'expire': 20393,
            'expired': False
        }
    })
    raw_message = collection.find_one({'_meta.expired': False})
    self.assertEqual(3, raw_message['apple'])
    message = Message(raw_message, queue_collection=collection)
    message.delete()
    self.assertEqual(0, collection.find({'_meta.expired': False}).count())
def update_index(id, words):
    index = Connection()["reddit"]["inverted_index"]
    for w in words:
        # 'invalid' is expected at module level (e.g. ['.', '$'] as in
        # update_all); MongoDB does not allow these characters in keys
        for i in invalid:
            if i in w:
                w = w.replace(i, '')
        row = index.find_one({"key": w})
        if not row:
            index.insert({"key": w, "ids": [id]})
        else:
            lst = list(row["ids"])
            if id not in lst:
                lst.append(id)
                new_row = {"key": w, "ids": lst}
                index.update({"key": w}, new_row)
def construct_train_user():
    from pyltp import Segmentor
    all_products = get_all_ids_from_file('product')
    collection = Connection().jd.train_users
    fname = RAW_DATA_DIR + 'user_review.data'
    f = open(fname)
    count = int(f.readline()[:-1])
    print count
    bar = progress_bar(count)
    for i in xrange(count):
        uid = f.readline()[:-1]
        products = f.readline()[:-1].split(' ')
        products = list(set(products) & all_products)
        mentions = count_mentions(f.readline())
        collection.insert({
            '_id': uid,
            'products': products,
            'mentions': mentions
        })
        bar.draw(i + 1)
class MongoLogWriter(object):

    def __init__(self, db_name, collection_name, host="localhost", port=27017):
        self.collection = Connection(host, port)[db_name][collection_name]

    def parse_and_write(self, content):
        log_entries = content.strip("\n").split("\n")
        log_entries = filter(None, log_entries)
        map(self.parse_and_write_single_log_entry, log_entries)

    def parse_and_write_single_log_entry(self, log_entry):
        timestamp, uid, url, status, response_time, response_length = log_entry.split("|")
        log_dt = datetime.fromtimestamp(float(timestamp))
        self.collection.insert({
            "datetime": log_dt,
            "uid": uid if uid != "-" else None,
            "status": int(status),
            "response_time": float(response_time),
            "response_length": int(response_length),
        })
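# Illustrative use of MongoLogWriter; the database/collection names and the
# pipe-separated sample line are assumptions, with fields in the order
# parse_and_write_single_log_entry() expects
# (timestamp|uid|url|status|response_time|response_length).
writer = MongoLogWriter('logs', 'access')
writer.parse_and_write("1335312000.0|user42|/index.html|200|0.127|5120\n")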
class DaoShadow():

    def __init__(self):
        self.col = Connection()['rdam']['shadow']
        self.bulk = []

    def create_index(self):
        self.col.create_index([('loc', GEO2D), ('date_time', 1)])

    def persist(self, shadow):
        bson = self.shadow_to_bson(shadow)
        self.bulk.append(bson)
        if len(self.bulk) >= 10:
            self.flush()

    def flush(self):
        # Guard against inserting an empty batch, which pymongo rejects
        if self.bulk:
            self.col.insert(self.bulk)
            self.bulk = []

    def shadow_to_bson(self, shadow):
        bson = {'loc': {'lat': shadow.lat, 'lon': shadow.lon},
                'date_time': shadow.date_time}
        return bson
class MongoDB(Database):
    """ MongoDB abstraction. """

    def __init__(self, mongodburi):
        self._description = 'mongo'
        db_name = mongodburi[mongodburi.rfind('/') + 1:]
        self._mongo = Connection(mongodburi)[db_name][COLLECTION]

    def __contains__(self, uid):
        return self._mongo.find_one(uid) is not None

    def get(self, uid):
        entry = self._mongo.find_one(uid)
        return zlib.decompress(entry['code']) if entry is not None else None

    def put(self, code, uid=None):
        if uid is None:
            uid = Database.hash_(code)
        if uid not in self:
            self._mongo.insert(Database.make_ds(uid, Binary(zlib.compress(code))),
                               safe=True)
        return uid
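# Hedged roundtrip sketch for the MongoDB wrapper; the URI is a placeholder,
# and the Database base class (hash_, make_ds) plus the COLLECTION constant
# are assumed to come from the surrounding module.
db = MongoDB('mongodb://localhost:27017/pastes')
uid = db.put('print "hello"')
assert db.get(uid) == 'print "hello"'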
network_config = [2, 2]
robot = robotics.EnergyRobot(components[0], None, 'robot_0', '127.0.0.1')
population = evolution.DifferentialEvolution(F, CR, NP, neural.EvolvableFFANN,
                                             network_config, logistic)

for iteration in range(iterations):
    population.recombine()
    info = []
    for i, (current, candidate) in enumerate(population):
        fitness = []
        robot.network = candidate
        for j in range(trials):
            robot.move(steps, interval)
            fitness.append(robot.energy)
        candidate.fitness = mean(fitness)
        msg = '> iteration {0}, individual {1}, fitness {2} (previous {3})'
        print msg.format(iteration, i, candidate.fitness, current.fitness)
        info.append({'iteration': iteration, 'robot': i,
                     'fitness': current.fitness, 'weights': current.weights})
    population.select()
    # 'collection' is expected to be a pymongo collection defined earlier
    # in the script
    collection.insert(info)
class DaoHeight():

    def __init__(self, scale=1):
        self.col = Connection()['rdam']['height']
        self.bulk = []
        # (Py)mongo's 2D index can't handle large x and y values, so scale
        # can be used to store smaller xs and ys. Scale is a power of 10:
        # scale = 3 means divide coordinates by 1000.
        self.scale = scale

    def create_geo_index(self, min=0, max=1000):
        self.col.create_index([('loc', GEO2D)], min=min, max=max)

    def persist(self, point):
        bson = self.point_to_bson(point)
        self.bulk.append(bson)
        if len(self.bulk) >= 50000:
            self.flush()

    def flush(self):
        if len(self.bulk) > 0:
            self.col.insert(self.bulk)
            print len(self.bulk)
            self.bulk = []

    def find_within_box(self, points):
        box = self.points_to_box(points)
        result = []
        for bson in self.col.find({'loc': {'$within': {'$box': box}}}):
            point = self.bson_to_point(bson)
            result.append(point)
        return result

    def find_within_polygon(self, points):
        polygon = self.points_to_polygon_bson(points)
        result = []
        for bson in self.col.find({'loc': {'$within': {'$polygon': polygon}}}):
            point = self.bson_to_point(bson)
            result.append(point)
        return result

    def points_to_box(self, points):
        p1x = points[0].x / math.pow(10, self.scale)
        p1y = points[0].y / math.pow(10, self.scale)
        p2x = points[1].x / math.pow(10, self.scale)
        p2y = points[1].y / math.pow(10, self.scale)
        box = {'a': {'x': p1x, 'y': p1y}, 'b': {'x': p2x, 'y': p2y}}
        return box

    def points_to_polygon_bson(self, points):
        result = []
        for point in points:
            bson = self.point_to_polygon_bson(point)
            result.append(bson)
        return result

    def point_to_polygon_bson(self, point):
        x = point.x / math.pow(10, self.scale)
        y = point.y / math.pow(10, self.scale)
        return {'x': x, 'y': y}

    def points_to_bson(self, points):
        result = []
        for point in points:
            bson = self.point_to_bson(point)
            result.append(bson)
        return result

    def point_to_bson(self, point):
        x = point.x / math.pow(10, self.scale)
        y = point.y / math.pow(10, self.scale)
        z = point.z
        return {'loc': {'x': x, 'y': y}, 'z': z}

    def bson_to_point(self, bson):
        x = bson['loc']['x'] * math.pow(10, self.scale)
        y = bson['loc']['y'] * math.pow(10, self.scale)
        z = bson['z']
        return Point(x, y, z)
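# Hypothetical usage of DaoHeight with scale=3, so coordinates around
# 100000 are stored as 100.0 and fall inside the default index range;
# Point is assumed to be the (x, y, z) class used elsewhere in this module.
dao = DaoHeight(scale=3)
dao.create_geo_index()
dao.persist(Point(100000.0, 200000.0, 12.5))
dao.flush()
corners = [Point(90000.0, 190000.0, 0), Point(110000.0, 210000.0, 0)]
found = dao.find_within_box(corners)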
feed = feedparser.parse(feed_url)
for entry in feed['entries']:
    n_total += 1
    if conn.find_one({'url': entry['link']}):
        logging.debug('Skip: %s' % entry['link'])
        n_skipped += 1
        continue
    n_downloaded += 1
    article = newspaper.Article(entry['link'])
    article.download()
    article.parse()
    conn.insert({
        'url': entry['link'],
        'link': article.canonical_link,
        'title': article.title,
        'text': article.text,
        'date': str(article.publish_date),
        # article.images is a set, which BSON can't encode; store a list
        'images': list(article.images),
        'source': source
    })
    n_inserted += 1
    logging.debug('Insert: %s' % entry['link'])

response = "%d total articles\n%d downloaded\n%d inserted\n%d skipped" % (
    n_total, n_downloaded, n_inserted, n_skipped)
print response
logging.info(response)
time.sleep(3600)
class AutoCompModule:
    # Auto completion module using the MongoDB server.
    # Holds three collections:
    #   dict    - the number of times x appears in the learned text
    #   dictBy2 - the number of times the pair (x, y) appears in the learned text
    #   dictBy3 - the number of times the triple (x, y, z) appears in the learned text
    def __init__(self, DBName):
        self.dict = Connection()[DBName]['dict']
        self.dictBy2 = Connection()[DBName]['dictBy2']
        self.dictBy3 = Connection()[DBName]['dictBy3']

    # Learn from a single file.
    # pprev, prev, word are the three last seen words (word is the current word).
    def learnSingle(self, fileName):
        input = open(fileName, encoding='utf-8')
        for line in input:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    continue
                if self.dict.find_one({"word": word, "grade": {"$exists": True}}) != None:
                    self.dict.update({"word": word}, {"$inc": {"grade": 1}})
                else:
                    self.dict.insert({"word": word, "grade": 1, "info": None})
                if prev != None:
                    if self.dictBy2.find_one({"first": prev, "second": word, "grade": {"$exists": True}}) != None:
                        self.dictBy2.update({"first": prev, "second": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy2.insert({"first": prev, "second": word, "grade": 1})
                if pprev != None:
                    if self.dictBy3.find_one({"first": pprev, "second": prev, "third": word, "grade": {"$exists": True}}) != None:
                        self.dictBy3.update({"first": pprev, "second": prev, "third": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy3.insert({"first": pprev, "second": prev, "third": word, "grade": 1})
                pprev = prev
                prev = word
        input.close()

    # Learn from multiple files, using learnSingle.
    def learn(self, inputDir):
        size = len(os.listdir(inputDir))
        i = 1
        if os.path.isdir(inputDir):
            for f in sorted(os.listdir(inputDir)):
                self.learnSingle(inputDir + '/' + f)
                sys.stdout.flush()
                print(str(int((i * 100) / size)) + "%", end="\r")
                i += 1
            print("SUCCESS LEARNING FINISH")
        else:
            print("ERROR!!")

    def addMalletInfoToDB(self, wtcfile, twwfile, keysfile):
        wordDict = malletGetWordsAndData(wtcfile, twwfile, keysfile)
        for word in wordDict:
            if self.dict.find_one({"word": word, "grade": {"$exists": True}}) != None:
                self.dict.update({"word": word}, {"$set": {"info": wordDict[word]}})

    # Suggest the next word.
    # For a given pprev and prev it finds the most likely word, once using
    # only prev and once using both pprev and prev.
    # None values are returned when there is no match for prev (or for pprev
    # and prev) in the dictionaries, or when they are given as None.
    def suggest(self, pprev=None, prev=None):
        if prev is None:
            return None, None
        if pprev is None:
            a = self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])
            if a is not None:
                return a["second"], None
            else:
                return None, None
        a = self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])
        b = self.dictBy3.find_one({"first": pprev, "second": prev}, sort=[("grade", -1), ("third", 1)])
        if b is not None:
            return a["second"], b["third"]
        else:
            return None, None

    def suggest2(self, pprev=None, prev=None, x=5):
        if prev is None:
            return None, None
        i = 0
        lst = []
        for a in self.dictBy2.find({"first": prev}).sort([('grade', -1), ('second', 1)]):
            if i < x:
                lst.append(a)
                i += 1
            else:
                break
        if lst == []:
            return None, None
        res1 = [[a["grade"], a["second"]] for a in lst]
        if pprev is None:
            return res1, None
        i = 0
        lstBy3 = []
        for a in self.dictBy3.find({"first": pprev, "second": prev}).sort([('grade', -1), ('second', 1)]):
            if i < x:
                lstBy3.append(a)
                i += 1
            else:
                break
        if not lstBy3:  # 'is []' is always False; an empty-list check was intended
            return res1, None
        return res1, [[a["grade"], a["third"]] for a in lstBy3]
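# Sketch of training and querying AutoCompModule; the 'autocomp' database
# name, the 'corpus' directory, and the example words are placeholders, not
# from the original source.
module = AutoCompModule('autocomp')
module.learn('corpus')
best, best_with_context = module.suggest(pprev='the', prev='quick')
top5, top5_with_context = module.suggest2(pprev='the', prev='quick', x=5)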
class AutoCompModule:

    def __init__(self, DBName):
        self.dict = Connection()[DBName]['dict']
        self.dictBy2 = Connection()[DBName]['dictBy2']
        self.dictBy3 = Connection()[DBName]['dictBy3']

    def learnSingle(self, fileName):
        input = open(fileName, encoding='utf-8')
        for line in input:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    continue
                if self.dict.find_one({"word": word, "amount": {"$exists": True}}) != None:
                    self.dict.update({"word": word}, {"$inc": {"amount": 1}})
                else:
                    self.dict.insert({"word": word, "amount": 1})
                if prev != None:
                    if self.dictBy2.find_one({"first": prev, "second": word, "grade": {"$exists": True}}) != None:
                        self.dictBy2.update({"first": prev, "second": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy2.insert({"first": prev, "second": word, "grade": 1})
                if pprev != None:
                    if self.dictBy3.find_one({"first": pprev, "second": prev, "third": word, "grade": {"$exists": True}}) != None:
                        self.dictBy3.update({"first": pprev, "second": prev, "third": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy3.insert({"first": pprev, "second": prev, "third": word, "grade": 1})
                pprev = prev
                prev = word
        # Normalize counts into conditional probabilities
        for entity in self.dictBy3.find():
            amount = self.dictBy2.find_one({"first": entity["first"], "second": entity["second"]})["grade"]
            self.dictBy3.update({"first": entity["first"], "second": entity["second"], "third": entity["third"]},
                                {"$set": {"grade": entity["grade"] / amount}})
        for entity in self.dictBy2.find():
            amount = self.dict.find_one({"word": entity["first"]})["amount"]
            self.dictBy2.update({"first": entity["first"], "second": entity["second"]},
                                {"$set": {"grade": entity["grade"] / amount}})
        input.close()

    def learn(self, inputDir):
        if os.path.isdir(inputDir):
            for f in sorted(os.listdir(inputDir)):
                self.learnSingle(inputDir + '/' + f)
            print("SUCCESS LEARNING")
        else:
            print("ERROR!!")

    def suggest(self, pprev=None, prev=None):
        if prev is None:
            return None, None
        if pprev is None:
            return self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])["second"], None
        return (self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])["second"],
                self.dictBy3.find_one({"first": pprev, "second": prev}, sort=[("grade", -1), ("third", 1)])["third"])

    def simpleTest(self, testFile, num):
        test = open(testFile, 'r', encoding='utf-8')
        numOfChecks1 = numOfChecks2 = succ1 = succ2 = 0
        i = num
        for line in test:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    i = num
                    continue
                if i != 0:
                    i -= 1
                    pprev = prev
                    prev = word
                else:
                    a, b = self.suggest(pprev, prev)
                    # Compare by value; 'is' only checks object identity
                    if a is not None:
                        if a == word:
                            succ1 += 1
                        numOfChecks1 += 1
                    if b is not None:
                        if b == word:
                            succ2 += 1
                        numOfChecks2 += 1
                    i = num
                    pprev = prev
                    prev = word
        test.close()
        return succ1 / numOfChecks1, succ2 / numOfChecks2
"Other_issue": 0 } else: project_item['past_month'] = repo_dict[href]['past_month'] project_item['past_week'] = repo_dict[href]['past_week'] project_item['yesterday'] = repo_dict[href]['yesterday'] cnt += 1 cur_item['children'].append(project_item) next_a = next_a.find_next('a') else: if len(cur_item['children']) > 0: # document.append(cur_item) curation_collection.insert(cur_item) cur_category = next_category cur_item = {"category": cur_category.contents[0], "children": []} next_category = next_category.find_next('h2') # print next_category.contents[0] # print cnt # print '-------------------------------------' # cnt = 0 print cnt # print next_category # print next_a # f = open('changed_awesome_go.html', 'w') # f.write(str(soup)) # f.close()
#!/usr/bin/env python
# coding:utf-8
import threading, time
from pymongo import Connection

lock = threading.RLock()
cond = threading.Condition()
table = Connection().test.tablex
table.drop()
table.insert({'x': 0})


def target():
    cond.acquire()
    cond.wait()
    lock.acquire()
    i = table.find_one({'x': 0})
    if i:  # found it
        table.update({'_id': i['_id']}, {'$set': {'x': 1}})
    lock.release()
    cond.release()
    if i:
        print '\n%s got i' % threading.current_thread()


for i in range(20):
    t = threading.Thread(target=target)
    t.start()
time.sleep(1)
# Wake the waiting threads; without a notify the workers block forever
cond.acquire()
cond.notify_all()
cond.release()
for line in cur.fetchall():
    actor, repo_name, event_type = line
    # rsplit returns a list; take the part before the '/' as the owner
    owner = repo_name.rsplit('/', 1)[0]
    # if not repo_dict[repo_name].get("yesterday"):
    #     repo_dict[repo_name]['yesterday'] = {"Owner_push": 0, "Owner_issue": 0, "Owner_pullrequest": 0, "Other_star": 0, "Other_pullrequest": 0, "Other_issue": 0}
    key_prefix = "Owner_" if owner == actor else "Other_"
    cur_item = repo_dict[repo_name]['yesterday']
    if event_type == 'PushEvent':
        cur_item["Owner_push"] += 1
    if event_type in ['IssuesEvent', 'IssueCommentEvent']:
        cur_item[key_prefix + 'issue'] += 1
    if event_type in ['PullRequestEvent', 'PullRequestReviewCommentEvent']:
        cur_item[key_prefix + 'pullrequest'] += 1
    if event_type == 'WatchEvent':
        cur_item["Other_star"] += 1

# Save to mongodb
client = MongoClient('localhost', 27017)
curation_collection = Connection()['curation']['curation_profile']
for k, v in repo_dict.iteritems():
    v['url'] = k
    curation_collection.insert(v)
del curation_collection
class Mongo(object):

    def __init__(self, log, sw=None):
        self.name = self.__class__.__name__
        self.log = log
        self.articles = Connection().aivb_db.articles \
            if not sw else Connection().aivb_redux.dater

    def __str__(self):
        return """
                'all': None,
                'search': {k: v},
                'empty': {k: 0},
                'filled': {k: {'$gt': 0.5}},
                'gtv': {k: {'$gt': v}},
                'regex': {k: {'$regex': v}},
                'exists': {k: {'$exists': True}},
                'and_ex': {'$and': [{k: v}, {k2: {'$exists': True}}]},
                'grt_ex': {'$and': [{k: {'$exists': True}}, {k2: {'$gt': v2}}]},
                'grt_eq': {'$and': [{k: {'$exists': True}}, {k2: v2}]},
                'p_range': {'$and': [{k: {'$gte': v}}, {k2: {'$lte': v2}}]},
                'period': {'$and': [{k: v}, {k2: {'$gt': v2}}]},
                'andand': {'$and': [{k: v}, {k2: v2}]}
               """

    def load(self, n=None):
        load = Loader(self.log)
        data = load.fetch_data(n)
        for x in data:
            for i in x:
                self.articles.insert(i)
        self.log.mlog.info("Inserted %d Instances of articles." % n)

    def search(self, command, key=None, value=None,
               s_key=None, s_value=None, t_key=None):
        if not key:
            res = [self.articles.find_one()]
        else:
            res = self.parse_search(command, key, value, s_key, s_value, t_key)
        return res

    def clear_all(self, v=None):
        for art in self.articles.find():
            if v:
                print art
            self.articles.remove(art)

    def parse_search(self, c, k, v, k2, v2, k3):
        op = {
            'all': None,
            'search': {k: v},
            'empty': {k: 0},
            'filled': {k: {'$gt': 0.5}},
            'gtv': {k: {'$gt': v}},
            'regex': {k: {'$regex': v}},
            'exists': {k: {'$exists': True}},
            'and_ex': {'$and': [{k: v}, {k2: {'$exists': True}}]},
            'grt_ex': {'$and': [{k: {'$exists': True}}, {k2: {'$gt': v2}}]},
            'grt_eq': {'$and': [{k: {'$exists': True}}, {k2: v2}]},
            'p_range': {'$and': [{k: {'$gte': v}}, {k2: {'$lte': v2}}]},
            'period': {'$and': [{k: v}, {k2: {'$gt': v2}}]},
            'andand': {'$and': [{k: v}, {k2: v2}]}
        }
        if 'select' not in c:
            return self.articles.find(op[c])
        else:
            if not k3:
                return self.articles.find(op[c.split('_')[1]],
                                          {'_id': k2, v2: 1})
            else:
                return self.articles.find(op[c.split('_')[1]],
                                          {'_id': k2, v2: 1, k3: 1})

    def update(self, c, eid, k, v, k2=None):
        op = {'one': {'$set': {k: v}},
              'two': {'$set': {k2: {'$set': {k: v}}}}}
        self.articles.update({'_id': eid}, op[c], upsert=False, multi=False)
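# Hedged query example for the Mongo wrapper. The stub logger and the
# 'subject_count' field are assumptions, not from the original source; the
# class only requires that log expose an .mlog object with .info().
class StubLog(object):
    class mlog(object):
        @staticmethod
        def info(msg):
            print msg

store = Mongo(StubLog())
for art in store.search('gtv', key='subject_count', value=0):
    print art['_id']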