def startTest(self, test):
    """Wipe every user collection so each test starts from an empty database."""
    client = MongoClient('localhost', settings.MONGO_DATABASE_PORT)
    database = client[settings.MONGO_DATABASE_NAME]
    for name in database.collection_names():
        # 'system.indexes' is MongoDB-internal and must not be emptied.
        if name == 'system.indexes':
            continue
        getattr(database, name).remove({})
def main():
    """Create and index the 'medb' collection, then bulk-load it from medb.zip.

    Prints an error and does nothing if the collection already exists.
    """
    db = MongoClient(host=MONGO_HOST, port=int(MONGO_PORT))[MONGO_DATABASE]
    if 'medb' not in db.collection_names():
        db.create_collection('medb')
        # BUG FIX: the index was created on 'messages' instead of the freshly
        # created 'medb' collection (copy-paste from the messages bootstrap).
        db.medb.ensure_index('id')
        print(
            "OK: Collection 'medb' and indexes were created in '{0}' database".
            format(MONGO_DATABASE))
        print("OK: Now load medb into '{0}' database".format(MONGO_DATABASE))
        # unzip
        call(['unzip', '../data/medb.zip'])
        # 'with' guarantees the file handle is closed even if parsing fails.
        with open('medb.txt', 'r') as f:
            for line in f:
                # Each line is the repr of a 4-tuple; literal_eval parses it
                # safely (no arbitrary code execution).
                t = literal_eval(line)
                db.medb.insert({'id': t[0], 'm': t[1], 'e': t[2], 'a': t[3]})
        remove('medb.txt')
        print('OK: MEDB loaded.')
    else:
        print("ERROR: Collection 'medb' already exists")
class Db:
    """Thin convenience wrapper around a single MongoDB database."""

    def __init__(self, host, port, dbname):
        try:
            self.mongodb_host = host
            self.mongodb_port = port
            self.db_name = dbname
            client = MongoClient(self.mongodb_host, self.mongodb_port)
            self.connection = client[self.db_name]
        except Exception as e:
            print(e)

    def getCollectionsNames(self):
        """Return the names of all non-system collections in the database."""
        return self.connection.collection_names(
            include_system_collections=False)

    def build_collection(self, collection_name):
        """Look up one collection by name."""
        try:
            return self.connection[collection_name]
        except Exception as e:
            print(e)

    def initialize_collections(self):
        """Return a dict mapping every collection name to its collection."""
        names = self.getCollectionsNames()
        try:
            return {name: self.build_collection(name) for name in names}
        except Exception as e:
            print(e)
class Update():
    """Pushes question documents from a per-scene MongoDB into a Solr core."""

    def __init__(self, ip, db_name):
        # NOTE(review): the 'ip' parameter is accepted but ignored — the
        # connection is hard-coded to 127.0.0.1; confirm whether intentional.
        self.db_name = db_name
        self.db = MongoClient('127.0.0.1', 27017)[db_name]
        self.core_name = SOLR_CORE_NAME
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)

    def check_solr_core(self):
        # Lazily create the Solr core on first use.
        if not self.solr.solr_core_exists(self.core_name):
            self.solr.create_solr_core(self.core_name)

    def update_data(self, collection):
        """Re-index one Mongo collection into Solr (delete-then-insert)."""

        def insert(data):
            # Skip empty documents.
            if not data:
                return
            # Work on a copy so the original Mongo document stays intact;
            # below, keys are popped from the copy while the ORIGINAL
            # 'data' is still iterated — this is deliberate.
            data_one = data.copy()
            data_one['_id'] = str(data_one['_id'])
            data_one['scene'] = self.db_name
            data_one['topic'] = collection
            if 'super_intention' in data_one:
                # Solr side expects the literal string 'null', not ''.
                if data_one['super_intention'] == '':
                    data_one['super_intention'] = 'null'
            if 'equal_questions' in data_one:
                # One Solr doc per question variant; the list itself is
                # removed from the indexed document.
                data_one.pop('equal_questions')
                for q in data['equal_questions']:
                    data_one['question'] = q
                    data_one['question_ik'] = q
                    data_one['question_cn'] = q
                    self.solr.update_solr(data_one, self.core_name)
            elif 'questions' in data_one:
                data_one.pop('questions')
                for q in data['questions']:
                    data_one['question'] = q
                    data_one['question_ik'] = q
                    data_one['question_cn'] = q
                    self.solr.update_solr(data_one, self.core_name)
            else:
                self.solr.update_solr(data_one, self.core_name)

        # Clear everything previously indexed for this scene+topic before
        # re-inserting, so deletions in Mongo propagate to Solr.
        self.solr.delete_solr_by_query(
            self.core_name,
            'scene_str:' + self.db_name + ' AND topic_str:' + collection)
        data = [x for x in self.db[collection].find()]
        for d in data:
            insert(d)

    def update(self):
        """Re-index every collection except 'log'; return 1 on success, 0 on error."""
        try:
            collections = self.db.collection_names()
            if 'log' in collections:
                collections.remove('log')
            for collection in collections:
                print('start ' + collection)
                self.update_data(collection)
            return 1
        except Exception:
            traceback.print_exc()
            return 0
def get_stores(request):
    """Return the store names, excluding MongoDB's 'system.indexes'.

    Keyword arguments:
    request -- Django HttpRequest object
    """
    names = MongoClient()['stores'].collection_names()
    return filter(lambda name: name != "system.indexes", names)
def __init__(self):
    """Open the trading-day collection, indexing it on first creation."""
    database = MongoClient()[self.DATABASE_NAME]
    self.mongo_coll = database[self.COLLECTION_NAME]
    existing = database.collection_names()
    if self.COLLECTION_NAME not in existing:
        # A brand-new collection gets a unique index on TradingDay.
        self.mongo_coll.create_index([('TradingDay', pymongo.ASCENDING)],
                                     unique=True)
    # whether to replace all conflicted data
    self.replace_all: bool = False
def get_stores(request):
    """Return available store names (lazy filter, system collections excluded).

    Keyword arguments:
    request -- Django HttpRequest object
    """
    store_db = MongoClient()['stores']

    def is_user_collection(name):
        # MongoDB keeps its index metadata in 'system.indexes'.
        return name != "system.indexes"

    return filter(is_user_collection, store_db.collection_names())
def setup_db():
    """Return a handle to the test database, dropping any leftover data first."""
    client = MongoClient(mongo_addr, mongo_port)[mongo_db_name]
    # A non-empty collection list means a previous run left data behind.
    if len(client.collection_names()) != 0:
        client.connection.drop_database(mongo_db_name)
    # Set debug client for mongo
    if api.common.external_client is None:
        api.common.external_client = client
    return client
class LocalDataManager:
    """Keeps phonebook contacts in MongoDB and exports them to txt/csv files."""

    def __init__(self):
        # OrderedDict document_class keeps field order stable for exports.
        self.database_conn = MongoClient(document_class=OrderedDict).phonebook
        self.contacts = self.get_contacts()
        self.file_name = 'phone_book.{}'

    def get_contacts(self):
        """Return every contact as a list of field values (without _id)."""
        cursor = self.database_conn.contacts.find({}, {'_id': False})
        return [list(document.values()) for document in list(cursor)]

    def save_txt(self, file_path=None):
        """Write contacts as space-separated lines of plain text."""
        target = file_path or self.file_name.format('txt')
        with open(target, 'w') as file:
            for contact in self.contacts:
                file.write('{}\n'.format(' '.join(contact)))
        return 'Successfully saved to .txt file.'

    def save_csv(self, file_path=None):
        """Write contacts as a semicolon-separated CSV with a header row."""
        target = file_path or self.file_name.format('csv')
        with open(target, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter=';')
            writer.writerow(['First name', 'Last name', 'Phone number'])
            for contact in self.contacts:
                writer.writerow([*contact])
        return 'Successfully saved to .csv file.'

    def gui_saver(self, query):
        """Dispatch a GUI save request to the writer matching the extension."""
        if not query:
            return None
        path, extension = os.path.splitext(query)
        if extension == '.txt':
            return self.save_txt(query)
        if extension == '.csv':
            return self.save_csv(query)

    def check_database(self):
        """Seed the contacts collection with defaults when it does not exist."""
        base_data = (OrderedDict([("_id", 1), ('first_name', 'Vadim'),
                                  ('last_name', 'Kuznetsov'),
                                  ('phone_number', '0101')]),
                     OrderedDict([("_id", 2), ('first_name', 'Ivan'),
                                  ('last_name', 'Petrov'),
                                  ('phone_number', '102')]),
                     OrderedDict([("_id", 8), ('first_name', 'Petr'),
                                  ('last_name', 'Ivanovich'),
                                  ('phone_number', '102')]))
        if 'contacts' not in self.database_conn.collection_names():
            self.database_conn.contacts.insert(base_data)
            return 'Database is successfully created.'
        else:
            return 'Database is successfully loaded.'
def main():
    """Create the 'messages' collection with its indexes (idempotent guard)."""
    db = MongoClient(host=MONGO_HOST, port=int(MONGO_PORT))[MONGO_DATABASE]
    if 'messages' in db.collection_names():
        print("ERROR: Collection 'messages' already exists")
        return
    db.create_collection('messages')
    db.messages.ensure_index('h')
    # TTL-collection for 31 days
    db.messages.ensure_index([('d', DESCENDING)],
                             expireAfterSeconds=60 * 60 * 24 * 31)
    for field in ('f', 'a', 'p'):
        db.messages.ensure_index(field)
    # Unique lookup keys for the auxiliary collections.
    db.users.ensure_index('username', unique=True)
    db.charts.ensure_index('name', unique=True)
    db.cache.ensure_index('type', unique=True)
    print("OK: Collections and indexes were created in '{0}' database".format(MONGO_DATABASE))
class DB_vectors():
    """Collects every question sentence and mirrors it into a vectors DB."""

    def __init__(self, ip, port, db_name):
        self.db_name = db_name
        self.data_db = MongoClient(ip, port)[db_name]
        self.vector_db = MongoClient(ip, port)['data_vectors']
        self.common_db = MongoClient(ip, port)['common']
        # All unique sentences gathered by load_data().
        self.data = set()

    def load_data(self):
        """Gather question sentences from the scene DB and the common DB."""
        for name in self.data_db.collection_names():
            # Chat-refusal data is intentionally excluded.
            if name == 'refuse2chat':
                continue
            for doc in self.data_db[name].find({}, {'equal_questions': 1}):
                for q in doc['equal_questions']:
                    self.data.add(q)
        for doc in self.common_db['interaction'].find({}, {'equal_questions': 1}):
            for q in doc['equal_questions']:
                self.data.add(q)
        for source in ('repeat_guest', 'repeat_machine'):
            for doc in self.common_db[source].find():
                self.data.add(doc['question'])
        # Dialogue questions are prefixed with their super intention.
        for doc in self.data_db['dialogue'].find():
            for q in doc['equal_questions']:
                self.data.add(doc['super_intention'] + q)

    def write_data(self):
        """Rebuild the vectors collection with zeroed placeholder vectors."""
        self.vector_db['vectors'].drop()
        for sentence in self.data:
            self.vector_db['vectors'].insert(
                {'sentence': sentence, 'vector': [0, 0, 0]})
        self.vector_db['vectors'].create_index('sentence')

    def get_vector(self, s):
        """Return the stored vector for sentence s, or None when absent."""
        try:
            match = self.vector_db['vectors'].find_one({'sentence': s})
            return match['vector']
        except Exception:
            return None
def setUp(self):
    """Prepare a test client and a freshly seeded TestDB collection."""
    self.app = app.test_client()
    self.app.TESTING = True
    # Inject test database into application
    test_db = MongoClient('localhost', 27017).TestDB
    # Dropping just the collection is faster than dropping the entire db.
    if 'FirstCollection' in test_db.collection_names():
        test_db.drop_collection('FirstCollection')
    webapp.db = test_db
    poster = {"name": "James", "url": "my_url"}
    webapp.db.FirstCollection.insert_many([dict(poster), dict(poster)])
def main():
    """Bootstrap the 'messages' collection and all of its indexes, if absent."""
    db = MongoClient(host=MONGO_HOST, port=int(MONGO_PORT))[MONGO_DATABASE]
    if 'messages' not in db.collection_names():
        db.create_collection('messages')
        messages = db.messages
        messages.ensure_index('h')
        # TTL-collection for 31 days
        ttl_seconds = 60 * 60 * 24 * 31
        messages.ensure_index([('d', DESCENDING)],
                              expireAfterSeconds=ttl_seconds)
        messages.ensure_index('f')
        messages.ensure_index('a')
        messages.ensure_index('p')
        # Unique lookup keys for the auxiliary collections.
        db.users.ensure_index('username', unique=True)
        db.charts.ensure_index('name', unique=True)
        db.cache.ensure_index('type', unique=True)
        print("OK: Collections and indexes were created in '{0}' database".
              format(MONGO_DATABASE))
    else:
        print("ERROR: Collection 'messages' already exists")
def showCollection(MongoClient,dummyarg):
    """Print one sample record and the record count of every collection.

    MongoClient -- a pymongo Database object (the name shadows the pymongo
                   class; kept for caller compatibility)
    dummyarg -- unused; kept for caller compatibility
    """
    Collections = MongoClient.collection_names()
    print("Found {0} collection(s)".format(len(Collections)))
    if len(Collections) > 3:
        # Get a confirmation (y) from console
        flag = input(
            "Found more than one Collections in the database, input [y] if you want to show them all, otherwise I will exit....")
        if flag != 'y':
            print("{0} entered, bye".format(flag))
            raise SystemExit
    elif len(Collections) < 1:
        # BUG FIX: the format string used placeholder {1} with a single
        # argument, which raised IndexError instead of printing the message.
        print("No collections in {0}".format(MongoClient.full_name))
    # To traversal all collections in a db
    for my_collection in Collections:
        print("+--------------------------+")
        print("Print one record in collection [{0}]".format(MongoClient[my_collection].full_name))
        print("Total records in collection <{0}>".format(MongoClient[my_collection].count()))
        pprint.pprint(MongoClient[my_collection].find_one())
        print("+--------------------------+")
def test_reset(self):
    """
    Check if all normalized collections get dropped and that normalized
    is set to False in all hpfeed entries.
    """
    db = MongoClient('localhost', 27017)[self.dbname]
    # Prepare and insert dummy values directly into the hpfeed collection;
    # the third entry also carries error fields that reset must clear.
    insert_items = [
        {'channel': 'channel1', 'ident': 'ident1', 'payload': 'payload1',
         'timestamp': datetime.utcnow(), 'normalized': True},
        {'channel': 'channel2', 'ident': 'ident2', 'payload': 'payload2',
         'timestamp': datetime.utcnow(), 'normalized': True},
        {'channel': 'channel3', 'ident': 'ident3', 'payload': 'payload3',
         'timestamp': datetime.utcnow(), 'normalized': True,
         'last_error': "Some error", 'last_error_timestamp': datetime.now()}
    ]
    for item in insert_items:
        db['hpfeed'].insert(item)
    # Create a few dummy collections that we expect to get dropped.
    db['somecollection1'].insert({'something': 'something'})
    db['somecollection2'].insert({'something': 'something'})
    sut = mnemodb.MnemoDB(self.dbname)
    # This is the function we are testing.
    sut.reset_normalized()
    # Have the normalized collections been removed?
    self.assertNotIn('somecollection1', db.collection_names())
    self.assertNotIn('somecollection2', db.collection_names())
    # Has every 'normalized' flag been reset (no entry left True)?
    self.assertEquals(0, db['hpfeed'].find({'normalized': True}).count())
    # Has the last_error attribute been removed?
    self.assertEquals(0, db['hpfeed'].find({'last_error': {'$exists': 1}}).count())
    # Has the last_error_timestamp attribute been removed?
    self.assertEquals(0, db['hpfeed'].find({'last_error_timestamp': {'$exists': 1}}).count())
def main():
    """Create and index the 'medb' collection, then bulk-load it from medb.zip.

    Prints an error and does nothing if the collection already exists.
    """
    db = MongoClient(host=MONGO_HOST, port=int(MONGO_PORT))[MONGO_DATABASE]
    if 'medb' not in db.collection_names():
        db.create_collection('medb')
        # BUG FIX: the index was created on 'messages' instead of the freshly
        # created 'medb' collection (copy-paste from the messages bootstrap).
        db.medb.ensure_index('id')
        print("OK: Collection 'medb' and indexes were created in '{0}' database".format(MONGO_DATABASE))
        print("OK: Now load medb into '{0}' database".format(MONGO_DATABASE))
        # unzip
        call(['unzip', '../data/medb.zip'])
        # 'with' guarantees the file handle is closed even if parsing fails.
        with open('medb.txt', 'r') as f:
            for line in f:
                # Each line is the repr of a 4-tuple; literal_eval parses it
                # safely (no arbitrary code execution).
                t = literal_eval(line)
                db.medb.insert({'id': t[0], 'm': t[1], 'e': t[2], 'a': t[3]})
        remove('medb.txt')
        print('OK: MEDB loaded.')
    else:
        print("ERROR: Collection 'medb' already exists")
class MongoOP:
    """Small persistence layer for scraped video documents in MongoDB."""

    def __init__(self, mongo_uri='mongodb://localhost:27017/test', \
                 collect_name='videos_update', old_collect_name='videos'):
        # get_default_database() uses the database named in the URI path.
        self.db = MongoClient(mongo_uri).get_default_database()
        self.collect_name = collect_name
        self.old_collect_name = old_collect_name

    def update_json_list(self, json_list, collect_name=None):
        """Upsert every document keyed by 'url'; 'videos_new' is rebuilt from scratch."""
        collect = self.get_collection(collect_name)
        if collect_name == 'videos_new':
            print("new Videos drop")
            collect.drop()
        # NOTE(review): the loop variable 'json' shadows the json module name
        # if it is imported at file level — confirm and consider renaming.
        for idx, json in enumerate(json_list):
            # Progress report every 100 documents.
            if idx % 100 == 0 and idx > 0:
                print("update into collect {} : {} / {}".format(
                    collect_name, idx, len(json_list)))
            collect.update_one({'url': json['url']}, {'$set': json},
                               upsert=True)

    def delete_url(self, url, collect_name=None):
        """Delete the single document matching this url."""
        collect = self.get_collection(collect_name)
        collect.delete_one({'url': url})

    def info_is_exists(self, url, collect_name=None):
        """Return True when a document with this url already carries a title."""
        collect = self.get_collection(collect_name)
        return bool(collect.find_one({'url': url, 'title': {'$exists': True}}))

    def get_unfinished_url_list(self, collect_name=None):
        """Return url dicts of documents not yet stamped with 'update_date'."""
        collect = self.get_collection(collect_name)
        url_json_list = list(
            collect.find({'update_date': {
                '$exists': False
            }}, {
                'url': 1,
                '_id': 0
            }))
        return url_json_list

    def get_all_url_set(self, collect_name):
        """Return the set of urls of documents that DO carry 'update_date'."""
        collect = self.get_collection(collect_name)
        url_set = set(each['url'] for each \
                      in collect.find({'update_date': {'$exists':True}},
                                      {'url':1, '_id':0}))
        return url_set

    def get_film_info_list(self, url_list, collect_name=None):
        """Return full documents for every url in url_list."""
        collect = self.get_collection(collect_name)
        info_json_list = list(collect.find({'url': {'$in': url_list}}))
        return info_json_list

    def get_collection(self, collect_name):
        # Fall back to the default collection configured at construction.
        if collect_name is None:
            return self.db[self.collect_name]
        else:
            return self.db[collect_name]

    def get_url_update_date(self, url, collect_name=None):
        """Return only the 'update_date' field for this url (or None)."""
        collect = self.get_collection(collect_name)
        return collect.find_one({'url': url, 'update_date': {'$exists':True}}, \
                                {'update_date':1, '_id':0})

    def get_logs(self, collect_name='logs'):
        """Print every document of the log collection."""
        collect = self.db[collect_name]
        for each in collect.find():
            print(each)

    def rename_collection(self, old_name, new_name, drop=False):
        """Rename a collection, optionally dropping an existing target first."""
        if new_name in self.db.collection_names() and drop:
            self.drop_collection(new_name)
        old_collect = self.db[old_name]
        old_collect.rename(new_name)

    def drop_collection(self, collect_name):
        self.db.drop_collection(collect_name)
class CheapVol:
    """
    Profits from buying shitcoins in accumulation phase. Enters when price
    is still cheap and volume spikes

    Config Requirements:
        - periodsMA
        - periodsVolLong
        - periodsVolShort
        - volCoef
        - bolStd
    """

    def __init__(self, stratName, assetList, isTest=False):
        logging.debug("Initialising CheapVol()")
        pd.options.mode.chained_assignment = None
        self.assetList = assetList
        self.isTest = isTest
        # Strategy parameters come from a per-strategy YAML config file.
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input and deprecated in PyYAML >= 5.1 — confirm.
        with open("%s/Pipeline/resources/%s/config.yml" %
                  (Settings.BASE_PATH, stratName)) as configFile:
            params = yaml.load(configFile)
        self.enterParams = params["enter"]
        self.exchangeList = params["assetSelection"]["exchangeList"]
        self.db = MongoClient("localhost", 27017)[stratName]
        self.col = self.db["PastPriceAction"]
        # Expression-statement form of a conditional call: only seed the
        # collection outside of tests.
        self.initiateCollection() if not self.isTest else None

    # NOTE(review): testData=[] is a mutable default argument; harmless here
    # because it is never mutated, but worth confirming/fixing upstream.
    def _initSingle(self, asset, exchange, testData=[]):
        """Pull candles for one asset and seed its price/volume document."""
        logging.debug("Starting CheapVol._initSingle(asset=%s)" % asset)
        logging.debug("1 second sleep to avoid rate limiters")
        time.sleep(1.5 if not self.isTest else 0)
        try:
            # Use injected testData when provided, otherwise pull live candles.
            pullData = (Pull(
                emailOnFailure=False if self.isTest else True).candles(
                    asset="%sBTC" % asset,
                    exchange=exchange,
                    limit=max(
                        self.enterParams["periodsMA"],
                        self.enterParams["periodsVolLong"],
                    ) + 1,
                    interval=self.enterParams["granularity"],
                ) if len(testData) == 0 else testData)
            # Keep only the most recent windows needed by the strategy.
            priceList = list(
                pullData["close"])[-self.enterParams["periodsMA"]:]
            volList = list(
                pullData["volume"])[-self.enterParams["periodsVolLong"]:]
            if (len(priceList) == self.enterParams["periodsMA"]
                    and len(volList) == self.enterParams["periodsVolLong"]):
                self.col.insert_one({
                    "asset": asset,
                    "price": priceList,
                    "vol": volList,
                    "isLive": False,
                })
                return True
            else:
                logging.info("Not enough data for asset: %s" % asset)
        except IndexError:
            logging.warning("Failure on asset: %s" % asset)
        return False

    def initiateCollection(self):
        """
        Creates mongo collection which contains the price action data
        required for CheapVol
        """
        failList = []
        logging.debug("Starting CheapVol.init()")
        # Start from a clean slate on every (non-test) initialisation.
        if "PastPriceAction" in self.db.collection_names():
            self.db.drop_collection("PastPriceAction")
        for asset, exchange in self.assetList:
            if not self._initSingle(asset, exchange):
                failList.append(asset)
        logging.debug("No failed assets" if len(failList) == 0 else
                      "%s Failed assets: %s" % (len(failList), failList))
        logging.debug("Finished CheapVol.initiateCollection()")
        # Only tests inspect the failure list.
        return failList if self.isTest else None

    def _getPADict(self, exchange):
        """Fetch price action since one granularity period ago for an exchange."""
        logging.debug("Starting CheapVol._getPADict()")
        startTS = int(time.time() - self.enterParams["granularity"])
        dateStart = datetime.fromtimestamp(startTS).strftime(
            "%Y-%m-%dT%H:%M:%S.000Z")
        return Pull().getPriceAction(exchange=exchange,
                                     startDate=dateStart,
                                     baseAsset="BTC")

    def before(self, testData=None):
        """
        Runs before CheapVol on each asset and updates the mongo collection
        """
        logging.debug("Starting CheapVol.before()")
        newPA = {}
        delistDict = {}
        # using reversed to keep exchange priority
        for exchange in reversed(self.exchangeList):
            newPA.update(
                self._getPADict(
                    exchange=exchange) if not self.isTest else testData)
            delistDict.update(Pull().getDepositStatus(
                exchange=exchange)) if not self.isTest else {}
        # Slide every stored window forward by one period and refresh the
        # delisting flag.
        for assetDict in list(self.col.find()):
            assetDict["price"] = assetDict["price"][1:] + [
                newPA[assetDict["asset"]]["price"]
            ]
            assetDict["vol"] = assetDict["vol"][1:] + [
                newPA[assetDict["asset"]]["vol"]
            ]
            assetDict["isLive"] = (delistDict[assetDict["asset"]]
                                   if not self.isTest else True)
            # _id must not be present when replacing the document.
            assetDict.pop("_id", None)
            self.col.find_one_and_replace({"asset": assetDict["asset"]},
                                          assetDict)
        logging.debug("Finished CheapVol.before()")

    def run(self, asset):
        """Return True when short-term volume spikes while price sits below the band."""
        logging.debug("Starting CheapVol.run(asset=%s)" % asset)
        assetData = self.col.find_one({"asset": asset})
        if assetData:
            # NOTE(review): np.float is removed in NumPy >= 1.24 — confirm
            # the pinned NumPy version or migrate to float.
            volL = np.round(
                np.nanmean(np.array(assetData["vol"]).astype(np.float)), 5)
            volS = np.round(
                np.nanmean(
                    np.array(assetData["vol"]
                             [-self.enterParams["periodsVolShort"]:]).astype(
                                 np.float)),
                5,
            )
            priceData = np.array(assetData["price"]).astype(np.float)
            # Lower Bollinger-style band: mean minus bolStd standard deviations.
            bolDown = np.nanmean(
                priceData) - self.enterParams["bolStd"] * np.nanstd(priceData)
            logging.debug("volL: %s, volS: %s, price: %s, bolDown: %s" %
                          (volL, volS, priceData[-1], bolDown))
            return (volS > self.enterParams["volCoef"] * volL
                    and priceData[-1] < bolDown and assetData["isLive"])
        else:
            return False
class MgHelper(object):
    """Helper for saving JSON files / dicts into MongoDB and reading them back."""

    def __init__(self, server='localhost', port=27017, dbname='Collections'):
        self.server = server
        self.port = port
        self.dbname = dbname
        self.mongo = MongoClient(self.server, self.port)
        self.db = self.mongo[self.dbname]

    def SaveFile(self, fname, tbname='jsontxt'):
        """Store one whole JSON file as a single document; return its id."""
        # BUG FIX: file() is a Python-2-only builtin and the handle was never
        # closed; open() in a 'with' block fixes both.
        with open(fname) as f:
            j = json.loads(f.read())
        table = self.db[tbname]  # collection ("table") name
        id = table.save(j)
        return id

    def SaveDictObjs(self, dictListObj, tbname='jsontxt'):
        """Insert each dict of the list; return the id of the last insert."""
        table = self.db[tbname]  # collection ("table") name
        # Robustness: return None instead of raising UnboundLocalError when
        # the input list is empty.
        id = None
        for dictObj in dictListObj:
            id = table.insert(dictObj)
        return id

    def SaveDictObj(self, dobj, tbname='jsontxt'):
        """Store one dict as a single document; return its id."""
        table = self.db[tbname]  # collection ("table") name
        id = table.insert(dobj)
        return id

    def GetDBNames(self):
        """Return the names of the databases on the connected server."""
        # BUG FIX: MongoClient has no collection_names() method, so this
        # always raised AttributeError; the method's name (and its local
        # variable 'dbnames') show database names were intended.
        dbnames = self.mongo.database_names()
        return dbnames

    # NOTE: the con={} defaults below are read-only, so the shared-mutable-
    # default pitfall does not apply; kept for caller compatibility.
    def GetDictHeader(self, tbname='jsontxt', con={}):
        """Return the field names (keys) of the first document matching con."""
        dictObj = self.db[tbname].find_one(con)
        header = tuple(dictObj)
        return header

    def GetDictObj(self, tbname='jsontxt', con={}):
        """Return the first document matching con."""
        dictObj = self.db[tbname].find_one(con)
        return dictObj

    def GetDictObjs(self, tbname='jsontxt', con={}):
        """Return all documents matching con as a list."""
        dictObjs = list(self.db[tbname].find(con))
        return dictObjs

    def GetDictObjsCnt(self, tbname='jsontxt', con={}):
        """Return the number of documents matching con."""
        dictObjs = self.db[tbname].find(con).count()
        return dictObjs

    def RemoveTable(self, tbname='jsontxt', con={}):
        """Delete documents matching con (everything by default)."""
        self.db[tbname].remove(con)
        return True

    def InsertTable(self, tbname='jsontxt', dictObjs={}):
        """Insert dictObjs into the collection."""
        self.db[tbname].insert(dictObjs)
        return True
class Provider:
    """Base class for wind-station data providers.

    Persists stations and capped per-station measure collections in MongoDB,
    caches Google API lookups (geocoding, elevation, timezone) in Redis, and
    normalizes incoming measures via pint unit conversions.
    """

    # Subclasses are expected to override these identifiers.
    provider_code = ""
    provider_name = ""
    provider_url = ""
    # requests timeouts in seconds: (connect, read).
    connect_timeout = 7
    read_timeout = 30

    # Cache durations are jittered with randint to avoid synchronized expiry.
    @property
    def usage_limit_cache_duration(self):
        return (12 + randint(-2, 2)) * 3600

    @property
    def google_api_error_cache_duration(self):
        return (60 + randint(-2, 2)) * 24 * 3600

    @property
    def google_api_cache_duration(self):
        return (120 + randint(-2, 2)) * 24 * 3600

    def __init__(self):
        self.mongo_db = MongoClient(MONGODB_URL).get_database()
        self.__stations_collection = self.mongo_db.stations
        self.__stations_collection.create_index([
            ("loc", GEOSPHERE),
            ("status", ASCENDING),
            ("pv-code", ASCENDING),
            ("short", ASCENDING),
            ("name", ASCENDING),
        ])
        # Snapshot of existing collection names, kept in sync by
        # measures_collection() below.
        self.collection_names = self.mongo_db.collection_names()
        self.redis = redis.StrictRedis.from_url(url=REDIS_URL,
                                                decode_responses=True)
        self.google_api_key = GOOGLE_API_KEY
        self.log = logging.getLogger(self.provider_code)
        sentry_sdk.set_tag("provider", self.provider_name)

    def stations_collection(self):
        return self.__stations_collection

    def measures_collection(self, station_id):
        """Return (creating on demand) the capped measures collection of a station."""
        if station_id not in self.collection_names:
            self.mongo_db.create_collection(
                station_id, **{
                    "capped": True,
                    "size": 500000,
                    "max": 5000
                })
            self.collection_names.append(station_id)
        return self.mongo_db[station_id]

    # __to_* converters accept either a pint Quantity or a plain number.
    def __to_wind_direction(self, value):
        if isinstance(value, ureg.Quantity):
            return to_int(value.to(ureg.degree).magnitude, mandatory=True)
        else:
            return to_int(value, mandatory=True)

    def __to_wind_speed(self, value):
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.kilometer / ureg.hour).magnitude,
                            mandatory=True)
        else:
            return to_float(value, mandatory=True)

    def __to_temperature(self, value):
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.degC).magnitude)
        else:
            return to_float(value)

    def __to_pressure(self, value):
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.hPa).magnitude, ndigits=4)
        else:
            return to_float(value, ndigits=4)

    def __compute_pressures(self, p: Pressure, altitude, temperature,
                            humidity):
        """Derive the missing pressure values (qfe/qnh/qff) from the known ones."""
        # Normalize pressure to HPa
        qfe = self.__to_pressure(p.qfe)
        qnh = self.__to_pressure(p.qnh)
        qff = self.__to_pressure(p.qff)
        if qfe and qnh is None:
            qnh = TWxUtils.StationToAltimeter(qfe, elevationM=altitude)
        if qnh and qfe is None:
            qfe = TWxUtils.AltimeterToStationPressure(qnh, elevationM=altitude)
        if qfe and qff is None and temperature is not None and humidity is not None:
            qff = TWxUtils.StationToSeaLevelPressure(qfe,
                                                     elevationM=altitude,
                                                     currentTempC=temperature,
                                                     meanTempC=temperature,
                                                     humidity=humidity)
        if qff and qfe is None and temperature is not None and humidity is not None:
            qfe = TWxUtils.SeaLevelToStationPressure(qff,
                                                     elevationM=altitude,
                                                     currentTempC=temperature,
                                                     meanTempC=temperature,
                                                     humidity=humidity)
        return {
            "qfe": to_float(qfe),
            "qnh": to_float(qnh),
            "qff": to_float(qff)
        }

    def __to_altitude(self, value):
        if isinstance(value, ureg.Quantity):
            return to_int(value.to(ureg.meter).magnitude)
        else:
            return to_int(value)

    def __to_rain(self, value):
        if isinstance(value, ureg.Quantity):
            return to_float(
                value.to(ureg.liter / (ureg.meter**2)).magnitude, 1)
        else:
            return to_float(value, 1)

    def add_redis_key(self, key, values, cache_duration):
        """Atomically store a hash in Redis with an expiry (single pipeline)."""
        pipe = self.redis.pipeline()
        pipe.hmset(key, values)
        pipe.expire(key, cache_duration)
        pipe.execute()

    def call_google_api(self, api_url, api_name):
        """Call a Google API, mapping its error statuses to our exceptions."""
        url = furl(api_url)
        url.args["key"] = self.google_api_key
        result = requests.get(url.url,
                              timeout=(self.connect_timeout,
                                       self.read_timeout)).json()
        if result["status"] == "OVER_QUERY_LIMIT":
            raise UsageLimitException(f"{api_name} OVER_QUERY_LIMIT")
        elif result["status"] == "INVALID_REQUEST":
            raise ProviderException(
                f'{api_name} INVALID_REQUEST: {result.get("error_message", "")}'
            )
        elif result["status"] == "ZERO_RESULTS":
            raise ProviderException(f"{api_name} ZERO_RESULTS")
        return result

    def __compute_elevation(self, lat, lon) -> Tuple[float, bool]:
        """Return (elevation, is_peak) by sampling a small ring around the point."""
        radius = 500
        nb = 6
        path = f"{lat},{lon}|"
        # Build nb points on a 500 m circle around (lat, lon); the meter to
        # degree conversion uses the Earth radius 6378137 m.
        for k in range(nb):
            angle = math.pi * 2 * k / nb
            dx = radius * math.cos(angle)
            dy = radius * math.sin(angle)
            path += "{lat},{lon}".format(
                lat=str(lat + (180 / math.pi) * (dy / 6378137)),
                lon=str(lon + (180 / math.pi) * (dx / 6378137) /
                        math.cos(lat * math.pi / 180)),
            )
            if k < nb - 1:
                path += "|"
        result = self.call_google_api(
            f"https://maps.googleapis.com/maps/api/elevation/json?locations={path}",
            "Google Elevation API")
        elevation = float(result["results"][0]["elevation"])
        is_peak = False
        # The station counts as a peak when the terrain drops steeply enough
        # (glide ratio < 6) toward any of the surrounding sample points.
        for point in result["results"][1:]:
            try:
                glide_ratio = radius / (elevation - float(point["elevation"]))
            except ZeroDivisionError:
                glide_ratio = float("Infinity")
            if 0 < glide_ratio < 6:
                is_peak = True
                break
        return elevation, is_peak

    def __get_place_geocoding_results(self, results):
        """Extract (lat, lon, long_name) from a Google geocoding response."""
        lat, lon, address_long_name = None, None, None
        for result in results["results"]:
            if result.get("geometry", {}).get("location"):
                lat = result["geometry"]["location"]["lat"]
                lon = result["geometry"]["location"]["lng"]
                # Use the first non-postal-code component as the name.
                for component in result["address_components"]:
                    if "postal_code" not in component["types"]:
                        address_long_name = component["long_name"]
                        break
                break
        return lat, lon, address_long_name

    def __get_place_autocomplete(self, name):
        results = self.call_google_api(
            f"https://maps.googleapis.com/maps/api/place/autocomplete/json?input={name}",
            "Google Places API")
        place_id = results["predictions"][0]["place_id"]
        results = self.call_google_api(
            f"https://maps.googleapis.com/maps/api/geocode/json?place_id={place_id}",
            "Google Geocoding API")
        return self.__get_place_geocoding_results(results)

    def __get_place_geocoding(self, name):
        results = self.call_google_api(
            f"https://maps.googleapis.com/maps/api/geocode/json?address={name}",
            "Google Geocoding API")
        return self.__get_place_geocoding_results(results)

    def get_station_id(self, provider_id):
        """Station ids are namespaced by provider: '<code>-<provider_id>'."""
        return self.provider_code + "-" + str(provider_id)

    def __create_station(self, provider_id, short_name, name, latitude,
                         longitude, altitude, is_peak, status, tz, urls,
                         fixes):
        """Build the station document, applying manual 'fixes' overrides."""
        if fixes is None:
            fixes = {}
        if any((not short_name, not name, altitude is None, latitude is None,
                longitude is None, not status, not tz)):
            raise ProviderException("A mandatory value is none!")
        station = {
            "pv-id": provider_id,
            "pv-code": self.provider_code,
            "pv-name": self.provider_name,
            "url": urls,
            "short": fixes.get("short") or short_name,
            "name": fixes.get("name") or name,
            "alt":
            self.__to_altitude(fixes["alt"] if "alt" in fixes else altitude),
            "peak": to_bool(fixes["peak"] if "peak" in fixes else is_peak),
            "loc": {
                "type": "Point",
                # GeoJSON order: [longitude, latitude].
                "coordinates": [
                    to_float(
                        fixes["longitude"]
                        if "longitude" in fixes else longitude, 6),
                    to_float(
                        fixes["latitude"]
                        if "latitude" in fixes else latitude, 6),
                ],
            },
            "status": status,
            "tz": tz,
            "seen": arrow.utcnow().int_timestamp,
        }
        return station

    def save_station(
        self,
        provider_id,
        short_name,
        name,
        latitude,
        longitude,
        status: StationStatus,
        altitude=None,
        tz=None,
        url=None,
        default_name=None,
        lookup_name=None,
    ):
        """Upsert a station, filling missing fields from Redis-cached Google APIs.

        Each lookup (address, geolocation, elevation, timezone) follows the
        same pattern: call the API once, cache the result or the error in
        Redis, then read whatever the cache holds.
        """
        if provider_id is None:
            raise ProviderException("'provider id' is none!")
        station_id = self.get_station_id(provider_id)
        lat = to_float(latitude, 6)
        lon = to_float(longitude, 6)
        address_key = f"address/{lat},{lon}"
        # Reverse-geocode a missing name, unless already cached.
        if (not short_name or not name) and not self.redis.exists(address_key):
            try:
                results = self.call_google_api(
                    f"https://maps.googleapis.com/maps/api/geocode/json?latlng={lat},{lon}"
                    f"&result_type=airport|colloquial_area|locality|natural_feature|point_of_interest|neighborhood",
                    "Google Geocoding API",
                )
                address_short_name = None
                address_long_name = None
                for result in results["results"]:
                    for component in result["address_components"]:
                        if "postal_code" not in component["types"]:
                            address_short_name = component["short_name"]
                            address_long_name = component["long_name"]
                            break
                if not address_short_name or not address_long_name:
                    raise ProviderException(
                        "Google Geocoding API: No valid address name found")
                self.add_redis_key(
                    address_key,
                    {
                        "short": address_short_name,
                        "name": address_long_name
                    },
                    self.google_api_cache_duration,
                )
            except TimeoutError as e:
                raise e
            except UsageLimitException as e:
                self.add_redis_key(address_key, {"error": repr(e)},
                                   self.usage_limit_cache_duration)
            except Exception as e:
                if not isinstance(e, ProviderException):
                    self.log.exception("Unable to call Google Geocoding API")
                self.add_redis_key(address_key, {"error": repr(e)},
                                   self.google_api_error_cache_duration)
        address = lookup_name or name or short_name
        geolocation_key = f"geolocation/{address}"
        # Forward-geocode a missing or (0, 0) position from the name.
        if (lat is None or lon is None) or (lat == 0 and lon == 0):
            if not self.redis.exists(geolocation_key):
                try:
                    lat, lon, address_long_name = self.__get_place_geocoding(
                        address)
                    if not lat or not lon or not address_long_name:
                        raise ProviderException(
                            f"Google Geocoding API: No valid geolocation found {address}"
                        )
                    self.add_redis_key(
                        geolocation_key,
                        {
                            "lat": lat,
                            "lon": lon,
                            "name": address_long_name
                        },
                        self.google_api_cache_duration,
                    )
                except TimeoutError as e:
                    raise e
                except UsageLimitException as e:
                    self.add_redis_key(geolocation_key, {"error": repr(e)},
                                       self.usage_limit_cache_duration)
                except Exception as e:
                    if not isinstance(e, ProviderException):
                        self.log.exception(
                            "Unable to call Google Geocoding API")
                    self.add_redis_key(geolocation_key, {"error": repr(e)},
                                       self.google_api_error_cache_duration)
            if self.redis.exists(geolocation_key):
                if self.redis.hexists(geolocation_key, "error"):
                    raise ProviderException(
                        f'Unable to determine station geolocation: {self.redis.hget(geolocation_key, "error")}'
                    )
                lat = to_float(self.redis.hget(geolocation_key, "lat"), 6)
                lon = to_float(self.redis.hget(geolocation_key, "lon"), 6)
                if not name:
                    name = self.redis.hget(geolocation_key, "name")
        alt_key = f"alt/{lat},{lon}"
        if not self.redis.exists(alt_key):
            try:
                elevation, is_peak = self.__compute_elevation(lat, lon)
                self.add_redis_key(alt_key, {
                    "alt": elevation,
                    "is_peak": str(is_peak)
                }, self.google_api_cache_duration)
            except TimeoutError as e:
                raise e
            except UsageLimitException as e:
                self.add_redis_key(alt_key, {"error": repr(e)},
                                   self.usage_limit_cache_duration)
            except Exception as e:
                if not isinstance(e, ProviderException):
                    self.log.exception("Unable to call Google Elevation API")
                self.add_redis_key(alt_key, {"error": repr(e)},
                                   self.google_api_error_cache_duration)
        tz_key = f"tz/{lat},{lon}"
        if not tz and not self.redis.exists(tz_key):
            try:
                now = arrow.utcnow().int_timestamp
                # NOTE(review): the "×tamp" below looks like an HTML-mangled
                # "&timestamp" (&times → ×) introduced by an earlier export —
                # confirm against upstream before changing the literal.
                result = self.call_google_api(
                    f"https://maps.googleapis.com/maps/api/timezone/json?location={lat},{lon}×tamp={now}",
                    "Google Time Zone API",
                )
                tz = result["timeZoneId"]
                # gettz() validates the timezone id before it is cached.
                gettz(tz)
                self.add_redis_key(tz_key, {"tz": tz},
                                   self.google_api_cache_duration)
            except TimeoutError as e:
                raise e
            except UsageLimitException as e:
                self.add_redis_key(tz_key, {"error": repr(e)},
                                   self.usage_limit_cache_duration)
            except Exception as e:
                if not isinstance(e, ProviderException):
                    self.log.exception("Unable to call Google Time Zone API")
                self.add_redis_key(tz_key, {"error": repr(e)},
                                   self.google_api_error_cache_duration)
        # Resolve still-missing fields from whatever the caches now hold.
        if not short_name:
            if self.redis.hexists(address_key, "error"):
                if default_name:
                    short_name = default_name
                else:
                    raise ProviderException(
                        f"Unable to determine station 'short': {self.redis.hget(address_key, 'error')}"
                    )
            else:
                short_name = self.redis.hget(address_key, "short")
        if not name:
            if self.redis.hexists(address_key, "error"):
                if default_name:
                    name = default_name
                else:
                    raise ProviderException(
                        f"Unable to determine station 'name': {self.redis.hget(address_key, 'error')}"
                    )
            else:
                name = self.redis.hget(address_key, "name")
        if not altitude:
            if self.redis.hexists(alt_key, "error"):
                raise ProviderException(
                    f"Unable to determine station 'alt': {self.redis.hget(alt_key, 'error')}"
                )
            altitude = self.redis.hget(alt_key, "alt")
        # NOTE(review): hexists() returns a bool, so comparing it to the
        # string "error" is always False and this guard never fires — confirm
        # whether `== "error"` was intended to be a plain truthiness check.
        if self.redis.hexists(alt_key, "error") == "error":
            raise ProviderException(
                f"Unable to determine station 'peak': {self.redis.hget(alt_key, 'error')}"
            )
        is_peak = self.redis.hget(alt_key, "is_peak") == "True"
        if not tz:
            if self.redis.hexists(tz_key, "error"):
                raise ProviderException(
                    f"Unable to determine station 'tz': {self.redis.hget(tz_key, 'error')}"
                )
            tz = self.redis.hget(tz_key, "tz")
        # Normalize url into a dict with a mandatory 'default' entry.
        if not url:
            urls = {"default": self.provider_url}
        elif isinstance(url, str):
            urls = {"default": url}
        elif isinstance(url, dict):
            if "default" not in url:
                raise ProviderException("No 'default' key in url")
            urls = url
        else:
            raise ProviderException("Invalid url")
        fixes = self.mongo_db.stations_fix.find_one(station_id)
        station = self.__create_station(provider_id, short_name, name, lat,
                                        lon, altitude, is_peak, status.value,
                                        tz, urls, fixes)
        self.stations_collection().update({"_id": station_id},
                                          {"$set": station},
                                          upsert=True)
        station["_id"] = station_id
        return station

    def create_measure(
        self,
        for_station,
        _id,
        wind_direction,
        wind_average,
        wind_maximum,
        temperature=None,
        humidity=None,
        pressure: Pressure = None,
        rain=None,
    ):
        """Build one measure document (_id is the measure timestamp)."""
        if all((wind_direction is None, wind_average is None,
                wind_maximum is None)):
            raise ProviderException("All mandatory values are null!")
        # Mandatory keys: 0 if not present
        measure = {
            "_id": int(round(_id)),
            "w-dir": self.__to_wind_direction(wind_direction),
            "w-avg": self.__to_wind_speed(wind_average),
            "w-max": self.__to_wind_speed(wind_maximum),
        }
        # Optional keys
        if temperature is not None:
            measure["temp"] = self.__to_temperature(temperature)
        if humidity is not None:
            measure["hum"] = to_float(humidity, 1)
        if pressure is not None and (pressure.qfe is not None
                                     or pressure.qnh is not None
                                     or pressure.qff is not None):
            measure["pres"] = self.__compute_pressures(
                pressure, for_station["alt"], measure.get("temp", None),
                measure.get("hum", None))
        if rain is not None:
            measure["rain"] = self.__to_rain(rain)
        measure["time"] = arrow.now().int_timestamp
        # Per-station manual offsets ("fixes"); wind direction wraps at 360.
        fixes = self.mongo_db.stations_fix.find_one(for_station["_id"])
        if fixes and "measures" in fixes:
            for key, offset in fixes["measures"].items():
                try:
                    if key in measure:
                        fixed_value = measure[key] + offset
                        if key == "w-dir":
                            fixed_value = fixed_value % 360
                        measure[key] = fixed_value
                except Exception as e:
                    self.log.exception(
                        f"Unable to fix '{key}' with offset '{offset}': {e}")
        return measure

    def has_measure(self, measure_collection, key):
        return measure_collection.find({"_id": key}).count() > 0

    def __add_last_measure(self, measure_collection, station_id):
        """Copy the newest measure onto the station document as 'last'."""
        last_measure = measure_collection.find_one({
            "$query": {},
            "$orderby": {
                "_id": -1
            }
        })
        if last_measure:
            self.stations_collection().update({"_id": station_id},
                                              {"$set": {
                                                  "last": last_measure
                                              }})

    def insert_new_measures(self, measure_collection, station, new_measures):
        """Insert measures sorted by timestamp and refresh the station's 'last'."""
        if len(new_measures) > 0:
            measure_collection.insert(
                sorted(new_measures, key=lambda m: m["_id"]))
            end_date = arrow.Arrow.fromtimestamp(new_measures[-1]["_id"],
                                                 gettz(station["tz"]))
            self.log.info(
                "⏱ {end_date} ({end_date_local}), {short}/{name} ({id}): {nb} values inserted"
                .format(
                    end_date=end_date.format("YY-MM-DD HH:mm:ssZZ"),
                    end_date_local=end_date.to("local").format(
                        "YY-MM-DD HH:mm:ssZZ"),
                    short=station["short"],
                    name=station["name"],
                    id=station["_id"],
                    nb=str(len(new_measures)),
                ))
            self.__add_last_measure(measure_collection, station["_id"])
##
# Main
##


def main():
    """Run the full intertext matching pipeline.

    Relies on the module-level ``config`` initialized in the ``__main__``
    block below. When hashbands were already loaded (``load_hashbands``),
    the counting step is skipped.
    """
    if not config['load_hashbands']:
        count_hashbands()
    match_minhash_keys()
    validate_all_matches()
    cluster_all_matches()
    create_typeahead_collection()
    create_config_collection()
    create_metadata_collection()
    create_scatterplot_collection()


if __name__ == '__main__':
    config = get_config()
    infiles = glob.glob(config['infiles'])
    text_ids = [str(i) for i in range(len(infiles))]
    metadata = get_metadata()
    # validate inputs are present
    if not infiles:
        raise Exception('No input files were found!')
    # remove all extant records
    # (plain loop instead of a side-effect-only list comprehension)
    db = MongoClient()['intertext']
    for collection_name in db.collection_names():
        db[collection_name].drop()
    main()
MONGO_DB_VERSION = MONGO_CLIENT.connection.server_info()['version'] except TypeError: # for pymongo >= 3 MONGO_DB_VERSION = MONGO_CLIENT.client.server_info()['version'] if not float('.'.join(MONGO_DB_VERSION.split('.')[:-1])) >= 2.2: raise ImproperlyConfigured( ''' Your mongodb service doesn't support TTL http://docs.mongodb.org/manual/tutorial/expire-data/ ''' ) # create sessions collection if needed if MONGO_SESSIONS_COLLECTION not in MONGO_CLIENT.collection_names(): MONGO_CLIENT.create_collection(MONGO_SESSIONS_COLLECTION) # check existing indexes DB_COLLECTION = MONGO_CLIENT[MONGO_SESSIONS_COLLECTION] MONGO_SESSIONS_INDEXES = DB_COLLECTION.index_information() if len(MONGO_SESSIONS_INDEXES) <= 1: DB_COLLECTION.ensure_index( 'session_key', unique=True ) DB_COLLECTION.ensure_index( 'creation_date', expireAfterSeconds=MONGO_SESSIONS_TTL )
class App:
    """Telegram vocabulary-training bot.

    Polls the Telegram API for updates, stores per-user word lists in
    MongoDB, translates words via the Yandex Translate API and schedules
    reminders through the ``remainder`` module.
    """

    def __init__(self, settings):
        # Load static bot texts shipped next to this module.
        dirname = os.path.dirname(os.path.realpath(__file__)) + '/'
        self.logger = logging.getLogger("bot")
        self.help_text = open(dirname + 'docs/help.txt').read()
        self.changelog_text = open(dirname + 'docs/changelog.txt').read()
        self.welcome_text = open(dirname + 'docs/welcome.txt').read()
        self.about_text = open(dirname + 'docs/about.txt').read()
        self.settings = settings
        self.wnl = WordNetLemmatizer()
        remainder.configure(settings)
        logs_dir = 'logs/'
        if not os.path.exists(logs_dir):
            os.makedirs(logs_dir)
        ###LOGGING
        # Separate files: INFO+ to access.log, ERROR+ to error.log.
        fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        access = logging.FileHandler(logs_dir + 'access.log')
        access.setLevel(logging.INFO)
        access.setFormatter(logging.Formatter(fmt))
        error = logging.FileHandler(logs_dir + 'error.log')
        error.setLevel(logging.ERROR)
        error.setFormatter(logging.Formatter(fmt))
        self.logger.addHandler(access)
        self.logger.addHandler(error)
        # NOTE(review): basicConfig is a no-op once handlers exist, so the
        # second call below likely has no effect — confirm intended.
        logging.basicConfig(format=fmt)
        if env == 'debug':
            logging.basicConfig(level=logging.DEBUG)
        logging.warning("Cofiguration: %s" % (env,))
        # Ensure the 'users' and 'remainders' collections exist.
        self.db = MongoClient(settings.mongo['uri']).get_default_database()
        if 'users' not in self.db.collection_names():
            self.db.create_collection('users')
        self.users = self.db.users
        if 'remainders' not in self.db.collection_names():
            self.db.create_collection('remainders')
        remainder.recover_jobs()
        # NOTE(review): 'params' is a class-level dict (declared below), so
        # this mutates state shared by all App instances.
        self.params['offset'] = 0
        logging.warning('Constructed')

    def listen(self):
        """Block forever, polling Telegram for new updates."""
        logging.warning('Listening')
        while True:
            self.get_updates()
            time.sleep(0.1)
        # app.run()

    def correct(self, string):
        """Spell-correct *string* via the After the Deadline service.

        Returns the first suggested correction, or the input unchanged.
        """
        baseurl_correction = 'http://service.afterthedeadline.com/checkDocument'
        correction = requests.get(baseurl_correction, {'data': string}).text
        correction = BeautifulSoup(correction, "lxml")
        if correction.find("option") is not None:
            string = correction.find("option").string
        return string

    def add_word(self, user, string):
        """Translate *string* and append it to the user's word list.

        English input is spell-corrected and lemmatized first. Duplicate
        words (same 'en' value) are rejected with a message to the user.
        """
        baseurl = 'https://translate.yandex.net/api/v1.5/tr.json/translate'
        # string = re.sub(r'[^A-Za-z\s]', '', string)
        # string = re.sub(r'\Wk+', ' ', string)
        string = string.lower()
        if len(string) == 0:
            telegram.send_message(user['chat_id'], "Wrong word")
            return
        # Default language pair for users who never picked one.
        if 'foreign' not in user:
            user['foreign'] = 'en'
            user['native'] = 'ru'
        if user['foreign'] == 'en':
            string = self.correct(string)
            if env != 'debug':
                string = self.wnl.lemmatize(string)
        string = string[0].upper() + string[1:]
        direction = '%s-%s' % (user['foreign'], user['native'])
        transtaltion = requests.get(baseurl, {
            'key': self.settings.translate_yandex['token'],
            'lang': direction,
            'text': string
        })
        out_word = transtaltion.json()['text'][0]
        already_has = False
        for w in user['words']:
            already_has |= w["en"] == string
        if not already_has:
            user['words'].append({"en": string, "ru": out_word,
                                  "stage": study_settings.min_stage,
                                  "expiration_date": datetime.datetime.utcnow() + study_settings.stages[1],
                                  "creation_date": datetime.datetime.utcnow()})
            self.users.save(user)
            telegram.send_message(user['chat_id'],
                                  "Word added\n%s - %s" % (string, out_word))
        else:
            telegram.send_message(user['chat_id'],
                                  "Already exist!\n%s - %s" % (string, out_word))

    # Class-level state shared by all instances; holds the Telegram polling
    # offset (see __init__ and get_updates).
    params = {}

    def get_list_word(self, user, text):
        """Send the user a numbered listing of all their stored words."""
        str_out = "\n".join(["%s: (%s) %s - %s" % (i + 1, w['stage'], w['en'], w['ru'])
                             for i, w in zip(range(10 ** 10), user['words'])])
        telegram.send_message(user['chat_id'], str_out)

    def start(self, user, text):
        """Handle /start: greet the user and ask for a language pair."""
        telegram.send_message(user['chat_id'], self.welcome_text)
        user['state'] = States.langs_asked

    def reask_langs(self, user, text):
        """Handle /reask: prompt the user to pick a language pair again."""
        telegram.send_message(user['chat_id'], """
Now choose your native and foreign languages.
Example: "en-ru" (en is foreign and ru is native)
""")
        user['state'] = States.langs_asked

    def langs_ask(self, user, text):
        """Validate the chosen language pair against Yandex's supported list."""
        ans = requests.get('https://translate.yandex.net/api/v1.5/tr.json/getLangs',
                           {'key': self.settings.translate_yandex['token']})
        lang_list = ans.json()['dirs']
        if text not in lang_list:
            telegram.send_message(user['chat_id'],
                                  "Please, choose any of this:\n" + "\n".join(lang_list))
        else:
            telegram.send_message(user['chat_id'],
                                  "\"%s\" have successfully chosen" % (text,))
            user['state'] = States.idle
            # 'en-ru' -> foreign='en', native='ru'
            user['foreign'] = text[0:2]
            user['native'] = text[3:5]

    def help(self, user, text):
        """Handle /help: send the static help text."""
        telegram.send_message(user['chat_id'], self.help_text)

    def about(self, user, text):
        """Handle /about: send the static about text."""
        telegram.send_message(user['chat_id'], self.about_text)

    def start_train(self, user, text):
        """Handle /train: reset train type and delegate to the train module."""
        user['train']['type'] = 0
        train.do_train(user, text)

    def add_remainder(self, user, text):
        """Handle /setremainder: schedule a daily reminder.

        Accepts an optional 'HH:MM' offset as the second token of *text*.
        """
        remainder.remove_job(user)
        tokens = text.split(' ')
        delta = datetime.timedelta()
        if len(tokens) >= 2:
            tokens = tokens[1].replace(' ', '').split(':')
            hours = int(tokens[0])
            minutes = int(tokens[1])
            delta = datetime.timedelta(hours=hours, minutes=minutes)
        remainder.add_job(user, datetime.datetime.utcnow() + delta)
        telegram.send_message(user['chat_id'],
                              "Successfully set. Nearest at %s" % (datetime.datetime.now() + delta,))

    def remove_remainder(self, user, text):
        """Cancel the user's scheduled reminder."""
        remainder.remove_job(user)
        telegram.send_message(user['chat_id'], "Removed")

    def remove(self, user, text):
        """Handle /rm: delete the current training word, or by 1-based index.

        During a training session the word under training is removed;
        otherwise '/rm N' removes word N (defaults to the last word).
        """
        if user['train']['type'] != 0:
            for w in user['words']:
                if w == user['train']['word']:
                    user['words'].remove(w)
                    str_out = "%s - %s" % (w['en'], w['ru'])
                    telegram.send_message(user['chat_id'], "Deleted:\n%s" % (str_out,))
            train.do_train(user, text)
        else:
            tokens = text.split(" ")
            if len(tokens) > 1:
                cnt = int(tokens[1])
                # Convert the user's 1-based index to 0-based.
                if cnt > 0:
                    cnt -= 1
            else:
                cnt = -1
            str_out = "%s - %s" % (user['words'][cnt]['en'], user['words'][cnt]['ru'])
            del user['words'][cnt]
            telegram.send_message(user['chat_id'],
                                  "Word with index %s removed\n%s" % (cnt, str_out))

    # Command dispatch table: '/cmd' -> unbound handler, invoked as
    # self.comands[cmd](self, user, text) in parse_action.
    comands = {
        'list': get_list_word,
        'rm': remove,
        'train': start_train,
        'end': train.end_train,
        'start': start,
        'help': help,
        'setremainder': add_remainder,
        'reask': reask_langs,
        'about': about
    }

    def parse_action(self, chat_id, text):
        """Route one incoming message to a command handler or state handler."""
        self.logger.warning("%s - %s" % (chat_id, text))
        user = self.users.find_one({'chat_id': chat_id})
        if user is None:
            # First contact: create a fresh user document.
            user = {'chat_id': chat_id, 'state': States.idle, 'words': [],
                    'train': {'type': 0, 'words': 0, 'correct': 0, 'cadidacies': []}}
        if 'train' not in user:
            user['train'] = {'type': 0, 'words': 0, 'correct': 0, 'cadidacies': []}
        if text[0] == '/':
            # Command
            cmd = text[1:].lower().split(' ')[0]
            if cmd in self.comands:
                self.comands[cmd](self, user, text)
        elif user['train']['type'] != 0:
            train.do_train(user, text)
        elif user['state'] == States.idle:
            self.add_word(user, text)
        elif user['state'] == States.langs_asked:
            self.langs_ask(user, text)
        # Persist any mutations the handlers made to the user document.
        self.users.save(user)

    def get_updates(self):
        """Fetch pending Telegram updates and dispatch each text message."""
        try:
            messages = telegram.get_updates(self.params['offset'])
            for u in messages:
                if 'message' in u and 'text' in u['message']:
                    if u['update_id'] < self.params['offset']:
                        print('Error')
                    else:
                        chat_id = u['message']['chat']['id']
                        text = u['message']['text']
                        # Advance the offset so this update is not re-fetched.
                        self.params['offset'] = max(self.params['offset'],
                                                    u['update_id'] + 1)
                        try:
                            self.parse_action(chat_id, text)
                        except:
                            logging.error('Error! (%s, %s)' % (chat_id, text))
                            logging.error(traceback.print_exc())
                            telegram.send_message(chat_id, 'An error occurred!')
        except:
            logging.error('Get updates error!')
            logging.error(traceback.print_exc())
        # Persist the polling offset across restarts.
        self.db.meta.save(self.params)
import requests
import xmltodict
import json
from pymongo import MongoClient
import time

# Fetch the manybooks.net RSS index.
res = requests.get('http://manybooks.net/index.xml')

#Always encode XML to skip most of unicode errors
db = MongoClient('localhost')['manybooks']
encoded_xml = res.text.encode('utf-8')
od = xmltodict.parse(encoded_xml)
# Round-trip through json to turn xmltodict's OrderedDicts into plain dicts.
myjson = json.loads(json.dumps(od))

# One collection per UTC day, named 'yy-mm-dd'; only load once per day.
today = time.strftime('%y-%m-%d', time.gmtime())
if today not in db.collection_names():
    for item in myjson['rss']['channel']['item']:
        db[today].insert(item)
else:
    # Fix: the original used the Python 2 print statement, which is a
    # SyntaxError on Python 3 (the rest of this codebase is Python 3).
    print('already up to date')
from pymongo import MongoClient from array import array db = MongoClient().test_database month = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06','Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'} for tweet in db.tweets.find(): tweetDate = tweet["created_at"].split(" ")[5] + month[tweet["created_at"].split(" ")[1]] + tweet["created_at"].split(" ")[2] for hashtag in tweet["entities"]["hashtags"]: hashtext = hashtag["text"].lower() nameExist = False for name in db.collection_names(): if name == hashtext: nameExist == True if nameExist == False: db[hashtext] # create collection with hashtag as the name result = db[hashtext].find_one({"Date": tweetDate}) if result == None: db[hashtext].insert_one( { "_id": tweetDate, "Date": tweetDate, "count": 1 } ) else: db[hashtext].update_one( {"_id": tweetDate},
class Provider:
    """Base class for weather-station data providers.

    Persists stations and measures in MongoDB, caches Google
    Geocoding/Elevation/Time Zone lookups in Redis, and reports errors to
    Sentry. Subclasses set provider_code/provider_name/provider_url.
    """

    provider_code = ''
    provider_name = ''
    provider_url = ''
    # requests (connect, read) timeouts in seconds.
    connect_timeout = 7
    read_timeout = 30

    @property
    def usage_limit_cache_duration(self):
        # ~12h with jitter, so cached quota errors don't all expire at once.
        return (12 + randint(-2, 2)) * 3600

    @property
    def location_cache_duration(self):
        # ~60 days with jitter.
        return (60 + randint(-2, 2)) * 24 * 3600

    def __init__(self):
        self.mongo_db = MongoClient(MONGODB_URL).get_database()
        self.__stations_collection = self.mongo_db.stations
        self.__stations_collection.create_index([('loc', GEOSPHERE),
                                                 ('status', ASCENDING),
                                                 ('pv-code', ASCENDING),
                                                 ('short', ASCENDING),
                                                 ('name', ASCENDING)])
        # Cached list of existing collections (see measures_collection).
        self.collection_names = self.mongo_db.collection_names()
        self.redis = redis.StrictRedis.from_url(url=REDIS_URL, decode_responses=True)
        self.google_api_key = GOOGLE_API_KEY
        self.log = get_logger(self.provider_code)
        sentry_sdk.init(SENTRY_URL, environment=ENVIRONMENT)
        with sentry_sdk.configure_scope() as scope:
            scope.set_tag('provider', self.provider_name)

    def __to_wind_direction(self, value):
        """Normalize a wind direction to integer degrees (mandatory)."""
        if isinstance(value, ureg.Quantity):
            return to_int(value.to(ureg.degree).magnitude, mandatory=True)
        else:
            return to_int(value, mandatory=True)

    def __to_wind_speed(self, value):
        """Normalize a wind speed to km/h as float (mandatory)."""
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.kilometer / ureg.hour).magnitude,
                            mandatory=True)
        else:
            return to_float(value, mandatory=True)

    def __to_temperature(self, value):
        """Normalize a temperature to degrees Celsius as float."""
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.degC).magnitude)
        else:
            return to_float(value)

    def __to_pressure(self, value):
        """Normalize a pressure to hPa with 4 decimal places."""
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.hPa).magnitude, ndigits=4)
        else:
            return to_float(value, ndigits=4)

    def __compute_pressures(self, p: Pressure, altitude, temperature, humidity):
        """Derive the missing members of the (qfe, qnh, qff) triple.

        Uses TWxUtils conversions with station altitude and, for qff,
        current temperature/humidity.
        """
        # Normalize pressure to HPa
        qfe = self.__to_pressure(p.qfe)
        qnh = self.__to_pressure(p.qnh)
        qff = self.__to_pressure(p.qff)
        if qfe and qnh is None:
            qnh = TWxUtils.StationToAltimeter(qfe, elevationM=altitude)
        if qnh and qfe is None:
            qfe = TWxUtils.AltimeterToStationPressure(qnh, elevationM=altitude)
        if qfe and qff is None and temperature is not None and humidity is not None:
            qff = TWxUtils.StationToSeaLevelPressure(qfe, elevationM=altitude,
                                                     currentTempC=temperature,
                                                     meanTempC=temperature,
                                                     humidity=humidity)
        if qff and qfe is None and temperature is not None and humidity is not None:
            qfe = TWxUtils.SeaLevelToStationPressure(qff, elevationM=altitude,
                                                     currentTempC=temperature,
                                                     meanTempC=temperature,
                                                     humidity=humidity)
        return {'qfe': to_float(qfe), 'qnh': to_float(qnh), 'qff': to_float(qff)}

    def __to_altitude(self, value):
        """Normalize an altitude to integer meters."""
        if isinstance(value, ureg.Quantity):
            return to_int(value.to(ureg.meter).magnitude)
        else:
            return to_int(value)

    def __to_rain(self, value):
        """Normalize rain to liters per square meter (1 decimal)."""
        if isinstance(value, ureg.Quantity):
            return to_float(value.to(ureg.liter / (ureg.meter**2)).magnitude, 1)
        else:
            return to_float(value, 1)

    def stations_collection(self):
        return self.__stations_collection

    def measures_collection(self, station_id):
        """Return (creating if needed) the capped per-station measures collection."""
        if station_id not in self.collection_names:
            self.mongo_db.create_collection(station_id,
                                            **{'capped': True,
                                               'size': 500000,
                                               'max': 5000})
            self.collection_names.append(station_id)
        return self.mongo_db[station_id]

    def add_redis_key(self, key, values, cache_duration):
        """Store a hash at *key* with a TTL, atomically via a pipeline."""
        pipe = self.redis.pipeline()
        pipe.hmset(key, values)
        pipe.expire(key, cache_duration)
        pipe.execute()

    def __compute_elevation(self, lat, lon):
        """Return (elevation, is_peak) via the Google Elevation API.

        Samples 6 points on a 500 m circle around the station; the station
        counts as a peak when the glide ratio to any sample is below 6.
        """
        radius = 500
        nb = 6
        path = f'{lat},{lon}|'
        for k in range(nb):
            angle = math.pi * 2 * k / nb
            dx = radius * math.cos(angle)
            dy = radius * math.sin(angle)
            # Offset lat/lon by (dx, dy) meters on the WGS84 sphere.
            path += '{lat},{lon}'.format(
                lat=str(lat + (180 / math.pi) * (dy / 6378137)),
                lon=str(lon + (180 / math.pi) * (dx / 6378137) / math.cos(lat * math.pi / 180)))
            if k < nb - 1:
                path += '|'
        result = requests.get(
            f'https://maps.googleapis.com/maps/api/elevation/json?locations={path}&key={self.google_api_key}',
            timeout=(self.connect_timeout, self.read_timeout)).json()
        if result['status'] == 'OVER_QUERY_LIMIT':
            raise UsageLimitException('Google Elevation API OVER_QUERY_LIMIT')
        elif result['status'] == 'INVALID_REQUEST':
            raise ProviderException(
                f'Google Elevation API INVALID_REQUEST: {result.get("error_message", "")}')
        elif result['status'] == 'ZERO_RESULTS':
            raise ProviderException('Google Elevation API ZERO_RESULTS')
        elevation = float(result['results'][0]['elevation'])
        is_peak = False
        for point in result['results'][1:]:
            try:
                glide_ratio = radius / (elevation - float(point['elevation']))
            except ZeroDivisionError:
                glide_ratio = float('Infinity')
            if 0 < glide_ratio < 6:
                is_peak = True
                break
        return elevation, is_peak

    def __get_place_geocoding_results(self, results):
        """Extract (lat, lon, long_name) from a Geocoding API response."""
        lat, lon, address_long_name = None, None, None
        for result in results['results']:
            if result.get('geometry', {}).get('location'):
                lat = result['geometry']['location']['lat']
                lon = result['geometry']['location']['lng']
                # First non-postal-code component names the place.
                for component in result['address_components']:
                    if 'postal_code' not in component['types']:
                        address_long_name = component['long_name']
                        break
                break
        return lat, lon, address_long_name

    def __get_place_autocomplete(self, name):
        """Resolve *name* via Places autocomplete, then geocode the place_id."""
        results = requests.get(
            f'https://maps.googleapis.com/maps/api/place/autocomplete/json?input={name}&key={self.google_api_key}',
            timeout=(self.connect_timeout, self.read_timeout)).json()
        if results['status'] == 'OVER_QUERY_LIMIT':
            raise UsageLimitException('Google Places API OVER_QUERY_LIMIT')
        elif results['status'] == 'INVALID_REQUEST':
            raise ProviderException(
                f'Google Places API INVALID_REQUEST: {results.get("error_message", "")}')
        elif results['status'] == 'ZERO_RESULTS':
            raise ProviderException(
                f"Google Places API ZERO_RESULTS for '{name}'")
        place_id = results['predictions'][0]['place_id']
        results = requests.get(
            f'https://maps.googleapis.com/maps/api/geocode/json?place_id={place_id}&key={self.google_api_key}',
            timeout=(self.connect_timeout, self.read_timeout)).json()
        if results['status'] == 'OVER_QUERY_LIMIT':
            raise UsageLimitException('Google Geocoding API OVER_QUERY_LIMIT')
        elif results['status'] == 'INVALID_REQUEST':
            raise ProviderException(
                f'Google Geocoding API INVALID_REQUEST: {results.get("error_message", "")}')
        elif results['status'] == 'ZERO_RESULTS':
            raise ProviderException(
                f"Google Geocoding API ZERO_RESULTS for '{name}'")
        return self.__get_place_geocoding_results(results)

    def __get_place_geocoding(self, name):
        """Geocode a free-text address via the Geocoding API."""
        results = requests.get(
            f'https://maps.googleapis.com/maps/api/geocode/json?address={name}&key={self.google_api_key}',
            timeout=(self.connect_timeout, self.read_timeout)).json()
        if results['status'] == 'OVER_QUERY_LIMIT':
            raise UsageLimitException('Google Geocoding API OVER_QUERY_LIMIT')
        elif results['status'] == 'INVALID_REQUEST':
            raise ProviderException(
                f'Google Geocoding API INVALID_REQUEST: {results.get("error_message", "")}')
        elif results['status'] == 'ZERO_RESULTS':
            raise ProviderException(
                f"Google Geocoding API ZERO_RESULTS for '{name}'")
        return self.__get_place_geocoding_results(results)

    def get_station_id(self, provider_id):
        """Build the global station id: '<provider_code>-<provider_id>'."""
        return self.provider_code + '-' + str(provider_id)

    def __create_station(self, provider_id, short_name, name, latitude, longitude,
                         altitude, is_peak, status, tz, urls, fixes):
        """Build the station document, applying manual 'fixes' overrides."""
        if fixes is None:
            fixes = {}
        if any((not short_name, not name, altitude is None, latitude is None,
                longitude is None, not status, not tz)):
            raise ProviderException('A mandatory value is none!')
        station = {
            'pv-id': provider_id,
            'pv-code': self.provider_code,
            'pv-name': self.provider_name,
            'url': urls,
            'short': fixes.get('short') or short_name,
            'name': fixes.get('name') or name,
            'alt': self.__to_altitude(fixes['alt'] if 'alt' in fixes else altitude),
            'peak': to_bool(fixes['peak'] if 'peak' in fixes else is_peak),
            'loc': {
                'type': 'Point',
                # GeoJSON order: [longitude, latitude].
                'coordinates': [
                    to_float(fixes['longitude'] if 'longitude' in fixes else longitude, 6),
                    to_float(fixes['latitude'] if 'latitude' in fixes else latitude, 6)
                ]
            },
            'status': status,
            'tz': tz,
            'seen': arrow.utcnow().timestamp
        }
        return station

    def save_station(self, provider_id, short_name, name, latitude, longitude, status,
                     altitude=None, tz=None, url=None, default_name=None,
                     lookup_name=None):
        """Upsert a station, filling missing fields via Google APIs.

        Missing name/short_name are reverse-geocoded from (lat, lon);
        missing coordinates are geocoded from the name; missing altitude
        and timezone come from the Elevation and Time Zone APIs. All API
        results (including errors) are cached in Redis.
        """
        if provider_id is None:
            raise ProviderException("'provider id' is none!")
        station_id = self.get_station_id(provider_id)
        lat = to_float(latitude, 6)
        lon = to_float(longitude, 6)
        address_key = f'address/{lat},{lon}'
        # Reverse-geocode only when a name is missing and nothing is cached.
        if (not short_name or not name) and not self.redis.exists(address_key):
            try:
                results = requests.get(
                    f'https://maps.googleapis.com/maps/api/geocode/json?latlng={lat},{lon}'
                    f'&result_type=airport|colloquial_area|locality|natural_feature|point_of_interest|neighborhood'
                    f'&key={self.google_api_key}',
                    timeout=(self.connect_timeout, self.read_timeout)).json()
                if results['status'] == 'OVER_QUERY_LIMIT':
                    raise UsageLimitException(
                        'Google Geocoding API OVER_QUERY_LIMIT')
                elif results['status'] == 'INVALID_REQUEST':
                    raise ProviderException(
                        f'Google Geocoding API INVALID_REQUEST: {results.get("error_message", "")}')
                elif results['status'] == 'ZERO_RESULTS':
                    raise ProviderException(
                        'Google Geocoding API ZERO_RESULTS')
                address_short_name = None
                address_long_name = None
                # NOTE(review): only the inner loop breaks, so the names end
                # up from the LAST result — confirm this is intended.
                for result in results['results']:
                    for component in result['address_components']:
                        if 'postal_code' not in component['types']:
                            address_short_name = component['short_name']
                            address_long_name = component['long_name']
                            break
                if not address_short_name or not address_long_name:
                    raise ProviderException(
                        'Google Geocoding API: No valid address name found')
                self.add_redis_key(address_key, {
                    'short': address_short_name,
                    'name': address_long_name
                }, self.location_cache_duration)
            except TimeoutError as e:
                raise e
            except UsageLimitException as e:
                self.add_redis_key(address_key, {'error': repr(e)},
                                   self.usage_limit_cache_duration)
            except Exception as e:
                if not isinstance(e, ProviderException):
                    self.log.exception('Unable to call Google Geocoding API')
                self.add_redis_key(address_key, {'error': repr(e)},
                                   self.location_cache_duration)
        address = lookup_name or name or short_name
        geolocation_key = f'geolocation/{address}'
        # Geocode the address when coordinates are missing or (0, 0).
        if (lat is None or lon is None) or (lat == 0 and lon == 0):
            if not self.redis.exists(geolocation_key):
                try:
                    lat, lon, address_long_name = self.__get_place_geocoding(
                        address)
                    if not lat or not lon or not address_long_name:
                        raise ProviderException(
                            f'Google Geocoding API: No valid geolocation found {address}')
                    self.add_redis_key(geolocation_key, {
                        'lat': lat,
                        'lon': lon,
                        'name': address_long_name
                    }, self.location_cache_duration)
                except TimeoutError as e:
                    raise e
                except UsageLimitException as e:
                    self.add_redis_key(geolocation_key, {'error': repr(e)},
                                       self.usage_limit_cache_duration)
                except Exception as e:
                    if not isinstance(e, ProviderException):
                        self.log.exception(
                            'Unable to call Google Geocoding API')
                    self.add_redis_key(geolocation_key, {'error': repr(e)},
                                       self.location_cache_duration)
            if self.redis.exists(geolocation_key):
                if self.redis.hexists(geolocation_key, 'error'):
                    raise ProviderException(
                        f'Unable to determine station geolocation: {self.redis.hget(geolocation_key, "error")}')
                lat = to_float(self.redis.hget(geolocation_key, 'lat'), 6)
                lon = to_float(self.redis.hget(geolocation_key, 'lon'), 6)
                if not name:
                    name = self.redis.hget(geolocation_key, 'name')
        alt_key = f'alt/{lat},{lon}'
        if not self.redis.exists(alt_key):
            try:
                elevation, is_peak = self.__compute_elevation(lat, lon)
                self.add_redis_key(alt_key, {
                    'alt': elevation,
                    'is_peak': is_peak
                }, self.location_cache_duration)
            except TimeoutError as e:
                raise e
            except UsageLimitException as e:
                self.add_redis_key(alt_key, {'error': repr(e)},
                                   self.usage_limit_cache_duration)
            except Exception as e:
                if not isinstance(e, ProviderException):
                    self.log.exception('Unable to call Google Elevation API')
                self.add_redis_key(alt_key, {'error': repr(e)},
                                   self.location_cache_duration)
        tz_key = f'tz/{lat},{lon}'
        if not tz and not self.redis.exists(tz_key):
            try:
                now = arrow.utcnow().timestamp
                result = requests.get(
                    f'https://maps.googleapis.com/maps/api/timezone/json?location={lat},{lon}'
                    f'&timestamp={now}&key={self.google_api_key}',
                    timeout=(self.connect_timeout, self.read_timeout)).json()
                if result['status'] == 'OVER_QUERY_LIMIT':
                    raise UsageLimitException(
                        'Google Time Zone API OVER_QUERY_LIMIT')
                elif result['status'] == 'INVALID_REQUEST':
                    raise ProviderException(
                        f'Google Time Zone API INVALID_REQUEST: {result.get("error_message", "")}')
                elif result['status'] == 'ZERO_RESULTS':
                    raise ProviderException(
                        'Google Time Zone API ZERO_RESULTS')
                tz = result['timeZoneId']
                # Sanity check: the returned id must be resolvable.
                dateutil.tz.gettz(tz)
                self.add_redis_key(tz_key, {'tz': tz},
                                   self.location_cache_duration)
            except TimeoutError as e:
                raise e
            except UsageLimitException as e:
                self.add_redis_key(tz_key, {'error': repr(e)},
                                   self.usage_limit_cache_duration)
            except Exception as e:
                if not isinstance(e, ProviderException):
                    self.log.exception('Unable to call Google Time Zone API')
                self.add_redis_key(tz_key, {'error': repr(e)},
                                   self.location_cache_duration)
        # Fill still-missing fields from the caches, falling back to
        # default_name for the names.
        if not short_name:
            if self.redis.hexists(address_key, 'error'):
                if default_name:
                    short_name = default_name
                else:
                    raise ProviderException(
                        f"Unable to determine station 'short': {self.redis.hget(address_key, 'error')}")
            else:
                short_name = self.redis.hget(address_key, 'short')
        if not name:
            if self.redis.hexists(address_key, 'error'):
                if default_name:
                    name = default_name
                else:
                    raise ProviderException(
                        f"Unable to determine station 'name': {self.redis.hget(address_key, 'error')}")
            else:
                name = self.redis.hget(address_key, 'name')
        if not altitude:
            if self.redis.hexists(alt_key, 'error'):
                raise ProviderException(
                    f"Unable to determine station 'alt': {self.redis.hget(alt_key, 'error')}")
            altitude = self.redis.hget(alt_key, 'alt')
        # NOTE(review): hexists returns a bool, so comparing it to the
        # string 'error' is always False and this guard never fires —
        # confirm intended behavior.
        if self.redis.hexists(alt_key, 'error') == 'error':
            raise ProviderException(
                f"Unable to determine station 'peak': {self.redis.hget(alt_key, 'error')}")
        is_peak = self.redis.hget(alt_key, 'is_peak')
        if not tz:
            if self.redis.hexists(tz_key, 'error'):
                raise ProviderException(
                    f"Unable to determine station 'tz': {self.redis.hget(tz_key, 'error')}")
            tz = self.redis.hget(tz_key, 'tz')
        # Normalize url into a {'default': ..., ...} dict.
        if not url:
            urls = {'default': self.provider_url}
        elif isinstance(url, str):
            urls = {'default': url}
        elif isinstance(url, dict):
            if 'default' not in url:
                raise ProviderException("No 'default' key in url")
            urls = url
        else:
            raise ProviderException('Invalid url')
        fixes = self.mongo_db.stations_fix.find_one(station_id)
        station = self.__create_station(provider_id, short_name, name, lat, lon,
                                        altitude, is_peak, status, tz, urls, fixes)
        self.stations_collection().update({'_id': station_id},
                                          {'$set': station}, upsert=True)
        station['_id'] = station_id
        return station

    def create_measure(self, for_station, _id, wind_direction, wind_average,
                       wind_maximum, temperature=None, humidity=None,
                       pressure: Pressure = None, rain=None):
        """Build a measure document for *for_station* keyed by timestamp *_id*.

        Wind values are mandatory (at least one non-None); temperature,
        humidity, pressure and rain are optional. Per-station offsets from
        the 'stations_fix' collection are applied last.
        """
        if all((wind_direction is None, wind_average is None,
                wind_maximum is None)):
            raise ProviderException('All mandatory values are null!')
        # Mandatory keys: json 'null' if not present
        measure = {
            '_id': int(round(_id)),
            'w-dir': self.__to_wind_direction(wind_direction),
            'w-avg': self.__to_wind_speed(wind_average),
            'w-max': self.__to_wind_speed(wind_maximum)
        }
        # Optional keys
        if temperature is not None:
            measure['temp'] = self.__to_temperature(temperature)
        if humidity is not None:
            measure['hum'] = to_float(humidity, 1)
        if pressure is not None and (pressure.qfe is not None
                                     or pressure.qnh is not None
                                     or pressure.qff is not None):
            measure['pres'] = self.__compute_pressures(
                pressure, for_station['alt'],
                measure.get('temp', None), measure.get('hum', None))
        if rain is not None:
            measure['rain'] = self.__to_rain(rain)
        measure['time'] = arrow.now().timestamp
        # Apply manual per-key offsets (wind direction wraps at 360).
        fixes = self.mongo_db.stations_fix.find_one(for_station['_id'])
        if fixes and 'measures' in fixes:
            for key, offset in fixes['measures'].items():
                try:
                    if key in measure:
                        fixed_value = measure[key] + offset
                        if key == 'w-dir':
                            fixed_value = fixed_value % 360
                        measure[key] = fixed_value
                except Exception:
                    # NOTE(review): the .format() call after an f-string is
                    # redundant (the f-string is already interpolated).
                    self.log.exception(
                        f"Unable to fix '{key}' with offset '{offset}'".format(
                            key=key, offset=offset))
        return measure

    def has_measure(self, measure_collection, key):
        """Return True when a measure with _id *key* already exists."""
        return measure_collection.find({'_id': key}).count() > 0

    def insert_new_measures(self, measure_collection, station, new_measures):
        """Insert measures sorted by _id and refresh the station's 'last'."""
        if len(new_measures) > 0:
            measure_collection.insert(
                sorted(new_measures, key=lambda m: m['_id']))
            end_date = arrow.Arrow.fromtimestamp(
                new_measures[-1]['_id'], dateutil.tz.gettz(station['tz']))
            self.log.info(
                '--> {end_date} ({end_date_local}), {short}/{name} ({id}): {nb} values inserted'
                .format(end_date=end_date.format('YY-MM-DD HH:mm:ssZZ'),
                        end_date_local=end_date.to('local').format(
                            'YY-MM-DD HH:mm:ssZZ'),
                        short=station['short'],
                        name=station['name'],
                        id=station['_id'],
                        nb=str(len(new_measures))))
            self.__add_last_measure(measure_collection, station['_id'])

    def __add_last_measure(self, measure_collection, station_id):
        """Copy the newest measure into the station document's 'last' field."""
        last_measure = measure_collection.find_one({
            '$query': {},
            '$orderby': {
                '_id': -1
            }
        })
        if last_measure:
            self.stations_collection().update({'_id': station_id},
                                              {'$set': {
                                                  'last': last_measure
                                              }})
class MongoDBConverter:
    """Loads the Yelp-style JSON-lines datasets into MongoDB collections."""

    def __init__(self):
        self.db = MongoClient(
            Settings.MONGO_CONNECTION_STRING)[Settings.REVIEWS_DATABASE]
        self.progess_bar = ProgressBar()

    def create_review_db(self):
        """Attach each review line to its business document's 'reviews' list.

        Reviews whose review_id is already present on some business are
        skipped, so re-running is idempotent.
        """
        print_header("Creating Reviews")
        done = 0
        dataset_file = Settings.REVIEW_DATASET_FILE
        business_collection = self.db[Settings.BUSINESS_COLLECTION]
        # Find all the businesses and their reviews and add the review_id to a dict.
        review_id_hashes = set()
        businesses = business_collection.find()
        for business in businesses:
            if 'reviews' in business:
                for review in business['reviews']:
                    review_id_hashes.add(review['review_id'])
        self.progess_bar.start()
        # First pass: count lines for the progress bar.
        with open(dataset_file, 'r') as dataset:
            count = sum(1 for _ in dataset)
        with open(dataset_file, 'r') as dataset:
            # Skip the header/first line.
            next(dataset)
            for line in dataset:
                try:
                    data = json.loads(line, encoding='utf-8')
                except ValueError:
                    # NOTE(review): on a malformed line this only prints and
                    # then keeps using the previous 'data' — confirm intended.
                    print('Oops!')
                # Insert into DB
                if data["type"] == "review":
                    business_id = data['business_id']
                    business = business_collection.find_one(
                        {'business_id': business_id})
                    assert (business is not None)
                    add_review = True
                    if data['review_id'] in review_id_hashes:
                        add_review = False
                    business['reviews'] = business.get('reviews', [])
                    if add_review:
                        business['reviews'].append(data)
                        business_collection.update_one(
                            {'business_id': business_id}, {"$set": business},
                            upsert=True)
                done += 1
                self.progess_bar.print_progress(done, count)

    def add_business_data_collection(self):
        """Load the business dataset, skipping if data is already present."""
        print_header("Adding Business Data")
        dataset_file = Settings.BUSINESS_DATASET_FILE
        add_businesses = True
        if Settings.BUSINESS_COLLECTION in self.db.collection_names():
            business_collection = self.db[Settings.BUSINESS_COLLECTION]
            if business_collection.count() > 0:
                add_businesses = False
                print("Data already present.... Skipping")
        if add_businesses:
            self.progess_bar.start()
            # Count lines for the progress bar.
            with open(dataset_file, 'r') as dataset:
                count = sum(1 for _ in dataset)
            business_collection = self.db[Settings.BUSINESS_COLLECTION]
            with open(dataset_file, 'r') as dataset:
                done = 0
                for line in dataset:
                    try:
                        data = json.loads(line, encoding='utf-8')
                    except ValueError:
                        # NOTE(review): prints but then still uses 'data'
                        # from the previous iteration — confirm intended.
                        print("Error in Business json file")
                    # Insert into DB
                    assert (data['type'] == 'business')
                    business_collection.insert(data)
                    done += 1
                    self.progess_bar.print_progress(done, count,
                                                    prefix='Progress:',
                                                    suffix='Complete')
            business_collection.create_index('business_id')

    def add_user_data_collection(self):
        """Load the user dataset, skipping if data is already present."""
        print_header("Adding User Data")
        dataset_file = Settings.USER_DATASET_FILE
        add_users = True
        if Settings.USER_COLLECTION in self.db.collection_names():
            user_collection = self.db[Settings.USER_COLLECTION]
            if user_collection.count() > 0:
                add_users = False
                print("Data already present.... Skipping")
        if add_users:
            self.progess_bar.start()
            # Count lines for the progress bar.
            with open(dataset_file, 'r') as dataset:
                count = sum(1 for _ in dataset)
            user_collection = self.db[Settings.USER_COLLECTION]
            with open(dataset_file, 'r') as dataset:
                done = 0
                for line in dataset:
                    try:
                        data = json.loads(line, encoding='utf-8')
                    except ValueError:
                        # NOTE(review): message says 'Business' but this is
                        # the user dataset; also 'data' may be stale here.
                        print("Error in Business json file")
                    # Insert into DB
                    assert (data['type'] == 'user')
                    user_collection.insert(data)
                    done += 1
                    self.progess_bar.print_progress(done, count,
                                                    prefix='Progress:',
                                                    suffix='Complete')
            user_collection.create_index('user_id')
class Monitor(object):
    """Watchdog service: verifies that monitored jobs report their launches
    on schedule, checks heartbeats, forwards WARNING+ log records by email,
    and prunes outdated launch reports from MongoDB.
    """
    name = 'slavem'
    # Minimum interval between two scans of the log collections.
    WARNING_LOG_INTERVAL = datetime.timedelta(minutes=2)

    def __init__(self, email, host='localhost', port=27017, dbn='slavem',
                 username=None, password=None, serverChan=None,
                 loggingconf=None, ):
        """
        :param email: kwargs for the EMail notifier
        :param host: MongoDB host
        :param port: MongoDB port
        :param dbn: MongoDB database name
        :param username:
        :param password:
        :param serverChan: ServerChan push config passed through to EMail
        :param loggingconf: logging configuration Dict()
        """
        now = arrow.now()
        self.mongoSetting = {
            'host': host,
            'port': port,
            'dbn': dbn,
            'username': username,
            'password': password,
        }
        self.log = logging.getLogger()
        self.initLog(loggingconf)

        # ServerChan report URLs (disabled; kept for reference)
        # self.serverChan = serverChan or {}
        # if self.serverChan:
        #     for account, url in self.serverChan.items():
        #         serverChanUrl = requests.get(url).text
        #         self.serverChan[account] = serverChanUrl
        # else:
        #     self.log.warning(u'没有配置 serverChan 的 url')

        self.email = EMail(serverChan=serverChan, **email)

        self.mongourl = 'mongodb://{username}:{password}@{host}:{port}/{dbn}?authMechanism=SCRAM-SHA-1'.format(
            **self.mongoSetting)

        self.__active = False
        self._inited = False

        # Next time to check whether tasks have completed.
        self.nextWatchTime = now
        # Next heartbeat check time.
        self.nextCheckHeartBeatTime = now
        self.nextRemoveOutdateReportTime = now

        # Signals that shut the service down.
        for sig in [signal.SIGINT,   # keyboard Ctrl-C
                    signal.SIGHUP,   # terminal hangup (nohup daemon)
                    signal.SIGTERM,  # `kill pid`
                    ]:
            signal.signal(sig, self.shutdown)

        self.authed = False

        # Background thread that periodically reports WARNING+ log records.
        self.threadWarningLog = Thread(target=self.logWarning,
                                       name='logWarning')
        self.lastWarningLogTime = now

        logMongoConf = loggingconf['handlers']['mongo']
        self.logDB = MongoClient(
            logMongoConf['host'],
            logMongoConf['port'],
        )[logMongoConf['database_name']]
        self.logDB.authenticate(logMongoConf['username'],
                                logMongoConf['password'])

        # Initialize indexes on the log collections.
        self.initLogCollection()

    def initLog(self, loggingconf):
        """
        Initialize logging.
        :param loggingconf: logging dictConfig mapping, or falsy for a
            stdout/stderr fallback configuration
        :return:
        """
        if loggingconf:
            # A log4mongo bug raises when connecting with a non-admin user;
            # inject the connection up front to skip the failing code path.
            log4mongo.handlers._connection = MongoClient(
                host=loggingconf['handlers']['mongo']['host'],
                port=loggingconf['handlers']['mongo']['port'],
            )
            logging.config.dictConfig(loggingconf)
            self.log = logging.getLogger(self.name)
        else:
            self.log = logging.getLogger('root')
            self.log.setLevel('DEBUG')
            fmt = "%(asctime)-15s %(levelname)s %(filename)s %(lineno)d %(process)d %(message)s"
            # datefmt = "%a-%d-%b %Y %H:%M:%S"
            datefmt = None
            formatter = logging.Formatter(fmt, datefmt)
            sh = logging.StreamHandler(sys.stdout)
            sh.setFormatter(formatter)
            sh.setLevel('DEBUG')
            self.log.addHandler(sh)
            sh = logging.StreamHandler(sys.stderr)
            sh.setFormatter(formatter)
            sh.setLevel('WARN')
            self.log.addHandler(sh)
            self.log.warning(u'未配置 loggingconfig')

    @property
    def taskCollectionName(self):
        return 'task'

    @property
    def reportCollectionName(self):
        return 'report'

    @property
    def heartBeatCollectionName(self):
        return 'heartbeat'

    def dbConnect(self):
        """
        Establish the database connection (idempotent: reconnects only when
        no live client exists yet).
        :return:
        """
        try:
            # Probe the existing connection.
            self.mongoclient.server_info()
        except AttributeError:
            # No client yet — (re)connect.
            self.mongoclient = MongoClient(
                host=self.mongoSetting['host'],
                port=self.mongoSetting['port']
            )
            db = self.mongoclient[self.mongoSetting['dbn']]
            self.db = db
            if self.mongoSetting.get('username'):
                # self.mongoclient = pymongo.MongoClient(self.mongourl)
                self.authed = db.authenticate(
                    self.mongoSetting['username'],
                    self.mongoSetting['password']
                )
            # Timezone-aware collection handles.
            self.reportCollection = db[self.reportCollectionName].with_options(
                codec_options=CodecOptions(tz_aware=True,
                                           tzinfo=LOCAL_TIMEZONE))
            self.tasksCollection = db[self.taskCollectionName].with_options(
                codec_options=CodecOptions(tz_aware=True,
                                           tzinfo=LOCAL_TIMEZONE))
            self.heartBeatCollection = db[self.heartBeatCollectionName].with_options(
                codec_options=CodecOptions(tz_aware=True,
                                           tzinfo=LOCAL_TIMEZONE))

    def init(self):
        """
        Initialize the service.
        :return:
        """
        self._inited = True
        # Establish the database connection.
        self.dbConnect()
        # Load tasks from the database.
        self.loadTask()
        # Sort the tasks by deadline.
        self.sortTask()
        # Refresh the next watch time.
        self.refreshWatchTime()

    def _run(self):
        """
        Main loop: poll once a second for due work.
        :return:
        """
        # Announce the next task deadline.
        self.reportWatchTime()
        while self.__active:
            time.sleep(1)
            # Check task launches.
            self.doCheckTaskLanuch()
            # Check heartbeats.
            self.doCheckHeartBeat()
            # Remove outdated launch reports.
            self.removeOutdateReport()

    def doCheckHeartBeat(self):
        """
        Report services whose heartbeat is older than 3 minutes.
        :return:
        """
        now = arrow.now()
        if now >= self.nextCheckHeartBeatTime:
            # Heartbeats are checked every 5 minutes.
            self.nextCheckHeartBeatTime = now + datetime.timedelta(minutes=5)
            cursor = self.heartBeatCollection.find({}, {'_id': 0})
            noHeartBeat = []
            for heartBeat in cursor:
                if now - heartBeat['datetime'] > datetime.timedelta(minutes=3):
                    # Heartbeat is stale — report it.
                    noHeartBeat.append(heartBeat)
            try:
                if noHeartBeat:
                    self.noticeHeartBeat(noHeartBeat)
            except Exception as e:
                self.log.error(traceback.format_exc())

    def doCheckTaskLanuch(self):
        # When the earliest deadline has passed, audit the launch reports.
        now = arrow.now()
        if now >= self.nextWatchTime:
            self.log.info(u'达到截止时间')
            # Check the tasks.
            self.checkTask()
            # Re-sort the tasks.
            self.sortTask()
            # Refresh the next watch time.
            self.refreshWatchTime()
            # Announce the next deadline.
            self.reportWatchTime()

    def reportWatchTime(self):
        """
        Log the next task deadline.
        :return:
        """
        now = arrow.now()
        if now < self.nextWatchTime:
            # Not yet time to watch the next task.
            rest = self.nextWatchTime - now
            self.log.info(u'下次截止时间 {}'.format(self.nextWatchTime))
            # time.sleep(rest.total_seconds())
            # self.log.info(u'达到截止时间')

    def start(self):
        """
        Start the service; on any crash, email the traceback and stop.
        :return:
        """
        try:
            self.init()
            self.__active = True
            self.threadWarningLog.start()
            self._run()
            if self.threadWarningLog.isAlive():
                self.threadWarningLog.join()
        except Exception as e:
            err = traceback.format_exc()
            self.log.critical(err)
            title = u'slavem 异常崩溃'
            text = err
            self.sendEmail(title, text)
        self.stop()

    def stop(self):
        """
        Stop the service.
        :return:
        """
        self.__active = False
        self.log.info(u'服务即将关闭……')
        time.sleep(1)

    def shutdown(self, signalnum, frame):
        """
        Handle a termination signal by stopping the service.
        :param signalnum:
        :param frame:
        :return:
        """
        self.stop()

    def __del__(self):
        """
        Cleanup on instance destruction: log out and close the client.
        :return:
        """
        try:
            if self.authed:
                self.db.logout()
            self.mongoclient.close()
        except:
            pass

    def loadTask(self):
        """
        Load all active tasks from the database.
        :return:
        """
        taskCol = self.tasksCollection
        taskList = []
        for t in taskCol.find():
            if not t.get('active'):
                continue
            t.pop('_id')
            taskList.append(Task(**t))
        self.tasks = taskList
        self.log.info(u'加载了 {} 个任务'.format(len(self.tasks)))
        if __debug__:
            for t in self.tasks:
                self.log.debug(str(t))

    def sortTask(self):
        """
        Sort tasks by deadline (earliest first).
        :return:
        """
        self.tasks.sort(key=lambda x: x.deadline)

    def refreshWatchTime(self):
        """
        Set the next watch time from the earliest task deadline.
        :return:
        """
        try:
            t = self.tasks[0]
            self.nextWatchTime = t.deadline
        except IndexError:
            # No tasks: check again in one minute.
            self.nextWatchTime = arrow.now() + datetime.timedelta(seconds=60)
            return

    def checkTask(self):
        """
        A deadline has been reached — audit launch reports for due tasks.
        :return:
        """
        # Gather every task whose deadline has passed.
        taskList = []
        firstLanuchTime = None
        now = arrow.now()
        for t in self.tasks:
            assert isinstance(t, Task)
            if now >= t.deadline:
                taskList.append(t)
                try:
                    # Track the earliest launch time among due tasks.
                    if firstLanuchTime < t.lanuchTime:
                        firstLanuchTime = t.lanuchTime
                except TypeError:
                    # First iteration: firstLanuchTime is still None.
                    firstLanuchTime = t.lanuchTime
        self.log.info(u'查询启动报告时间 > {}'.format(firstLanuchTime))
        # Query launch reports newer than firstLanuchTime.
        sql = {
            'datetime': {
                '$gte': firstLanuchTime,
            }
        }
        reportCol = self.reportCollection
        cursor = reportCol.find(sql)
        if __debug__:
            self.log.debug(u'查询到 {} 条报告'.format(cursor.count()))
        # Match reports against the due tasks.
        for report in cursor:
            try:
                for t in taskList:
                    assert isinstance(t, Task)
                    if t.isReport(report):
                        # Launched — refresh the deadline.
                        self.log.info(u'{} 服务启动完成 {}'.format(t.name, t.lanuchTime))
                        if t.isLate:
                            # A late launch report still triggers a notice.
                            self.noticeDealyReport(t)
                        t.finishAndRefresh()
                        taskList.remove(t)
                        break
            except Exception:
                self.log.error(traceback.format_exc())
        # Services that did not launch on time.
        for t in taskList:
            if t.isTimeToNoticeDelay():
                self.noticeUnreport(t)
                t.refreshLastDelayNoticeTime()
            # Mark as late.
            t.setLate()
            # Not done yet — push the deadline back one minute.
            t.delayDeadline()

    def noticeDealyReport(self, task):
        """
        Notify that a task launched late.
        :param task: tasks.Task
        :return:
        """
        title = u'服务{name}启动迟到'.format(name=task.name)
        text = u'当前时间:{}'.format(arrow.now())
        for k, v in task.toNotice().items():
            text += u'\n{}\t:{}'.format(k, v)
        self.sendEmail(title, text)

    def noticeUnreport(self, task):
        """
        Notify that no launch report was received for a task.
        :param task: tasks.Task
        :return:
        """
        title = u'服务{name}未启动'.format(name=task.name)
        text = u'当前时间\t:{}'.format(arrow.now())
        for k, v in task.toNotice().items():
            text += u'\n{}\t:{}'.format(k, v)
        self.sendEmail(title, text)

    def noticeHeartBeat(self, noHeartBeats):
        # Notify about services with stale heartbeats.
        title = u'心跳异常'
        text = u''
        for dic in noHeartBeats:
            text += u'=====================================\n'
            for k, v in dic.items():
                text += u'{}: {}\n'.format(k, v)
            shockSecs = arrow.now().datetime - dic['datetime']
            text += u'secs: {}\n'.format(shockSecs)
        self.sendEmail(title, text)

    def sendEmail(self, subject, text):
        """
        Send an email notification.
        :param subject:
        :param text:
        :return:
        """
        self.email.send(subject, text)

    def createTask(self, **kwargs):
        """
        Create (or upsert) a task document.
        :param kwargs: Task constructor arguments
        :return:
        """
        newTask = Task(**kwargs)
        sql = newTask.toSameTaskKV()
        dic = newTask.toMongoDB()
        self.tasksCollection.update_one(sql, {'$set': dic}, upsert=True)
        # self.db.task.find_one_and_update(sql, {'$set': dic}, upsert=True)
        self.log.info(u'创建了task {}'.format(str(dic)))

    def showTask(self):
        """
        Log every loaded task.
        :return:
        """
        for t in self.tasks:
            self.log.info(u'{}'.format(t.toMongoDB()))

    def removeOutdateReport(self):
        """
        Once a day, delete launch reports older than 7 days.
        :return:
        """
        now = arrow.now()
        if now >= self.nextRemoveOutdateReportTime:
            # Runs once per day.
            self.nextRemoveOutdateReportTime = now + datetime.timedelta(days=1)
            collection = self.reportCollection
            # Delete reports older than 7 days.
            deadline = now.datetime - datetime.timedelta(days=7)
            result = collection.remove({
                'datetime': {
                    '$lt': deadline
                }
            })
            num = result['n']
            self.log.info(u'清空了 {} 条启动报告'.format(num))

    def _logWarning(self):
        """
        Scan every log collection and email any WARNING+ records produced
        since the previous scan.
        :return:
        """
        now, self.lastWarningLogTime = self.lastWarningLogTime, arrow.now()
        colNames = self.logDB.collection_names()
        for colName in colNames:
            col = self.logDB[colName]
            sql = {
                'timestamp': {
                    '$gte': now.datetime,
                    '$lt': self.lastWarningLogTime.datetime,
                },
                'level': {
                    '$in': ["WARNING", "ERROR", "CRITICAL"]
                },
            }
            cursor = col.find(sql, {'_id': 0})
            count = cursor.count()
            if count == 0:
                # No WARNING+ records in this collection.
                continue
            logs = u'{} 共 {} 条\n'.format(now.datetime, count)
            for l in cursor.limit(10):
                logs += u'==================================\n'
                for k, v in l.items():
                    logs += u'{}: {}\n'.format(k, v)
            text = u'{}有异常日志'.format(colName)
            desp = logs
            self.sendEmail(text, desp)
            time.sleep(5)

    def logWarning(self):
        # Background-thread loop: scan logs at most once per interval.
        while self.__active:
            try:
                if arrow.now() - self.lastWarningLogTime < self.WARNING_LOG_INTERVAL:
                    # Not yet time for the next scan.
                    time.sleep(1)
                    continue
                self._logWarning()
            except:
                err = traceback.format_exc()
                self.log.error(err)

    def initLogCollection(self):
        """
        Initialize indexes on every log collection.
        :return:
        """
        indexTimestamp = IndexModel([('timestamp', ASCENDING)],
                                    name='timestamp', background=True)
        indexLevel = IndexModel([('level', DESCENDING)],
                                name='level', background=True)
        indexes = [indexTimestamp, indexLevel]
        for colName in self.logDB.collection_names():
            col = self.logDB[colName]
            self._initCollectionIndex(col, indexes)

    def _initCollectionIndex(self, col, indexes):
        """
        Ensure the given indexes exist on one collection.
        :return:
        """
        try:
            indexInformation = col.index_information()
            for indexModel in indexes:
                if indexModel.document['name'] not in indexInformation:
                    col.create_indexes(
                        [
                            indexModel,
                        ],
                    )
        except OperationFailure:
            # Collection has no index info yet — create them all.
            col.create_indexes(indexes)
'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12' } for tweet in db.tweets.find(): tweetDate = tweet["created_at"].split(" ")[5] + month[ tweet["created_at"].split(" ")[1]] + tweet["created_at"].split(" ")[2] for hashtag in tweet["entities"]["hashtags"]: hashtext = hashtag["text"].lower() nameExist = False for name in db.collection_names(): if name == hashtext: nameExist == True if nameExist == False: db[hashtext] # create collection with hashtag as the name result = db[hashtext].find_one({"Date": tweetDate}) if result == None: db[hashtext].insert_one({ "_id": tweetDate, "Date": tweetDate, "count": 1 }) else: db[hashtext].update_one({"_id": tweetDate}, {"$inc": {"count": 1}})
def startTest(self, test):
    """Empty every non-system collection so each test starts from a clean DB."""
    database = MongoClient('localhost',
                           settings.MONGO_DATABASE_PORT)[settings.MONGO_DATABASE_NAME]
    for name in database.collection_names():
        if name == 'system.indexes':
            continue
        getattr(database, name).remove({})
#Code for setting all intial values in a proper manner to start bot import redis import pickle from pymongo import MongoClient import datetime #creating mongoclient where quotes are stored in my db mydb = MongoClient('localhost')['goodread'] #Create a redis instance red = redis.Redis() red.set('per_day',120) red.set('today_balance',0) red.set('today_or_yesterday',datetime.datetime.today().timetuple().tm_mday) #Storing category names in redis for future use for i in mydb.collection_names(): if i!='system.indexes': for cur in mydb[i].find(): red.sadd(i,str(cur['_id'])) red.sadd('cats',i) #Creating a dictionary for mapping category to day in a month days_cats = {i+1:k for i,k in enumerate(red.smembers('cats'))} #Writing that mapping to a pickle file with open('dayscats.pickle','wb') as f: pickle.dump(days_cats,f)
# Set up and configure app # ................................. app = Flask(__name__, static_folder='static', static_url_path='') # ................................. # Load configuration # ................................. app.config.from_object(config.load()) # ................................. # MongoDB # ................................. # configure mongo client (defaults to localhost:27017), and get 'masfotos' db db = MongoClient()[app.config.get('MONGO_DB')] if 'counters' not in db.collection_names(): db.counters.save({ '_id': 'share_key_seq', 'seq': 0}) def get_next_seq(name): return db.counters.find_and_modify( query={'_id': name}, fields={'_id': 0}, update={'$inc': { 'seq': 1 }}, new=True)['seq'] # use our custom serializer that normalizes _id as oid in session app.session_interface.serializer = \ utils.SimpleObjectIdReplacingSerializer(app.session_interface.serializer)
from pymongo import MongoClient

# Connect to the local 'testdb' database.
test_db = MongoClient().testdb
# test_db.things.insert({'test': 1, '4': [5, 7]})
# Drop 'things', then show the remaining collections and (empty) contents.
test_db.things.drop()
print(test_db.collection_names())
for document in test_db.things.find():
    print(document)
class HRTDatabase:
    """Data-access layer for the HRT transit database (Python 2).

    Stores GTFS schedules, stops, routes and real-time bus check-ins in
    date-suffixed MongoDB collections (e.g. 'gtfs_20200101').
    """

    def __init__(self):
        # Default database taken from the MONGODB_URI connection string.
        self.database = MongoClient(os.environ['MONGODB_URI']).get_database(None)

    def removeOldGTFS(self, date):
        """Drop every date-suffixed collection not matching `date`."""
        print "Removing Old GTFS Data"
        collection_prefix = self.genCollectionName('', date)
        for collection in self.database.collection_names():
            # Only date-suffixed collections contain '_'; keep current date.
            if collection.find('_') != -1 and (not collection.endswith(collection_prefix)):
                self.database.drop_collection(collection)

    def insertGTFS(self, data, date):
        # Replace the GTFS stop-time rows for `date`.
        self.insertData(self.genCollectionName('gtfs_', date), data)

    def insertStops(self, data, date):
        # Replace the stops for `date` and index them geospatially.
        collection_name = self.genCollectionName('stops_', date)
        self.insertData(collection_name, data)
        self.database[collection_name].ensure_index([('location', GEO2D)])

    def insertRoutes(self, data, date):
        self.insertData(self.genCollectionName('routes_', date), data)

    def insertDestinations(self, data, date):
        self.insertData(self.genCollectionName('destinations_', date), data)

    def getStopName(self, stop_id, date):
        """Return the stopName for `stop_id` on the given service date."""
        collection_name = self.genCollectionName('stops_', date)
        stop = self.database[collection_name].find_one({"stopId": stop_id})
        return stop['stopName']

    def getFinalStops(self, date):
        """Return the last stop (and its sequence) of every trip for `date`."""
        collection_name = self.genCollectionName('gtfs_', date)
        final_stops = self.database[collection_name].aggregate([
            {"$group": {
                "_id": "$trip_id",
                "stopId": {"$last": "$stop_id"},
                "sequence": {"$last": "$stop_sequence"}
            }}
        ])
        return final_stops

    def generateIndicesForGTFS(self, date):
        """Create the query indexes used by the arrival-update lookups."""
        collection_name = self.genCollectionName('gtfs_', date)
        self.database[collection_name].create_index([
            ("block_id", ASCENDING)
        ], background=True)
        self.database[collection_name].create_index([
            ("block_id", ASCENDING),
            ("arrival_time", ASCENDING)
        ], background=True)
        self.database[collection_name].create_index([
            ("block_id", ASCENDING),
            ("actual_arrival_time", ASCENDING)
        ], background=True)
        self.database[collection_name].create_index([
            ("stop_id", ASCENDING),
            ("arrival_time", ASCENDING),
            ("actual_arrival_time", ASCENDING)
        ], background=True)
        self.database[collection_name].create_index([
            ("route_short_name", ASCENDING),
            ("stop_id", ASCENDING),
            ("direction_id", ASCENDING),
            ("arrival_time", ASCENDING)
        ], background=True)
        self.database[collection_name].create_index([
            ("route_short_name", ASCENDING),
            ("stop_id", ASCENDING),
            ("direction_id", ASCENDING),
            ("departure_time", ASCENDING)
        ], background=True)

    def genCollectionName(self, prefix, date):
        # e.g. ('gtfs_', 2020-01-01) -> 'gtfs_20200101'
        return prefix + date.strftime('%Y%m%d')

    def insertData(self, collection_name, data):
        # Full replace: wipe the collection, then bulk-insert (no-op on []).
        if len(data) > 0:
            self.database[collection_name].remove()
            self.database[collection_name].insert_many(data)

    # get bus route mappings that are not more than 30 minutes old
    def getBusRouteMappings(self):
        mappings = {}
        for mapping in self.database['busRouteMappings'].find():
            if mapping['time'] > datetime.utcnow() + timedelta(minutes=-30):
                mappings[mapping['busId']] = mapping
        return mappings

    def setBusRouteMappings(self, mappings):
        # Full replace of the mapping collection.
        self.database['busRouteMappings'].remove()
        if len(mappings) > 0:
            self.database['busRouteMappings'].insert(mappings)

    # return the last time to the minute that a bus checked in and
    # a list of all buses that checked in during that minute
    def getLastCheckinSummary(self):
        if self.database['checkins'].find().count() > 0:
            last_time = self.database['checkins'].find().sort("$natural", -1)[0]["time"]
            last_buses = self.database['checkins'].find({"time" : last_time}).distinct("busId")
            return {"time": last_time.replace(tzinfo=pytz.UTC), "busIds": last_buses}
        return None

    def updateCheckins(self, checkins):
        # purge checkins that are more than 2 hours
        self.database['checkins'].remove({"time": {"$lt": datetime.utcnow() + timedelta(hours=-2)}})
        if len(checkins) > 0:
            self.database['checkins'].insert(checkins)

    def getRealTimeArrivalUpdates(self, checkin):
        """Compute actual_arrival_time updates for stops on this bus's block.

        Returns (collection_name, [UpdateOne, ...]) for upcoming stop-times
        adjusted by the check-in's schedule adherence (minutes).
        """
        # NOTE(review): local time is assumed to be UTC-5 — confirm DST handling.
        checkin_local_time = checkin.time + timedelta(hours=-5)
        collection_name = 'gtfs_' + checkin_local_time.strftime('%Y%m%d')
        stop_times = self.database[collection_name].find({
            'block_id': checkin.blockId,
            '$or': [
                {'arrival_time': {
                    '$gte': datetime.utcnow() + timedelta(minutes=-5-checkin.adherence),
                    '$lte': datetime.utcnow() + timedelta(minutes=30-checkin.adherence)
                }},
                {'actual_arrival_time': {
                    '$gte': datetime.utcnow() + timedelta(minutes=-5),
                    '$lte': datetime.utcnow() + timedelta(minutes=30)
                }}
            ]
        }, {'arrival_time': 1, 'actual_arrival_time': 1})
        updates = []
        for stoptime in stop_times:
            new_arrival_time = stoptime['arrival_time'] - timedelta(minutes=checkin.adherence)
            # Only emit an update when the projection actually changed.
            if 'actual_arrival_time' not in stoptime or new_arrival_time != stoptime['actual_arrival_time']:
                updates.append(UpdateOne(
                    {'_id': stoptime['_id']},
                    {'$set': {'actual_arrival_time': new_arrival_time}}
                ))
        return (collection_name, updates)

    def updateRealTimeArrivals(self, updates):
        # `updates` maps collection name -> list of UpdateOne operations.
        for collection_name in updates:
            if updates[collection_name]:
                result = self.database[collection_name].bulk_write(updates[collection_name])
                #print result.bulk_api_result

    def getScheduledStop(self, checkin):
        """Find the scheduled stop-time matching a real-time check-in.

        Matches on route/stop, the opposite direction_id, and a +/-2 minute
        window around the adherence-corrected time; returns None if absent.
        """
        checkin_local_time = checkin.time + timedelta(hours=-5)
        collection_name = 'gtfs_' + checkin_local_time.strftime('%Y%m%d')
        scheduled_stop = self.database[collection_name].find_one({
            "route_short_name" : checkin.routeShortName,
            "stop_id": checkin.stopId,
            "direction_id": {"$ne": checkin.direction},
            "$or": [
                {"arrival_time": {
                    "$gte": checkin.time + timedelta(minutes=checkin.adherence - 2),
                    "$lte": checkin.time + timedelta(minutes=checkin.adherence + 2)
                }},
                {"departure_time": {
                    "$gte": checkin.time + timedelta(minutes=checkin.adherence - 2),
                    "$lte": checkin.time + timedelta(minutes=checkin.adherence + 2)
                }}
            ]
        })
        if scheduled_stop is None:
            print "No scheduled stop found for the following checkin in {0}".format(collection_name)
            print checkin.__dict__
            return None
        # get the stop sequence that OneBusAway uses
        scheduled_stop['stop_sequence_OBA'] = self.database[collection_name].find({
            "trip_id": scheduled_stop["trip_id"],
            "stop_sequence": {"$lt": scheduled_stop["stop_sequence"]}
        }).count()
        return scheduled_stop
# Record every file under dirPath (outside the ignore list) as "not yet
# referenced by the database".
for fileName in fileNames:
    if not dirPath in ignoreDirs:
        entry = os.path.normpath(os.path.join(dirPath, fileName))
        fileList[entry] = False

##### Connect to the Database #####
db = MongoClient(connString)
for database in db.database_names():
    # Skip the built-in/administrative databases.
    if database != "admin" and database != "local" and database != "notifications":
        db = MongoClient(connString)[database]
        if verbose:
            print("--database:" + database)
        ##### Get a model ID and find entries #####
        # '.ref' collections hold file-system reference documents.
        regex = re.compile(".+\.ref$")
        for colName in db.collection_names():
            result = regex.match(colName)
            if result:
                if verbose:
                    print("\t--collection:" + colName)
                for refEntry in db[colName].find({"type": "fs"}):
                    filePath = os.path.normpath(
                        os.path.join(localFolder, refEntry['link']))
                    # True when filePath falls under any ignored directory.
                    inIgnoreDir = bool(
                        [x for x in ignoreDirs if filePath.find(x) + 1])
                    if not inIgnoreDir:
                        fileStatus = fileList.get(filePath)
                        if fileStatus == None:
                            # DB references a file that is missing on disk.
                            refInfo = database + "." + colName + ": " + refEntry[
                                "_id"]
                            if dryRun:
from pymongo import MongoClient

# Open the local 'testdb' database.
database = MongoClient().testdb
# database.things.insert({'test': 1, '4': [5, 7]})
# Remove 'things' entirely, then list collections and iterate the (now
# empty) collection.
database.things.drop()
print(database.collection_names())
for record in database.things.find():
    print(record)
class Output():
    """Dumps question data from a scene database (and the shared 'common'
    database) into plain-text files, one question per line."""

    def __init__(self, ip, port, db_name):
        self.db_name = db_name
        self.db = MongoClient(ip, port)[db_name]
        self.db_common = MongoClient(ip, port)['common']
        self.collections = self.db.collection_names()

    def write_dialogue(self):
        """Write one file per intention under '<db_name>_dialogue_data/'."""
        dirpath = self.db_name + '_dialogue_data'
        # Recreate the output directory from scratch.
        if os.path.exists(dirpath):
            shutil.rmtree(dirpath)
        os.mkdir(dirpath)
        data = {}
        for x in self.db['dialogue'].find():
            key = x['intention']
            data[key] = data.setdefault(key, []) + list(
                map(lambda q: x['super_intention'] + q, x['equal_questions']))
        for k in data.keys():
            # FIX: context manager guarantees the handle is closed even when
            # a write fails (the original leaked the handle on error).
            with open(os.path.join(dirpath, k), 'w', encoding='utf-8') as f:
                for i in data[k]:
                    f.write(i + '\n')

    def write_topic_dialogue(self, filepath):
        """Write the deduplicated 'dialogue' questions to `filepath`."""
        data = [x['equal_questions'] for x in self.db['dialogue'].find()]
        with open(filepath, 'w', encoding='utf-8') as f:
            for d in set(sum(data, [])):
                f.write(d + '\n')

    def write_topic_common(self, dirpath):
        """Write the shared 'common' collections into `dirpath`."""
        data = [x['question'] for x in self.db_common['repeat_guest'].find()]
        with open(os.path.join(dirpath, 'repeat_guest'), 'w',
                  encoding='utf-8') as f:
            for d in set(data):
                f.write(d + '\n')
        data = [x['question'] for x in self.db_common['repeat_machine'].find()]
        with open(os.path.join(dirpath, 'repeat_machine'), 'w',
                  encoding='utf-8') as f:
            for d in set(data):
                f.write(d + '\n')
        data = [
            x['equal_questions'] for x in self.db_common['interaction'].find()
        ]
        # Append mode: keeps the scene-specific 'interaction' questions that
        # write_topic() may already have written.
        with open(os.path.join(dirpath, 'interaction'), 'a',
                  encoding='utf-8') as f:
            for d in set(sum(data, [])):
                f.write(d + '\n')

    def write_topic_collection(self, filepath, doc_name):
        """Write the deduplicated questions of one collection to `filepath`."""
        data = [x['equal_questions'] for x in self.db[doc_name].find()]
        with open(filepath, 'w', encoding='utf-8') as f:
            for d in set(sum(data, [])):
                f.write(d + '\n')

    def write_topic(self):
        """Write every known topic collection under '<db_name>_topic_data/'."""
        dirpath = self.db_name + '_topic_data'
        if os.path.exists(dirpath):
            shutil.rmtree(dirpath)
        os.mkdir(dirpath)
        for collection in self.collections:
            if collection == 'dialogue':
                self.write_topic_dialogue(os.path.join(dirpath, 'dialogue'))
            elif collection in ['greeting', 'qa', 'sale', 'interaction']:
                self.write_topic_collection(os.path.join(dirpath, collection),
                                            collection)
        self.write_topic_common(dirpath)
class PubMedCrawler: """ This class allow to search and collect data from PubMed: 1) .search(query) -> grab all pmids involved in a query, return a "query_name" 2) .collect(query_name) -> save abstract+meta of pmids previously finded """ def __init__(self): Entrez.email = "*****@*****.**" Entrez.tool = "tool_name" self.db = MongoClient()['pubmed'] def search(self, query, reldate='', refresh=False, warning=True): """ Search ids related to query terms and returned if: 1) already cached OR 2) queried to Entrez and cached :param query: must be a string with blank spaced words (es: 'gene protein') :param reldate: docs returned are no more older than this (in days), leave blank('') to not set """ query_name = '_'.join(query.split()) ids_name = 'q.' + query_name + '.ids' # 0. Query cached? Skip if self.db[ids_name].count() == 0 or refresh: proceed = 'y' # 1. How many entries? result = self._esearch(query, reldate) # 2. Report result_count = int(result['Count']) n_query = (result_count / 100000) + 1 # integer division + rest if warning: # 3. Many result? Ask before proceed print '{0} IDs found. It will take {1} queries to save them all.'.format(result_count, n_query) proceed = raw_input("Press 'y' to continue: ").lower() if proceed == 'y': # 4. Query and saving entries print 'Saving {} IDs...'.format(result_count) self.db[ids_name].drop() # drop if exists (maybe a previous query) for n in range(0, n_query): # sliding window to collect more than 100k data result = self._esearch(query, reldate, retstart=n*100000, retmax=100000) self.db[ids_name].insert_many([{'_id': i} for i in result['IdList']], bypass_document_validation=True) time.sleep(1) # 1 sec of sleep, just to be safe... print '...Done!' 
# return (query name, cursor to collected pmid) return query_name, self.db[ids_name].find() @staticmethod def _esearch(query, reldate='', retstart=0, retmax=0): """ Envelope and ESearch query :param retmax=0 -> by default it return only a result['Count'] result :return result['Count'] = # record found | result['IdList'] = ['123',...] """ handle = Entrez.esearch(db='pubmed', term=query, retmode='xml', reldate=reldate, retstart=retstart, retmax=retmax) result = Entrez.read(handle) handle.close() return result def collect(self, query_name, from_chunk=0, refresh=False, warning=True): """ Save abstract+meta from a previously searched query """ ids_name = 'q.' + query_name + '.ids' # 0. Query exists? if ids_name not in self.db.collection_names(): print "No query found. Use search() before collect." return None # 1. Query cached? Skip collect_name = 'q.' + query_name + '.data' #2. Many ids? Ask before proceed if warning: print '{0} IDs found.'.format(self.db[ids_name].count()) proceed = raw_input("Press 'y' to continue: ").lower() if proceed == 'y': # 3. Query and collect data if refresh: self.db[collect_name].drop() # drop if exists (maybe a previous query) # 4. if specified, resume from chunk requested (keep track of "n" from console print) ids_chunks = self._chunk(self.db[ids_name].find(no_cursor_timeout=True).skip(from_chunk*10000), 10000) for n, chunk in enumerate(ids_chunks, from_chunk): naked_ids = [d['_id'] for d in chunk] print 'Chunk {0} > saving {1} articles: {2} ~ {3} ...'.format( n, len(naked_ids), naked_ids[0], naked_ids[-1]) result = self._efetch(naked_ids, retmax=10000) # 4. If article can't be read, skip result_docs = [doc for doc in [self._article2json(r) for r in result] if doc is not None] self.db[collect_name].insert_many(result_docs, bypass_document_validation=True) time.sleep(1) # 1 sec of sleep, just to be safe... print '...Done!' 
# return (query name, cursor to collected data) return query_name, self.db[collect_name].find() @staticmethod def _article2json(article): try: json = {'_id': str(article['MedlineCitation']['PMID'])} # if raise error, skip article except KeyError: return None try: json['title'] = article['MedlineCitation']['Article']['ArticleTitle'] except KeyError: pass try: json['abstract'] = ' '.join(article['MedlineCitation']['Article']['Abstract']['AbstractText']) except KeyError: pass try: json['date'] = datetime.datetime(int(article['MedlineCitation']['DateCreated']['Year']), int(article['MedlineCitation']['DateCreated']['Month']), int(article['MedlineCitation']['DateCreated']['Day'])) except ValueError or KeyError: pass try: json['authors'] = [a['LastName'] + ',' + a['ForeName'] for a in article['MedlineCitation']['Article']['AuthorList']] except KeyError: pass try: json['journal'] = article['MedlineCitation']['Article']['Journal']['Title'] except KeyError: pass try: json['keywords'] = [kws.encode('utf-8') for kws in article['MedlineCitation']['KeywordList'][0]] except IndexError or KeyError: pass try: json['mesh'] = [mesh['DescriptorName'].encode('utf-8') for mesh in article['MedlineCitation'][ 'MeshHeadingList']] except KeyError: pass return json @staticmethod def _chunk(cursor, size): """ Collect data into fixed-sized chunks or blocks until exhaust """ for it in xrange(0, cursor.count(), size): chunk = list() for n in xrange(it, it+size): try: chunk.append(next(cursor)) except StopIteration: yield chunk # final chunk cursor.close() raise StopIteration yield chunk @staticmethod def _efetch(ids, retstart=0, retmax=0): """ Envelope and EFetch query :param ids must be a list (single query ['25683065'] :param retmax=0 -> by default it return nothing, always set to # of result required :return result[0]['MedlineCitation'] -> get first article obj ...['PMID'] -> pmid: apply str() ...['DateCompleted'] + ['Year'] | ['Month'] | ['Day']: apply datetime.datetime() 
...['Article']['ArticleTitle'] -> article title ...['Journal']['Title'] -> journal title ...['Abstract']['AbstractText'] -> list of string composing abstract ...['AuthorList'] -> list of author ...[0]['LastName'] -> surname ...[0]['ForeName'] -> name OPT: ...['KeywordList'] -> list of string keywords (arguments): apply str(k) ...['MeshHeadingList'] -> list of l['DescriptorName']: apply str() (ignoring QualifierName) """ url_ids = ','.join(ids) handle = Entrez.efetch(db='pubmed', id=url_ids, retmode='xml', retstart=retstart, retmax=retmax) result = Entrez.read(handle) handle.close() return result
from pymongo import MongoClient
from datetime import datetime

# Connect to the local 'test' database; each collection holds one customer's
# daily records with a 'date' field formatted 'YYYY/MM/DD'.
db = MongoClient("mongodb://localhost:27017/")['test']

collections = db.collection_names()
for coll in collections:
    # First and last stored dates (documents assumed to be in
    # chronological insertion order — TODO confirm).
    first_date = datetime.strptime(db[coll].find().limit(1)[0]['date'], '%Y/%m/%d').date()
    last_date = datetime.strptime(db[coll].find().skip(db[coll].count() - 1)[0]['date'], '%Y/%m/%d').date()
    # Calendar span vs. number of stored days gives the completeness ratio.
    number_of_days_between_dates = (last_date - first_date).days + 1
    number_of_days_in_record = db[coll].count()
    percent_complete = float(number_of_days_in_record)/float(number_of_days_between_dates)*100
    print 'Customer: ' + coll + ', First: ' + str(first_date) + ', Last: ' + str(last_date) + ', Percentage complete: '\
        + str(("{0:.0f}".format(round(percent_complete)))) + '%'
        # (fragment: belongs to an async event handler whose `def` is above
        # this excerpt) — set an idle presence after a gateway resume.
        game = discord.Game(f"{self.config.prefix}help for help!")
        await self.change_presence(status=discord.Status.idle, activity=game)
        print("Resumed.")


# Connects to MongoDB
# Load config.json and expose it as attribute access via a recordclass.
with open("./config.json", "r", encoding="utf8") as file:
    data = json.dumps(json.load(file))
config = json.loads(data,
                    object_hook=lambda d: recordclass("config", d.keys())
                    (*d.values()))
db_client = MongoClient(config.uri)[config.db]
try:
    # Probe the connection; any failure disables MongoDB-backed features.
    db_client.collection_names()
except Exception:
    db_client = None
    logger.warning(
        "MongoDB connection failed. There will be no MongoDB support.")


def _prefix_callable(bot, msg):
    # Resolve the command prefixes for a message: always accept bot
    # mentions, plus the per-guild (or default) text prefix.
    base = [f"<@!{bot.user.id}> ", f"<@{bot.user.id}> "]
    try:
        db = Mongo(db_client, "guilds")
        guild_db = db.find(str(msg.guild.id))
        if not msg.guild:
            # Direct message: fall back to the global default prefix.
            base.append(config.prefix)
        elif not guild_db:
class Finder(object):
    """Interactive CLI for querying annotation projects stored in MongoDB.

    Commands are methods named ``do_<command>``; `parse` dispatches user
    input to them. Commands may return a generator of
    ``(prompt_text, input_callback)`` pairs to drive recursive
    prompt/answer workflows (see `do_filter`).
    """

    def __init__(self, uri, db, verbose=True, timeout=3, **kwargs):
        """Connect to MongoDB database `db` at `uri`.

        timeout: seconds slept before retrying after an AutoReconnect.
        Extra kwargs are forwarded to MongoClient.
        NOTE(review): on ConnectionFailure only a message is printed, so
        self.conn stays unset and later commands will raise AttributeError.
        """
        self.uri = uri
        self.verbose = verbose
        self.timeout = timeout
        try:
            self.conn = MongoClient(self.uri, **kwargs)[db]
            print("Connected to {conn}".format(conn=self.conn))
        except pymongo.errors.ConnectionFailure as e:
            print("Couldn't connect to MongoDB: {e}".format(e=e))
        # filter_key -> list of accepted values (see query_filters)
        self.filters = defaultdict(list)
        # sort property -> 'asc' | 'des'
        self.sort_props = {}
        self.config = {}

    def _get_suggestions(self, cmd):
        """Return names of do_* commands whose name contains `cmd`."""
        return [method[len('do_'):]
                for (method, _) in inspect.getmembers(self)
                if cmd in method and method.startswith('do_')]

    def _print_suggestions(self, cmd):
        """Print did-you-mean suggestions for an unrecognized command."""
        suggs = self._get_suggestions(cmd)
        if len(suggs) == 0:
            print("I don't understand ¯\(°_o)/")
            return
        print("Do you mean?:")
        for sugg in suggs:
            print(" " + sugg)

    def query_filters(self):
        """Parse currently stored filter values into a pymongo query dict."""
        def filter_value(val):
            # Single value: either a literal or, if written /pat/, a regex.
            if len(val) == 1:
                filter_val = val[0]
                is_regex = re.match(r"/([^/]+)/", filter_val)
                if is_regex:
                    return {"$regex": is_regex.groups()[0]}
                else:
                    return filter_val
            # Multiple values: match any of them.
            if len(val) > 1:
                return {"$in": val}

        return {f: filter_value(val) for (f, val) in self.filters.items()}

    def parse(self, text):
        """Dispatch a raw input line to the matching do_* command.

        Returns the command's result (possibly a generator of
        (prompt, callback) pairs), or None on error.
        """
        cmd, *args = text.split()  # this is python3 only (I think)
        try:
            method = getattr(self, "do_" + cmd)
            return method(args)
        except AttributeError as e:
            # Unknown command: suggest close matches instead of failing.
            self._print_suggestions(cmd)
            return
        except pymongo.errors.AutoReconnect as e:
            print("Reconnecting... \nin %d seconds" % self.timeout)
            time.sleep(self.timeout)
            return
        # AutoReconnect is a subclass of ConnectionFailure
        except pymongo.errors.ConnectionFailure as e:
            print("MongoDB Exception {te}: {e}".format(te=type(e), e=e))

    def add_sort(self, prop, order='des'):
        """Register a sort criterion ('asc' or 'des') for `prop`."""
        self.sort_props[prop] = order

    def clear_sort_props(self):
        """Remove all sort criteria."""
        self.sort_props = {}

    def get_filter(self, filter_key):
        """Return the value list for `filter_key` (empty list if unset)."""
        return self.filters[filter_key]

    def add_to_filter(self, filter_key, filter_val):
        """Append a value to a filter (OR semantics via $in)."""
        self.filters[filter_key].append(filter_val)
        if self.verbose:
            pprint(self.query_filters())

    def reset_filter(self, filter_key, filter_val):
        """Replace a filter's values with the single given value."""
        self.filters[filter_key] = [filter_val]
        if self.verbose:
            pprint(self.query_filters())

    def clear_filters(self):
        """Drop every stored filter."""
        print("Cleared all filters")
        self.filters = defaultdict(list)

    def clear_filter(self, filter_key):
        """Drop a single filter if present."""
        if filter_key in self.filters:
            print("Cleared filter {f}".format(f=filter_key))
            del self.filters[filter_key]
        if self.verbose:
            pprint(self.query_filters())

    """
    API functions. First docstr line is used for in-line autocompletion
    help. API-methods must start with do_ to be picked by the CLI-parser.
    Each API-method gets passed a list of 0 or more arguments.
    Argument validation must be done via one of:
        assert custom_assertion, "Informative assertion message",
    It checks for a custom assertion and displays a messsage to the user.
        raise ParseError(given_value, expected_value),
    Custom exception class. This is useful for fine-grained exception
    handling in case of methods that accept arglists (e.g. do_filter).
    API-methods can return None or a (possibly nested) generator of
    (prompt_text, input_callback), such that the prompt_text is shown to
    the user and the input_callback is called on the user's response.
    Note that input_callback can itself return another generator that
    will be called on the user response to the first prompt (thereby
    allowing for recursive workflows - see `do_filter` for an example).
    """

    def do_show(self, args):
        """ Show the current value of a particular settings (e.g.
        filters). """
        vals = ('filters', 'sort')
        assert len(args) == 1, "Specify at least one value"
        assert args[0] in vals, "Specify one of {vals}".format(vals=vals)
        if args[0] == 'filters':
            for f in self.filters:
                fs = ', '.join(self.filters[f])
                print(' {f} => {fs}'.format(f=f, fs=fs))
        elif args[0] == 'sort':
            for sort_prop, order in self.sort_props.items():
                print(' {s}[{o}]'.format(
                    s=sort_prop,
                    o={'asc': 'ascending', 'des': 'descending'}[order]))

    def do_filter(self, args):
        """ Add a filter to the query.
        Multiple filters can be added using the following syntax:
            filter key1:value key2:value ...
        Input a key to clear the filter for that key:
            filter username
        Input no arguments to clear all filters:
            filter
        Possible keys are [username, corpus, query] """
        ALREADY_EXISTS = \
            "A value for filter {key} already exists, " + \
            "do you want to (o)verwrite or (c)oncatenate?"

        # Callback for the overwrite/concatenate prompt; closes over
        # key/val from the loop below. Re-prompts on any other answer.
        def callback(res):
            if res == 'o':
                self.reset_filter(key, val)
            elif res == 'c':
                self.add_to_filter(key, val)
            else:
                yield "Please answer (o,c)", callback

        if not args:
            self.clear_filters()
            return
        for arg in args:
            if ":" not in arg:
                # assume arg is filter_key
                self.clear_filter(arg)
                return
            try:
                # assume arg is key:val
                key, val = arg.split(":")
                f = self.get_filter(key)
                if not f:
                    self.add_to_filter(key, val)
                if val in f:
                    continue
                else:
                    # Ask the user what to do with the existing filter.
                    yield ALREADY_EXISTS.format(key=key), callback
            except ValueError:
                raise(ParseError(arg, "key:value"))

    def do_sort(self, args):
        """ Add sort criteria to the query output.
        Multiple sort criteria can be added following the pattern:
            sort field1:asc field2:des
        Specify `asc` or `des` for ascending or descending order.
        Order defaults to descending order.
        Possible value for field are [timestamp, username, corpus]. """
        orders = ('asc', 'des')
        for arg in args:
            try:
                prop, order = arg.split(':')
                assert order in orders, "Sort order must be in " + str(orders)
                self.add_sort(prop, order=order)
            except ValueError:
                # No ':' present: use the default descending order.
                self.add_sort(arg)

    def do_reconnect(self, args):
        """ Refresh MongoClient connection. """
        # Raising AutoReconnect lets `parse` perform the sleep/retry cycle.
        raise pymongo.errors.AutoReconnect()

    def do_config(self, args):
        """ Not implemented yet
        Change session configuration. Values:
            page_size: type int, default 10 """
        raise NotImplementedError()

    def do_exit(self, args):
        """ Exit the application. """
        print("See you soon!")
        sys.exit(0)

    def do_help(self, args):
        """ Show help. Takes a command and display corresponding info.
        Example:
            help filter """
        if len(args) == 0:
            print("Please, specify a command.")
            return
        try:
            cmd, *rest = args  # ignore rest arguments
            print(" " + getattr(self, "do_" + cmd).__doc__.strip())
        except AttributeError:
            self._print_suggestions(cmd)

    def get_coll_names(self):
        """Yield project collection names (underscore-prefixed, not _vcs)."""
        for coll_name in self.conn.collection_names():
            if coll_name.startswith('_') and coll_name != "_vcs":
                yield coll_name

    def get_project_name(self, coll_name):
        """Strip the leading underscore from a project collection name."""
        return coll_name[1:]

    def get_project_names(self):
        """Yield all project names in the database."""
        for coll_name in self.get_coll_names():
            yield self.get_project_name(coll_name)

    def get_project(self, project):
        """Return the collection backing `project` ('_' + project)."""
        return self.conn['_' + project]

    def get_projects(self):
        """Yield every project collection."""
        for project_name in self.get_project_names():
            yield self.get_project(project_name)

    def do_projects(self, args):
        """ Show information about projects
            projects: show existing projects """
        if len(args) == 0:
            for project_name in self.get_project_names():
                print(project_name)

    def groupcounts(self, project, groupkeys):
        """Count documents matching the filters, grouped by `groupkeys`.

        Returns a list of dicts each holding the group-key values plus a
        'count' entry.
        """
        result = []
        cursor = project.aggregate(
            [{"$match": self.query_filters()},
             {"$group": {"_id": {k: "$" + k for k in groupkeys},
                         "count": {"$sum": 1}}}])
        for row in cursor:
            # transform mongodb cursor output
            row_dict = {}
            row_dict['count'] = row['count']
            for key, value in row["_id"].items():
                assert key != "count", "Found count field in groupby keys"
                row_dict[key] = value
            result.append(row_dict)
        return result

    def simplecounts(self, project):
        """Return the total number of documents matching the filters."""
        return project.find(self.query_filters()).count()

    def do_count(self, args):
        """ Count annotations using the current filters.
        Specify a project to get only counts for that project:
            count myProject
        Multiple projects can be specified with commas:
            count myProject,projectTest,otherProject """
        assert args, "Specify a project (e.g. GET) or 'all' for all projects"
        if self.verbose:
            pprint(self.query_filters())
        # parse arguments
        project, *rest = args
        parsed_args = parse_rest(rest, {'groupby': ['key1,key2,etc'],
                                        'output': ['filename']})
        # gather projects
        if project == 'all':
            projects = self.get_project_names()
        else:
            projects = project.split(',')
        by_project = {}
        for project_name in projects:
            project = self.get_project(project_name)
            # compute counts
            if 'groupby' in parsed_args:
                groupkeys = parsed_args['groupby']['key1,key2,etc'].split(',')
                counts = self.groupcounts(project, groupkeys)
                if counts:
                    by_project[project_name] = counts
                else:
                    print("Empty results for project [%s]" % project_name)
            else:
                by_project[project_name] = self.simplecounts(project)
        # display output
        if 'output' in parsed_args:
            outfile = parsed_args['output']['filename']
            ext = get_extension(outfile)
            if ext == 'csv':
                if 'groupby' in parsed_args:
                    writers.csv_count_group(by_project, outfile)
                else:
                    writers.csv_count(by_project, outfile)
            else:
                raise ValueError("Unrecognized extension [%s]" % ext)
        else:
            if 'groupby' in parsed_args:
                writers.print_count_group(by_project)
            else:
                writers.print_count(by_project)
# # Generates a data summary for a given table that follows the schema in schemas.txt. # # Arguments: A list of groups to build indexes for. # Ex: To build a table for clean_data and all_data, the command should be: # python generate-indexes.py all clean # if len(sys.argv) <= 1: print("Error: Please enter a collection to index!") print("Aborting.") sys.exit() db = MongoClient().viraldb collections = db.collection_names() for collection in sys.argv[1:]: collection_data = collection + '_data' collection_indexes = collection + '_indexes' if 'data' in collection: print("Please do not include 'data' in the collection...") print("Skipping.") continue elif 'index' in collection: print("Please do not include 'index' in the collection...") print("Skipping.") continue elif not collection_data in collections: print("Collection specified not found!")
import configparser  # Python 3 name of the Python 2 ConfigParser module
import random
from pymongo import MongoClient

# MongoDB basics demo: insert/save to add, drop/remove to delete,
# update to modify, find to query.
#
# Fixes over the original:
#   * `dbhost` was referenced while the config-reading code was commented
#     out, which raised NameError; the config read is restored with a
#     localhost fallback.
#   * Loop variable `id` shadowed the builtin; renamed to `uid`.
#   * Python 3 print function instead of Python 2 print statements;
#     `except NoSectionError, e` syntax replaced with `as`-form.
#   * Comments translated to English.

# Read the database host from a config file, falling back to localhost
# when the file/section is missing.
config = configparser.RawConfigParser()
config.read(r'C:\Users\ieware\pyexercises\august.conf')  # raw string: '\' not escaped
try:
    dbhost = config.get('base', 'dbhost')
except (configparser.NoSectionError, configparser.NoOptionError) as e:
    print(e)
    dbhost = 'localhost'

db = MongoClient(dbhost).august  # connect to the "august" database
# db.authenticate  # user authentication (left disabled)

print(db.collection_names())  # like `show collections`: all collection names
db.user.count()  # number of documents in the user collection
db.user.drop()   # empty the user collection
db.user.save({'id': 1, 'name': 'nana', 'age': '23'})  # insert one document

# insert several documents with random names/ages
for uid in range(2, 10):
    name = random.choice(['lily', 'tom', 'jack'])
    age = random.choice(['23', '34', '58'])
    db.user.insert({'id': uid, 'name': name, 'age': age})

users = db.user.find()  # lazy cursor over all user documents

# Change the first 'age' == '23' to '33'; upsert=False means no insert
# when nothing matches, multi=False means only the first match is updated.
db.user.update({'age': '23'}, {'$set': {'age': '33'}}, upsert=False, multi=False)
db.user.remove({'id': 1})  # delete the document with id 1

# NOTE: the cursor is only evaluated here, after update/remove, so the
# output reflects the modified collection.
for x in users:
    print(x['age'], x['name'], x['id'])